Skip to content

Commit 7a75e66

Browse files
barrbrainjrn
authored and committed
vcs-svn: implement text-delta handling
Handle input in Subversion's dumpfile format, version 3. This is the format produced by "svnrdump dump" and "svnadmin dump --deltas", and the main difference between v3 dumpfiles and the dumpfiles already handled is that these can include nodes whose properties and text are expressed relative to some other node. To handle such nodes, we find which node the text and properties are based on, handle its property changes, use the cat-blob command to request the basis blob from the fast-import backend, use the svndiff0_apply() helper to apply the text delta on the fly, writing output to a temporary file, and then measure that postimage file's length and write its content to the fast-import stream. The temporary postimage file is shared between delta-using nodes to avoid some file system overhead. The svn-fe interface needs to be more complicated to accommodate the backward flow of information from the fast-import backend to svn-fe. The backflow fd is not needed when parsing streams without deltas, though, so existing scripts using svn-fe on v2 dumps should continue to work. NEEDSWORK: generalize interface so caller sets the backflow fd, close temporary file before exiting Signed-off-by: David Barr <david.barr@cordelta.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com> Signed-off-by: David Barr <david.barr@cordelta.com> Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
1 parent e9f3f8b commit 7a75e66

5 files changed

Lines changed: 227 additions & 11 deletions

File tree

contrib/svn-fe/svn-fe.txt

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ SYNOPSIS
99
--------
1010
[verse]
1111
mkfifo backchannel &&
12-
svnadmin dump --incremental REPO |
12+
svnadmin dump --deltas REPO |
1313
svn-fe [url] 3<backchannel |
1414
git fast-import --cat-blob-fd=3 3>backchannel
1515

@@ -32,9 +32,6 @@ Subversion's repository dump format is documented in full in
3232
Files in this format can be generated using the 'svnadmin dump' or
3333
'svk admin dump' command.
3434

35-
Dumps produced with 'svnadmin dump --deltas' (dumpfile format v3)
36-
are not supported.
37-
3835
OUTPUT FORMAT
3936
-------------
4037
The fast-import format is documented by the git-fast-import(1)

t/t9010-svn-fe.sh

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -674,7 +674,7 @@ test_expect_success PIPE 'change file mode and reiterate content' '
674674
test_cmp hello actual.target
675675
'
676676

677-
test_expect_success PIPE 'deltas not supported' '
677+
test_expect_success PIPE 'deltas supported' '
678678
reinit_git &&
679679
{
680680
# (old) h + (inline) ello + (old) \n
@@ -735,7 +735,7 @@ test_expect_success PIPE 'deltas not supported' '
735735
echo PROPS-END &&
736736
cat delta
737737
} >delta.dump &&
738-
test_must_fail try_dump delta.dump
738+
try_dump delta.dump
739739
'
740740

741741
test_expect_success PIPE 'property deltas supported' '
@@ -942,6 +942,110 @@ test_expect_success PIPE 'deltas for typechange' '
942942
test_cmp expect actual
943943
'
944944

945+
test_expect_success PIPE 'deltas need not consume the whole preimage' '
946+
reinit_git &&
947+
cat >expect <<-\EOF &&
948+
OBJID
949+
:120000 100644 OBJID OBJID T postimage
950+
OBJID
951+
:100644 120000 OBJID OBJID T postimage
952+
OBJID
953+
:000000 100644 OBJID OBJID A postimage
954+
EOF
955+
echo "first preimage" >expect.1 &&
956+
printf target >expect.2 &&
957+
printf lnk >expect.3 &&
958+
{
959+
printf "SVNQ%b%b%b" "QQ\017\001\017" "\0217" "first preimage\n" |
960+
q_to_nul
961+
} >delta.1 &&
962+
{
963+
properties svn:special "*" &&
964+
echo PROPS-END
965+
} >symlink.props &&
966+
{
967+
printf "SVNQ%b%b%b" "Q\002\013\004\012" "\0201\001\001\0211" "lnk target" |
968+
q_to_nul
969+
} >delta.2 &&
970+
{
971+
printf "SVNQ%b%b" "Q\004\003\004Q" "\001Q\002\002" |
972+
q_to_nul
973+
} >delta.3 &&
974+
{
975+
cat <<-\EOF &&
976+
SVN-fs-dump-format-version: 3
977+
978+
Revision-number: 1
979+
Prop-content-length: 10
980+
Content-length: 10
981+
982+
PROPS-END
983+
984+
Node-path: postimage
985+
Node-kind: file
986+
Node-action: add
987+
Text-delta: true
988+
Prop-content-length: 10
989+
EOF
990+
echo Text-content-length: $(wc -c <delta.1) &&
991+
echo Content-length: $((10 + $(wc -c <delta.1))) &&
992+
echo &&
993+
echo PROPS-END &&
994+
cat delta.1 &&
995+
cat <<-\EOF &&
996+
997+
Revision-number: 2
998+
Prop-content-length: 10
999+
Content-length: 10
1000+
1001+
PROPS-END
1002+
1003+
Node-path: postimage
1004+
Node-kind: file
1005+
Node-action: change
1006+
Text-delta: true
1007+
EOF
1008+
echo Prop-content-length: $(wc -c <symlink.props) &&
1009+
echo Text-content-length: $(wc -c <delta.2) &&
1010+
echo Content-length: $(($(wc -c <symlink.props) + $(wc -c <delta.2))) &&
1011+
echo &&
1012+
cat symlink.props &&
1013+
cat delta.2 &&
1014+
cat <<-\EOF &&
1015+
1016+
Revision-number: 3
1017+
Prop-content-length: 10
1018+
Content-length: 10
1019+
1020+
PROPS-END
1021+
1022+
Node-path: postimage
1023+
Node-kind: file
1024+
Node-action: change
1025+
Text-delta: true
1026+
Prop-content-length: 10
1027+
EOF
1028+
echo Text-content-length: $(wc -c <delta.3) &&
1029+
echo Content-length: $((10 + $(wc -c <delta.3))) &&
1030+
echo &&
1031+
echo PROPS-END &&
1032+
cat delta.3 &&
1033+
echo
1034+
} >deltapartial.dump &&
1035+
try_dump deltapartial.dump &&
1036+
{
1037+
git rev-list HEAD |
1038+
git diff-tree --root --stdin |
1039+
sed "s/$_x40/OBJID/g"
1040+
} >actual &&
1041+
test_cmp expect actual &&
1042+
git show HEAD:postimage >actual.3 &&
1043+
git show HEAD^:postimage >actual.2 &&
1044+
git show HEAD^^:postimage >actual.1 &&
1045+
test_cmp expect.1 actual.1 &&
1046+
test_cmp expect.2 actual.2 &&
1047+
test_cmp expect.3 actual.3
1048+
'
9451049

9461050
test_expect_success 'set up svn repo' '
9471051
svnconf=$PWD/svnconf &&

vcs-svn/fast_export.c

Lines changed: 108 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,38 @@
77
#include "strbuf.h"
88
#include "quote.h"
99
#include "fast_export.h"
10-
#include "line_buffer.h"
1110
#include "repo_tree.h"
1211
#include "strbuf.h"
12+
#include "svndiff.h"
13+
#include "sliding_window.h"
14+
#include "line_buffer.h"
1315

1416
#define MAX_GITSVN_LINE_LEN 4096
17+
#define REPORT_FILENO 3
1518

1619
static uint32_t first_commit_done;
20+
static struct line_buffer postimage = LINE_BUFFER_INIT;
1721
static struct line_buffer report_buffer = LINE_BUFFER_INIT;
1822

23+
/* NEEDSWORK: move to fast_export_init() */
24+
static int init_postimage(void)
25+
{
26+
static int postimage_initialized;
27+
if (postimage_initialized)
28+
return 0;
29+
postimage_initialized = 1;
30+
return buffer_tmpfile_init(&postimage);
31+
}
32+
33+
static int init_report_buffer(int fd)
34+
{
35+
static int report_buffer_initialized;
36+
if (report_buffer_initialized)
37+
return 0;
38+
report_buffer_initialized = 1;
39+
return buffer_fdinit(&report_buffer, fd);
40+
}
41+
1942
void fast_export_init(int fd)
2043
{
2144
if (buffer_fdinit(&report_buffer, fd))
@@ -132,6 +155,73 @@ static void die_short_read(struct line_buffer *input)
132155
die("invalid dump: unexpected end of file");
133156
}
134157

158+
/*
 * Report whether the first len bytes of s finish with the NUL-terminated
 * string suffix.
 *
 * s itself does not have to be NUL-terminated; only len bytes are
 * examined.  Returns 1 on a match and 0 otherwise.  An empty suffix
 * matches every input.
 */
static int ends_with(const char *s, size_t len, const char *suffix)
{
	const size_t n = strlen(suffix);
	const char *tail;

	/* A suffix longer than the input can never match. */
	if (n > len)
		return 0;
	tail = s + (len - n);
	return memcmp(tail, suffix, n) == 0;
}
165+
166+
static int parse_cat_response_line(const char *header, off_t *len)
167+
{
168+
size_t headerlen = strlen(header);
169+
const char *type;
170+
const char *end;
171+
172+
if (ends_with(header, headerlen, " missing"))
173+
return error("cat-blob reports missing blob: %s", header);
174+
type = memmem(header, headerlen, " blob ", strlen(" blob "));
175+
if (!type)
176+
return error("cat-blob header has wrong object type: %s", header);
177+
*len = strtoumax(type + strlen(" blob "), (char **) &end, 10);
178+
if (end == type + strlen(" blob "))
179+
return error("cat-blob header does not contain length: %s", header);
180+
if (*end)
181+
return error("cat-blob header contains garbage after length: %s", header);
182+
return 0;
183+
}
184+
185+
static long apply_delta(off_t len, struct line_buffer *input,
186+
const char *old_data, uint32_t old_mode)
187+
{
188+
long ret;
189+
off_t preimage_len = 0;
190+
struct sliding_view preimage = SLIDING_VIEW_INIT(&report_buffer);
191+
FILE *out;
192+
193+
if (init_postimage() || !(out = buffer_tmpfile_rewind(&postimage)))
194+
die("cannot open temporary file for blob retrieval");
195+
if (init_report_buffer(REPORT_FILENO))
196+
die("cannot open fd 3 for feedback from fast-import");
197+
if (old_data) {
198+
const char *response;
199+
printf("cat-blob %s\n", old_data);
200+
fflush(stdout);
201+
response = get_response_line();
202+
if (parse_cat_response_line(response, &preimage_len))
203+
die("invalid cat-blob response: %s", response);
204+
}
205+
if (old_mode == REPO_MODE_LNK) {
206+
strbuf_addstr(&preimage.buf, "link ");
207+
preimage_len += strlen("link ");
208+
}
209+
if (svndiff0_apply(input, len, &preimage, out))
210+
die("cannot apply delta");
211+
if (old_data) {
212+
/* Read the remainder of preimage and trailing newline. */
213+
if (move_window(&preimage, preimage_len, 1))
214+
die("cannot seek to end of input");
215+
if (preimage.buf.buf[0] != '\n')
216+
die("missing newline after cat-blob response");
217+
}
218+
ret = buffer_tmpfile_prepare_to_read(&postimage);
219+
if (ret < 0)
220+
die("cannot read temporary file for blob retrieval");
221+
strbuf_release(&preimage.buf);
222+
return ret;
223+
}
224+
135225
void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input)
136226
{
137227
if (mode == REPO_MODE_LNK) {
@@ -199,3 +289,20 @@ int fast_export_ls(const char *path, uint32_t *mode, struct strbuf *dataref)
199289
ls_from_active_commit(path);
200290
return parse_ls_response(get_response_line(), mode, dataref);
201291
}
292+
293+
void fast_export_blob_delta(uint32_t mode,
294+
uint32_t old_mode, const char *old_data,
295+
uint32_t len, struct line_buffer *input)
296+
{
297+
long postimage_len;
298+
if (len > maximum_signed_value_of_type(off_t))
299+
die("enormous delta");
300+
postimage_len = apply_delta((off_t) len, input, old_data, old_mode);
301+
if (mode == REPO_MODE_LNK) {
302+
buffer_skip_bytes(&postimage, strlen("link "));
303+
postimage_len -= strlen("link ");
304+
}
305+
printf("data %ld\n", postimage_len);
306+
buffer_copy_bytes(&postimage, postimage_len);
307+
fputc('\n', stdout);
308+
}

vcs-svn/fast_export.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ void fast_export_begin_commit(uint32_t revision, const char *author,
1515
const char *url, unsigned long timestamp);
1616
void fast_export_end_commit(uint32_t revision);
1717
void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input);
18+
void fast_export_blob_delta(uint32_t mode,
19+
uint32_t old_mode, const char *old_data,
20+
uint32_t len, struct line_buffer *input);
1821

1922
/* If there is no such file at that rev, returns -1, errno == ENOENT. */
2023
int fast_export_ls_rev(uint32_t rev, const char *path,

vcs-svn/svndump.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -217,9 +217,7 @@ static void handle_node(void)
217217
*/
218218
static const char *const empty_blob = "::empty::";
219219
const char *old_data = NULL;
220-
221-
if (node_ctx.text_delta)
222-
die("text deltas not supported");
220+
uint32_t old_mode = REPO_MODE_BLB;
223221

224222
if (node_ctx.action == NODEACT_DELETE) {
225223
if (have_text || have_props || node_ctx.srcRev)
@@ -255,6 +253,7 @@ static void handle_node(void)
255253
if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR)
256254
die("invalid dump: cannot modify a file into a directory");
257255
node_ctx.type = mode;
256+
old_mode = mode;
258257
} else if (node_ctx.action == NODEACT_ADD) {
259258
if (type == REPO_MODE_DIR)
260259
old_data = NULL;
@@ -289,8 +288,14 @@ static void handle_node(void)
289288
fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data);
290289
return;
291290
}
291+
if (!node_ctx.text_delta) {
292+
fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
293+
fast_export_data(node_ctx.type, node_ctx.textLength, &input);
294+
return;
295+
}
292296
fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline");
293-
fast_export_data(node_ctx.type, node_ctx.textLength, &input);
297+
fast_export_blob_delta(node_ctx.type, old_mode, old_data,
298+
node_ctx.textLength, &input);
294299
}
295300

296301
static void begin_revision(void)

0 commit comments

Comments
 (0)