Skip to content

Commit 8eaf517

Browse files
committed
Merge branch 'ks/tree-diff-nway'
Instead of running N pair-wise diff-trees when inspecting an N-parent merge, find the set of paths that were touched by walking N+1 trees in parallel. This set of paths can then be turned into N pair-wise diff-tree results to be processed through rename detection and such. And the N=2 case nicely degenerates to the usual 2-way diff-tree, which is very nice. * ks/tree-diff-nway: mingw: activate alloca combine-diff: speed it up, by using multiparent diff tree-walker directly tree-diff: rework diff_tree() to generate diffs for multiparent cases as well Portable alloca for Git tree-diff: reuse base str(buf) memory on sub-tree recursion tree-diff: no need to call "full" diff_tree_sha1 from show_path() tree-diff: rework diff_tree interface to be sha1 based tree-diff: diff_tree() should now be static tree-diff: remove special-case diff-emitting code for empty-tree cases tree-diff: simplify tree_entry_pathcmp tree-diff: show_path prototype is not needed anymore tree-diff: rename compare_tree_entry -> tree_entry_pathcmp tree-diff: move all action-taking code out of compare_tree_entry() tree-diff: don't assume compare_tree_entry() returns -1,0,1 tree-diff: consolidate code for emitting diffs and recursion in one place tree-diff: show_tree() is not needed tree-diff: no need to pass match to skip_uninteresting() tree-diff: no need to manually verify that there is no mode change for a path combine-diff: move changed-paths scanning logic into its own function combine-diff: move show_log_first logic/action out of paths scanning
2 parents f008cef + 22f4c27 commit 8eaf517

10 files changed

Lines changed: 723 additions & 170 deletions

File tree

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ all::
3030
# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
3131
# /foo/bar/include and /foo/bar/lib directories.
3232
#
33+
# Define HAVE_ALLOCA_H if you have a working alloca(3) declared in <alloca.h>.
34+
#
3335
# Define NO_CURL if you do not have libcurl installed. git-http-fetch and
3436
# git-http-push are not built, and you cannot use http:// and https://
3537
# transports (neither smart nor dumb).
@@ -1111,6 +1113,10 @@ ifdef USE_LIBPCRE
11111113
EXTLIBS += -lpcre
11121114
endif
11131115

1116+
ifdef HAVE_ALLOCA_H
1117+
BASIC_CFLAGS += -DHAVE_ALLOCA_H
1118+
endif
1119+
11141120
ifdef NO_CURL
11151121
BASIC_CFLAGS += -DNO_CURL
11161122
REMOTE_CURL_PRIMARY =

cache.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,21 @@ unsigned long git_deflate_bound(git_zstream *, unsigned long);
7474
#define S_IFGITLINK 0160000
7575
#define S_ISGITLINK(m) (((m) & S_IFMT) == S_IFGITLINK)
7676

77+
/*
78+
* Some mode bits are also used internally for computations.
79+
*
80+
* They *must* not overlap with any valid modes, and they *must* not be emitted
81+
* to the outside world - i.e. appear on disk or network. In other words, these are
82+
* just temporary fields, which we use internally, but they have to stay in-house.
83+
*
84+
* ( such an approach is valid, as the standard S_IF* bits fit into 16 bits, and in the Git
85+
* codebase mode is `unsigned int`, which is assumed to be at least 32 bits )
86+
*/
87+
88+
/* used internally in tree-diff */
89+
#define S_DIFFTREE_IFXMIN_NEQ 0x80000000
90+
91+
7792
/*
7893
* Intensive research over the course of many years has shown that
7994
* port 9418 is totally unused by anything else. Or

combine-diff.c

Lines changed: 138 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1301,56 +1301,164 @@ static const char *path_path(void *obj)
13011301
return path->path;
13021302
}
13031303

1304+
1305+
/* find set of paths that every parent touches */
1306+
static struct combine_diff_path *find_paths_generic(const unsigned char *sha1,
1307+
const struct sha1_array *parents, struct diff_options *opt)
1308+
{
1309+
struct combine_diff_path *paths = NULL;
1310+
int i, num_parent = parents->nr;
1311+
1312+
int output_format = opt->output_format;
1313+
const char *orderfile = opt->orderfile;
1314+
1315+
opt->output_format = DIFF_FORMAT_NO_OUTPUT;
1316+
/* tell diff_tree to emit paths in sorted (=tree) order */
1317+
opt->orderfile = NULL;
1318+
1319+
/* D(A,P1...Pn) = D(A,P1) ^ ... ^ D(A,Pn) (wrt paths) */
1320+
for (i = 0; i < num_parent; i++) {
1321+
/*
1322+
* show stat against the first parent even when doing
1323+
* combined diff.
1324+
*/
1325+
int stat_opt = (output_format &
1326+
(DIFF_FORMAT_NUMSTAT|DIFF_FORMAT_DIFFSTAT));
1327+
if (i == 0 && stat_opt)
1328+
opt->output_format = stat_opt;
1329+
else
1330+
opt->output_format = DIFF_FORMAT_NO_OUTPUT;
1331+
diff_tree_sha1(parents->sha1[i], sha1, "", opt);
1332+
diffcore_std(opt);
1333+
paths = intersect_paths(paths, i, num_parent);
1334+
1335+
/* if showing diff, show it in requested order */
1336+
if (opt->output_format != DIFF_FORMAT_NO_OUTPUT &&
1337+
orderfile) {
1338+
diffcore_order(orderfile);
1339+
}
1340+
1341+
diff_flush(opt);
1342+
}
1343+
1344+
opt->output_format = output_format;
1345+
opt->orderfile = orderfile;
1346+
return paths;
1347+
}
1348+
1349+
1350+
/*
1351+
* find set of paths that everybody touches, assuming diff is run without
1352+
* rename/copy detection, etc, comparing all trees simultaneously (= faster).
1353+
*/
1354+
static struct combine_diff_path *find_paths_multitree(
1355+
const unsigned char *sha1, const struct sha1_array *parents,
1356+
struct diff_options *opt)
1357+
{
1358+
int i, nparent = parents->nr;
1359+
const unsigned char **parents_sha1;
1360+
struct combine_diff_path paths_head;
1361+
struct strbuf base;
1362+
1363+
parents_sha1 = xmalloc(nparent * sizeof(parents_sha1[0]));
1364+
for (i = 0; i < nparent; i++)
1365+
parents_sha1[i] = parents->sha1[i];
1366+
1367+
/* fake list head, so worker can assume it is non-NULL */
1368+
paths_head.next = NULL;
1369+
1370+
strbuf_init(&base, PATH_MAX);
1371+
diff_tree_paths(&paths_head, sha1, parents_sha1, nparent, &base, opt);
1372+
1373+
strbuf_release(&base);
1374+
free(parents_sha1);
1375+
return paths_head.next;
1376+
}
1377+
1378+
13041379
void diff_tree_combined(const unsigned char *sha1,
13051380
const struct sha1_array *parents,
13061381
int dense,
13071382
struct rev_info *rev)
13081383
{
13091384
struct diff_options *opt = &rev->diffopt;
13101385
struct diff_options diffopts;
1311-
struct combine_diff_path *p, *paths = NULL;
1386+
struct combine_diff_path *p, *paths;
13121387
int i, num_paths, needsep, show_log_first, num_parent = parents->nr;
1388+
int need_generic_pathscan;
1389+
1390+
/* nothing to do, if no parents */
1391+
if (!num_parent)
1392+
return;
1393+
1394+
show_log_first = !!rev->loginfo && !rev->no_commit_id;
1395+
needsep = 0;
1396+
if (show_log_first) {
1397+
show_log(rev);
1398+
1399+
if (rev->verbose_header && opt->output_format)
1400+
printf("%s%c", diff_line_prefix(opt),
1401+
opt->line_termination);
1402+
}
13131403

13141404
diffopts = *opt;
13151405
copy_pathspec(&diffopts.pathspec, &opt->pathspec);
1316-
diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
13171406
DIFF_OPT_SET(&diffopts, RECURSIVE);
13181407
DIFF_OPT_CLR(&diffopts, ALLOW_EXTERNAL);
1319-
/* tell diff_tree to emit paths in sorted (=tree) order */
1320-
diffopts.orderfile = NULL;
13211408

1322-
show_log_first = !!rev->loginfo && !rev->no_commit_id;
1323-
needsep = 0;
1324-
/* find set of paths that everybody touches */
1325-
for (i = 0; i < num_parent; i++) {
1326-
/* show stat against the first parent even
1409+
/* find set of paths that everybody touches
1410+
*
1411+
* NOTE
1412+
*
1413+
* Diffcore transformations are bound to diff_filespec and logic
1414+
* comparing two entries - i.e. they do not apply directly to combine
1415+
* diff.
1416+
*
1417+
* If any of these transformations is requested, we launch generic
1418+
* path scanning, which works significantly slower compared to
1419+
* simultaneous all-trees-in-one-go scan in find_paths_multitree().
1420+
*
1421+
* TODO some of the filters could be ported to work on
1422+
* combine_diff_paths - i.e. all functionality that skips paths, so in
1423+
* theory, we could end up having only multitree path scanning.
1424+
*
1425+
* NOTE please keep this semantically in sync with diffcore_std()
1426+
*/
1427+
need_generic_pathscan = opt->skip_stat_unmatch ||
1428+
DIFF_OPT_TST(opt, FOLLOW_RENAMES) ||
1429+
opt->break_opt != -1 ||
1430+
opt->detect_rename ||
1431+
opt->pickaxe ||
1432+
opt->filter;
1433+
1434+
1435+
if (need_generic_pathscan) {
1436+
/*
1437+
* NOTE generic case also handles --stat, as it computes
1438+
* diff(sha1,parent_i) for all i to do the job, specifically
1439+
* for parent0.
1440+
*/
1441+
paths = find_paths_generic(sha1, parents, &diffopts);
1442+
}
1443+
else {
1444+
int stat_opt;
1445+
paths = find_paths_multitree(sha1, parents, &diffopts);
1446+
1447+
/*
1448+
* show stat against the first parent even
13271449
* when doing combined diff.
13281450
*/
1329-
int stat_opt = (opt->output_format &
1451+
stat_opt = (opt->output_format &
13301452
(DIFF_FORMAT_NUMSTAT|DIFF_FORMAT_DIFFSTAT));
1331-
if (i == 0 && stat_opt)
1453+
if (stat_opt) {
13321454
diffopts.output_format = stat_opt;
1333-
else
1334-
diffopts.output_format = DIFF_FORMAT_NO_OUTPUT;
1335-
diff_tree_sha1(parents->sha1[i], sha1, "", &diffopts);
1336-
diffcore_std(&diffopts);
1337-
paths = intersect_paths(paths, i, num_parent);
13381455

1339-
if (show_log_first && i == 0) {
1340-
show_log(rev);
1341-
1342-
if (rev->verbose_header && opt->output_format)
1343-
printf("%s%c", diff_line_prefix(opt),
1344-
opt->line_termination);
1456+
diff_tree_sha1(parents->sha1[0], sha1, "", &diffopts);
1457+
diffcore_std(&diffopts);
1458+
if (opt->orderfile)
1459+
diffcore_order(opt->orderfile);
1460+
diff_flush(&diffopts);
13451461
}
1346-
1347-
/* if showing diff, show it in requested order */
1348-
if (diffopts.output_format != DIFF_FORMAT_NO_OUTPUT &&
1349-
opt->orderfile) {
1350-
diffcore_order(opt->orderfile);
1351-
}
1352-
1353-
diff_flush(&diffopts);
13541462
}
13551463

13561464
/* find out number of surviving paths */

config.mak.uname

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@ ifeq ($(uname_S),OSF1)
2828
NO_NSEC = YesPlease
2929
endif
3030
ifeq ($(uname_S),Linux)
31+
HAVE_ALLOCA_H = YesPlease
3132
NO_STRLCPY = YesPlease
3233
NO_MKSTEMPS = YesPlease
3334
HAVE_PATHS_H = YesPlease
3435
LIBC_CONTAINS_LIBINTL = YesPlease
3536
HAVE_DEV_TTY = YesPlease
3637
endif
3738
ifeq ($(uname_S),GNU/kFreeBSD)
39+
HAVE_ALLOCA_H = YesPlease
3840
NO_STRLCPY = YesPlease
3941
NO_MKSTEMPS = YesPlease
4042
HAVE_PATHS_H = YesPlease
@@ -103,6 +105,7 @@ ifeq ($(uname_S),SunOS)
103105
NEEDS_NSL = YesPlease
104106
SHELL_PATH = /bin/bash
105107
SANE_TOOL_PATH = /usr/xpg6/bin:/usr/xpg4/bin
108+
HAVE_ALLOCA_H = YesPlease
106109
NO_STRCASESTR = YesPlease
107110
NO_MEMMEM = YesPlease
108111
NO_MKDTEMP = YesPlease
@@ -145,7 +148,7 @@ ifeq ($(uname_S),SunOS)
145148
endif
146149
INSTALL = /usr/ucb/install
147150
TAR = gtar
148-
BASIC_CFLAGS += -D__EXTENSIONS__ -D__sun__ -DHAVE_ALLOCA_H
151+
BASIC_CFLAGS += -D__EXTENSIONS__ -D__sun__
149152
endif
150153
ifeq ($(uname_O),Cygwin)
151154
ifeq ($(shell expr "$(uname_R)" : '1\.[1-6]\.'),4)
@@ -165,6 +168,7 @@ ifeq ($(uname_O),Cygwin)
165168
else
166169
NO_REGEX = UnfortunatelyYes
167170
endif
171+
HAVE_ALLOCA_H = YesPlease
168172
NEEDS_LIBICONV = YesPlease
169173
NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes
170174
NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease
@@ -239,6 +243,7 @@ ifeq ($(uname_S),AIX)
239243
endif
240244
ifeq ($(uname_S),GNU)
241245
# GNU/Hurd
246+
HAVE_ALLOCA_H = YesPlease
242247
NO_STRLCPY = YesPlease
243248
NO_MKSTEMPS = YesPlease
244249
HAVE_PATHS_H = YesPlease
@@ -313,6 +318,7 @@ endif
313318
ifeq ($(uname_S),Windows)
314319
GIT_VERSION := $(GIT_VERSION).MSVC
315320
pathsep = ;
321+
HAVE_ALLOCA_H = YesPlease
316322
NO_PREAD = YesPlease
317323
NEEDS_CRYPTO_WITH_SSL = YesPlease
318324
NO_LIBGEN_H = YesPlease
@@ -357,7 +363,7 @@ ifeq ($(uname_S),Windows)
357363
COMPAT_OBJS = compat/msvc.o compat/winansi.o \
358364
compat/win32/pthread.o compat/win32/syslog.o \
359365
compat/win32/dirent.o
360-
COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -DHAVE_ALLOCA_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\"
366+
COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\"
361367
BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO -SUBSYSTEM:CONSOLE -NODEFAULTLIB:MSVCRT.lib
362368
EXTLIBS = user32.lib advapi32.lib shell32.lib wininet.lib ws2_32.lib invalidcontinue.obj
363369
PTHREAD_LIBS =
@@ -465,6 +471,7 @@ ifeq ($(uname_S),NONSTOP_KERNEL)
465471
endif
466472
ifneq (,$(findstring MINGW,$(uname_S)))
467473
pathsep = ;
474+
HAVE_ALLOCA_H = YesPlease
468475
NO_PREAD = YesPlease
469476
NEEDS_CRYPTO_WITH_SSL = YesPlease
470477
NO_LIBGEN_H = YesPlease

configure.ac

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,14 @@ AS_HELP_STRING([], [ARG can be also prefix for libpcre library and hea
272272
GIT_CONF_SUBST([LIBPCREDIR])
273273
fi)
274274
#
275+
# Define HAVE_ALLOCA_H if you have a working alloca(3) declared in <alloca.h>.
276+
AC_FUNC_ALLOCA
277+
case $ac_cv_working_alloca_h in
278+
yes) HAVE_ALLOCA_H=YesPlease;;
279+
*) HAVE_ALLOCA_H='';;
280+
esac
281+
GIT_CONF_SUBST([HAVE_ALLOCA_H])
282+
#
275283
# Define NO_CURL if you do not have curl installed. git-http-pull and
276284
# git-http-push are not built, and you cannot use http:// and https://
277285
# transports.

diff.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3205,6 +3205,7 @@ void diff_setup(struct diff_options *options)
32053205
options->context = diff_context_default;
32063206
DIFF_OPT_SET(options, RENAME_EMPTY);
32073207

3208+
/* pathchange left =NULL by default */
32083209
options->change = diff_change;
32093210
options->add_remove = diff_addremove;
32103211
options->use_color = diff_use_color_default;
@@ -4749,6 +4750,7 @@ void diffcore_fix_diff_index(struct diff_options *options)
47494750

47504751
void diffcore_std(struct diff_options *options)
47514752
{
4753+
/* NOTE please keep the following in sync with diff_tree_combined() */
47524754
if (options->skip_stat_unmatch)
47534755
diffcore_skip_stat_unmatch(options);
47544756
if (!options->found_follow) {

diff.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ struct diff_filespec;
1515
struct userdiff_driver;
1616
struct sha1_array;
1717
struct commit;
18+
struct combine_diff_path;
19+
20+
typedef int (*pathchange_fn_t)(struct diff_options *options,
21+
struct combine_diff_path *path);
1822

1923
typedef void (*change_fn_t)(struct diff_options *options,
2024
unsigned old_mode, unsigned new_mode,
@@ -157,6 +161,7 @@ struct diff_options {
157161
int close_file;
158162

159163
struct pathspec pathspec;
164+
pathchange_fn_t pathchange;
160165
change_fn_t change;
161166
add_remove_fn_t add_remove;
162167
diff_format_fn_t format_callback;
@@ -189,8 +194,10 @@ const char *diff_line_prefix(struct diff_options *);
189194

190195
extern const char mime_boundary_leader[];
191196

192-
extern int diff_tree(struct tree_desc *t1, struct tree_desc *t2,
193-
const char *base, struct diff_options *opt);
197+
extern struct combine_diff_path *diff_tree_paths(
198+
struct combine_diff_path *p, const unsigned char *sha1,
199+
const unsigned char **parent_sha1, int nparent,
200+
struct strbuf *base, struct diff_options *opt);
194201
extern int diff_tree_sha1(const unsigned char *old, const unsigned char *new,
195202
const char *base, struct diff_options *opt);
196203
extern int diff_root_tree_sha1(const unsigned char *new, const char *base,

git-compat-util.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,14 @@ extern void release_pack_memory(size_t);
521521
typedef void (*try_to_free_t)(size_t);
522522
extern try_to_free_t set_try_to_free_routine(try_to_free_t);
523523

524+
#ifdef HAVE_ALLOCA_H
525+
# include <alloca.h>
526+
# define xalloca(size) (alloca(size))
527+
# define xalloca_free(p) do {} while (0)
528+
#else
529+
# define xalloca(size) (xmalloc(size))
530+
# define xalloca_free(p) (free(p))
531+
#endif
524532
extern char *xstrdup(const char *str);
525533
extern void *xmalloc(size_t size);
526534
extern void *xmallocz(size_t size);

0 commit comments

Comments
 (0)