Skip to content

Commit c18b80a

Browse files
pcloudsgitster
authored and committed
update-index: new options to enable/disable split index mode
If you have a large work tree but only make changes in a subset, then $GIT_DIR/index's size should be stable after a while. If you change branches that touch something else, $GIT_DIR/index's size may grow so large that it becomes as slow as the unified index. Do --split-index again occasionally to force all changes back to the shared index and keep $GIT_DIR/index small. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent b3c96fb commit c18b80a

5 files changed

Lines changed: 114 additions & 6 deletions

File tree

Documentation/git-update-index.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,17 @@ may not support it yet.
161161
Only meaningful with `--stdin` or `--index-info`; paths are
162162
separated with NUL character instead of LF.
163163

164+
--split-index::
165+
--no-split-index::
166+
Enable or disable split index mode. If enabled, the index is
167+
split into two files, $GIT_DIR/index and $GIT_DIR/sharedindex.<SHA-1>.
168+
Changes are accumulated in $GIT_DIR/index while the shared
169+
index file, which contains all index entries, stays unchanged. If
170+
split-index mode is already enabled and `--split-index` is
171+
given again, all changes in $GIT_DIR/index are pushed back to
172+
the shared index file. This mode is designed for very large
173+
indexes that take a significant amount of time to read or write.
174+
164175
\--::
165176
Do not interpret any more arguments as options.
166177

builtin/update-index.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "parse-options.h"
1414
#include "pathspec.h"
1515
#include "dir.h"
16+
#include "split-index.h"
1617

1718
/*
1819
* Default to not allowing changes to the list of files. The
@@ -742,6 +743,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
742743
char set_executable_bit = 0;
743744
struct refresh_params refresh_args = {0, &has_errors};
744745
int lock_error = 0;
746+
int split_index = -1;
745747
struct lock_file *lock_file;
746748
struct parse_opt_ctx_t ctx;
747749
int parseopt_state = PARSE_OPT_UNKNOWN;
@@ -824,6 +826,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
824826
resolve_undo_clear_callback},
825827
OPT_INTEGER(0, "index-version", &preferred_index_format,
826828
N_("write index in this format")),
829+
OPT_BOOL(0, "split-index", &split_index,
830+
N_("enable or disable split index")),
827831
OPT_END()
828832
};
829833

@@ -917,6 +921,20 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
917921
strbuf_release(&buf);
918922
}
919923

924+
if (split_index > 0) {
925+
init_split_index(&the_index);
926+
the_index.cache_changed |= SPLIT_INDEX_ORDERED;
927+
} else if (!split_index && the_index.split_index) {
928+
/*
929+
* can't discard_split_index(&the_index); because that
930+
* will destroy split_index->base->cache[], which may
931+
* be shared with the_index.cache[]. So yeah we're
932+
* leaking a bit here.
933+
*/
934+
the_index.split_index = NULL;
935+
the_index.cache_changed |= SOMETHING_CHANGED;
936+
}
937+
920938
if (active_cache_changed) {
921939
if (newfd < 0) {
922940
if (refresh_args.flags & REFRESH_QUIET)

cache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ static inline unsigned int canon_mode(unsigned int mode)
278278
#define CE_ENTRY_ADDED (1 << 3)
279279
#define RESOLVE_UNDO_CHANGED (1 << 4)
280280
#define CACHE_TREE_CHANGED (1 << 5)
281+
#define SPLIT_INDEX_ORDERED (1 << 6)
281282

282283
struct split_index;
283284
struct index_state {

read-cache.c

Lines changed: 61 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "strbuf.h"
1616
#include "varint.h"
1717
#include "split-index.h"
18+
#include "sigchain.h"
1819

1920
static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
2021
unsigned int options);
@@ -39,7 +40,8 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
3940

4041
/* changes that can be kept in $GIT_DIR/index (basically all extensions) */
4142
#define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
42-
CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED)
43+
CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
44+
SPLIT_INDEX_ORDERED)
4345

4446
struct index_state the_index;
4547
static const char *alternate_index_output;
@@ -1860,7 +1862,8 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile
18601862
rollback_lock_file(lockfile);
18611863
}
18621864

1863-
static int do_write_index(struct index_state *istate, int newfd)
1865+
static int do_write_index(struct index_state *istate, int newfd,
1866+
int strip_extensions)
18641867
{
18651868
git_SHA_CTX c;
18661869
struct cache_header hdr;
@@ -1923,7 +1926,7 @@ static int do_write_index(struct index_state *istate, int newfd)
19231926
strbuf_release(&previous_name_buf);
19241927

19251928
/* Write extension data here */
1926-
if (istate->split_index) {
1929+
if (!strip_extensions && istate->split_index) {
19271930
struct strbuf sb = STRBUF_INIT;
19281931

19291932
err = write_link_extension(&sb, istate) < 0 ||
@@ -1934,7 +1937,7 @@ static int do_write_index(struct index_state *istate, int newfd)
19341937
if (err)
19351938
return -1;
19361939
}
1937-
if (istate->cache_tree) {
1940+
if (!strip_extensions && istate->cache_tree) {
19381941
struct strbuf sb = STRBUF_INIT;
19391942

19401943
cache_tree_write(&sb, istate->cache_tree);
@@ -1944,7 +1947,7 @@ static int do_write_index(struct index_state *istate, int newfd)
19441947
if (err)
19451948
return -1;
19461949
}
1947-
if (istate->resolve_undo) {
1950+
if (!strip_extensions && istate->resolve_undo) {
19481951
struct strbuf sb = STRBUF_INIT;
19491952

19501953
resolve_undo_write(&sb, istate->resolve_undo);
@@ -1985,7 +1988,7 @@ static int commit_locked_index(struct lock_file *lk)
19851988
static int do_write_locked_index(struct index_state *istate, struct lock_file *lock,
19861989
unsigned flags)
19871990
{
1988-
int ret = do_write_index(istate, lock->fd);
1991+
int ret = do_write_index(istate, lock->fd, 0);
19891992
if (ret)
19901993
return ret;
19911994
assert((flags & (COMMIT_LOCK | CLOSE_LOCK)) !=
@@ -2009,6 +2012,52 @@ static int write_split_index(struct index_state *istate,
20092012
return ret;
20102013
}
20112014

2015+
static char *temporary_sharedindex;
2016+
2017+
static void remove_temporary_sharedindex(void)
2018+
{
2019+
if (temporary_sharedindex) {
2020+
unlink_or_warn(temporary_sharedindex);
2021+
free(temporary_sharedindex);
2022+
temporary_sharedindex = NULL;
2023+
}
2024+
}
2025+
2026+
/*
 * Signal-time cleanup for the temporary shared index file.
 *
 * The order matters: the temporary file is removed first, then this
 * handler is popped from the sigchain for `signo` (presumably restoring
 * whatever was installed before it -- confirm against sigchain.h), and
 * finally the signal is raised again so it is delivered to that
 * disposition instead of being swallowed here.
 */
static void remove_temporary_sharedindex_on_signal(int signo)
{
	remove_temporary_sharedindex();

	sigchain_pop(signo);
	raise(signo);
}
2032+
2033+
static int write_shared_index(struct index_state *istate)
2034+
{
2035+
struct split_index *si = istate->split_index;
2036+
static int installed_handler;
2037+
int fd, ret;
2038+
2039+
temporary_sharedindex = git_pathdup("sharedindex_XXXXXX");
2040+
fd = xmkstemp(temporary_sharedindex);
2041+
if (!installed_handler) {
2042+
atexit(remove_temporary_sharedindex);
2043+
sigchain_push_common(remove_temporary_sharedindex_on_signal);
2044+
}
2045+
move_cache_to_base_index(istate);
2046+
ret = do_write_index(si->base, fd, 1);
2047+
close(fd);
2048+
if (ret) {
2049+
remove_temporary_sharedindex();
2050+
return ret;
2051+
}
2052+
ret = rename(temporary_sharedindex,
2053+
git_path("sharedindex.%s", sha1_to_hex(si->base->sha1)));
2054+
free(temporary_sharedindex);
2055+
temporary_sharedindex = NULL;
2056+
if (!ret)
2057+
hashcpy(si->base_sha1, si->base->sha1);
2058+
return ret;
2059+
}
2060+
20122061
int write_locked_index(struct index_state *istate, struct lock_file *lock,
20132062
unsigned flags)
20142063
{
@@ -2020,6 +2069,12 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock,
20202069
return do_write_locked_index(istate, lock, flags);
20212070
}
20222071

2072+
if (istate->cache_changed & SPLIT_INDEX_ORDERED) {
2073+
int ret = write_shared_index(istate);
2074+
if (ret)
2075+
return ret;
2076+
}
2077+
20232078
return write_split_index(istate, lock, flags);
20242079
}
20252080

split-index.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,29 @@ static void mark_base_index_entries(struct index_state *base)
7474
base->cache[i]->index = i + 1;
7575
}
7676

77+
void move_cache_to_base_index(struct index_state *istate)
78+
{
79+
struct split_index *si = istate->split_index;
80+
int i;
81+
82+
/*
83+
* do not delete old si->base, its index entries may be shared
84+
* with istate->cache[]. Accept a bit of leaking here because
85+
* this code is only used by short-lived update-index.
86+
*/
87+
si->base = xcalloc(1, sizeof(*si->base));
88+
si->base->version = istate->version;
89+
/* zero timestamp disables racy test in ce_write_index() */
90+
si->base->timestamp = istate->timestamp;
91+
ALLOC_GROW(si->base->cache, istate->cache_nr, si->base->cache_alloc);
92+
si->base->cache_nr = istate->cache_nr;
93+
memcpy(si->base->cache, istate->cache,
94+
sizeof(*istate->cache) * istate->cache_nr);
95+
mark_base_index_entries(si->base);
96+
for (i = 0; i < si->base->cache_nr; i++)
97+
si->base->cache[i]->ce_flags &= ~CE_UPDATE_IN_BASE;
98+
}
99+
77100
static void mark_entry_for_delete(size_t pos, void *data)
78101
{
79102
struct index_state *istate = data;

0 commit comments

Comments
 (0)