@@ -51,11 +51,12 @@ struct packed_ref_cache {
5151 int mmapped ;
5252
5353 /*
54- * The contents of the `packed-refs` file. If the file is
55- * mmapped, this points at the mmapped contents of the file.
56- * If not, this points at heap-allocated memory containing the
57- * contents. If there were no contents (e.g., because the file
58- * didn't exist), `buf` and `eof` are both NULL.
54+ * The contents of the `packed-refs` file. If the file was
55+ * already sorted, this points at the mmapped contents of the
56+ * file. If not, this points at heap-allocated memory
57+ * containing the contents, sorted. If there were no contents
58+ * (e.g., because the file didn't exist), `buf` and `eof` are
59+ * both NULL.
5960 */
6061 char * buf , * eof ;
6162
@@ -358,7 +359,7 @@ struct ref_iterator *mmapped_ref_iterator_begin(
358359 if (!packed_refs -> buf )
359360 return empty_ref_iterator_begin ();
360361
361- base_ref_iterator_init (ref_iterator , & mmapped_ref_iterator_vtable , 0 );
362+ base_ref_iterator_init (ref_iterator , & mmapped_ref_iterator_vtable , 1 );
362363
363364 iter -> packed_refs = packed_refs ;
364365 acquire_packed_ref_cache (iter -> packed_refs );
@@ -371,6 +372,170 @@ struct ref_iterator *mmapped_ref_iterator_begin(
371372 return ref_iterator ;
372373}
373374
/*
 * Describes one record of the `packed-refs` buffer by position rather
 * than by parsed content. As filled in by sort_packed_refs(), a
 * record is a reference line plus the peeled ("^") line that follows
 * it, if there is one.
 */
struct packed_ref_entry {
	/* First byte of the record inside the buffer (not owned). */
	const char *start;

	/* Number of bytes in the record, including its trailing LF(s). */
	size_t len;
};
379+
380+ static int cmp_packed_ref_entries (const void * v1 , const void * v2 )
381+ {
382+ const struct packed_ref_entry * e1 = v1 , * e2 = v2 ;
383+ const char * r1 = e1 -> start + GIT_SHA1_HEXSZ + 1 ;
384+ const char * r2 = e2 -> start + GIT_SHA1_HEXSZ + 1 ;
385+
386+ while (1 ) {
387+ if (* r1 == '\n' )
388+ return * r2 == '\n' ? 0 : -1 ;
389+ if (* r1 != * r2 ) {
390+ if (* r2 == '\n' )
391+ return 1 ;
392+ else
393+ return (unsigned char )* r1 < (unsigned char )* r2 ? -1 : +1 ;
394+ }
395+ r1 ++ ;
396+ r2 ++ ;
397+ }
398+ }
399+
400+ /*
401+ * `packed_refs->buf` is not known to be sorted. Check whether it is,
402+ * and if not, sort it into new memory and munmap/free the old
403+ * storage.
404+ */
405+ static void sort_packed_refs (struct packed_ref_cache * packed_refs )
406+ {
407+ struct packed_ref_entry * entries = NULL ;
408+ size_t alloc = 0 , nr = 0 ;
409+ int sorted = 1 ;
410+ const char * pos , * eof , * eol ;
411+ size_t len , i ;
412+ char * new_buffer , * dst ;
413+
414+ pos = packed_refs -> buf + packed_refs -> header_len ;
415+ eof = packed_refs -> eof ;
416+ len = eof - pos ;
417+
418+ if (!len )
419+ return ;
420+
421+ /*
422+ * Initialize entries based on a crude estimate of the number
423+ * of references in the file (we'll grow it below if needed):
424+ */
425+ ALLOC_GROW (entries , len / 80 + 20 , alloc );
426+
427+ while (pos < eof ) {
428+ eol = memchr (pos , '\n' , eof - pos );
429+ if (!eol )
430+ /* The safety check should prevent this. */
431+ BUG ("unterminated line found in packed-refs" );
432+ if (eol - pos < GIT_SHA1_HEXSZ + 2 )
433+ die_invalid_line (packed_refs -> refs -> path ,
434+ pos , eof - pos );
435+ eol ++ ;
436+ if (eol < eof && * eol == '^' ) {
437+ /*
438+ * Keep any peeled line together with its
439+ * reference:
440+ */
441+ const char * peeled_start = eol ;
442+
443+ eol = memchr (peeled_start , '\n' , eof - peeled_start );
444+ if (!eol )
445+ /* The safety check should prevent this. */
446+ BUG ("unterminated peeled line found in packed-refs" );
447+ eol ++ ;
448+ }
449+
450+ ALLOC_GROW (entries , nr + 1 , alloc );
451+ entries [nr ].start = pos ;
452+ entries [nr ].len = eol - pos ;
453+ nr ++ ;
454+
455+ if (sorted &&
456+ nr > 1 &&
457+ cmp_packed_ref_entries (& entries [nr - 2 ],
458+ & entries [nr - 1 ]) >= 0 )
459+ sorted = 0 ;
460+
461+ pos = eol ;
462+ }
463+
464+ if (sorted )
465+ goto cleanup ;
466+
467+ /* We need to sort the memory. First we sort the entries array: */
468+ QSORT (entries , nr , cmp_packed_ref_entries );
469+
470+ /*
471+ * Allocate a new chunk of memory, and copy the old memory to
472+ * the new in the order indicated by `entries` (not bothering
473+ * with the header line):
474+ */
475+ new_buffer = xmalloc (len );
476+ for (dst = new_buffer , i = 0 ; i < nr ; i ++ ) {
477+ memcpy (dst , entries [i ].start , entries [i ].len );
478+ dst += entries [i ].len ;
479+ }
480+
481+ /*
482+ * Now munmap the old buffer and use the sorted buffer in its
483+ * place:
484+ */
485+ release_packed_ref_buffer (packed_refs );
486+ packed_refs -> buf = new_buffer ;
487+ packed_refs -> eof = new_buffer + len ;
488+ packed_refs -> header_len = 0 ;
489+
490+ cleanup :
491+ free (entries );
492+ }
493+
/*
 * Starting at `p` (which must point into the buffer that begins at
 * `buf`), walk backwards to the first byte of the record containing
 * `*p` and return a pointer to it.
 *
 * A record starts right after an LF, unless the byte there is '^':
 * such a peeled line is part of the preceding record, so the search
 * continues past it. If no record start is found before reaching the
 * beginning of the buffer, `buf` itself is returned.
 */
static const char *find_start_of_record(const char *buf, const char *p)
{
	const char *cursor;

	for (cursor = p; cursor > buf; cursor--) {
		if (cursor[-1] == '\n' && cursor[0] != '^')
			break;
	}
	return cursor;
}
505+
506+ /*
507+ * We want to be able to compare mmapped reference records quickly,
508+ * without totally parsing them. We can do so because the records are
509+ * LF-terminated, and the refname should start exactly (GIT_SHA1_HEXSZ
510+ * + 1) bytes past the beginning of the record.
511+ *
512+ * But what if the `packed-refs` file contains garbage? We're willing
513+ * to tolerate not detecting the problem, as long as we don't produce
514+ * totally garbled output (we can't afford to check the integrity of
515+ * the whole file during every Git invocation). But we do want to be
516+ * sure that we never read past the end of the buffer in memory and
517+ * perform an illegal memory access.
518+ *
519+ * Guarantee that minimum level of safety by verifying that the last
520+ * record in the file is LF-terminated, and that it has at least
521+ * (GIT_SHA1_HEXSZ + 1) characters before the LF. Die if either of
522+ * these checks fails.
523+ */
524+ static void verify_buffer_safe (struct packed_ref_cache * packed_refs )
525+ {
526+ const char * buf = packed_refs -> buf + packed_refs -> header_len ;
527+ const char * eof = packed_refs -> eof ;
528+ const char * last_line ;
529+
530+ if (buf == eof )
531+ return ;
532+
533+ last_line = find_start_of_record (buf , eof - 1 );
534+ if (* (eof - 1 ) != '\n' || eof - last_line < GIT_SHA1_HEXSZ + 2 )
535+ die_invalid_line (packed_refs -> refs -> path ,
536+ last_line , eof - last_line );
537+ }
538+
374539/*
375540 * Depending on `mmap_strategy`, either mmap or read the contents of
376541 * the `packed-refs` file into the `packed_refs` instance. Return 1 if
@@ -408,14 +573,14 @@ static int load_contents(struct packed_ref_cache *packed_refs)
408573
409574 switch (mmap_strategy ) {
410575 case MMAP_NONE :
411- case MMAP_TEMPORARY :
412576 packed_refs -> buf = xmalloc (size );
413577 bytes_read = read_in_full (fd , packed_refs -> buf , size );
414578 if (bytes_read < 0 || bytes_read != size )
415579 die_errno ("couldn't read %s" , packed_refs -> refs -> path );
416580 packed_refs -> eof = packed_refs -> buf + size ;
417581 packed_refs -> mmapped = 0 ;
418582 break ;
583+ case MMAP_TEMPORARY :
419584 case MMAP_OK :
420585 packed_refs -> buf = xmmap (NULL , size , PROT_READ , MAP_PRIVATE , fd , 0 );
421586 packed_refs -> eof = packed_refs -> buf + size ;
@@ -435,32 +600,37 @@ static int load_contents(struct packed_ref_cache *packed_refs)
435600 * A comment line of the form "# pack-refs with: " may contain zero or
436601 * more traits. We interpret the traits as follows:
437602 *
438- * No traits :
603+ * Neither `peeled` nor `fully-peeled` :
439604 *
440605 * Probably no references are peeled. But if the file contains a
441606 * peeled value for a reference, we will use it.
442607 *
443- * peeled:
608+ * ` peeled` :
444609 *
445610 * References under "refs/tags/", if they *can* be peeled, *are*
446611 * peeled in this file. References outside of "refs/tags/" are
447612 * probably not peeled even if they could have been, but if we find
448613 * a peeled value for such a reference we will use it.
449614 *
450- * fully-peeled:
615+ * ` fully-peeled` :
451616 *
452617 * All references in the file that can be peeled are peeled.
453618 * Inversely (and this is more important), any reference in the
454619 * file for which no peeled value is recorded is not peelable. This
455620 * trait should typically be written alongside "peeled" for
456621 * compatibility with older clients, but we do not require it
457622 * (i.e., "peeled" is a no-op if "fully-peeled" is set).
623+ *
624+ * `sorted`:
625+ *
626+ * The references in this file are known to be sorted by refname.
458627 */
459628static struct packed_ref_cache * read_packed_refs (struct packed_ref_store * refs )
460629{
461630 struct packed_ref_cache * packed_refs = xcalloc (1 , sizeof (* packed_refs ));
462631 struct ref_dir * dir ;
463632 struct ref_iterator * iter ;
633+ int sorted = 0 ;
464634 int ok ;
465635
466636 packed_refs -> refs = refs ;
@@ -499,6 +669,9 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs)
499669 packed_refs -> peeled = PEELED_FULLY ;
500670 else if (unsorted_string_list_has_string (& traits , "peeled" ))
501671 packed_refs -> peeled = PEELED_TAGS ;
672+
673+ sorted = unsorted_string_list_has_string (& traits , "sorted" );
674+
502675 /* perhaps other traits later as well */
503676
504677 /* The "+ 1" is for the LF character. */
@@ -508,6 +681,34 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs)
508681 strbuf_release (& tmp );
509682 }
510683
684+ verify_buffer_safe (packed_refs );
685+
686+ if (!sorted ) {
687+ sort_packed_refs (packed_refs );
688+
689+ /*
690+ * Reordering the records might have moved a short one
691+ * to the end of the buffer, so verify the buffer's
692+ * safety again:
693+ */
694+ verify_buffer_safe (packed_refs );
695+ }
696+
697+ if (mmap_strategy != MMAP_OK && packed_refs -> mmapped ) {
698+ /*
699+ * We don't want to leave the file mmapped, so we are
700+ * forced to make a copy now:
701+ */
702+ size_t size = packed_refs -> eof -
703+ (packed_refs -> buf + packed_refs -> header_len );
704+ char * buf_copy = xmalloc (size );
705+
706+ memcpy (buf_copy , packed_refs -> buf + packed_refs -> header_len , size );
707+ release_packed_ref_buffer (packed_refs );
708+ packed_refs -> buf = buf_copy ;
709+ packed_refs -> eof = buf_copy + size ;
710+ }
711+
511712 dir = get_ref_dir (packed_refs -> cache -> root );
512713 iter = mmapped_ref_iterator_begin (
513714 packed_refs ,
@@ -811,7 +1012,7 @@ int packed_refs_is_locked(struct ref_store *ref_store)
8111012 * the colon and the trailing space are required.
8121013 */
8131014static const char PACKED_REFS_HEADER [] =
814- "# pack-refs with: peeled fully-peeled \n" ;
1015+ "# pack-refs with: peeled fully-peeled sorted \n" ;
8151016
8161017static int packed_init_db (struct ref_store * ref_store , struct strbuf * err )
8171018{
0 commit comments