@@ -1539,6 +1539,8 @@ static int pack_offset_sort(const void *_a, const void *_b)
15391539 * 2. Updating our size/type to the non-delta representation. These were
15401540 * either not recorded initially (size) or overwritten with the delta type
15411541 * (type) when check_object() decided to reuse the delta.
1542+ *
1543+ * 3. Resetting our delta depth, as we are now a base object.
15421544 */
15431545static void drop_reused_delta (struct object_entry * entry )
15441546{
@@ -1552,6 +1554,7 @@ static void drop_reused_delta(struct object_entry *entry)
15521554 p = & (* p )-> delta_sibling ;
15531555 }
15541556 entry -> delta = NULL ;
1557+ entry -> depth = 0 ;
15551558
15561559 oi .sizep = & entry -> size ;
15571560 oi .typep = & entry -> type ;
@@ -1570,39 +1573,123 @@ static void drop_reused_delta(struct object_entry *entry)
15701573 * Follow the chain of deltas from this entry onward, throwing away any links
15711574 * that cause us to hit a cycle (as determined by the DFS state flags in
15721575 * the entries).
1576+ *
1577+ * We also detect too-long reused chains that would violate our --depth
1578+ * limit.
15731579 */
15741580static void break_delta_chains (struct object_entry * entry )
15751581{
1576- /* If it's not a delta, it can't be part of a cycle. */
1577- if (!entry -> delta ) {
1578- entry -> dfs_state = DFS_DONE ;
1579- return ;
1580- }
1582+ /*
1583+ * The actual depth of each object we will write is stored as an int,
1584+ * as it cannot exceed our int "depth" limit. But before we break
1585+ * chains based on that limit, we may potentially go as deep as the
1586+ * number of objects, which is elsewhere bounded to a uint32_t.
1587+ */
1588+ uint32_t total_depth ;
1589+ struct object_entry * cur , * next ;
1590+
1591+ for (cur = entry , total_depth = 0 ;
1592+ cur ;
1593+ cur = cur -> delta , total_depth ++ ) {
1594+ if (cur -> dfs_state == DFS_DONE ) {
1595+ /*
1596+ * We've already seen this object and know it isn't
1597+ * part of a cycle. We do need to append its depth
1598+ * to our count.
1599+ */
1600+ total_depth += cur -> depth ;
1601+ break ;
1602+ }
15811603
1582- switch (entry -> dfs_state ) {
1583- case DFS_NONE :
15841604 /*
1585- * This is the first time we've seen the object. We mark it as
1586- * part of the active potential cycle and recurse.
1605+ * We break cycles before looping, so an ACTIVE state (or any
1606+ * other cruft which made its way into the state variable)
1607+ * is a bug.
15871608 */
1588- entry -> dfs_state = DFS_ACTIVE ;
1589- break_delta_chains (entry -> delta );
1590- entry -> dfs_state = DFS_DONE ;
1591- break ;
1609+ if (cur -> dfs_state != DFS_NONE )
1610+ die ("BUG: confusing delta dfs state in first pass: %d" ,
1611+ cur -> dfs_state );
15921612
1593- case DFS_DONE :
1594- /* object already examined, and not part of a cycle */
1595- break ;
1613+ /*
1614+ * Now we know this is the first time we've seen the object. If
1615+ * it's not a delta, we're done traversing, but we'll mark it
1616+ * done to save time on future traversals.
1617+ */
1618+ if (!cur -> delta ) {
1619+ cur -> dfs_state = DFS_DONE ;
1620+ break ;
1621+ }
15961622
1597- case DFS_ACTIVE :
15981623 /*
1599- * We found a cycle that needs broken. It would be correct to
1600- * break any link in the chain, but it's convenient to
1601- * break this one.
1624+ * Mark ourselves as active and see if the next step causes
1625+ * us to cycle to another active object. It's important to do
1626+ * this _before_ we loop, because it impacts where we make the
1627+ * cut, and thus how our total_depth counter works.
1628+ * E.g., We may see a partial loop like:
1629+ *
1630+ * A -> B -> C -> D -> B
1631+ *
1632+ * Cutting B->C breaks the cycle. But now the depth of A is
1633+ * only 1, and our total_depth counter is at 3. The size of the
1634+ * error is always one less than the size of the cycle we
1635+ * broke. Objects C and D were "lost" from A's chain.
1636+ *
1637+ * If we instead cut D->B, then the depth of A is correct at 3.
1638+ * We keep all objects in the chain that we examined.
16021639 */
1603- drop_reused_delta (entry );
1604- entry -> dfs_state = DFS_DONE ;
1605- break ;
1640+ cur -> dfs_state = DFS_ACTIVE ;
1641+ if (cur -> delta -> dfs_state == DFS_ACTIVE ) {
1642+ drop_reused_delta (cur );
1643+ cur -> dfs_state = DFS_DONE ;
1644+ break ;
1645+ }
1646+ }
1647+
1648+ /*
1649+ * And now that we've gone all the way to the bottom of the chain, we
1650+ * need to clear the active flags and set the depth fields as
1651+ * appropriate. Unlike the loop above, which can quit when it drops a
1652+ * delta, we need to keep going to look for more depth cuts. So we need
1653+ * an extra "next" pointer to keep going after we reset cur->delta.
1654+ */
1655+ for (cur = entry ; cur ; cur = next ) {
1656+ next = cur -> delta ;
1657+
1658+ /*
1659+ * We should have a chain of zero or more ACTIVE states down to
1660+ * a final DONE. We can quit after the DONE, because either it
1661+ * has no bases, or we've already handled them in a previous
1662+ * call.
1663+ */
1664+ if (cur -> dfs_state == DFS_DONE )
1665+ break ;
1666+ else if (cur -> dfs_state != DFS_ACTIVE )
1667+ die ("BUG: confusing delta dfs state in second pass: %d" ,
1668+ cur -> dfs_state );
1669+
1670+ /*
1671+ * If the total_depth is more than depth, then we need to snip
1672+ * the chain into two or more smaller chains that don't exceed
1673+ * the maximum depth. Most of the resulting chains will contain
1674+ * (depth + 1) entries (i.e., depth deltas plus one base), and
1675+ * the last chain (i.e., the one containing entry) will contain
1676+ * whatever entries are left over, namely
1677+ * (total_depth % (depth + 1)) of them.
1678+ *
1679+ * Since we are iterating towards decreasing depth, we need to
1680+ * decrement total_depth as we go, and we need to write to the
1681+ * entry what its final depth will be after all of the
1682+ * snipping. Since we're snipping into chains of length (depth
1683+ * + 1) entries, the final depth of an entry will be its
1684+ * original depth modulo (depth + 1). Any time we encounter an
1685+ * entry whose final depth is supposed to be zero, we snip it
1686+ * from its delta base, thereby making it so.
1687+ */
1688+ cur -> depth = (total_depth -- ) % (depth + 1 );
1689+ if (!cur -> depth )
1690+ drop_reused_delta (cur );
1691+
1692+ cur -> dfs_state = DFS_DONE ;
16061693 }
16071694}
16081695
0 commit comments