|
| 1 | +Backported upstream patch to apply to CBL-Mariner. |
| 2 | +Reference: https://gitlab.isc.org/isc-projects/bind9/-/commit/da0eafcdee52147e72d407cc3b9f179378ee1d3a |
| 3 | + |
| 4 | +From da0eafcdee52147e72d407cc3b9f179378ee1d3a Mon Sep 17 00:00:00 2001 |
| 5 | +From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= <ondrej@isc.org> |
| 6 | +Date: Tue, 30 May 2023 08:46:17 +0200 |
| 7 | +Subject: [PATCH] Improve RBT overmem cache cleaning |
| 8 | + |
| 9 | +When cache memory usage is over the configured cache size (overmem) and |
| 10 | +we are cleaning unused entries, it might not be enough to clean just two |
| 11 | +entries if the entries to be expired are smaller than the newly added |
| 12 | +rdata. This could be abused by an attacker to cause a remote Denial of |
| 13 | +Service by possibly running out of the operating system memory. |
| 14 | + |
| 15 | +Currently, the addrdataset() tries to do a single TTL-based cleaning |
| 16 | +considering the serve-stale TTL and then optionally moves to overmem |
| 17 | +cleaning if we are in that condition. Then the overmem_purge() tries to |
| 18 | +do another single TTL-based cleaning from the TTL heap and then continues |
| 19 | +with LRU-based cleaning, up to 2 entries cleaned. |
| 20 | + |
| 21 | +Squash the TTL-cleaning mechanism into a single call from addrdataset(), |
| 22 | +but ignore the serve-stale TTL if we are currently overmem. |
| 23 | + |
| 24 | +Then instead of having a fixed number of entries to clean, pass the size |
| 25 | +of newly added rdatasetheader to the overmem_purge() function and |
| 26 | +clean up at least the size of the newly added data. This prevents the |
| 27 | +cache from going over the configured memory limit (`max-cache-size`). |
| 28 | + |
| 29 | +Additionally, refactor the overmem_purge() function to reduce for-loop |
| 30 | +nesting for readability. |
| 31 | +--- |
| 32 | + bind_ln/lib/dns/rbtdb.c | 102 ++++++++++++++++++------------ |
| 33 | + 1 file changed, 60 insertions(+), 42 deletions(-) |
| 34 | + |
| 35 | +diff --git a/bind_ln/lib/dns/rbtdb.c b/bind_ln/lib/dns/rbtdb.c |
| 36 | +index 3ee1876..68b45d8 100644 |
| 37 | +--- a/bind_ln/lib/dns/rbtdb.c |
| 38 | ++++ b/bind_ln/lib/dns/rbtdb.c |
| 39 | +@@ -815,7 +815,7 @@ static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, |
| 40 | + static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, |
| 41 | + bool tree_locked, expire_t reason); |
| 42 | + static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, |
| 43 | +- isc_stdtime_t now, bool tree_locked); |
| 44 | ++ size_t purgesize, bool tree_locked); |
| 45 | + static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx, |
| 46 | + rdatasetheader_t *newheader); |
| 47 | + static void resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version, |
| 48 | +@@ -6817,6 +6817,16 @@ addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader, |
| 49 | + |
| 50 | + static dns_dbmethods_t zone_methods; |
| 51 | + |
| 52 | ++static size_t |
| 53 | ++rdataset_size(rdatasetheader_t *header) { |
| 54 | ++ if (!NONEXISTENT(header)) { |
| 55 | ++ return (dns_rdataslab_size((unsigned char *)header, |
| 56 | ++ sizeof(*header))); |
| 57 | ++ } |
| 58 | ++ |
| 59 | ++ return (sizeof(*header)); |
| 60 | ++} |
| 61 | ++ |
| 62 | + static isc_result_t |
| 63 | + addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, |
| 64 | + isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options, |
| 65 | +@@ -6971,7 +6981,7 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, |
| 66 | + } |
| 67 | + |
| 68 | + if (cache_is_overmem) |
| 69 | +- overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked); |
| 70 | ++ overmem_purge(rbtdb, rbtnode->locknum, rdataset_size(newheader), tree_locked); |
| 71 | + |
| 72 | + NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock, |
| 73 | + isc_rwlocktype_write); |
| 74 | +@@ -6986,10 +6996,14 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, |
| 75 | + cleanup_dead_nodes(rbtdb, rbtnode->locknum); |
| 76 | + |
| 77 | + header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1); |
| 78 | +- if (header && header->rdh_ttl < now - RBTDB_VIRTUAL) |
| 79 | +- expire_header(rbtdb, header, tree_locked, |
| 80 | +- expire_ttl); |
| 81 | ++ if (header != NULL) { |
| 82 | ++ dns_ttl_t rdh_ttl = header->rdh_ttl; |
| 83 | + |
| 84 | ++ if (rdh_ttl < now - RBTDB_VIRTUAL) { |
| 85 | ++ expire_header(rbtdb, header, tree_locked, |
| 86 | ++ expire_ttl); |
| 87 | ++ } |
| 88 | ++ } |
| 89 | + /* |
| 90 | + * If we've been holding a write lock on the tree just for |
| 91 | + * cleaning, we can release it now. However, we still need the |
| 92 | +@@ -10494,54 +10508,58 @@ update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, |
| 93 | + ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link); |
| 94 | + } |
| 95 | + |
| 96 | +-/*% |
| 97 | +- * Purge some expired and/or stale (i.e. unused for some period) cache entries |
| 98 | +- * under an overmem condition. To recover from this condition quickly, up to |
| 99 | +- * 2 entries will be purged. This process is triggered while adding a new |
| 100 | +- * entry, and we specifically avoid purging entries in the same LRU bucket as |
| 101 | +- * the one to which the new entry will belong. Otherwise, we might purge |
| 102 | +- * entries of the same name of different RR types while adding RRsets from a |
| 103 | +- * single response (consider the case where we're adding A and AAAA glue records |
| 104 | +- * of the same NS name). |
| 105 | ++static size_t |
| 106 | ++expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, size_t purgesize, |
| 107 | ++ bool tree_locked) { |
| 108 | ++ rdatasetheader_t *header, *header_prev; |
| 109 | ++ size_t purged = 0; |
| 110 | ++ |
| 111 | ++ for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]); |
| 112 | ++ header != NULL && purged <= purgesize; header = header_prev) |
| 113 | ++ { |
| 114 | ++ header_prev = ISC_LIST_PREV(header, link); |
| 115 | ++ /* |
| 116 | ++ * Unlink the entry at this point to avoid checking it |
| 117 | ++ * again even if it's currently used someone else and |
| 118 | ++ * cannot be purged at this moment. This entry won't be |
| 119 | ++ * referenced any more (so unlinking is safe) since the |
| 120 | ++ * TTL was reset to 0. |
| 121 | ++ */ |
| 122 | ++ ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header, link); |
| 123 | ++ size_t header_size = rdataset_size(header); |
| 124 | ++ expire_header(rbtdb, header, tree_locked, expire_lru); |
| 125 | ++ purged += header_size; |
| 126 | ++ } |
| 127 | ++ |
| 128 | ++ return (purged); |
| 129 | ++} |
| 130 | ++ |
| 131 | ++ /*% |
| 132 | ++ * Purge some stale (i.e. unused for some period - LRU based cleaning) cache |
| 133 | ++ * entries under the overmem condition. To recover from this condition quickly, |
| 134 | ++ * we cleanup entries up to the size of newly added rdata (passed as purgesize). |
| 135 | ++ * |
| 136 | ++ * This process is triggered while adding a new entry, and we specifically avoid |
| 137 | ++ * purging entries in the same LRU bucket as the one to which the new entry will |
| 138 | ++ * belong. Otherwise, we might purge entries of the same name of different RR |
| 139 | ++ * types while adding RRsets from a single response (consider the case where |
| 140 | ++ * we're adding A and AAAA glue records of the same NS name). |
| 141 | + */ |
| 142 | + static void |
| 143 | + overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, |
| 144 | +- isc_stdtime_t now, bool tree_locked) |
| 145 | ++ size_t purgesize, bool tree_locked) |
| 146 | + { |
| 147 | +- rdatasetheader_t *header, *header_prev; |
| 148 | + unsigned int locknum; |
| 149 | +- int purgecount = 2; |
| 150 | ++ size_t purged = 0; |
| 151 | + |
| 152 | + for (locknum = (locknum_start + 1) % rbtdb->node_lock_count; |
| 153 | +- locknum != locknum_start && purgecount > 0; |
| 154 | ++ locknum != locknum_start && purged <= purgesize; |
| 155 | + locknum = (locknum + 1) % rbtdb->node_lock_count) { |
| 156 | + NODE_LOCK(&rbtdb->node_locks[locknum].lock, |
| 157 | + isc_rwlocktype_write); |
| 158 | + |
| 159 | +- header = isc_heap_element(rbtdb->heaps[locknum], 1); |
| 160 | +- if (header && header->rdh_ttl < now - RBTDB_VIRTUAL) { |
| 161 | +- expire_header(rbtdb, header, tree_locked, |
| 162 | +- expire_ttl); |
| 163 | +- purgecount--; |
| 164 | +- } |
| 165 | +- |
| 166 | +- for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]); |
| 167 | +- header != NULL && purgecount > 0; |
| 168 | +- header = header_prev) { |
| 169 | +- header_prev = ISC_LIST_PREV(header, link); |
| 170 | +- /* |
| 171 | +- * Unlink the entry at this point to avoid checking it |
| 172 | +- * again even if it's currently used someone else and |
| 173 | +- * cannot be purged at this moment. This entry won't be |
| 174 | +- * referenced any more (so unlinking is safe) since the |
| 175 | +- * TTL was reset to 0. |
| 176 | +- */ |
| 177 | +- ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header, |
| 178 | +- link); |
| 179 | +- expire_header(rbtdb, header, tree_locked, |
| 180 | +- expire_lru); |
| 181 | +- purgecount--; |
| 182 | +- } |
| 183 | ++ purged += expire_lru_headers(rbtdb, locknum, purgesize - purged, |
| 184 | ++ tree_locked); |
| 185 | + |
| 186 | + NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, |
| 187 | + isc_rwlocktype_write); |
| 188 | +-- |
| 189 | +2.25.1 |
| 190 | + |
0 commit comments