@@ -124,7 +124,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
124124 return (cache -> flags & bits ) == bits ;
125125}
126126
/*
 * Atomically increment cache->count.
 * Presumably the counterpart of btrfs_put_block_group() -- confirm.
 * This hunk drops the `static` qualifier from the definition, so the
 * function has external linkage after the change.
 */
127- static void btrfs_get_block_group (struct btrfs_block_group_cache * cache )
127+ void btrfs_get_block_group (struct btrfs_block_group_cache * cache )
128128{
129129 atomic_inc (& cache -> count );
130130}
@@ -5915,19 +5915,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
59155915 set_extent_dirty (info -> pinned_extents ,
59165916 bytenr , bytenr + num_bytes - 1 ,
59175917 GFP_NOFS | __GFP_NOFAIL );
5918- /*
5919- * No longer have used bytes in this block group, queue
5920- * it for deletion.
5921- */
5922- if (old_val == 0 ) {
5923- spin_lock (& info -> unused_bgs_lock );
5924- if (list_empty (& cache -> bg_list )) {
5925- btrfs_get_block_group (cache );
5926- list_add_tail (& cache -> bg_list ,
5927- & info -> unused_bgs );
5928- }
5929- spin_unlock (& info -> unused_bgs_lock );
5930- }
59315918 }
59325919
59335920 spin_lock (& trans -> transaction -> dirty_bgs_lock );
@@ -5939,6 +5926,22 @@ static int update_block_group(struct btrfs_trans_handle *trans,
59395926 }
59405927 spin_unlock (& trans -> transaction -> dirty_bgs_lock );
59415928
5929+ /*
5930+ * No longer have used bytes in this block group, queue it for
5931+ * deletion. We do this after adding the block group to the
5932+ * dirty list to avoid races between cleaner kthread and space
5933+ * cache writeout.
5934+ */
5935+ if (!alloc && old_val == 0 ) {
5936+ spin_lock (& info -> unused_bgs_lock );
5937+ if (list_empty (& cache -> bg_list )) {
5938+ btrfs_get_block_group (cache );
5939+ list_add_tail (& cache -> bg_list ,
5940+ & info -> unused_bgs );
5941+ }
5942+ spin_unlock (& info -> unused_bgs_lock );
5943+ }
5944+
59425945 btrfs_put_block_group (cache );
59435946 total -= num_bytes ;
59445947 bytenr += num_bytes ;
@@ -8105,21 +8108,47 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
81058108}
81068109
81078110/*
8108- * TODO: Modify related function to add related node/leaf to dirty_extent_root,
8109- * for later qgroup accounting.
8110- *
8111- * Current, this function does nothing.
8111+ * Extents in a shared subtree may not be seen by the usual inc/dec ref
8112+ * code, so we have to record them for qgroup accounting here.
81128113 */
/*
 * Allocate a btrfs_qgroup_extent_record describing (bytenr, num_bytes) and
 * insert it into the delayed refs root of the handle's transaction.
 *
 * Returns -ENOMEM if the record cannot be allocated and 0 otherwise,
 * including the case where the insert helper returns non-zero and the
 * record is freed here. The root argument is not used in the body.
 */
8114+ static int record_one_subtree_extent (struct btrfs_trans_handle * trans ,
8115+ struct btrfs_root * root , u64 bytenr ,
8116+ u64 num_bytes )
8117+ {
8118+ struct btrfs_qgroup_extent_record * qrecord ;
8119+ struct btrfs_delayed_ref_root * delayed_refs ;
8120+
8121+ qrecord = kmalloc (sizeof (* qrecord ), GFP_NOFS );
8122+ if (!qrecord )
8123+ return - ENOMEM ;
8124+
8125+ qrecord -> bytenr = bytenr ;
8126+ qrecord -> num_bytes = num_bytes ;
8127+ qrecord -> old_roots = NULL ;
8128+
8129+ delayed_refs = & trans -> transaction -> delayed_refs ;
8130+ spin_lock (& delayed_refs -> lock );
/*
 * A non-zero return means the record was not kept by the insert helper
 * (presumably because an entry for this bytenr already exists -- confirm),
 * so our allocation is freed. This is not reported as an error.
 */
8131+ if (btrfs_qgroup_insert_dirty_extent (delayed_refs , qrecord ))
8132+ kfree (qrecord );
8133+ spin_unlock (& delayed_refs -> lock );
8134+
8135+ return 0 ;
8136+ }
8137+
81138138static int account_leaf_items (struct btrfs_trans_handle * trans ,
81148139 struct btrfs_root * root ,
81158140 struct extent_buffer * eb )
81168141{
81178142 int nr = btrfs_header_nritems (eb );
8118- int i , extent_type ;
8143+ int i , extent_type , ret ;
81198144 struct btrfs_key key ;
81208145 struct btrfs_file_extent_item * fi ;
81218146 u64 bytenr , num_bytes ;
81228147
8148+ /* We can be called directly from walk_up_proc() */
8149+ if (!root -> fs_info -> quota_enabled )
8150+ return 0 ;
8151+
81238152 for (i = 0 ; i < nr ; i ++ ) {
81248153 btrfs_item_key_to_cpu (eb , & key , i );
81258154
@@ -8138,6 +8167,10 @@ static int account_leaf_items(struct btrfs_trans_handle *trans,
81388167 continue ;
81398168
81408169 num_bytes = btrfs_file_extent_disk_num_bytes (eb , fi );
8170+
8171+ ret = record_one_subtree_extent (trans , root , bytenr , num_bytes );
8172+ if (ret )
8173+ return ret ;
81418174 }
81428175 return 0 ;
81438176}
@@ -8206,8 +8239,6 @@ static int adjust_slots_upwards(struct btrfs_root *root,
82068239
82078240/*
82088241 * root_eb is the subtree root and is locked before this function is called.
8209- * TODO: Modify this function to mark all (including complete shared node)
8210- * to dirty_extent_root to allow it get accounted in qgroup.
82118242 */
82128243static int account_shared_subtree (struct btrfs_trans_handle * trans ,
82138244 struct btrfs_root * root ,
@@ -8285,6 +8316,11 @@ static int account_shared_subtree(struct btrfs_trans_handle *trans,
82858316 btrfs_tree_read_lock (eb );
82868317 btrfs_set_lock_blocking_rw (eb , BTRFS_READ_LOCK );
82878318 path -> locks [level ] = BTRFS_READ_LOCK_BLOCKING ;
8319+
8320+ ret = record_one_subtree_extent (trans , root , child_bytenr ,
8321+ root -> nodesize );
8322+ if (ret )
8323+ goto out ;
82888324 }
82898325
82908326 if (level == 0 ) {
@@ -10256,6 +10292,47 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
1025610292 return ret ;
1025710293}
1025810294
/*
 * Start a transaction with enough metadata units reserved to remove the
 * block group whose chunk starts at chunk_offset.
 *
 * The number of units is 3 plus the number of stripes in the chunk's
 * map_lookup (see the breakdown in the body). The return value is whatever
 * btrfs_start_transaction_fallback_global_rsv() returns; the visible caller
 * checks it with IS_ERR().
 */
10295+ struct btrfs_trans_handle *
10296+ btrfs_start_trans_remove_block_group (struct btrfs_fs_info * fs_info ,
10297+ const u64 chunk_offset )
10298+ {
10299+ struct extent_map_tree * em_tree = & fs_info -> mapping_tree .map_tree ;
10300+ struct extent_map * em ;
10301+ struct map_lookup * map ;
10302+ unsigned int num_items ;
10303+
/*
 * Only the lookup runs under the tree's read lock; em is used after the
 * unlock (presumably safe because the lookup returns a referenced em that
 * free_extent_map() below drops -- confirm).
 */
10304+ read_lock (& em_tree -> lock );
10305+ em = lookup_extent_mapping (em_tree , chunk_offset , 1 );
10306+ read_unlock (& em_tree -> lock );
/*
 * NOTE(review): em is dereferenced further down. If ASSERT() compiles to
 * nothing in some configuration, a failed lookup would be a NULL
 * dereference -- confirm the lookup cannot fail for callers of this helper.
 */
10307+ ASSERT (em && em -> start == chunk_offset );
10308+
10309+ /*
10310+ * We need to reserve 3 + N units from the metadata space info in order
10311+ * to remove a block group (done at btrfs_remove_chunk() and at
10312+ * btrfs_remove_block_group()), which are used for:
10313+ *
10314+ * 1 unit for adding the free space inode's orphan (located in the tree
10315+ * of tree roots).
10316+ * 1 unit for deleting the block group item (located in the extent
10317+ * tree).
10318+ * 1 unit for deleting the free space item (located in tree of tree
10319+ * roots).
10320+ * N units for deleting N device extent items corresponding to each
10321+ * stripe (located in the device tree).
10322+ *
10323+ * In order to remove a block group we also need to reserve units in the
10324+ * system space info in order to update the chunk tree (update one or
10325+ * more device items and remove one chunk item), but this is done at
10326+ * btrfs_remove_chunk() through a call to check_system_chunk().
10327+ */
/* The chunk's map_lookup is read out of em->bdev, hence the cast. */
10328+ map = (struct map_lookup * )em -> bdev ;
10329+ num_items = 3 + map -> num_stripes ;
/* num_stripes has been copied out, so em is no longer needed. */
10330+ free_extent_map (em );
10331+
10332+ return btrfs_start_transaction_fallback_global_rsv (fs_info -> extent_root ,
10333+ num_items , 1 );
10334+ }
10335+
1025910336/*
1026010337 * Process the unused_bgs list and remove any that don't have any allocated
1026110338 * space inside of them.
@@ -10322,8 +10399,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
1032210399 * Want to do this before we do anything else so we can recover
1032310400 * properly if we fail to join the transaction.
1032410401 */
10325- /* 1 for btrfs_orphan_reserve_metadata() */
10326- trans = btrfs_start_transaction ( root , 1 );
10402+ trans = btrfs_start_trans_remove_block_group ( fs_info ,
10403+ block_group -> key . objectid );
1032710404 if (IS_ERR (trans )) {
1032810405 btrfs_dec_block_group_ro (root , block_group );
1032910406 ret = PTR_ERR (trans );
0 commit comments