Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

f2fs: enhance alloc_nid and build_free_nids flows

In order to avoid build_free_nids lock contention, let's change the order of
function calls as follows.

At first, check whether there are enough free nids.
- If available, just get a free nid with spin_lock without any overhead.
- Otherwise, conduct build_free_nids.
: scan nat pages, journal nat entries, and nat cache entries.

We should be careful not to serve free nids that are intermediately made by
build_free_nids.
We can get stable free nids only after build_free_nids is done.

Reviewed-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>

+37 -47
+1 -1
fs/f2fs/f2fs.h
··· 190 190 struct f2fs_nm_info { 191 191 block_t nat_blkaddr; /* base disk address of NAT */ 192 192 nid_t max_nid; /* maximum possible node ids */ 193 - nid_t init_scan_nid; /* the first nid to be scanned */ 194 193 nid_t next_scan_nid; /* the next nid to be scanned */ 195 194 196 195 /* NAT cache management */ ··· 359 360 struct mutex writepages; /* mutex for writepages() */ 360 361 unsigned char next_lock_num; /* round-robin global locks */ 361 362 int por_doing; /* recovery is doing or not */ 363 + int on_build_free_nids; /* build_free_nids is doing */ 362 364 363 365 /* for orphan inode management */ 364 366 struct list_head orphan_inode_list; /* orphan inode list */
+36 -46
fs/f2fs/node.c
··· 1309 1309 struct f2fs_nm_info *nm_i = NM_I(sbi); 1310 1310 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1311 1311 struct f2fs_summary_block *sum = curseg->sum_blk; 1312 - nid_t nid = 0; 1313 - bool is_cycled = false; 1314 - int fcnt = 0; 1315 - int i; 1312 + int fcnt = 0, i = 0; 1313 + nid_t nid = nm_i->next_scan_nid; 1316 1314 1317 - nid = nm_i->next_scan_nid; 1318 - nm_i->init_scan_nid = nid; 1315 + /* Enough entries */ 1316 + if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK) 1317 + return; 1319 1318 1319 + /* readahead nat pages to be scanned */ 1320 1320 ra_nat_pages(sbi, nid); 1321 1321 1322 1322 while (1) { ··· 1326 1326 f2fs_put_page(page, 1); 1327 1327 1328 1328 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); 1329 - 1330 - if (nid >= nm_i->max_nid) { 1329 + if (nid >= nm_i->max_nid) 1331 1330 nid = 0; 1332 - is_cycled = true; 1333 - } 1334 - if (fcnt > MAX_FREE_NIDS) 1335 - break; 1336 - if (is_cycled && nm_i->init_scan_nid <= nid) 1331 + 1332 + if (i++ == FREE_NID_PAGES) 1337 1333 break; 1338 1334 } 1339 1335 1340 - /* go to the next nat page in order to reuse free nids first */ 1341 - nm_i->next_scan_nid = nm_i->init_scan_nid + NAT_ENTRY_PER_BLOCK; 1336 + /* go to the next free nat pages to find free nids abundantly */ 1337 + nm_i->next_scan_nid = nid; 1342 1338 1343 1339 /* find free nids from current sum_pages */ 1344 1340 mutex_lock(&curseg->curseg_mutex); ··· 1371 1375 struct free_nid *i = NULL; 1372 1376 struct list_head *this; 1373 1377 retry: 1374 - mutex_lock(&nm_i->build_lock); 1375 - if (!nm_i->fcnt) { 1376 - /* scan NAT in order to build free nid list */ 1377 - build_free_nids(sbi); 1378 - if (!nm_i->fcnt) { 1379 - mutex_unlock(&nm_i->build_lock); 1380 - return false; 1381 - } 1382 - } 1383 - mutex_unlock(&nm_i->build_lock); 1378 + if (sbi->total_valid_node_count + 1 >= nm_i->max_nid) 1379 + return false; 1384 1380 1385 - /* 1386 - * We check fcnt again since previous check is racy as 1387 - * we didn't hold 
free_nid_list_lock. So other thread 1388 - * could consume all of free nids. 1389 - */ 1390 1381 spin_lock(&nm_i->free_nid_list_lock); 1391 - if (!nm_i->fcnt) { 1382 + 1383 + /* We should not use stale free nids created by build_free_nids */ 1384 + if (nm_i->fcnt && !sbi->on_build_free_nids) { 1385 + BUG_ON(list_empty(&nm_i->free_nid_list)); 1386 + list_for_each(this, &nm_i->free_nid_list) { 1387 + i = list_entry(this, struct free_nid, list); 1388 + if (i->state == NID_NEW) 1389 + break; 1390 + } 1391 + 1392 + BUG_ON(i->state != NID_NEW); 1393 + *nid = i->nid; 1394 + i->state = NID_ALLOC; 1395 + nm_i->fcnt--; 1392 1396 spin_unlock(&nm_i->free_nid_list_lock); 1393 - goto retry; 1397 + return true; 1394 1398 } 1395 - 1396 - BUG_ON(list_empty(&nm_i->free_nid_list)); 1397 - list_for_each(this, &nm_i->free_nid_list) { 1398 - i = list_entry(this, struct free_nid, list); 1399 - if (i->state == NID_NEW) 1400 - break; 1401 - } 1402 - 1403 - BUG_ON(i->state != NID_NEW); 1404 - *nid = i->nid; 1405 - i->state = NID_ALLOC; 1406 - nm_i->fcnt--; 1407 1399 spin_unlock(&nm_i->free_nid_list_lock); 1408 - return true; 1400 + 1401 + /* Let's scan nat pages and its caches to get free nids */ 1402 + mutex_lock(&nm_i->build_lock); 1403 + sbi->on_build_free_nids = 1; 1404 + build_free_nids(sbi); 1405 + sbi->on_build_free_nids = 0; 1406 + mutex_unlock(&nm_i->build_lock); 1407 + goto retry; 1409 1408 } 1410 1409 1411 1410 /* ··· 1687 1696 spin_lock_init(&nm_i->free_nid_list_lock); 1688 1697 rwlock_init(&nm_i->nat_tree_lock); 1689 1698 1690 - nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); 1691 1699 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); 1692 1700 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); 1693 1701 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);