|
|
Subject: [Patch 10/10]: ext3 online resize: make group-add asynchronous. - msg#00026
List: file-systems.ext2.devel
There's not much to be gained from making the resize group-add operation
fully synchronous. It's already atomic by virtue of the journal, and a
resize operation usually doesn't add just one group, but many. The real
need is to sync things up at the end of the *entire* resize, not after
each individual group-add; and by syncing each group separately, the
whole resize operation is made much, much slower if we're running on a
live filesystem.
Signed-off-by: Stephen Tweedie <sct@xxxxxxxxxx>
--- linux-2.6.9-rc2-mm4/fs/ext3/resize.c.=K0009=.orig
+++ linux-2.6.9-rc2-mm4/fs/ext3/resize.c
@@ -872,7 +872,6 @@ int ext3_group_add(struct super_block *s
exit_journal:
unlock_super(sb);
- handle->h_sync = 1;
if ((err2 = ext3_journal_stop(handle)) && !err)
err = err2;
if (!err) {
-------------------------------------------------------
This SF.net email is sponsored by: IT Product Guide on ITManagersJournal
Use IT products in your business? Tell us what you think of them. Give us
Your Opinions, Get Free ThinkGeek Gift Certificates! Click to find out more
http://productguide.itmanagersjournal.com/guidepromo.tmpl
Was this page helpful?
Thread at a glance:
Previous Message by Date:
click to view message preview
[Patch 9/10]: ext3 online resize: remove on-stack special resize inode
Resize is currently using a dummy inode in order to return blocks to
the free list via ext3_free_blocks().
Refactor the core free-blocks code to use ext3_free_blocks_sb(), which
takes a super_block rather than an inode. The resize code can now use
that to avoid the need for a dummy inode.
Signed-off-by: Stephen Tweedie <sct@xxxxxxxxxx>
--- linux-2.6.9-rc2-mm4/fs/ext3/balloc.c.=K0008=.orig
+++ linux-2.6.9-rc2-mm4/fs/ext3/balloc.c
@@ -274,8 +274,9 @@ void ext3_discard_reservation(struct ino
}
/* Free given blocks, update quota and i_blocks field */
-void ext3_free_blocks(handle_t *handle, struct inode *inode,
- unsigned long block, unsigned long count)
+void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
+ unsigned long block, unsigned long count,
+ int *pdquot_freed_blocks)
{
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *gd_bh;
@@ -283,18 +284,12 @@ void ext3_free_blocks(handle_t *handle,
unsigned long bit;
unsigned long i;
unsigned long overflow;
- struct super_block * sb;
struct ext3_group_desc * gdp;
struct ext3_super_block * es;
struct ext3_sb_info *sbi;
int err = 0, ret;
- int dquot_freed_blocks = 0;
- sb = inode->i_sb;
- if (!sb) {
- printk ("ext3_free_blocks: nonexistent device");
- return;
- }
+ *pdquot_freed_blocks = 0;
sbi = EXT3_SB(sb);
es = EXT3_SB(sb)->s_es;
if (block < le32_to_cpu(es->s_first_data_block) ||
@@ -426,7 +421,7 @@ do_more:
jbd_lock_bh_state(bitmap_bh);
BUFFER_TRACE(bitmap_bh, "bit already cleared");
} else {
- dquot_freed_blocks++;
+ (*pdquot_freed_blocks)++;
}
}
jbd_unlock_bh_state(bitmap_bh);
@@ -434,7 +429,7 @@ do_more:
spin_lock(sb_bgl_lock(sbi, block_group));
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
- dquot_freed_blocks);
+ *pdquot_freed_blocks);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_mod(&sbi->s_freeblocks_counter, count);
@@ -456,7 +451,23 @@ do_more:
error_return:
brelse(bitmap_bh);
ext3_std_error(sb, err);
- if (dquot_freed_blocks && !(EXT3_I(inode)->i_state & EXT3_STATE_RESIZE))
+ return;
+}
+
+/* Free given blocks, update quota and i_blocks field */
+void ext3_free_blocks(handle_t *handle, struct inode *inode,
+ unsigned long block, unsigned long count)
+{
+ struct super_block * sb;
+ int dquot_freed_blocks;
+
+ sb = inode->i_sb;
+ if (!sb) {
+ printk ("ext3_free_blocks: nonexistent device");
+ return;
+ }
+ ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
+ if (dquot_freed_blocks)
DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
return;
}
--- linux-2.6.9-rc2-mm4/fs/ext3/resize.c.=K0008=.orig
+++ linux-2.6.9-rc2-mm4/fs/ext3/resize.c
@@ -902,10 +902,9 @@ int ext3_group_extend(struct super_block
unsigned long o_groups_count;
unsigned long last;
int add;
- struct inode *inode;
struct buffer_head * bh;
handle_t *handle;
- int err;
+ int err, freed_blocks;
/* We don't need to worry about locking wrt other resizers just
* yet: we're going to revalidate es->s_blocks_count after
@@ -955,20 +954,10 @@ int ext3_group_extend(struct super_block
}
brelse(bh);
- /* Get a bogus inode to "free" the new blocks in this group. */
- if (!(inode = new_inode(sb))) {
- ext3_warning(sb, __FUNCTION__,
- "error getting dummy resize inode");
- return -ENOMEM;
- }
- inode->i_ino = 0;
-
- EXT3_I(inode)->i_state = EXT3_STATE_RESIZE;
-
/* We will update the superblock, one block bitmap, and
* one group descriptor via ext3_free_blocks().
*/
- handle = ext3_journal_start(inode, 3);
+ handle = ext3_journal_start_sb(sb, 3);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
ext3_warning(sb, __FUNCTION__, "error %d on journal start",err);
@@ -997,7 +986,7 @@ int ext3_group_extend(struct super_block
unlock_super(sb);
ext3_debug("freeing blocks %ld through %ld\n", o_blocks_count,
o_blocks_count + add);
- ext3_free_blocks(handle, inode, o_blocks_count, add);
+ ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
ext3_debug("freed blocks %ld through %ld\n", o_blocks_count,
o_blocks_count + add);
if ((err = ext3_journal_stop(handle)))
@@ -1008,7 +997,5 @@ int ext3_group_extend(struct super_block
update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es,
sizeof(struct ext3_super_block));
exit_put:
- iput(inode);
-
return err;
} /* ext3_group_extend */
--- linux-2.6.9-rc2-mm4/include/linux/ext3_fs.h.=K0008=.orig
+++ linux-2.6.9-rc2-mm4/include/linux/ext3_fs.h
@@ -195,7 +195,6 @@ struct ext3_group_desc
*/
#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
#define EXT3_STATE_NEW 0x00000002 /* inode is newly created */
-#define EXT3_STATE_RESIZE 0x00000004 /* fake inode for resizing */
/* Used to pass group descriptor data when online resize is done */
@@ -715,6 +714,8 @@ extern unsigned long ext3_bg_num_gdb(str
extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
unsigned long);
+extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
+ unsigned long, unsigned long, int *);
extern unsigned long ext3_count_free_blocks (struct super_block *);
extern void ext3_check_blocks_bitmap (struct super_block *);
extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
-------------------------------------------------------
This SF.net email is sponsored by: IT Product Guide on ITManagersJournal
Use IT products in your business? Tell us what you think of them. Give us
Your Opinions, Get Free ThinkGeek Gift Certificates! Click to find out more
http://productguide.itmanagersjournal.com/guidepromo.tmpl
Next Message by Date:
click to view message preview
[Patch 8/10]: ext3 online resize: remove s_debts
s_debts is currently not used by ext3 (it is created, destroyed and
checked but never set). Remove it for now.
Resurrecting this will require adding it back in a changed form. In its
existing form it is already unsafe with respect to byte-tearing, as it
performs unlocked byte increments/decrements on words which may be accessed
simultaneously by other CPUs. It is also the only in-memory dynamic
table which needs to be extended by online resize, so locking it will
require care.
Signed-off-by: Stephen Tweedie <sct@xxxxxxxxxx>
--- linux-2.6.9-rc2-mm4/fs/ext3/ialloc.c.=K0007=.orig
+++ linux-2.6.9-rc2-mm4/fs/ext3/ialloc.c
@@ -320,8 +320,6 @@ static int find_group_orlov(struct super
desc = ext3_get_group_desc (sb, group, &bh);
if (!desc || !desc->bg_free_inodes_count)
continue;
- if (sbi->s_debts[group] >= max_debt)
- continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
continue;
if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
--- linux-2.6.9-rc2-mm4/fs/ext3/super.c.=K0007=.orig
+++ linux-2.6.9-rc2-mm4/fs/ext3/super.c
@@ -400,7 +400,6 @@ void ext3_put_super (struct super_block
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
- kfree(sbi->s_debts);
brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++) {
@@ -1460,13 +1459,6 @@ static int ext3_fill_super (struct super
printk (KERN_ERR "EXT3-fs: not enough memory\n");
goto failed_mount;
}
- sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(u8),
- GFP_KERNEL);
- if (!sbi->s_debts) {
- printk("EXT3-fs: not enough memory to allocate s_bgi\n");
- goto failed_mount2;
- }
- memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(u8));
percpu_counter_init(&sbi->s_freeblocks_counter);
percpu_counter_init(&sbi->s_freeinodes_counter);
@@ -1618,7 +1610,6 @@ static int ext3_fill_super (struct super
failed_mount3:
journal_destroy(sbi->s_journal);
failed_mount2:
- kfree(sbi->s_debts);
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
--- linux-2.6.9-rc2-mm4/include/linux/ext3_fs_sb.h.=K0007=.orig
+++ linux-2.6.9-rc2-mm4/include/linux/ext3_fs_sb.h
@@ -54,7 +54,6 @@ struct ext3_sb_info {
u32 s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
- u8 *s_debts;
struct percpu_counter s_freeblocks_counter;
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
-------------------------------------------------------
This SF.net email is sponsored by: IT Product Guide on ITManagersJournal
Use IT products in your business? Tell us what you think of them. Give us
Your Opinions, Get Free ThinkGeek Gift Certificates! Click to find out more
http://productguide.itmanagersjournal.com/guidepromo.tmpl
Previous Message by Thread:
click to view message preview
[Patch 9/10]: ext3 online resize: remove on-stack special resize inode
Resize is currently using a dummy inode in order to return blocks to
the free list via ext3_free_blocks().
Refactor the core free-blocks code to use ext3_free_blocks_sb(), which
takes a super_block rather than an inode. The resize code can now use
that to avoid the need for a dummy inode.
Signed-off-by: Stephen Tweedie <sct@xxxxxxxxxx>
--- linux-2.6.9-rc2-mm4/fs/ext3/balloc.c.=K0008=.orig
+++ linux-2.6.9-rc2-mm4/fs/ext3/balloc.c
@@ -274,8 +274,9 @@ void ext3_discard_reservation(struct ino
}
/* Free given blocks, update quota and i_blocks field */
-void ext3_free_blocks(handle_t *handle, struct inode *inode,
- unsigned long block, unsigned long count)
+void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
+ unsigned long block, unsigned long count,
+ int *pdquot_freed_blocks)
{
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *gd_bh;
@@ -283,18 +284,12 @@ void ext3_free_blocks(handle_t *handle,
unsigned long bit;
unsigned long i;
unsigned long overflow;
- struct super_block * sb;
struct ext3_group_desc * gdp;
struct ext3_super_block * es;
struct ext3_sb_info *sbi;
int err = 0, ret;
- int dquot_freed_blocks = 0;
- sb = inode->i_sb;
- if (!sb) {
- printk ("ext3_free_blocks: nonexistent device");
- return;
- }
+ *pdquot_freed_blocks = 0;
sbi = EXT3_SB(sb);
es = EXT3_SB(sb)->s_es;
if (block < le32_to_cpu(es->s_first_data_block) ||
@@ -426,7 +421,7 @@ do_more:
jbd_lock_bh_state(bitmap_bh);
BUFFER_TRACE(bitmap_bh, "bit already cleared");
} else {
- dquot_freed_blocks++;
+ (*pdquot_freed_blocks)++;
}
}
jbd_unlock_bh_state(bitmap_bh);
@@ -434,7 +429,7 @@ do_more:
spin_lock(sb_bgl_lock(sbi, block_group));
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
- dquot_freed_blocks);
+ *pdquot_freed_blocks);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_mod(&sbi->s_freeblocks_counter, count);
@@ -456,7 +451,23 @@ do_more:
error_return:
brelse(bitmap_bh);
ext3_std_error(sb, err);
- if (dquot_freed_blocks && !(EXT3_I(inode)->i_state & EXT3_STATE_RESIZE))
+ return;
+}
+
+/* Free given blocks, update quota and i_blocks field */
+void ext3_free_blocks(handle_t *handle, struct inode *inode,
+ unsigned long block, unsigned long count)
+{
+ struct super_block * sb;
+ int dquot_freed_blocks;
+
+ sb = inode->i_sb;
+ if (!sb) {
+ printk ("ext3_free_blocks: nonexistent device");
+ return;
+ }
+ ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
+ if (dquot_freed_blocks)
DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
return;
}
--- linux-2.6.9-rc2-mm4/fs/ext3/resize.c.=K0008=.orig
+++ linux-2.6.9-rc2-mm4/fs/ext3/resize.c
@@ -902,10 +902,9 @@ int ext3_group_extend(struct super_block
unsigned long o_groups_count;
unsigned long last;
int add;
- struct inode *inode;
struct buffer_head * bh;
handle_t *handle;
- int err;
+ int err, freed_blocks;
/* We don't need to worry about locking wrt other resizers just
* yet: we're going to revalidate es->s_blocks_count after
@@ -955,20 +954,10 @@ int ext3_group_extend(struct super_block
}
brelse(bh);
- /* Get a bogus inode to "free" the new blocks in this group. */
- if (!(inode = new_inode(sb))) {
- ext3_warning(sb, __FUNCTION__,
- "error getting dummy resize inode");
- return -ENOMEM;
- }
- inode->i_ino = 0;
-
- EXT3_I(inode)->i_state = EXT3_STATE_RESIZE;
-
/* We will update the superblock, one block bitmap, and
* one group descriptor via ext3_free_blocks().
*/
- handle = ext3_journal_start(inode, 3);
+ handle = ext3_journal_start_sb(sb, 3);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
ext3_warning(sb, __FUNCTION__, "error %d on journal start",err);
@@ -997,7 +986,7 @@ int ext3_group_extend(struct super_block
unlock_super(sb);
ext3_debug("freeing blocks %ld through %ld\n", o_blocks_count,
o_blocks_count + add);
- ext3_free_blocks(handle, inode, o_blocks_count, add);
+ ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
ext3_debug("freed blocks %ld through %ld\n", o_blocks_count,
o_blocks_count + add);
if ((err = ext3_journal_stop(handle)))
@@ -1008,7 +997,5 @@ int ext3_group_extend(struct super_block
update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es,
sizeof(struct ext3_super_block));
exit_put:
- iput(inode);
-
return err;
} /* ext3_group_extend */
--- linux-2.6.9-rc2-mm4/include/linux/ext3_fs.h.=K0008=.orig
+++ linux-2.6.9-rc2-mm4/include/linux/ext3_fs.h
@@ -195,7 +195,6 @@ struct ext3_group_desc
*/
#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
#define EXT3_STATE_NEW 0x00000002 /* inode is newly created */
-#define EXT3_STATE_RESIZE 0x00000004 /* fake inode for resizing */
/* Used to pass group descriptor data when online resize is done */
@@ -715,6 +714,8 @@ extern unsigned long ext3_bg_num_gdb(str
extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
unsigned long);
+extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
+ unsigned long, unsigned long, int *);
extern unsigned long ext3_count_free_blocks (struct super_block *);
extern void ext3_check_blocks_bitmap (struct super_block *);
extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
-------------------------------------------------------
This SF.net email is sponsored by: IT Product Guide on ITManagersJournal
Use IT products in your business? Tell us what you think of them. Give us
Your Opinions, Get Free ThinkGeek Gift Certificates! Click to find out more
http://productguide.itmanagersjournal.com/guidepromo.tmpl
Next Message by Thread:
click to view message preview
[Patch 8/10]: ext3 online resize: remove s_debts
s_debts is currently not used by ext3 (it is created, destroyed and
checked but never set). Remove it for now.
Resurrecting this will require adding it back in a changed form. In its
existing form it is already unsafe with respect to byte-tearing, as it
performs unlocked byte increments/decrements on words which may be accessed
simultaneously by other CPUs. It is also the only in-memory dynamic
table which needs to be extended by online resize, so locking it will
require care.
Signed-off-by: Stephen Tweedie <sct@xxxxxxxxxx>
--- linux-2.6.9-rc2-mm4/fs/ext3/ialloc.c.=K0007=.orig
+++ linux-2.6.9-rc2-mm4/fs/ext3/ialloc.c
@@ -320,8 +320,6 @@ static int find_group_orlov(struct super
desc = ext3_get_group_desc (sb, group, &bh);
if (!desc || !desc->bg_free_inodes_count)
continue;
- if (sbi->s_debts[group] >= max_debt)
- continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
continue;
if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
--- linux-2.6.9-rc2-mm4/fs/ext3/super.c.=K0007=.orig
+++ linux-2.6.9-rc2-mm4/fs/ext3/super.c
@@ -400,7 +400,6 @@ void ext3_put_super (struct super_block
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
- kfree(sbi->s_debts);
brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++) {
@@ -1460,13 +1459,6 @@ static int ext3_fill_super (struct super
printk (KERN_ERR "EXT3-fs: not enough memory\n");
goto failed_mount;
}
- sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(u8),
- GFP_KERNEL);
- if (!sbi->s_debts) {
- printk("EXT3-fs: not enough memory to allocate s_bgi\n");
- goto failed_mount2;
- }
- memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(u8));
percpu_counter_init(&sbi->s_freeblocks_counter);
percpu_counter_init(&sbi->s_freeinodes_counter);
@@ -1618,7 +1610,6 @@ static int ext3_fill_super (struct super
failed_mount3:
journal_destroy(sbi->s_journal);
failed_mount2:
- kfree(sbi->s_debts);
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
--- linux-2.6.9-rc2-mm4/include/linux/ext3_fs_sb.h.=K0007=.orig
+++ linux-2.6.9-rc2-mm4/include/linux/ext3_fs_sb.h
@@ -54,7 +54,6 @@ struct ext3_sb_info {
u32 s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
- u8 *s_debts;
struct percpu_counter s_freeblocks_counter;
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
-------------------------------------------------------
This SF.net email is sponsored by: IT Product Guide on ITManagersJournal
Use IT products in your business? Tell us what you think of them. Give us
Your Opinions, Get Free ThinkGeek Gift Certificates! Click to find out more
http://productguide.itmanagersjournal.com/guidepromo.tmpl
|
|