/*
 *  linux/fs/ext4/ialloc.c
 * April 1997
 * by Mihai Budiu (budiu@cs.cornell.edu)
 * 
 *  from 
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  BSD ufs-inspired inode and directory allocation by 
 *  Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
 */

/*
 * ialloc.c contains the inode allocation and deallocation routines
 */

/*
 * The free inodes are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count in the group.  The descriptors are loaded in memory
 * when a file system is mounted (see ext4_read_super).
 */

#include <linux/fs.h>
#include <linux/ext4_fs.h>
#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/locks.h>

#include <asm/bitops.h>

static struct ext4_group_desc * get_group_desc (struct super_block * sb,
						unsigned int block_group,
						struct buffer_head ** bh,
						int partition)
{
	unsigned long group_desc;
	unsigned long desc;
	struct ext4_group_desc * gdp;

	if (block_group >= sb->u.ext4_sb[partition].s_groups_count)
		ext4_panic (sb, "get_group_desc",
			    "block_group >= groups_count - "
			    "block_group = %d, groups_count = %lu",
			    block_group, sb->u.ext4_sb[partition].s_groups_count);

	group_desc = block_group / EXT4_DESC_PER_BLOCK(sb, partition);
	desc = block_group % EXT4_DESC_PER_BLOCK(sb, partition);
	if (!sb->u.ext4_sb[partition].s_group_desc[group_desc])
		ext4_panic (sb, "get_group_desc",
			    "Group descriptor not loaded - "
			    "block_group = %d, group_desc = %lu, desc = %lu",
			     block_group, group_desc, desc);
	gdp = (struct ext4_group_desc *) 
		sb->u.ext4_sb[partition].s_group_desc[group_desc]->b_data;
	if (bh)
		*bh = sb->u.ext4_sb[partition].s_group_desc[group_desc];
	return gdp + desc;
}

/*
 * read_inode_bitmap reads the inode bitmap block of `block_group' from the
 * partition's device and records it in slot `bitmap_nr' of the partition's
 * bitmap cache (s_inode_bitmap / s_inode_bitmap_number).
 *
 * ext4_panic is called when the block cannot be read.
 */
static void read_inode_bitmap (struct super_block * sb,
			       unsigned long block_group,
			       unsigned int bitmap_nr,
			       int partition)
{
	struct ext4_group_desc * desc;
	struct buffer_head * bitmap_bh;

	/* The group descriptor tells us where the bitmap block lives. */
	desc = get_group_desc (sb, block_group, NULL, partition);

	bitmap_bh = bread (sb->u.ext4_sb[partition].realdevice,
			   desc->bg_inode_bitmap,
			   sb->s_blocksize);
	if (!bitmap_bh)
		ext4_panic (sb, "read_inode_bitmap",
			    "Cannot read inode bitmap - "
			    "block_group = %lu, inode_bitmap = %lu",
			    block_group, (unsigned long) desc->bg_inode_bitmap);

	/* Publish the buffer and remember which group it belongs to. */
	sb->u.ext4_sb[partition].s_inode_bitmap[bitmap_nr] = bitmap_bh;
	sb->u.ext4_sb[partition].s_inode_bitmap_number[bitmap_nr]
		= block_group;
}

/*
 * load_inode_bitmap loads the inode bitmap for a blocks group
 *
 * It maintains a cache for the last bitmaps loaded.  This cache is managed
 * with a LRU algorithm.
 *
 * Notes:
 * 1/ There is one cache per mounted file system.
 * 2/ If the file system contains less than EXT4_MAX_GROUP_LOADED groups,
 *    this function reads the bitmap without maintaining a LRU cache.
 */
static int load_inode_bitmap (struct super_block * sb,
			      unsigned int block_group,
			      int partition)
{
	int i, j;
	unsigned long inode_bitmap_number;
	struct buffer_head * inode_bitmap;

	if (block_group >= sb->u.ext4_sb[partition].s_groups_count)
		ext4_panic (sb, "load_inode_bitmap",
			    "block_group >= groups_count - "
			    "block_group = %d, groups_count = %lu",
			     block_group, 
			    sb->u.ext4_sb[partition].s_groups_count);
	if (sb->u.ext4_sb[partition].s_loaded_inode_bitmaps > 0 &&
	    sb->u.ext4_sb[partition].s_inode_bitmap_number[0] == block_group)
		return 0;
	if (sb->u.ext4_sb[partition].s_groups_count 
	    <= EXT4_MAX_GROUP_LOADED) {
		if (sb->u.ext4_sb[partition].s_inode_bitmap[block_group]) {
			if (sb->u.ext4_sb[partition].s_inode_bitmap_number[block_group] 
			    != block_group)
				ext4_panic (sb, "load_inode_bitmap",
					    "block_group != inode_bitmap_number");
			else
				return block_group;
		} else {
			read_inode_bitmap (sb, block_group, 
					   block_group, partition);
			return block_group;
		}
	}

	for (i = 0; i < sb->u.ext4_sb[partition].s_loaded_inode_bitmaps &&
		    sb->u.ext4_sb[partition].s_inode_bitmap_number[i] 
		     != block_group;
	     i++)
		;
	if (i < sb->u.ext4_sb[partition].s_loaded_inode_bitmaps &&
  	    sb->u.ext4_sb[partition].s_inode_bitmap_number[i] == block_group) {
		inode_bitmap_number = 
			sb->u.ext4_sb[partition].s_inode_bitmap_number[i];
		inode_bitmap = 
			sb->u.ext4_sb[partition].s_inode_bitmap[i];
		for (j = i; j > 0; j--) {
			sb->u.ext4_sb[partition].s_inode_bitmap_number[j] =
				sb->u.ext4_sb[partition].s_inode_bitmap_number[j - 1];
			sb->u.ext4_sb[partition].s_inode_bitmap[j] =
				sb->u.ext4_sb[partition].s_inode_bitmap[j - 1];
		}
		sb->u.ext4_sb[partition].s_inode_bitmap_number[0] = inode_bitmap_number;
		sb->u.ext4_sb[partition].s_inode_bitmap[0] = inode_bitmap;
	} else {
		if (sb->u.ext4_sb[partition].s_loaded_inode_bitmaps < EXT4_MAX_GROUP_LOADED)
			sb->u.ext4_sb[partition].s_loaded_inode_bitmaps++;
		else
			brelse (sb->u.ext4_sb[partition].s_inode_bitmap[EXT4_MAX_GROUP_LOADED - 1]);
		for (j = sb->u.ext4_sb[partition].s_loaded_inode_bitmaps - 1; 
		     j > 0; 
		     j--) {
			sb->u.ext4_sb[partition].s_inode_bitmap_number[j] =
				sb->u.ext4_sb[partition].s_inode_bitmap_number[j - 1];
			sb->u.ext4_sb[partition].s_inode_bitmap[j] =
				sb->u.ext4_sb[partition].s_inode_bitmap[j - 1];
		}
		read_inode_bitmap (sb, block_group, 0, partition);
	}
	return 0;
}

void ext4_free_inode (struct inode * inode)
{
	struct super_block * sb;
	struct buffer_head * bh;
	struct buffer_head * bh2;
	unsigned long block_group;
	unsigned long bit;
	int bitmap_nr;
	unsigned long ino;
	int partition;
	struct ext4_group_desc * gdp;
	struct ext4_super_block * es;

	if (!inode)
		return;
	if (!inode->i_dev) {
		printk ("ext4_free_inode: inode has no device\n");
		return;
	}
	if (inode->i_count > 1) {
		printk ("ext4_free_inode: inode has count=%d\n",
			inode->i_count);
		return;
	}
	if (inode->i_nlink) {
		printk ("ext4_free_inode: inode has nlink=%d\n",
			inode->i_nlink);
		return;
	}
	if (!inode->i_sb) {
		printk("ext4_free_inode: inode on nonexistent device\n");
		return;
	}

	ext4_debug ("freeing inode %lu\n", inode->i_ino);

	sb = inode->i_sb;
	lock_super (sb);
	ino = EXT4_REAL_INODE_NO(inode->i_ino, inode->i_sb);
	partition = EXT4_INO_PARTITION(inode->i_ino, inode->i_sb);
	if (ino < EXT4_FIRST_INO(sb) ||
	    ino > sb->u.ext4_sb[partition].s_es->s_inodes_count) {
		ext4_error (sb, "free_inode",
			    "reserved inode or nonexistent inode");
		unlock_super (sb);
		return;
	}
	es = sb->u.ext4_sb[partition].s_es;
	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb, partition);
	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb, partition);
	bitmap_nr = load_inode_bitmap (sb, block_group, partition);
	bh = sb->u.ext4_sb[partition].s_inode_bitmap[bitmap_nr];
	if (!clear_bit (bit, bh->b_data))
		ext4_warning (sb, "ext4_free_inode",
			      "bit already cleared for inode %lu", inode->i_ino);
	else {
		gdp = get_group_desc (sb, block_group, &bh2, partition);
		gdp->bg_free_inodes_count++;
		if (S_ISDIR(inode->i_mode))
			gdp->bg_used_dirs_count--;
		mark_buffer_dirty(bh2, 1);
		es->s_free_inodes_count++;
		mark_buffer_dirty(sb->u.ext4_sb[partition].s_sbh, 1);
		inode->i_dirt = 0;
	}
	mark_buffer_dirty(bh, 1);
	if (sb->s_flags & MS_SYNCHRONOUS) {
		ll_rw_block (WRITE, 1, &bh);
		wait_on_buffer (bh);
	}
	if (sb->dq_op)
		sb->dq_op->free_inode (inode, 1);
	sb->s_dirt = 1;
	clear_inode (inode);
	unlock_super (sb);
}

/*
 * inc_inode_version bumps the version number stored in the ext4-specific
 * part of the inode and flags the inode as dirty.
 *
 * `gdp' and `mode' are accepted but not used by the current body.
 *
 * This may be used one day by the NFS server.
 */
static void inc_inode_version (struct inode * inode,
			       struct ext4_group_desc *gdp,
			       int mode)
{
	inode->i_dirt = 1;
	++inode->u.ext4_i.i_version;
}

/*
 * ext4_new_inode allocates a new inode for an object of type `mode' that is
 * being created in directory `dir'.
 *
 * Directories are allocated on EXT4_DIR_PARTITION, everything else on
 * EXT4_FILE_PARTITION.
 *
 * Group selection:
 *  - Directory: among the groups that have at least one free inode and at
 *    least the average number of free inodes per group, the group with the
 *    most free blocks is chosen.
 *  - Other inodes: the parent's block group number (reduced modulo the
 *    group count, since the parent lives on the other partition) is tried
 *    first; then groups are probed with strides 1, 2, 4, ...; finally the
 *    groups are scanned linearly.
 *
 * The inode number handed out is (partition-local inode number << 1), with
 * the low bit set for non-directories.
 *
 * Returns the new in-core inode with *err set to 0, or NULL with *err set
 * to a negative errno:
 *   -EPERM   no parent directory was supplied
 *   -ENOMEM  no in-core inode could be obtained
 *   -ENOSPC  no group with a free inode was found, or an inconsistency was
 *            detected (reported through ext4_error)
 *   -EDQUOT  the quota alloc_inode operation refused the allocation
 */
struct inode * ext4_new_inode (const struct inode * dir, int mode, int * err)
{
	struct super_block * sb;
	struct buffer_head * bh;
	struct buffer_head * bh2;
	struct buffer_head * tmp_bh;
	int i, j, avefreei;
	struct inode * inode;
	int bitmap_nr;
	struct ext4_group_desc * gdp;
	struct ext4_group_desc * tmp;
	struct ext4_super_block * es;
	int partition;

	/* Never return NULL without telling the caller why. */
	if (!dir) {
		*err = -EPERM;
		return NULL;
	}
	inode = get_empty_inode ();
	if (!inode) {
		*err = -ENOMEM;
		return NULL;
	}
	sb = dir->i_sb;
	inode->i_sb = sb;
	inode->i_flags = sb->s_flags;
	lock_super (sb);
	if (S_ISDIR(mode)) partition = EXT4_DIR_PARTITION;
	else partition = EXT4_FILE_PARTITION;
	ext4_debug("Allocating inode on partition %d\n", partition);
	es = sb->u.ext4_sb[partition].s_es;
repeat:
	gdp = NULL;
	bh2 = NULL;
	i = 0;
	
	*err = -ENOSPC;
	if (S_ISDIR(mode)) {
		avefreei = es->s_free_inodes_count /
			sb->u.ext4_sb[partition].s_groups_count;
		for (j = 0; 
		     j < sb->u.ext4_sb[partition].s_groups_count; 
		     j++) {
			/*
			 * Scan with a scratch buffer head: bh2 must keep
			 * pointing at the descriptor block of the group
			 * finally chosen, because that is the buffer marked
			 * dirty once its descriptor has been updated.
			 */
			tmp = get_group_desc (sb, j, &tmp_bh, partition);
			if (tmp->bg_free_inodes_count &&
			    tmp->bg_free_inodes_count >= avefreei) {
				if (!gdp || 
				    (tmp->bg_free_blocks_count >
				     gdp->bg_free_blocks_count)) {
					i = j;
					gdp = tmp;
					bh2 = tmp_bh;
				}
			}
		}
	}
	else {  /* regular file */
		/* REFINE THIS */
		/* use as a hint the block group of the parent directory;
		   this is on the other partition, but... */
		i = dir->u.ext4_i.i_block_group;
		i %= sb->u.ext4_sb[partition].s_groups_count;
		tmp = get_group_desc (sb, i, &bh2, partition);
		if (tmp->bg_free_inodes_count)
			gdp = tmp;
		else
		{
			/*
			 * Probe further groups with a stride that doubles
			 * at every step to find a group with a free inode
			 */
			for (j = 1; 
			     j < sb->u.ext4_sb[partition].s_groups_count; 
			     j <<= 1) {
				i += j;
				if (i>=sb->u.ext4_sb[partition].s_groups_count)
					i -= sb->u.ext4_sb[partition].
						s_groups_count;
				tmp = get_group_desc (sb, i, &bh2, partition);
				if (tmp->bg_free_inodes_count) {
					gdp = tmp;
					break;
				}
			}
		}
		if (!gdp) {
			/*
			 * That failed: try linear search for a free inode
			 */
			i = dir->u.ext4_i.i_block_group + 1;
			i %= sb->u.ext4_sb[partition].s_groups_count;
			for (j = 2; 
			     j < sb->u.ext4_sb[partition].s_groups_count; 
			     j++) {
				if (++i >= sb->u.ext4_sb[partition].s_groups_count)
					i = 0;
				tmp = get_group_desc (sb, i, &bh2, partition);
				if (tmp->bg_free_inodes_count) {
					gdp = tmp;
					break;
				}
			}
		}
	}

	if (!gdp) {
		ext4_debug("Failed to get inode\n");
		unlock_super (sb);
		iput(inode);
		return NULL;
	}
	ext4_debug("Chosen block group %d\n", i);
	bitmap_nr = load_inode_bitmap (sb, i, partition);
	bh = sb->u.ext4_sb[partition].s_inode_bitmap[bitmap_nr];
	if ((j = find_first_zero_bit ((unsigned long *) bh->b_data,
				      EXT4_INODES_PER_GROUP(sb, partition))) <
	    EXT4_INODES_PER_GROUP(sb, partition)) {
		ext4_debug("find_first_0_bit->bit %d, grp %d\n", j, i);
		if (set_bit (j, bh->b_data)) {
			ext4_warning (sb, "ext4_new_inode",
				      "bit already set for inode %d", j);
			goto repeat;
		}
		mark_buffer_dirty(bh, 1);
		if (sb->s_flags & MS_SYNCHRONOUS) {
			ll_rw_block (WRITE, 1, &bh);
			wait_on_buffer (bh);
		}
	} else {
		/*
		 * The bitmap is full although the descriptor was selected
		 * for having free inodes.
		 */
		if (gdp->bg_free_inodes_count != 0) {
			ext4_error (sb, "ext4_new_inode",
				    "Free inodes count corrupted in group %d",
				    i);
			unlock_super (sb);
			iput (inode);
			return NULL;
		}
		goto repeat;
	}
	/* Turn the bit index into a partition-local inode number (1-based). */
	j += i * EXT4_INODES_PER_GROUP(sb, partition) + 1;
	if (j < EXT4_FIRST_INO(sb) || j > es->s_inodes_count) {
		ext4_error (sb, "ext4_new_inode",
			    "reserved inode or inode > inodes count - "
			    "block_group = %d,inode=%d", i, j);
		unlock_super (sb);
		iput (inode);
		return NULL;
	}
	gdp->bg_free_inodes_count--;
	if (S_ISDIR(mode))
		gdp->bg_used_dirs_count++;
	mark_buffer_dirty(bh2, 1);
	es->s_free_inodes_count--;
	mark_buffer_dirty(sb->u.ext4_sb[partition].s_sbh, 1);
	sb->s_dirt = 1;
	inode->i_mode = mode;
	inode->i_sb = sb;
	inode->i_count = 1;
	inode->i_nlink = 1;
	inode->i_dev = sb->s_dev;
	inode->i_uid = current->fsuid;
	if (test_opt (sb, GRPID))
		inode->i_gid = dir->i_gid;
	else if (dir->i_mode & S_ISGID) {
		inode->i_gid = dir->i_gid;
		/*
		 * NOTE(review): i_mode was already stored above, so this
		 * only changes the local `mode' - confirm whether callers
		 * are expected to propagate S_ISGID themselves.
		 */
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else
		inode->i_gid = current->fsgid;
	inode->i_dirt = 1;
	/* Encode the partition in the low bit of the visible inode number. */
	inode->i_ino = j << 1;
	if (!S_ISDIR(mode)) inode->i_ino++;
	ext4_debug("Inode has number %ld\n", inode->i_ino);
	inode->i_blksize = PAGE_SIZE;	
            /* This is the optimal IO size (for stat), not the fs block size */
	inode->i_blocks = 0;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	inode->u.ext4_i.i_new_inode = 1;
	inode->u.ext4_i.i_flags = dir->u.ext4_i.i_flags;
	if (S_ISLNK(mode))
		inode->u.ext4_i.i_flags &= 
			~(EXT4_IMMUTABLE_FL | EXT4_APPEND_FL);
	inode->u.ext4_i.i_faddr = 0;
	inode->u.ext4_i.i_frag_no = 0;
	inode->u.ext4_i.i_frag_size = 0;
	inode->u.ext4_i.i_file_acl = 0;
	inode->u.ext4_i.i_dir_acl = 0;
	inode->u.ext4_i.i_dtime = 0;
	inode->u.ext4_i.i_block_group = i;
	inode->i_op = NULL;
	if (inode->u.ext4_i.i_flags & EXT4_SYNC_FL)
		inode->i_flags |= MS_SYNCHRONOUS;
	insert_inode_hash(inode);
	inc_inode_version (inode, gdp, mode);

	unlock_super (sb);
	if (sb->dq_op) {
		sb->dq_op->initialize (inode, -1);
		if (sb->dq_op->alloc_inode (inode, 1)) {
			/* Over quota: give the inode back through iput. */
			sb->dq_op->drop (inode);
			inode->i_nlink = 0;
			iput (inode);
			*err = -EDQUOT;
			return NULL;
		}
		inode->i_flags |= S_WRITE;
	}
	ext4_debug("allocating inode %lu\n", inode->i_ino);

	*err = 0;
	return inode;
}

/*
 * ext4_count_free_inodes returns the number of free inodes of the file
 * system, summed over its two partitions (indices 0 and 1).
 *
 * Without EXT4FS_DEBUG the value is simply the sum of the free inode counts
 * stored in the two partition super blocks.
 *
 * With EXT4FS_DEBUG the group descriptors and the inode bitmaps are walked
 * under the super block lock.  For each partition the stored count is
 * printed next to the counts recomputed from that partition's descriptors
 * and bitmaps, and the sum of the descriptor counts over both partitions
 * is returned.
 */
unsigned long ext4_count_free_inodes (struct super_block * sb)
{
#ifdef EXT4FS_DEBUG
	struct ext4_super_block * es;
	unsigned long total_desc_count;
	unsigned long desc_count, bitmap_count, x;
	int bitmap_nr;
	struct ext4_group_desc * gdp;
	int i, j;

	lock_super (sb);
	total_desc_count = 0;
	for (j=0; j < 2; j++) {
		/*
		 * Restart the tallies for every partition so that the
		 * summary line compares like with like.
		 */
		desc_count = 0;
		bitmap_count = 0;
		es = sb->u.ext4_sb[j].s_es;
		for (i = 0; i < sb->u.ext4_sb[j].s_groups_count; i++) {
			gdp = get_group_desc (sb, i, NULL, j);
			desc_count += gdp->bg_free_inodes_count;
			bitmap_nr = load_inode_bitmap (sb, i, j);
			x = ext4_count_free(sb->u.ext4_sb[j].s_inode_bitmap[bitmap_nr],
					    EXT4_INODES_PER_GROUP(sb,j) / 8);
			printk ("group %d: stored = %d, counted = %lu\n",
				i, gdp->bg_free_inodes_count, x);
			bitmap_count += x;
		}
		printk("ext4_count_free_inodes: stored = %lu, "
		       "computed = %lu, %lu\n",
		       (unsigned long) es->s_free_inodes_count,
		       desc_count, bitmap_count);
		total_desc_count += desc_count;
	}
	unlock_super (sb);
	return total_desc_count;
#else
	return sb->u.ext4_sb[0].s_es->s_free_inodes_count +
		sb->u.ext4_sb[1].s_es->s_free_inodes_count;
#endif
}

/*
 * ext4_check_inodes_bitmap cross-checks the free inode accounting of both
 * partitions (indices 0 and 1) against the inode bitmaps.
 *
 * Under the super block lock, for every group the value returned by
 * ext4_count_free for the group's bitmap is compared with the free inode
 * count stored in the group descriptor; the per-partition sum of those
 * values is then compared with the count stored in the partition super
 * block.  Every mismatch is reported through ext4_error.
 */
void ext4_check_inodes_bitmap (struct super_block * sb)
{
	struct ext4_super_block * es;
	struct ext4_group_desc * gdp;
	unsigned long bitmap_count, x;
	int bitmap_nr;
	int i, j;

	lock_super (sb);
	for (j=0; j < 2; j++) {
		bitmap_count = 0;

		es = sb->u.ext4_sb[j].s_es;
		for (i = 0; i < sb->u.ext4_sb[j].s_groups_count; i++) {
			gdp = get_group_desc (sb, i, NULL, j);
			bitmap_nr = load_inode_bitmap (sb, i, j);
			/* The bitmap length is passed in bytes. */
			x = ext4_count_free (sb->u.ext4_sb[j].s_inode_bitmap[bitmap_nr],
					     EXT4_INODES_PER_GROUP(sb, j) / 8);
			if (gdp->bg_free_inodes_count != x)
				ext4_error (sb, "ext4_check_inodes_bitmap",
					    "Wrong free inodes count in group %d, "
					    "stored = %d, counted = %lu", i,
					    gdp->bg_free_inodes_count, x);
			bitmap_count += x;
		}
		if (es->s_free_inodes_count != bitmap_count)
			ext4_error (sb, "ext4_check_inodes_bitmap",
				    "Wrong free inodes count in super block, "
				    "stored = %lu, counted = %lu",
				    (unsigned long) es->s_free_inodes_count, 
				    bitmap_count);
	}
	unlock_super (sb);
}
