Logo Search packages:      
Sourcecode: linux version File versions  Download package

jfs_dtree.c

/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 *    jfs_dtree.c: directory B+-tree manager
 *
 * B+-tree with variable length key directory:
 *
 * each directory page is structured as an array of 32-byte
 * directory entry slots initialized as a freelist
 * to avoid search/compaction of free space at insertion.
 * when an entry is inserted, a number of slots are allocated
 * from the freelist as required to store variable length data
 * of the entry; when the entry is deleted, slots of the entry
 * are returned to freelist.
 *
 * leaf entry stores full name as key and file serial number
 * (aka inode number) as data.
 * internal/router entry stores sufffix compressed name
 * as key and simple extent descriptor as data.
 *
 * each directory page maintains a sorted entry index table
 * which stores the start slot index of sorted entries
 * to allow binary search on the table.
 *
 * directory starts as a root/leaf page in on-disk inode
 * inline data area.
 * when it becomes full, it starts a leaf of a external extent
 * of length of 1 block. each time the first leaf becomes full,
 * it is extended rather than split (its size is doubled),
 * until its length becoms 4 KBytes, from then the extent is split
 * with new 4 Kbyte extent when it becomes full
 * to reduce external fragmentation of small directories.
 *
 * blah, blah, blah, for linear scan of directory in pieces by
 * readdir().
 *
 *
 *    case-insensitive directory file system
 *
 * names are stored in case-sensitive way in leaf entry.
 * but stored, searched and compared in case-insensitive (uppercase) order
 * (i.e., both search key and entry key are folded for search/compare):
 * (note that case-sensitive order is BROKEN in storage, e.g.,
 *  sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad
 *
 *  entries which folds to the same key makes up a equivalent class
 *  whose members are stored as contiguous cluster (may cross page boundary)
 *  but whose order is arbitrary and acts as duplicate, e.g.,
 *  abc, Abc, aBc, abC)
 *
 * once match is found at leaf, requires scan forward/backward
 * either for, in case-insensitive search, duplicate
 * or for, in case-sensitive search, for exact match
 *
 * router entry must be created/stored in case-insensitive way
 * in internal entry:
 * (right most key of left page and left most key of right page
 * are folded, and its suffix compression is propagated as router
 * key in parent)
 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB>
 * should be made the router key for the split)
 *
 * case-insensitive search:
 *
 *    fold search key;
 *
 *    case-insensitive search of B-tree:
 *    for internal entry, router key is already folded;
 *    for leaf entry, fold the entry key before comparison.
 *
 *    if (leaf entry case-insensitive match found)
 *          if (next entry satisfies case-insensitive match)
 *                return EDUPLICATE;
 *          if (prev entry satisfies case-insensitive match)
 *                return EDUPLICATE;
 *          return match;
 *    else
 *          return no match;
 *
 *    serialization:
 * target directory inode lock is being held on entry/exit
 * of all main directory service routines.
 *
 *    log based recovery:
 */

#include <linux/fs.h>
#include <linux/quotaops.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dmap.h"
#include "jfs_unicode.h"
#include "jfs_debug.h"

/* dtree split parameter */
struct dtsplit {
      struct metapage *mp;
      s16 index;
      s16 nslot;
      struct component_name *key;
      ddata_t *data;
      struct pxdlist *pxdlist;
};

#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot)

/* get page buffer for specified block address */
#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
{\
      BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\
      if (!(RC))\
      {\
            if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\
                ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\
            {\
                  BT_PUTPAGE(MP);\
                  jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\
                  MP = NULL;\
                  RC = -EIO;\
            }\
      }\
}

/* for consistency */
#define DT_PUTPAGE(MP) BT_PUTPAGE(MP)

#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
      BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot)

/*
 * forward references
 */
static int dtSplitUp(tid_t tid, struct inode *ip,
                 struct dtsplit * split, struct btstack * btstack);

static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
                   struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp);

static int dtExtendPage(tid_t tid, struct inode *ip,
                  struct dtsplit * split, struct btstack * btstack);

static int dtSplitRoot(tid_t tid, struct inode *ip,
                   struct dtsplit * split, struct metapage ** rmpp);

static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
                  dtpage_t * fp, struct btstack * btstack);

static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p);

static int dtReadFirst(struct inode *ip, struct btstack * btstack);

static int dtReadNext(struct inode *ip,
                  loff_t * offset, struct btstack * btstack);

static int dtCompare(struct component_name * key, dtpage_t * p, int si);

static int ciCompare(struct component_name * key, dtpage_t * p, int si,
                 int flag);

static void dtGetKey(dtpage_t * p, int i, struct component_name * key,
                 int flag);

static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
                        int ri, struct component_name * key, int flag);

static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
                    ddata_t * data, struct dt_lock **);

static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
                  struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
                  int do_index);

static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock);

static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock);

static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock);

#define ciToUpper(c)    UniStrupr((c)->name)

/*
 *    read_index_page()
 *
 *    Reads a page of a directory's index table.
 *    Having metadata mapped into the directory inode's address space
 *    presents a multitude of problems.  We avoid this by mapping to
 *    the absolute address space outside of the *_metapage routines
 */
static struct metapage *read_index_page(struct inode *inode, s64 blkno)
{
      int rc;
      s64 xaddr;
      int xflag;
      s32 xlen;

      rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
      if (rc || (xaddr == 0))
            return NULL;

      return read_metapage(inode, xaddr, PSIZE, 1);
}

/*
 *    get_index_page()
 *
 *    Same as get_index_page(), but get's a new page without reading
 */
static struct metapage *get_index_page(struct inode *inode, s64 blkno)
{
      int rc;
      s64 xaddr;
      int xflag;
      s32 xlen;

      rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
      if (rc || (xaddr == 0))
            return NULL;

      return get_metapage(inode, xaddr, PSIZE, 1);
}

/*
 *    find_index()
 *
 *    Returns dtree page containing directory table entry for specified
 *    index and pointer to its entry.
 *
 *    mp must be released by caller.
 */
static struct dir_table_slot *find_index(struct inode *ip, u32 index,
                               struct metapage ** mp, s64 *lblock)
{
      struct jfs_inode_info *jfs_ip = JFS_IP(ip);
      s64 blkno;
      s64 offset;
      int page_offset;
      struct dir_table_slot *slot;
      static int maxWarnings = 10;

      if (index < 2) {
            if (maxWarnings) {
                  jfs_warn("find_entry called with index = %d", index);
                  maxWarnings--;
            }
            return NULL;
      }

      if (index >= jfs_ip->next_index) {
            jfs_warn("find_entry called with index >= next_index");
            return NULL;
      }

      if (jfs_dirtable_inline(ip)) {
            /*
             * Inline directory table
             */
            *mp = NULL;
            slot = &jfs_ip->i_dirtable[index - 2];
      } else {
            offset = (index - 2) * sizeof(struct dir_table_slot);
            page_offset = offset & (PSIZE - 1);
            blkno = ((offset + 1) >> L2PSIZE) <<
                JFS_SBI(ip->i_sb)->l2nbperpage;

            if (*mp && (*lblock != blkno)) {
                  release_metapage(*mp);
                  *mp = NULL;
            }
            if (*mp == 0) {
                  *lblock = blkno;
                  *mp = read_index_page(ip, blkno);
            }
            if (*mp == 0) {
                  jfs_err("free_index: error reading directory table");
                  return NULL;
            }

            slot =
                (struct dir_table_slot *) ((char *) (*mp)->data +
                                     page_offset);
      }
      return slot;
}

static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
                        u32 index)
{
      struct tlock *tlck;
      struct linelock *llck;
      struct lv *lv;

      tlck = txLock(tid, ip, mp, tlckDATA);
      llck = (struct linelock *) tlck->lock;

      if (llck->index >= llck->maxcnt)
            llck = txLinelock(llck);
      lv = &llck->lv[llck->index];

      /*
       *    Linelock slot size is twice the size of directory table
       *    slot size.  512 entries per page.
       */
      lv->offset = ((index - 2) & 511) >> 1;
      lv->length = 1;
      llck->index++;
}

/*
 *    add_index()
 *
 *    Adds an entry to the directory index table.  This is used to provide
 *    each directory entry with a persistent index in which to resume
 *    directory traversals
 */
static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot)
{
      struct super_block *sb = ip->i_sb;
      struct jfs_sb_info *sbi = JFS_SBI(sb);
      struct jfs_inode_info *jfs_ip = JFS_IP(ip);
      u64 blkno;
      struct dir_table_slot *dirtab_slot;
      u32 index;
      struct linelock *llck;
      struct lv *lv;
      struct metapage *mp;
      s64 offset;
      uint page_offset;
      struct tlock *tlck;
      s64 xaddr;

      ASSERT(DO_INDEX(ip));

      if (jfs_ip->next_index < 2) {
            jfs_warn("add_index: next_index = %d.  Resetting!",
                     jfs_ip->next_index);
            jfs_ip->next_index = 2;
      }

      index = jfs_ip->next_index++;

      if (index <= MAX_INLINE_DIRTABLE_ENTRY) {
            /*
             * i_size reflects size of index table, or 8 bytes per entry.
             */
            ip->i_size = (loff_t) (index - 1) << 3;

            /*
             * dir table fits inline within inode
             */
            dirtab_slot = &jfs_ip->i_dirtable[index-2];
            dirtab_slot->flag = DIR_INDEX_VALID;
            dirtab_slot->slot = slot;
            DTSaddress(dirtab_slot, bn);

            set_cflag(COMMIT_Dirtable, ip);

            return index;
      }
      if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
            struct dir_table_slot temp_table[12];

            /*
             * It's time to move the inline table to an external
             * page and begin to build the xtree
             */
            if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage))
                  goto clean_up;
            if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) {
                  DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
                  goto clean_up;
            }

            /*
             * Save the table, we're going to overwrite it with the
             * xtree root
             */
            memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table));

            /*
             * Initialize empty x-tree
             */
            xtInitRoot(tid, ip);

            /*
             * Add the first block to the xtree
             */
            if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) {
                  /* This really shouldn't fail */
                  jfs_warn("add_index: xtInsert failed!");
                  memcpy(&jfs_ip->i_dirtable, temp_table,
                         sizeof (temp_table));
                  dbFree(ip, xaddr, sbi->nbperpage);
                  DQUOT_FREE_BLOCK(ip, sbi->nbperpage);
                  goto clean_up;
            }
            ip->i_size = PSIZE;

            if ((mp = get_index_page(ip, 0)) == 0) {
                  jfs_err("add_index: get_metapage failed!");
                  xtTruncate(tid, ip, 0, COMMIT_PWMAP);
                  memcpy(&jfs_ip->i_dirtable, temp_table,
                         sizeof (temp_table));
                  goto clean_up;
            }
            tlck = txLock(tid, ip, mp, tlckDATA);
            llck = (struct linelock *) & tlck->lock;
            ASSERT(llck->index == 0);
            lv = &llck->lv[0];

            lv->offset = 0;
            lv->length = 6;   /* tlckDATA slot size is 16 bytes */
            llck->index++;

            memcpy(mp->data, temp_table, sizeof(temp_table));

            mark_metapage_dirty(mp);
            release_metapage(mp);

            /*
             * Logging is now directed by xtree tlocks
             */
            clear_cflag(COMMIT_Dirtable, ip);
      }

      offset = (index - 2) * sizeof(struct dir_table_slot);
      page_offset = offset & (PSIZE - 1);
      blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage;
      if (page_offset == 0) {
            /*
             * This will be the beginning of a new page
             */
            xaddr = 0;
            if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) {
                  jfs_warn("add_index: xtInsert failed!");
                  goto clean_up;
            }
            ip->i_size += PSIZE;

            if ((mp = get_index_page(ip, blkno)))
                  memset(mp->data, 0, PSIZE);   /* Just looks better */
            else
                  xtTruncate(tid, ip, offset, COMMIT_PWMAP);
      } else
            mp = read_index_page(ip, blkno);

      if (mp == 0) {
            jfs_err("add_index: get/read_metapage failed!");
            goto clean_up;
      }

      lock_index(tid, ip, mp, index);

      dirtab_slot =
          (struct dir_table_slot *) ((char *) mp->data + page_offset);
      dirtab_slot->flag = DIR_INDEX_VALID;
      dirtab_slot->slot = slot;
      DTSaddress(dirtab_slot, bn);

      mark_metapage_dirty(mp);
      release_metapage(mp);

      return index;

      clean_up:

      jfs_ip->next_index--;

      return 0;
}

/*
 *    free_index()
 *
 *    Marks an entry to the directory index table as free.
 */
static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next)
{
      struct dir_table_slot *dirtab_slot;
      s64 lblock;
      struct metapage *mp = NULL;

      dirtab_slot = find_index(ip, index, &mp, &lblock);

      if (dirtab_slot == 0)
            return;

      dirtab_slot->flag = DIR_INDEX_FREE;
      dirtab_slot->slot = dirtab_slot->addr1 = 0;
      dirtab_slot->addr2 = cpu_to_le32(next);

      if (mp) {
            lock_index(tid, ip, mp, index);
            mark_metapage_dirty(mp);
            release_metapage(mp);
      } else
            set_cflag(COMMIT_Dirtable, ip);
}

/*
 *    modify_index()
 *
 *    Changes an entry in the directory index table
 */
static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn,
                   int slot, struct metapage ** mp, s64 *lblock)
{
      struct dir_table_slot *dirtab_slot;

      dirtab_slot = find_index(ip, index, mp, lblock);

      if (dirtab_slot == 0)
            return;

      DTSaddress(dirtab_slot, bn);
      dirtab_slot->slot = slot;

      if (*mp) {
            lock_index(tid, ip, *mp, index);
            mark_metapage_dirty(*mp);
      } else
            set_cflag(COMMIT_Dirtable, ip);
}

/*
 *    read_index()
 *
 *    reads a directory table slot
 */
static int read_index(struct inode *ip, u32 index,
                 struct dir_table_slot * dirtab_slot)
{
      s64 lblock;
      struct metapage *mp = NULL;
      struct dir_table_slot *slot;

      slot = find_index(ip, index, &mp, &lblock);
      if (slot == 0) {
            return -EIO;
      }

      memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot));

      if (mp)
            release_metapage(mp);

      return 0;
}

/*
 *    dtSearch()
 *
 * function:
 *    Search for the entry with specified key
 *
 * parameter:
 *
 * return: 0 - search result on stack, leaf page pinned;
 *       errno - I/O error
 */
int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
           struct btstack * btstack, int flag)
{
      int rc = 0;
      int cmp = 1;            /* init for empty page */
      s64 bn;
      struct metapage *mp;
      dtpage_t *p;
      s8 *stbl;
      int base, index, lim;
      struct btframe *btsp;
      pxd_t *pxd;
      int psize = 288;  /* initial in-line directory */
      ino_t inumber;
      struct component_name ciKey;
      struct super_block *sb = ip->i_sb;

      ciKey.name =
          (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
                        GFP_NOFS);
      if (ciKey.name == 0) {
            rc = -ENOMEM;
            goto dtSearch_Exit2;
      }


      /* uppercase search key for c-i directory */
      UniStrcpy(ciKey.name, key->name);
      ciKey.namlen = key->namlen;

      /* only uppercase if case-insensitive support is on */
      if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) {
            ciToUpper(&ciKey);
      }
      BT_CLR(btstack);  /* reset stack */

      /* init level count for max pages to split */
      btstack->nsplit = 1;

      /*
       *    search down tree from root:
       *
       * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
       * internal page, child page Pi contains entry with k, Ki <= K < Kj.
       *
       * if entry with search key K is not found
       * internal page search find the entry with largest key Ki
       * less than K which point to the child page to search;
       * leaf page search find the entry with smallest key Kj
       * greater than K so that the returned index is the position of
       * the entry to be shifted right for insertion of new entry.
       * for empty tree, search key is greater than any key of the tree.
       *
       * by convention, root bn = 0.
       */
      for (bn = 0;;) {
            /* get/pin the page to search */
            DT_GETPAGE(ip, bn, mp, psize, p, rc);
            if (rc)
                  goto dtSearch_Exit1;

            /* get sorted entry table of the page */
            stbl = DT_GETSTBL(p);

            /*
             * binary search with search key K on the current page.
             */
            for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
                  index = base + (lim >> 1);

                  if (p->header.flag & BT_LEAF) {
                        /* uppercase leaf name to compare */
                        cmp =
                            ciCompare(&ciKey, p, stbl[index],
                                    JFS_SBI(sb)->mntflag);
                  } else {
                        /* router key is in uppercase */

                        cmp = dtCompare(&ciKey, p, stbl[index]);


                  }
                  if (cmp == 0) {
                        /*
                         *    search hit
                         */
                        /* search hit - leaf page:
                         * return the entry found
                         */
                        if (p->header.flag & BT_LEAF) {
                              inumber = le32_to_cpu(
                  ((struct ldtentry *) & p->slot[stbl[index]])->inumber);

                              /*
                               * search for JFS_LOOKUP
                               */
                              if (flag == JFS_LOOKUP) {
                                    *data = inumber;
                                    rc = 0;
                                    goto out;
                              }

                              /*
                               * search for JFS_CREATE
                               */
                              if (flag == JFS_CREATE) {
                                    *data = inumber;
                                    rc = -EEXIST;
                                    goto out;
                              }

                              /*
                               * search for JFS_REMOVE or JFS_RENAME
                               */
                              if ((flag == JFS_REMOVE ||
                                   flag == JFS_RENAME) &&
                                  *data != inumber) {
                                    rc = -ESTALE;
                                    goto out;
                              }

                              /*
                               * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME
                               */
                              /* save search result */
                              *data = inumber;
                              btsp = btstack->top;
                              btsp->bn = bn;
                              btsp->index = index;
                              btsp->mp = mp;

                              rc = 0;
                              goto dtSearch_Exit1;
                        }

                        /* search hit - internal page:
                         * descend/search its child page
                         */
                        goto getChild;
                  }

                  if (cmp > 0) {
                        base = index + 1;
                        --lim;
                  }
            }

            /*
             *    search miss
             *
             * base is the smallest index with key (Kj) greater than
             * search key (K) and may be zero or (maxindex + 1) index.
             */
            /*
             * search miss - leaf page
             *
             * return location of entry (base) where new entry with
             * search key K is to be inserted.
             */
            if (p->header.flag & BT_LEAF) {
                  /*
                   * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME
                   */
                  if (flag == JFS_LOOKUP || flag == JFS_REMOVE ||
                      flag == JFS_RENAME) {
                        rc = -ENOENT;
                        goto out;
                  }

                  /*
                   * search for JFS_CREATE|JFS_FINDDIR:
                   *
                   * save search result
                   */
                  *data = 0;
                  btsp = btstack->top;
                  btsp->bn = bn;
                  btsp->index = base;
                  btsp->mp = mp;

                  rc = 0;
                  goto dtSearch_Exit1;
            }

            /*
             * search miss - internal page
             *
             * if base is non-zero, decrement base by one to get the parent
             * entry of the child page to search.
             */
            index = base ? base - 1 : base;

            /*
             * go down to child page
             */
            getChild:
            /* update max. number of pages to split */
            if (BT_STACK_FULL(btstack)) {
                  /* Something's corrupted, mark filesystem dirty so
                   * chkdsk will fix it.
                   */
                  jfs_error(sb, "stack overrun in dtSearch!");
                  BT_STACK_DUMP(btstack);
                  rc = -EIO;
                  goto out;
            }
            btstack->nsplit++;

            /* push (bn, index) of the parent page/entry */
            BT_PUSH(btstack, bn, index);

            /* get the child page block number */
            pxd = (pxd_t *) & p->slot[stbl[index]];
            bn = addressPXD(pxd);
            psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;

            /* unpin the parent page */
            DT_PUTPAGE(mp);
      }

      out:
      DT_PUTPAGE(mp);

      dtSearch_Exit1:

      kfree(ciKey.name);

      dtSearch_Exit2:

      return rc;
}


/*
 *    dtInsert()
 *
 * function: insert an entry to directory tree
 *
 * parameter:
 *
 * return: 0 - success;
 *       errno - failure;
 */
int dtInsert(tid_t tid, struct inode *ip,
       struct component_name * name, ino_t * fsn, struct btstack * btstack)
{
      int rc = 0;
      struct metapage *mp;    /* meta-page buffer */
      dtpage_t *p;            /* base B+-tree index page */
      s64 bn;
      int index;
      struct dtsplit split;   /* split information */
      ddata_t data;
      struct dt_lock *dtlck;
      int n;
      struct tlock *tlck;
      struct lv *lv;

      /*
       *    retrieve search result
       *
       * dtSearch() returns (leaf page pinned, index at which to insert).
       * n.b. dtSearch() may return index of (maxindex + 1) of
       * the full page.
       */
      DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);

      /*
       *    insert entry for new key
       */
      if (DO_INDEX(ip)) {
            if (JFS_IP(ip)->next_index == DIREND) {
                  DT_PUTPAGE(mp);
                  return -EMLINK;
            }
            n = NDTLEAF(name->namlen);
            data.leaf.tid = tid;
            data.leaf.ip = ip;
      } else {
            n = NDTLEAF_LEGACY(name->namlen);
            data.leaf.ip = NULL;    /* signifies legacy directory format */
      }
      data.leaf.ino = *fsn;

      /*
       *    leaf page does not have enough room for new entry:
       *
       *    extend/split the leaf page;
       *
       * dtSplitUp() will insert the entry and unpin the leaf page.
       */
      if (n > p->header.freecnt) {
            split.mp = mp;
            split.index = index;
            split.nslot = n;
            split.key = name;
            split.data = &data;
            rc = dtSplitUp(tid, ip, &split, btstack);
            return rc;
      }

      /*
       *    leaf page does have enough room for new entry:
       *
       *    insert the new data entry into the leaf page;
       */
      BT_MARK_DIRTY(mp, ip);
      /*
       * acquire a transaction lock on the leaf page
       */
      tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
      dtlck = (struct dt_lock *) & tlck->lock;
      ASSERT(dtlck->index == 0);
      lv = & dtlck->lv[0];

      /* linelock header */
      lv->offset = 0;
      lv->length = 1;
      dtlck->index++;

      dtInsertEntry(p, index, name, &data, &dtlck);

      /* linelock stbl of non-root leaf page */
      if (!(p->header.flag & BT_ROOT)) {
            if (dtlck->index >= dtlck->maxcnt)
                  dtlck = (struct dt_lock *) txLinelock(dtlck);
            lv = & dtlck->lv[dtlck->index];
            n = index >> L2DTSLOTSIZE;
            lv->offset = p->header.stblindex + n;
            lv->length =
                ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
            dtlck->index++;
      }

      /* unpin the leaf page */
      DT_PUTPAGE(mp);

      return 0;
}


/*
 *    dtSplitUp()
 *
 * function: propagate insertion bottom up;
 *
 * parameter:
 *
 * return: 0 - success;
 *       errno - failure;
 *    leaf page unpinned;
 */
static int dtSplitUp(tid_t tid,
        struct inode *ip, struct dtsplit * split, struct btstack * btstack)
{
      struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
      int rc = 0;
      struct metapage *smp;
      dtpage_t *sp;           /* split page */
      struct metapage *rmp;
      dtpage_t *rp;           /* new right page split from sp */
      pxd_t rpxd;       /* new right page extent descriptor */
      struct metapage *lmp;
      dtpage_t *lp;           /* left child page */
      int skip;         /* index of entry of insertion */
      struct btframe *parent; /* parent page entry on traverse stack */
      s64 xaddr, nxaddr;
      int xlen, xsize;
      struct pxdlist pxdlist;
      pxd_t *pxd;
      struct component_name key = { 0, NULL };
      ddata_t *data = split->data;
      int n;
      struct dt_lock *dtlck;
      struct tlock *tlck;
      struct lv *lv;
      int quota_allocation = 0;

      /* get split page */
      smp = split->mp;
      sp = DT_PAGE(ip, smp);

      key.name =
          (wchar_t *) kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t),
                        GFP_NOFS);
      if (key.name == 0) {
            DT_PUTPAGE(smp);
            rc = -ENOMEM;
            goto dtSplitUp_Exit;
      }

      /*
       *    split leaf page
       *
       * The split routines insert the new entry, and
       * acquire txLock as appropriate.
       */
      /*
       *    split root leaf page:
       */
      if (sp->header.flag & BT_ROOT) {
            /*
             * allocate a single extent child page
             */
            xlen = 1;
            n = sbi->bsize >> L2DTSLOTSIZE;
            n -= (n + 31) >> L2DTSLOTSIZE;      /* stbl size */
            n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */
            if (n <= split->nslot)
                  xlen++;
            if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) {
                  DT_PUTPAGE(smp);
                  goto freeKeyName;
            }

            pxdlist.maxnpxd = 1;
            pxdlist.npxd = 0;
            pxd = &pxdlist.pxd[0];
            PXDaddress(pxd, xaddr);
            PXDlength(pxd, xlen);
            split->pxdlist = &pxdlist;
            rc = dtSplitRoot(tid, ip, split, &rmp);

            if (rc)
                  dbFree(ip, xaddr, xlen);
            else
                  DT_PUTPAGE(rmp);

            DT_PUTPAGE(smp);

            if (!DO_INDEX(ip))
                  ip->i_size = xlen << sbi->l2bsize;

            goto freeKeyName;
      }

      /*
       *    extend first leaf page
       *
       * extend the 1st extent if less than buffer page size
       * (dtExtendPage() reurns leaf page unpinned)
       */
      pxd = &sp->header.self;
      xlen = lengthPXD(pxd);
      xsize = xlen << sbi->l2bsize;
      if (xsize < PSIZE) {
            xaddr = addressPXD(pxd);
            n = xsize >> L2DTSLOTSIZE;
            n -= (n + 31) >> L2DTSLOTSIZE;      /* stbl size */
            if ((n + sp->header.freecnt) <= split->nslot)
                  n = xlen + (xlen << 1);
            else
                  n = xlen;

            /* Allocate blocks to quota. */
            if (DQUOT_ALLOC_BLOCK(ip, n)) {
                  rc = -EDQUOT;
                  goto extendOut;
            }
            quota_allocation += n;

            if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen,
                            (s64) n, &nxaddr)))
                  goto extendOut;

            pxdlist.maxnpxd = 1;
            pxdlist.npxd = 0;
            pxd = &pxdlist.pxd[0];
            PXDaddress(pxd, nxaddr)
                PXDlength(pxd, xlen + n);
            split->pxdlist = &pxdlist;
            if ((rc = dtExtendPage(tid, ip, split, btstack))) {
                  nxaddr = addressPXD(pxd);
                  if (xaddr != nxaddr) {
                        /* free relocated extent */
                        xlen = lengthPXD(pxd);
                        dbFree(ip, nxaddr, (s64) xlen);
                  } else {
                        /* free extended delta */
                        xlen = lengthPXD(pxd) - n;
                        xaddr = addressPXD(pxd) + xlen;
                        dbFree(ip, xaddr, (s64) n);
                  }
            } else if (!DO_INDEX(ip))
                  ip->i_size = lengthPXD(pxd) << sbi->l2bsize;


            extendOut:
            DT_PUTPAGE(smp);
            goto freeKeyName;
      }

      /*
       *    split leaf page <sp> into <sp> and a new right page <rp>.
       *
       * return <rp> pinned and its extent descriptor <rpxd>
       */
      /*
       * allocate new directory page extent and
       * new index page(s) to cover page split(s)
       *
       * allocation hint: ?
       */
      n = btstack->nsplit;
      pxdlist.maxnpxd = pxdlist.npxd = 0;
      xlen = sbi->nbperpage;
      for (pxd = pxdlist.pxd; n > 0; n--, pxd++) {
            if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) {
                  PXDaddress(pxd, xaddr);
                  PXDlength(pxd, xlen);
                  pxdlist.maxnpxd++;
                  continue;
            }

            DT_PUTPAGE(smp);

            /* undo allocation */
            goto splitOut;
      }

      split->pxdlist = &pxdlist;
      if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) {
            DT_PUTPAGE(smp);

            /* undo allocation */
            goto splitOut;
      }

      if (!DO_INDEX(ip))
            ip->i_size += PSIZE;

      /*
       * propagate up the router entry for the leaf page just split
       *
       * insert a router entry for the new page into the parent page,
       * propagate the insert/split up the tree by walking back the stack
       * of (bn of parent page, index of child page entry in parent page)
       * that were traversed during the search for the page that split.
       *
       * the propagation of insert/split up the tree stops if the root
       * splits or the page inserted into doesn't have to split to hold
       * the new entry.
       *
       * the parent entry for the split page remains the same, and
       * a new entry is inserted at its right with the first key and
       * block number of the new right page.
       *
       * There are a maximum of 4 pages pinned at any time:
       * two children, left parent and right parent (when the parent splits).
       * keep the child pages pinned while working on the parent.
       * make sure that all pins are released at exit.
       */
      while ((parent = BT_POP(btstack)) != NULL) {
            /* parent page specified by stack frame <parent> */

            /* keep current child pages (<lp>, <rp>) pinned */
            lmp = smp;
            lp = sp;

            /*
             * insert router entry in parent for new right child page <rp>
             */
            /* get the parent page <sp> */
            DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
            if (rc) {
                  DT_PUTPAGE(lmp);
                  DT_PUTPAGE(rmp);
                  goto splitOut;
            }

            /*
             * The new key entry goes ONE AFTER the index of parent entry,
             * because the split was to the right.
             */
            skip = parent->index + 1;

            /*
             * compute the key for the router entry
             *
             * key suffix compression:
             * for internal pages that have leaf pages as children,
             * retain only what's needed to distinguish between
             * the new entry and the entry on the page to its left.
             * If the keys compare equal, retain the entire key.
             *
             * note that compression is performed only at computing
             * router key at the lowest internal level.
             * further compression of the key between pairs of higher
             * level internal pages loses too much information and
             * the search may fail.
             * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,}
             * results in two adjacent parent entries (a)(xx).
             * if split occurs between these two entries, and
             * if compression is applied, the router key of parent entry
             * of right page (x) will divert search for x into right
             * subtree and miss x in the left subtree.)
             *
             * the entire key must be retained for the next-to-leftmost
             * internal key at any level of the tree, or search may fail
             * (e.g., ?)
             */
            switch (rp->header.flag & BT_TYPE) {
            case BT_LEAF:
                  /*
                   * compute the length of prefix for suffix compression
                   * between last entry of left page and first entry
                   * of right page
                   */
                  if ((sp->header.flag & BT_ROOT && skip > 1) ||
                      sp->header.prev != 0 || skip > 1) {
                        /* compute uppercase router prefix key */
                        rc = ciGetLeafPrefixKey(lp,
                                          lp->header.nextindex-1,
                                          rp, 0, &key,
                                          sbi->mntflag);
                        if (rc) {
                              DT_PUTPAGE(lmp);
                              DT_PUTPAGE(rmp);
                              DT_PUTPAGE(smp);
                              goto splitOut;
                        }
                  } else {
                        /* next to leftmost entry of
                           lowest internal level */

                        /* compute uppercase router key */
                        dtGetKey(rp, 0, &key, sbi->mntflag);
                        key.name[key.namlen] = 0;

                        if ((sbi->mntflag & JFS_OS2) == JFS_OS2)
                              ciToUpper(&key);
                  }

                  n = NDTINTERNAL(key.namlen);
                  break;

            case BT_INTERNAL:
                  dtGetKey(rp, 0, &key, sbi->mntflag);
                  n = NDTINTERNAL(key.namlen);
                  break;

            default:
                  jfs_err("dtSplitUp(): UFO!");
                  break;
            }

            /* unpin left child page */
            DT_PUTPAGE(lmp);

            /*
             * compute the data for the router entry
             */
            data->xd = rpxd;  /* child page xd */

            /*
             * parent page is full - split the parent page
             */
            if (n > sp->header.freecnt) {
                  /* init for parent page split */
                  split->mp = smp;
                  split->index = skip;    /* index at insert */
                  split->nslot = n;
                  split->key = &key;
                  /* split->data = data; */

                  /* unpin right child page */
                  DT_PUTPAGE(rmp);

                  /* The split routines insert the new entry,
                   * acquire txLock as appropriate.
                   * return <rp> pinned and its block number <rbn>.
                   */
                  rc = (sp->header.flag & BT_ROOT) ?
                      dtSplitRoot(tid, ip, split, &rmp) :
                      dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd);
                  if (rc) {
                        DT_PUTPAGE(smp);
                        goto splitOut;
                  }

                  /* smp and rmp are pinned */
            }
            /*
             * parent page is not full - insert router entry in parent page
             */
            else {
                  BT_MARK_DIRTY(smp, ip);
                  /*
                   * acquire a transaction lock on the parent page
                   */
                  tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
                  dtlck = (struct dt_lock *) & tlck->lock;
                  ASSERT(dtlck->index == 0);
                  lv = & dtlck->lv[0];

                  /* linelock header */
                  lv->offset = 0;
                  lv->length = 1;
                  dtlck->index++;

                  /* linelock stbl of non-root parent page */
                  if (!(sp->header.flag & BT_ROOT)) {
                        lv++;
                        n = skip >> L2DTSLOTSIZE;
                        lv->offset = sp->header.stblindex + n;
                        lv->length =
                            ((sp->header.nextindex -
                              1) >> L2DTSLOTSIZE) - n + 1;
                        dtlck->index++;
                  }

                  dtInsertEntry(sp, skip, &key, data, &dtlck);

                  /* exit propagate up */
                  break;
            }
      }

      /* unpin current split and its right page */
      DT_PUTPAGE(smp);
      DT_PUTPAGE(rmp);

      /*
       * free remaining extents allocated for split
       */
      splitOut:
      n = pxdlist.npxd;
      pxd = &pxdlist.pxd[n];
      for (; n < pxdlist.maxnpxd; n++, pxd++)
            dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd));

      freeKeyName:
      kfree(key.name);

      /* Rollback quota allocation */
      if (rc && quota_allocation)
            DQUOT_FREE_BLOCK(ip, quota_allocation);

      dtSplitUp_Exit:

      return rc;
}


/*
 *    dtSplitPage()
 *
 * function: Split a non-root page of a btree.
 *
 * parameter:
 *
 * return: 0 - success;
 *       errno - failure;
 *    return split and new page pinned;
 */
static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
          struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp)
{
      int rc = 0;
      struct metapage *smp;
      dtpage_t *sp;
      struct metapage *rmp;
      dtpage_t *rp;           /* new right page allocated */
      s64 rbn;          /* new right page block number */
      struct metapage *mp;
      dtpage_t *p;
      s64 nextbn;
      struct pxdlist *pxdlist;
      pxd_t *pxd;
      int skip, nextindex, half, left, nxt, off, si;
      struct ldtentry *ldtentry;
      struct idtentry *idtentry;
      u8 *stbl;
      struct dtslot *f;
      int fsi, stblsize;
      int n;
      struct dt_lock *sdtlck, *rdtlck;
      struct tlock *tlck;
      struct dt_lock *dtlck;
      struct lv *slv, *rlv, *lv;

      /* get split page */
      smp = split->mp;
      sp = DT_PAGE(ip, smp);

      /*
       * allocate the new right page for the split
       */
      pxdlist = split->pxdlist;
      pxd = &pxdlist->pxd[pxdlist->npxd];
      pxdlist->npxd++;
      rbn = addressPXD(pxd);
      rmp = get_metapage(ip, rbn, PSIZE, 1);
      if (rmp == NULL)
            return -EIO;

      /* Allocate blocks to quota. */
      if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
            release_metapage(rmp);
            return -EDQUOT;
      }

      jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);

      BT_MARK_DIRTY(rmp, ip);
      /*
       * acquire a transaction lock on the new right page
       */
      tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
      rdtlck = (struct dt_lock *) & tlck->lock;

      rp = (dtpage_t *) rmp->data;
      *rpp = rp;
      rp->header.self = *pxd;

      BT_MARK_DIRTY(smp, ip);
      /*
       * acquire a transaction lock on the split page
       *
       * action:
       */
      tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY);
      sdtlck = (struct dt_lock *) & tlck->lock;

      /* linelock header of split page */
      ASSERT(sdtlck->index == 0);
      slv = & sdtlck->lv[0];
      slv->offset = 0;
      slv->length = 1;
      sdtlck->index++;

      /*
       * initialize/update sibling pointers between sp and rp
       */
      nextbn = le64_to_cpu(sp->header.next);
      rp->header.next = cpu_to_le64(nextbn);
      rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
      sp->header.next = cpu_to_le64(rbn);

      /*
       * initialize new right page
       */
      rp->header.flag = sp->header.flag;

      /* compute sorted entry table at start of extent data area */
      rp->header.nextindex = 0;
      rp->header.stblindex = 1;

      n = PSIZE >> L2DTSLOTSIZE;
      rp->header.maxslot = n;
      stblsize = (n + 31) >> L2DTSLOTSIZE;      /* in unit of slot */

      /* init freelist */
      fsi = rp->header.stblindex + stblsize;
      rp->header.freelist = fsi;
      rp->header.freecnt = rp->header.maxslot - fsi;

      /*
       *    sequential append at tail: append without split
       *
       * If splitting the last page on a level because of appending
       * a entry to it (skip is maxentry), it's likely that the access is
       * sequential. Adding an empty page on the side of the level is less
       * work and can push the fill factor much higher than normal.
       * If we're wrong it's no big deal, we'll just do the split the right
       * way next time.
       * (It may look like it's equally easy to do a similar hack for
       * reverse sorted data, that is, split the tree left,
       * but it's not. Be my guest.)
       */
      if (nextbn == 0 && split->index == sp->header.nextindex) {
            /* linelock header + stbl (first slot) of new page */
            rlv = & rdtlck->lv[rdtlck->index];
            rlv->offset = 0;
            rlv->length = 2;
            rdtlck->index++;

            /*
             * initialize freelist of new right page
             */
            f = &rp->slot[fsi];
            for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
                  f->next = fsi;
            f->next = -1;

            /* insert entry at the first entry of the new right page */
            dtInsertEntry(rp, 0, split->key, split->data, &rdtlck);

            goto out;
      }

      /*
       *    non-sequential insert (at possibly middle page)
       */

      /*
       * update prev pointer of previous right sibling page;
       */
      if (nextbn != 0) {
            DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
            if (rc) {
                  discard_metapage(rmp);
                  return rc;
            }

            BT_MARK_DIRTY(mp, ip);
            /*
             * acquire a transaction lock on the next page
             */
            tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
            jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p",
                  tlck, ip, mp);
            dtlck = (struct dt_lock *) & tlck->lock;

            /* linelock header of previous right sibling page */
            lv = & dtlck->lv[dtlck->index];
            lv->offset = 0;
            lv->length = 1;
            dtlck->index++;

            p->header.prev = cpu_to_le64(rbn);

            DT_PUTPAGE(mp);
      }

      /*
       * split the data between the split and right pages.
       */
      skip = split->index;
      half = (PSIZE >> L2DTSLOTSIZE) >> 1;      /* swag */
      left = 0;

      /*
       *    compute fill factor for split pages
       *
       * <nxt> traces the next entry to move to rp
       * <off> traces the next entry to stay in sp
       */
      stbl = (u8 *) & sp->slot[sp->header.stblindex];
      nextindex = sp->header.nextindex;
      for (nxt = off = 0; nxt < nextindex; ++off) {
            if (off == skip)
                  /* check for fill factor with new entry size */
                  n = split->nslot;
            else {
                  si = stbl[nxt];
                  switch (sp->header.flag & BT_TYPE) {
                  case BT_LEAF:
                        ldtentry = (struct ldtentry *) & sp->slot[si];
                        if (DO_INDEX(ip))
                              n = NDTLEAF(ldtentry->namlen);
                        else
                              n = NDTLEAF_LEGACY(ldtentry->
                                             namlen);
                        break;

                  case BT_INTERNAL:
                        idtentry = (struct idtentry *) & sp->slot[si];
                        n = NDTINTERNAL(idtentry->namlen);
                        break;

                  default:
                        break;
                  }

                  ++nxt;      /* advance to next entry to move in sp */
            }

            left += n;
            if (left >= half)
                  break;
      }

      /* <nxt> poins to the 1st entry to move */

      /*
       *    move entries to right page
       *
       * dtMoveEntry() initializes rp and reserves entry for insertion
       *
       * split page moved out entries are linelocked;
       * new/right page moved in entries are linelocked;
       */
      /* linelock header + stbl of new right page */
      rlv = & rdtlck->lv[rdtlck->index];
      rlv->offset = 0;
      rlv->length = 5;
      rdtlck->index++;

      dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip));

      sp->header.nextindex = nxt;

      /*
       * finalize freelist of new right page
       */
      fsi = rp->header.freelist;
      f = &rp->slot[fsi];
      for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
            f->next = fsi;
      f->next = -1;

      /*
       * Update directory index table for entries now in right page
       */
      if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
            s64 lblock;

            mp = NULL;
            stbl = DT_GETSTBL(rp);
            for (n = 0; n < rp->header.nextindex; n++) {
                  ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
                  modify_index(tid, ip, le32_to_cpu(ldtentry->index),
                             rbn, n, &mp, &lblock);
            }
            if (mp)
                  release_metapage(mp);
      }

      /*
       * the skipped index was on the left page,
       */
      if (skip <= off) {
            /* insert the new entry in the split page */
            dtInsertEntry(sp, skip, split->key, split->data, &sdtlck);

            /* linelock stbl of split page */
            if (sdtlck->index >= sdtlck->maxcnt)
                  sdtlck = (struct dt_lock *) txLinelock(sdtlck);
            slv = & sdtlck->lv[sdtlck->index];
            n = skip >> L2DTSLOTSIZE;
            slv->offset = sp->header.stblindex + n;
            slv->length =
                ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1;
            sdtlck->index++;
      }
      /*
       * the skipped index was on the right page,
       */
      else {
            /* adjust the skip index to reflect the new position */
            skip -= nxt;

            /* insert the new entry in the right page */
            dtInsertEntry(rp, skip, split->key, split->data, &rdtlck);
      }

      out:
      *rmpp = rmp;
      *rpxdp = *pxd;

      return rc;
}


/*
 *    dtExtendPage()
 *
 * function: extend 1st/only directory leaf page
 *
 * parameter:
 *
 * return: 0 - success;
 *       errno - failure;
 *    return extended page pinned;
 */
static int dtExtendPage(tid_t tid,
           struct inode *ip, struct dtsplit * split, struct btstack * btstack)
{
      struct super_block *sb = ip->i_sb;
      int rc;
      struct metapage *smp, *pmp, *mp;
      dtpage_t *sp, *pp;
      struct pxdlist *pxdlist;
      pxd_t *pxd, *tpxd;
      int xlen, xsize;
      int newstblindex, newstblsize;
      int oldstblindex, oldstblsize;
      int fsi, last;
      struct dtslot *f;
      struct btframe *parent;
      int n;
      struct dt_lock *dtlck;
      s64 xaddr, txaddr;
      struct tlock *tlck;
      struct pxd_lock *pxdlock;
      struct lv *lv;
      uint type;
      struct ldtentry *ldtentry;
      u8 *stbl;

      /* get page to extend */
      smp = split->mp;
      sp = DT_PAGE(ip, smp);

      /* get parent/root page */
      parent = BT_POP(btstack);
      DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc);
      if (rc)
            return (rc);

      /*
       *    extend the extent
       */
      pxdlist = split->pxdlist;
      pxd = &pxdlist->pxd[pxdlist->npxd];
      pxdlist->npxd++;

      xaddr = addressPXD(pxd);
      tpxd = &sp->header.self;
      txaddr = addressPXD(tpxd);
      /* in-place extension */
      if (xaddr == txaddr) {
            type = tlckEXTEND;
      }
      /* relocation */
      else {
            type = tlckNEW;

            /* save moved extent descriptor for later free */
            tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE);
            pxdlock = (struct pxd_lock *) & tlck->lock;
            pxdlock->flag = mlckFREEPXD;
            pxdlock->pxd = sp->header.self;
            pxdlock->index = 1;

            /*
             * Update directory index table to reflect new page address
             */
            if (DO_INDEX(ip)) {
                  s64 lblock;

                  mp = NULL;
                  stbl = DT_GETSTBL(sp);
                  for (n = 0; n < sp->header.nextindex; n++) {
                        ldtentry =
                            (struct ldtentry *) & sp->slot[stbl[n]];
                        modify_index(tid, ip,
                                   le32_to_cpu(ldtentry->index),
                                   xaddr, n, &mp, &lblock);
                  }
                  if (mp)
                        release_metapage(mp);
            }
      }

      /*
       *    extend the page
       */
      sp->header.self = *pxd;

      jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp);

      BT_MARK_DIRTY(smp, ip);
      /*
       * acquire a transaction lock on the extended/leaf page
       */
      tlck = txLock(tid, ip, smp, tlckDTREE | type);
      dtlck = (struct dt_lock *) & tlck->lock;
      lv = & dtlck->lv[0];

      /* update buffer extent descriptor of extended page */
      xlen = lengthPXD(pxd);
      xsize = xlen << JFS_SBI(sb)->l2bsize;

      /*
       * copy old stbl to new stbl at start of extended area
       */
      oldstblindex = sp->header.stblindex;
      oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE;
      newstblindex = sp->header.maxslot;
      n = xsize >> L2DTSLOTSIZE;
      newstblsize = (n + 31) >> L2DTSLOTSIZE;
      memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex],
             sp->header.nextindex);

      /*
       * in-line extension: linelock old area of extended page
       */
      if (type == tlckEXTEND) {
            /* linelock header */
            lv->offset = 0;
            lv->length = 1;
            dtlck->index++;
            lv++;

            /* linelock new stbl of extended page */
            lv->offset = newstblindex;
            lv->length = newstblsize;
      }
      /*
       * relocation: linelock whole relocated area
       */
      else {
            lv->offset = 0;
            lv->length = sp->header.maxslot + newstblsize;
      }

      dtlck->index++;

      sp->header.maxslot = n;
      sp->header.stblindex = newstblindex;
      /* sp->header.nextindex remains the same */

      /*
       * add old stbl region at head of freelist
       */
      fsi = oldstblindex;
      f = &sp->slot[fsi];
      last = sp->header.freelist;
      for (n = 0; n < oldstblsize; n++, fsi++, f++) {
            f->next = last;
            last = fsi;
      }
      sp->header.freelist = last;
      sp->header.freecnt += oldstblsize;

      /*
       * append free region of newly extended area at tail of freelist
       */
      /* init free region of newly extended area */
      fsi = n = newstblindex + newstblsize;
      f = &sp->slot[fsi];
      for (fsi++; fsi < sp->header.maxslot; f++, fsi++)
            f->next = fsi;
      f->next = -1;

      /* append new free region at tail of old freelist */
      fsi = sp->header.freelist;
      if (fsi == -1)
            sp->header.freelist = n;
      else {
            do {
                  f = &sp->slot[fsi];
                  fsi = f->next;
            } while (fsi != -1);

            f->next = n;
      }

      sp->header.freecnt += sp->header.maxslot - n;

      /*
       * insert the new entry
       */
      dtInsertEntry(sp, split->index, split->key, split->data, &dtlck);

      BT_MARK_DIRTY(pmp, ip);
      /*
       * linelock any freeslots residing in old extent
       */
      if (type == tlckEXTEND) {
            n = sp->header.maxslot >> 2;
            if (sp->header.freelist < n)
                  dtLinelockFreelist(sp, n, &dtlck);
      }

      /*
       *    update parent entry on the parent/root page
       */
      /*
       * acquire a transaction lock on the parent/root page
       */
      tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
      dtlck = (struct dt_lock *) & tlck->lock;
      lv = & dtlck->lv[dtlck->index];

      /* linelock parent entry - 1st slot */
      lv->offset = 1;
      lv->length = 1;
      dtlck->index++;

      /* update the parent pxd for page extension */
      tpxd = (pxd_t *) & pp->slot[1];
      *tpxd = *pxd;

      DT_PUTPAGE(pmp);
      return 0;
}


/*
 *    dtSplitRoot()
 *
 * function:
 *    split the full root page into
 *    original/root/split page and new right page
 *    i.e., root remains fixed in tree anchor (inode) and
 *    the root is copied to a single new right child page
 *    since root page << non-root page, and
 *    the split root page contains a single entry for the
 *    new right child page.
 *
 * parameter:
 *
 * return: 0 - success;
 *       errno - failure;
 *    return new page pinned;
 */
static int dtSplitRoot(tid_t tid,
          struct inode *ip, struct dtsplit * split, struct metapage ** rmpp)
{
      struct super_block *sb = ip->i_sb;
      struct metapage *smp;
      dtroot_t *sp;
      struct metapage *rmp;
      dtpage_t *rp;
      s64 rbn;
      int xlen;
      int xsize;
      struct dtslot *f;
      s8 *stbl;
      int fsi, stblsize, n;
      struct idtentry *s;
      pxd_t *ppxd;
      struct pxdlist *pxdlist;
      pxd_t *pxd;
      struct dt_lock *dtlck;
      struct tlock *tlck;
      struct lv *lv;

      /* get split root page */
      smp = split->mp;
      sp = &JFS_IP(ip)->i_dtroot;

      /*
       *    allocate/initialize a single (right) child page
       *
       * N.B. at first split, a one (or two) block to fit new entry
       * is allocated; at subsequent split, a full page is allocated;
       */
      pxdlist = split->pxdlist;
      pxd = &pxdlist->pxd[pxdlist->npxd];
      pxdlist->npxd++;
      rbn = addressPXD(pxd);
      xlen = lengthPXD(pxd);
      xsize = xlen << JFS_SBI(sb)->l2bsize;
      rmp = get_metapage(ip, rbn, xsize, 1);
      if (!rmp)
            return -EIO;

      rp = rmp->data;

      /* Allocate blocks to quota. */
      if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
            release_metapage(rmp);
            return -EDQUOT;
      }

      BT_MARK_DIRTY(rmp, ip);
      /*
       * acquire a transaction lock on the new right page
       */
      tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW);
      dtlck = (struct dt_lock *) & tlck->lock;

      rp->header.flag =
          (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
      rp->header.self = *pxd;

      /* initialize sibling pointers */
      rp->header.next = 0;
      rp->header.prev = 0;

      /*
       *    move in-line root page into new right page extent
       */
      /* linelock header + copied entries + new stbl (1st slot) in new page */
      ASSERT(dtlck->index == 0);
      lv = & dtlck->lv[0];
      lv->offset = 0;
      lv->length = 10;  /* 1 + 8 + 1 */
      dtlck->index++;

      n = xsize >> L2DTSLOTSIZE;
      rp->header.maxslot = n;
      stblsize = (n + 31) >> L2DTSLOTSIZE;

      /* copy old stbl to new stbl at start of extended area */
      rp->header.stblindex = DTROOTMAXSLOT;
      stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
      memcpy(stbl, sp->header.stbl, sp->header.nextindex);
      rp->header.nextindex = sp->header.nextindex;

      /* copy old data area to start of new data area */
      memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);

      /*
       * append free region of newly extended area at tail of freelist
       */
      /* init free region of newly extended area */
      fsi = n = DTROOTMAXSLOT + stblsize;
      f = &rp->slot[fsi];
      for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
            f->next = fsi;
      f->next = -1;

      /* append new free region at tail of old freelist */
      fsi = sp->header.freelist;
      if (fsi == -1)
            rp->header.freelist = n;
      else {
            rp->header.freelist = fsi;

            do {
                  f = &rp->slot[fsi];
                  fsi = f->next;
            } while (fsi != -1);

            f->next = n;
      }

      rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n;

      /*
       * Update directory index table for entries now in right page
       */
      if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
            s64 lblock;
            struct metapage *mp = NULL;
            struct ldtentry *ldtentry;

            stbl = DT_GETSTBL(rp);
            for (n = 0; n < rp->header.nextindex; n++) {
                  ldtentry = (struct ldtentry *) & rp->slot[stbl[n]];
                  modify_index(tid, ip, le32_to_cpu(ldtentry->index),
                             rbn, n, &mp, &lblock);
            }
            if (mp)
                  release_metapage(mp);
      }
      /*
       * insert the new entry into the new right/child page
       * (skip index in the new right page will not change)
       */
      dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);

      /*
       *    reset parent/root page
       *
       * set the 1st entry offset to 0, which force the left-most key
       * at any level of the tree to be less than any search key.
       *
       * The btree comparison code guarantees that the left-most key on any
       * level of the tree is never used, so it doesn't need to be filled in.
       */
      BT_MARK_DIRTY(smp, ip);
      /*
       * acquire a transaction lock on the root page (in-memory inode)
       */
      tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
      dtlck = (struct dt_lock *) & tlck->lock;

      /* linelock root */
      ASSERT(dtlck->index == 0);
      lv = & dtlck->lv[0];
      lv->offset = 0;
      lv->length = DTROOTMAXSLOT;
      dtlck->index++;

      /* update page header of root */
      if (sp->header.flag & BT_LEAF) {
            sp->header.flag &= ~BT_LEAF;
            sp->header.flag |= BT_INTERNAL;
      }

      /* init the first entry */
      s = (struct idtentry *) & sp->slot[DTENTRYSTART];
      ppxd = (pxd_t *) s;
      *ppxd = *pxd;
      s->next = -1;
      s->namlen = 0;

      stbl = sp->header.stbl;
      stbl[0] = DTENTRYSTART;
      sp->header.nextindex = 1;

      /* init freelist */
      fsi = DTENTRYSTART + 1;
      f = &sp->slot[fsi];

      /* init free region of remaining area */
      for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
            f->next = fsi;
      f->next = -1;

      sp->header.freelist = DTENTRYSTART + 1;
      sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1);

      *rmpp = rmp;

      return 0;
}


/*
 *    dtDelete()
 *
 * function: delete the entry(s) referenced by a key.
 *
 * parameter:
 *
 * return:
 */
int dtDelete(tid_t tid,
       struct inode *ip, struct component_name * key, ino_t * ino, int flag)
{
      int rc = 0;
      s64 bn;
      struct metapage *mp, *imp;
      dtpage_t *p;
      int index;
      struct btstack btstack;
      struct dt_lock *dtlck;
      struct tlock *tlck;
      struct lv *lv;
      int i;
      struct ldtentry *ldtentry;
      u8 *stbl;
      u32 table_index, next_index;
      struct metapage *nmp;
      dtpage_t *np;

      /*
       *    search for the entry to delete:
       *
       * dtSearch() returns (leaf page pinned, index at which to delete).
       */
      if ((rc = dtSearch(ip, key, ino, &btstack, flag)))
            return rc;

      /* retrieve search result */
      DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

      /*
       * We need to find put the index of the next entry into the
       * directory index table in order to resume a readdir from this
       * entry.
       */
      if (DO_INDEX(ip)) {
            stbl = DT_GETSTBL(p);
            ldtentry = (struct ldtentry *) & p->slot[stbl[index]];
            table_index = le32_to_cpu(ldtentry->index);
            if (index == (p->header.nextindex - 1)) {
                  /*
                   * Last entry in this leaf page
                   */
                  if ((p->header.flag & BT_ROOT)
                      || (p->header.next == 0))
                        next_index = -1;
                  else {
                        /* Read next leaf page */
                        DT_GETPAGE(ip, le64_to_cpu(p->header.next),
                                 nmp, PSIZE, np, rc);
                        if (rc)
                              next_index = -1;
                        else {
                              stbl = DT_GETSTBL(np);
                              ldtentry =
                                  (struct ldtentry *) & np->
                                  slot[stbl[0]];
                              next_index =
                                  le32_to_cpu(ldtentry->index);
                              DT_PUTPAGE(nmp);
                        }
                  }
            } else {
                  ldtentry =
                      (struct ldtentry *) & p->slot[stbl[index + 1]];
                  next_index = le32_to_cpu(ldtentry->index);
            }
            free_index(tid, ip, table_index, next_index);
      }
      /*
       * the leaf page becomes empty, delete the page
       */
      if (p->header.nextindex == 1) {
            /* delete empty page */
            rc = dtDeleteUp(tid, ip, mp, p, &btstack);
      }
      /*
       * the leaf page has other entries remaining:
       *
       * delete the entry from the leaf page.
       */
      else {
            BT_MARK_DIRTY(mp, ip);
            /*
             * acquire a transaction lock on the leaf page
             */
            tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
            dtlck = (struct dt_lock *) & tlck->lock;

            /*
             * Do not assume that dtlck->index will be zero.  During a
             * rename within a directory, this transaction may have
             * modified this page already when adding the new entry.
             */

            /* linelock header */
            if (dtlck->index >= dtlck->maxcnt)
                  dtlck = (struct dt_lock *) txLinelock(dtlck);
            lv = & dtlck->lv[dtlck->index];
            lv->offset = 0;
            lv->length = 1;
            dtlck->index++;

            /* linelock stbl of non-root leaf page */
            if (!(p->header.flag & BT_ROOT)) {
                  if (dtlck->index >= dtlck->maxcnt)
                        dtlck = (struct dt_lock *) txLinelock(dtlck);
                  lv = & dtlck->lv[dtlck->index];
                  i = index >> L2DTSLOTSIZE;
                  lv->offset = p->header.stblindex + i;
                  lv->length =
                      ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
                      i + 1;
                  dtlck->index++;
            }

            /* free the leaf entry */
            dtDeleteEntry(p, index, &dtlck);

            /*
             * Update directory index table for entries moved in stbl
             */
            if (DO_INDEX(ip) && index < p->header.nextindex) {
                  s64 lblock;

                  imp = NULL;
                  stbl = DT_GETSTBL(p);
                  for (i = index; i < p->header.nextindex; i++) {
                        ldtentry =
                            (struct ldtentry *) & p->slot[stbl[i]];
                        modify_index(tid, ip,
                                   le32_to_cpu(ldtentry->index),
                                   bn, i, &imp, &lblock);
                  }
                  if (imp)
                        release_metapage(imp);
            }

            DT_PUTPAGE(mp);
      }

      return rc;
}


/*
 *    dtDeleteUp()
 *
 * function:
 *    free empty pages as propagating deletion up the tree
 *
 * parameter:
 *
 * return:
 */
static int dtDeleteUp(tid_t tid, struct inode *ip,
         struct metapage * fmp, dtpage_t * fp, struct btstack * btstack)
{
      int rc = 0;
      struct metapage *mp;
      dtpage_t *p;
      int index, nextindex;
      int xlen;
      struct btframe *parent;
      struct dt_lock *dtlck;
      struct tlock *tlck;
      struct lv *lv;
      struct pxd_lock *pxdlock;
      int i;

      /*
       *    keep the root leaf page which has become empty
       */
      if (BT_IS_ROOT(fmp)) {
            /*
             * reset the root
             *
             * dtInitRoot() acquires txlock on the root
             */
            dtInitRoot(tid, ip, PARENT(ip));

            DT_PUTPAGE(fmp);

            return 0;
      }

      /*
       *    free the non-root leaf page
       */
      /*
       * acquire a transaction lock on the page
       *
       * write FREEXTENT|NOREDOPAGE log record
       * N.B. linelock is overlaid as freed extent descriptor, and
       * the buffer page is freed;
       */
      tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
      pxdlock = (struct pxd_lock *) & tlck->lock;
      pxdlock->flag = mlckFREEPXD;
      pxdlock->pxd = fp->header.self;
      pxdlock->index = 1;

      /* update sibling pointers */
      if ((rc = dtRelink(tid, ip, fp))) {
            BT_PUTPAGE(fmp);
            return rc;
      }

      xlen = lengthPXD(&fp->header.self);

      /* Free quota allocation. */
      DQUOT_FREE_BLOCK(ip, xlen);

      /* free/invalidate its buffer page */
      discard_metapage(fmp);

      /*
       *    propagate page deletion up the directory tree
       *
       * If the delete from the parent page makes it empty,
       * continue all the way up the tree.
       * stop if the root page is reached (which is never deleted) or
       * if the entry deletion does not empty the page.
       */
      while ((parent = BT_POP(btstack)) != NULL) {
            /* pin the parent page <sp> */
            DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
            if (rc)
                  return rc;

            /*
             * free the extent of the child page deleted
             */
            index = parent->index;

            /*
             * delete the entry for the child page from parent
             */
            nextindex = p->header.nextindex;

            /*
             * the parent has the single entry being deleted:
             *
             * free the parent page which has become empty.
             */
            if (nextindex == 1) {
                  /*
                   * keep the root internal page which has become empty
                   */
                  if (p->header.flag & BT_ROOT) {
                        /*
                         * reset the root
                         *
                         * dtInitRoot() acquires txlock on the root
                         */
                        dtInitRoot(tid, ip, PARENT(ip));

                        DT_PUTPAGE(mp);

                        return 0;
                  }
                  /*
                   * free the parent page
                   */
                  else {
                        /*
                         * acquire a transaction lock on the page
                         *
                         * write FREEXTENT|NOREDOPAGE log record
                         */
                        tlck =
                            txMaplock(tid, ip,
                                    tlckDTREE | tlckFREE);
                        pxdlock = (struct pxd_lock *) & tlck->lock;
                        pxdlock->flag = mlckFREEPXD;
                        pxdlock->pxd = p->header.self;
                        pxdlock->index = 1;

                        /* update sibling pointers */
                        if ((rc = dtRelink(tid, ip, p))) {
                              DT_PUTPAGE(mp);
                              return rc;
                        }

                        xlen = lengthPXD(&p->header.self);

                        /* Free quota allocation */
                        DQUOT_FREE_BLOCK(ip, xlen);

                        /* free/invalidate its buffer page */
                        discard_metapage(mp);

                        /* propagate up */
                        continue;
                  }
            }

            /*
             * the parent has other entries remaining:
             *
             * delete the router entry from the parent page.
             */
            BT_MARK_DIRTY(mp, ip);
            /*
             * acquire a transaction lock on the page
             *
             * action: router entry deletion
             */
            tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
            dtlck = (struct dt_lock *) & tlck->lock;

            /* linelock header */
            if (dtlck->index >= dtlck->maxcnt)
                  dtlck = (struct dt_lock *) txLinelock(dtlck);
            lv = & dtlck->lv[dtlck->index];
            lv->offset = 0;
            lv->length = 1;
            dtlck->index++;

            /* linelock stbl of non-root leaf page */
            if (!(p->header.flag & BT_ROOT)) {
                  if (dtlck->index < dtlck->maxcnt)
                        lv++;
                  else {
                        dtlck = (struct dt_lock *) txLinelock(dtlck);
                        lv = & dtlck->lv[0];
                  }
                  i = index >> L2DTSLOTSIZE;
                  lv->offset = p->header.stblindex + i;
                  lv->length =
                      ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
                      i + 1;
                  dtlck->index++;
            }

            /* free the router entry */
            dtDeleteEntry(p, index, &dtlck);

            /* reset key of new leftmost entry of level (for consistency) */
            if (index == 0 &&
                ((p->header.flag & BT_ROOT) || p->header.prev == 0))
                  dtTruncateEntry(p, 0, &dtlck);

            /* unpin the parent page */
            DT_PUTPAGE(mp);

            /* exit propagation up */
            break;
      }

      if (!DO_INDEX(ip))
            ip->i_size -= PSIZE;

      return 0;
}

#ifdef _NOTYET
/*
 * NAME:    dtRelocate()
 *
 * FUNCTION:      relocate dtpage (internal or leaf) of directory;
 *          This function is mainly used by defragfs utility.
 */
int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
             s64 nxaddr)
{
      int rc = 0;
      struct metapage *mp, *pmp, *lmp, *rmp;
      dtpage_t *p, *pp, *rp = 0, *lp= 0;
      s64 bn;
      int index;
      struct btstack btstack;
      pxd_t *pxd;
      s64 oxaddr, nextbn, prevbn;
      int xlen, xsize;
      struct tlock *tlck;
      struct dt_lock *dtlck;
      struct pxd_lock *pxdlock;
      s8 *stbl;
      struct lv *lv;

      oxaddr = addressPXD(opxd);
      xlen = lengthPXD(opxd);

      jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d",
               (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr,
               xlen);

      /*
       *    1. get the internal parent dtpage covering
       *    router entry for the tartget page to be relocated;
       */
      rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
      if (rc)
            return rc;

      /* retrieve search result */
      DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
      jfs_info("dtRelocate: parent router entry validated.");

      /*
       *    2. relocate the target dtpage
       */
      /* read in the target page from src extent */
      DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
      if (rc) {
            /* release the pinned parent page */
            DT_PUTPAGE(pmp);
            return rc;
      }

      /*
       * read in sibling pages if any to update sibling pointers;
       */
      rmp = NULL;
      if (p->header.next) {
            nextbn = le64_to_cpu(p->header.next);
            DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
            if (rc) {
                  DT_PUTPAGE(mp);
                  DT_PUTPAGE(pmp);
                  return (rc);
            }
      }

      lmp = NULL;
      if (p->header.prev) {
            prevbn = le64_to_cpu(p->header.prev);
            DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
            if (rc) {
                  DT_PUTPAGE(mp);
                  DT_PUTPAGE(pmp);
                  if (rmp)
                        DT_PUTPAGE(rmp);
                  return (rc);
            }
      }

      /* at this point, all xtpages to be updated are in memory */

      /*
       * update sibling pointers of sibling dtpages if any;
       */
      if (lmp) {
            tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK);
            dtlck = (struct dt_lock *) & tlck->lock;
            /* linelock header */
            ASSERT(dtlck->index == 0);
            lv = & dtlck->lv[0];
            lv->offset = 0;
            lv->length = 1;
            dtlck->index++;

            lp->header.next = cpu_to_le64(nxaddr);
            DT_PUTPAGE(lmp);
      }

      if (rmp) {
            tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK);
            dtlck = (struct dt_lock *) & tlck->lock;
            /* linelock header */
            ASSERT(dtlck->index == 0);
            lv = & dtlck->lv[0];
            lv->offset = 0;
            lv->length = 1;
            dtlck->index++;

            rp->header.prev = cpu_to_le64(nxaddr);
            DT_PUTPAGE(rmp);
      }

      /*
       * update the target dtpage to be relocated
       *
       * write LOG_REDOPAGE of LOG_NEW type for dst page
       * for the whole target page (logredo() will apply
       * after image and update bmap for allocation of the
       * dst extent), and update bmap for allocation of
       * the dst extent;
       */
      tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW);
      dtlck = (struct dt_lock *) & tlck->lock;
      /* linelock header */
      ASSERT(dtlck->index == 0);
      lv = & dtlck->lv[0];

      /* update the self address in the dtpage header */
      pxd = &p->header.self;
      PXDaddress(pxd, nxaddr);

      /* the dst page is the same as the src page, i.e.,
       * linelock for afterimage of the whole page;
       */
      lv->offset = 0;
      lv->length = p->header.maxslot;
      dtlck->index++;

      /* update the buffer extent descriptor of the dtpage */
      xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;

      /* unpin the relocated page */
      DT_PUTPAGE(mp);
      jfs_info("dtRelocate: target dtpage relocated.");

      /* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec
       * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec
       * will also force a bmap update ).
       */

      /*
       *    3. acquire maplock for the source extent to be freed;
       */
      /* for dtpage relocation, write a LOG_NOREDOPAGE record
       * for the source dtpage (logredo() will init NoRedoPage
       * filter and will also update bmap for free of the source
       * dtpage), and upadte bmap for free of the source dtpage;
       */
      tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
      pxdlock = (struct pxd_lock *) & tlck->lock;
      pxdlock->flag = mlckFREEPXD;
      PXDaddress(&pxdlock->pxd, oxaddr);
      PXDlength(&pxdlock->pxd, xlen);
      pxdlock->index = 1;

      /*
       *    4. update the parent router entry for relocation;
       *
       * acquire tlck for the parent entry covering the target dtpage;
       * write LOG_REDOPAGE to apply after image only;
       */
      jfs_info("dtRelocate: update parent router entry.");
      tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
      dtlck = (struct dt_lock *) & tlck->lock;
      lv = & dtlck->lv[dtlck->index];

      /* update the PXD with the new address */
      stbl = DT_GETSTBL(pp);
      pxd = (pxd_t *) & pp->slot[stbl[index]];
      PXDaddress(pxd, nxaddr);
      lv->offset = stbl[index];
      lv->length = 1;
      dtlck->index++;

      /* unpin the parent dtpage */
      DT_PUTPAGE(pmp);

      return rc;
}

/*
 * NAME:    dtSearchNode()
 *
 * FUNCTION:      Search for an dtpage containing a specified address
 *          This function is mainly used by defragfs utility.
 *
 * NOTE:    Search result on stack, the found page is pinned at exit.
 *          The result page must be an internal dtpage.
 *          lmxaddr give the address of the left most page of the
 *          dtree level, in which the required dtpage resides.
 */
static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
                  struct btstack * btstack)
{
      int rc = 0;
      s64 bn;
      struct metapage *mp;
      dtpage_t *p;
      int psize = 288;  /* initial in-line directory */
      s8 *stbl;
      int i;
      pxd_t *pxd;
      struct btframe *btsp;

      BT_CLR(btstack);  /* reset stack */

      /*
       *    descend tree to the level with specified leftmost page
       *
       *  by convention, root bn = 0.
       */
      for (bn = 0;;) {
            /* get/pin the page to search */
            DT_GETPAGE(ip, bn, mp, psize, p, rc);
            if (rc)
                  return rc;

            /* does the xaddr of leftmost page of the levevl
             * matches levevl search key ?
             */
            if (p->header.flag & BT_ROOT) {
                  if (lmxaddr == 0)
                        break;
            } else if (addressPXD(&p->header.self) == lmxaddr)
                  break;

            /*
             * descend down to leftmost child page
             */
            if (p->header.flag & BT_LEAF) {
                  DT_PUTPAGE(mp);
                  return -ESTALE;
            }

            /* get the leftmost entry */
            stbl = DT_GETSTBL(p);
            pxd = (pxd_t *) & p->slot[stbl[0]];

            /* get the child page block address */
            bn = addressPXD(pxd);
            psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
            /* unpin the parent page */
            DT_PUTPAGE(mp);
      }

      /*
       *    search each page at the current levevl
       */
      loop:
      stbl = DT_GETSTBL(p);
      for (i = 0; i < p->header.nextindex; i++) {
            pxd = (pxd_t *) & p->slot[stbl[i]];

            /* found the specified router entry */
            if (addressPXD(pxd) == addressPXD(kpxd) &&
                lengthPXD(pxd) == lengthPXD(kpxd)) {
                  btsp = btstack->top;
                  btsp->bn = bn;
                  btsp->index = i;
                  btsp->mp = mp;

                  return 0;
            }
      }

      /* get the right sibling page if any */
      if (p->header.next)
            bn = le64_to_cpu(p->header.next);
      else {
            DT_PUTPAGE(mp);
            return -ESTALE;
      }

      /* unpin current page */
      DT_PUTPAGE(mp);

      /* get the right sibling page */
      DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
      if (rc)
            return rc;

      goto loop;
}
#endif /* _NOTYET */

/*
 *    dtRelink()
 *
 * function:
 *    link around a freed page.
 *
 * parameter:
 *    fp:   page to be freed
 *
 * return:
 */
static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p)
{
      int rc;
      struct metapage *mp;
      s64 nextbn, prevbn;
      struct tlock *tlck;
      struct dt_lock *dtlck;
      struct lv *lv;

      nextbn = le64_to_cpu(p->header.next);
      prevbn = le64_to_cpu(p->header.prev);

      /* update prev pointer of the next page */
      if (nextbn != 0) {
            DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
            if (rc)
                  return rc;

            BT_MARK_DIRTY(mp, ip);
            /*
             * acquire a transaction lock on the next page
             *
             * action: update prev pointer;
             */
            tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
            jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
                  tlck, ip, mp);
            dtlck = (struct dt_lock *) & tlck->lock;

            /* linelock header */
            if (dtlck->index >= dtlck->maxcnt)
                  dtlck = (struct dt_lock *) txLinelock(dtlck);
            lv = & dtlck->lv[dtlck->index];
            lv->offset = 0;
            lv->length = 1;
            dtlck->index++;

            p->header.prev = cpu_to_le64(prevbn);
            DT_PUTPAGE(mp);
      }

      /* update next pointer of the previous page */
      if (prevbn != 0) {
            DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
            if (rc)
                  return rc;

            BT_MARK_DIRTY(mp, ip);
            /*
             * acquire a transaction lock on the prev page
             *
             * action: update next pointer;
             */
            tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
            jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p",
                  tlck, ip, mp);
            dtlck = (struct dt_lock *) & tlck->lock;

            /* linelock header */
            if (dtlck->index >= dtlck->maxcnt)
                  dtlck = (struct dt_lock *) txLinelock(dtlck);
            lv = & dtlck->lv[dtlck->index];
            lv->offset = 0;
            lv->length = 1;
            dtlck->index++;

            p->header.next = cpu_to_le64(nextbn);
            DT_PUTPAGE(mp);
      }

      return 0;
}


/*
 *    dtInitRoot()
 *
 * initialize directory root (inline in inode)
 */
void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
{
      struct jfs_inode_info *jfs_ip = JFS_IP(ip);
      dtroot_t *p;
      int fsi;
      struct dtslot *f;
      struct tlock *tlck;
      struct dt_lock *dtlck;
      struct lv *lv;
      u16 xflag_save;

      /*
       * If this was previously an non-empty directory, we need to remove
       * the old directory table.
       */
      if (DO_INDEX(ip)) {
            if (!jfs_dirtable_inline(ip)) {
                  struct tblock *tblk = tid_to_tblock(tid);
                  /*
                   * We're playing games with the tid's xflag.  If
                   * we're removing a regular file, the file's xtree
                   * is committed with COMMIT_PMAP, but we always
                   * commit the directories xtree with COMMIT_PWMAP.
                   */
                  xflag_save = tblk->xflag;
                  tblk->xflag = 0;
                  /*
                   * xtTruncate isn't guaranteed to fully truncate
                   * the xtree.  The caller needs to check i_size
                   * after committing the transaction to see if
                   * additional truncation is needed.  The
                   * COMMIT_Stale flag tells caller that we
                   * initiated the truncation.
                   */
                  xtTruncate(tid, ip, 0, COMMIT_PWMAP);
                  set_cflag(COMMIT_Stale, ip);

                  tblk->xflag = xflag_save;
            } else
                  ip->i_size = 1;

            jfs_ip->next_index = 2;
      } else
            ip->i_size = IDATASIZE;

      /*
       * acquire a transaction lock on the root
       *
       * action: directory initialization;
       */
      tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag,
                  tlckDTREE | tlckENTRY | tlckBTROOT);
      dtlck = (struct dt_lock *) & tlck->lock;

      /* linelock root */
      ASSERT(dtlck->index == 0);
      lv = & dtlck->lv[0];
      lv->offset = 0;
      lv->length = DTROOTMAXSLOT;
      dtlck->index++;

      p = &jfs_ip->i_dtroot;

      p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;

      p->header.nextindex = 0;

      /* init freelist */
      fsi = 1;
      f = &p->slot[fsi];

      /* init data area of root */
      for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
            f->next = fsi;
      f->next = -1;

      p->header.freelist = 1;
      p->header.freecnt = 8;

      /* init '..' entry */
      p->header.idotdot = cpu_to_le32(idotdot);

      return;
}

/*
 *    add_missing_indices()
 *
 * function: Fix dtree page in which one or more entries has an invalid index.
 *         fsck.jfs should really fix this, but it currently does not.
 *         Called from jfs_readdir when bad index is detected.
 */
static void add_missing_indices(struct inode *inode, s64 bn)
{
      struct ldtentry *d;
      struct dt_lock *dtlck;
      int i;
      uint index;
      struct lv *lv;
      struct metapage *mp;
      dtpage_t *p;
      int rc;
      s8 *stbl;
      tid_t tid;
      struct tlock *tlck;

      tid = txBegin(inode->i_sb, 0);

      DT_GETPAGE(inode, bn, mp, PSIZE, p, rc);

      if (rc) {
            printk(KERN_ERR "DT_GETPAGE failed!\n");
            goto end;
      }
      BT_MARK_DIRTY(mp, inode);

      ASSERT(p->header.flag & BT_LEAF);

      tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY);
      if (BT_IS_ROOT(mp))
            tlck->type |= tlckBTROOT;

      dtlck = (struct dt_lock *) &tlck->lock;

      stbl = DT_GETSTBL(p);
      for (i = 0; i < p->header.nextindex; i++) {
            d = (struct ldtentry *) &p->slot[stbl[i]];
            index = le32_to_cpu(d->index);
            if ((index < 2) || (index >= JFS_IP(inode)->next_index)) {
                  d->index = cpu_to_le32(add_index(tid, inode, bn, i));
                  if (dtlck->index >= dtlck->maxcnt)
                        dtlck = (struct dt_lock *) txLinelock(dtlck);
                  lv = &dtlck->lv[dtlck->index];
                  lv->offset = stbl[i];
                  lv->length = 1;
                  dtlck->index++;
            }
      }

      DT_PUTPAGE(mp);
      (void) txCommit(tid, 1, &inode, 0);
end:
      txEnd(tid);
}

/*
 * Buffer to hold directory entry info while traversing a dtree page
 * before being fed to the filldir function
 */
struct jfs_dirent {
      loff_t position;
      int ino;
      u16 name_len;
      char name[0];
};

/*
 * function to determine next variable-sized jfs_dirent in buffer
 */
static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent)
{
      return (struct jfs_dirent *)
            ((char *)dirent +
             ((sizeof (struct jfs_dirent) + dirent->name_len + 1 +
               sizeof (loff_t) - 1) &
              ~(sizeof (loff_t) - 1)));
}

/*
 *    jfs_readdir()
 *
 * function: read directory entries sequentially
 *    from the specified entry offset
 *
 * parameter:
 *
 * return: offset = (pn, index) of start entry
 *    of next jfs_readdir()/dtRead()
 */
int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
      struct inode *ip = filp->f_path.dentry->d_inode;
      struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab;
      int rc = 0;
      loff_t dtpos;     /* legacy OS/2 style position */
      struct dtoffset {
            s16 pn;
            s16 index;
            s32 unused;
      } *dtoffset = (struct dtoffset *) &dtpos;
      s64 bn;
      struct metapage *mp;
      dtpage_t *p;
      int index;
      s8 *stbl;
      struct btstack btstack;
      int i, next;
      struct ldtentry *d;
      struct dtslot *t;
      int d_namleft, len, outlen;
      unsigned long dirent_buf;
      char *name_ptr;
      u32 dir_index;
      int do_index = 0;
      uint loop_count = 0;
      struct jfs_dirent *jfs_dirent;
      int jfs_dirents;
      int overflow, fix_page, page_fixed = 0;
      static int unique_pos = 2;    /* If we can't fix broken index */

      if (filp->f_pos == DIREND)
            return 0;

      if (DO_INDEX(ip)) {
            /*
             * persistent index is stored in directory entries.
             * Special cases:  0 = .
             *                 1 = ..
             *                -1 = End of directory
             */
            do_index = 1;

            dir_index = (u32) filp->f_pos;

            if (dir_index > 1) {
                  struct dir_table_slot dirtab_slot;

                  if (dtEmpty(ip) ||
                      (dir_index >= JFS_IP(ip)->next_index)) {
                        /* Stale position.  Directory has shrunk */
                        filp->f_pos = DIREND;
                        return 0;
                  }
                  repeat:
                  rc = read_index(ip, dir_index, &dirtab_slot);
                  if (rc) {
                        filp->f_pos = DIREND;
                        return rc;
                  }
                  if (dirtab_slot.flag == DIR_INDEX_FREE) {
                        if (loop_count++ > JFS_IP(ip)->next_index) {
                              jfs_err("jfs_readdir detected "
                                       "infinite loop!");
                              filp->f_pos = DIREND;
                              return 0;
                        }
                        dir_index = le32_to_cpu(dirtab_slot.addr2);
                        if (dir_index == -1) {
                              filp->f_pos = DIREND;
                              return 0;
                        }
                        goto repeat;
                  }
                  bn = addressDTS(&dirtab_slot);
                  index = dirtab_slot.slot;
                  DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
                  if (rc) {
                        filp->f_pos = DIREND;
                        return 0;
                  }
                  if (p->header.flag & BT_INTERNAL) {
                        jfs_err("jfs_readdir: bad index table");
                        DT_PUTPAGE(mp);
                        filp->f_pos = -1;
                        return 0;
                  }
            } else {
                  if (dir_index == 0) {
                        /*
                         * self "."
                         */
                        filp->f_pos = 0;
                        if (filldir(dirent, ".", 1, 0, ip->i_ino,
                                  DT_DIR))
                              return 0;
                  }
                  /*
                   * parent ".."
                   */
                  filp->f_pos = 1;
                  if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR))
                        return 0;

                  /*
                   * Find first entry of left-most leaf
                   */
                  if (dtEmpty(ip)) {
                        filp->f_pos = DIREND;
                        return 0;
                  }

                  if ((rc = dtReadFirst(ip, &btstack)))
                        return rc;

                  DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
            }
      } else {
            /*
             * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
             *
             * pn = index = 0:      First entry "."
             * pn = 0; index = 1:   Second entry ".."
             * pn > 0:        Real entries, pn=1 -> leftmost page
             * pn = index = -1:     No more entries
             */
            dtpos = filp->f_pos;
            if (dtpos == 0) {
                  /* build "." entry */

                  if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino,
                            DT_DIR))
                        return 0;
                  dtoffset->index = 1;
                  filp->f_pos = dtpos;
            }

            if (dtoffset->pn == 0) {
                  if (dtoffset->index == 1) {
                        /* build ".." entry */

                        if (filldir(dirent, "..", 2, filp->f_pos,
                                  PARENT(ip), DT_DIR))
                              return 0;
                  } else {
                        jfs_err("jfs_readdir called with "
                              "invalid offset!");
                  }
                  dtoffset->pn = 1;
                  dtoffset->index = 0;
                  filp->f_pos = dtpos;
            }

            if (dtEmpty(ip)) {
                  filp->f_pos = DIREND;
                  return 0;
            }

            if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) {
                  jfs_err("jfs_readdir: unexpected rc = %d "
                        "from dtReadNext", rc);
                  filp->f_pos = DIREND;
                  return 0;
            }
            /* get start leaf page and index */
            DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

            /* offset beyond directory eof ? */
            if (bn < 0) {
                  filp->f_pos = DIREND;
                  return 0;
            }
      }

      dirent_buf = __get_free_page(GFP_KERNEL);
      if (dirent_buf == 0) {
            DT_PUTPAGE(mp);
            jfs_warn("jfs_readdir: __get_free_page failed!");
            filp->f_pos = DIREND;
            return -ENOMEM;
      }

      while (1) {
            jfs_dirent = (struct jfs_dirent *) dirent_buf;
            jfs_dirents = 0;
            overflow = fix_page = 0;

            stbl = DT_GETSTBL(p);

            for (i = index; i < p->header.nextindex; i++) {
                  d = (struct ldtentry *) & p->slot[stbl[i]];

                  if (((long) jfs_dirent + d->namlen + 1) >
                      (dirent_buf + PAGE_SIZE)) {
                        /* DBCS codepages could overrun dirent_buf */
                        index = i;
                        overflow = 1;
                        break;
                  }

                  d_namleft = d->namlen;
                  name_ptr = jfs_dirent->name;
                  jfs_dirent->ino = le32_to_cpu(d->inumber);

                  if (do_index) {
                        len = min(d_namleft, DTLHDRDATALEN);
                        jfs_dirent->position = le32_to_cpu(d->index);
                        /*
                         * d->index should always be valid, but it
                         * isn't.  fsck.jfs doesn't create the
                         * directory index for the lost+found
                         * directory.  Rather than let it go,
                         * we can try to fix it.
                         */
                        if ((jfs_dirent->position < 2) ||
                            (jfs_dirent->position >=
                             JFS_IP(ip)->next_index)) {
                              if (!page_fixed && !isReadOnly(ip)) {
                                    fix_page = 1;
                                    /*
                                     * setting overflow and setting
                                     * index to i will cause the
                                     * same page to be processed
                                     * again starting here
                                     */
                                    overflow = 1;
                                    index = i;
                                    break;
                              }
                              jfs_dirent->position = unique_pos++;
                        }
                  } else {
                        jfs_dirent->position = dtpos;
                        len = min(d_namleft, DTLHDRDATALEN_LEGACY);
                  }

                  /* copy the name of head/only segment */
                  outlen = jfs_strfromUCS_le(name_ptr, d->name, len,
                                       codepage);
                  jfs_dirent->name_len = outlen;

                  /* copy name in the additional segment(s) */
                  next = d->next;
                  while (next >= 0) {
                        t = (struct dtslot *) & p->slot[next];
                        name_ptr += outlen;
                        d_namleft -= len;
                        /* Sanity Check */
                        if (d_namleft == 0) {
                              jfs_error(ip->i_sb,
                                      "JFS:Dtree error: ino = "
                                      "%ld, bn=%Ld, index = %d",
                                      (long)ip->i_ino,
                                      (long long)bn,
                                      i);
                              goto skip_one;
                        }
                        len = min(d_namleft, DTSLOTDATALEN);
                        outlen = jfs_strfromUCS_le(name_ptr, t->name,
                                             len, codepage);
                        jfs_dirent->name_len += outlen;

                        next = t->next;
                  }

                  jfs_dirents++;
                  jfs_dirent = next_jfs_dirent(jfs_dirent);
skip_one:
                  if (!do_index)
                        dtoffset->index++;
            }

            if (!overflow) {
                  /* Point to next leaf page */
                  if (p->header.flag & BT_ROOT)
                        bn = 0;
                  else {
                        bn = le64_to_cpu(p->header.next);
                        index = 0;
                        /* update offset (pn:index) for new page */
                        if (!do_index) {
                              dtoffset->pn++;
                              dtoffset->index = 0;
                        }
                  }
                  page_fixed = 0;
            }

            /* unpin previous leaf page */
            DT_PUTPAGE(mp);

            jfs_dirent = (struct jfs_dirent *) dirent_buf;
            while (jfs_dirents--) {
                  filp->f_pos = jfs_dirent->position;
                  if (filldir(dirent, jfs_dirent->name,
                            jfs_dirent->name_len, filp->f_pos,
                            jfs_dirent->ino, DT_UNKNOWN))
                        goto out;
                  jfs_dirent = next_jfs_dirent(jfs_dirent);
            }

            if (fix_page) {
                  add_missing_indices(ip, bn);
                  page_fixed = 1;
            }

            if (!overflow && (bn == 0)) {
                  filp->f_pos = DIREND;
                  break;
            }

            DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
            if (rc) {
                  free_page(dirent_buf);
                  return rc;
            }
      }

      out:
      free_page(dirent_buf);

      return rc;
}


/*
 *    dtReadFirst()
 *
 * function: get the leftmost page of the directory
 */
static int dtReadFirst(struct inode *ip, struct btstack * btstack)
{
      int rc = 0;
      s64 bn;
      int psize = 288;  /* initial in-line directory */
      struct metapage *mp;
      dtpage_t *p;
      s8 *stbl;
      struct btframe *btsp;
      pxd_t *xd;

      BT_CLR(btstack);  /* reset stack */

      /*
       *    descend leftmost path of the tree
       *
       * by convention, root bn = 0.
       */
      for (bn = 0;;) {
            DT_GETPAGE(ip, bn, mp, psize, p, rc);
            if (rc)
                  return rc;

            /*
             * leftmost leaf page
             */
            if (p->header.flag & BT_LEAF) {
                  /* return leftmost entry */
                  btsp = btstack->top;
                  btsp->bn = bn;
                  btsp->index = 0;
                  btsp->mp = mp;

                  return 0;
            }

            /*
             * descend down to leftmost child page
             */
            if (BT_STACK_FULL(btstack)) {
                  DT_PUTPAGE(mp);
                  jfs_error(ip->i_sb, "dtReadFirst: btstack overrun");
                  BT_STACK_DUMP(btstack);
                  return -EIO;
            }
            /* push (bn, index) of the parent page/entry */
            BT_PUSH(btstack, bn, 0);

            /* get the leftmost entry */
            stbl = DT_GETSTBL(p);
            xd = (pxd_t *) & p->slot[stbl[0]];

            /* get the child page block address */
            bn = addressPXD(xd);
            psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize;

            /* unpin the parent page */
            DT_PUTPAGE(mp);
      }
}


/*
 *    dtReadNext()
 *
 * function: get the page of the specified offset (pn:index)
 *
 * return: if (offset > eof), bn = -1;
 *
 * note: if index > nextindex of the target leaf page,
 * start with 1st entry of next leaf page;
 */
static int dtReadNext(struct inode *ip, loff_t * offset,
                  struct btstack * btstack)
{
      int rc = 0;
      struct dtoffset {
            s16 pn;
            s16 index;
            s32 unused;
      } *dtoffset = (struct dtoffset *) offset;
      s64 bn;
      struct metapage *mp;
      dtpage_t *p;
      int index;
      int pn;
      s8 *stbl;
      struct btframe *btsp, *parent;
      pxd_t *xd;

      /*
       * get leftmost leaf page pinned
       */
      if ((rc = dtReadFirst(ip, btstack)))
            return rc;

      /* get leaf page */
      DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);

      /* get the start offset (pn:index) */
      pn = dtoffset->pn - 1;  /* Now pn = 0 represents leftmost leaf */
      index = dtoffset->index;

      /* start at leftmost page ? */
      if (pn == 0) {
            /* offset beyond eof ? */
            if (index < p->header.nextindex)
                  goto out;

            if (p->header.flag & BT_ROOT) {
                  bn = -1;
                  goto out;
            }

            /* start with 1st entry of next leaf page */
            dtoffset->pn++;
            dtoffset->index = index = 0;
            goto a;
      }

      /* start at non-leftmost page: scan parent pages for large pn */
      if (p->header.flag & BT_ROOT) {
            bn = -1;
            goto out;
      }

      /* start after next leaf page ? */
      if (pn > 1)
            goto b;

      /* get leaf page pn = 1 */
      a:
      bn = le64_to_cpu(p->header.next);

      /* unpin leaf page */
      DT_PUTPAGE(mp);

      /* offset beyond eof ? */
      if (bn == 0) {
            bn = -1;
            goto out;
      }

      goto c;

      /*
       * scan last internal page level to get target leaf page
       */
      b:
      /* unpin leftmost leaf page */
      DT_PUTPAGE(mp);

      /* get left most parent page */
      btsp = btstack->top;
      parent = btsp - 1;
      bn = parent->bn;
      DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
      if (rc)
            return rc;

      /* scan parent pages at last internal page level */
      while (pn >= p->header.nextindex) {
            pn -= p->header.nextindex;

            /* get next parent page address */
            bn = le64_to_cpu(p->header.next);

            /* unpin current parent page */
            DT_PUTPAGE(mp);

            /* offset beyond eof ? */
            if (bn == 0) {
                  bn = -1;
                  goto out;
            }

            /* get next parent page */
            DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
            if (rc)
                  return rc;

            /* update parent page stack frame */
            parent->bn = bn;
      }

      /* get leaf page address */
      stbl = DT_GETSTBL(p);
      xd = (pxd_t *) & p->slot[stbl[pn]];
      bn = addressPXD(xd);

      /* unpin parent page */
      DT_PUTPAGE(mp);

      /*
       * get target leaf page
       */
      c:
      DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
      if (rc)
            return rc;

      /*
       * leaf page has been completed:
       * start with 1st entry of next leaf page
       */
      if (index >= p->header.nextindex) {
            bn = le64_to_cpu(p->header.next);

            /* unpin leaf page */
            DT_PUTPAGE(mp);

            /* offset beyond eof ? */
            if (bn == 0) {
                  bn = -1;
                  goto out;
            }

            /* get next leaf page */
            DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
            if (rc)
                  return rc;

            /* start with 1st entry of next leaf page */
            dtoffset->pn++;
            dtoffset->index = 0;
      }

      out:
      /* return target leaf page pinned */
      btsp = btstack->top;
      btsp->bn = bn;
      btsp->index = dtoffset->index;
      btsp->mp = mp;

      return 0;
}


/*
 *    dtCompare()
 *
 * function: compare search key with an internal entry
 *
 * return:
 *    < 0 if k is < record
 *    = 0 if k is = record
 *    > 0 if k is > record
 */
static int dtCompare(struct component_name * key,     /* search key */
                 dtpage_t * p,      /* directory page */
                 int si)
{                       /* entry slot index */
      wchar_t *kname;
      __le16 *name;
      int klen, namlen, len, rc;
      struct idtentry *ih;
      struct dtslot *t;

      /*
       * force the left-most key on internal pages, at any level of
       * the tree, to be less than any search key.
       * this obviates having to update the leftmost key on an internal
       * page when the user inserts a new key in the tree smaller than
       * anything that has been stored.
       *
       * (? if/when dtSearch() narrows down to 1st entry (index = 0),
       * at any internal page at any level of the tree,
       * it descends to child of the entry anyway -
       * ? make the entry as min size dummy entry)
       *
       * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
       * return (1);
       */

      kname = key->name;
      klen = key->namlen;

      ih = (struct idtentry *) & p->slot[si];
      si = ih->next;
      name = ih->name;
      namlen = ih->namlen;
      len = min(namlen, DTIHDRDATALEN);

      /* compare with head/only segment */
      len = min(klen, len);
      if ((rc = UniStrncmp_le(kname, name, len)))
            return rc;

      klen -= len;
      namlen -= len;

      /* compare with additional segment(s) */
      kname += len;
      while (klen > 0 && namlen > 0) {
            /* compare with next name segment */
            t = (struct dtslot *) & p->slot[si];
            len = min(namlen, DTSLOTDATALEN);
            len = min(klen, len);
            name = t->name;
            if ((rc = UniStrncmp_le(kname, name, len)))
                  return rc;

            klen -= len;
            namlen -= len;
            kname += len;
            si = t->next;
      }

      return (klen - namlen);
}




/*
 *    ciCompare()
 *
 * function: compare search key with an (leaf/internal) entry
 *
 * return:
 *    < 0 if k is < record
 *    = 0 if k is = record
 *    > 0 if k is > record
 */
static int ciCompare(struct component_name * key,     /* search key */
                 dtpage_t * p,      /* directory page */
                 int si,      /* entry slot index */
                 int flag)
{
      wchar_t *kname, x;
      __le16 *name;
      int klen, namlen, len, rc;
      struct ldtentry *lh;
      struct idtentry *ih;
      struct dtslot *t;
      int i;

      /*
       * force the left-most key on internal pages, at any level of
       * the tree, to be less than any search key.
       * this obviates having to update the leftmost key on an internal
       * page when the user inserts a new key in the tree smaller than
       * anything that has been stored.
       *
       * (? if/when dtSearch() narrows down to 1st entry (index = 0),
       * at any internal page at any level of the tree,
       * it descends to child of the entry anyway -
       * ? make the entry as min size dummy entry)
       *
       * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF))
       * return (1);
       */

      kname = key->name;
      klen = key->namlen;

      /*
       * leaf page entry
       */
      if (p->header.flag & BT_LEAF) {
            lh = (struct ldtentry *) & p->slot[si];
            si = lh->next;
            name = lh->name;
            namlen = lh->namlen;
            if (flag & JFS_DIR_INDEX)
                  len = min(namlen, DTLHDRDATALEN);
            else
                  len = min(namlen, DTLHDRDATALEN_LEGACY);
      }
      /*
       * internal page entry
       */
      else {
            ih = (struct idtentry *) & p->slot[si];
            si = ih->next;
            name = ih->name;
            namlen = ih->namlen;
            len = min(namlen, DTIHDRDATALEN);
      }

      /* compare with head/only segment */
      len = min(klen, len);
      for (i = 0; i < len; i++, kname++, name++) {
            /* only uppercase if case-insensitive support is on */
            if ((flag & JFS_OS2) == JFS_OS2)
                  x = UniToupper(le16_to_cpu(*name));
            else
                  x = le16_to_cpu(*name);
            if ((rc = *kname - x))
                  return rc;
      }

      klen -= len;
      namlen -= len;

      /* compare with additional segment(s) */
      while (klen > 0 && namlen > 0) {
            /* compare with next name segment */
            t = (struct dtslot *) & p->slot[si];
            len = min(namlen, DTSLOTDATALEN);
            len = min(klen, len);
            name = t->name;
            for (i = 0; i < len; i++, kname++, name++) {
                  /* only uppercase if case-insensitive support is on */
                  if ((flag & JFS_OS2) == JFS_OS2)
                        x = UniToupper(le16_to_cpu(*name));
                  else
                        x = le16_to_cpu(*name);

                  if ((rc = *kname - x))
                        return rc;
            }

            klen -= len;
            namlen -= len;
            si = t->next;
      }

      return (klen - namlen);
}


/*
 *    ciGetLeafPrefixKey()
 *
 * function: compute prefix of suffix compression
 *         from two adjacent leaf entries
 *         across page boundary
 *
 * return: non-zero on error
 *
 */
static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp,
                         int ri, struct component_name * key, int flag)
{
      int klen, namlen;
      wchar_t *pl, *pr, *kname;
      struct component_name lkey;
      struct component_name rkey;

      lkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
                              GFP_KERNEL);
      if (lkey.name == NULL)
            return -ENOMEM;

      rkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t),
                              GFP_KERNEL);
      if (rkey.name == NULL) {
            kfree(lkey.name);
            return -ENOMEM;
      }

      /* get left and right key */
      dtGetKey(lp, li, &lkey, flag);
      lkey.name[lkey.namlen] = 0;

      if ((flag & JFS_OS2) == JFS_OS2)
            ciToUpper(&lkey);

      dtGetKey(rp, ri, &rkey, flag);
      rkey.name[rkey.namlen] = 0;


      if ((flag & JFS_OS2) == JFS_OS2)
            ciToUpper(&rkey);

      /* compute prefix */
      klen = 0;
      kname = key->name;
      namlen = min(lkey.namlen, rkey.namlen);
      for (pl = lkey.name, pr = rkey.name;
           namlen; pl++, pr++, namlen--, klen++, kname++) {
            *kname = *pr;
            if (*pl != *pr) {
                  key->namlen = klen + 1;
                  goto free_names;
            }
      }

      /* l->namlen <= r->namlen since l <= r */
      if (lkey.namlen < rkey.namlen) {
            *kname = *pr;
            key->namlen = klen + 1;
      } else                  /* l->namelen == r->namelen */
            key->namlen = klen;

free_names:
      kfree(lkey.name);
      kfree(rkey.name);
      return 0;
}



/*
 *    dtGetKey()
 *
 * function: get key of the entry
 */
static void dtGetKey(dtpage_t * p, int i, /* entry index */
                 struct component_name * key, int flag)
{
      int si;
      s8 *stbl;
      struct ldtentry *lh;
      struct idtentry *ih;
      struct dtslot *t;
      int namlen, len;
      wchar_t *kname;
      __le16 *name;

      /* get entry */
      stbl = DT_GETSTBL(p);
      si = stbl[i];
      if (p->header.flag & BT_LEAF) {
            lh = (struct ldtentry *) & p->slot[si];
            si = lh->next;
            namlen = lh->namlen;
            name = lh->name;
            if (flag & JFS_DIR_INDEX)
                  len = min(namlen, DTLHDRDATALEN);
            else
                  len = min(namlen, DTLHDRDATALEN_LEGACY);
      } else {
            ih = (struct idtentry *) & p->slot[si];
            si = ih->next;
            namlen = ih->namlen;
            name = ih->name;
            len = min(namlen, DTIHDRDATALEN);
      }

      key->namlen = namlen;
      kname = key->name;

      /*
       * move head/only segment
       */
      UniStrncpy_from_le(kname, name, len);

      /*
       * move additional segment(s)
       */
      while (si >= 0) {
            /* get next segment */
            t = &p->slot[si];
            kname += len;
            namlen -= len;
            len = min(namlen, DTSLOTDATALEN);
            UniStrncpy_from_le(kname, t->name, len);

            si = t->next;
      }
}


/*
 *    dtInsertEntry()
 *
 * function: allocate free slot(s) and
 *         write a leaf/internal entry
 *
 * return: entry slot index
 */
static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key,
                    ddata_t * data, struct dt_lock ** dtlock)
{
      struct dtslot *h, *t;
      struct ldtentry *lh = NULL;
      struct idtentry *ih = NULL;
      int hsi, fsi, klen, len, nextindex;
      wchar_t *kname;
      __le16 *name;
      s8 *stbl;
      pxd_t *xd;
      struct dt_lock *dtlck = *dtlock;
      struct lv *lv;
      int xsi, n;
      s64 bn = 0;
      struct metapage *mp = NULL;

      klen = key->namlen;
      kname = key->name;

      /* allocate a free slot */
      hsi = fsi = p->header.freelist;
      h = &p->slot[fsi];
      p->header.freelist = h->next;
      --p->header.freecnt;

      /* open new linelock */
      if (dtlck->index >= dtlck->maxcnt)
            dtlck = (struct dt_lock *) txLinelock(dtlck);

      lv = & dtlck->lv[dtlck->index];
      lv->offset = hsi;

      /* write head/only segment */
      if (p->header.flag & BT_LEAF) {
            lh = (struct ldtentry *) h;
            lh->next = h->next;
            lh->inumber = cpu_to_le32(data->leaf.ino);
            lh->namlen = klen;
            name = lh->name;
            if (data->leaf.ip) {
                  len = min(klen, DTLHDRDATALEN);
                  if (!(p->header.flag & BT_ROOT))
                        bn = addressPXD(&p->header.self);
                  lh->index = cpu_to_le32(add_index(data->leaf.tid,
                                            data->leaf.ip,
                                            bn, index));
            } else
                  len = min(klen, DTLHDRDATALEN_LEGACY);
      } else {
            ih = (struct idtentry *) h;
            ih->next = h->next;
            xd = (pxd_t *) ih;
            *xd = data->xd;
            ih->namlen = klen;
            name = ih->name;
            len = min(klen, DTIHDRDATALEN);
      }

      UniStrncpy_to_le(name, kname, len);

      n = 1;
      xsi = hsi;

      /* write additional segment(s) */
      t = h;
      klen -= len;
      while (klen) {
            /* get free slot */
            fsi = p->header.freelist;
            t = &p->slot[fsi];
            p->header.freelist = t->next;
            --p->header.freecnt;

            /* is next slot contiguous ? */
            if (fsi != xsi + 1) {
                  /* close current linelock */
                  lv->length = n;
                  dtlck->index++;

                  /* open new linelock */
                  if (dtlck->index < dtlck->maxcnt)
                        lv++;
                  else {
                        dtlck = (struct dt_lock *) txLinelock(dtlck);
                        lv = & dtlck->lv[0];
                  }

                  lv->offset = fsi;
                  n = 0;
            }

            kname += len;
            len = min(klen, DTSLOTDATALEN);
            UniStrncpy_to_le(t->name, kname, len);

            n++;
            xsi = fsi;
            klen -= len;
      }

      /* close current linelock */
      lv->length = n;
      dtlck->index++;

      *dtlock = dtlck;

      /* terminate last/only segment */
      if (h == t) {
            /* single segment entry */
            if (p->header.flag & BT_LEAF)
                  lh->next = -1;
            else
                  ih->next = -1;
      } else
            /* multi-segment entry */
            t->next = -1;

      /* if insert into middle, shift right succeeding entries in stbl */
      stbl = DT_GETSTBL(p);
      nextindex = p->header.nextindex;
      if (index < nextindex) {
            memmove(stbl + index + 1, stbl + index, nextindex - index);

            if ((p->header.flag & BT_LEAF) && data->leaf.ip) {
                  s64 lblock;

                  /*
                   * Need to update slot number for entries that moved
                   * in the stbl
                   */
                  mp = NULL;
                  for (n = index + 1; n <= nextindex; n++) {
                        lh = (struct ldtentry *) & (p->slot[stbl[n]]);
                        modify_index(data->leaf.tid, data->leaf.ip,
                                   le32_to_cpu(lh->index), bn, n,
                                   &mp, &lblock);
                  }
                  if (mp)
                        release_metapage(mp);
            }
      }

      stbl[index] = hsi;

      /* advance next available entry index of stbl */
      ++p->header.nextindex;
}


/*
 *    dtMoveEntry()
 *
 * function: move entries from split/left page to new/right page
 *
 *    nextindex of dst page and freelist/freecnt of both pages
 *    are updated.
 */
static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp,
                  struct dt_lock ** sdtlock, struct dt_lock ** ddtlock,
                  int do_index)
{
      int ssi, next;          /* src slot index */
      int di;                 /* dst entry index */
      int dsi;          /* dst slot index */
      s8 *sstbl, *dstbl;      /* sorted entry table */
      int snamlen, len;
      struct ldtentry *slh, *dlh = NULL;
      struct idtentry *sih, *dih = NULL;
      struct dtslot *h, *s, *d;
      struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock;
      struct lv *slv, *dlv;
      int xssi, ns, nd;
      int sfsi;

      sstbl = (s8 *) & sp->slot[sp->header.stblindex];
      dstbl = (s8 *) & dp->slot[dp->header.stblindex];

      dsi = dp->header.freelist;    /* first (whole page) free slot */
      sfsi = sp->header.freelist;

      /* linelock destination entry slot */
      dlv = & ddtlck->lv[ddtlck->index];
      dlv->offset = dsi;

      /* linelock source entry slot */
      slv = & sdtlck->lv[sdtlck->index];
      slv->offset = sstbl[si];
      xssi = slv->offset - 1;

      /*
       * move entries
       */
      ns = nd = 0;
      for (di = 0; si < sp->header.nextindex; si++, di++) {
            ssi = sstbl[si];
            dstbl[di] = dsi;

            /* is next slot contiguous ? */
            if (ssi != xssi + 1) {
                  /* close current linelock */
                  slv->length = ns;
                  sdtlck->index++;

                  /* open new linelock */
                  if (sdtlck->index < sdtlck->maxcnt)
                        slv++;
                  else {
                        sdtlck = (struct dt_lock *) txLinelock(sdtlck);
                        slv = & sdtlck->lv[0];
                  }

                  slv->offset = ssi;
                  ns = 0;
            }

            /*
             * move head/only segment of an entry
             */
            /* get dst slot */
            h = d = &dp->slot[dsi];

            /* get src slot and move */
            s = &sp->slot[ssi];
            if (sp->header.flag & BT_LEAF) {
                  /* get source entry */
                  slh = (struct ldtentry *) s;
                  dlh = (struct ldtentry *) h;
                  snamlen = slh->namlen;

                  if (do_index) {
                        len = min(snamlen, DTLHDRDATALEN);
                        dlh->index = slh->index; /* little-endian */
                  } else
                        len = min(snamlen, DTLHDRDATALEN_LEGACY);

                  memcpy(dlh, slh, 6 + len * 2);

                  next = slh->next;

                  /* update dst head/only segment next field */
                  dsi++;
                  dlh->next = dsi;
            } else {
                  sih = (struct idtentry *) s;
                  snamlen = sih->namlen;

                  len = min(snamlen, DTIHDRDATALEN);
                  dih = (struct idtentry *) h;
                  memcpy(dih, sih, 10 + len * 2);
                  next = sih->next;

                  dsi++;
                  dih->next = dsi;
            }

            /* free src head/only segment */
            s->next = sfsi;
            s->cnt = 1;
            sfsi = ssi;

            ns++;
            nd++;
            xssi = ssi;

            /*
             * move additional segment(s) of the entry
             */
            snamlen -= len;
            while ((ssi = next) >= 0) {
                  /* is next slot contiguous ? */
                  if (ssi != xssi + 1) {
                        /* close current linelock */
                        slv->length = ns;
                        sdtlck->index++;

                        /* open new linelock */
                        if (sdtlck->index < sdtlck->maxcnt)
                              slv++;
                        else {
                              sdtlck =
                                  (struct dt_lock *)
                                  txLinelock(sdtlck);
                              slv = & sdtlck->lv[0];
                        }

                        slv->offset = ssi;
                        ns = 0;
                  }

                  /* get next source segment */
                  s = &sp->slot[ssi];

                  /* get next destination free slot */
                  d++;

                  len = min(snamlen, DTSLOTDATALEN);
                  UniStrncpy_le(d->name, s->name, len);

                  ns++;
                  nd++;
                  xssi = ssi;

                  dsi++;
                  d->next = dsi;

                  /* free source segment */
                  next = s->next;
                  s->next = sfsi;
                  s->cnt = 1;
                  sfsi = ssi;

                  snamlen -= len;
            }           /* end while */

            /* terminate dst last/only segment */
            if (h == d) {
                  /* single segment entry */
                  if (dp->header.flag & BT_LEAF)
                        dlh->next = -1;
                  else
                        dih->next = -1;
            } else
                  /* multi-segment entry */
                  d->next = -1;
      }                 /* end for */

      /* close current linelock */
      slv->length = ns;
      sdtlck->index++;
      *sdtlock = sdtlck;

      dlv->length = nd;
      ddtlck->index++;
      *ddtlock = ddtlck;

      /* update source header */
      sp->header.freelist = sfsi;
      sp->header.freecnt += nd;

      /* update destination header */
      dp->header.nextindex = di;

      dp->header.freelist = dsi;
      dp->header.freecnt -= nd;
}


/*
 *    dtDeleteEntry()
 *
 * function: free a (leaf/internal) entry
 *
 * log freelist header, stbl, and each segment slot of entry
 * (even though last/only segment next field is modified,
 * physical image logging requires all segment slots of
 * the entry logged to avoid applying previous updates
 * to the same slots)
 */
static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock)
{
      int fsi;          /* free entry slot index */
      s8 *stbl;
      struct dtslot *t;
      int si, freecnt;
      struct dt_lock *dtlck = *dtlock;
      struct lv *lv;
      int xsi, n;

      /* get free entry slot index */
      stbl = DT_GETSTBL(p);
      fsi = stbl[fi];

      /* open new linelock */
      if (dtlck->index >= dtlck->maxcnt)
            dtlck = (struct dt_lock *) txLinelock(dtlck);
      lv = & dtlck->lv[dtlck->index];

      lv->offset = fsi;

      /* get the head/only segment */
      t = &p->slot[fsi];
      if (p->header.flag & BT_LEAF)
            si = ((struct ldtentry *) t)->next;
      else
            si = ((struct idtentry *) t)->next;
      t->next = si;
      t->cnt = 1;

      n = freecnt = 1;
      xsi = fsi;

      /* find the last/only segment */
      while (si >= 0) {
            /* is next slot contiguous ? */
            if (si != xsi + 1) {
                  /* close current linelock */
                  lv->length = n;
                  dtlck->index++;

                  /* open new linelock */
                  if (dtlck->index < dtlck->maxcnt)
                        lv++;
                  else {
                        dtlck = (struct dt_lock *) txLinelock(dtlck);
                        lv = & dtlck->lv[0];
                  }

                  lv->offset = si;
                  n = 0;
            }

            n++;
            xsi = si;
            freecnt++;

            t = &p->slot[si];
            t->cnt = 1;
            si = t->next;
      }

      /* close current linelock */
      lv->length = n;
      dtlck->index++;

      *dtlock = dtlck;

      /* update freelist */
      t->next = p->header.freelist;
      p->header.freelist = fsi;
      p->header.freecnt += freecnt;

      /* if delete from middle,
       * shift left the succedding entries in the stbl
       */
      si = p->header.nextindex;
      if (fi < si - 1)
            memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1);

      p->header.nextindex--;
}


/*
 *    dtTruncateEntry()
 *
 * function: truncate a (leaf/internal) entry
 *
 * log freelist header, stbl, and each segment slot of entry
 * (even though last/only segment next field is modified,
 * physical image logging requires all segment slots of
 * the entry logged to avoid applying previous updates
 * to the same slots)
 */
static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock)
{
      int tsi;          /* truncate entry slot index */
      s8 *stbl;
      struct dtslot *t;
      int si, freecnt;
      struct dt_lock *dtlck = *dtlock;
      struct lv *lv;
      int fsi, xsi, n;

      /* get free entry slot index */
      stbl = DT_GETSTBL(p);
      tsi = stbl[ti];

      /* open new linelock */
      if (dtlck->index >= dtlck->maxcnt)
            dtlck = (struct dt_lock *) txLinelock(dtlck);
      lv = & dtlck->lv[dtlck->index];

      lv->offset = tsi;

      /* get the head/only segment */
      t = &p->slot[tsi];
      ASSERT(p->header.flag & BT_INTERNAL);
      ((struct idtentry *) t)->namlen = 0;
      si = ((struct idtentry *) t)->next;
      ((struct idtentry *) t)->next = -1;

      n = 1;
      freecnt = 0;
      fsi = si;
      xsi = tsi;

      /* find the last/only segment */
      while (si >= 0) {
            /* is next slot contiguous ? */
            if (si != xsi + 1) {
                  /* close current linelock */
                  lv->length = n;
                  dtlck->index++;

                  /* open new linelock */
                  if (dtlck->index < dtlck->maxcnt)
                        lv++;
                  else {
                        dtlck = (struct dt_lock *) txLinelock(dtlck);
                        lv = & dtlck->lv[0];
                  }

                  lv->offset = si;
                  n = 0;
            }

            n++;
            xsi = si;
            freecnt++;

            t = &p->slot[si];
            t->cnt = 1;
            si = t->next;
      }

      /* close current linelock */
      lv->length = n;
      dtlck->index++;

      *dtlock = dtlck;

      /* update freelist */
      if (freecnt == 0)
            return;
      t->next = p->header.freelist;
      p->header.freelist = fsi;
      p->header.freecnt += freecnt;
}


/*
 *    dtLinelockFreelist()
 */
static void dtLinelockFreelist(dtpage_t * p,    /* directory page */
                         int m,     /* max slot index */
                         struct dt_lock ** dtlock)
{
      int fsi;          /* free entry slot index */
      struct dtslot *t;
      int si;
      struct dt_lock *dtlck = *dtlock;
      struct lv *lv;
      int xsi, n;

      /* get free entry slot index */
      fsi = p->header.freelist;

      /* open new linelock */
      if (dtlck->index >= dtlck->maxcnt)
            dtlck = (struct dt_lock *) txLinelock(dtlck);
      lv = & dtlck->lv[dtlck->index];

      lv->offset = fsi;

      n = 1;
      xsi = fsi;

      t = &p->slot[fsi];
      si = t->next;

      /* find the last/only segment */
      while (si < m && si >= 0) {
            /* is next slot contiguous ? */
            if (si != xsi + 1) {
                  /* close current linelock */
                  lv->length = n;
                  dtlck->index++;

                  /* open new linelock */
                  if (dtlck->index < dtlck->maxcnt)
                        lv++;
                  else {
                        dtlck = (struct dt_lock *) txLinelock(dtlck);
                        lv = & dtlck->lv[0];
                  }

                  lv->offset = si;
                  n = 0;
            }

            n++;
            xsi = si;

            t = &p->slot[si];
            si = t->next;
      }

      /* close current linelock */
      lv->length = n;
      dtlck->index++;

      *dtlock = dtlck;
}


/*
 * NAME: dtModify
 *
 * FUNCTION: Modify the inode number part of a directory entry
 *
 * PARAMETERS:
 *    tid   - Transaction id
 *    ip    - Inode of parent directory
 *    key   - Name of entry to be modified
 *    orig_ino    - Original inode number expected in entry
 *    new_ino     - New inode number to put into entry
 *    flag  - JFS_RENAME
 *
 * RETURNS:
 *    -ESTALE     - If entry found does not match orig_ino passed in
 *    -ENOENT     - If no entry can be found to match key
 *    0     - If successfully modified entry
 */
int dtModify(tid_t tid, struct inode *ip,
       struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag)
{
      int rc;
      s64 bn;
      struct metapage *mp;
      dtpage_t *p;
      int index;
      struct btstack btstack;
      struct tlock *tlck;
      struct dt_lock *dtlck;
      struct lv *lv;
      s8 *stbl;
      int entry_si;           /* entry slot index */
      struct ldtentry *entry;

      /*
       *    search for the entry to modify:
       *
       * dtSearch() returns (leaf page pinned, index at which to modify).
       */
      if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag)))
            return rc;

      /* retrieve search result */
      DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

      BT_MARK_DIRTY(mp, ip);
      /*
       * acquire a transaction lock on the leaf page of named entry
       */
      tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
      dtlck = (struct dt_lock *) & tlck->lock;

      /* get slot index of the entry */
      stbl = DT_GETSTBL(p);
      entry_si = stbl[index];

      /* linelock entry */
      ASSERT(dtlck->index == 0);
      lv = & dtlck->lv[0];
      lv->offset = entry_si;
      lv->length = 1;
      dtlck->index++;

      /* get the head/only segment */
      entry = (struct ldtentry *) & p->slot[entry_si];

      /* substitute the inode number of the entry */
      entry->inumber = cpu_to_le32(new_ino);

      /* unpin the leaf page */
      DT_PUTPAGE(mp);

      return 0;
}

Generated by  Doxygen 1.6.0   Back to index