#include <stdio.h> #include <stdlib.h> #include <stdarg.h> #include <string.h>
/* * this file is divided into sections: * stuff you'll probably want to place in a .h file... * implementation dependent * - you'll probably have to change something here * implementation independent * - types and function prototypes that typically go in a .h file * function prototypes * - prototypes for user functions * internals * - local functions * - user functions * main() */
/**************************** * implementation dependent * ****************************/ typedef long eAdrType; /* record address for external record */ typedef long bAdrType; /* record address for btree node */
#define CC_EQ 0 #define CC_GT 1 #define CC_LT -1
/* compare two keys and return: * CC_LT key1 < key2 * CC_GT key1 > key2 * CC_EQ key1 = key2 */ typedef int (*bCompType)(const void *key1, const void *key2);
/****************************** * implementation independent * ******************************/
/* statistics */ int maxHeight; /* maximum height attained */ int nNodesIns; /* number of nodes inserted */ int nNodesDel; /* number of nodes deleted */ int nKeysIns; /* number of keys inserted */ int nKeysDel; /* number of keys deleted */ int nDiskReads; /* number of disk reads */ int nDiskWrites; /* number of disk writes */
/* line number for last IO or memory error */ int bErrLineNo;
typedef enum {false, true} bool; typedef enum { bErrOk, bErrKeyNotFound, bErrDupKeys, bErrSectorSize, bErrFileNotOpen, bErrFileExists, bErrIO, bErrMemory } bErrType;
typedef void *bHandleType;
typedef struct { /* info for bOpen() */ char *iName; /* name of index file */ int keySize; /* length, in bytes, of key */ bool dupKeys; /* true if duplicate keys allowed */ int sectorSize; /* size of sector on disk */ bCompType comp; /* pointer to compare function */ } bOpenType;
/*********************** * function prototypes * ***********************/ bErrType bOpen(bOpenType info, bHandleType *handle); /* * input: * info info for open * output: * handle handle to btree, used in subsequent calls * returns: * bErrOk open was successful * bErrMemory insufficient memory * bErrSectorSize sector size too small or not 0 mod 4 * bErrFileNotOpen unable to open index file */
bErrType bClose(bHandleType handle); /* * input: * handle handle returned by bOpen * returns: * bErrOk file closed, resources deleted */
bErrType bInsertKey(bHandleType handle, void *key, eAdrType rec); /* * input: * handle handle returned by bOpen * key key to insert * rec record address * returns: * bErrOk operation successful * bErrDupKeys duplicate keys (and info.dupKeys = false) * notes: * If dupKeys is false, then all records inserted must have a * unique key. If dupkeys is true, then duplicate keys are * allowed, but they must all have unique record addresses. * In this case, record addresses are included in internal * nodes to generate a "unique" key. */
bErrType bDeleteKey(bHandleType handle, void *key, eAdrType *rec); /* * input: * handle handle returned by bOpen * key key to delete * rec record address of key to delete * output: * rec record address deleted * returns: * bErrOk operation successful * bErrKeyNotFound key not found * notes: * If dupKeys is false, all keys are unique, and rec is not used * to determine which key to delete. If dupKeys is true, then * rec is used to determine which key to delete. */
bErrType bFindKey(bHandleType handle, void *key, eAdrType *rec); /* * input: * handle handle returned by bOpen * key key to find * output: * rec record address * returns: * bErrOk operation successful * bErrKeyNotFound key not found */
bErrType bFindFirstKey(bHandleType handle, void *key, eAdrType *rec); /* * input: * handle handle returned by bOpen * output: * key first key in sequential set * rec record address * returns: * bErrOk operation successful * bErrKeyNotFound key not found */
bErrType bFindLastKey(bHandleType handle, void *key, eAdrType *rec); /* * input: * handle handle returned by bOpen * output: * key last key in sequential set * rec record address * returns: * bErrOk operation successful * bErrKeyNotFound key not found */
bErrType bFindNextKey(bHandleType handle, void *key, eAdrType *rec); /* * input: * handle handle returned by bOpen * output: * key key found * rec record address * returns: * bErrOk operation successful * bErrKeyNotFound key not found */
bErrType bFindPrevKey(bHandleType handle, void *key, eAdrType *rec); /* * input: * handle handle returned by bOpen * output: * key key found * rec record address * returns: * bErrOk operation successful * bErrKeyNotFound key not found */
/************* * internals * *************/
/* * algorithm: * A B+tree implementation, with keys stored in internal nodes, * and keys/record addresses stored in leaf nodes. Each node is * one sector in length, except the root node whose length is * 3 sectors. When traversing the tree to insert a key, full * children are adjusted to make room for possible new entries. * Similarly, on deletion, half-full nodes are adjusted to allow for * possible deleted entries. Adjustments are first done by * examining 2 nearest neighbors at the same level, and redistibuting * the keys if possible. If redistribution won't solve the problem, * nodes are split/joined as needed. Typically, a node is 3/4 full. * On insertion, if 3 nodes are full, they are split into 4 nodes, * each 3/4 full. On deletion, if 3 nodes are 1/2 full, they are * joined to create 2 nodes 3/4 full. * * A LRR (least-recently-read) buffering scheme for nodes is used to * simplify storage management, and, assuming some locality of reference, * improve performance. * * To simplify matters, both internal nodes and leafs contain the * same fields. * */
/* macros for addressing fields */
/* primitives */ #define bAdr(p) *(bAdrType *)(p) #define eAdr(p) *(eAdrType *)(p)
/* based on k = &[key,rec,childGE] */ #define childLT(k) bAdr((char *)k - sizeof(bAdrType)) #define key(k) (k) #define rec(k) eAdr((char *)(k) + h->keySize) #define childGE(k) bAdr((char *)(k) + h->keySize + sizeof(eAdrType))
/* based on b = &bufType */ #define leaf(b) b->p->leaf #define ct(b) b->p->ct #define next(b) b->p->next #define prev(b) b->p->prev #define fkey(b) &b->p->fkey #define lkey(b) (fkey(b) + ks((ct(b) - 1))) #define p(b) (char *)(b->p)
/* shortcuts */ #define ks(ct) ((ct) * h->ks)
typedef char keyType; /* keys entries are treated as char arrays */
typedef struct { unsigned int leaf:1; /* first bit = 1 if leaf */ unsigned int ct:15; /* count of keys present */ bAdrType prev; /* prev node in sequence (leaf) */ bAdrType next; /* next node in sequence (leaf) */ bAdrType childLT; /* child LT first key */ /* ct occurrences of [key,rec,childGE] */ keyType fkey; /* first occurrence */ } nodeType;
typedef struct bufTypeTag { /* location of node */ struct bufTypeTag *next; /* next */ struct bufTypeTag *prev; /* previous */ bAdrType adr; /* on disk */ nodeType *p; /* in memory */ bool valid; /* true if buffer contents valid */ bool modified; /* true if buffer modified */ } bufType;
/* one node for each open handle */ typedef struct hNodeTag { struct hNodeTag *prev; /* previous node */ struct hNodeTag *next; /* next node */ FILE *fp; /* idx file */ int keySize; /* key length */ bool dupKeys; /* true if duplicate keys */ int sectorSize; /* block size for idx records */ bCompType comp; /* pointer to compare routine */ bufType root; /* root of b-tree, room for 3 sets */ bufType bufList; /* head of buf list */ void *malloc1; /* malloc'd resources */ void *malloc2; /* malloc'd resources */ bufType gbuf; /* gather buffer, room for 3 sets */ bufType *curBuf; /* current location */ keyType *curKey; /* current key in current node */ unsigned int maxCt; /* minimum # keys in node */ int ks; /* sizeof key entry */ bAdrType nextFreeAdr; /* next free b-tree record address */ } hNode;
static hNode hList; /* list of hNodes */ static hNode *h; /* current hNode */
#define error(rc) lineError(__LINE__, rc)
static bErrType lineError(int lineno, bErrType rc) { if (rc == bErrIO || rc == bErrMemory) if (!bErrLineNo) bErrLineNo = lineno; return rc; }
static bAdrType allocAdr(void) { bAdrType adr; adr = h->nextFreeAdr; h->nextFreeAdr += h->sectorSize; return adr; }
static bErrType flush(bufType *buf) { int len; /* number of bytes to write */
/* flush buffer to disk */ len = h->sectorSize; if (buf->adr == 0) len *= 3; /* root */ if (fseek(h->fp, buf->adr, SEEK_SET)) return error(bErrIO); if (fwrite(buf->p, len, 1, h->fp) != 1) return error(bErrIO); buf->modified = false; nDiskWrites++; return bErrOk; }
static bErrType flushAll(void) { bErrType rc; /* return code */ bufType *buf; /* buffer */
if (h->root.modified) if ((rc = flush(&h->root)) != 0) return rc;
buf = h->bufList.next; while (buf != &h->bufList) { if (buf->modified) if ((rc = flush(buf)) != 0) return rc; buf = buf->next; } }
static bErrType assignBuf(bAdrType adr, bufType **b) { /* assign buf to adr */ bufType *buf; /* buffer */ bErrType rc; /* return code */
if (adr == 0) { *b = &h->root; return bErrOk; }
/* search for buf with matching adr */ buf = h->bufList.next; while (buf->next != &h->bufList) { if (buf->valid && buf->adr == adr) break; buf = buf->next; }
/* either buf points to a match, or it's last one in list (LRR) */ if (buf->valid) { if (buf->adr != adr) { if (buf->modified) { if ((rc = flush(buf)) != 0) return rc; } buf->adr = adr; buf->valid = false; } } else { buf->adr = adr; }
/* remove from current position and place at front of list */ buf->next->prev = buf->prev; buf->prev->next = buf->next; buf->next = h->bufList.next; buf->prev = &h->bufList; buf->next->prev = buf; buf->prev->next = buf; *b = buf; return bErrOk; }
static bErrType writeDisk(bufType *buf) { /* write buf to disk */ buf->valid = true; buf->modified = true; return bErrOk; }
static bErrType readDisk(bAdrType adr, bufType **b) { /* read data into buf */ int len; bufType *buf; /* buffer */ bErrType rc; /* return code */
if ((rc = assignBuf(adr, &buf)) != 0) return rc; if (!buf->valid) { len = h->sectorSize; if (adr == 0) len *= 3; /* root */ if (fseek(h->fp, adr, SEEK_SET)) return error(bErrIO); if (fread(buf->p, len, 1, h->fp) != 1) return error(bErrIO); buf->modified = false; buf->valid = true; nDiskReads++; } *b = buf; return bErrOk; }
typedef enum { MODE_FIRST, MODE_MATCH } modeEnum;
static int search( bufType *buf, void *key, eAdrType rec, keyType **mkey, modeEnum mode) { /* * input: * p pointer to node * key key to find * rec record address (dupkey only) * output: * k pointer to keyType info * returns: * CC_EQ key = mkey * CC_LT key < mkey * CC_GT key > mkey */ int cc; /* condition code */ int m; /* midpoint of search */ int lb; /* lower-bound of binary search */ int ub; /* upper-bound of binary search */ bool foundDup; /* true if found a duplicate key */
/* scan current node for key using binary search */ foundDup = false; lb = 0; ub = ct(buf) - 1; while (lb <= ub) { m = (lb + ub) / 2; *mkey = fkey(buf) + ks(m); cc = h->comp(key, key(*mkey)); if (cc < 0) /* key less than key[m] */ ub = m - 1; else if (cc > 0) /* key greater than key[m] */ lb = m + 1; else { /* keys match */ if (h->dupKeys) { switch (mode) { case MODE_FIRST: /* backtrack to first key */ ub = m - 1; foundDup = true; break; case MODE_MATCH: /* rec's must also match */ if (rec < rec(*mkey)) { ub = m - 1; cc = CC_LT; } else if (rec > rec(*mkey)) { lb = m + 1; cc = CC_GT; } else { return CC_EQ; } break; } } else { return cc; } } } if (ct(buf) == 0) { /* empty list */ *mkey = fkey(buf); return CC_LT; } if (h->dupKeys && (mode == MODE_FIRST) && foundDup) { /* next key is first key in set of duplicates */ *mkey += ks(1); return CC_EQ; } /* didn't find key */ return cc; }
static bErrType scatterRoot(void) { bufType *gbuf; bufType *root;
/* scatter gbuf to root */
root = &h->root; gbuf = &h->gbuf; memcpy(fkey(root), fkey(gbuf), ks(ct(gbuf))); childLT(fkey(root)) = childLT(fkey(gbuf)); ct(root) = ct(gbuf); leaf(root) = leaf(gbuf); return bErrOk; }
static bErrType scatter(bufType *pbuf, keyType *pkey, int is, bufType **tmp) { bufType *gbuf; /* gather buf */ keyType *gkey; /* gather buf key */ bErrType rc; /* return code */ int iu; /* number of tmp's used */ int k0Min; /* min #keys that can be mapped to tmp[0] */ int knMin; /* min #keys that can be mapped to tmp[1..3] */ int k0Max; /* max #keys that can be mapped to tmp[0] */ int knMax; /* max #keys that can be mapped to tmp[1..3] */ int sw; /* shift width */ int len; /* length of remainder of buf */ int base; /* base count distributed to tmps */ int extra; /* extra counts */ int ct; int i;
/* * input: * pbuf parent buffer of gathered keys * pkey where we insert a key if needed in parent * is number of supplied tmps * tmp array of tmp's to be used for scattering * output: * tmp array of tmp's used for scattering */
/* scatter gbuf to tmps, placing 3/4 max in each tmp */
gbuf = &h->gbuf; gkey = fkey(gbuf); ct = ct(gbuf);
/**************************************** * determine number of tmps to use (iu) * ****************************************/ iu = is;
/* determine limits */ if (leaf(gbuf)) { /* minus 1 to allow for insertion */ k0Max= h->maxCt - 1; knMax= h->maxCt - 1; /* plus 1 to allow for deletion */ k0Min= (h->maxCt / 2) + 1; knMin= (h->maxCt / 2) + 1; } else { /* can hold an extra gbuf key as it's translated to a LT pointer */ k0Max = h->maxCt - 1; knMax = h->maxCt; k0Min = (h->maxCt / 2) + 1; knMin = ((h->maxCt+1) / 2) + 1; }
/* calculate iu, number of tmps to use */ while(1) { if (iu == 0 || ct > (k0Max + (iu-1)*knMax)) { /* add a buffer */ if ((rc = assignBuf(allocAdr(), &tmp[iu])) != 0) return rc; /* update sequential links */ if (leaf(gbuf)) { /* adjust sequential links */ if (iu == 0) { /* no tmps supplied when splitting root for first time */ prev(tmp[0]) = 0; next(tmp[0]) = 0; } else { prev(tmp[iu]) = tmp[iu-1]->adr; next(tmp[iu]) = next(tmp[iu-1]); next(tmp[iu-1]) = tmp[iu]->adr; } } iu++; nNodesIns++; } else if (iu > 1 && ct < (k0Min + (iu-1)*knMin)) { /* del a buffer */ iu--; /* adjust sequential links */ if (leaf(gbuf) && tmp[iu-1]->adr) { next(tmp[iu-1]) = next(tmp[iu]); } next(tmp[iu-1]) = next(tmp[iu]); nNodesDel++; } else { break; } }
1 2
8 8 8
|