Связь и интернет Архив Программирование
   
Сделать стартовойСделать закладку            
   ПОИСК  
   
Главная / Алгоритмы / Структуры данных и хэширование / Словари для произвольных данных / Другие словари /
8  Perl
8  PHP
8  JavaScript
8  HTML
8  DHTML
8  XML
8  CSS
8  C / C++
8  Pascal и Delphi
8  Турбо Ассемблер
8  MySQL
8  CASE-технологии
8  Алгоритмы
8  Python
8  Обратная связь
8  Гостевая книга
Новости о мире


Б, Б+ и Б++ деревья - Программирование от RIN.RU
Б, Б+ и Б++ деревья





#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>


/*
* this file is divided into sections:
* stuff you'll probably want to place in a .h file...
* implementation dependent
* - you'll probably have to change something here
* implementation independent
* - types and function prototypes that typically go in a .h file
* function prototypes
* - prototypes for user functions
* internals
* - local functions
* - user functions
* main()
*/


/****************************
* implementation dependent *
****************************/


typedef long eAdrType; /* record address for external record */
typedef long bAdrType; /* record address for btree node */


#define CC_EQ 0
#define CC_GT 1
#define CC_LT -1


/* compare two keys and return:
* CC_LT key1 < key2
* CC_GT key1 > key2
* CC_EQ key1 = key2
*/
typedef int (*bCompType)(const void *key1, const void *key2);


/******************************
* implementation independent *
******************************/


/* statistics */
int maxHeight; /* maximum height attained */
int nNodesIns; /* number of nodes inserted */
int nNodesDel; /* number of nodes deleted */
int nKeysIns; /* number of keys inserted */
int nKeysDel; /* number of keys deleted */
int nDiskReads; /* number of disk reads */
int nDiskWrites; /* number of disk writes */


/* line number for last IO or memory error */
int bErrLineNo;


typedef enum {false, true} bool;
typedef enum {
bErrOk,
bErrKeyNotFound,
bErrDupKeys,
bErrSectorSize,
bErrFileNotOpen,
bErrFileExists,
bErrIO,
bErrMemory
} bErrType;


typedef void *bHandleType;


typedef struct { /* info for bOpen() */
char *iName; /* name of index file */
int keySize; /* length, in bytes, of key */
bool dupKeys; /* true if duplicate keys allowed */
int sectorSize; /* size of sector on disk */
bCompType comp; /* pointer to compare function */
} bOpenType;


/***********************
* function prototypes *
***********************/


bErrType bOpen(bOpenType info, bHandleType *handle);
/*
* input:
* info info for open
* output:
* handle handle to btree, used in subsequent calls
* returns:
* bErrOk open was successful
* bErrMemory insufficient memory
* bErrSectorSize sector size too small or not 0 mod 4
* bErrFileNotOpen unable to open index file
*/


bErrType bClose(bHandleType handle);
/*
* input:
* handle handle returned by bOpen
* returns:
* bErrOk file closed, resources deleted
*/


bErrType bInsertKey(bHandleType handle, void *key, eAdrType rec);
/*
* input:
* handle handle returned by bOpen
* key key to insert
* rec record address
* returns:
* bErrOk operation successful
* bErrDupKeys duplicate keys (and info.dupKeys = false)
* notes:
* If dupKeys is false, then all records inserted must have a
* unique key. If dupkeys is true, then duplicate keys are
* allowed, but they must all have unique record addresses.
* In this case, record addresses are included in internal
* nodes to generate a "unique" key.
*/


bErrType bDeleteKey(bHandleType handle, void *key, eAdrType *rec);
/*
* input:
* handle handle returned by bOpen
* key key to delete
* rec record address of key to delete
* output:
* rec record address deleted
* returns:
* bErrOk operation successful
* bErrKeyNotFound key not found
* notes:
* If dupKeys is false, all keys are unique, and rec is not used
* to determine which key to delete. If dupKeys is true, then
* rec is used to determine which key to delete.
*/


bErrType bFindKey(bHandleType handle, void *key, eAdrType *rec);
/*
* input:
* handle handle returned by bOpen
* key key to find
* output:
* rec record address
* returns:
* bErrOk operation successful
* bErrKeyNotFound key not found
*/


bErrType bFindFirstKey(bHandleType handle, void *key, eAdrType *rec);
/*
* input:
* handle handle returned by bOpen
* output:
* key first key in sequential set
* rec record address
* returns:
* bErrOk operation successful
* bErrKeyNotFound key not found
*/


bErrType bFindLastKey(bHandleType handle, void *key, eAdrType *rec);
/*
* input:
* handle handle returned by bOpen
* output:
* key last key in sequential set
* rec record address
* returns:
* bErrOk operation successful
* bErrKeyNotFound key not found
*/


bErrType bFindNextKey(bHandleType handle, void *key, eAdrType *rec);
/*
* input:
* handle handle returned by bOpen
* output:
* key key found
* rec record address
* returns:
* bErrOk operation successful
* bErrKeyNotFound key not found
*/


bErrType bFindPrevKey(bHandleType handle, void *key, eAdrType *rec);
/*
* input:
* handle handle returned by bOpen
* output:
* key key found
* rec record address
* returns:
* bErrOk operation successful
* bErrKeyNotFound key not found
*/


/*************
* internals *
*************/


/*
* algorithm:
* A B+tree implementation, with keys stored in internal nodes,
* and keys/record addresses stored in leaf nodes. Each node is
* one sector in length, except the root node whose length is
* 3 sectors. When traversing the tree to insert a key, full
* children are adjusted to make room for possible new entries.
* Similarly, on deletion, half-full nodes are adjusted to allow for
* possible deleted entries. Adjustments are first done by
* examining 2 nearest neighbors at the same level, and redistibuting
* the keys if possible. If redistribution won't solve the problem,
* nodes are split/joined as needed. Typically, a node is 3/4 full.
* On insertion, if 3 nodes are full, they are split into 4 nodes,
* each 3/4 full. On deletion, if 3 nodes are 1/2 full, they are
* joined to create 2 nodes 3/4 full.
*
* A LRR (least-recently-read) buffering scheme for nodes is used to
* simplify storage management, and, assuming some locality of reference,
* improve performance.
*
* To simplify matters, both internal nodes and leafs contain the
* same fields.
*
*/


/* macros for addressing fields */


/* primitives */
#define bAdr(p) *(bAdrType *)(p)
#define eAdr(p) *(eAdrType *)(p)


/* based on k = &[key,rec,childGE] */
#define childLT(k) bAdr((char *)k - sizeof(bAdrType))
#define key(k) (k)
#define rec(k) eAdr((char *)(k) + h->keySize)
#define childGE(k) bAdr((char *)(k) + h->keySize + sizeof(eAdrType))


/* based on b = &bufType */
#define leaf(b) b->p->leaf
#define ct(b) b->p->ct
#define next(b) b->p->next
#define prev(b) b->p->prev
#define fkey(b) &b->p->fkey
#define lkey(b) (fkey(b) + ks((ct(b) - 1)))
#define p(b) (char *)(b->p)


/* shortcuts */
#define ks(ct) ((ct) * h->ks)


typedef char keyType; /* keys entries are treated as char arrays */


typedef struct {
unsigned int leaf:1; /* first bit = 1 if leaf */
unsigned int ct:15; /* count of keys present */
bAdrType prev; /* prev node in sequence (leaf) */
bAdrType next; /* next node in sequence (leaf) */
bAdrType childLT; /* child LT first key */
/* ct occurrences of [key,rec,childGE] */
keyType fkey; /* first occurrence */
} nodeType;


typedef struct bufTypeTag { /* location of node */
struct bufTypeTag *next; /* next */
struct bufTypeTag *prev; /* previous */
bAdrType adr; /* on disk */
nodeType *p; /* in memory */
bool valid; /* true if buffer contents valid */
bool modified; /* true if buffer modified */
} bufType;


/* one node for each open handle */
typedef struct hNodeTag {
struct hNodeTag *prev; /* previous node */
struct hNodeTag *next; /* next node */
FILE *fp; /* idx file */
int keySize; /* key length */
bool dupKeys; /* true if duplicate keys */
int sectorSize; /* block size for idx records */
bCompType comp; /* pointer to compare routine */
bufType root; /* root of b-tree, room for 3 sets */
bufType bufList; /* head of buf list */
void *malloc1; /* malloc'd resources */
void *malloc2; /* malloc'd resources */
bufType gbuf; /* gather buffer, room for 3 sets */
bufType *curBuf; /* current location */
keyType *curKey; /* current key in current node */
unsigned int maxCt; /* minimum # keys in node */
int ks; /* sizeof key entry */
bAdrType nextFreeAdr; /* next free b-tree record address */
} hNode;


static hNode hList; /* list of hNodes */
static hNode *h; /* current hNode */


#define error(rc) lineError(__LINE__, rc)


static bErrType lineError(int lineno, bErrType rc) {
if (rc == bErrIO || rc == bErrMemory)
if (!bErrLineNo)
bErrLineNo = lineno;
return rc;
}


static bAdrType allocAdr(void) {
bAdrType adr;
adr = h->nextFreeAdr;
h->nextFreeAdr += h->sectorSize;
return adr;
}


static bErrType flush(bufType *buf) {
int len; /* number of bytes to write */


/* flush buffer to disk */
len = h->sectorSize;
if (buf->adr == 0) len *= 3; /* root */
if (fseek(h->fp, buf->adr, SEEK_SET)) return error(bErrIO);
if (fwrite(buf->p, len, 1, h->fp) != 1) return error(bErrIO);
buf->modified = false;
nDiskWrites++;
return bErrOk;
}


static bErrType flushAll(void) {
bErrType rc; /* return code */
bufType *buf; /* buffer */


if (h->root.modified)
if ((rc = flush(&h->root)) != 0) return rc;


buf = h->bufList.next;
while (buf != &h->bufList) {
if (buf->modified)
if ((rc = flush(buf)) != 0) return rc;
buf = buf->next;
}
}


static bErrType assignBuf(bAdrType adr, bufType **b) {
/* assign buf to adr */
bufType *buf; /* buffer */
bErrType rc; /* return code */


if (adr == 0) {
*b = &h->root;
return bErrOk;
}


/* search for buf with matching adr */
buf = h->bufList.next;
while (buf->next != &h->bufList) {
if (buf->valid && buf->adr == adr) break;
buf = buf->next;
}


/* either buf points to a match, or it's last one in list (LRR) */
if (buf->valid) {
if (buf->adr != adr) {
if (buf->modified) {
if ((rc = flush(buf)) != 0) return rc;
}
buf->adr = adr;
buf->valid = false;
}
} else {
buf->adr = adr;
}


/* remove from current position and place at front of list */
buf->next->prev = buf->prev;
buf->prev->next = buf->next;
buf->next = h->bufList.next;
buf->prev = &h->bufList;
buf->next->prev = buf;
buf->prev->next = buf;
*b = buf;
return bErrOk;
}


static bErrType writeDisk(bufType *buf) {
/* write buf to disk */
buf->valid = true;
buf->modified = true;
return bErrOk;
}


static bErrType readDisk(bAdrType adr, bufType **b) {
/* read data into buf */
int len;
bufType *buf; /* buffer */
bErrType rc; /* return code */


if ((rc = assignBuf(adr, &buf)) != 0) return rc;
if (!buf->valid) {
len = h->sectorSize;
if (adr == 0) len *= 3; /* root */
if (fseek(h->fp, adr, SEEK_SET)) return error(bErrIO);
if (fread(buf->p, len, 1, h->fp) != 1) return error(bErrIO);
buf->modified = false;
buf->valid = true;
nDiskReads++;
}
*b = buf;
return bErrOk;
}


typedef enum { MODE_FIRST, MODE_MATCH } modeEnum;


static int search(
bufType *buf,
void *key,
eAdrType rec,
keyType **mkey,
modeEnum mode) {
/*
* input:
* p pointer to node
* key key to find
* rec record address (dupkey only)
* output:
* k pointer to keyType info
* returns:
* CC_EQ key = mkey
* CC_LT key < mkey
* CC_GT key > mkey
*/
int cc; /* condition code */
int m; /* midpoint of search */
int lb; /* lower-bound of binary search */
int ub; /* upper-bound of binary search */
bool foundDup; /* true if found a duplicate key */


/* scan current node for key using binary search */
foundDup = false;
lb = 0;
ub = ct(buf) - 1;
while (lb <= ub) {
m = (lb + ub) / 2;
*mkey = fkey(buf) + ks(m);
cc = h->comp(key, key(*mkey));
if (cc < 0)
/* key less than key[m] */
ub = m - 1;
else if (cc > 0)
/* key greater than key[m] */
lb = m + 1;
else {
/* keys match */
if (h->dupKeys) {
switch (mode) {
case MODE_FIRST:
/* backtrack to first key */
ub = m - 1;
foundDup = true;
break;
case MODE_MATCH:
/* rec's must also match */
if (rec < rec(*mkey)) {
ub = m - 1;
cc = CC_LT;
} else if (rec > rec(*mkey)) {
lb = m + 1;
cc = CC_GT;
} else {
return CC_EQ;
}
break;
}
} else {
return cc;
}
}
}
if (ct(buf) == 0) {
/* empty list */
*mkey = fkey(buf);
return CC_LT;
}
if (h->dupKeys && (mode == MODE_FIRST) && foundDup) {
/* next key is first key in set of duplicates */
*mkey += ks(1);
return CC_EQ;
}
/* didn't find key */
return cc;
}


static bErrType scatterRoot(void) {
bufType *gbuf;
bufType *root;


/* scatter gbuf to root */


root = &h->root;
gbuf = &h->gbuf;
memcpy(fkey(root), fkey(gbuf), ks(ct(gbuf)));
childLT(fkey(root)) = childLT(fkey(gbuf));
ct(root) = ct(gbuf);
leaf(root) = leaf(gbuf);
return bErrOk;
}


static bErrType scatter(bufType *pbuf, keyType *pkey, int is, bufType **tmp) {
bufType *gbuf; /* gather buf */
keyType *gkey; /* gather buf key */
bErrType rc; /* return code */
int iu; /* number of tmp's used */
int k0Min; /* min #keys that can be mapped to tmp[0] */
int knMin; /* min #keys that can be mapped to tmp[1..3] */
int k0Max; /* max #keys that can be mapped to tmp[0] */
int knMax; /* max #keys that can be mapped to tmp[1..3] */
int sw; /* shift width */
int len; /* length of remainder of buf */
int base; /* base count distributed to tmps */
int extra; /* extra counts */
int ct;
int i;


/*
* input:
* pbuf parent buffer of gathered keys
* pkey where we insert a key if needed in parent
* is number of supplied tmps
* tmp array of tmp's to be used for scattering
* output:
* tmp array of tmp's used for scattering
*/


/* scatter gbuf to tmps, placing 3/4 max in each tmp */


gbuf = &h->gbuf;
gkey = fkey(gbuf);
ct = ct(gbuf);


/****************************************
* determine number of tmps to use (iu) *
****************************************/


iu = is;


/* determine limits */
if (leaf(gbuf)) {
/* minus 1 to allow for insertion */
k0Max= h->maxCt - 1;
knMax= h->maxCt - 1;
/* plus 1 to allow for deletion */
k0Min= (h->maxCt / 2) + 1;
knMin= (h->maxCt / 2) + 1;
} else {
/* can hold an extra gbuf key as it's translated to a LT pointer */
k0Max = h->maxCt - 1;
knMax = h->maxCt;
k0Min = (h->maxCt / 2) + 1;
knMin = ((h->maxCt+1) / 2) + 1;
}


/* calculate iu, number of tmps to use */
while(1) {
if (iu == 0 || ct > (k0Max + (iu-1)*knMax)) {
/* add a buffer */
if ((rc = assignBuf(allocAdr(), &tmp[iu])) != 0)
return rc;
/* update sequential links */
if (leaf(gbuf)) {
/* adjust sequential links */
if (iu == 0) {
/* no tmps supplied when splitting root for first time */
prev(tmp[0]) = 0;
next(tmp[0]) = 0;
} else {
prev(tmp[iu]) = tmp[iu-1]->adr;
next(tmp[iu]) = next(tmp[iu-1]);
next(tmp[iu-1]) = tmp[iu]->adr;
}
}
iu++;
nNodesIns++;
} else if (iu > 1 && ct < (k0Min + (iu-1)*knMin)) {
/* del a buffer */
iu--;
/* adjust sequential links */
if (leaf(gbuf) && tmp[iu-1]->adr) {
next(tmp[iu-1]) = next(tmp[iu]);
}
next(tmp[iu-1]) = next(tmp[iu]);
nNodesDel++;
} else {
break;
}
}


<<<  Назад
 1  2 


 8  Комментарии к статье  8  Обсудить в форуме  8  Обсудить в чате

 
  
  
    Copyright ©  RIN 2003 - 2004      * Обратная связь