Algorithm 版 (精华区)
发信人: Lerry (想不开·撞树), 信区: Algorithm
标 题: btree.c
发信站: 哈工大紫丁香 (2002年06月09日21:19:12 星期天), 站内信件
/*
* NoseyParker, the search engine for FTP archives
* Copyright (C) 1993-96 by Jiri A. Randus
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* The author can be reached as follows:
* Internet: <Jiri.Randus@vslib.cz>
* Phone: ++42 48 5227374
* SnailMail: Jiri Randus
* KIN HF TU v Liberci
* Halkova 6
* 46117 Liberec
* Czech Republic
*/
#define BTREE_VERSION "1.04"
#include "parker.h"
#include <malloc.h>
#include <unistd.h>
#include <sys/mman.h>
#ifdef _POSIX_MAPPED_FILES
#define MMAP_VERSION "using a mmap()ed temporary file"
#define INITMMAP() do_mmap_init()
#define ALLOC(x) do_mmap_alloc(x)
#define FREE(x,y) do_mmap_free(x,y)
#define MMAP_PAGESIZE (8*1024)
#else
#define MMAP_VERSION "using a malloc()ed temporary storage"
#define INITMMAP()
#define ALLOC(x) malloc(x)
#define FREE(x,y) free(x)
#endif
struct TripleList **TriIndex;
char *BTreeMax;
struct BTreeItem *Bhead=NULL;
struct TripleList *Thead=NULL,*Tptr=NULL,*Tptr2=NULL;
char genbuf[MAX];
char *ptr;
long triplecount;
long nodecount;
long diskoffset;
#ifdef _POSIX_MAPPED_FILES
off_t mmap_offset;
int mmap_fd;
void *mmap_ptr;
size_t mmap_left;
void do_mmap_init(void)
{
if(*MMAPTMPFILE!='/') sprintf(genbuf,"%s/%s",PARKER_HOME,MMAPTMPFILE);
else sprintf(genbuf,"%s",MMAPTMPFILE);
unlink(genbuf);
if((mmap_fd=open(genbuf, O_RDWR|O_CREAT|O_TRUNC, 0644))==-1)
{
perror("Cannot open mmap a file for mmap()ed access");
exit(55);
}
unlink(genbuf);
mmap_offset=0;
mmap_left=0;
}
char dummy_mmap_block[MMAP_PAGESIZE];
void *do_mmap_alloc(size_t size)
{
void *ptr;
if(size>mmap_left)
{
if(write(mmap_fd, dummy_mmap_block, MMAP_PAGESIZE)<MMAP_PAGESIZE)
{
perror("Out of disk space in Btree");
exit(112);
}
mmap_ptr=mmap(NULL, MMAP_PAGESIZE, PROT_READ|PROT_WRITE, MAP_SHARED, mma
p_fd, mmap_offset);
if((long)mmap_ptr==-1) {perror("mmap");exit(111);}
mmap_offset+=MMAP_PAGESIZE;
mmap_left=MMAP_PAGESIZE;
}
ptr=mmap_ptr;
mmap_ptr+=size;
mmap_left-=size;
return(ptr);
}
int do_mmap_free(void *ptr, size_t size)
{
return(0);
}
#endif
void AnnounceMethod(void)
{
printf("Btree %s - creating indexes %s\n",BTREE_VERSION,MMAP_VERSION);
INITMMAP();
}
void addB(struct TripleList *Tptr, struct BTreeItem *item,
struct BTreeItem **prev,char **max)
{
int i;
if(!item) {
nodecount++;
*prev=item=ALLOC(sizeof(struct BTreeItem));
memset(item,0,sizeof(struct BTreeItem));
item->key=Tptr->key;
item->lastfileno=(BTcnts)(-1);
*max=item->key;
return;
}
if(!strncmp(Tptr->key,item->key,KEYSIZE)) return;
for(i=0;i<(B-1);i++)
{
if((!item->refs[i])||(strncmp(Tptr->key,item->tops[i],KEYSIZE)<=0))
{
if(strncmp(Tptr->key,*max,KEYSIZE)>0) *max=Tptr->key;
addB(Tptr, item->refs[i], &(item->refs[i]), &(item->tops[i]));
break;
}
}
if(i==(B-1)) /* Full node !! */
{
if(strncmp(Tptr->key,*max,KEYSIZE)>0) *max=Tptr->key;
addB(Tptr, item->refs[B-1], &(item->refs[B-1]), &(item->tops[B-1]));
}
}
/* hi is not a valid index !! */
void CreateBtree(long lo, long hi)
{
double dist,off;
if(lo>=hi) return;
dist=(double)(hi-lo)/B;
off=lo+dist;
while((long)off<hi)
{
addB(TriIndex[(long)off],Bhead,&Bhead,&BTreeMax);
off+=dist;
}
CreateBtree(lo,(long)(lo+dist));
off=lo+dist;
while(1)
{
CreateBtree((long)(off+1),(long)(off+dist));
off+=dist;
if((off+dist)>hi) break;
}
CreateBtree((long)(off+1),(long)hi);
}
void LoadTriples(void)
{
long i;
triplecount=0L;
while(1)
{
fgets(genbuf,MAX,stdin);
if(feof(stdin)) break;
genbuf[KEYSIZE]='\0';
if((ptr=strchr(genbuf,CR))!=NULL) *ptr='\0';
if((ptr=strchr(genbuf,LF))!=NULL) *ptr='\0';
Tptr2=malloc(sizeof(struct TripleList));
Tptr2->key=strdup(genbuf);
Tptr2->next=NULL;
if(!Tptr) Thead=Tptr=Tptr2;
else {Tptr->next=Tptr2; Tptr=Tptr2;}
triplecount++;
}
Tptr=Thead;
TriIndex=malloc(triplecount*sizeof(struct TripleList *));
for(i=0L;i<triplecount;i++)
{
TriIndex[i]=Tptr;
Tptr=Tptr->next;
}
}
struct BTreeItem *LocateBItem(char *key, struct BTreeItem *item)
{
int i;
if(!strcasecmp(key,item->key)) return(item);
for(i=0;i<(B-1);i++)
if(!item->refs[i]) return(NULL);
else
if(strcasecmp(key,item->tops[i])<=0) return(LocateBItem(key, item->refs[i]
));
if(!item->refs[B-1]) return(NULL);
else return(LocateBItem(key, item->refs[B-1]));
}
void AddRef(struct BTreeItem *MyTriple, union DiskTripleRef *ref)
{
struct BTreeInfo *info;
int i;
info=MyTriple->info;
if((!info)||(info->cnt==BTREENUMINFO))
{
info=ALLOC(sizeof(struct BTreeInfo));
info->next=MyTriple->info;
MyTriple->info=info;
for(i=0;i<BTREENUMINFO;i++)
{
info->cnt=0;
info->refs[i].offset=0L;
}
}
info->refs[info->cnt++].offset=ref->offset;
}
long NumRefs(struct BTreeInfo *ptr)
{
if(!ptr) return(0L);
return(ptr->cnt+NumRefs(ptr->next));
}
void IndexOffset(struct BTreeItem *Bptr, long *offset)
{
int i;
Bptr->diskoffset=*offset;
*offset+=sizeof(struct DiskBTreeHead) +
NumRefs(Bptr->info)*sizeof(union DiskTripleRef);
for(i=0;i<B;i++) if(Bptr->refs[i]) IndexOffset(Bptr->refs[i],offset);
}
void WriteRefs(struct BTreeInfo *ptr, FILE *idx)
{
if(!ptr) return;
WriteRefs(ptr->next, idx);
fwrite(ptr->refs, sizeof(union DiskTripleRef), ptr->cnt, idx);
FREE(ptr,sizeof(struct BTreeInfo));
return;
}
void SaveBItem(struct BTreeItem *Bptr, FILE *idx, struct DiskBTreeHead *head
)
{
int i;
if(ftell(idx)!=Bptr->diskoffset)
{
printf("ERROR !!! Offsets does not match for `%s' (%ld != %ld)\n",
Bptr->key,ftell(idx),Bptr->diskoffset);
exit(110);
}
memcpy(head->key,Bptr->key,KEYSIZE);
for(i=0;i<B;i++)
if(Bptr->tops[i]) {
memcpy(head->treerefs[i].top,Bptr->tops[i],KEYSIZE);
head->treerefs[i].offset=Bptr->refs[i]->diskoffset;
}
else memcpy(head->treerefs[i].top,"\0\0\0",KEYSIZE);
head->numrefs=NumRefs(Bptr->info);
fwrite(head,sizeof(struct DiskBTreeHead),1,idx);
WriteRefs(Bptr->info,idx);
for(i=0;i<B;i++) if(Bptr->refs[i]) SaveBItem(Bptr->refs[i], idx, head);
FREE(Bptr, sizeof(struct BTreeItem));
}
void SaveIndex(char *IndexName)
{
FILE *idx;
struct DiskBTreeHead head;
if(!(idx=fopen(IndexName,"w"))) {
perror("Cannot open SEEDBTREE");
exit(109);
}
SaveBItem(Bhead,idx,&head);
fclose(idx);
}
--
当一个女孩儿觉得她不太容易了解那个男人的时候,她会爱他。
※ 来源:·哈工大紫丁香 bbs.hit.edu.cn·[FROM: 天外飞仙]
Powered by KBS BBS 2.0 (http://dev.kcn.cn)
页面执行时间:3.982毫秒