Algorithm 版 (精华区)
发信人: Lerry (想不开·撞树), 信区: Algorithm
标 题: btreefiles.c
发信站: 哈工大紫丁香 (2002年06月09日21:20:07 星期天), 站内信件
/*
* NoseyParker, the search engine for FTP archives
* Copyright (C) 1993-96 by Jiri A. Randus
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* The author can be reached as follows:
* Internet: <Jiri.Randus@vslib.cz>
* Phone: ++42 48 5227374
* SnailMail: Jiri Randus
* KIN HF TU v Liberci
* Halkova 6
* 46117 Liberec
* Czech Republic
*/
/*
* Uses procedures in btree.c
*/
#define BTREEFILES_VERSION "1.00"
#include "parker.h"
/*
 * Argument vector handed to execve() for the external sort program.
 * The records are ordered on their third blank-separated field onward
 * (the text that follows the file number and the offset).
 * "-k3" is the POSIX spelling of the obsolete zero-origin "+2" key, which
 * current sort implementations interpret as a file name instead.
 */
char *sort[]={SORT, "-k3", NULL};
/*
 * IndexData - read "<fileno> <offset> <text>" records from fd and register
 * references for them in the B-tree rooted at Bhead.
 *
 * For each record the third field is cut at its first space, and every
 * KEYSIZE-character substring of what remains is looked up with
 * LocateBItem().  For each substring found, a file marker reference is added
 * whenever the file number differs from the item's lastfileno, followed by
 * an offset reference.  Substrings missing from the tree are reported on
 * stdout and skipped.
 *
 * fd: readable descriptor; it is wrapped with fdopen() and closed on return
 *     (left untouched if fdopen() fails).
 */
void IndexData(int fd)
{
    FILE *in;
    char buffer[MAX];
    char triple[KEYSIZE+1];
    char *start;
    char *ptr;
    BTcnts filenum;            /* not named "fileno": that is a stdio function */
    long offset;
    long triplecnt=0L;
    struct BTreeItem *MyTriple;
    union DiskTripleRef ref;
    int j,len;

    if(!(in=fdopen(fd,"r"))) {
        perror("Cannot fdopen input");
        return;
    }
    triple[KEYSIZE]='\0';

    /* Loop on the fgets() result: this also stops on a read error and still
       processes a final line that lacks a trailing newline. */
    while(fgets(buffer,MAX,in))
    {
        /* Skip malformed records rather than reusing stale values. */
        if(sscanf(buffer,"%d %ld ", &j, &offset)!=2) continue;
        filenum=j;

        /* The text field starts after the second space. */
        if(!(start=strchr(buffer,' '))) continue;
        if(!(start=strchr(start+1,' '))) continue;
        start++;

        /* Only the part of the text before its first space is indexed. */
        if((ptr=strchr(start,' '))!=NULL) *ptr='\0';

        /* Signed arithmetic: a text shorter than KEYSIZE gives len<0 and the
           loop below does not run. */
        len=(int)strlen(start)-KEYSIZE;
        for(j=0;j<=len;j++)
        {
            /* j<=len guarantees KEYSIZE characters are available here. */
            memcpy(triple,&start[j],KEYSIZE);
            MyTriple=LocateBItem(triple,Bhead);
            if(!MyTriple) {
                printf("Error: Btree does not contain `%s' as found in\n",triple);
                printf("`%s'\n",start);
                continue;
            }
            /* Same file and same offset as the last reference: nothing new.
               NOTE(review): lastoffset is not assigned in this function;
               presumably AddRef() maintains it - confirm. */
            if((MyTriple->lastfileno==filenum)&&(MyTriple->lastoffset==offset))
                continue;
            if(MyTriple->lastfileno!=filenum) {
                /* First reference into this file: emit a file marker. */
                ref.file.mark=BTREEENDMARK;
                MyTriple->lastfileno=ref.file.fileno=filenum;
                AddRef(MyTriple,&ref);
                triplecnt++;
            }
            ref.offset=offset;
            AddRef(MyTriple,&ref);
            triplecnt++;
        }
    }
    fclose(in);
    printf("BtreeFiles generated %8ld triples\n",triplecnt);
}
/*
 * DumpAllData - write every line of every file listed in SEEDFILEDB to fd
 * as a "<fileno> <offset> <line>" record.
 *
 * SEEDFILEDB holds one file name per line.  fileno is the zero-based position
 * of the name in that list (it advances even when a file cannot be opened)
 * and offset is the ftell() position at which the line starts.  Lines longer
 * than MAX-1 bytes are emitted in several chunks, each as its own record.
 *
 * fd: writable descriptor; it is wrapped with fdopen() and closed on return.
 * Exits with status 101 if fd cannot be wrapped and 102 if SEEDFILEDB cannot
 * be opened.
 */
void DumpAllData(int fd)
{
    BTcnts filenum=0;          /* not named "fileno": that is a stdio function */
    FILE *filedb;
    FILE *input;
    FILE *output;
    char file[MAX];            /* current file name from SEEDFILEDB */
    char line[MAX];            /* current line of that file */
    char *ptr;
    long offset;

    if(!(output=fdopen(fd,"w"))) {
        perror("Cannot fdopen output");
        exit(101);
    }
    if(!(filedb=fopen(SEEDFILEDB,"r"))) {
        perror("Cannot open SEEDFILEDB");
        exit(102);
    }

    /* Loop on the fgets() result so that a read error ends the loop and a
       last name without a trailing newline is still processed. */
    while(fgets(file,MAX,filedb))
    {
        /* Strip the line terminator from the file name. */
        if((ptr=strchr(file,CR))!=NULL) *ptr='\0';
        if((ptr=strchr(file,LF))!=NULL) *ptr='\0';

        if((input=fopen(file,"r"))!=NULL)
        {
            for(;;)
            {
                offset=ftell(input);
                if(!fgets(line,MAX,input)) break;
                /* The line normally carries its own CR/LF terminator. */
                fprintf(output,"%d %ld %s",(int)filenum,offset,line);
                /* An unterminated final line or an over-long chunk has no
                   newline; add one so records never run together. */
                if(!strchr(line,'\n')) fputc('\n',output);
            }
            fclose(input);
        }
        else {fprintf(stderr,"Cannot open %s\n",file);}
        filenum++;
    }
    fclose(filedb);

    /* Flush the records and report a failed write instead of losing it. */
    if(fclose(output)==EOF) perror("Cannot write output");
}
/*
 * Program entry: build the B-tree from the triples, pipe the database dump
 * through the external sort, index the sorted records and save the index.
 *
 * Process layout:
 *   child 1: DumpAllData() -> pipefd
 *   child 2: SORTPATH, stdin <- pipefd, stdout -> pipefd2
 *   parent : IndexData()   <- pipefd2
 *
 * Returns 0 on success; exits with EXIT_FAILURE if a pipe or a child process
 * cannot be created.  The sort child exits with 106 if it cannot be started.
 */
int main(void)
{
    long lo;
    pid_t child;
    int pipefd[2];
    int pipefd2[2];
    char *noenv[]={NULL};      /* empty environment for the sort child */

    printf("This is NoseyParker %s, BtreeFiles Version %s\n",
        VERSION, BTREEFILES_VERSION);
    AnnounceMethod();

    /* Load triples from stdin */
    LoadTriples();
    printf("Found %ld unique keys\n",triplecount);

    /* Ok, create the tree */
    CreateBtree(0L,triplecount);
    for(lo=0L;lo<triplecount;lo++)
        addB(TriIndex[lo],Bhead,&Bhead,&BTreeMax);
    printf("Created %ld Btree items\n",nodecount);

    /* Free the linked triple list as it is no longer needed */
    Tptr=Thead;
    while(Tptr) {Tptr2=Tptr->next; free(Tptr); Tptr=Tptr2;}
    Thead=NULL;                /* do not leave a dangling list head */

    /* close the stdin, just to make sure */
    fclose(stdin);

    /* launch the database dumper and 'sort' */
    fflush(stdout);            /* so the children do not inherit pending output */
    if(pipe(pipefd)<0) {
        perror("Cannot create pipe");
        exit(EXIT_FAILURE);
    }
    child=fork();
    if(child<0) {
        perror("Cannot fork database dumper");
        exit(EXIT_FAILURE);
    }
    if(child==0) {
        close(pipefd[0]);
        DumpAllData(pipefd[1]);
        exit(0);               /* exit() flushes any buffered dump output */
    }
    close(pipefd[1]);

    if(pipe(pipefd2)<0) {
        perror("Cannot create pipe");
        exit(EXIT_FAILURE);
    }
    child=fork();
    if(child<0) {
        perror("Cannot fork sort");
        exit(EXIT_FAILURE);
    }
    if(child==0) {
        if(dup2(pipefd[0],0)<0 || dup2(pipefd2[1],1)<0) {
            perror("Cannot redirect sort");
            _exit(106);
        }
        /* stdin was closed above, so pipefd[0] may itself be descriptor 0;
           only close the originals when they are distinct from the copies. */
        if(pipefd[0]!=0) close(pipefd[0]);
        if(pipefd2[1]!=1) close(pipefd2[1]);
        close(pipefd2[0]);
        /* A NULL envp is non-portable; pass an explicit empty environment. */
        execve(SORTPATH,sort,noenv);
        perror("Cannot exec sort");
        _exit(106);            /* _exit: do not run the parent's exit handlers */
    }
    close(pipefd[0]);
    close(pipefd2[1]);

    /* Read the databases, index them with Btree... */
    /* NOTE(review): the children are never waited for, so a failing sort
       only shows up as missing input here. */
    IndexData(pipefd2[0]);

    /* ... count the disk offsets... */
    diskoffset=0L;
    printf("Counting the database size\n");
    IndexOffset(Bhead,&diskoffset);
    printf("Expected size of the index: %ld KB\n",
        (long)(diskoffset/1024));

    /* ... and save the index */
    SaveIndex(SEEDBTREEFILES);
    return(0);
}
--
当一个女孩儿觉得她不太容易了解那个男人的时候,她会爱他。
※ 来源:·哈工大紫丁香 bbs.hit.edu.cn·[FROM: 天外飞仙]
Powered by KBS BBS 2.0 (http://dev.kcn.cn)
页面执行时间:3.370毫秒