diff options
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc')
-rw-r--r-- | debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc | 1024 |
1 files changed, 1024 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc b/debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc new file mode 100644 index 00000000..eb43af30 --- /dev/null +++ b/debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc @@ -0,0 +1,1024 @@ +// +// WordDBPage.cc +// +// WordDBPage: Implements specific compression scheme for +// Berkeley DB pages containing WordReferences objects. +// +// Part of the ht://Dig package <http://www.htdig.org/> +// Copyright (c) 1999-2004 The ht://Dig Group +// For copyright details, see the file COPYING in your distribution +// or the GNU Library General Public License (LGPL) version 2 or later +// <http://www.gnu.org/copyleft/lgpl.html> +// +// $Id: WordDBPage.cc,v 1.5 2004/05/28 13:15:26 lha Exp $ +// +#ifdef HAVE_CONFIG_H +#include "htconfig.h" +#endif /* HAVE_CONFIG_H */ + +#include"WordDBPage.h" +#include"WordDBCompress.h" +#include<ctype.h> + +#define NBITS_CMPRTYPE 2 +#define CMPRTYPE_NORMALCOMRPESS 0 +#define CMPRTYPE_BADCOMPRESS 1 + +// *********************************************** +// ********** Compression Versions ************** +// *********************************************** + +// never change NBITS_COMPRESS_VERSION ! (otherwise version tracking will fail) +#define NBITS_COMPRESS_VERSION 11 + +// IMPORTANT: change these EVERY time you change something that affects the compression +#define COMPRESS_VERSION 4 +static const char *version_label[]={"INVALID_VERSION_0","INVALID_VERSION_1","INVALID_VERSION_2","14 Dec 1999","3 Jan 2000",NULL}; + +// returns the label of compression version v +static const char * +get_version_label(int v) +{ + // check if version number is ok + if(COMPRESS_VERSION <0 || COMPRESS_VERSION>((sizeof(version_label)/sizeof(*version_label))-1)) + { + errr("get_version_label: version_label[COMPRESS_VERSION] is not valid, please update version_label"); + } + if( v >= (int)((sizeof(version_label)/sizeof(*version_label))-1) ) + { + return("INVALID_VERSION"); + } + // return label + return(version_label[v]); +} + + + +// *********************************************** +// ********** WordDBPage *********************** +// *********************************************** + +// checks if compression/decompression sequence is harmless +int +WordDBPage::TestCompress(int debuglevel) +{ + if(debuglevel>2){printf("ttttttttttttt WordDBPage::TestCompress BEGIN\n");} + int compress_debug=debuglevel-1; + // start by compressing this page + Compressor *res=Compress(compress_debug); + + if(res) + { + int size=res->size(); + // now uncompress into pageu + WordDBPage pageu(pgsz); + res->rewind(); + pageu.Uncompress(res,compress_debug); + + // comapre this page and pageu + int cmp=Compare(pageu); + + // show some results + if(debuglevel>2)printf("TOTAL SIZE: %6d %8f\n",size,size/8.0); + // argh! compare failed somthing went wrong + // display the compress/decompress sequence and fail + if(cmp || size>8*1024*1000000000) + { + if(size>8*1024) + { + printf("---------------------------------------------------\n"); + printf("-----------overflow:%5d------------------------------\n",size/8); + printf("---------------------------------------------------\n"); + printf("---------------------------------------------------\n"); + } + printf("################### ORIGINAL #########################################\n"); + show(); + printf("################### REDECOMPRESSED #########################################\n"); + pageu.show(); + + // re-compress the page verbosely + Compressor *res2=Compress(2); + res2->rewind(); + // re-uncompress the page verbosely + WordDBPage pageu2(pgsz); + pageu2.Uncompress(res2,2); + pageu2.show(); + if(cmp){errr("Compare failed");} + delete res2; + } + pageu.delete_page(); + delete res; + + }else {errr("WordDBPage::TestCompress: Compress failed");} + + if(debuglevel>2){printf("ttttttttttttt WordDBPage::TestCompress END\n");} + return OK; +} + +// find position of first difference between 2 strings +static int first_diff(const String &s1,const String &s2) +{ + int j; + for(j=0;j<s1.length() && j<s2.length() && s1[j]==s2[j];j++); + return(j); +} + +// ******* Uncompress Compressor into this page +int +WordDBPage::Uncompress(Compressor *pin,int ndebug, DB_CMPR_INFO */*=NULL*/) +{ + debug=ndebug; + if(debug>1){verbose=1;} + if(verbose){printf("uuuuuuuuu WordDBPage::Uncompress: BEGIN\n");} + + + // ** first check if versions are OK + int read_version = pin->get_uint(NBITS_COMPRESS_VERSION,"COMPRESS_VERSION"); + if(read_version != COMPRESS_VERSION) + { + fprintf(stderr,"WordDBPage::Uncompress: *** Compression version mismatch ***\n"); + fprintf(stderr,"found version : %3d but using version : %3d\n",read_version,COMPRESS_VERSION); + fprintf(stderr,"found version label: %s\n",get_version_label(read_version)); + fprintf(stderr,"using version label: %s\n",get_version_label(COMPRESS_VERSION)); + fprintf(stderr,"Are you sure you're not reading an old DB with a newer version of the indexer??\n"); + errr("WordDBPage::Uncompress: *** Compression version mismatch ***"); + exit(1); + } + + + // ** now see if this page was a normal or uncorrectly compressed page + int cmprtype=pin->get_uint(NBITS_CMPRTYPE,"CMPRTYPE"); + // two possible cases + switch(cmprtype) + { + case CMPRTYPE_NORMALCOMRPESS:// this was a normaly compressed page + Uncompress_main(pin); + break; + case CMPRTYPE_BADCOMPRESS:// this page did not compress correctly + pin->get_zone((byte *)pg,pgsz*8,"INITIALBUFFER"); + break; + default: + errr("WordDBPage::Uncompress: CMPRTYPE incoherent"); + } + + if(verbose){printf("uuuuuuuuu WordDBPage::Uncompress: END\n");} + return OK; +} + +// ******* Uncompress Compressor into this page +// normally compressed page case +int +WordDBPage::Uncompress_main(Compressor *pin) +{ + if(!pin){errr("WordDBPage::Uncompress: no Compressor to uncompress from!!");} + Compressor &in=*((Compressor *)pin); + if(debug>0){in.set_use_tags();} + int i,j; + // number arrays used to reconstruct the original page + unsigned int **rnums=new unsigned int *[nnums]; + CHECK_MEM(rnums); + // sizes of each array + int *rnum_sizes=new int[nnums]; + CHECK_MEM(rnum_sizes); + // char differences between words + byte *rworddiffs=NULL; + int nrworddiffs; + + // *********** read header + if(Uncompress_header(in)!=OK){return NOTOK;} + + // get first key(s): + //type=5: key(0) stored seperately ... others are decompressed frome differences + // + //type=3: btikey(0) is particular (len=0) it is stored seperately + // btikey(1) stored seperately ... others are decompressed frome differences + // + int nkeysleft=nk; + if(nkeysleft>0) + { + WordDBKey key0=uncompress_key(in,0); + if(type==P_LBTREE){uncompress_data(in,0,key0.RecType());} + nkeysleft--; + } + if(nkeysleft>0 && type==P_IBTREE){uncompress_key(in,1);nkeysleft--;} + + if(nkeysleft>0) + { + // ********* read numerical fields + Uncompress_vals_chaged_flags(in,&(rnums[0]),&(rnum_sizes[0])); + for(j=1;j<nnums;j++) + { + if(verbose)printf("field %2d : start position:%4d \n",j,in.size()); + if(j==3 && verbose){in.verbose=2;} + rnum_sizes[j]=in.get_vals(&(rnums[j]),label_str("NumField",j));// *** + if(j==3 && verbose){in.verbose=0;} + if(verbose){printf("WordDBPage::Uncompress_main:got numfield:%2d:nvals:%4d\n",j,rnum_sizes[j]);} + } + + // ********* read word differences + nrworddiffs=in.get_fixedbitl(&rworddiffs,"WordDiffs"); + + + // ********* rebuild original page + Uncompress_rebuild(rnums,rnum_sizes,nnums,rworddiffs,nrworddiffs); + Uncompress_show_rebuild(rnums,rnum_sizes,nnums,rworddiffs,nrworddiffs); + + + for(i=0;i<nnums;i++){delete [] rnums[i];} + } + delete [] rnums; + delete [] rnum_sizes; + if(rworddiffs){delete [] rworddiffs;} + return 0; +} +void +WordDBPage::Uncompress_vals_chaged_flags(Compressor &in,unsigned int **pcflags,int *pn) +{ + int n=in.get_uint_vl(NBITS_NVALS,"FlagsField"); + unsigned int *cflags=new unsigned int[n]; + unsigned int ex=0; + int nbits=num_bits(n); + for(int i=0;i<n;i++) + { + ex=in.get_uint(WordKey::NFields(),label_str("cflags",i)); + cflags[i]=ex; + int rep=in.get("rep"); + if(rep) + { + rep=in.get_uint_vl(nbits,NULL); + for(int k=1;k<=rep;k++){cflags[k+i]=ex;} + i+=rep; + } + } + + *pn=n; + *pcflags=cflags; +} +int +WordDBPage::Uncompress_header(Compressor &in) +{ + pg->lsn.file =in.get_uint_vl( 8*sizeof(pg->lsn.file ),"page:lsn.file"); + pg->lsn.offset =in.get_uint_vl( 8*sizeof(pg->lsn.offset ),"page:lsn.offset"); + pg->pgno =in.get_uint_vl( 8*sizeof(pg->pgno ),"page:pgno"); + pg->prev_pgno =in.get_uint_vl( 8*sizeof(pg->prev_pgno ),"page:prev_pgno"); + pg->next_pgno =in.get_uint_vl( 8*sizeof(pg->next_pgno ),"page:next_pgno"); + pg->entries =in.get_uint_vl( 8*sizeof(pg->entries ),"page:entries"); + pg->hf_offset =in.get_uint_vl( 8*sizeof(pg->hf_offset ),"page:hf_offset"); + pg->level =in.get_uint_vl( 8*sizeof(pg->level ),"page:level"); + pg->type =in.get_uint_vl( 8*sizeof(pg->type ),"page:type"); + + init(); + + if(verbose) + { + printf("************************************\n"); + printf("******** WordDBPage::Uncompress: page header ***\n"); + printf("************************************\n"); + printf("page size:%d\n",(int)pgsz); + printf(" 00-07: Log sequence number. file : %d\n", pg->lsn.file ); + printf(" 00-07: Log sequence number. offset: %d\n", pg->lsn.offset ); + printf(" 08-11: Current page number. : %d\n", pg->pgno ); + printf(" 12-15: Previous page number. : %d\n", pg->prev_pgno ); + printf(" 16-19: Next page number. : %d\n", pg->next_pgno ); + printf(" 20-21: Number of item pairs on the page. : %d\n", pg->entries ); + printf(" 22-23: High free byte page offset. : %d\n", pg->hf_offset ); + printf(" 24: Btree tree level. : %d\n", pg->level ); + printf(" 25: Page type. : %d\n", pg->type ); + } + return OK; +} +void +WordDBPage::Uncompress_rebuild(unsigned int **rnums,int *rnum_sizes,int nnums0,byte *rworddiffs,int nrworddiffs) +{ + int irwordiffs=0; + int nfields=WordKey::NFields(); + int *rnum_pos=new int[ nnums0];// current index count + CHECK_MEM(rnum_pos); + + int ii,j; + for(j=0;j<nnums0;j++){rnum_pos[j]=0;} + + int i0=0; + if(type==P_IBTREE){i0=1;}// internal pages have particular first key + + WordDBKey pkey; + WordDBKey akey=get_WordDBKey(i0); + + // reconstruct each key using previous key and coded differences + for(ii=i0;ii<nk;ii++) + { + WordDBRecord arec; + BINTERNAL bti; + + if(type==P_LBTREE) + { + // **** get the data fields + arec.set_decompress(rnums,rnum_sizes,ii,CNDATADATA,CNDATASTATS0,CNDATASTATS1); + } + else + { + if(type!=3){errr("WordDBPage::Uncompress_rebuild: unsupported type!=3");} + // ****** btree internal page specific + bti.pgno =rnums[CNBTIPGNO ][rnum_pos[CNBTIPGNO ]++]; + bti.nrecs=rnums[CNBTINRECS][rnum_pos[CNBTINRECS]++]; + } + // all that follows codes differences between succesive entries + // that is: Numerical key fields, Words + if(ii>i0) + { + unsigned int flags=rnums[CNFLAGS][rnum_pos[CNFLAGS]++]; + int foundfchange=0; + // **** reconstruct the word + if(flags&pow2(nfields-1))// check flags to see if word has changed + { + foundfchange=1; + if(rnum_pos[CNWORDDIFFLEN]>=rnum_sizes[CNWORDDIFFLEN]){errr("WordDBPage::Uncompress read wrong num worddiffs");} + // get position of first character that changes in this word + int diffpos=rnums[CNWORDDIFFPOS][rnum_pos[CNWORDDIFFPOS]++]; + // get size of changed part of the word + int difflen=rnums[CNWORDDIFFLEN][rnum_pos[CNWORDDIFFLEN]++]; + int wordlen=diffpos+difflen; + char *str=new char [wordlen+1]; + CHECK_MEM(str); + // copy the unchanged part into str from previos key's word + if(diffpos)strncpy(str,(char *)pkey.GetWord(),diffpos); + // copy the changed part from coded word differences + strncpy(str+diffpos,(char *)rworddiffs+irwordiffs,difflen); + str[wordlen]=0; + if(verbose)printf("key %3d word:\"%s\"\n",ii,str); + akey.SetWord(str); + irwordiffs+=difflen; + delete [] str; + + }else{akey.SetWord(pkey.GetWord());} + // **** reconstruct the numerical key fields + for(j=1;j<nfields;j++) + { + // check flags to see if this field has changed + int changed=flags&pow2(j-1); + if(changed) + { + // this field's number + int k=CNFIELDS+j-1; + // current position within coded differences of this field + int indx=rnum_pos[k]; + if(indx>=rnum_sizes[k]){errr("WordDBPage::Uncompress read wrong num of changes in a field");} + if(!foundfchange) + { + // this is the first field that changes in this key + // so difference is coded compared to value in pevious key + akey.Set(j,rnums[k][indx]+pkey.Get(j)); + } + else + { + // this is NOT the first field that changes in this key + // so difference is coded from 0 + akey.Set(j,rnums[k][indx]); + } + // we read 1 element from coded differences in this field + rnum_pos[k]++; + foundfchange=1; + } + else + { + // no changes found, just copy from previous key + if(!foundfchange){akey.Set(j,pkey.Get(j));} + else{akey.Set(j,0);} + } + } + } + // now insert key/data into page + if(type==P_LBTREE) + { + if(ii>i0)insert_key(akey); + if(ii>i0)insert_data(arec); + } + else + { + if(type!=3){errr("WordDBPage::Uncompress_rebuild: unsupported type!=3");} + if(ii>i0)insert_btikey(akey,bti); + } + pkey=akey; + } + delete [] rnum_pos; +} + +// display +void +WordDBPage::Uncompress_show_rebuild(unsigned int **rnums,int *rnum_sizes,int nnums0,byte *rworddiffs,int nrworddiffs) +{ + int i,j; + if(verbose) + { + printf("WordDBPage::Uncompress_show_rebuild: rebuilt numerical fields\n"); + for(j=0;j<nnums0;j++) + { + printf("resfield %2d %13s:",j,number_field_label(j)); + for(i=0;i<rnum_sizes[j];i++) + { + printf("%4d ",rnums[j][i]); + } + printf("\n"); + printf("diffield %2d:",j); + for(i=0;i<rnum_sizes[j];i++) + { + ;// printf("%2d:%d ",i,nums[j*nk+i] == rnums[j][i]); + } + printf("\n"); + } + printf("reswordiffs:"); + for(i=0;i<nrworddiffs;i++){printf("%c",(isalnum(rworddiffs[i]) ? rworddiffs[i] : '#'));} + printf("\n"); + } +} + +Compressor * +WordDBPage::Compress(int ndebug, DB_CMPR_INFO *cmprInfo/*=NULL*/) +{ + debug=ndebug; + if(debug>1){verbose=1;} + + Compressor *res=(Compressor *)new Compressor((cmprInfo ? + pgsz/(1<<(cmprInfo->coefficient)) : + pgsz/4)); + CHECK_MEM(res); + if(debug>0){res->set_use_tags();} + + res->put_uint(COMPRESS_VERSION,NBITS_COMPRESS_VERSION,"COMPRESS_VERSION"); + res->put_uint(CMPRTYPE_NORMALCOMRPESS,NBITS_CMPRTYPE,"CMPRTYPE"); + + if(verbose){printf("WordDBPage::Compress: trying normal compress\n");} + int cmpr_ok=Compress_main(*((Compressor *)res)); + + if(cmpr_ok!=OK || res->buffsize()>pgsz) + { + if(verbose){printf("WordDBCompress::Compress full compress failed ... not compressing at all\n");} + show(); + + if(res){delete res;} + res=new Compressor; + CHECK_MEM(res); + + if(debug>0){res->set_use_tags();} + + res->put_uint(COMPRESS_VERSION,NBITS_COMPRESS_VERSION,"COMPRESS_VERSION"); + res->put_uint(CMPRTYPE_BADCOMPRESS,NBITS_CMPRTYPE,"CMPRTYPE"); + + res->put_zone((byte *)pg,pgsz*8,"INITIALBUFFER"); + } + + if(verbose) + { + printf("WordDBPage::Compress: Final bitstream result\n"); + res->show(); + } + return res; +}; + +int +WordDBPage::Compress_main(Compressor &out) +{ + if(debug>1){verbose=1;} + if(verbose){printf("WordDBPage::Compress_main: starting compression\n");} + + if(pg->type!=5 && pg->type!=3){ printf("pg->type:%3d\n",pg->type);return NOTOK;} +// if(pg->type==P_IBTREE){show();} + + + // *************** initialize data structures ************** + int j; + // 0 -> changed/unchanged flags : 4bits + // 1..n -> numerical fields delta : ?bits (depending on field) + // n+1 -> word changed size : 1 + int *nums =new int[nk*nnums]; + CHECK_MEM(nums); + int *nums_pos=new int[ nnums]; + CHECK_MEM(nums_pos); +// int *cnsizes =new int[ nnums]; + for(j=0;j<nnums;j++){nums_pos[j]=0;} +// for(j=1;j<nfields;j++) {cnsizes[j]=word_key_info->sort[j].bits;} +// cnsizes[CNFLAGS]=4; +// cnsizes[CNWORDDIFFPOS ]=8; +// cnsizes[CNWORDDIFFLEN ]=8; + HtVector_byte worddiffs; + + +//bmt_START; + // *************** extract values and wordiffs ************** + if(nk>0) + { + Compress_extract_vals_wordiffs(nums,nums_pos,nnums,worddiffs); + if(verbose)Compress_show_extracted(nums,nums_pos,nnums,worddiffs); + } + + // *************** init compression ************** + +//bmt_END;bmt_START; + Compress_header(out); + + // *************** compress values and wordiffs ************** + + // compress first key(s) + int nkeysleft=nk; + if(nkeysleft>0) + { + compress_key(out,0); + if(type==P_LBTREE){compress_data(out,0);} + nkeysleft--; + } + if(nkeysleft>0 && type==P_IBTREE){compress_key(out,1);nkeysleft--;} + + if(nkeysleft>0) + { +//bmt_END;bmt_START; + // compress values + Compress_vals(out,nums,nums_pos,nnums); +//bmt_END;bmt_START; + + // compress worddiffs + int size=out.put_fixedbitl(worddiffs.begin(),worddiffs.size(),"WordDiffs"); + if(verbose)printf("compressed wordiffs : %3d values: %4d bits %4f bytes\n",worddiffs.size(),size,size/8.0); +//bmt_END; + } + + // *************** cleanup ************** + + delete [] nums ; + delete [] nums_pos; + + return OK; +} + +void +WordDBPage::Compress_extract_vals_wordiffs(int *nums,int *nums_pos,int ,HtVector_byte &worddiffs) +{ + WordDBKey pkey; + + int ii,j; + int i0=0; + if(type==P_IBTREE){i0=1;}// internal pages have particular first key + for(ii=i0;ii<nk;ii++) + { + WordDBKey akey=get_WordDBKey(ii); + + if(type==P_LBTREE) + { + // ****** WordRecord (data/stats) + // get word record + WordDBRecord arec(data(ii),akey.RecType()); + // add record + if(arec.type==WORD_RECORD_STATS) + { + nums[CNDATASTATS0*nk+nums_pos[CNDATASTATS0]++]=arec.info.stats.noccurrence; + nums[CNDATASTATS1*nk+nums_pos[CNDATASTATS1]++]=arec.info.stats.ndoc; + } + else + if(arec.type==WORD_RECORD_DATA) + { + nums[CNDATADATA *nk+nums_pos[CNDATADATA ]++]=arec.info.data; + } + } + else + { + if(type!=3){errr("WordDBPage::Compress_extract_vals_wordiffs: unsupported type!=3");} + // ****** btree internal page specific + nums[CNBTIPGNO *nk+nums_pos[CNBTIPGNO ]++]=btikey(ii)->pgno ; + nums[CNBTINRECS*nk+nums_pos[CNBTINRECS]++]=btikey(ii)->nrecs; + } + + // all that follows codes differences between succesive entries + // that is: Numerical key fields, Words + if(ii>i0) + { + // clear changed falgs + int iflag=CNFLAGS*nk+nums_pos[CNFLAGS]++; + nums[iflag]=0; + + int foundfchange=0; + const String &aword=akey.GetWord(); + const String &pword=pkey.GetWord(); + if(!(aword==pword)){foundfchange=1;} + + // check numerical fields for changes + // ******** sets CNFIELDS and some of CNFLAGS ************ + for(j=1;j<akey.NFields();j++) + { + int diff=akey.Get(j)-(foundfchange ? 0 : pkey.Get(j)); + if(diff) + { + foundfchange=1; + nums[iflag]|=pow2(j-1); + nums[ j*nk+nums_pos[j]++]=diff; + } + } + + // ************ check word for changes + // ******** sets CNWORDDIFFPOS CNWORDDIFFLEN and some of CNFLAGS ************ + if(!(aword==pword)) + { + nums[iflag]|=pow2(akey.NFields()-1); + int fd=first_diff(aword,pword); + nums[CNWORDDIFFPOS*nk+nums_pos[CNWORDDIFFPOS]++]=fd; + nums[CNWORDDIFFLEN*nk+nums_pos[CNWORDDIFFLEN]++]=aword.length()-fd; + for(int s=fd;s<aword.length();s++){worddiffs.push_back(aword[s]);} + } + } + pkey=akey; + } +// nums_pos[CNFLAGS]=nk-1; + +} + +void +WordDBPage::Compress_vals_changed_flags(Compressor &out,unsigned int *cflags,int n) +{ + int size=out.size(); + out.put_uint_vl(n,NBITS_NVALS,"FlagsField"); + unsigned int ex=0; + int nbits=num_bits(n); + for(int i=0;i<n;i++) + { + ex=cflags[i]; + out.put_uint(ex,WordKey::NFields(),label_str("cflags",i)); + int k; + for(k=1;k+i<n;k++){if(ex!=cflags[i+k]){break;}} + k--; + if(k>0) + { + out.put(1,"rep"); + out.put_uint_vl(k,nbits,NULL); + i+=k; + } + else + {out.put(0,"rep");} + } + size=out.size()-size; + if(verbose)printf("compressed flags %2d : %3d values: %4d bits %8f bytes : ended bit field pos:%6d\n",0,n,size,size/8.0,out.size()); + +} + +void +WordDBPage::Compress_vals(Compressor &out,int *nums,int *nums_pos,int nnums0) +{ + // the changed flags fields are particular + Compress_vals_changed_flags(out,(unsigned int *)(nums+0*nk),nums_pos[0]); + + + // compress the difference numbers for the numerical fields + for( int j=1;j<nnums0;j++) + { + int nv=nums_pos[j]; + unsigned int *v=(unsigned int *)(nums+j*nk); + if((1 || j==3) && verbose){out.verbose=2;} + int size=out.put_vals(v,nv,label_str("NumField",j)); + if((1 || j==3) && verbose){out.verbose=0;} + if(verbose)printf("compressed field %2d : %3d values: %4d bits %8f bytes : ended bit field pos:%6d\n",j,n,size,size/8.0,out.size()); + } +} + +void +WordDBPage::Compress_header(Compressor &out) +{ +// no smart compression ... for now + out.put_uint_vl(pg->lsn.file , 8*sizeof(pg->lsn.file ),"page:lsn.file"); + out.put_uint_vl(pg->lsn.offset , 8*sizeof(pg->lsn.offset ),"page:lsn.offset"); + out.put_uint_vl(pg->pgno , 8*sizeof(pg->pgno ),"page:pgno"); + out.put_uint_vl(pg->prev_pgno , 8*sizeof(pg->prev_pgno ),"page:prev_pgno"); + out.put_uint_vl(pg->next_pgno , 8*sizeof(pg->next_pgno ),"page:next_pgno"); + out.put_uint_vl(pg->entries , 8*sizeof(pg->entries ),"page:entries"); + out.put_uint_vl(pg->hf_offset , 8*sizeof(pg->hf_offset ),"page:hf_offset"); + out.put_uint_vl(pg->level , 8*sizeof(pg->level ),"page:level"); + out.put_uint_vl(pg->type , 8*sizeof(pg->type ),"page:type"); +} + +void +WordDBPage::Compress_show_extracted(int *nums,int *nums_pos,int nnums0,HtVector_byte &worddiffs) +{ + int i,j; + int *cnindexe2=new int[ nnums0]; + CHECK_MEM(cnindexe2); + for(j=0;j<nnums0;j++){cnindexe2[j]=0;} + for(j=0;j<nnums0;j++) + { + printf("%13s",number_field_label(j)); + } + printf("\n"); + int w=0; + int mx=(nk>worddiffs.size() ? nk : worddiffs.size()); + for(i=0;i<mx;i++) + { + printf("%3d: ",i); + for(j=0;j<nnums0;j++) + { + int k=cnindexe2[j]++; + int nbits=(j ? 16:4);// just to show the flags field... + if(k<nums_pos[j]) + { + int val=nums[j*nk+k]; + if(nbits<8){show_bits(val,nbits);printf(" ");} + else + { + printf("|%12u",val); + } + } + else + { + if(nbits<8){printf(" ");} + else + { + printf("| "); + } + } + } + if(w<worddiffs.size()){printf(" %02x %c ",worddiffs[w],(isalnum(worddiffs[w]) ? worddiffs[w] : '#'));} + w++; + printf("\n"); + } + delete [] cnindexe2; +} + +// Compare two pages to check if equal +int +WordDBPage::Compare(WordDBPage &other) +{ + int res=0; + // Compare headers + if(other.pgsz != pgsz ){res++;printf("compare failed for pgsz \n");} + if(other.pg->lsn.file != pg->lsn.file ){res++;printf("compare failed for pg->lsn.file \n");} + if(other.pg->lsn.offset != pg->lsn.offset ){res++;printf("compare failed for pg->lsn.offset \n");} + if(other.pg->pgno != pg->pgno ){res++;printf("compare failed for pg->pgno \n");} + if(other.pg->prev_pgno != pg->prev_pgno ){res++;printf("compare failed for pg->prev_pgno \n");} + if(other.pg->next_pgno != pg->next_pgno ){res++;printf("compare failed for pg->next_pgno \n");} + if(other.pg->entries != pg->entries ){res++;printf("compare failed for pg->entries \n");} + if(other.pg->hf_offset != pg->hf_offset ){res++;printf("compare failed for pg->hf_offset \n");} + if(other.pg->level != pg->level ){res++;printf("compare failed for pg->level \n");} + if(other.pg->type != pg->type ){res++;printf("compare failed for pg->type \n");} + int i,k; + // double check header + if(memcmp((void *)pg,(void *)other.pg,sizeof(PAGE)-sizeof(db_indx_t))) + { + res++; + printf("compare failed in some unknown place in header:\n"); + for(i=0;i<(int)(sizeof(PAGE)-sizeof(db_indx_t));i++) + { + printf("%3d: %3x %3x\n",i,((byte *)pg)[i],((byte *)other.pg)[i]); + } + } + + // pg->type != 5 && !=3 pages are not really compressed: just memcmp + if(pg->type != 5 && pg->type != 3) + { + if(memcmp((void *)pg,(void *)other.pg,pgsz)) + { + printf("compare:PAGETYPE:!=5 and memcmp failed\n"); + res++; + printf("compare failed\n"); + } + return(res); + } + + // compare each key/data pair + for(i=0;i<(type==P_LBTREE ? pg->entries/2 : pg->entries);i++) + { + if(pg->type==P_LBTREE) + { + // compare keys + if(key(i)->len !=other.key(i)->len ) + { + printf("compare:key(%2d) len : %2d != %2d\n",i,key(i)->len ,other.key(i)->len ); + res++; + } + if(key(i)->type!=other.key(i)->type) + { + printf("compare:key(%2d) type: %2d != %2d\n",i,key(i)->type,other.key(i)->type); + res++; + } + if(memcmp(key(i)->data,other.key(i)->data,key(i)->len)) + { + printf("compare :key(%2d)\n",i); + for(k=0;k<key(i)->len;k++) + { + int c=key(i)->data[k]; + if(isalnum(c)){printf(" %c ",c);} + else{printf("%02x ",c);} + } + printf("\n"); + for(k=0;k<key(i)->len;k++) + { + int c=other.key(i)->data[k]; + if(isalnum(c)){printf(" %c ",c);} + else{printf("%02x ",c);} + } + printf("\n"); + res++;printf("compare:key failed\n"); + } + // compare data + if(data(i)->len !=other.data(i)->len ) + { + printf("compare:data(%2d) len : %2d != %2d\n",i,data(i)->len ,other.data(i)->len ); + res++; + } + if(data(i)->type!=other.data(i)->type) + { + printf("compare:data(%2d) type: %2d != %2d\n",i,data(i)->type,other.key(i)->type); + res++; + } + if(memcmp(data(i)->data,other.data(i)->data,data(i)->len)) + { + printf("compare :data(%2d)\n",i); + for(k=0;k<data(i)->len;k++) + { + printf("%02x ",data(i)->data[k]); + } + printf("\n"); + for(k=0;k<data(i)->len;k++) + { + printf("%02x ",other.data(i)->data[k]); + } + printf("\n"); + res++;printf("compare:data failed\n"); + } + } + else + { + if(type!=3){errr("WordDBPage::Compare: unsupported type!=3");} + if(btikey(i)->len != other.btikey(i)->len || + btikey(i)->type != other.btikey(i)->type || + btikey(i)->pgno != other.btikey(i)->pgno || + btikey(i)->nrecs != other.btikey(i)->nrecs ) + { + printf("compare:btikey(%2d) failed\n",i); + printf("this :len :%4d type :%4d pgno :%4d nrecs :%4d \n",btikey(i)->len,btikey(i)->type, + btikey(i)->pgno,btikey(i)->nrecs); + printf("other:len :%4d type :%4d pgno :%4d nrecs :%4d \n",other.btikey(i)->len,other.btikey(i)->type, + other.btikey(i)->pgno,other.btikey(i)->nrecs); + res++; + + } + if(memcmp(btikey(i)->data,other.btikey(i)->data,btikey(i)->len)) + { + printf("compare :btikey(%2d)\n",i); + for(k=0;k<btikey(i)->len;k++) + { + printf("%02x ",btikey(i)->data[k]); + } + printf("\n"); + for(k=0;k<btikey(i)->len;k++) + { + printf("%02x ",other.btikey(i)->data[k]); + } + printf("\n"); + res++;printf("compare:btikey failed\n"); + + } + } + } + if(pg->entries>0) + { + int smallestoffset=HtMaxMin::min_v(pg->inp,pg->entries); + int other_smallestoffset=HtMaxMin::min_v(other.pg->inp,other.pg->entries); + if(smallestoffset!=other_smallestoffset) + { + printf("compare fail:smallestoffset:%d other_smallestoffset:%d\n",smallestoffset,other_smallestoffset); + res++; + } + } + + return(res); +} + +// Bit stream description +// | field[last] changed only | yes -> delta field[last] +// + +// redo=0 -> +// redo=1 -> oops, dont show! +// redo=2 -> +void +WordDBPage::show() +{ + int i,j,dd,l; + + printf("************************************\n"); + printf("************************************\n"); + printf("************************************\n"); + printf("page size:%d\n",(int)pgsz); + printf(" 00-07: Log sequence number. file : %d\n", pg->lsn.file ); + printf(" 00-07: Log sequence number. offset: %d\n", pg->lsn.offset ); + printf(" 08-11: Current page number. : %d\n", pg->pgno ); + printf(" 12-15: Previous page number. : %d\n", pg->prev_pgno ); + printf(" 16-19: Next page number. : %d\n", pg->next_pgno ); + printf(" 20-21: Number of item pairs on the page. : %d\n", pg->entries ); + printf(" 22-23: High free byte page offset. : %d\n", pg->hf_offset ); + printf(" 24: Btree tree level. : %d\n", pg->level ); + printf(" 25: Page type. : %d\n", pg->type ); + + + printf("entry offsets:"); + for(i=0;i<pg->entries;i++){printf("%4d ",pg->inp[i]);} + printf("\n"); + + if(pg->type ==5) + { + + WordRecord dud; + WordKey prev; + int pagecl=0; + for(i=0;i<pg->entries;i++) + { + if( (i%2) && dud.type==WORD_RECORD_NONE){continue;} + printf("\n||%c:%3d:off:%03d:invoff:%4d:len:%2d:typ:%x:",i%2 ? 'D' : 'K',i,e_offset(i),pgsz-e_offset(i),entry(i)->len,entry(i)->type); + if(i>0) + { + l=entry(i)->len+3; + dd=(int)(e_offset(i-1))-l; + dd-=dd%4; + printf("% 5d:: ",(e_offset(i)-dd)); + } + if(!(i%2)) + { + WordDBKey tkey(entry(i)); + int fieldchanged[10]; + char *wordchange=NULL; + printf("\""); + printf("%s",(char *)tkey.GetWord()); + printf("\""); + for(j=0;j<20-tkey.GetWord().length();j++){printf(" ");} + printf("|"); + for(j=1;j<tkey.NFields();j++){printf("%4x ",tkey.Get(j));} + printf("|"); + + for(j=1;j<tkey.NFields();j++) + { + int diff=tkey.Get(j)-prev.Get(j); + if(diff<0){diff=tkey.Get(j);} + printf("%6d ",diff); + fieldchanged[j]=diff; + } + + String &word=tkey.GetWord(); + String &pword=prev.GetWord(); + if(word==pword){printf(" 00 ===");fieldchanged[0]=0;} + else + { + int fd=first_diff(word,pword); + fieldchanged[0]=fd+1; + wordchange=((char *)word)+fd; + printf(" %2d %s",fd,((char *)word)+fd); + } + + int keycl=tkey.NFields(); + for(j=1;j<tkey.NFields();j++) + { + if(fieldchanged[j]){keycl+=WordKeyInfo::Instance()->sort[j].bits;} + } + if(fieldchanged[0]){keycl+=3;keycl+=8*strlen(wordchange);} + printf(" ::%2d %f",keycl,keycl/8.0); + pagecl+=keycl; + prev=tkey; + } + else + { + if(entry(i)->len>100){printf("WordDBPage::show: aaargh strange failing\n");return;} + for(j=0;j<entry(i)->len;j++) + { + printf("%02x ",entry(i)->data[j]); + } + } + } + printf("\n"); + } + else + if(1) + { + int nn=0; + // dump hex + for(i=0;;i++) + { + printf("%5d: ",nn); + for(j=0;j<20;j++) + { + printf("%2x ",((byte *)pg)[nn++]); + if(nn>=pgsz){break;} + } + printf("\n"); + if(nn>=pgsz){break;} + } + } + if(pg->type == 3) + { + for(i=0;i<pg->entries;i++) + { + BINTERNAL *bie=GET_BINTERNAL(pg,i); + printf("%3d: off:%4d:len:%3d :type:%3d :pgno:%4d: nrecs:%4d:: ",i,pg->inp[i],bie->len,bie->type,bie->pgno,bie->nrecs); + WordDBKey tkey(bie); + for(j=0;j<bie->len-tkey.GetWord().length();j++){printf("%2x ",bie->data[j]);} + printf(" : "); + for(j=1;j<tkey.NFields();j++){printf("%5d ",tkey.Get(j));} + printf("\"%s\"\n",(char *)tkey.GetWord()); + } + } + +} + + |