summaryrefslogtreecommitdiffstats
path: root/debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc
diff options
context:
space:
mode:
Diffstat (limited to 'debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc')
-rw-r--r--debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc1024
1 files changed, 1024 insertions, 0 deletions
diff --git a/debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc b/debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc
new file mode 100644
index 00000000..eb43af30
--- /dev/null
+++ b/debian/htdig/htdig-3.2.0b6/htword/WordDBPage.cc
@@ -0,0 +1,1024 @@
+//
+// WordDBPage.cc
+//
+// WordDBPage: Implements specific compression scheme for
+// Berkeley DB pages containing WordReferences objects.
+//
+// Part of the ht://Dig package <http://www.htdig.org/>
+// Copyright (c) 1999-2004 The ht://Dig Group
+// For copyright details, see the file COPYING in your distribution
+// or the GNU Library General Public License (LGPL) version 2 or later
+// <http://www.gnu.org/copyleft/lgpl.html>
+//
+// $Id: WordDBPage.cc,v 1.5 2004/05/28 13:15:26 lha Exp $
+//
+#ifdef HAVE_CONFIG_H
+#include "htconfig.h"
+#endif /* HAVE_CONFIG_H */
+
+#include"WordDBPage.h"
+#include"WordDBCompress.h"
+#include<ctype.h>
+
+#define NBITS_CMPRTYPE 2
+#define CMPRTYPE_NORMALCOMRPESS 0
+#define CMPRTYPE_BADCOMPRESS 1
+
+// ***********************************************
+// ********** Compression Versions **************
+// ***********************************************
+
+// never change NBITS_COMPRESS_VERSION ! (otherwise version tracking will fail)
+#define NBITS_COMPRESS_VERSION 11
+
+// IMPORTANT: change these EVERY time you change something that affects the compression
+#define COMPRESS_VERSION 4
+static const char *version_label[]={"INVALID_VERSION_0","INVALID_VERSION_1","INVALID_VERSION_2","14 Dec 1999","3 Jan 2000",NULL};
+
+// returns the label of compression version v
+static const char *
+get_version_label(int v)
+{
+ // check if version number is ok
+ if(COMPRESS_VERSION <0 || COMPRESS_VERSION>((sizeof(version_label)/sizeof(*version_label))-1))
+ {
+ errr("get_version_label: version_label[COMPRESS_VERSION] is not valid, please update version_label");
+ }
+ if( v >= (int)((sizeof(version_label)/sizeof(*version_label))-1) )
+ {
+ return("INVALID_VERSION");
+ }
+ // return label
+ return(version_label[v]);
+}
+
+
+
+// ***********************************************
+// ********** WordDBPage ***********************
+// ***********************************************
+
+// checks if compression/decompression sequence is harmless
+int
+WordDBPage::TestCompress(int debuglevel)
+{
+ if(debuglevel>2){printf("ttttttttttttt WordDBPage::TestCompress BEGIN\n");}
+ int compress_debug=debuglevel-1;
+ // start by compressing this page
+ Compressor *res=Compress(compress_debug);
+
+ if(res)
+ {
+ int size=res->size();
+ // now uncompress into pageu
+ WordDBPage pageu(pgsz);
+ res->rewind();
+ pageu.Uncompress(res,compress_debug);
+
+ // comapre this page and pageu
+ int cmp=Compare(pageu);
+
+ // show some results
+ if(debuglevel>2)printf("TOTAL SIZE: %6d %8f\n",size,size/8.0);
+ // argh! compare failed somthing went wrong
+ // display the compress/decompress sequence and fail
+ if(cmp || size>8*1024*1000000000)
+ {
+ if(size>8*1024)
+ {
+ printf("---------------------------------------------------\n");
+ printf("-----------overflow:%5d------------------------------\n",size/8);
+ printf("---------------------------------------------------\n");
+ printf("---------------------------------------------------\n");
+ }
+ printf("################### ORIGINAL #########################################\n");
+ show();
+ printf("################### REDECOMPRESSED #########################################\n");
+ pageu.show();
+
+ // re-compress the page verbosely
+ Compressor *res2=Compress(2);
+ res2->rewind();
+ // re-uncompress the page verbosely
+ WordDBPage pageu2(pgsz);
+ pageu2.Uncompress(res2,2);
+ pageu2.show();
+ if(cmp){errr("Compare failed");}
+ delete res2;
+ }
+ pageu.delete_page();
+ delete res;
+
+ }else {errr("WordDBPage::TestCompress: Compress failed");}
+
+ if(debuglevel>2){printf("ttttttttttttt WordDBPage::TestCompress END\n");}
+ return OK;
+}
+
+// find position of first difference between 2 strings
+static int first_diff(const String &s1,const String &s2)
+{
+ int j;
+ for(j=0;j<s1.length() && j<s2.length() && s1[j]==s2[j];j++);
+ return(j);
+}
+
+// ******* Uncompress Compressor into this page
+int
+WordDBPage::Uncompress(Compressor *pin,int ndebug, DB_CMPR_INFO */*=NULL*/)
+{
+ debug=ndebug;
+ if(debug>1){verbose=1;}
+ if(verbose){printf("uuuuuuuuu WordDBPage::Uncompress: BEGIN\n");}
+
+
+ // ** first check if versions are OK
+ int read_version = pin->get_uint(NBITS_COMPRESS_VERSION,"COMPRESS_VERSION");
+ if(read_version != COMPRESS_VERSION)
+ {
+ fprintf(stderr,"WordDBPage::Uncompress: *** Compression version mismatch ***\n");
+ fprintf(stderr,"found version : %3d but using version : %3d\n",read_version,COMPRESS_VERSION);
+ fprintf(stderr,"found version label: %s\n",get_version_label(read_version));
+ fprintf(stderr,"using version label: %s\n",get_version_label(COMPRESS_VERSION));
+ fprintf(stderr,"Are you sure you're not reading an old DB with a newer version of the indexer??\n");
+ errr("WordDBPage::Uncompress: *** Compression version mismatch ***");
+ exit(1);
+ }
+
+
+ // ** now see if this page was a normal or uncorrectly compressed page
+ int cmprtype=pin->get_uint(NBITS_CMPRTYPE,"CMPRTYPE");
+ // two possible cases
+ switch(cmprtype)
+ {
+ case CMPRTYPE_NORMALCOMRPESS:// this was a normaly compressed page
+ Uncompress_main(pin);
+ break;
+ case CMPRTYPE_BADCOMPRESS:// this page did not compress correctly
+ pin->get_zone((byte *)pg,pgsz*8,"INITIALBUFFER");
+ break;
+ default:
+ errr("WordDBPage::Uncompress: CMPRTYPE incoherent");
+ }
+
+ if(verbose){printf("uuuuuuuuu WordDBPage::Uncompress: END\n");}
+ return OK;
+}
+
+// ******* Uncompress Compressor into this page
+// normally compressed page case
+int
+WordDBPage::Uncompress_main(Compressor *pin)
+{
+ if(!pin){errr("WordDBPage::Uncompress: no Compressor to uncompress from!!");}
+ Compressor &in=*((Compressor *)pin);
+ if(debug>0){in.set_use_tags();}
+ int i,j;
+ // number arrays used to reconstruct the original page
+ unsigned int **rnums=new unsigned int *[nnums];
+ CHECK_MEM(rnums);
+ // sizes of each array
+ int *rnum_sizes=new int[nnums];
+ CHECK_MEM(rnum_sizes);
+ // char differences between words
+ byte *rworddiffs=NULL;
+ int nrworddiffs;
+
+ // *********** read header
+ if(Uncompress_header(in)!=OK){return NOTOK;}
+
+ // get first key(s):
+ //type=5: key(0) stored seperately ... others are decompressed frome differences
+ //
+ //type=3: btikey(0) is particular (len=0) it is stored seperately
+ // btikey(1) stored seperately ... others are decompressed frome differences
+ //
+ int nkeysleft=nk;
+ if(nkeysleft>0)
+ {
+ WordDBKey key0=uncompress_key(in,0);
+ if(type==P_LBTREE){uncompress_data(in,0,key0.RecType());}
+ nkeysleft--;
+ }
+ if(nkeysleft>0 && type==P_IBTREE){uncompress_key(in,1);nkeysleft--;}
+
+ if(nkeysleft>0)
+ {
+ // ********* read numerical fields
+ Uncompress_vals_chaged_flags(in,&(rnums[0]),&(rnum_sizes[0]));
+ for(j=1;j<nnums;j++)
+ {
+ if(verbose)printf("field %2d : start position:%4d \n",j,in.size());
+ if(j==3 && verbose){in.verbose=2;}
+ rnum_sizes[j]=in.get_vals(&(rnums[j]),label_str("NumField",j));// ***
+ if(j==3 && verbose){in.verbose=0;}
+ if(verbose){printf("WordDBPage::Uncompress_main:got numfield:%2d:nvals:%4d\n",j,rnum_sizes[j]);}
+ }
+
+ // ********* read word differences
+ nrworddiffs=in.get_fixedbitl(&rworddiffs,"WordDiffs");
+
+
+ // ********* rebuild original page
+ Uncompress_rebuild(rnums,rnum_sizes,nnums,rworddiffs,nrworddiffs);
+ Uncompress_show_rebuild(rnums,rnum_sizes,nnums,rworddiffs,nrworddiffs);
+
+
+ for(i=0;i<nnums;i++){delete [] rnums[i];}
+ }
+ delete [] rnums;
+ delete [] rnum_sizes;
+ if(rworddiffs){delete [] rworddiffs;}
+ return 0;
+}
+void
+WordDBPage::Uncompress_vals_chaged_flags(Compressor &in,unsigned int **pcflags,int *pn)
+{
+ int n=in.get_uint_vl(NBITS_NVALS,"FlagsField");
+ unsigned int *cflags=new unsigned int[n];
+ unsigned int ex=0;
+ int nbits=num_bits(n);
+ for(int i=0;i<n;i++)
+ {
+ ex=in.get_uint(WordKey::NFields(),label_str("cflags",i));
+ cflags[i]=ex;
+ int rep=in.get("rep");
+ if(rep)
+ {
+ rep=in.get_uint_vl(nbits,NULL);
+ for(int k=1;k<=rep;k++){cflags[k+i]=ex;}
+ i+=rep;
+ }
+ }
+
+ *pn=n;
+ *pcflags=cflags;
+}
+int
+WordDBPage::Uncompress_header(Compressor &in)
+{
+ pg->lsn.file =in.get_uint_vl( 8*sizeof(pg->lsn.file ),"page:lsn.file");
+ pg->lsn.offset =in.get_uint_vl( 8*sizeof(pg->lsn.offset ),"page:lsn.offset");
+ pg->pgno =in.get_uint_vl( 8*sizeof(pg->pgno ),"page:pgno");
+ pg->prev_pgno =in.get_uint_vl( 8*sizeof(pg->prev_pgno ),"page:prev_pgno");
+ pg->next_pgno =in.get_uint_vl( 8*sizeof(pg->next_pgno ),"page:next_pgno");
+ pg->entries =in.get_uint_vl( 8*sizeof(pg->entries ),"page:entries");
+ pg->hf_offset =in.get_uint_vl( 8*sizeof(pg->hf_offset ),"page:hf_offset");
+ pg->level =in.get_uint_vl( 8*sizeof(pg->level ),"page:level");
+ pg->type =in.get_uint_vl( 8*sizeof(pg->type ),"page:type");
+
+ init();
+
+ if(verbose)
+ {
+ printf("************************************\n");
+ printf("******** WordDBPage::Uncompress: page header ***\n");
+ printf("************************************\n");
+ printf("page size:%d\n",(int)pgsz);
+ printf(" 00-07: Log sequence number. file : %d\n", pg->lsn.file );
+ printf(" 00-07: Log sequence number. offset: %d\n", pg->lsn.offset );
+ printf(" 08-11: Current page number. : %d\n", pg->pgno );
+ printf(" 12-15: Previous page number. : %d\n", pg->prev_pgno );
+ printf(" 16-19: Next page number. : %d\n", pg->next_pgno );
+ printf(" 20-21: Number of item pairs on the page. : %d\n", pg->entries );
+ printf(" 22-23: High free byte page offset. : %d\n", pg->hf_offset );
+ printf(" 24: Btree tree level. : %d\n", pg->level );
+ printf(" 25: Page type. : %d\n", pg->type );
+ }
+ return OK;
+}
+void
+WordDBPage::Uncompress_rebuild(unsigned int **rnums,int *rnum_sizes,int nnums0,byte *rworddiffs,int nrworddiffs)
+{
+ int irwordiffs=0;
+ int nfields=WordKey::NFields();
+ int *rnum_pos=new int[ nnums0];// current index count
+ CHECK_MEM(rnum_pos);
+
+ int ii,j;
+ for(j=0;j<nnums0;j++){rnum_pos[j]=0;}
+
+ int i0=0;
+ if(type==P_IBTREE){i0=1;}// internal pages have particular first key
+
+ WordDBKey pkey;
+ WordDBKey akey=get_WordDBKey(i0);
+
+ // reconstruct each key using previous key and coded differences
+ for(ii=i0;ii<nk;ii++)
+ {
+ WordDBRecord arec;
+ BINTERNAL bti;
+
+ if(type==P_LBTREE)
+ {
+ // **** get the data fields
+ arec.set_decompress(rnums,rnum_sizes,ii,CNDATADATA,CNDATASTATS0,CNDATASTATS1);
+ }
+ else
+ {
+ if(type!=3){errr("WordDBPage::Uncompress_rebuild: unsupported type!=3");}
+ // ****** btree internal page specific
+ bti.pgno =rnums[CNBTIPGNO ][rnum_pos[CNBTIPGNO ]++];
+ bti.nrecs=rnums[CNBTINRECS][rnum_pos[CNBTINRECS]++];
+ }
+ // all that follows codes differences between succesive entries
+ // that is: Numerical key fields, Words
+ if(ii>i0)
+ {
+ unsigned int flags=rnums[CNFLAGS][rnum_pos[CNFLAGS]++];
+ int foundfchange=0;
+ // **** reconstruct the word
+ if(flags&pow2(nfields-1))// check flags to see if word has changed
+ {
+ foundfchange=1;
+ if(rnum_pos[CNWORDDIFFLEN]>=rnum_sizes[CNWORDDIFFLEN]){errr("WordDBPage::Uncompress read wrong num worddiffs");}
+ // get position of first character that changes in this word
+ int diffpos=rnums[CNWORDDIFFPOS][rnum_pos[CNWORDDIFFPOS]++];
+ // get size of changed part of the word
+ int difflen=rnums[CNWORDDIFFLEN][rnum_pos[CNWORDDIFFLEN]++];
+ int wordlen=diffpos+difflen;
+ char *str=new char [wordlen+1];
+ CHECK_MEM(str);
+ // copy the unchanged part into str from previos key's word
+ if(diffpos)strncpy(str,(char *)pkey.GetWord(),diffpos);
+ // copy the changed part from coded word differences
+ strncpy(str+diffpos,(char *)rworddiffs+irwordiffs,difflen);
+ str[wordlen]=0;
+ if(verbose)printf("key %3d word:\"%s\"\n",ii,str);
+ akey.SetWord(str);
+ irwordiffs+=difflen;
+ delete [] str;
+
+ }else{akey.SetWord(pkey.GetWord());}
+ // **** reconstruct the numerical key fields
+ for(j=1;j<nfields;j++)
+ {
+ // check flags to see if this field has changed
+ int changed=flags&pow2(j-1);
+ if(changed)
+ {
+ // this field's number
+ int k=CNFIELDS+j-1;
+ // current position within coded differences of this field
+ int indx=rnum_pos[k];
+ if(indx>=rnum_sizes[k]){errr("WordDBPage::Uncompress read wrong num of changes in a field");}
+ if(!foundfchange)
+ {
+ // this is the first field that changes in this key
+ // so difference is coded compared to value in pevious key
+ akey.Set(j,rnums[k][indx]+pkey.Get(j));
+ }
+ else
+ {
+ // this is NOT the first field that changes in this key
+ // so difference is coded from 0
+ akey.Set(j,rnums[k][indx]);
+ }
+ // we read 1 element from coded differences in this field
+ rnum_pos[k]++;
+ foundfchange=1;
+ }
+ else
+ {
+ // no changes found, just copy from previous key
+ if(!foundfchange){akey.Set(j,pkey.Get(j));}
+ else{akey.Set(j,0);}
+ }
+ }
+ }
+ // now insert key/data into page
+ if(type==P_LBTREE)
+ {
+ if(ii>i0)insert_key(akey);
+ if(ii>i0)insert_data(arec);
+ }
+ else
+ {
+ if(type!=3){errr("WordDBPage::Uncompress_rebuild: unsupported type!=3");}
+ if(ii>i0)insert_btikey(akey,bti);
+ }
+ pkey=akey;
+ }
+ delete [] rnum_pos;
+}
+
+// display
+void
+WordDBPage::Uncompress_show_rebuild(unsigned int **rnums,int *rnum_sizes,int nnums0,byte *rworddiffs,int nrworddiffs)
+{
+ int i,j;
+ if(verbose)
+ {
+ printf("WordDBPage::Uncompress_show_rebuild: rebuilt numerical fields\n");
+ for(j=0;j<nnums0;j++)
+ {
+ printf("resfield %2d %13s:",j,number_field_label(j));
+ for(i=0;i<rnum_sizes[j];i++)
+ {
+ printf("%4d ",rnums[j][i]);
+ }
+ printf("\n");
+ printf("diffield %2d:",j);
+ for(i=0;i<rnum_sizes[j];i++)
+ {
+ ;// printf("%2d:%d ",i,nums[j*nk+i] == rnums[j][i]);
+ }
+ printf("\n");
+ }
+ printf("reswordiffs:");
+ for(i=0;i<nrworddiffs;i++){printf("%c",(isalnum(rworddiffs[i]) ? rworddiffs[i] : '#'));}
+ printf("\n");
+ }
+}
+
+Compressor *
+WordDBPage::Compress(int ndebug, DB_CMPR_INFO *cmprInfo/*=NULL*/)
+{
+ debug=ndebug;
+ if(debug>1){verbose=1;}
+
+ Compressor *res=(Compressor *)new Compressor((cmprInfo ?
+ pgsz/(1<<(cmprInfo->coefficient)) :
+ pgsz/4));
+ CHECK_MEM(res);
+ if(debug>0){res->set_use_tags();}
+
+ res->put_uint(COMPRESS_VERSION,NBITS_COMPRESS_VERSION,"COMPRESS_VERSION");
+ res->put_uint(CMPRTYPE_NORMALCOMRPESS,NBITS_CMPRTYPE,"CMPRTYPE");
+
+ if(verbose){printf("WordDBPage::Compress: trying normal compress\n");}
+ int cmpr_ok=Compress_main(*((Compressor *)res));
+
+ if(cmpr_ok!=OK || res->buffsize()>pgsz)
+ {
+ if(verbose){printf("WordDBCompress::Compress full compress failed ... not compressing at all\n");}
+ show();
+
+ if(res){delete res;}
+ res=new Compressor;
+ CHECK_MEM(res);
+
+ if(debug>0){res->set_use_tags();}
+
+ res->put_uint(COMPRESS_VERSION,NBITS_COMPRESS_VERSION,"COMPRESS_VERSION");
+ res->put_uint(CMPRTYPE_BADCOMPRESS,NBITS_CMPRTYPE,"CMPRTYPE");
+
+ res->put_zone((byte *)pg,pgsz*8,"INITIALBUFFER");
+ }
+
+ if(verbose)
+ {
+ printf("WordDBPage::Compress: Final bitstream result\n");
+ res->show();
+ }
+ return res;
+};
+
+int
+WordDBPage::Compress_main(Compressor &out)
+{
+ if(debug>1){verbose=1;}
+ if(verbose){printf("WordDBPage::Compress_main: starting compression\n");}
+
+ if(pg->type!=5 && pg->type!=3){ printf("pg->type:%3d\n",pg->type);return NOTOK;}
+// if(pg->type==P_IBTREE){show();}
+
+
+ // *************** initialize data structures **************
+ int j;
+ // 0 -> changed/unchanged flags : 4bits
+ // 1..n -> numerical fields delta : ?bits (depending on field)
+ // n+1 -> word changed size : 1
+ int *nums =new int[nk*nnums];
+ CHECK_MEM(nums);
+ int *nums_pos=new int[ nnums];
+ CHECK_MEM(nums_pos);
+// int *cnsizes =new int[ nnums];
+ for(j=0;j<nnums;j++){nums_pos[j]=0;}
+// for(j=1;j<nfields;j++) {cnsizes[j]=word_key_info->sort[j].bits;}
+// cnsizes[CNFLAGS]=4;
+// cnsizes[CNWORDDIFFPOS ]=8;
+// cnsizes[CNWORDDIFFLEN ]=8;
+ HtVector_byte worddiffs;
+
+
+//bmt_START;
+ // *************** extract values and wordiffs **************
+ if(nk>0)
+ {
+ Compress_extract_vals_wordiffs(nums,nums_pos,nnums,worddiffs);
+ if(verbose)Compress_show_extracted(nums,nums_pos,nnums,worddiffs);
+ }
+
+ // *************** init compression **************
+
+//bmt_END;bmt_START;
+ Compress_header(out);
+
+ // *************** compress values and wordiffs **************
+
+ // compress first key(s)
+ int nkeysleft=nk;
+ if(nkeysleft>0)
+ {
+ compress_key(out,0);
+ if(type==P_LBTREE){compress_data(out,0);}
+ nkeysleft--;
+ }
+ if(nkeysleft>0 && type==P_IBTREE){compress_key(out,1);nkeysleft--;}
+
+ if(nkeysleft>0)
+ {
+//bmt_END;bmt_START;
+ // compress values
+ Compress_vals(out,nums,nums_pos,nnums);
+//bmt_END;bmt_START;
+
+ // compress worddiffs
+ int size=out.put_fixedbitl(worddiffs.begin(),worddiffs.size(),"WordDiffs");
+ if(verbose)printf("compressed wordiffs : %3d values: %4d bits %4f bytes\n",worddiffs.size(),size,size/8.0);
+//bmt_END;
+ }
+
+ // *************** cleanup **************
+
+ delete [] nums ;
+ delete [] nums_pos;
+
+ return OK;
+}
+
+void
+WordDBPage::Compress_extract_vals_wordiffs(int *nums,int *nums_pos,int ,HtVector_byte &worddiffs)
+{
+ WordDBKey pkey;
+
+ int ii,j;
+ int i0=0;
+ if(type==P_IBTREE){i0=1;}// internal pages have particular first key
+ for(ii=i0;ii<nk;ii++)
+ {
+ WordDBKey akey=get_WordDBKey(ii);
+
+ if(type==P_LBTREE)
+ {
+ // ****** WordRecord (data/stats)
+ // get word record
+ WordDBRecord arec(data(ii),akey.RecType());
+ // add record
+ if(arec.type==WORD_RECORD_STATS)
+ {
+ nums[CNDATASTATS0*nk+nums_pos[CNDATASTATS0]++]=arec.info.stats.noccurrence;
+ nums[CNDATASTATS1*nk+nums_pos[CNDATASTATS1]++]=arec.info.stats.ndoc;
+ }
+ else
+ if(arec.type==WORD_RECORD_DATA)
+ {
+ nums[CNDATADATA *nk+nums_pos[CNDATADATA ]++]=arec.info.data;
+ }
+ }
+ else
+ {
+ if(type!=3){errr("WordDBPage::Compress_extract_vals_wordiffs: unsupported type!=3");}
+ // ****** btree internal page specific
+ nums[CNBTIPGNO *nk+nums_pos[CNBTIPGNO ]++]=btikey(ii)->pgno ;
+ nums[CNBTINRECS*nk+nums_pos[CNBTINRECS]++]=btikey(ii)->nrecs;
+ }
+
+ // all that follows codes differences between succesive entries
+ // that is: Numerical key fields, Words
+ if(ii>i0)
+ {
+ // clear changed falgs
+ int iflag=CNFLAGS*nk+nums_pos[CNFLAGS]++;
+ nums[iflag]=0;
+
+ int foundfchange=0;
+ const String &aword=akey.GetWord();
+ const String &pword=pkey.GetWord();
+ if(!(aword==pword)){foundfchange=1;}
+
+ // check numerical fields for changes
+ // ******** sets CNFIELDS and some of CNFLAGS ************
+ for(j=1;j<akey.NFields();j++)
+ {
+ int diff=akey.Get(j)-(foundfchange ? 0 : pkey.Get(j));
+ if(diff)
+ {
+ foundfchange=1;
+ nums[iflag]|=pow2(j-1);
+ nums[ j*nk+nums_pos[j]++]=diff;
+ }
+ }
+
+ // ************ check word for changes
+ // ******** sets CNWORDDIFFPOS CNWORDDIFFLEN and some of CNFLAGS ************
+ if(!(aword==pword))
+ {
+ nums[iflag]|=pow2(akey.NFields()-1);
+ int fd=first_diff(aword,pword);
+ nums[CNWORDDIFFPOS*nk+nums_pos[CNWORDDIFFPOS]++]=fd;
+ nums[CNWORDDIFFLEN*nk+nums_pos[CNWORDDIFFLEN]++]=aword.length()-fd;
+ for(int s=fd;s<aword.length();s++){worddiffs.push_back(aword[s]);}
+ }
+ }
+ pkey=akey;
+ }
+// nums_pos[CNFLAGS]=nk-1;
+
+}
+
+void
+WordDBPage::Compress_vals_changed_flags(Compressor &out,unsigned int *cflags,int n)
+{
+ int size=out.size();
+ out.put_uint_vl(n,NBITS_NVALS,"FlagsField");
+ unsigned int ex=0;
+ int nbits=num_bits(n);
+ for(int i=0;i<n;i++)
+ {
+ ex=cflags[i];
+ out.put_uint(ex,WordKey::NFields(),label_str("cflags",i));
+ int k;
+ for(k=1;k+i<n;k++){if(ex!=cflags[i+k]){break;}}
+ k--;
+ if(k>0)
+ {
+ out.put(1,"rep");
+ out.put_uint_vl(k,nbits,NULL);
+ i+=k;
+ }
+ else
+ {out.put(0,"rep");}
+ }
+ size=out.size()-size;
+ if(verbose)printf("compressed flags %2d : %3d values: %4d bits %8f bytes : ended bit field pos:%6d\n",0,n,size,size/8.0,out.size());
+
+}
+
+void
+WordDBPage::Compress_vals(Compressor &out,int *nums,int *nums_pos,int nnums0)
+{
+ // the changed flags fields are particular
+ Compress_vals_changed_flags(out,(unsigned int *)(nums+0*nk),nums_pos[0]);
+
+
+ // compress the difference numbers for the numerical fields
+ for( int j=1;j<nnums0;j++)
+ {
+ int nv=nums_pos[j];
+ unsigned int *v=(unsigned int *)(nums+j*nk);
+ if((1 || j==3) && verbose){out.verbose=2;}
+ int size=out.put_vals(v,nv,label_str("NumField",j));
+ if((1 || j==3) && verbose){out.verbose=0;}
+ if(verbose)printf("compressed field %2d : %3d values: %4d bits %8f bytes : ended bit field pos:%6d\n",j,n,size,size/8.0,out.size());
+ }
+}
+
+void
+WordDBPage::Compress_header(Compressor &out)
+{
+// no smart compression ... for now
+ out.put_uint_vl(pg->lsn.file , 8*sizeof(pg->lsn.file ),"page:lsn.file");
+ out.put_uint_vl(pg->lsn.offset , 8*sizeof(pg->lsn.offset ),"page:lsn.offset");
+ out.put_uint_vl(pg->pgno , 8*sizeof(pg->pgno ),"page:pgno");
+ out.put_uint_vl(pg->prev_pgno , 8*sizeof(pg->prev_pgno ),"page:prev_pgno");
+ out.put_uint_vl(pg->next_pgno , 8*sizeof(pg->next_pgno ),"page:next_pgno");
+ out.put_uint_vl(pg->entries , 8*sizeof(pg->entries ),"page:entries");
+ out.put_uint_vl(pg->hf_offset , 8*sizeof(pg->hf_offset ),"page:hf_offset");
+ out.put_uint_vl(pg->level , 8*sizeof(pg->level ),"page:level");
+ out.put_uint_vl(pg->type , 8*sizeof(pg->type ),"page:type");
+}
+
+void
+WordDBPage::Compress_show_extracted(int *nums,int *nums_pos,int nnums0,HtVector_byte &worddiffs)
+{
+ int i,j;
+ int *cnindexe2=new int[ nnums0];
+ CHECK_MEM(cnindexe2);
+ for(j=0;j<nnums0;j++){cnindexe2[j]=0;}
+ for(j=0;j<nnums0;j++)
+ {
+ printf("%13s",number_field_label(j));
+ }
+ printf("\n");
+ int w=0;
+ int mx=(nk>worddiffs.size() ? nk : worddiffs.size());
+ for(i=0;i<mx;i++)
+ {
+ printf("%3d: ",i);
+ for(j=0;j<nnums0;j++)
+ {
+ int k=cnindexe2[j]++;
+ int nbits=(j ? 16:4);// just to show the flags field...
+ if(k<nums_pos[j])
+ {
+ int val=nums[j*nk+k];
+ if(nbits<8){show_bits(val,nbits);printf(" ");}
+ else
+ {
+ printf("|%12u",val);
+ }
+ }
+ else
+ {
+ if(nbits<8){printf(" ");}
+ else
+ {
+ printf("| ");
+ }
+ }
+ }
+ if(w<worddiffs.size()){printf(" %02x %c ",worddiffs[w],(isalnum(worddiffs[w]) ? worddiffs[w] : '#'));}
+ w++;
+ printf("\n");
+ }
+ delete [] cnindexe2;
+}
+
+// Compare two pages to check if equal
+int
+WordDBPage::Compare(WordDBPage &other)
+{
+ int res=0;
+ // Compare headers
+ if(other.pgsz != pgsz ){res++;printf("compare failed for pgsz \n");}
+ if(other.pg->lsn.file != pg->lsn.file ){res++;printf("compare failed for pg->lsn.file \n");}
+ if(other.pg->lsn.offset != pg->lsn.offset ){res++;printf("compare failed for pg->lsn.offset \n");}
+ if(other.pg->pgno != pg->pgno ){res++;printf("compare failed for pg->pgno \n");}
+ if(other.pg->prev_pgno != pg->prev_pgno ){res++;printf("compare failed for pg->prev_pgno \n");}
+ if(other.pg->next_pgno != pg->next_pgno ){res++;printf("compare failed for pg->next_pgno \n");}
+ if(other.pg->entries != pg->entries ){res++;printf("compare failed for pg->entries \n");}
+ if(other.pg->hf_offset != pg->hf_offset ){res++;printf("compare failed for pg->hf_offset \n");}
+ if(other.pg->level != pg->level ){res++;printf("compare failed for pg->level \n");}
+ if(other.pg->type != pg->type ){res++;printf("compare failed for pg->type \n");}
+ int i,k;
+ // double check header
+ if(memcmp((void *)pg,(void *)other.pg,sizeof(PAGE)-sizeof(db_indx_t)))
+ {
+ res++;
+ printf("compare failed in some unknown place in header:\n");
+ for(i=0;i<(int)(sizeof(PAGE)-sizeof(db_indx_t));i++)
+ {
+ printf("%3d: %3x %3x\n",i,((byte *)pg)[i],((byte *)other.pg)[i]);
+ }
+ }
+
+ // pg->type != 5 && !=3 pages are not really compressed: just memcmp
+ if(pg->type != 5 && pg->type != 3)
+ {
+ if(memcmp((void *)pg,(void *)other.pg,pgsz))
+ {
+ printf("compare:PAGETYPE:!=5 and memcmp failed\n");
+ res++;
+ printf("compare failed\n");
+ }
+ return(res);
+ }
+
+ // compare each key/data pair
+ for(i=0;i<(type==P_LBTREE ? pg->entries/2 : pg->entries);i++)
+ {
+ if(pg->type==P_LBTREE)
+ {
+ // compare keys
+ if(key(i)->len !=other.key(i)->len )
+ {
+ printf("compare:key(%2d) len : %2d != %2d\n",i,key(i)->len ,other.key(i)->len );
+ res++;
+ }
+ if(key(i)->type!=other.key(i)->type)
+ {
+ printf("compare:key(%2d) type: %2d != %2d\n",i,key(i)->type,other.key(i)->type);
+ res++;
+ }
+ if(memcmp(key(i)->data,other.key(i)->data,key(i)->len))
+ {
+ printf("compare :key(%2d)\n",i);
+ for(k=0;k<key(i)->len;k++)
+ {
+ int c=key(i)->data[k];
+ if(isalnum(c)){printf(" %c ",c);}
+ else{printf("%02x ",c);}
+ }
+ printf("\n");
+ for(k=0;k<key(i)->len;k++)
+ {
+ int c=other.key(i)->data[k];
+ if(isalnum(c)){printf(" %c ",c);}
+ else{printf("%02x ",c);}
+ }
+ printf("\n");
+ res++;printf("compare:key failed\n");
+ }
+ // compare data
+ if(data(i)->len !=other.data(i)->len )
+ {
+ printf("compare:data(%2d) len : %2d != %2d\n",i,data(i)->len ,other.data(i)->len );
+ res++;
+ }
+ if(data(i)->type!=other.data(i)->type)
+ {
+ printf("compare:data(%2d) type: %2d != %2d\n",i,data(i)->type,other.key(i)->type);
+ res++;
+ }
+ if(memcmp(data(i)->data,other.data(i)->data,data(i)->len))
+ {
+ printf("compare :data(%2d)\n",i);
+ for(k=0;k<data(i)->len;k++)
+ {
+ printf("%02x ",data(i)->data[k]);
+ }
+ printf("\n");
+ for(k=0;k<data(i)->len;k++)
+ {
+ printf("%02x ",other.data(i)->data[k]);
+ }
+ printf("\n");
+ res++;printf("compare:data failed\n");
+ }
+ }
+ else
+ {
+ if(type!=3){errr("WordDBPage::Compare: unsupported type!=3");}
+ if(btikey(i)->len != other.btikey(i)->len ||
+ btikey(i)->type != other.btikey(i)->type ||
+ btikey(i)->pgno != other.btikey(i)->pgno ||
+ btikey(i)->nrecs != other.btikey(i)->nrecs )
+ {
+ printf("compare:btikey(%2d) failed\n",i);
+ printf("this :len :%4d type :%4d pgno :%4d nrecs :%4d \n",btikey(i)->len,btikey(i)->type,
+ btikey(i)->pgno,btikey(i)->nrecs);
+ printf("other:len :%4d type :%4d pgno :%4d nrecs :%4d \n",other.btikey(i)->len,other.btikey(i)->type,
+ other.btikey(i)->pgno,other.btikey(i)->nrecs);
+ res++;
+
+ }
+ if(memcmp(btikey(i)->data,other.btikey(i)->data,btikey(i)->len))
+ {
+ printf("compare :btikey(%2d)\n",i);
+ for(k=0;k<btikey(i)->len;k++)
+ {
+ printf("%02x ",btikey(i)->data[k]);
+ }
+ printf("\n");
+ for(k=0;k<btikey(i)->len;k++)
+ {
+ printf("%02x ",other.btikey(i)->data[k]);
+ }
+ printf("\n");
+ res++;printf("compare:btikey failed\n");
+
+ }
+ }
+ }
+ if(pg->entries>0)
+ {
+ int smallestoffset=HtMaxMin::min_v(pg->inp,pg->entries);
+ int other_smallestoffset=HtMaxMin::min_v(other.pg->inp,other.pg->entries);
+ if(smallestoffset!=other_smallestoffset)
+ {
+ printf("compare fail:smallestoffset:%d other_smallestoffset:%d\n",smallestoffset,other_smallestoffset);
+ res++;
+ }
+ }
+
+ return(res);
+}
+
+// Bit stream description
+// | field[last] changed only | yes -> delta field[last]
+//
+
+// redo=0 ->
+// redo=1 -> oops, dont show!
+// redo=2 ->
+void
+WordDBPage::show()
+{
+ int i,j,dd,l;
+
+ printf("************************************\n");
+ printf("************************************\n");
+ printf("************************************\n");
+ printf("page size:%d\n",(int)pgsz);
+ printf(" 00-07: Log sequence number. file : %d\n", pg->lsn.file );
+ printf(" 00-07: Log sequence number. offset: %d\n", pg->lsn.offset );
+ printf(" 08-11: Current page number. : %d\n", pg->pgno );
+ printf(" 12-15: Previous page number. : %d\n", pg->prev_pgno );
+ printf(" 16-19: Next page number. : %d\n", pg->next_pgno );
+ printf(" 20-21: Number of item pairs on the page. : %d\n", pg->entries );
+ printf(" 22-23: High free byte page offset. : %d\n", pg->hf_offset );
+ printf(" 24: Btree tree level. : %d\n", pg->level );
+ printf(" 25: Page type. : %d\n", pg->type );
+
+
+ printf("entry offsets:");
+ for(i=0;i<pg->entries;i++){printf("%4d ",pg->inp[i]);}
+ printf("\n");
+
+ if(pg->type ==5)
+ {
+
+ WordRecord dud;
+ WordKey prev;
+ int pagecl=0;
+ for(i=0;i<pg->entries;i++)
+ {
+ if( (i%2) && dud.type==WORD_RECORD_NONE){continue;}
+ printf("\n||%c:%3d:off:%03d:invoff:%4d:len:%2d:typ:%x:",i%2 ? 'D' : 'K',i,e_offset(i),pgsz-e_offset(i),entry(i)->len,entry(i)->type);
+ if(i>0)
+ {
+ l=entry(i)->len+3;
+ dd=(int)(e_offset(i-1))-l;
+ dd-=dd%4;
+ printf("% 5d:: ",(e_offset(i)-dd));
+ }
+ if(!(i%2))
+ {
+ WordDBKey tkey(entry(i));
+ int fieldchanged[10];
+ char *wordchange=NULL;
+ printf("\"");
+ printf("%s",(char *)tkey.GetWord());
+ printf("\"");
+ for(j=0;j<20-tkey.GetWord().length();j++){printf(" ");}
+ printf("|");
+ for(j=1;j<tkey.NFields();j++){printf("%4x ",tkey.Get(j));}
+ printf("|");
+
+ for(j=1;j<tkey.NFields();j++)
+ {
+ int diff=tkey.Get(j)-prev.Get(j);
+ if(diff<0){diff=tkey.Get(j);}
+ printf("%6d ",diff);
+ fieldchanged[j]=diff;
+ }
+
+ String &word=tkey.GetWord();
+ String &pword=prev.GetWord();
+ if(word==pword){printf(" 00 ===");fieldchanged[0]=0;}
+ else
+ {
+ int fd=first_diff(word,pword);
+ fieldchanged[0]=fd+1;
+ wordchange=((char *)word)+fd;
+ printf(" %2d %s",fd,((char *)word)+fd);
+ }
+
+ int keycl=tkey.NFields();
+ for(j=1;j<tkey.NFields();j++)
+ {
+ if(fieldchanged[j]){keycl+=WordKeyInfo::Instance()->sort[j].bits;}
+ }
+ if(fieldchanged[0]){keycl+=3;keycl+=8*strlen(wordchange);}
+ printf(" ::%2d %f",keycl,keycl/8.0);
+ pagecl+=keycl;
+ prev=tkey;
+ }
+ else
+ {
+ if(entry(i)->len>100){printf("WordDBPage::show: aaargh strange failing\n");return;}
+ for(j=0;j<entry(i)->len;j++)
+ {
+ printf("%02x ",entry(i)->data[j]);
+ }
+ }
+ }
+ printf("\n");
+ }
+ else
+ if(1)
+ {
+ int nn=0;
+ // dump hex
+ for(i=0;;i++)
+ {
+ printf("%5d: ",nn);
+ for(j=0;j<20;j++)
+ {
+ printf("%2x ",((byte *)pg)[nn++]);
+ if(nn>=pgsz){break;}
+ }
+ printf("\n");
+ if(nn>=pgsz){break;}
+ }
+ }
+ if(pg->type == 3)
+ {
+ for(i=0;i<pg->entries;i++)
+ {
+ BINTERNAL *bie=GET_BINTERNAL(pg,i);
+ printf("%3d: off:%4d:len:%3d :type:%3d :pgno:%4d: nrecs:%4d:: ",i,pg->inp[i],bie->len,bie->type,bie->pgno,bie->nrecs);
+ WordDBKey tkey(bie);
+ for(j=0;j<bie->len-tkey.GetWord().length();j++){printf("%2x ",bie->data[j]);}
+ printf(" : ");
+ for(j=1;j<tkey.NFields();j++){printf("%5d ",tkey.Get(j));}
+ printf("\"%s\"\n",(char *)tkey.GetWord());
+ }
+ }
+
+}
+
+