00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include <stdlib.h>
00028 #include <stdio.h>
00029 #include <math.h>
00030 #include <string.h>
00031 #include <sys/time.h>
00032
00033 #include "vqgen.h"
00034 #include "vqsplit.h"
00035 #include "bookutil.h"
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062 long *isortvals;
00063 int iascsort(const void *a,const void *b){
00064 long av=isortvals[*((long *)a)];
00065 long bv=isortvals[*((long *)b)];
00066 return(av-bv);
00067 }
00068
00069 static float _Ndist(int el,float *a, float *b){
00070 int i;
00071 float acc=0.f;
00072 for(i=0;i<el;i++){
00073 float val=(a[i]-b[i]);
00074 acc+=val*val;
00075 }
00076 return sqrt(acc);
00077 }
00078
00079 #define _Npoint(i) (pointlist+dim*(i))
00080 #define _Nnow(i) (entrylist+dim*(i))
00081
00082
00083
00084 int vqsp_count(float *entrylist,float *pointlist,int dim,
00085 long *membership,long *reventry,
00086 long *entryindex,long entries,
00087 long *pointindex,long points,int splitp,
00088 long *entryA,long *entryB,
00089 long besti,long bestj,
00090 long *entriesA,long *entriesB,long *entriesC){
00091 long i,j;
00092 long A=0,B=0,C=0;
00093 long pointsA=0;
00094 long pointsB=0;
00095 long *temppointsA=NULL;
00096 long *temppointsB=NULL;
00097
00098 if(splitp){
00099 temppointsA=_ogg_malloc(points*sizeof(long));
00100 temppointsB=_ogg_malloc(points*sizeof(long));
00101 }
00102
00103 memset(entryA,0,sizeof(long)*entries);
00104 memset(entryB,0,sizeof(long)*entries);
00105
00106
00107
00108
00109 for(i=0;i<points;i++){
00110 float *ppt=_Npoint(pointindex[i]);
00111 long firstentry=membership[pointindex[i]];
00112
00113 if(firstentry==besti){
00114 entryA[reventry[firstentry]]=1;
00115 if(splitp)temppointsA[pointsA++]=pointindex[i];
00116 continue;
00117 }
00118 if(firstentry==bestj){
00119 entryB[reventry[firstentry]]=1;
00120 if(splitp)temppointsB[pointsB++]=pointindex[i];
00121 continue;
00122 }
00123 {
00124 float distA=_Ndist(dim,ppt,_Nnow(besti));
00125 float distB=_Ndist(dim,ppt,_Nnow(bestj));
00126 if(distA<distB){
00127 entryA[reventry[firstentry]]=1;
00128 if(splitp)temppointsA[pointsA++]=pointindex[i];
00129 }else{
00130 entryB[reventry[firstentry]]=1;
00131 if(splitp)temppointsB[pointsB++]=pointindex[i];
00132 }
00133 }
00134 }
00135
00136
00137
00138
00139
00140 for(j=0;j<entries;j++){
00141 if(entryA[j] && entryB[j])C++;
00142 if(entryA[j])entryA[A++]=entryindex[j];
00143 if(entryB[j])entryB[B++]=entryindex[j];
00144 }
00145 *entriesA=A;
00146 *entriesB=B;
00147 *entriesC=C;
00148 if(splitp){
00149 memcpy(pointindex,temppointsA,sizeof(long)*pointsA);
00150 memcpy(pointindex+pointsA,temppointsB,sizeof(long)*pointsB);
00151 free(temppointsA);
00152 free(temppointsB);
00153 }
00154 return(pointsA);
00155 }
00156
00157 int lp_split(float *pointlist,long totalpoints,
00158 codebook *b,
00159 long *entryindex,long entries,
00160 long *pointindex,long points,
00161 long *membership,long *reventry,
00162 long depth, long *pointsofar){
00163
00164 encode_aux_nearestmatch *t=b->c->nearest_tree;
00165
00166
00167
00168
00169
00170
00171
00172 int dim=b->dim;
00173 float *entrylist=b->valuelist;
00174 long ret;
00175 long *entryA=_ogg_calloc(entries,sizeof(long));
00176 long *entryB=_ogg_calloc(entries,sizeof(long));
00177 long entriesA=0;
00178 long entriesB=0;
00179 long entriesC=0;
00180 long pointsA=0;
00181 long i,j,k;
00182
00183 long besti=-1;
00184 long bestj=-1;
00185
00186 char spinbuf[80];
00187 sprintf(spinbuf,"splitting [%ld left]... ",totalpoints-*pointsofar);
00188
00189
00190 for(i=0;i<b->entries;i++)reventry[i]=-1;
00191 for(i=0;i<entries;i++)reventry[entryindex[i]]=i;
00192
00193
00194
00195
00196
00197
00198
00199 if(entries<8 || (float)points*entries*entries<16.f*1024*1024){
00200
00201 float best=0;
00202 float this;
00203 for(i=0;i<entries-1;i++){
00204 for(j=i+1;j<entries;j++){
00205 spinnit(spinbuf,entries-i);
00206 vqsp_count(b->valuelist,pointlist,dim,
00207 membership,reventry,
00208 entryindex,entries,
00209 pointindex,points,0,
00210 entryA,entryB,
00211 entryindex[i],entryindex[j],
00212 &entriesA,&entriesB,&entriesC);
00213 this=(entriesA-entriesC)*(entriesB-entriesC);
00214
00215
00216
00217
00218
00219
00220 if( (besti==-1) ||
00221 (this>best) ){
00222
00223 best=this;
00224 besti=entryindex[i];
00225 bestj=entryindex[j];
00226
00227 }
00228 }
00229 }
00230 }else{
00231 float *p=alloca(dim*sizeof(float));
00232 float *q=alloca(dim*sizeof(float));
00233 float best=0.f;
00234
00235
00236
00237
00238
00239 for(k=0;k<dim;k++){
00240 spinnit(spinbuf,entries);
00241
00242 p[k]=0.f;
00243 for(j=0;j<entries;j++)
00244 p[k]+=b->valuelist[entryindex[j]*dim+k];
00245 p[k]/=entries;
00246
00247 }
00248
00249
00250
00251
00252
00253 for(i=0;i<entries;i++){
00254 float *ppi=_Nnow(entryindex[i]);
00255 float ref_best=0.f;
00256 float ref_j=-1;
00257 float this;
00258 spinnit(spinbuf,entries-i);
00259
00260 for(k=0;k<dim;k++)
00261 q[k]=2*p[k]-ppi[k];
00262
00263 for(j=0;j<entries;j++){
00264 if(j!=i){
00265 float this=_Ndist(dim,q,_Nnow(entryindex[j]));
00266 if(ref_j==-1 || this<=ref_best){
00267 ref_best=this;
00268 ref_j=entryindex[j];
00269 }
00270 }
00271 }
00272
00273 vqsp_count(b->valuelist,pointlist,dim,
00274 membership,reventry,
00275 entryindex,entries,
00276 pointindex,points,0,
00277 entryA,entryB,
00278 entryindex[i],ref_j,
00279 &entriesA,&entriesB,&entriesC);
00280 this=(entriesA-entriesC)*(entriesB-entriesC);
00281
00282
00283
00284
00285
00286
00287 if( (besti==-1) ||
00288 (this>best) ){
00289
00290 best=this;
00291 besti=entryindex[i];
00292 bestj=ref_j;
00293
00294 }
00295 }
00296 if(besti>bestj){
00297 long temp=besti;
00298 besti=bestj;
00299 bestj=temp;
00300 }
00301
00302 }
00303
00304
00305
00306
00307 pointsA=vqsp_count(b->valuelist,pointlist,dim,
00308 membership,reventry,
00309 entryindex,entries,
00310 pointindex,points,1,
00311 entryA,entryB,
00312 besti,bestj,
00313 &entriesA,&entriesB,&entriesC);
00314
00315
00316
00317 {
00318 long thisaux=t->aux++;
00319 if(t->aux>=t->alloc){
00320 t->alloc*=2;
00321 t->ptr0=_ogg_realloc(t->ptr0,sizeof(long)*t->alloc);
00322 t->ptr1=_ogg_realloc(t->ptr1,sizeof(long)*t->alloc);
00323 t->p=_ogg_realloc(t->p,sizeof(long)*t->alloc);
00324 t->q=_ogg_realloc(t->q,sizeof(long)*t->alloc);
00325 }
00326
00327 t->p[thisaux]=besti;
00328 t->q[thisaux]=bestj;
00329
00330 if(entriesA==1){
00331 ret=1;
00332 t->ptr0[thisaux]=entryA[0];
00333 *pointsofar+=pointsA;
00334 }else{
00335 t->ptr0[thisaux]= -t->aux;
00336 ret=lp_split(pointlist,totalpoints,b,entryA,entriesA,pointindex,pointsA,
00337 membership,reventry,depth+1,pointsofar);
00338 }
00339 if(entriesB==1){
00340 ret++;
00341 t->ptr1[thisaux]=entryB[0];
00342 *pointsofar+=points-pointsA;
00343 }else{
00344 t->ptr1[thisaux]= -t->aux;
00345 ret+=lp_split(pointlist,totalpoints,b,entryB,entriesB,pointindex+pointsA,
00346 points-pointsA,membership,reventry,
00347 depth+1,pointsofar);
00348 }
00349 }
00350 free(entryA);
00351 free(entryB);
00352 return(ret);
00353 }
00354
00355 static int _node_eq(encode_aux_nearestmatch *v, long a, long b){
00356 long Aptr0=v->ptr0[a];
00357 long Aptr1=v->ptr1[a];
00358 long Bptr0=v->ptr0[b];
00359 long Bptr1=v->ptr1[b];
00360
00361
00362
00363
00364
00365 if(Aptr0==Bptr0 && Aptr1==Bptr1)
00366 return(1);
00367
00368 return(0);
00369 }
00370
00371 void vqsp_book(vqgen *v, codebook *b, long *quantlist){
00372 long i,j;
00373 static_codebook *c=(static_codebook *)b->c;
00374 encode_aux_nearestmatch *t;
00375
00376 memset(b,0,sizeof(codebook));
00377 memset(c,0,sizeof(static_codebook));
00378 b->c=c;
00379 t=c->nearest_tree=_ogg_calloc(1,sizeof(encode_aux_nearestmatch));
00380 c->maptype=2;
00381
00382
00383
00384
00385 for(i=0;i<v->entries;){
00386
00387 for(j=0;j<i;j++){
00388 if(_Ndist(v->elements,_now(v,i),_now(v,j))==0.f){
00389 fprintf(stderr,"found a duplicate entry! removing...\n");
00390 v->entries--;
00391 memcpy(_now(v,i),_now(v,v->entries),sizeof(float)*v->elements);
00392 memcpy(quantlist+i*v->elements,quantlist+v->entries*v->elements,
00393 sizeof(long)*v->elements);
00394 break;
00395 }
00396 }
00397 if(j==i)i++;
00398 }
00399
00400 {
00401 v->assigned=_ogg_calloc(v->entries,sizeof(long));
00402 for(i=0;i<v->points;i++){
00403 float *ppt=_point(v,i);
00404 float firstmetric=_Ndist(v->elements,_now(v,0),ppt);
00405 long firstentry=0;
00406
00407 if(!(i&0xff))spinnit("checking... ",v->points-i);
00408
00409 for(j=0;j<v->entries;j++){
00410 float thismetric=_Ndist(v->elements,_now(v,j),ppt);
00411 if(thismetric<firstmetric){
00412 firstmetric=thismetric;
00413 firstentry=j;
00414 }
00415 }
00416
00417 v->assigned[firstentry]++;
00418 }
00419
00420 for(j=0;j<v->entries;){
00421 if(v->assigned[j]==0){
00422 fprintf(stderr,"found an unused entry! removing...\n");
00423 v->entries--;
00424 memcpy(_now(v,j),_now(v,v->entries),sizeof(float)*v->elements);
00425 v->assigned[j]=v->assigned[v->elements];
00426 memcpy(quantlist+j*v->elements,quantlist+v->entries*v->elements,
00427 sizeof(long)*v->elements);
00428 continue;
00429 }
00430 j++;
00431 }
00432 }
00433
00434 fprintf(stderr,"Building a book with %ld unique entries...\n",v->entries);
00435
00436 {
00437 long *entryindex=_ogg_malloc(v->entries*sizeof(long *));
00438 long *pointindex=_ogg_malloc(v->points*sizeof(long));
00439 long *membership=_ogg_malloc(v->points*sizeof(long));
00440 long *reventry=_ogg_malloc(v->entries*sizeof(long));
00441 long pointssofar=0;
00442
00443 for(i=0;i<v->entries;i++)entryindex[i]=i;
00444 for(i=0;i<v->points;i++)pointindex[i]=i;
00445
00446 t->alloc=4096;
00447 t->ptr0=_ogg_malloc(sizeof(long)*t->alloc);
00448 t->ptr1=_ogg_malloc(sizeof(long)*t->alloc);
00449 t->p=_ogg_malloc(sizeof(long)*t->alloc);
00450 t->q=_ogg_malloc(sizeof(long)*t->alloc);
00451 t->aux=0;
00452 c->dim=v->elements;
00453 c->entries=v->entries;
00454 c->lengthlist=_ogg_calloc(c->entries,sizeof(long));
00455 b->valuelist=v->entrylist;
00456 b->dim=c->dim;
00457 b->entries=c->entries;
00458
00459 for(i=0;i<v->points;i++)membership[i]=-1;
00460 for(i=0;i<v->points;i++){
00461 float *ppt=_point(v,i);
00462 long firstentry=0;
00463 float firstmetric=_Ndist(v->elements,_now(v,0),ppt);
00464
00465 if(!(i&0xff))spinnit("assigning... ",v->points-i);
00466
00467 for(j=1;j<v->entries;j++){
00468 if(v->assigned[j]!=-1){
00469 float thismetric=_Ndist(v->elements,_now(v,j),ppt);
00470 if(thismetric<=firstmetric){
00471 firstmetric=thismetric;
00472 firstentry=j;
00473 }
00474 }
00475 }
00476
00477 membership[i]=firstentry;
00478 }
00479
00480 fprintf(stderr,"Leaves added: %d \n",
00481 lp_split(v->pointlist,v->points,
00482 b,entryindex,v->entries,
00483 pointindex,v->points,
00484 membership,reventry,
00485 0,&pointssofar));
00486
00487 free(pointindex);
00488 free(membership);
00489 free(reventry);
00490
00491 fprintf(stderr,"Paring/rerouting redundant branches... ");
00492
00493
00494 {
00495 int changedflag=1;
00496
00497 while(changedflag){
00498 changedflag=0;
00499
00500
00501
00502
00503 for(i=0;i<t->aux;){
00504 int k;
00505
00506
00507 for(j=0;j<i;j++)
00508 if(_node_eq(t,i,j))break;
00509
00510 if(j<i){
00511
00512
00513 changedflag=1;
00514 for(k=0;k<t->aux;k++){
00515 if(t->ptr0[k]==-i)t->ptr0[k]=-j;
00516 if(t->ptr1[k]==-i)t->ptr1[k]=-j;
00517 }
00518
00519
00520
00521
00522 t->aux--;
00523 t->ptr0[i]=t->ptr0[t->aux];
00524 t->ptr1[i]=t->ptr1[t->aux];
00525 t->p[i]=t->p[t->aux];
00526 t->q[i]=t->q[t->aux];
00527 for(k=0;k<t->aux;k++){
00528 if(t->ptr0[k]==-t->aux)t->ptr0[k]=-i;
00529 if(t->ptr1[k]==-t->aux)t->ptr1[k]=-i;
00530 }
00531
00532
00533 }else
00534 i++;
00535 }
00536
00537 fprintf(stderr,"\rParing/rerouting redundant branches... "
00538 "%ld remaining ",t->aux);
00539 }
00540 fprintf(stderr,"\n");
00541 }
00542 }
00543
00544
00545
00546 {
00547 long *probability=_ogg_malloc(c->entries*sizeof(long));
00548 for(i=0;i<c->entries;i++)probability[i]=1;
00549 b->dim=c->dim;
00550
00551
00552 for(i=0;i<t->aux;i++)t->p[i]*=c->dim;
00553 for(i=0;i<t->aux;i++)t->q[i]*=c->dim;
00554
00555 for(i=0;i<v->points;i++){
00556
00557
00558 int ret=_best(b,v->pointlist+i*v->elements,1);
00559 probability[ret]++;
00560 if(!(i&0xff))spinnit("counting hits... ",v->points-i);
00561 }
00562 for(i=0;i<t->aux;i++)t->p[i]/=c->dim;
00563 for(i=0;i<t->aux;i++)t->q[i]/=c->dim;
00564
00565 build_tree_from_lengths(c->entries,probability,c->lengthlist);
00566
00567 free(probability);
00568 }
00569
00570
00571
00572 {
00573 long *wordlen=c->lengthlist;
00574 long *index=_ogg_malloc(c->entries*sizeof(long));
00575 long *revindex=_ogg_malloc(c->entries*sizeof(long));
00576 int k;
00577 for(i=0;i<c->entries;i++)index[i]=i;
00578 isortvals=c->lengthlist;
00579 qsort(index,c->entries,sizeof(long),iascsort);
00580
00581
00582
00583 for(i=0;i<c->entries;i++)revindex[index[i]]=i;
00584 for(i=0;i<t->aux;i++){
00585 if(!(i&0x3f))spinnit("sorting... ",t->aux-i);
00586
00587 if(t->ptr0[i]>=0)t->ptr0[i]=revindex[t->ptr0[i]];
00588 if(t->ptr1[i]>=0)t->ptr1[i]=revindex[t->ptr1[i]];
00589 t->p[i]=revindex[t->p[i]];
00590 t->q[i]=revindex[t->q[i]];
00591 }
00592 free(revindex);
00593
00594
00595 c->lengthlist=_ogg_calloc(c->entries,sizeof(long));
00596 b->valuelist=_ogg_malloc(sizeof(float)*c->entries*c->dim);
00597 c->quantlist=_ogg_malloc(sizeof(long)*c->entries*c->dim);
00598 for(i=0;i<c->entries;i++){
00599 long e=index[i];
00600 for(k=0;k<c->dim;k++){
00601 b->valuelist[i*c->dim+k]=v->entrylist[e*c->dim+k];
00602 c->quantlist[i*c->dim+k]=quantlist[e*c->dim+k];
00603 }
00604 c->lengthlist[i]=wordlen[e];
00605 }
00606
00607 free(wordlen);
00608 }
00609
00610 fprintf(stderr,"Done. \n\n");
00611 }
00612