00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <stdlib.h>
00019 #include <stdio.h>
00020 #include <math.h>
00021 #include <string.h>
00022 #include <errno.h>
00023 #include "../lib/scales.h"
00024 #include "bookutil.h"
00025 #include "vqgen.h"
00026 #include "vqsplit.h"
00027 #include "../lib/os.h"
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059 static float _dist(int el,float *a, float *b){
00060 int i;
00061 float acc=0.f;
00062 for(i=0;i<el;i++){
00063 float val=(a[i]-b[i]);
00064 acc+=val*val;
00065 }
00066 return(acc);
00067 }
00068
00069 static float *pointlist;
00070 static long points=0;
00071
00072 void add_vector(codebook *b,float *vec,long n){
00073 int dim=b->dim,i,j;
00074 int step=n/dim;
00075 for(i=0;i<step;i++){
00076 for(j=i;j<n;j+=step){
00077 pointlist[points++]=vec[j];
00078 }
00079 }
00080 }
00081
00082 static int bestm(codebook *b,float *vec){
00083 encode_aux_threshmatch *tt=b->c->thresh_tree;
00084 int dim=b->dim;
00085 int i,k,o;
00086 int best=0;
00087
00088
00089
00090
00091 for(k=0,o=dim-1;k<dim;k++,o--){
00092 int i;
00093 for(i=0;i<tt->threshvals-1;i++)
00094 if(vec[o]<tt->quantthresh[i])break;
00095 best=(best*tt->quantvals)+tt->quantmap[i];
00096 }
00097 return(best);
00098 }
00099
00100 static int closest(codebook *b,float *vec,int current){
00101 encode_aux_threshmatch *tt=b->c->thresh_tree;
00102 int dim=b->dim;
00103 int i,k,o;
00104
00105 float bestmetric=0;
00106 int bestentry=-1;
00107 int best=bestm(b,vec);
00108
00109 if(current<0 && b->c->lengthlist[best]>0)return best;
00110
00111 for(i=0;i<b->entries;i++){
00112 if(b->c->lengthlist[i]>0 && i!=best && i!=current){
00113 float thismetric=_dist(dim, vec, b->valuelist+i*dim);
00114 if(bestentry==-1 || thismetric<bestmetric){
00115 bestentry=i;
00116 bestmetric=thismetric;
00117 }
00118 }
00119 }
00120
00121 return(bestentry);
00122 }
00123
00124 static float _heuristic(codebook *b,float *ppt,int secondbest){
00125 float *secondcell=b->valuelist+secondbest*b->dim;
00126 int best=bestm(b,ppt);
00127 float *firstcell=b->valuelist+best*b->dim;
00128 float error=_dist(b->dim,firstcell,secondcell);
00129 float *zero=alloca(b->dim*sizeof(float));
00130 float fromzero;
00131
00132 memset(zero,0,b->dim*sizeof(float));
00133 fromzero=sqrt(_dist(b->dim,firstcell,zero));
00134
00135 return(error/fromzero);
00136 }
00137
00138 static int longsort(const void *a, const void *b){
00139 return **(long **)b-**(long **)a;
00140 }
00141
00142 void usage(void){
00143 fprintf(stderr,"Ogg/Vorbis lattice codebook paring utility\n\n"
00144 "usage: latticepare book.vqh data.vqd <target_cells> <protected_cells> base\n"
00145 "where <target_cells> is the desired number of final cells (or -1\n"
00146 " for no change)\n"
00147 " <protected_cells> is the number of highest-hit count cells\n"
00148 " to protect from dispersal\n"
00149 " base is the base name (not including .vqh) of the new\n"
00150 " book\n\n");
00151 exit(1);
00152 }
00153
00154 int main(int argc,char *argv[]){
00155 char *basename;
00156 codebook *b=NULL;
00157 int entries=0;
00158 int dim=0;
00159 long i,j,target=-1,protect=-1;
00160 FILE *out=NULL;
00161
00162 int argnum=0;
00163
00164 argv++;
00165 if(*argv==NULL){
00166 usage();
00167 exit(1);
00168 }
00169
00170 while(*argv){
00171 if(*argv[0]=='-'){
00172
00173 argv++;
00174
00175 }else{
00176 switch (argnum++){
00177 case 0:case 1:
00178 {
00179
00180
00181
00182
00183 char *dot;
00184 char *ext=NULL;
00185 char *name=strdup(*argv++);
00186 dot=strrchr(name,'.');
00187 if(dot)
00188 ext=dot+1;
00189 else{
00190 ext="";
00191
00192 }
00193
00194
00195
00196 if(!strcmp(ext,"vqh")){
00197
00198 basename=strrchr(name,'/');
00199 if(basename)
00200 basename=strdup(basename)+1;
00201 else
00202 basename=strdup(name);
00203 dot=strrchr(basename,'.');
00204 if(dot)*dot='\0';
00205
00206 b=codebook_load(name);
00207 dim=b->dim;
00208 entries=b->entries;
00209 }
00210
00211
00212
00213 if(!strcmp(ext,"vqd") && !points){
00214 int cols;
00215 long lines=0;
00216 char *line;
00217 float *vec;
00218 FILE *in=fopen(name,"r");
00219 if(!in){
00220 fprintf(stderr,"Could not open input file %s\n",name);
00221 exit(1);
00222 }
00223
00224 reset_next_value();
00225 line=setup_line(in);
00226
00227 {
00228 char *temp=line;
00229 while(*temp==' ')temp++;
00230 for(cols=0;*temp;cols++){
00231 while(*temp>32)temp++;
00232 while(*temp==' ')temp++;
00233 }
00234 }
00235 vec=alloca(cols*sizeof(float));
00236
00237
00238 while(line){
00239 lines++;
00240 for(j=0;j<cols;j++)
00241 if(get_line_value(in,vec+j)){
00242 fprintf(stderr,"Too few columns on line %ld in data file\n",lines);
00243 exit(1);
00244 }
00245 if((lines&0xff)==0)spinnit("counting samples...",lines*cols);
00246 line=setup_line(in);
00247 }
00248 pointlist=_ogg_malloc((cols*lines+entries*dim)*sizeof(float));
00249
00250 rewind(in);
00251 line=setup_line(in);
00252 while(line){
00253 lines--;
00254 for(j=0;j<cols;j++)
00255 if(get_line_value(in,vec+j)){
00256 fprintf(stderr,"Too few columns on line %ld in data file\n",lines);
00257 exit(1);
00258 }
00259
00260 add_vector(b,vec,cols);
00261 if((lines&0xff)==0)spinnit("loading samples...",lines*cols);
00262
00263 line=setup_line(in);
00264 }
00265 fclose(in);
00266 }
00267 }
00268 break;
00269 case 2:
00270 target=atol(*argv++);
00271 if(target==0)target=entries;
00272 break;
00273 case 3:
00274 protect=atol(*argv++);
00275 break;
00276 case 4:
00277 {
00278 char *buff=alloca(strlen(*argv)+5);
00279 sprintf(buff,"%s.vqh",*argv);
00280 basename=*argv++;
00281
00282 out=fopen(buff,"w");
00283 if(!out){
00284 fprintf(stderr,"unable ot open %s for output",buff);
00285 exit(1);
00286 }
00287 }
00288 break;
00289 default:
00290 usage();
00291 }
00292 }
00293 }
00294 if(!entries || !points || !out)usage();
00295 if(target==-1)usage();
00296
00297
00298 for(i=0;i<entries;i++)
00299 for(j=0;j<dim;j++)
00300 pointlist[points++]=b->valuelist[i*dim+j];
00301
00302 points/=dim;
00303
00304
00305 {
00306 encode_aux_nearestmatch *nt=NULL;
00307 long pointssofar=0;
00308 long *pointindex;
00309 long indexedpoints=0;
00310 long *entryindex;
00311 long *reventry;
00312 long *membership=_ogg_malloc(points*sizeof(long));
00313 long *firsthead=_ogg_malloc(entries*sizeof(long));
00314 long *secondary=_ogg_malloc(points*sizeof(long));
00315 long *secondhead=_ogg_malloc(entries*sizeof(long));
00316
00317 long *cellcount=_ogg_calloc(entries,sizeof(long));
00318 long *cellcount2=_ogg_calloc(entries,sizeof(long));
00319 float *cellerror=_ogg_calloc(entries,sizeof(float));
00320 float *cellerrormax=_ogg_calloc(entries,sizeof(float));
00321 long cellsleft=entries;
00322 for(i=0;i<points;i++)membership[i]=-1;
00323 for(i=0;i<entries;i++)firsthead[i]=-1;
00324 for(i=0;i<points;i++)secondary[i]=-1;
00325 for(i=0;i<entries;i++)secondhead[i]=-1;
00326
00327 for(i=0;i<points;i++){
00328
00329
00330 float *ppt=pointlist+i*dim;
00331 int firstentry=closest(b,ppt,-1);
00332 int secondentry=closest(b,ppt,firstentry);
00333 float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt);
00334 float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);
00335
00336 if(!(i&0xff))spinnit("initializing... ",points-i);
00337
00338 membership[i]=firsthead[firstentry];
00339 firsthead[firstentry]=i;
00340 secondary[i]=secondhead[secondentry];
00341 secondhead[secondentry]=i;
00342
00343 if(i<points-entries){
00344 cellerror[firstentry]+=secondmetric-firstmetric;
00345 cellerrormax[firstentry]=max(cellerrormax[firstentry],
00346 _heuristic(b,ppt,secondentry));
00347 cellcount[firstentry]++;
00348 cellcount2[secondentry]++;
00349 }
00350 }
00351
00352
00353
00354 {
00355 long **countindex=_ogg_calloc(entries,sizeof(long *));
00356 for(i=0;i<entries;i++)countindex[i]=cellcount+i;
00357 qsort(countindex,entries,sizeof(long *),longsort);
00358 for(i=0;i<protect;i++){
00359 int ptr=countindex[i]-cellcount;
00360 cellerrormax[ptr]=9e50f;
00361 }
00362 }
00363
00364 {
00365 fprintf(stderr,"\r");
00366 for(i=0;i<entries;i++){
00367
00368 int entry=i;
00369 for(j=0;j<dim;j++){
00370 fprintf(stderr,"%d:",entry%b->c->thresh_tree->quantvals);
00371 entry/=b->c->thresh_tree->quantvals;
00372 }
00373
00374 fprintf(stderr,":%ld/%ld, ",cellcount[i],cellcount2[i]);
00375 }
00376 fprintf(stderr,"\n");
00377 }
00378
00379
00380 while(cellsleft>target){
00381 int bestcell=-1;
00382 float besterror=0;
00383 float besterror2=0;
00384 long head=-1;
00385 char spinbuf[80];
00386 sprintf(spinbuf,"cells left to eliminate: %ld : ",cellsleft-target);
00387
00388
00389 for(i=0;i<entries;i++){
00390 if(b->c->lengthlist[i]>0){
00391 if(bestcell==-1 || cellerrormax[i]<=besterror2){
00392 if(bestcell==-1 || cellerrormax[i]<besterror2 ||
00393 besterror>cellerror[i]){
00394 besterror=cellerror[i];
00395 besterror2=cellerrormax[i];
00396 bestcell=i;
00397 }
00398 }
00399 }
00400 }
00401
00402 fprintf(stderr,"\reliminating cell %d \n"
00403 " dispersal error of %g max/%g total (%ld hits)\n",
00404 bestcell,besterror2,besterror,cellcount[bestcell]);
00405
00406
00407
00408 b->c->lengthlist[bestcell]=0;
00409 head=firsthead[bestcell];
00410 firsthead[bestcell]=-1;
00411 while(head!=-1){
00412
00413 float *ppt=pointlist+head*dim;
00414 int firstentry=closest(b,ppt,-1);
00415 int secondentry=closest(b,ppt,firstentry);
00416 float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt);
00417 float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);
00418 long next=membership[head];
00419
00420 if(head<points-entries){
00421 cellcount[firstentry]++;
00422 cellcount[bestcell]--;
00423 cellerror[firstentry]+=secondmetric-firstmetric;
00424 cellerrormax[firstentry]=max(cellerrormax[firstentry],
00425 _heuristic(b,ppt,secondentry));
00426 }
00427
00428 membership[head]=firsthead[firstentry];
00429 firsthead[firstentry]=head;
00430 head=next;
00431 if(cellcount[bestcell]%128==0)
00432 spinnit(spinbuf,cellcount[bestcell]+cellcount2[bestcell]);
00433
00434 }
00435
00436
00437
00438 head=secondhead[bestcell];
00439 secondhead[bestcell]=-1;
00440 while(head!=-1){
00441 float *ppt=pointlist+head*dim;
00442
00443 int firstentry=closest(b,ppt,-1);
00444
00445 int secondentry=closest(b,ppt,firstentry);
00446
00447 float oldsecondmetric=_dist(dim,b->valuelist+dim*bestcell,ppt);
00448
00449 float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);
00450 long next=secondary[head];
00451
00452 if(head<points-entries){
00453 cellcount2[secondentry]++;
00454 cellcount2[bestcell]--;
00455 cellerror[firstentry]+=secondmetric-oldsecondmetric;
00456 cellerrormax[firstentry]=max(cellerrormax[firstentry],
00457 _heuristic(b,ppt,secondentry));
00458 }
00459
00460 secondary[head]=secondhead[secondentry];
00461 secondhead[secondentry]=head;
00462 head=next;
00463
00464 if(cellcount2[bestcell]%128==0)
00465 spinnit(spinbuf,cellcount2[bestcell]);
00466 }
00467
00468 cellsleft--;
00469 }
00470
00471
00472
00473
00474
00475 for(i=0;i<entries;i++){
00476 long head=firsthead[i];
00477 spinnit("rearranging membership cache... ",entries-i);
00478 while(head!=-1){
00479 long next=membership[head];
00480 membership[head]=i;
00481 head=next;
00482 }
00483 }
00484
00485 free(secondhead);
00486 free(firsthead);
00487 free(cellerror);
00488 free(cellerrormax);
00489 free(secondary);
00490
00491 pointindex=_ogg_malloc(points*sizeof(long));
00492
00493 for(i=0;i<points;i++){
00494 int best=_best(b,pointlist+i*dim,1);
00495 if(best==-1)
00496 pointindex[indexedpoints++]=i;
00497 spinnit("finding orphaned points... ",points-i);
00498 }
00499
00500
00501 entryindex=_ogg_malloc(entries*sizeof(long));
00502 target=0;
00503 for(i=0;i<entries;i++){
00504 if(b->c->lengthlist[i]>0)
00505 entryindex[target++]=i;
00506 }
00507
00508
00509 reventry=_ogg_malloc(entries*sizeof(long));
00510
00511
00512 nt=b->c->nearest_tree=
00513 _ogg_calloc(1,sizeof(encode_aux_nearestmatch));
00514
00515 nt->alloc=4096;
00516 nt->ptr0=_ogg_malloc(sizeof(long)*nt->alloc);
00517 nt->ptr1=_ogg_malloc(sizeof(long)*nt->alloc);
00518 nt->p=_ogg_malloc(sizeof(long)*nt->alloc);
00519 nt->q=_ogg_malloc(sizeof(long)*nt->alloc);
00520 nt->aux=0;
00521
00522 fprintf(stderr,"Leaves added: %d \n",
00523 lp_split(pointlist,points,
00524 b,entryindex,target,
00525 pointindex,indexedpoints,
00526 membership,reventry,
00527 0,&pointssofar));
00528 free(membership);
00529 free(reventry);
00530 free(pointindex);
00531
00532
00533
00534 for(i=0;i<nt->aux;i++)nt->p[i]*=dim;
00535 for(i=0;i<nt->aux;i++)nt->q[i]*=dim;
00536
00537
00538 for(i=0;i<entries;i++)entryindex[i]=1;
00539 for(i=0;i<points-entries;i++){
00540 int best=_best(b,pointlist+i*dim,1);
00541 float *a=pointlist+i*dim;
00542 if(!(i&0xff))spinnit("counting hits...",i);
00543 if(best==-1){
00544 fprintf(stderr,"\nINTERNAL ERROR; a point count not be matched to a\n"
00545 "codebook entry. The new decision tree is broken.\n");
00546 exit(1);
00547 }
00548 entryindex[best]++;
00549 }
00550 for(i=0;i<nt->aux;i++)nt->p[i]/=dim;
00551 for(i=0;i<nt->aux;i++)nt->q[i]/=dim;
00552
00553
00554
00555
00556 {
00557 int upper=0;
00558 long *lengthlist=_ogg_calloc(entries,sizeof(long));
00559 for(i=0;i<entries;i++){
00560 if(b->c->lengthlist[i]>0)
00561 entryindex[upper++]=entryindex[i];
00562 else{
00563 if(entryindex[i]>1){
00564 fprintf(stderr,"\nINTERNAL ERROR; _best matched to unused entry\n");
00565 exit(1);
00566 }
00567 }
00568 }
00569
00570
00571 if(upper != target){
00572 fprintf(stderr,"\nINTERNAL ERROR; packed the wrong number of entries\n");
00573 exit(1);
00574 }
00575
00576 build_tree_from_lengths(upper,entryindex,lengthlist);
00577
00578 upper=0;
00579 for(i=0;i<entries;i++){
00580 if(b->c->lengthlist[i]>0)
00581 b->c->lengthlist[i]=lengthlist[upper++];
00582 }
00583
00584 }
00585 }
00586
00587 write_codebook(out,basename,b->c);
00588
00589 fprintf(stderr,"\r \nDone.\n");
00590 return(0);
00591 }
00592
00593
00594
00595