00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include "ivorbiscodec.h"
00036 #include "os.h"
00037 #include "misc.h"
00038 #include "mdct.h"
00039 #include "mdct_lookup.h"
00040
00041
00042
00043 static void mdct_butterfly_8(DATA_TYPE *x){
00044
00045 REG_TYPE r0 = x[4] + x[0];
00046 REG_TYPE r1 = x[4] - x[0];
00047 REG_TYPE r2 = x[5] + x[1];
00048 REG_TYPE r3 = x[5] - x[1];
00049 REG_TYPE r4 = x[6] + x[2];
00050 REG_TYPE r5 = x[6] - x[2];
00051 REG_TYPE r6 = x[7] + x[3];
00052 REG_TYPE r7 = x[7] - x[3];
00053
00054 x[0] = r5 + r3;
00055 x[1] = r7 - r1;
00056 x[2] = r5 - r3;
00057 x[3] = r7 + r1;
00058 x[4] = r4 - r0;
00059 x[5] = r6 - r2;
00060 x[6] = r4 + r0;
00061 x[7] = r6 + r2;
00062 MB();
00063 }
00064
00065
00066 static void mdct_butterfly_16(DATA_TYPE *x){
00067
00068 REG_TYPE r0, r1;
00069
00070 r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0];
00071 r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1];
00072 x[ 0] = MULT31((r0 + r1) , cPI2_8);
00073 x[ 1] = MULT31((r1 - r0) , cPI2_8);
00074 MB();
00075
00076 r0 = x[10] - x[ 2]; x[10] += x[ 2];
00077 r1 = x[ 3] - x[11]; x[11] += x[ 3];
00078 x[ 2] = r1; x[ 3] = r0;
00079 MB();
00080
00081 r0 = x[12] - x[ 4]; x[12] += x[ 4];
00082 r1 = x[13] - x[ 5]; x[13] += x[ 5];
00083 x[ 4] = MULT31((r0 - r1) , cPI2_8);
00084 x[ 5] = MULT31((r0 + r1) , cPI2_8);
00085 MB();
00086
00087 r0 = x[14] - x[ 6]; x[14] += x[ 6];
00088 r1 = x[15] - x[ 7]; x[15] += x[ 7];
00089 x[ 6] = r0; x[ 7] = r1;
00090 MB();
00091
00092 mdct_butterfly_8(x);
00093 mdct_butterfly_8(x+8);
00094 }
00095
00096
00097 static void mdct_butterfly_32(DATA_TYPE *x){
00098
00099 REG_TYPE r0, r1;
00100
00101 r0 = x[30] - x[14]; x[30] += x[14];
00102 r1 = x[31] - x[15]; x[31] += x[15];
00103 x[14] = r0; x[15] = r1;
00104 MB();
00105
00106 r0 = x[28] - x[12]; x[28] += x[12];
00107 r1 = x[29] - x[13]; x[29] += x[13];
00108 XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] );
00109 MB();
00110
00111 r0 = x[26] - x[10]; x[26] += x[10];
00112 r1 = x[27] - x[11]; x[27] += x[11];
00113 x[10] = MULT31((r0 - r1) , cPI2_8);
00114 x[11] = MULT31((r0 + r1) , cPI2_8);
00115 MB();
00116
00117 r0 = x[24] - x[ 8]; x[24] += x[ 8];
00118 r1 = x[25] - x[ 9]; x[25] += x[ 9];
00119 XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] );
00120 MB();
00121
00122 r0 = x[22] - x[ 6]; x[22] += x[ 6];
00123 r1 = x[ 7] - x[23]; x[23] += x[ 7];
00124 x[ 6] = r1; x[ 7] = r0;
00125 MB();
00126
00127 r0 = x[ 4] - x[20]; x[20] += x[ 4];
00128 r1 = x[ 5] - x[21]; x[21] += x[ 5];
00129 XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] );
00130 MB();
00131
00132 r0 = x[ 2] - x[18]; x[18] += x[ 2];
00133 r1 = x[ 3] - x[19]; x[19] += x[ 3];
00134 x[ 2] = MULT31((r1 + r0) , cPI2_8);
00135 x[ 3] = MULT31((r1 - r0) , cPI2_8);
00136 MB();
00137
00138 r0 = x[ 0] - x[16]; x[16] += x[ 0];
00139 r1 = x[ 1] - x[17]; x[17] += x[ 1];
00140 XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] );
00141 MB();
00142
00143 mdct_butterfly_16(x);
00144 mdct_butterfly_16(x+16);
00145 }
00146
00147
00148 static void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
00149
00150 LOOKUP_T *T = sincos_lookup0;
00151 DATA_TYPE *x1 = x + points - 8;
00152 DATA_TYPE *x2 = x + (points>>1) - 8;
00153 REG_TYPE r0;
00154 REG_TYPE r1;
00155
00156 do{
00157 r0 = x1[6] - x2[6]; x1[6] += x2[6];
00158 r1 = x2[7] - x1[7]; x1[7] += x2[7];
00159 XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
00160
00161 r0 = x1[4] - x2[4]; x1[4] += x2[4];
00162 r1 = x2[5] - x1[5]; x1[5] += x2[5];
00163 XPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T+=step;
00164
00165 r0 = x1[2] - x2[2]; x1[2] += x2[2];
00166 r1 = x2[3] - x1[3]; x1[3] += x2[3];
00167 XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
00168
00169 r0 = x1[0] - x2[0]; x1[0] += x2[0];
00170 r1 = x2[1] - x1[1]; x1[1] += x2[1];
00171 XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T+=step;
00172
00173 x1-=8; x2-=8;
00174 }while(T<sincos_lookup0+1024);
00175 do{
00176 r0 = x1[6] - x2[6]; x1[6] += x2[6];
00177 r1 = x1[7] - x2[7]; x1[7] += x2[7];
00178 XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
00179
00180 r0 = x1[4] - x2[4]; x1[4] += x2[4];
00181 r1 = x1[5] - x2[5]; x1[5] += x2[5];
00182 XNPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T-=step;
00183
00184 r0 = x1[2] - x2[2]; x1[2] += x2[2];
00185 r1 = x1[3] - x2[3]; x1[3] += x2[3];
00186 XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
00187
00188 r0 = x1[0] - x2[0]; x1[0] += x2[0];
00189 r1 = x1[1] - x2[1]; x1[1] += x2[1];
00190 XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T-=step;
00191
00192 x1-=8; x2-=8;
00193 }while(T>sincos_lookup0);
00194 do{
00195 r0 = x2[6] - x1[6]; x1[6] += x2[6];
00196 r1 = x2[7] - x1[7]; x1[7] += x2[7];
00197 XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
00198
00199 r0 = x2[4] - x1[4]; x1[4] += x2[4];
00200 r1 = x2[5] - x1[5]; x1[5] += x2[5];
00201 XPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T+=step;
00202
00203 r0 = x2[2] - x1[2]; x1[2] += x2[2];
00204 r1 = x2[3] - x1[3]; x1[3] += x2[3];
00205 XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
00206
00207 r0 = x2[0] - x1[0]; x1[0] += x2[0];
00208 r1 = x2[1] - x1[1]; x1[1] += x2[1];
00209 XPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T+=step;
00210
00211 x1-=8; x2-=8;
00212 }while(T<sincos_lookup0+1024);
00213 do{
00214 r0 = x1[6] - x2[6]; x1[6] += x2[6];
00215 r1 = x2[7] - x1[7]; x1[7] += x2[7];
00216 XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
00217
00218 r0 = x1[4] - x2[4]; x1[4] += x2[4];
00219 r1 = x2[5] - x1[5]; x1[5] += x2[5];
00220 XNPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T-=step;
00221
00222 r0 = x1[2] - x2[2]; x1[2] += x2[2];
00223 r1 = x2[3] - x1[3]; x1[3] += x2[3];
00224 XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
00225
00226 r0 = x1[0] - x2[0]; x1[0] += x2[0];
00227 r1 = x2[1] - x1[1]; x1[1] += x2[1];
00228 XNPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T-=step;
00229
00230 x1-=8; x2-=8;
00231 }while(T>sincos_lookup0);
00232 }
00233
00234 static void mdct_butterflies(DATA_TYPE *x,int points,int shift){
00235
00236 int stages=8-shift;
00237 int i,j;
00238
00239 for(i=0;--stages>0;i++){
00240 for(j=0;j<(1<<i);j++)
00241 mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
00242 }
00243
00244 for(j=0;j<points;j+=32)
00245 mdct_butterfly_32(x+j);
00246
00247 }
00248
00249 const static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
00250
00251 static int bitrev12(int x){
00252 return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
00253 }
00254
00255 static void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){
00256
00257 int bit = 0;
00258 DATA_TYPE *w0 = x;
00259 DATA_TYPE *w1 = x = w0+(n>>1);
00260 LOOKUP_T *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
00261 LOOKUP_T *Ttop = T+1024;
00262 DATA_TYPE r2;
00263
00264 do{
00265 DATA_TYPE r3 = bitrev12(bit++);
00266 DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
00267 DATA_TYPE *x1 = x + (r3>>shift);
00268
00269 REG_TYPE r0 = x0[0] + x1[0];
00270 REG_TYPE r1 = x1[1] - x0[1];
00271
00272 XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
00273
00274 w1 -= 4;
00275
00276 r0 = (x0[1] + x1[1])>>1;
00277 r1 = (x0[0] - x1[0])>>1;
00278 w0[0] = r0 + r2;
00279 w0[1] = r1 + r3;
00280 w1[2] = r0 - r2;
00281 w1[3] = r3 - r1;
00282
00283 r3 = bitrev12(bit++);
00284 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
00285 x1 = x + (r3>>shift);
00286
00287 r0 = x0[0] + x1[0];
00288 r1 = x1[1] - x0[1];
00289
00290 XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
00291
00292 r0 = (x0[1] + x1[1])>>1;
00293 r1 = (x0[0] - x1[0])>>1;
00294 w0[2] = r0 + r2;
00295 w0[3] = r1 + r3;
00296 w1[0] = r0 - r2;
00297 w1[1] = r3 - r1;
00298
00299 w0 += 4;
00300 }while(T<Ttop);
00301 do{
00302 DATA_TYPE r3 = bitrev12(bit++);
00303 DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
00304 DATA_TYPE *x1 = x + (r3>>shift);
00305
00306 REG_TYPE r0 = x0[0] + x1[0];
00307 REG_TYPE r1 = x1[1] - x0[1];
00308
00309 T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
00310
00311 w1 -= 4;
00312
00313 r0 = (x0[1] + x1[1])>>1;
00314 r1 = (x0[0] - x1[0])>>1;
00315 w0[0] = r0 + r2;
00316 w0[1] = r1 + r3;
00317 w1[2] = r0 - r2;
00318 w1[3] = r3 - r1;
00319
00320 r3 = bitrev12(bit++);
00321 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
00322 x1 = x + (r3>>shift);
00323
00324 r0 = x0[0] + x1[0];
00325 r1 = x1[1] - x0[1];
00326
00327 T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
00328
00329 r0 = (x0[1] + x1[1])>>1;
00330 r1 = (x0[0] - x1[0])>>1;
00331 w0[2] = r0 + r2;
00332 w0[3] = r1 + r3;
00333 w1[0] = r0 - r2;
00334 w1[1] = r3 - r1;
00335
00336 w0 += 4;
00337 }while(w0<w1);
00338 }
00339
00340 void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){
00341 int n2=n>>1;
00342 int n4=n>>2;
00343 DATA_TYPE *iX;
00344 DATA_TYPE *oX;
00345 LOOKUP_T *T;
00346 LOOKUP_T *V;
00347 int shift;
00348 int step;
00349
00350 for (shift=6;!(n&(1<<shift));shift++) {}
00351 shift=13-shift;
00352 step=2<<shift;
00353
00354
00355
00356 iX = in+n2-7;
00357 oX = out+n2+n4;
00358 T = sincos_lookup0;
00359
00360 do{
00361 oX-=4;
00362 XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step;
00363 XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step;
00364 iX-=8;
00365 }while(iX>=in+n4);
00366 do{
00367 oX-=4;
00368 XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step;
00369 XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step;
00370 iX-=8;
00371 }while(iX>=in);
00372
00373 iX = in+n2-8;
00374 oX = out+n2+n4;
00375 T = sincos_lookup0;
00376
00377 do{
00378 T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] );
00379 T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] );
00380 iX-=8;
00381 oX+=4;
00382 }while(iX>=in+n4);
00383 do{
00384 T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] );
00385 T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] );
00386 iX-=8;
00387 oX+=4;
00388 }while(iX>=in);
00389
00390 mdct_butterflies(out+n2,n2,shift);
00391 mdct_bitreverse(out,n,step,shift);
00392
00393
00394
00395 step>>=2;
00396 {
00397 DATA_TYPE *oX1=out+n2+n4;
00398 DATA_TYPE *oX2=out+n2+n4;
00399 DATA_TYPE *iX =out;
00400
00401 switch(step) {
00402 default: {
00403 T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
00404 do{
00405 oX1-=4;
00406 XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step;
00407 XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step;
00408 XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step;
00409 XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step;
00410 oX2+=4;
00411 iX+=8;
00412 }while(iX<oX1);
00413 break;
00414 }
00415
00416 case 1: {
00417
00418 REG_TYPE t0,t1,v0,v1;
00419 T = sincos_lookup0;
00420 V = sincos_lookup1;
00421 t0 = (*T++)>>1;
00422 t1 = (*T++)>>1;
00423 do{
00424 oX1-=4;
00425
00426 t0 += (v0 = (*V++)>>1);
00427 t1 += (v1 = (*V++)>>1);
00428 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
00429 v0 += (t0 = (*T++)>>1);
00430 v1 += (t1 = (*T++)>>1);
00431 XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] );
00432 t0 += (v0 = (*V++)>>1);
00433 t1 += (v1 = (*V++)>>1);
00434 XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] );
00435 v0 += (t0 = (*T++)>>1);
00436 v1 += (t1 = (*T++)>>1);
00437 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
00438
00439 oX2+=4;
00440 iX+=8;
00441 }while(iX<oX1);
00442 break;
00443 }
00444
00445 case 0: {
00446
00447 REG_TYPE t0,t1,v0,v1,q0,q1;
00448 T = sincos_lookup0;
00449 V = sincos_lookup1;
00450 t0 = *T++;
00451 t1 = *T++;
00452 do{
00453 oX1-=4;
00454
00455 v0 = *V++;
00456 v1 = *V++;
00457 t0 += (q0 = (v0-t0)>>2);
00458 t1 += (q1 = (v1-t1)>>2);
00459 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
00460 t0 = v0-q0;
00461 t1 = v1-q1;
00462 XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] );
00463
00464 t0 = *T++;
00465 t1 = *T++;
00466 v0 += (q0 = (t0-v0)>>2);
00467 v1 += (q1 = (t1-v1)>>2);
00468 XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] );
00469 v0 = t0-q0;
00470 v1 = t1-q1;
00471 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
00472
00473 oX2+=4;
00474 iX+=8;
00475 }while(iX<oX1);
00476 break;
00477 }
00478 }
00479
00480 iX=out+n2+n4;
00481 oX1=out+n4;
00482 oX2=oX1;
00483
00484 do{
00485 oX1-=4;
00486 iX-=4;
00487
00488 oX2[0] = -(oX1[3] = iX[3]);
00489 oX2[1] = -(oX1[2] = iX[2]);
00490 oX2[2] = -(oX1[1] = iX[1]);
00491 oX2[3] = -(oX1[0] = iX[0]);
00492
00493 oX2+=4;
00494 }while(oX2<iX);
00495
00496 iX=out+n2+n4;
00497 oX1=out+n2+n4;
00498 oX2=out+n2;
00499
00500 do{
00501 oX1-=4;
00502 oX1[0]= iX[3];
00503 oX1[1]= iX[2];
00504 oX1[2]= iX[1];
00505 oX1[3]= iX[0];
00506 iX+=4;
00507 }while(oX1>oX2);
00508 }
00509 }
00510