Remove unused tests from configure.ac
[speexdsp.git] / tmv / ltp_tm.h
1 /* Copyright (C) 2007 Hong Zhiqian */\r
2 /**\r
3    @file ltp_tm.h\r
4    @author Hong Zhiqian\r
5    @brief Long-term prediction (pitch analysis) routines for Speex (TriMedia version)\r
6 */\r
7 /*\r
8    Redistribution and use in source and binary forms, with or without\r
9    modification, are permitted provided that the following conditions\r
10    are met:\r
11    \r
12    - Redistributions of source code must retain the above copyright\r
13    notice, this list of conditions and the following disclaimer.\r
14    \r
15    - Redistributions in binary form must reproduce the above copyright\r
16    notice, this list of conditions and the following disclaimer in the\r
17    documentation and/or other materials provided with the distribution.\r
18    \r
19    - Neither the name of the Xiph.org Foundation nor the names of its\r
20    contributors may be used to endorse or promote products derived from\r
21    this software without specific prior written permission.\r
22    \r
23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
24    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
26    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR\r
27    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
28    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
29    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
30    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
31    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
32    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
33    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
34 */\r
35 #include <ops/custom_defs.h>\r
36 #include "profile_tm.h"\r
37 \r
38 #ifdef FIXED_POINT\r
39 \r
40 #define OVERRIDE_INNER_PROD\r
41 Int32 inner_prod(const Int16 * restrict x, const Int16 * restrict y, int len)\r
42 {\r
43         register int sum = 0;\r
44 \r
45         INNERPROD_START();\r
46 \r
47         if ( (int)x & 0x03 == 0 && (int)y & 0x03 == 0 )\r
48         {\r
49                 register int i;\r
50 \r
51                 len >>= 1;\r
52                 for ( i=0 ; i<len ; i+=4 )\r
53                 {\r
54                         register int x0, x1, y0, y1, x2, x3, y2, y3;\r
55 \r
56                         x0 = ld32x(x,i);\r
57                         y0 = ld32x(x,i);\r
58                         x1 = ld32x(x,i+1);\r
59                         y1 = ld32x(y,i+1);\r
60                         sum += (ifir16(x0,y0) + ifir16(x1,y1)) >> 6;\r
61 \r
62                         x2 = ld32x(x,i+2);\r
63                         y2 = ld32x(x,i+2);\r
64                         x3 = ld32x(x,i+3);\r
65                         y3 = ld32x(x,i+3);\r
66                         sum += (ifir16(x2,y2) + ifir16(x3,y3)) >> 6;\r
67 \r
68                 }\r
69         } else\r
70         {\r
71                 len >>= 3;\r
72                 while( len-- )\r
73                 {\r
74                         register int x0, x1, x2, x3, y0, y1, y2, y3;\r
75 \r
76                         x0 =    pack16lsb(x[0],x[1]);\r
77                         y0 =    pack16lsb(y[0],y[1]);\r
78                         x1 =    pack16lsb(x[2],x[3]);\r
79                         y1 =    pack16lsb(y[2],y[3]);\r
80                         sum     +=      (ifir16(x0,y0) + ifir16(x1,y1)) >> 6;\r
81 \r
82                         x2 =    pack16lsb(x[4],x[5]);\r
83                         y2 =    pack16lsb(y[4],y[5]);\r
84                         x3 =    pack16lsb(x[6],x[7]);\r
85                         y3 =    pack16lsb(y[6],y[7]);\r
86                         sum     +=      (ifir16(x2,y2) + ifir16(x3,y3)) >> 6;\r
87 \r
88                         x += 8;\r
89                         y += 8;\r
90                 }\r
91         }\r
92 \r
93         INNERPROD_STOP();\r
94         return sum;\r
95 }\r
96 \r
97 #define OVERRIDE_PITCH_XCORR\r
98 void pitch_xcorr(const Int16 *_x, const Int16 *_y, Int32 *corr, int len, int nb_pitch, char *stack)\r
99 {\r
100         register int sum_1, sum_2, sum_3, sum_4;\r
101         register int y10, y32, y54, y76, y21, y43, y65;\r
102         register int x10, x32;\r
103         register int i, j, k, limit;\r
104         \r
105         TMDEBUG_ALIGNMEM(_x);\r
106         TMDEBUG_ALIGNMEM(_y);\r
107 \r
108         PITCHXCORR_START();\r
109 \r
110         limit   = nb_pitch >> 1;\r
111         len             >>= 1;\r
112 \r
113         for (i=0 ; i<limit ; i+=2 )\r
114         {\r
115                 sum_1 = sum_2 = sum_3 = sum_4 = 0;\r
116                 \r
117                 y10     = ld32x(_y,i);\r
118                 y32     = ld32x(_y,i+1);\r
119                 \r
120                 for ( j=0 ; j<len ; j+=2 )\r
121                 {\r
122                         x10 = ld32x(_x,j);\r
123                         x32 = ld32x(_x,j+1);\r
124                         y54 = ld32x(_y,i+j+2);\r
125                         y76 = ld32x(_y,i+j+3);\r
126                         \r
127                         sum_1 += (ifir16(x10,y10) + ifir16(x32,y32)) >> 6;\r
128                         sum_3 += (ifir16(x10,y32) + ifir16(x32,y54)) >> 6;\r
129 \r
130                         y21 = funshift2(y32,y10);\r
131                         y43 = funshift2(y54,y32);\r
132                         y65 = funshift2(y76,y54);\r
133 \r
134                         sum_2 += (ifir16(x10,y21) + ifir16(x32,y43)) >> 6;\r
135                         sum_4 += (ifir16(x10,y43) + ifir16(x32,y65)) >> 6;\r
136 \r
137                         y10 = y54;\r
138                         y32 = y76;\r
139 \r
140                 }\r
141 \r
142                 k = i << 1;\r
143                 corr[nb_pitch-1-k]=sum_1;\r
144                 corr[nb_pitch-2-k]=sum_2;\r
145                 corr[nb_pitch-3-k]=sum_3;\r
146                 corr[nb_pitch-4-k]=sum_4;\r
147         }\r
148 \r
149 #ifndef REMARK_ON\r
150         (void)stack;\r
151 #endif\r
152 \r
153         PITCHXCORR_STOP();\r
154 }\r
155 \r
156 #ifndef ttisim\r
157 #define OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ\r
158 static int pitch_gain_search_3tap_vq\r
159 (\r
160         const signed char       *gain_cdbk,\r
161         int                                     gain_cdbk_size,\r
162         Int16                           *C16,\r
163         Int16                           max_gain\r
164 )\r
165 {\r
166         register int    pp = 0x00400040, p=64;\r
167         register int    g10, g2, g20, g21, g02, g22, g01;\r
168         register int    cb0, cb1, cb2, cb5432;\r
169         register int    C10, C32, C54, C76, C98, C83, C2;\r
170         register int    acc0, acc1, acc2, acc3, sum, gsum, bsum=-VERY_LARGE32;\r
171         register int    i, best_cdbk=0;\r
172         register Int16  tmp;\r
173 \r
174         TMDEBUG_ALIGNMEM(C16);\r
175         TMDEBUG_ALIGNMEM(gain_cdbk+2);\r
176 \r
177         PITCHGAINSEARCH3TAPVQ_START();\r
178 \r
179         tmp  = ild16(gain_cdbk);\r
180         C98      = ld32x(C16,4);\r
181         C32      = ld32x(C16,1);\r
182         C10  = ld32(C16);\r
183         C54      = ld32x(C16,2);\r
184         C76      = ld32x(C16,3);\r
185 \r
186         cb0  = sex8(tmp);\r
187         cb1      = sex8(tmp>>8);\r
188         C83      = funshift2(C98,C32);\r
189         C2       = sex16(C32);\r
190         gain_cdbk += 2;\r
191 \r
192 \r
193 #if (TM_UNROLL && TM_UNROLL_PITCHGAINSEARCH3TAPVQ > 0)\r
194 #pragma TCS_unroll=4\r
195 #pragma TCS_unrollexact=1\r
196 #endif\r
197         for ( i=0 ; i<gain_cdbk_size ; ++i ) \r
198         {\r
199          cb5432 = ld32x(gain_cdbk,i);\r
200                  cb2    = sex8(cb5432);\r
201                  gsum   = sex8(cb5432>>8);\r
202                  sum    = 0;\r
203                  \r
204                  g10    =  pack16lsb(cb1 + 32, cb0 + 32);\r
205                  g2             =  cb2 + 32;\r
206                  g02    =  pack16lsb(g10, g2);\r
207                  acc0   =  dspidualmul(g10,pp);\r
208                  sum    += ifir16(acc0,C10);\r
209                  sum    += p * g2 * C2;\r
210 \r
211                  g22    =  pack16lsb(g02, g02);\r
212                  g01    =  funshift2(g10, g10);\r
213 \r
214                  acc1   =  dspidualmul(g22, g01);\r
215                  sum    -= ifir16(acc1, C54);\r
216                  acc2   =  dspidualmul(g10, g10);\r
217                  sum    -= ifir16(acc2, C76);\r
218 \r
219                  g20    =  pack16lsb(g2, g10);\r
220                  g21    =  funshift2(g2, g10);\r
221                  acc3   =  dspidualmul(g20, g21);\r
222                  sum    -= ifir16(acc3, C83);\r
223         \r
224 \r
225                 if ( sum>bsum && gsum<=max_gain ) \r
226                 {       bsum = sum;\r
227                         best_cdbk=i;\r
228                 }\r
229 \r
230                 cb0     = sex8(cb5432 >> 16);\r
231                 cb1     = asri(24,cb5432);\r
232         }\r
233 #if (TM_UNROLL && TM_UNROLL_PITCHGAINSEARCH3TAPVQ > 0)\r
234 #pragma TCS_unrollexact=0\r
235 #pragma TCS_unroll=0\r
236 #endif\r
237 \r
238         PITCHGAINSEARCH3TAPVQ_STOP();\r
239         return best_cdbk;\r
240 }\r
241 #endif\r
242 \r
243 #define OVERRIDE_COMPUTE_PITCH_ERROR\r
244 #ifndef OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ\r
245 inline Int32 compute_pitch_error(Int16 *C, Int16 *g, Int16 pitch_control)\r
246 {\r
247         register int c10, c32, c54, c76, c98, c83;\r
248         register int g10, g32, g02, g22, g01, g21, g20;\r
249         register int pp, tmp0, tmp1, tmp2, tmp3;\r
250         register int sum = 0;\r
251 \r
252         \r
253         COMPUTEPITCHERROR_START();\r
254 \r
255         g10  =  ld32(g);\r
256         g32  =  ld32x(g,1);\r
257         pp   =  pack16lsb(pitch_control,pitch_control);\r
258         c10  =  ld32(C);\r
259         c32  =  ld32x(C,1);\r
260         g02  =  pack16lsb(g10,g32);\r
261         g22      =  pack16lsb(g32,g32);\r
262         g01  =  funshift2(g10,g10);\r
263         tmp0 =  dspidualmul(g10,pp);\r
264         sum  += ifir16(tmp0, c10);\r
265         sum  += pitch_control * sex16(g32) * sex16(c32);\r
266         c54  =  ld32x(C,2);\r
267         c76  =  ld32x(C,3);\r
268         c98  =  ld32x(C,4);\r
269         tmp1 =  dspidualmul(g22,g01);\r
270         sum  -= ifir16(tmp1, c54);\r
271         tmp2 =  dspidualmul(g10,g10);\r
272         sum  -= ifir16(tmp2,c76);\r
273         c83  =  funshift2(c98,c32);\r
274         g20      =  funshift2(g02,g02);\r
275         g21  =  funshift2(g02,g10);\r
276         tmp3 =  dspidualmul(g20,g21);\r
277         sum      -= ifir16(tmp3,c83);\r
278 \r
279         COMPUTEPITCHERROR_STOP();\r
280     return sum;\r
281 }\r
282 #endif\r
283 \r
284 #define OVERRIDE_OPEN_LOOP_NBEST_PITCH\r
285 void open_loop_nbest_pitch(Int16 *sw, int start, int end, int len, int *pitch, Int16 *gain, int N, char *stack)\r
286 {\r
287         VARDECL(int *best_score);\r
288         VARDECL(int *best_ener);\r
289         VARDECL(Int32 *corr);\r
290         VARDECL(Int16 *corr16);\r
291         VARDECL(Int16 *ener16);\r
292         register int i, j, k, l, N4, N2;\r
293         register int _sw10, _sw32, _s0, _s2, limit;\r
294         register int *energy;\r
295         register int cshift=0, eshift=0;\r
296         register int scaledown = 0;\r
297         register int e0, _energy0;\r
298 \r
299         ALLOC(corr16, end-start+1, Int16);\r
300         ALLOC(ener16, end-start+1, Int16);\r
301         ALLOC(corr, end-start+1, Int32);\r
302         ALLOC(best_score, N, int);\r
303         ALLOC(best_ener, N, int);\r
304         energy = corr;\r
305         N4 = N << 2;\r
306         N2 = N >> 1;\r
307 \r
308         TMDEBUG_ALIGNMEM(sw);\r
309         TMDEBUG_ALIGNMEM(pitch);\r
310         TMDEBUG_ALIGNMEM(gain);\r
311         TMDEBUG_ALIGNMEM(best_score);\r
312         TMDEBUG_ALIGNMEM(best_ener);\r
313         TMDEBUG_ALIGNMEM(corr16);\r
314         TMDEBUG_ALIGNMEM(ener16);\r
315 \r
316         OPENLOOPNBESTPITCH_START();\r
317 \r
318         for ( i=0 ; i<N4 ; i+=4 )\r
319         {       st32d(i,best_score,-1);\r
320                 st32d(i,best_ener,0);\r
321                 st32d(i,pitch,start);\r
322         }\r
323 \r
324         for ( j=asri(1,-end) ; j<N2 ; ++j )\r
325         {       register int _sw10;\r
326 \r
327                 _sw10 = ld32x(sw,j);\r
328                 _sw10 = dspidualabs(_sw10);\r
329 \r
330                 if ( _sw10 & 0xC000C000 )\r
331                 {       scaledown = 1;\r
332                         break;\r
333                 }\r
334         }\r
335 \r
336         if ( scaledown )\r
337         {\r
338                 for ( j=asri(1,-end),k=asli(1,-end) ; j<N2 ; ++j,k+=4 )\r
339                 {       register int _sw10;\r
340                 \r
341                         _sw10 = ld32x(sw,j);\r
342                         _sw10 = dualasr(_sw10,1);\r
343                         st32d(k, sw, _sw10);\r
344                 }\r
345         }      \r
346 \r
347         energy[0] = _energy0 = inner_prod(sw-start, sw-start, len);\r
348         e0 = inner_prod(sw, sw, len);\r
349 \r
350         j=asri(1,-start-1); k=j+20;\r
351         _sw10 = ld32x(sw,j);\r
352         _sw32 = ld32x(sw,k);\r
353         limit = end-1-start;\r
354 \r
355         for ( i=1,--j,--k ; i<limit ; i+=2,--j,--k )\r
356         {       register int _energy1, __sw10, __sw32, __s0, __s2;\r
357       \r
358                 _s0    = sex16(_sw10);\r
359                 _s2        = sex16(_sw32);\r
360                 _energy1 = (_energy0 + ((_s0 * _s0) >> 6)) -  ((_s2 * _s2) >> 6);\r
361                 _energy0 = imax(0,_energy1);\r
362                 energy[i] = _energy0;\r
363                 __sw10 = ld32x(sw,j);\r
364                 __sw32 = ld32x(sw,k);\r
365                 __s0   = asri(16,__sw10);\r
366                 __s2   = asri(16,__sw32);\r
367                 _energy1 = (_energy0 + ((__s0 * __s0) >> 6)) -  ((__s2 * __s2) >> 6);\r
368                 _energy0 = imax(0,_energy1);\r
369                 energy[i+1] = _energy0;\r
370                 _sw10 = __sw10;\r
371                 _sw32 = __sw32;\r
372         }\r
373 \r
374         _s0    = sex16(_sw10);\r
375         _s2        = sex16(_sw32);\r
376         _energy0 = imax(0,(_energy0 + ((_s0 * _s0) >> 6)) -  ((_s2 * _s2) >> 6));\r
377         energy[i] = _energy0;\r
378 \r
379 \r
380         eshift = normalize16(energy, ener16, 32766, end-start+1); \r
381         /* In fixed-point, this actually overrites the energy array (aliased to corr) */\r
382         pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);\r
383         /* Normalize to 180 so we can square it and it still fits in 16 bits */\r
384         cshift = normalize16(corr, corr16, 180, end-start+1);\r
385         /* If we scaled weighted input down, we need to scale it up again (OK, so we've just lost the LSB, who cares?) */\r
386    \r
387         if ( scaledown )\r
388         {\r
389                 for ( j=asri(1,-end),k=asli(1,-end) ; j<N2 ; ++j,k+=4 )\r
390                 {       register int _sw10;\r
391                         \r
392                         _sw10 = ld32x(sw,j);\r
393                         _sw10 = dualasl(_sw10,1);\r
394                         st32d(k, sw, _sw10);\r
395                 }\r
396         }      \r
397 \r
398         /* Search for the best pitch prediction gain */\r
399         for ( i=start,l=0 ; i<end ; i+=2,++l )\r
400         {       register int _corr16, _c0, _c1;\r
401                 register int _ener16, _e0, _e1;\r
402 \r
403                 _corr16 = ld32x(corr16,l);\r
404                 _corr16 = dspidualmul(_corr16,_corr16);\r
405                 _c0     = sex16(_corr16);\r
406                 _c1     = asri(16,_corr16);\r
407 \r
408                 _ener16 = ld32x(ener16,l);\r
409                 _ener16 = dspidualadd(_ener16,0x00010001);\r
410                 _e0         = sex16(_ener16);\r
411                 _e1     = asri(16,_ener16);\r
412 \r
413       /* Instead of dividing the tmp by the energy, we multiply on the other side */\r
414       \r
415                 if ( (_c0 * best_ener[N-1]) > (best_score[N-1] * _e0) )\r
416                 {       \r
417                         best_score[N-1] = _c0;\r
418                         best_ener[N-1] = _e0;\r
419                         pitch[N-1] = i;\r
420 \r
421                         for( j=0 ; j<N-1 ; ++j )\r
422                         {       if ( (_c0 * best_ener[j]) > best_score[j] * _e0 )\r
423                                 {       for( k=N-1 ; k>j ; --k )\r
424                                         {\r
425                                                 best_score[k]=best_score[k-1];\r
426                                                 best_ener[k]=best_ener[k-1];\r
427                                                 pitch[k]=pitch[k-1];\r
428                                         }\r
429 \r
430                                 best_score[j]=_c0;\r
431                                         best_ener[j]=_e0;\r
432                                         pitch[j]=i;\r
433                                         break;\r
434                                 }\r
435                         }\r
436                 }\r
437 \r
438                 if ( (_c1 * best_ener[N-1]) > (best_score[N-1] * _e1) )\r
439                 {       \r
440                         best_score[N-1] = _c1;\r
441                         best_ener[N-1] = _e1;\r
442                         pitch[N-1] = i+1;\r
443 \r
444                         for( j=0 ; j<N-1 ; ++j )\r
445                         {       if ( (_c1 * best_ener[j]) > best_score[j] * _e1 )\r
446                                 {       for( k=N-1 ; k>j ; --k )\r
447                                         {\r
448                                                 best_score[k]=best_score[k-1];\r
449                                                 best_ener[k]=best_ener[k-1];\r
450                                                 pitch[k]=pitch[k-1];\r
451                                         }\r
452 \r
453                                 best_score[j]=_c1;\r
454                                         best_ener[j]=_e1;\r
455                                         pitch[j]=i+1;\r
456                                         break;\r
457                                 }\r
458                         }\r
459                 }\r
460    }\r
461    \r
462    /* Compute open-loop gain if necessary */\r
463    if (gain)\r
464    {\r
465                 for (j=0;j<N;j++)\r
466                 {\r
467                         spx_word16_t g;\r
468                         i=pitch[j];\r
469                         g = DIV32(SHL32(EXTEND32(corr16[i-start]),cshift), 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(SHL32(EXTEND32(ener16[i-start]),eshift))),6));\r
470                         gain[j] = imax(0,g);\r
471                 }\r
472         }\r
473 \r
474         OPENLOOPNBESTPITCH_STOP();\r
475 }\r
476 \r
477 \r
478 #endif\r
479 \r