Added a few "restrict" keywords and changed some divisions to shifts
[opus.git] / libcelt / celt.c
index b672240..2cefe4e 100644 (file)
@@ -1,4 +1,4 @@
-/* (C) 2007 Jean-Marc Valin, CSIRO
+/* (C) 2007-2008 Jean-Marc Valin, CSIRO
 */
 /*
    Redistribution and use in source and binary forms, with or without
 #include "quant_bands.h"
 #include "psy.h"
 #include "rate.h"
+#include "stack_alloc.h"
+
+static const celt_word16_t preemph = QCONST16(0.8f,15);
 
-#define MAX_PERIOD 1024
 
 /** Encoder state 
  @brief Encoder state
@@ -65,12 +67,10 @@ struct CELTEncoder {
    ec_byte_buffer buf;
    ec_enc         enc;
 
-   celt_word16_t preemph;
    celt_sig_t *preemph_memE;
    celt_sig_t *preemph_memD;
 
    kiss_fftr_cfg fft;
-   struct PsyDecay psy;
 
    celt_sig_t *in_mem;
    celt_sig_t *mdct_overlap;
@@ -79,9 +79,7 @@ struct CELTEncoder {
    celt_word16_t *oldBandE;
 };
 
-
-
-CELTEncoder *celt_encoder_create(const CELTMode *mode)
+CELTEncoder EXPORT *celt_encoder_create(const CELTMode *mode)
 {
    int N, B, C;
    CELTEncoder *st;
@@ -103,8 +101,7 @@ CELTEncoder *celt_encoder_create(const CELTMode *mode)
    ec_byte_writeinit(&st->buf);
    ec_enc_init(&st->enc,&st->buf);
 
-   st->fft = kiss_fftr_alloc(MAX_PERIOD, 0, 0);
-   psydecay_init(&st->psy, MAX_PERIOD/2, st->mode->Fs);
+   st->fft = pitch_state_alloc(MAX_PERIOD);
    
    st->in_mem = celt_alloc(N*C*sizeof(celt_sig_t));
    st->mdct_overlap = celt_alloc(N*C*sizeof(celt_sig_t));
@@ -112,14 +109,13 @@ CELTEncoder *celt_encoder_create(const CELTMode *mode)
 
    st->oldBandE = (celt_word16_t*)celt_alloc(C*mode->nbEBands*sizeof(celt_word16_t));
 
-   st->preemph = QCONST16(0.8f,15);
    st->preemph_memE = (celt_sig_t*)celt_alloc(C*sizeof(celt_sig_t));;
    st->preemph_memD = (celt_sig_t*)celt_alloc(C*sizeof(celt_sig_t));;
 
    return st;
 }
 
-void celt_encoder_destroy(CELTEncoder *st)
+void EXPORT celt_encoder_destroy(CELTEncoder *st)
 {
    if (st == NULL)
    {
@@ -131,8 +127,7 @@ void celt_encoder_destroy(CELTEncoder *st)
 
    ec_byte_writeclear(&st->buf);
 
-   kiss_fft_free(st->fft);
-   psydecay_clear(&st->psy);
+   pitch_state_free(st->fft);
 
    celt_free(st->in_mem);
    celt_free(st->mdct_overlap);
@@ -146,7 +141,7 @@ void celt_encoder_destroy(CELTEncoder *st)
    celt_free(st);
 }
 
-inline celt_int16_t SIG2INT16(celt_sig_t x)
+static inline celt_int16_t SIG2INT16(celt_sig_t x)
 {
    x = PSHR32(x, SIG_SHIFT);
    if (x>32767)
@@ -161,14 +156,14 @@ inline celt_int16_t SIG2INT16(celt_sig_t x)
 }
 
 /** Apply window and compute the MDCT for all sub-frames and all channels in a frame */
-static celt_word32_t compute_mdcts(const mdct_lookup *mdct_lookup, celt_word16_t *window, celt_sig_t *in, celt_sig_t *out, int N, int overlap, int B, int C)
+static celt_word32_t compute_mdcts(const mdct_lookup *lookup, const celt_word16_t * restrict window, celt_sig_t *in, celt_sig_t *out, int N, int overlap, int B, int C)
 {
    int i, c, N4;
    celt_word32_t E = 0;
-   VARDECL(celt_word32_t *x);
-   VARDECL(celt_word32_t *tmp);
+   VARDECL(celt_word32_t, x);
+   VARDECL(celt_word32_t, tmp);
    SAVE_STACK;
-   N4 = (N-overlap)/2;
+   N4 = (N-overlap)>>1;
    ALLOC(x, 2*N, celt_word32_t);
    ALLOC(tmp, N, celt_word32_t);
    for (c=0;c<C;c++)
@@ -190,7 +185,7 @@ static celt_word32_t compute_mdcts(const mdct_lookup *mdct_lookup, celt_word16_t
          }
          for (j=0;j<2*N;j++)
             E += MULT16_16(EXTRACT16(SHR32(x[j],SIG_SHIFT+4)),EXTRACT16(SHR32(x[j],SIG_SHIFT+4)));
-         mdct_forward(mdct_lookup, x, tmp);
+         mdct_forward(lookup, x, tmp);
          /* Interleaving the sub-frames */
          for (j=0;j<N;j++)
             out[C*B*j+C*i+c] = tmp[j];
@@ -201,15 +196,15 @@ static celt_word32_t compute_mdcts(const mdct_lookup *mdct_lookup, celt_word16_t
 }
 
 /** Compute the IMDCT and apply window for all sub-frames and all channels in a frame */
-static void compute_inv_mdcts(const mdct_lookup *mdct_lookup, celt_word16_t *window, celt_sig_t *X, celt_sig_t *out_mem, celt_sig_t *mdct_overlap, int N, int overlap, int B, int C)
+static void compute_inv_mdcts(const mdct_lookup *lookup, const celt_word16_t * restrict window, celt_sig_t *X, celt_sig_t *out_mem, celt_sig_t *mdct_overlap, int N, int overlap, int B, int C)
 {
    int i, c, N4;
-   VARDECL(celt_word32_t *x);
-   VARDECL(celt_word32_t *tmp);
+   VARDECL(celt_word32_t, x);
+   VARDECL(celt_word32_t, tmp);
    SAVE_STACK;
    ALLOC(x, 2*N, celt_word32_t);
    ALLOC(tmp, N, celt_word32_t);
-   N4 = (N-overlap)/2;
+   N4 = (N-overlap)>>1;
    for (c=0;c<C;c++)
    {
       for (i=0;i<B;i++)
@@ -218,7 +213,7 @@ static void compute_inv_mdcts(const mdct_lookup *mdct_lookup, celt_word16_t *win
          /* De-interleaving the sub-frames */
          for (j=0;j<N;j++)
             tmp[j] = X[C*B*j+C*i+c];
-         mdct_backward(mdct_lookup, tmp, x);
+         mdct_backward(lookup, tmp, x);
          /* The first and last part would need to be set to zero if we actually
             wanted to use them. */
          for (j=0;j<overlap;j++)
@@ -237,18 +232,18 @@ static void compute_inv_mdcts(const mdct_lookup *mdct_lookup, celt_word16_t *win
    RESTORE_STACK;
 }
 
-int celt_encode(CELTEncoder *st, celt_int16_t *pcm, unsigned char *compressed, int nbCompressedBytes)
+int EXPORT celt_encode(CELTEncoder *st, celt_int16_t *pcm, unsigned char *compressed, int nbCompressedBytes)
 {
    int i, c, N, B, C, N4;
    int has_pitch;
    int pitch_index;
    celt_word32_t curr_power, pitch_power;
-   VARDECL(celt_sig_t *in);
-   VARDECL(celt_sig_t *freq);
-   VARDECL(celt_norm_t *X);
-   VARDECL(celt_norm_t *P);
-   VARDECL(celt_ener_t *bandE);
-   VARDECL(celt_pgain_t *gains);
+   VARDECL(celt_sig_t, in);
+   VARDECL(celt_sig_t, freq);
+   VARDECL(celt_norm_t, X);
+   VARDECL(celt_norm_t, P);
+   VARDECL(celt_ener_t, bandE);
+   VARDECL(celt_pgain_t, gains);
    SAVE_STACK;
 
    if (check_mode(st->mode) != CELT_OK)
@@ -257,33 +252,31 @@ int celt_encode(CELTEncoder *st, celt_int16_t *pcm, unsigned char *compressed, i
    N = st->block_size;
    B = st->nb_blocks;
    C = st->mode->nbChannels;
-   ALLOC(in, (B+1)*C*N, celt_sig_t);
-   ALLOC(freq, B*C*N, celt_sig_t); /**< Interleaved signal MDCTs */
-   ALLOC(bandE,st->mode->nbEBands*C, celt_ener_t);
-   ALLOC(gains,st->mode->nbPBands, celt_pgain_t);
+   N4 = (N-st->overlap)>>1;
+   ALLOC(in, (B+1)*C*N-2*N4, celt_sig_t);
    
-   N4 = (N-st->overlap)/2;
 
    for (c=0;c<C;c++)
    {
-      for (i=0;i<N4;i++)
-         in[C*i+c] = 0;
       for (i=0;i<st->overlap;i++)
-         in[C*(i+N4)+c] = st->in_mem[C*i+c];
+         in[C*i+c] = st->in_mem[C*i+c];
       for (i=0;i<B*N;i++)
       {
          celt_sig_t tmp = SHL32(EXTEND32(pcm[C*i+c]), SIG_SHIFT);
-         in[C*(i+st->overlap+N4)+c] = SUB32(tmp, MULT16_32_Q15(st->preemph,st->preemph_memE[c]));
+         in[C*(i+st->overlap)+c] = SUB32(tmp, MULT16_32_Q15(preemph,st->preemph_memE[c]));
          st->preemph_memE[c] = tmp;
       }
-      for (i=N*(B+1)-N4;i<N*(B+1);i++)
-         in[C*i+c] = 0;
       for (i=0;i<st->overlap;i++)
-         st->in_mem[C*i+c] = in[C*(N*(B+1)-N4-st->overlap+i)+c];
+         st->in_mem[C*i+c] = in[C*(N*(B+1)-2*N4-st->overlap+i)+c];
    }
+   /* Pitch analysis: we do it early to save on the peak stack space */
+   find_spectral_pitch(st->fft, &st->mode->psy, in, st->out_mem, st->mode->window, st->overlap, MAX_PERIOD, (B+1)*N-2*N4, C, &pitch_index);
+
+   ALLOC(freq, B*C*N, celt_sig_t); /**< Interleaved signal MDCTs */
+   
    /*for (i=0;i<(B+1)*C*N;i++) printf ("%f(%d) ", in[i], i); printf ("\n");*/
    /* Compute MDCTs */
-   curr_power = compute_mdcts(&st->mode->mdct, st->mode->window, in+N4, freq, N, st->overlap, B, C);
+   curr_power = compute_mdcts(&st->mode->mdct, st->mode->window, in, freq, N, st->overlap, B, C);
 
 #if 0 /* Mask disabled until it can be made to do something useful */
    compute_mdct_masking(X, mask, B*C*N, st->Fs);
@@ -294,25 +287,12 @@ int celt_encode(CELTEncoder *st, celt_int16_t *pcm, unsigned char *compressed, i
    for (i=0;i<B*C*N;i++)
       mask[i] = 1/(.1+mask[i]);
 #endif
-   /* Pitch analysis */
-   for (c=0;c<C;c++)
-   {
-      for (i=0;i<N4;i++)
-      {
-         in[C*i+c] = 0;
-         in[C*(B*N+N-i-1)+c] = 0;
-      }
-      for (i=0;i<st->overlap;i++)
-      {
-         in[C*(i+N4)+c] = MULT16_32_Q15(st->mode->window[i], in[C*(i+N4)+c]);
-         in[C*(B*N+N-i-N4-1)+c] = MULT16_32_Q15(st->mode->window[i], in[C*(B*N+N-i-N4-1)+c]);
-      }
-   }
-   find_spectral_pitch(st->fft, &st->psy, in+N4, st->out_mem, MAX_PERIOD, (B+1)*N-2*N4, C, &pitch_index);
    
    /* Deferred allocation after find_spectral_pitch() to reduce the peak memory usage */
    ALLOC(X, B*C*N, celt_norm_t);         /**< Interleaved normalised MDCTs */
    ALLOC(P, B*C*N, celt_norm_t);         /**< Interleaved normalised pitch MDCTs*/
+   ALLOC(bandE,st->mode->nbEBands*C, celt_ener_t);
+   ALLOC(gains,st->mode->nbPBands, celt_pgain_t);
 
    /*printf ("%f %f\n", curr_power, pitch_power);*/
    /*int j;
@@ -340,10 +320,10 @@ int celt_encode(CELTEncoder *st, celt_int16_t *pcm, unsigned char *compressed, i
    }
 
    /* Check if we can safely use the pitch (i.e. effective gain isn't too high) */
-   if (curr_power + 1e5f*(1.f/SHL16(1,8)) < 10.f*pitch_power)
+   if (MULT16_32_Q15(QCONST16(.1f, 15),curr_power) + SHR16(10000,8) < pitch_power)
    {
       /* Normalise the pitch vector as well (discard the energies) */
-      VARDECL(celt_ener_t *bandEp);
+      VARDECL(celt_ener_t, bandEp);
       ALLOC(bandEp, st->mode->nbEBands*st->mode->nbChannels, celt_ener_t);
       compute_band_energies(st->mode, freq, bandEp);
       normalise_bands(st->mode, freq, P, bandEp);
@@ -400,7 +380,7 @@ int celt_encode(CELTEncoder *st, celt_int16_t *pcm, unsigned char *compressed, i
          for (j=0;j<N;j++)
          {
             celt_sig_t tmp = ADD32(st->out_mem[C*(MAX_PERIOD+(i-B)*N)+C*j+c],
-                                   MULT16_32_Q15(st->preemph,st->preemph_memD[c]));
+                                   MULT16_32_Q15(preemph,st->preemph_memD[c]));
             st->preemph_memD[c] = tmp;
             pcm[C*i*N+C*j+c] = SIG2INT16(tmp);
          }
@@ -465,7 +445,6 @@ struct CELTDecoder {
    ec_byte_buffer buf;
    ec_enc         enc;
 
-   celt_word16_t preemph;
    celt_sig_t *preemph_memD;
 
    celt_sig_t *mdct_overlap;
@@ -476,7 +455,7 @@ struct CELTDecoder {
    int last_pitch_index;
 };
 
-CELTDecoder *celt_decoder_create(const CELTMode *mode)
+CELTDecoder EXPORT *celt_decoder_create(const CELTMode *mode)
 {
    int N, B, C;
    CELTDecoder *st;
@@ -500,14 +479,13 @@ CELTDecoder *celt_decoder_create(const CELTMode *mode)
    
    st->oldBandE = (celt_word16_t*)celt_alloc(C*mode->nbEBands*sizeof(celt_word16_t));
 
-   st->preemph = QCONST16(0.8f,15);
    st->preemph_memD = (celt_sig_t*)celt_alloc(C*sizeof(celt_sig_t));;
 
    st->last_pitch_index = 0;
    return st;
 }
 
-void celt_decoder_destroy(CELTDecoder *st)
+void EXPORT celt_decoder_destroy(CELTDecoder *st)
 {
    if (st == NULL)
    {
@@ -532,14 +510,13 @@ void celt_decoder_destroy(CELTDecoder *st)
     pitch period */
 static void celt_decode_lost(CELTDecoder *st, short *pcm)
 {
-   int i, c, N, B, C, N4;
+   int i, c, N, B, C;
    int pitch_index;
-   VARDECL(celt_sig_t *freq);
+   VARDECL(celt_sig_t, freq);
    SAVE_STACK;
    N = st->block_size;
    B = st->nb_blocks;
    C = st->mode->nbChannels;
-   N4 = (N-st->overlap)/2;
    ALLOC(freq,C*B*N, celt_sig_t);         /**< Interleaved signal MDCTs */
    
    pitch_index = st->last_pitch_index;
@@ -559,7 +536,7 @@ static void celt_decode_lost(CELTDecoder *st, short *pcm)
          for (j=0;j<N;j++)
          {
             celt_sig_t tmp = ADD32(st->out_mem[C*(MAX_PERIOD+(i-B)*N)+C*j+c],
-                                   MULT16_32_Q15(st->preemph,st->preemph_memD[c]));
+                                   MULT16_32_Q15(preemph,st->preemph_memD[c]));
             st->preemph_memD[c] = tmp;
             pcm[C*i*N+C*j+c] = SIG2INT16(tmp);
          }
@@ -568,18 +545,18 @@ static void celt_decode_lost(CELTDecoder *st, short *pcm)
    RESTORE_STACK;
 }
 
-int celt_decode(CELTDecoder *st, unsigned char *data, int len, celt_int16_t *pcm)
+int EXPORT celt_decode(CELTDecoder *st, unsigned char *data, int len, celt_int16_t *pcm)
 {
    int i, c, N, B, C, N4;
    int has_pitch;
    int pitch_index;
    ec_dec dec;
    ec_byte_buffer buf;
-   VARDECL(celt_sig_t *freq);
-   VARDECL(celt_norm_t *X);
-   VARDECL(celt_norm_t *P);
-   VARDECL(celt_ener_t *bandE);
-   VARDECL(celt_pgain_t *gains);
+   VARDECL(celt_sig_t, freq);
+   VARDECL(celt_norm_t, X);
+   VARDECL(celt_norm_t, P);
+   VARDECL(celt_ener_t, bandE);
+   VARDECL(celt_pgain_t, gains);
    SAVE_STACK;
 
    if (check_mode(st->mode) != CELT_OK)
@@ -588,7 +565,7 @@ int celt_decode(CELTDecoder *st, unsigned char *data, int len, celt_int16_t *pcm
    N = st->block_size;
    B = st->nb_blocks;
    C = st->mode->nbChannels;
-   N4 = (N-st->overlap)/2;
+   N4 = (N-st->overlap)>>1;
 
    ALLOC(freq, C*B*N, celt_sig_t); /**< Interleaved signal MDCTs */
    ALLOC(X, C*B*N, celt_norm_t);         /**< Interleaved normalised MDCTs */
@@ -631,7 +608,7 @@ int celt_decode(CELTDecoder *st, unsigned char *data, int len, celt_int16_t *pcm
    compute_mdcts(&st->mode->mdct, st->mode->window, st->out_mem+pitch_index*C, freq, N, st->overlap, B, C);
 
    {
-      VARDECL(celt_ener_t *bandEp);
+      VARDECL(celt_ener_t, bandEp);
       ALLOC(bandEp, st->mode->nbEBands*C, celt_ener_t);
       compute_band_energies(st->mode, freq, bandEp);
       normalise_bands(st->mode, freq, P, bandEp);
@@ -667,7 +644,7 @@ int celt_decode(CELTDecoder *st, unsigned char *data, int len, celt_int16_t *pcm
          for (j=0;j<N;j++)
          {
             celt_sig_t tmp = ADD32(st->out_mem[C*(MAX_PERIOD+(i-B)*N)+C*j+c],
-                                   MULT16_32_Q15(st->preemph,st->preemph_memD[c]));
+                                   MULT16_32_Q15(preemph,st->preemph_memD[c]));
             st->preemph_memD[c] = tmp;
             pcm[C*i*N+C*j+c] = SIG2INT16(tmp);
          }