Add partial_tukey and punchout_tukey apodization functions
authorMartijn van Beurden <mvanb1@gmail.com>
Sun, 10 Aug 2014 08:59:29 +0000 (10:59 +0200)
committerErik de Castro Lopo <erikd@mega-nerd.com>
Mon, 22 Sep 2014 08:03:24 +0000 (18:03 +1000)
Adds two new apodization functions that seem to perform better than
the apodization functions currently in the codebase and fixes three
existing windows as well.

It's important to note that this patch only affects the encoder stage
that evaluates various possible predictors. Audio encoded with these
new windows will still decode with existing legacy decoders.

= Theory =
These functions are used to window the audio data at the predictor
stage. These new functions enable the use of only part of the signal
to generate a predictor. This helps because short transients can
introduce noise into the predictor. The predictor becomes very good
at predicting one part of the signal, instead of mediocre for the
whole block.

Signed-off-by: Erik de Castro Lopo <erikd@mega-nerd.com>
doc/html/documentation_tools_flac.html
include/FLAC/stream_encoder.h
man/flac.1
man/flac.sgml
src/libFLAC/include/private/window.h
src/libFLAC/include/protected/stream_encoder.h
src/libFLAC/stream_encoder.c
src/libFLAC/window.c

index 6fa18da..9f85a25 100644 (file)
                                        <span class="argument">-A "function"</span>, <span class="argument">--apodization="function"</span>
                                </td>
                                <td>
-                                       Window audio data with given the apodization function.  The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), welch.<br />
+                                       Window audio data with given the apodization function.  The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), partial_tukey(n[/ov[/P]]), punchout_tukey(n[/ov[/P]]), welch.<br />
                                        For gauss(STDDEV), STDDEV is the standard deviation (0&lt;STDDEV&lt;=0.5).<br />
                                        For tukey(P), P specifies the fraction of the window that is tapered (0&lt;=P&lt;=1; P=0 corresponds to "rectangle" and P=1 corresponds to "hann").<br />
-                                       Please note that for both P as well as STDDEV, the use of a point or comma as decimal separator is locale-dependent.<br />
+                                       For partial_tukey(n) and punchout_tukey(n), n apodization functions are added that span different parts of each block. Values of 2 to 6 seem to yield sane results. If necessary, an overlap can be specified, as can be the taper parameter, for example partial_tukey(2/0.2) or partial_tukey(2/0.2/0.5). ov should be smaller than 1 and can be negative.<br />
+                                       Please note that P, STDDEV and ov are locale specific, so a comma as decimal separator might be required instead of a dot.<br />
                                        More than one -A option (up to 32) may be used.  Any function that is specified erroneously is silently dropped.  The encoder chooses suitable defaults in the absence of any -A options; any -A option specified replaces the default(s).<br />
                                        When more than one function is specified, then for every subframe the encoder will try each of them separately and choose the window that results in the smallest compressed subframe.  Multiple functions can greatly increase the encoding time.<br />
                                </td>
index 6f7796b..dbfee63 100644 (file)
@@ -920,7 +920,8 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_loose_mid_side_stereo(FLAC__StreamE
  * The available functions are \c bartlett, \c bartlett_hann,
  * \c blackman, \c blackman_harris_4term_92db, \c connes, \c flattop,
  * \c gauss(STDDEV), \c hamming, \c hann, \c kaiser_bessel, \c nuttall,
- * \c rectangle, \c triangle, \c tukey(P), \c welch.
+ * \c rectangle, \c triangle, \c tukey(P), \c partial_tukey(n[/ov[/P]]),
+ * \c punchout_tukey(n[/ov[/P]]), \c welch.
  *
  * For \c gauss(STDDEV), STDDEV specifies the standard deviation
  * (0<STDDEV<=0.5).
@@ -929,6 +930,24 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_loose_mid_side_stereo(FLAC__StreamE
  * tapered (0<=P<=1).  P=0 corresponds to \c rectangle and P=1
  * corresponds to \c hann.
  *
+ * Specifying \c partial_tukey or \c punchout_tukey works a little
+ * differently. These do not specify a single apodization function, but
+ * a series of them with some overlap. partial_tukey specifies a series
+ * of small windows (all treated separately) while punchout_tukey
+ * specifies a series of windows that have a hole in them. In this way,
+ * the predictor is constructed with only a part of the block, which
+ * helps in case a block consists of dissimilar parts.
+ *
+ * The three parameters that can be specified for the functions are
+ * n, ov and P. n is the number of functions to add, ov is the overlap
+ * of the windows in case of partial_tukey and the overlap in the gaps
+ * in case of punchout_tukey. P is the fraction of the window that is
+ * tapered, like with a regular tukey window. The function can be
+ * specified with only a number, a number and an overlap, or a number,
+ * an overlap and a P, for example, partial_tukey(3), partial_tukey(3/0.3)
+ * and partial_tukey(3/0.3/0.5) are all valid. ov should be smaller than 1
+ * and can be negative.
+ *
  * Example specifications are \c "blackman" or
  * \c "hann;triangle;tukey(0.5);tukey(0.25);tukey(0.125)"
  *
@@ -941,7 +960,9 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_loose_mid_side_stereo(FLAC__StreamE
  * results in the smallest compressed subframe.
  *
  * Note that each function specified causes the encoder to occupy a
- * floating point array in which to store the window.
+ * floating point array in which to store the window. Also note that the
+ * values of P, STDDEV and ov are locale-specific, so if the decimal
+ * separator specified by the locale is a comma, a comma should be used.
  *
  * \default \c "tukey(0.5)"
  * \param  encoder        An encoder instance to set.
index aaa9b05..a361418 100644 (file)
@@ -280,12 +280,16 @@ Highest compression.  Currently synonymous with -8.
 Do exhaustive model search (expensive!)
 .TP
 \fB-A \fIfunction\fB, --apodization=\fIfunction\fB\fR
-Window audio data with given the apodization function.  The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), welch.
+Window audio data with given the apodization function.  The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), partial_tukey(n[/ov[/P]]), punchout_tukey(n[/ov[/P]]), welch.
 
 For gauss(STDDEV), STDDEV is the standard deviation (0<STDDEV<=0.5).
 
 For tukey(P), P specifies the fraction of the window that is tapered (0<=P<=1; P=0 corresponds to "rectangle" and P=1 corresponds to "hann").
 
+For partial_tukey(n) and punchout_tukey(n), n apodization functions are added that span different parts of each block. Values of 2 to 6 seem to yield sane results. If necessary, an overlap can be specified, as can be the taper parameter, for example partial_tukey(2/0.2) or partial_tukey(2/0.2/0.5). ov should be smaller than 1 and can be negative.
+
+Please note that P, STDDEV and ov are locale specific, so a comma as decimal separator might be required instead of a dot.
+
 More than one -A option (up to 32) may be used.  Any function that is specified erroneously is silently dropped.  The encoder chooses suitable defaults in the absence of any -A options; any -A option specified replaces the default(s).
 
 When more than one function is specified, then for every subframe the encoder will try each of them separately and choose the window that results in the smallest compressed subframe.  Multiple functions can greatly increase the encoding time.
index 7fa410b..37525ce 100644 (file)
          <term><option>-A</option> <replaceable>function</replaceable>, <option>--apodization</option>=<replaceable>function</replaceable></term>
 
          <listitem>
-           <para>Window audio data with given the apodization function.  The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), welch.</para>
+           <para>Window audio data with given the apodization function.  The functions are: bartlett, bartlett_hann, blackman, blackman_harris_4term_92db, connes, flattop, gauss(STDDEV), hamming, hann, kaiser_bessel, nuttall, rectangle, triangle, tukey(P), partial_tukey(n[/ov[/P]]), punchout_tukey(n[/ov[/P]]), welch.</para>
            <para>For gauss(STDDEV), STDDEV is the standard deviation (0&lt;STDDEV&lt;=0.5).</para>
            <para>For tukey(P), P specifies the fraction of the window that is tapered (0&lt;=P&lt;=1; P=0 corresponds to "rectangle" and P=1 corresponds to "hann").</para>
+           <para>For partial_tukey(n) and punchout_tukey(n), n apodization functions are added that span different parts of each block. Values of 2 to 6 seem to yield sane results. If necessary, an overlap can be specified, as can be the taper parameter, for example partial_tukey(2/0.2) or partial_tukey(2/0.2/0.5). ov should be smaller than 1 and can be negative.</para>
+           <para>Please note that P, STDDEV and ov are locale specific, so a comma as decimal separator might be required instead of a dot.</para>
            <para>More than one -A option (up to 32) may be used.  Any function that is specified erroneously is silently dropped.  The encoder chooses suitable defaults in the absence of any -A options; any -A option specified replaces the default(s).</para>
            <para>When more than one function is specified, then for every subframe the encoder will try each of them separately and choose the window that results in the smallest compressed subframe.  Multiple functions can greatly increase the encoding time.</para>
          </listitem>
index e712b4a..acf5086 100644 (file)
@@ -65,6 +65,8 @@ void FLAC__window_nuttall(FLAC__real *window, const FLAC__int32 L);
 void FLAC__window_rectangle(FLAC__real *window, const FLAC__int32 L);
 void FLAC__window_triangle(FLAC__real *window, const FLAC__int32 L);
 void FLAC__window_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p);
+void FLAC__window_partial_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end);
+void FLAC__window_punchout_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end);
 void FLAC__window_welch(FLAC__real *window, const FLAC__int32 L);
 
 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
index 011adc0..38796cb 100644 (file)
@@ -59,6 +59,8 @@ typedef enum {
        FLAC__APODIZATION_RECTANGLE,
        FLAC__APODIZATION_TRIANGLE,
        FLAC__APODIZATION_TUKEY,
+       FLAC__APODIZATION_PARTIAL_TUKEY,
+       FLAC__APODIZATION_PUNCHOUT_TUKEY,
        FLAC__APODIZATION_WELCH
 } FLAC__ApodizationFunction;
 
@@ -71,6 +73,11 @@ typedef struct {
                struct {
                        FLAC__real p;
                } tukey;
+               struct {
+                       FLAC__real p;
+                       FLAC__real start;
+                       FLAC__real end;
+               } multiple_tukey;
        } parameters;
 } FLAC__ApodizationSpecification;
 
index 3e33336..6f46d78 100644 (file)
@@ -1664,6 +1664,48 @@ FLAC_API FLAC__bool FLAC__stream_encoder_set_apodization(FLAC__StreamEncoder *en
                                encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_TUKEY;
                        }
                }
+               else if(n>15   && 0 == strncmp("partial_tukey("       , specification, 14)) {
+                       FLAC__int32 tukey_parts = (FLAC__int32)strtod(specification+14, 0);
+                       const char *si_1 = strchr(specification, '/');
+                       FLAC__real overlap = si_1?flac_min((FLAC__real)strtod(si_1+1, 0),0.99f):0.1f;
+                       FLAC__real overlap_units = 1.0f/(1.0f - overlap) - 1.0f;
+                       const char *si_2 = strchr((si_1?(si_1+1):specification), '/');
+                       FLAC__real tukey_p = si_2?(FLAC__real)strtod(si_2+1, 0):0.2f;
+
+                       if (tukey_parts <= 1) {
+                               encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.tukey.p = tukey_p;
+                               encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_TUKEY;
+                       }else if (encoder->protected_->num_apodizations + tukey_parts < 32){
+                               FLAC__int32 m;
+                               for(m = 0; m < tukey_parts; m++){
+                                       encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.p = tukey_p;
+                                       encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.start = m/(tukey_parts+overlap_units);
+                                       encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.end = (m+1+overlap_units)/(tukey_parts+overlap_units);
+                                       encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_PARTIAL_TUKEY;
+                               }
+                       }
+               }
+               else if(n>16   && 0 == strncmp("punchout_tukey("       , specification, 15)) {
+                       FLAC__int32 tukey_parts = (FLAC__int32)strtod(specification+15, 0);
+                       const char *si_1 = strchr(specification, '/');
+                       FLAC__real overlap = si_1?flac_min((FLAC__real)strtod(si_1+1, 0),0.99f):0.2f;
+                       FLAC__real overlap_units = 1.0f/(1.0f - overlap) - 1.0f;
+                       const char *si_2 = strchr((si_1?(si_1+1):specification), '/');
+                       FLAC__real tukey_p = si_2?(FLAC__real)strtod(si_2+1, 0):0.2f;
+
+                       if (tukey_parts <= 1) {
+                               encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.tukey.p = tukey_p;
+                               encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_TUKEY;
+                       }else if (encoder->protected_->num_apodizations + tukey_parts < 32){
+                               FLAC__int32 m;
+                               for(m = 0; m < tukey_parts; m++){
+                                       encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.p = tukey_p;
+                                       encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.start = m/(tukey_parts+overlap_units);
+                                       encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.multiple_tukey.end = (m+1+overlap_units)/(tukey_parts+overlap_units);
+                                       encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_PUNCHOUT_TUKEY;
+                               }
+                       }
+               }
                else if(n==5  && 0 == strncmp("welch"        , specification, n))
                        encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_WELCH;
                if (encoder->protected_->num_apodizations == 32)
@@ -2443,6 +2485,12 @@ FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, unsigned new_blocksize)
                                case FLAC__APODIZATION_TUKEY:
                                        FLAC__window_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.tukey.p);
                                        break;
+                               case FLAC__APODIZATION_PARTIAL_TUKEY:
+                                       FLAC__window_partial_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.multiple_tukey.p, encoder->protected_->apodizations[i].parameters.multiple_tukey.start, encoder->protected_->apodizations[i].parameters.multiple_tukey.end);
+                                       break;
+                               case FLAC__APODIZATION_PUNCHOUT_TUKEY:
+                                       FLAC__window_punchout_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.multiple_tukey.p, encoder->protected_->apodizations[i].parameters.multiple_tukey.start, encoder->protected_->apodizations[i].parameters.multiple_tukey.end);
+                                       break;
                                case FLAC__APODIZATION_WELCH:
                                        FLAC__window_welch(encoder->private_->window[i], new_blocksize);
                                        break;
index b873368..6acf66a 100644 (file)
@@ -58,7 +58,7 @@ void FLAC__window_bartlett(FLAC__real *window, const FLAC__int32 L)
                for (n = 0; n <= L/2-1; n++)
                        window[n] = 2.0f * n / (float)N;
                for (; n <= N; n++)
-                       window[n] = 2.0f - 2.0f * (N-n) / (float)N;
+                       window[n] = 2.0f - 2.0f * n / (float)N;
        }
 }
 
@@ -68,7 +68,7 @@ void FLAC__window_bartlett_hann(FLAC__real *window, const FLAC__int32 L)
        FLAC__int32 n;
 
        for (n = 0; n < L; n++)
-               window[n] = (FLAC__real)(0.62f - 0.48f * fabs((float)n/(float)N+0.5f) + 0.38f * cos(2.0f * M_PI * ((float)n/(float)N+0.5f)));
+               window[n] = (FLAC__real)(0.62f - 0.48f * fabs((float)n/(float)N-0.5f) - 0.38f * cos(2.0f * M_PI * ((float)n/(float)N)));
 }
 
 void FLAC__window_blackman(FLAC__real *window, const FLAC__int32 L)
@@ -173,16 +173,16 @@ void FLAC__window_triangle(FLAC__real *window, const FLAC__int32 L)
        FLAC__int32 n;
 
        if (L & 1) {
-               for (n = 1; n <= L+1/2; n++)
+               for (n = 1; n <= (L+1)/2; n++)
                        window[n-1] = 2.0f * n / ((float)L + 1.0f);
                for (; n <= L; n++)
-                       window[n-1] = (float)(2 * (L - n + 1)) / ((float)L + 1.0f);
+                       window[n-1] = (float)(2 * (L - n + 1)) / ((float)L + 1.0f);
        }
        else {
                for (n = 1; n <= L/2; n++)
-                       window[n-1] = 2.0f * n / (float)L;
+                       window[n-1] = 2.0f * n / ((float)L + 1.0f);
                for (; n <= L; n++)
-                       window[n-1] = ((float)(2 * (L - n)) + 1.0f) / (float)L;
+                       window[n-1] = (float)(2 * (L - n + 1)) / ((float)L + 1.0f);
        }
 }
 
@@ -207,6 +207,61 @@ void FLAC__window_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__rea
        }
 }
 
+/* Fill window[0..L-1] with a Tukey (tapered cosine) window that is confined
+ * to the sample range [start*L, end*L): zeros before the range, a
+ * raised-cosine rise, a flat top of 1.0, a raised-cosine fall, and zeros
+ * after the range.
+ *
+ * window  output array of at least L elements
+ * L       number of samples in the block
+ * p       fraction of the partial window that is tapered; values <= 0 are
+ *         treated as 0.01 and values >= 1 are treated as 1
+ * start   start of the partial window as a fraction of the block
+ * end     end of the partial window as a fraction of the block
+ */
+void FLAC__window_partial_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end)
+{
+       const FLAC__int32 start_n = (FLAC__int32)(start * L);
+       const FLAC__int32 end_n = (FLAC__int32)(end * L);
+       const FLAC__int32 N = end_n - start_n;
+       FLAC__real taper = p;
+       FLAC__int32 Np, n, i;
+
+       /* Clamp p locally instead of recursing: recursing with p == 1 would
+        * satisfy p >= 1.0 again and never terminate, and falling through
+        * after a recursive call would overwrite its result using the
+        * original out-of-range p. */
+       if (taper <= 0.0f)
+               taper = 0.01f;
+       else if (taper >= 1.0f)
+               taper = 1.0f;
+
+       /* Length of each taper; a negative value would make the flat-top
+        * loop below run past end_n, so floor it at zero. */
+       Np = (FLAC__int32)(taper / 2.0f * N) - 1;
+       if (Np < 0)
+               Np = 0;
+
+       /* Every loop is additionally bounded by L so that no start/end value
+        * can cause a write beyond the end of window[]. */
+       for (n = 0; n < start_n && n < L; n++)
+               window[n] = 0.0f;
+       for (i = 1; n < (start_n+Np) && n < L; n++, i++)
+               window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Np));
+       for (; n < (end_n-Np) && n < L; n++)
+               window[n] = 1.0f;
+       for (i = Np; n < end_n && n < L; n++, i--)
+               window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Np));
+       for (; n < L; n++)
+               window[n] = 0.0f;
+}
+/* Fill window[0..L-1] with a window that is zero over the sample range
+ * [start*L, end*L) (the "punched out" hole) and Tukey-shaped on either side
+ * of it: the part before the hole and the part after the hole each get a
+ * raised-cosine rise, a flat top of 1.0 and a raised-cosine fall.
+ *
+ * window  output array of at least L elements
+ * L       number of samples in the block
+ * p       fraction of each remaining part that is tapered; values <= 0 are
+ *         treated as 0.01 and values >= 1 are treated as 1
+ * start   start of the hole as a fraction of the block
+ * end     end of the hole as a fraction of the block
+ */
+void FLAC__window_punchout_tukey(FLAC__real *window, const FLAC__int32 L, const FLAC__real p, const FLAC__real start, const FLAC__real end)
+{
+       const FLAC__int32 start_n = (FLAC__int32)(start * L);
+       const FLAC__int32 end_n = (FLAC__int32)(end * L);
+       FLAC__real taper = p;
+       FLAC__int32 Ns, Ne, n, i;
+
+       /* Clamp p locally.  Delegating out-of-range p to
+        * FLAC__window_partial_tukey would produce the wrong window shape,
+        * and continuing afterwards with the original p would overwrite
+        * that result anyway. */
+       if (taper <= 0.0f)
+               taper = 0.01f;
+       else if (taper >= 1.0f)
+               taper = 1.0f;
+
+       /* Taper lengths for the part before (Ns) and after (Ne) the hole;
+        * floored at zero so the loop bounds below stay ordered. */
+       Ns = (FLAC__int32)(taper / 2.0f * start_n);
+       Ne = (FLAC__int32)(taper / 2.0f * (L - end_n));
+       if (Ns < 0)
+               Ns = 0;
+       if (Ne < 0)
+               Ne = 0;
+
+       /* Every loop is additionally bounded by L so that no start/end value
+        * can cause a write beyond the end of window[]. */
+       for (n = 0, i = 1; n < Ns && n < L; n++, i++)
+               window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ns));
+       for (; n < start_n-Ns && n < L; n++)
+               window[n] = 1.0f;
+       for (i = Ns; n < start_n && n < L; n++, i--)
+               window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ns));
+       for (; n < end_n && n < L; n++)
+               window[n] = 0.0f;
+       for (i = 1; n < end_n+Ne && n < L; n++, i++)
+               window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ne));
+       for (; n < L - (Ne) && n < L; n++)
+               window[n] = 1.0f;
+       for (i = Ne; n < L; n++, i--)
+               window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ne));
+}
+
 void FLAC__window_welch(FLAC__real *window, const FLAC__int32 L)
 {
        const FLAC__int32 N = L - 1;