minor optimizations to precompute_partition_info_sums_()
[flac.git] / src / libFLAC / stream_encoder.c
index cc9a9e7..5b2c12a 100644 (file)
 #include "private/stream_encoder_framing.h"
 #include "private/window.h"
 
+#ifndef FLaC__INLINE
+#define FLaC__INLINE
+#endif
+
 #ifdef min
 #undef min
 #endif
@@ -245,6 +249,7 @@ static unsigned find_best_partition_order_(
        unsigned rice_parameter,
        unsigned min_partition_order,
        unsigned max_partition_order,
+       unsigned bps,
        FLAC__bool do_escape_coding,
        unsigned rice_parameter_search_dist,
        FLAC__EntropyCodingMethod_PartitionedRice *best_partitioned_rice
@@ -256,7 +261,8 @@ static void precompute_partition_info_sums_(
        unsigned residual_samples,
        unsigned predictor_order,
        unsigned min_partition_order,
-       unsigned max_partition_order
+       unsigned max_partition_order,
+       unsigned bps
 );
 
 static void precompute_partition_info_escapes_(
@@ -357,7 +363,7 @@ typedef struct FLAC__StreamEncoderPrivate {
        FLAC__StreamMetadata_SeekTable *seek_table;       /* pointer into encoder->protected_->metadata_ where the seek table is */
        unsigned current_sample_number;
        unsigned current_frame_number;
-       struct FLAC__MD5Context md5context;
+       FLAC__MD5Context md5context;
        FLAC__CPUInfo cpuinfo;
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
        unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
@@ -735,16 +741,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
                        encoder->protected_->blocksize != 16384
                )
                        return FLAC__STREAM_ENCODER_INIT_STATUS_NOT_STREAMABLE;
-               if(
-                       encoder->protected_->sample_rate != 8000 &&
-                       encoder->protected_->sample_rate != 16000 &&
-                       encoder->protected_->sample_rate != 22050 &&
-                       encoder->protected_->sample_rate != 24000 &&
-                       encoder->protected_->sample_rate != 32000 &&
-                       encoder->protected_->sample_rate != 44100 &&
-                       encoder->protected_->sample_rate != 48000 &&
-                       encoder->protected_->sample_rate != 96000
-               )
+               if(!FLAC__format_sample_rate_is_subset(encoder->protected_->sample_rate))
                        return FLAC__STREAM_ENCODER_INIT_STATUS_NOT_STREAMABLE;
                if(
                        encoder->protected_->bits_per_sample != 8 &&
@@ -3746,6 +3743,7 @@ unsigned evaluate_fixed_subframe_(
                        rice_parameter,
                        min_partition_order,
                        max_partition_order,
+                       subframe_bps,
                        do_escape_coding,
                        rice_parameter_search_dist,
                        &subframe->data.fixed.entropy_coding_method.data.partitioned_rice
@@ -3826,6 +3824,7 @@ unsigned evaluate_lpc_subframe_(
                        rice_parameter,
                        min_partition_order,
                        max_partition_order,
+                       subframe_bps,
                        do_escape_coding,
                        rice_parameter_search_dist,
                        &subframe->data.lpc.entropy_coding_method.data.partitioned_rice
@@ -3883,6 +3882,7 @@ unsigned find_best_partition_order_(
        unsigned rice_parameter,
        unsigned min_partition_order,
        unsigned max_partition_order,
+       unsigned bps,
        FLAC__bool do_escape_coding,
        unsigned rice_parameter_search_dist,
        FLAC__EntropyCodingMethod_PartitionedRice *best_partitioned_rice
@@ -3895,7 +3895,7 @@ unsigned find_best_partition_order_(
        max_partition_order = FLAC__format_get_max_rice_partition_order_from_blocksize_limited_max_and_predictor_order(max_partition_order, blocksize, predictor_order);
        min_partition_order = min(min_partition_order, max_partition_order);
 
-       precompute_partition_info_sums_(residual, abs_residual_partition_sums, residual_samples, predictor_order, min_partition_order, max_partition_order);
+       precompute_partition_info_sums_(residual, abs_residual_partition_sums, residual_samples, predictor_order, min_partition_order, max_partition_order, bps);
 
        if(do_escape_coding)
                precompute_partition_info_escapes_(residual, raw_bits_per_partition, residual_samples, predictor_order, min_partition_order, max_partition_order);
@@ -3955,56 +3955,54 @@ void precompute_partition_info_sums_(
        unsigned residual_samples,
        unsigned predictor_order,
        unsigned min_partition_order,
-       unsigned max_partition_order
+       unsigned max_partition_order,
+       unsigned bps
 )
 {
        int partition_order;
        unsigned from_partition, to_partition = 0;
        const unsigned blocksize = residual_samples + predictor_order;
+       const unsigned partitions = 1u << max_partition_order;
+       const unsigned default_partition_samples = blocksize >> max_partition_order;
+       unsigned partition, end, residual_sample;
+
+       FLAC__ASSERT(default_partition_samples > predictor_order);
 
        /* first do max_partition_order */
-       for(partition_order = (int)max_partition_order; partition_order >= 0; partition_order--) {
-               FLAC__uint64 abs_residual_partition_sum; /* OPT: can reasonably be FLAC__uint32 for bps <= 17 and maybe higher */
-               unsigned partition, partition_sample, partition_samples, residual_sample;
-               const unsigned partitions = 1u << partition_order;
-               const unsigned default_partition_samples = blocksize >> partition_order;
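+       if(FLAC__bitmath_ilog2(default_partition_samples) + bps < 32) { /* 32-bit accumulator: relies on every |residual| fitting in bps bits and on ilog2 being floor(log2) -- NOTE(review): confirm both; very slightly pessimistic but still catches all common cases */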
+               FLAC__uint32 abs_residual_partition_sum;
 
-               FLAC__ASSERT(default_partition_samples > predictor_order);
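+               end = (unsigned)(-(int)predictor_order); /* start at -predictor_order (mod 2^N) so that after the first += the first partition ends predictor_order samples early */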
+               for(partition = residual_sample = 0; partition < partitions; partition++) {
+                       end += default_partition_samples;
+                       abs_residual_partition_sum = 0;
+                       for( ; residual_sample < end; residual_sample++)
+                               abs_residual_partition_sum += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
+                       abs_residual_partition_sums[partition] = abs_residual_partition_sum;
+               }
+       }
+       else { /* have to pessimistically use 64 bits for accumulator */
+               FLAC__uint64 abs_residual_partition_sum;
 
+               end = (unsigned)(-(int)predictor_order);
                for(partition = residual_sample = 0; partition < partitions; partition++) {
-                       partition_samples = default_partition_samples;
-                       if(partition == 0)
-                               partition_samples -= predictor_order;
+                       end += default_partition_samples;
                        abs_residual_partition_sum = 0;
-                       for(partition_sample = 0; partition_sample < partition_samples; partition_sample++, residual_sample++) {
-#if defined _MSC_VER
-                               /* OPT: abs() may be faster for some compilers */
+                       for( ; residual_sample < end; residual_sample++)
                                abs_residual_partition_sum += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
-#else
-                               const FLAC__int32 r = residual[residual_sample];
-                               if(r < 0)
-                                       abs_residual_partition_sum -= r;
-                               else
-                                       abs_residual_partition_sum += r;
-#endif
-                       }
                        abs_residual_partition_sums[partition] = abs_residual_partition_sum;
                }
-               to_partition = partitions;
-               break;
        }
 
        /* now merge partitions for lower orders */
-       for(from_partition = 0, --partition_order; partition_order >= (int)min_partition_order; partition_order--) {
-               FLAC__uint64 s;
+       for(from_partition = 0, to_partition = partitions, partition_order = (int)max_partition_order - 1; partition_order >= (int)min_partition_order; partition_order--) {
                unsigned i;
                const unsigned partitions = 1u << partition_order;
                for(i = 0; i < partitions; i++) {
-                       s = abs_residual_partition_sums[from_partition];
-                       from_partition++;
-                       abs_residual_partition_sums[to_partition] = s + abs_residual_partition_sums[from_partition];
-                       from_partition++;
-                       to_partition++;
+                       abs_residual_partition_sums[to_partition++] =
+                               abs_residual_partition_sums[from_partition  ] +
+                               abs_residual_partition_sums[from_partition+1];
+                       from_partition += 2;
                }
        }
 }
@@ -4068,7 +4066,7 @@ void precompute_partition_info_escapes_(
 }
 
 #ifdef EXACT_RICE_BITS_CALCULATION
-static __inline unsigned count_rice_bits_in_partition_(
+static FLaC__INLINE unsigned count_rice_bits_in_partition_(
        const unsigned rice_parameter,
        const unsigned partition_samples,
        const FLAC__int32 *residual
@@ -4083,7 +4081,7 @@ static __inline unsigned count_rice_bits_in_partition_(
        return partition_bits;
 }
 #else
-static __inline unsigned count_rice_bits_in_partition_(
+static FLaC__INLINE unsigned count_rice_bits_in_partition_(
        const unsigned rice_parameter,
        const unsigned partition_samples,
        const FLAC__uint64 abs_residual_partition_sum