several speed improvements: completely rewritten bitbuffer which uses native machine...
[flac.git] / src / libFLAC / stream_encoder.c
index 354d033..d9e1612 100644 (file)
@@ -55,7 +55,7 @@
 #include "FLAC/assert.h"
 #include "FLAC/stream_decoder.h"
 #include "protected/stream_encoder.h"
-#include "private/bitbuffer.h"
+#include "private/bitwriter.h"
 #include "private/bitmath.h"
 #include "private/crc.h"
 #include "private/cpu.h"
@@ -157,7 +157,6 @@ static FLAC__bool process_subframe_(
        FLAC__StreamEncoder *encoder,
        unsigned min_partition_order,
        unsigned max_partition_order,
-       FLAC__bool precompute_partition_sums,
        const FLAC__FrameHeader *frame_header,
        unsigned subframe_bps,
        const FLAC__int32 integer_signal[],
@@ -176,7 +175,7 @@ static FLAC__bool add_subframe_(
        unsigned blocksize,
        unsigned subframe_bps,
        const FLAC__Subframe *subframe,
-       FLAC__BitBuffer *frame
+       FLAC__BitWriter *frame
 );
 
 static unsigned evaluate_constant_subframe_(
@@ -191,7 +190,6 @@ static unsigned evaluate_fixed_subframe_(
        FLAC__StreamEncoder *encoder,
        const FLAC__int32 signal[],
        FLAC__int32 residual[],
-       FLAC__uint32 abs_residual[],
        FLAC__uint64 abs_residual_partition_sums[],
        unsigned raw_bits_per_partition[],
        unsigned blocksize,
@@ -200,7 +198,6 @@ static unsigned evaluate_fixed_subframe_(
        unsigned rice_parameter,
        unsigned min_partition_order,
        unsigned max_partition_order,
-       FLAC__bool precompute_partition_sums,
        FLAC__bool do_escape_coding,
        unsigned rice_parameter_search_dist,
        FLAC__Subframe *subframe,
@@ -212,7 +209,6 @@ static unsigned evaluate_lpc_subframe_(
        FLAC__StreamEncoder *encoder,
        const FLAC__int32 signal[],
        FLAC__int32 residual[],
-       FLAC__uint32 abs_residual[],
        FLAC__uint64 abs_residual_partition_sums[],
        unsigned raw_bits_per_partition[],
        const FLAC__real lp_coeff[],
@@ -223,7 +219,6 @@ static unsigned evaluate_lpc_subframe_(
        unsigned rice_parameter,
        unsigned min_partition_order,
        unsigned max_partition_order,
-       FLAC__bool precompute_partition_sums,
        FLAC__bool do_escape_coding,
        unsigned rice_parameter_search_dist,
        FLAC__Subframe *subframe,
@@ -242,7 +237,6 @@ static unsigned evaluate_verbatim_subframe_(
 static unsigned find_best_partition_order_(
        struct FLAC__StreamEncoderPrivate *private_,
        const FLAC__int32 residual[],
-       FLAC__uint32 abs_residual[],
        FLAC__uint64 abs_residual_partition_sums[],
        unsigned raw_bits_per_partition[],
        unsigned residual_samples,
@@ -250,14 +244,13 @@ static unsigned find_best_partition_order_(
        unsigned rice_parameter,
        unsigned min_partition_order,
        unsigned max_partition_order,
-       FLAC__bool precompute_partition_sums,
        FLAC__bool do_escape_coding,
        unsigned rice_parameter_search_dist,
        FLAC__EntropyCodingMethod_PartitionedRice *best_partitioned_rice
 );
 
 static void precompute_partition_info_sums_(
-       const FLAC__uint32 abs_residual[],
+       const FLAC__int32 residual[],
        FLAC__uint64 abs_residual_partition_sums[],
        unsigned residual_samples,
        unsigned predictor_order,
@@ -275,24 +268,8 @@ static void precompute_partition_info_escapes_(
 );
 
 static FLAC__bool set_partitioned_rice_(
-       const FLAC__uint32 abs_residual[],
-#ifdef EXACT_RICE_BITS_CALCULATION
-       const FLAC__int32 residual[],
-#endif
-       const unsigned residual_samples,
-       const unsigned predictor_order,
-       const unsigned suggested_rice_parameter,
-       const unsigned rice_parameter_search_dist,
-       const unsigned partition_order,
-       FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents,
-       unsigned *bits
-);
-
-static FLAC__bool set_partitioned_rice_with_precompute_(
 #ifdef EXACT_RICE_BITS_CALCULATION
        const FLAC__int32 residual[],
-#else
-       const FLAC__uint32 abs_residual[],
 #endif
        const FLAC__uint64 abs_residual_partition_sums[],
        const unsigned raw_bits_per_partition[],
@@ -369,10 +346,9 @@ typedef struct FLAC__StreamEncoderPrivate {
        unsigned best_subframe_mid_side[2];
        unsigned best_subframe_bits[FLAC__MAX_CHANNELS];  /* size in bits of the best subframe for each channel */
        unsigned best_subframe_bits_mid_side[2];
-       FLAC__uint32 *abs_residual;                       /* workspace where abs(candidate residual) is stored */
        FLAC__uint64 *abs_residual_partition_sums;        /* workspace where the sum of abs(candidate residual) for each partition is stored */
        unsigned *raw_bits_per_partition;                 /* workspace where the sum of silog2(candidate residual) for each partition is stored */
-       FLAC__BitBuffer *frame;                           /* the current frame being worked on */
+       FLAC__BitWriter *frame;                           /* the current frame being worked on */
        unsigned loose_mid_side_stereo_frames;            /* rounded number of frames the encoder will use before trying both independent and mid/side frames again */
        unsigned loose_mid_side_stereo_frame_count;       /* number of frames using the current channel assignment */
        FLAC__ChannelAssignment last_channel_assignment;
@@ -396,7 +372,6 @@ typedef struct FLAC__StreamEncoderPrivate {
        FLAC__bool use_wide_by_block;          /* use slow 64-bit versions of some functions because of the block size */
        FLAC__bool use_wide_by_partition;      /* use slow 64-bit versions of some functions because of the min partition order and blocksize */
        FLAC__bool use_wide_by_order;          /* use slow 64-bit versions of some functions because of the lpc order */
-       FLAC__bool precompute_partition_sums;  /* our initial guess as to whether precomputing the partitions sums could be a speed improvement */
        FLAC__bool disable_constant_subframes;
        FLAC__bool disable_fixed_subframes;
        FLAC__bool disable_verbatim_subframes;
@@ -427,7 +402,6 @@ typedef struct FLAC__StreamEncoderPrivate {
 #endif
        FLAC__int32 *residual_workspace_unaligned[FLAC__MAX_CHANNELS][2];
        FLAC__int32 *residual_workspace_mid_side_unaligned[2][2];
-       FLAC__uint32 *abs_residual_unaligned;
        FLAC__uint64 *abs_residual_partition_sums_unaligned;
        unsigned *raw_bits_per_partition_unaligned;
        /*
@@ -561,7 +535,7 @@ FLAC_API FLAC__StreamEncoder *FLAC__stream_encoder_new()
                return 0;
        }
 
-       encoder->private_->frame = FLAC__bitbuffer_new();
+       encoder->private_->frame = FLAC__bitwriter_new();
        if(encoder->private_->frame == 0) {
                free(encoder->private_);
                free(encoder->protected_);
@@ -635,7 +609,7 @@ FLAC_API void FLAC__stream_encoder_delete(FLAC__StreamEncoder *encoder)
        for(i = 0; i < 2; i++)
                FLAC__format_entropy_coding_method_partitioned_rice_contents_clear(&encoder->private_->partitioned_rice_contents_extra[i]);
 
-       FLAC__bitbuffer_delete(encoder->private_->frame);
+       FLAC__bitwriter_delete(encoder->private_->frame);
        free(encoder->private_);
        free(encoder->protected_);
        free(encoder);
@@ -904,7 +878,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
                encoder->private_->residual_workspace_mid_side_unaligned[i][1] = encoder->private_->residual_workspace_mid_side[i][1] = 0;
                encoder->private_->best_subframe_mid_side[i] = 0;
        }
-       encoder->private_->abs_residual_unaligned = encoder->private_->abs_residual = 0;
        encoder->private_->abs_residual_partition_sums_unaligned = encoder->private_->abs_residual_partition_sums = 0;
        encoder->private_->raw_bits_per_partition_unaligned = encoder->private_->raw_bits_per_partition = 0;
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
@@ -986,9 +959,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
                encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_wide;
        }
 
-       /* we require precompute_partition_sums if do_escape_coding because of their intertwined nature */
-       encoder->private_->precompute_partition_sums = (encoder->protected_->max_residual_partition_order > encoder->protected_->min_residual_partition_order) || encoder->protected_->do_escape_coding;
-
        /* set state to OK; from here on, errors are fatal and we'll override the state then */
        encoder->protected_->state = FLAC__STREAM_ENCODER_OK;
 
@@ -1012,7 +982,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
                return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
        }
 
-       if(!FLAC__bitbuffer_init(encoder->private_->frame)) {
+       if(!FLAC__bitwriter_init(encoder->private_->frame)) {
                encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR;
                return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
        }
@@ -1070,7 +1040,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
         */
        if(encoder->protected_->verify)
                encoder->private_->verify.state_hint = ENCODER_IN_MAGIC;
-       if(!FLAC__bitbuffer_write_raw_uint32(encoder->private_->frame, FLAC__STREAM_SYNC, FLAC__STREAM_SYNC_LEN)) {
+       if(!FLAC__bitwriter_write_raw_uint32(encoder->private_->frame, FLAC__STREAM_SYNC, FLAC__STREAM_SYNC_LEN)) {
                encoder->protected_->state = FLAC__STREAM_ENCODER_FRAMING_ERROR;
                return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
        }
@@ -1097,10 +1067,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
        encoder->private_->streaminfo.data.stream_info.total_samples = encoder->protected_->total_samples_estimate; /* we will replace this later with the real total */
        memset(encoder->private_->streaminfo.data.stream_info.md5sum, 0, 16); /* we don't know this yet; have to fill it in later */
        FLAC__MD5Init(&encoder->private_->md5context);
-       if(!FLAC__bitbuffer_clear(encoder->private_->frame)) {
-               encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR;
-               return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
-       }
        if(!FLAC__add_metadata_block(&encoder->private_->streaminfo, encoder->private_->frame)) {
                encoder->protected_->state = FLAC__STREAM_ENCODER_FRAMING_ERROR;
                return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
@@ -1137,10 +1103,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
                vorbis_comment.data.vorbis_comment.vendor_string.entry = 0;
                vorbis_comment.data.vorbis_comment.num_comments = 0;
                vorbis_comment.data.vorbis_comment.comments = 0;
-               if(!FLAC__bitbuffer_clear(encoder->private_->frame)) {
-                       encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR;
-                       return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
-               }
                if(!FLAC__add_metadata_block(&vorbis_comment, encoder->private_->frame)) {
                        encoder->protected_->state = FLAC__STREAM_ENCODER_FRAMING_ERROR;
                        return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
@@ -1156,10 +1118,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
         */
        for(i = 0; i < encoder->protected_->num_metadata_blocks; i++) {
                encoder->protected_->metadata[i]->is_last = (i == encoder->protected_->num_metadata_blocks - 1);
-               if(!FLAC__bitbuffer_clear(encoder->private_->frame)) {
-                       encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR;
-                       return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
-               }
                if(!FLAC__add_metadata_block(encoder->protected_->metadata[i], encoder->private_->frame)) {
                        encoder->protected_->state = FLAC__STREAM_ENCODER_FRAMING_ERROR;
                        return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
@@ -2461,10 +2419,6 @@ void free_(FLAC__StreamEncoder *encoder)
                        }
                }
        }
-       if(0 != encoder->private_->abs_residual_unaligned) {
-               free(encoder->private_->abs_residual_unaligned);
-               encoder->private_->abs_residual_unaligned = 0;
-       }
        if(0 != encoder->private_->abs_residual_partition_sums_unaligned) {
                free(encoder->private_->abs_residual_partition_sums_unaligned);
                encoder->private_->abs_residual_partition_sums_unaligned = 0;
@@ -2481,7 +2435,7 @@ void free_(FLAC__StreamEncoder *encoder)
                        }
                }
        }
-       FLAC__bitbuffer_free(encoder->private_->frame);
+       FLAC__bitwriter_free(encoder->private_->frame);
 }
 
 FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, unsigned new_blocksize)
@@ -2540,9 +2494,8 @@ FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, unsigned new_blocksize)
                        ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize, &encoder->private_->residual_workspace_mid_side_unaligned[channel][i], &encoder->private_->residual_workspace_mid_side[channel][i]);
                }
        }
-       ok = ok && FLAC__memory_alloc_aligned_uint32_array(new_blocksize, &encoder->private_->abs_residual_unaligned, &encoder->private_->abs_residual);
-       if(encoder->private_->precompute_partition_sums || encoder->protected_->do_escape_coding) /* we require precompute_partition_sums if do_escape_coding because of their intertwined nature */
-               ok = ok && FLAC__memory_alloc_aligned_uint64_array(new_blocksize * 2, &encoder->private_->abs_residual_partition_sums_unaligned, &encoder->private_->abs_residual_partition_sums);
+       /* @@@@@@@@@ blocksize*2 is too pessimistic, but to fix, we need smarter logic because a smaller new_blocksize can actually increase the # of partitions */
+       ok = ok && FLAC__memory_alloc_aligned_uint64_array(new_blocksize * 2, &encoder->private_->abs_residual_partition_sums_unaligned, &encoder->private_->abs_residual_partition_sums);
        if(encoder->protected_->do_escape_coding)
                ok = ok && FLAC__memory_alloc_aligned_unsigned_array(new_blocksize * 2, &encoder->private_->raw_bits_per_partition_unaligned, &encoder->private_->raw_bits_per_partition);
 
@@ -2619,9 +2572,12 @@ FLAC__bool write_bitbuffer_(FLAC__StreamEncoder *encoder, unsigned samples, FLAC
        const FLAC__byte *buffer;
        size_t bytes;
 
-       FLAC__ASSERT(FLAC__bitbuffer_is_byte_aligned(encoder->private_->frame));
+       FLAC__ASSERT(FLAC__bitwriter_is_byte_aligned(encoder->private_->frame));
 
-       FLAC__bitbuffer_get_buffer(encoder->private_->frame, &buffer, &bytes);
+       if(!FLAC__bitwriter_get_buffer(encoder->private_->frame, &buffer, &bytes)) {
+               encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR;
+               return false;
+       }
 
        if(encoder->protected_->verify) {
                encoder->private_->verify.output.data = buffer;
@@ -2631,7 +2587,8 @@ FLAC__bool write_bitbuffer_(FLAC__StreamEncoder *encoder, unsigned samples, FLAC
                }
                else {
                        if(!FLAC__stream_decoder_process_single(encoder->private_->verify.decoder)) {
-                               FLAC__bitbuffer_release_buffer(encoder->private_->frame);
+                               FLAC__bitwriter_release_buffer(encoder->private_->frame);
+                               FLAC__bitwriter_clear(encoder->private_->frame);
                                if(encoder->protected_->state != FLAC__STREAM_ENCODER_VERIFY_MISMATCH_IN_AUDIO_DATA)
                                        encoder->protected_->state = FLAC__STREAM_ENCODER_VERIFY_DECODER_ERROR;
                                return false;
@@ -2640,12 +2597,14 @@ FLAC__bool write_bitbuffer_(FLAC__StreamEncoder *encoder, unsigned samples, FLAC
        }
 
        if(write_frame_(encoder, buffer, bytes, samples, is_last_block) != FLAC__STREAM_ENCODER_WRITE_STATUS_OK) {
-               FLAC__bitbuffer_release_buffer(encoder->private_->frame);
+               FLAC__bitwriter_release_buffer(encoder->private_->frame);
+               FLAC__bitwriter_clear(encoder->private_->frame);
                encoder->protected_->state = FLAC__STREAM_ENCODER_CLIENT_ERROR;
                return false;
        }
 
-       FLAC__bitbuffer_release_buffer(encoder->private_->frame);
+       FLAC__bitwriter_release_buffer(encoder->private_->frame);
+       FLAC__bitwriter_clear(encoder->private_->frame);
 
        if(samples > 0) {
                encoder->private_->streaminfo.data.stream_info.min_framesize = min(bytes, encoder->private_->streaminfo.data.stream_info.min_framesize);
@@ -3092,6 +3051,7 @@ void update_ogg_metadata_(FLAC__StreamEncoder *encoder)
 
 FLAC__bool process_frame_(FLAC__StreamEncoder *encoder, FLAC__bool is_fractional_block, FLAC__bool is_last_block)
 {
+       FLAC__uint16 crc;
        FLAC__ASSERT(encoder->protected_->state == FLAC__STREAM_ENCODER_OK);
 
        /*
@@ -3113,7 +3073,7 @@ FLAC__bool process_frame_(FLAC__StreamEncoder *encoder, FLAC__bool is_fractional
        /*
         * Zero-pad the frame to a byte_boundary
         */
-       if(!FLAC__bitbuffer_zero_pad_to_byte_boundary(encoder->private_->frame)) {
+       if(!FLAC__bitwriter_zero_pad_to_byte_boundary(encoder->private_->frame)) {
                encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR;
                return false;
        }
@@ -3121,8 +3081,14 @@ FLAC__bool process_frame_(FLAC__StreamEncoder *encoder, FLAC__bool is_fractional
        /*
         * CRC-16 the whole thing
         */
-       FLAC__ASSERT(FLAC__bitbuffer_is_byte_aligned(encoder->private_->frame));
-       FLAC__bitbuffer_write_raw_uint32(encoder->private_->frame, FLAC__bitbuffer_get_write_crc16(encoder->private_->frame), FLAC__FRAME_FOOTER_CRC_LEN);
+       FLAC__ASSERT(FLAC__bitwriter_is_byte_aligned(encoder->private_->frame));
+       if(
+               !FLAC__bitwriter_get_write_crc16(encoder->private_->frame, &crc) ||
+               !FLAC__bitwriter_write_raw_uint32(encoder->private_->frame, crc, FLAC__FRAME_FOOTER_CRC_LEN)
+       ) {
+               encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR;
+               return false;
+       }
 
        /*
         * Write it
@@ -3146,7 +3112,7 @@ FLAC__bool process_subframes_(FLAC__StreamEncoder *encoder, FLAC__bool is_fracti
 {
        FLAC__FrameHeader frame_header;
        unsigned channel, min_partition_order = encoder->protected_->min_residual_partition_order, max_partition_order;
-       FLAC__bool do_independent, do_mid_side, precompute_partition_sums;
+       FLAC__bool do_independent, do_mid_side;
 
        /*
         * Calculate the min,max Rice partition orders
@@ -3160,15 +3126,9 @@ FLAC__bool process_subframes_(FLAC__StreamEncoder *encoder, FLAC__bool is_fracti
        }
        min_partition_order = min(min_partition_order, max_partition_order);
 
-       precompute_partition_sums = encoder->private_->precompute_partition_sums && ((max_partition_order > min_partition_order) || encoder->protected_->do_escape_coding);
-
        /*
         * Setup the frame
         */
-       if(!FLAC__bitbuffer_clear(encoder->private_->frame)) {
-               encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR;
-               return false;
-       }
        frame_header.blocksize = encoder->protected_->blocksize;
        frame_header.sample_rate = encoder->protected_->sample_rate;
        frame_header.channels = encoder->protected_->channels;
@@ -3232,7 +3192,6 @@ FLAC__bool process_subframes_(FLAC__StreamEncoder *encoder, FLAC__bool is_fracti
                                        encoder,
                                        min_partition_order,
                                        max_partition_order,
-                                       precompute_partition_sums,
                                        &frame_header,
                                        encoder->private_->subframe_bps[channel],
                                        encoder->private_->integer_signal[channel],
@@ -3262,7 +3221,6 @@ FLAC__bool process_subframes_(FLAC__StreamEncoder *encoder, FLAC__bool is_fracti
                                        encoder,
                                        min_partition_order,
                                        max_partition_order,
-                                       precompute_partition_sums,
                                        &frame_header,
                                        encoder->private_->subframe_bps_mid_side[channel],
                                        encoder->private_->integer_signal_mid_side[channel],
@@ -3404,7 +3362,6 @@ FLAC__bool process_subframe_(
        FLAC__StreamEncoder *encoder,
        unsigned min_partition_order,
        unsigned max_partition_order,
-       FLAC__bool precompute_partition_sums,
        const FLAC__FrameHeader *frame_header,
        unsigned subframe_bps,
        const FLAC__int32 integer_signal[],
@@ -3435,6 +3392,8 @@ FLAC__bool process_subframe_(
        unsigned _candidate_bits, _best_bits;
        unsigned _best_subframe;
 
+       FLAC__ASSERT(frame_header->blocksize > 0);
+
        /* verbatim subframe is the baseline against which we measure other compressed subframes */
        _best_subframe = 0;
        if(encoder->private_->disable_verbatim_subframes && frame_header->blocksize >= FLAC__MAX_FIXED_ORDER)
@@ -3481,6 +3440,8 @@ FLAC__bool process_subframe_(
                                else {
                                        min_fixed_order = max_fixed_order = guess_fixed_order;
                                }
+                               if(max_fixed_order >= frame_header->blocksize)
+                                       max_fixed_order = frame_header->blocksize - 1;
                                for(fixed_order = min_fixed_order; fixed_order <= max_fixed_order; fixed_order++) {
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
                                        if(fixed_residual_bits_per_sample[fixed_order] >= (FLAC__float)subframe_bps)
@@ -3503,7 +3464,6 @@ FLAC__bool process_subframe_(
                                                        encoder,
                                                        integer_signal,
                                                        residual[!_best_subframe],
-                                                       encoder->private_->abs_residual,
                                                        encoder->private_->abs_residual_partition_sums,
                                                        encoder->private_->raw_bits_per_partition,
                                                        frame_header->blocksize,
@@ -3512,7 +3472,6 @@ FLAC__bool process_subframe_(
                                                        rice_parameter,
                                                        min_partition_order,
                                                        max_partition_order,
-                                                       precompute_partition_sums,
                                                        encoder->protected_->do_escape_coding,
                                                        encoder->protected_->rice_parameter_search_dist,
                                                        subframe[!_best_subframe],
@@ -3557,6 +3516,8 @@ FLAC__bool process_subframe_(
                                                                        );
                                                                min_lpc_order = max_lpc_order = guess_lpc_order;
                                                        }
+                                                       if(max_lpc_order >= frame_header->blocksize)
+                                                               max_lpc_order = frame_header->blocksize - 1;
                                                        for(lpc_order = min_lpc_order; lpc_order <= max_lpc_order; lpc_order++) {
                                                                lpc_residual_bits_per_sample = FLAC__lpc_compute_expected_bits_per_residual_sample(lpc_error[lpc_order-1], frame_header->blocksize-lpc_order);
                                                                if(lpc_residual_bits_per_sample >= (FLAC__double)subframe_bps)
@@ -3588,7 +3549,6 @@ FLAC__bool process_subframe_(
                                                                                        encoder,
                                                                                        integer_signal,
                                                                                        residual[!_best_subframe],
-                                                                                       encoder->private_->abs_residual,
                                                                                        encoder->private_->abs_residual_partition_sums,
                                                                                        encoder->private_->raw_bits_per_partition,
                                                                                        encoder->private_->lp_coeff[lpc_order-1],
@@ -3599,7 +3559,6 @@ FLAC__bool process_subframe_(
                                                                                        rice_parameter,
                                                                                        min_partition_order,
                                                                                        max_partition_order,
-                                                                                       precompute_partition_sums,
                                                                                        encoder->protected_->do_escape_coding,
                                                                                        encoder->protected_->rice_parameter_search_dist,
                                                                                        subframe[!_best_subframe],
@@ -3638,7 +3597,7 @@ FLAC__bool add_subframe_(
        unsigned blocksize,
        unsigned subframe_bps,
        const FLAC__Subframe *subframe,
-       FLAC__BitBuffer *frame
+       FLAC__BitWriter *frame
 )
 {
        switch(subframe->type) {
@@ -3684,23 +3643,23 @@ static void spotcheck_subframe_estimate_(
 )
 {
        FLAC__bool ret;
-       FLAC__BitBuffer *frame = FLAC__bitbuffer_new();
+       FLAC__BitWriter *frame = FLAC__bitwriter_new();
        if(frame == 0) {
                fprintf(stderr, "EST: can't allocate frame\n");
                return;
        }
-       if(!FLAC__bitbuffer_init(frame)) {
+       if(!FLAC__bitwriter_init(frame)) {
                fprintf(stderr, "EST: can't init frame\n");
                return;
        }
        ret = add_subframe_(encoder, blocksize, subframe_bps, subframe, frame);
        FLAC__ASSERT(ret);
        {
-               const unsigned actual = FLAC__bitbuffer_get_input_bits_unconsumed(frame);
+               const unsigned actual = FLAC__bitwriter_get_input_bits_unconsumed(frame);
                if(estimate != actual)
                        fprintf(stderr, "EST: bad, frame#%u sub#%%d type=%8s est=%u, actual=%u, delta=%d\n", encoder->private_->current_frame_number, FLAC__SubframeTypeString[subframe->type], estimate, actual, (int)actual-(int)estimate);
        }
-       FLAC__bitbuffer_delete(frame);
+       FLAC__bitwriter_delete(frame);
 }
 #endif
 
@@ -3716,7 +3675,7 @@ unsigned evaluate_constant_subframe_(
        subframe->type = FLAC__SUBFRAME_TYPE_CONSTANT;
        subframe->data.constant.value = signal;
 
-       estimate = FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + subframe_bps;
+       estimate = FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + subframe->wasted_bits + subframe_bps;
 
 #if SPOTCHECK_ESTIMATE
        spotcheck_subframe_estimate_(encoder, blocksize, subframe_bps, subframe, estimate);
@@ -3731,7 +3690,6 @@ unsigned evaluate_fixed_subframe_(
        FLAC__StreamEncoder *encoder,
        const FLAC__int32 signal[],
        FLAC__int32 residual[],
-       FLAC__uint32 abs_residual[],
        FLAC__uint64 abs_residual_partition_sums[],
        unsigned raw_bits_per_partition[],
        unsigned blocksize,
@@ -3740,7 +3698,6 @@ unsigned evaluate_fixed_subframe_(
        unsigned rice_parameter,
        unsigned min_partition_order,
        unsigned max_partition_order,
-       FLAC__bool precompute_partition_sums,
        FLAC__bool do_escape_coding,
        unsigned rice_parameter_search_dist,
        FLAC__Subframe *subframe,
@@ -3762,7 +3719,6 @@ unsigned evaluate_fixed_subframe_(
                find_best_partition_order_(
                        encoder->private_,
                        residual,
-                       abs_residual,
                        abs_residual_partition_sums,
                        raw_bits_per_partition,
                        residual_samples,
@@ -3770,7 +3726,6 @@ unsigned evaluate_fixed_subframe_(
                        rice_parameter,
                        min_partition_order,
                        max_partition_order,
-                       precompute_partition_sums,
                        do_escape_coding,
                        rice_parameter_search_dist,
                        &subframe->data.fixed.entropy_coding_method.data.partitioned_rice
@@ -3780,7 +3735,7 @@ unsigned evaluate_fixed_subframe_(
        for(i = 0; i < order; i++)
                subframe->data.fixed.warmup[i] = signal[i];
 
-       estimate = FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + (order * subframe_bps) + residual_bits;
+       estimate = FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + subframe->wasted_bits + (order * subframe_bps) + residual_bits;
 
 #if SPOTCHECK_ESTIMATE
        spotcheck_subframe_estimate_(encoder, blocksize, subframe_bps, subframe, estimate);
@@ -3794,7 +3749,6 @@ unsigned evaluate_lpc_subframe_(
        FLAC__StreamEncoder *encoder,
        const FLAC__int32 signal[],
        FLAC__int32 residual[],
-       FLAC__uint32 abs_residual[],
        FLAC__uint64 abs_residual_partition_sums[],
        unsigned raw_bits_per_partition[],
        const FLAC__real lp_coeff[],
@@ -3805,7 +3759,6 @@ unsigned evaluate_lpc_subframe_(
        unsigned rice_parameter,
        unsigned min_partition_order,
        unsigned max_partition_order,
-       FLAC__bool precompute_partition_sums,
        FLAC__bool do_escape_coding,
        unsigned rice_parameter_search_dist,
        FLAC__Subframe *subframe,
@@ -3846,7 +3799,6 @@ unsigned evaluate_lpc_subframe_(
                find_best_partition_order_(
                        encoder->private_,
                        residual,
-                       abs_residual,
                        abs_residual_partition_sums,
                        raw_bits_per_partition,
                        residual_samples,
@@ -3854,7 +3806,6 @@ unsigned evaluate_lpc_subframe_(
                        rice_parameter,
                        min_partition_order,
                        max_partition_order,
-                       precompute_partition_sums,
                        do_escape_coding,
                        rice_parameter_search_dist,
                        &subframe->data.lpc.entropy_coding_method.data.partitioned_rice
@@ -3867,7 +3818,7 @@ unsigned evaluate_lpc_subframe_(
        for(i = 0; i < order; i++)
                subframe->data.lpc.warmup[i] = signal[i];
 
-       estimate = FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + FLAC__SUBFRAME_LPC_QLP_COEFF_PRECISION_LEN + FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN + (order * (qlp_coeff_precision + subframe_bps)) + residual_bits;
+       estimate = FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + subframe->wasted_bits + FLAC__SUBFRAME_LPC_QLP_COEFF_PRECISION_LEN + FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN + (order * (qlp_coeff_precision + subframe_bps)) + residual_bits;
 
 #if SPOTCHECK_ESTIMATE
        spotcheck_subframe_estimate_(encoder, blocksize, subframe_bps, subframe, estimate);
@@ -3891,7 +3842,7 @@ unsigned evaluate_verbatim_subframe_(
 
        subframe->data.verbatim.data = signal;
 
-       estimate = FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + (blocksize * subframe_bps);
+       estimate = FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + subframe->wasted_bits + (blocksize * subframe_bps);
 
 #if SPOTCHECK_ESTIMATE
        spotcheck_subframe_estimate_(encoder, blocksize, subframe_bps, subframe, estimate);
@@ -3905,7 +3856,6 @@ unsigned evaluate_verbatim_subframe_(
 unsigned find_best_partition_order_(
        FLAC__StreamEncoderPrivate *private_,
        const FLAC__int32 residual[],
-       FLAC__uint32 abs_residual[],
        FLAC__uint64 abs_residual_partition_sums[],
        unsigned raw_bits_per_partition[],
        unsigned residual_samples,
@@ -3913,43 +3863,32 @@ unsigned find_best_partition_order_(
        unsigned rice_parameter,
        unsigned min_partition_order,
        unsigned max_partition_order,
-       FLAC__bool precompute_partition_sums,
        FLAC__bool do_escape_coding,
        unsigned rice_parameter_search_dist,
        FLAC__EntropyCodingMethod_PartitionedRice *best_partitioned_rice
 )
 {
-       FLAC__int32 r;
        unsigned residual_bits, best_residual_bits = 0;
-       unsigned residual_sample;
        unsigned best_parameters_index = 0;
        const unsigned blocksize = residual_samples + predictor_order;
 
-       /* compute abs(residual) for use later */
-       for(residual_sample = 0; residual_sample < residual_samples; residual_sample++) {
-               r = residual[residual_sample];
-               abs_residual[residual_sample] = (FLAC__uint32)(r<0? -r : r);
-       }
-
        max_partition_order = FLAC__format_get_max_rice_partition_order_from_blocksize_limited_max_and_predictor_order(max_partition_order, blocksize, predictor_order);
        min_partition_order = min(min_partition_order, max_partition_order);
 
-       if(precompute_partition_sums) {
-               int partition_order;
-               unsigned sum;
+       precompute_partition_info_sums_(residual, abs_residual_partition_sums, residual_samples, predictor_order, min_partition_order, max_partition_order);
 
-               precompute_partition_info_sums_(abs_residual, abs_residual_partition_sums, residual_samples, predictor_order, min_partition_order, max_partition_order);
+       if(do_escape_coding)
+               precompute_partition_info_escapes_(residual, raw_bits_per_partition, residual_samples, predictor_order, min_partition_order, max_partition_order);
 
-               if(do_escape_coding)
-                       precompute_partition_info_escapes_(residual, raw_bits_per_partition, residual_samples, predictor_order, min_partition_order, max_partition_order);
+       {
+               int partition_order;
+               unsigned sum;
 
                for(partition_order = (int)max_partition_order, sum = 0; partition_order >= (int)min_partition_order; partition_order--) {
                        if(!
-                               set_partitioned_rice_with_precompute_(
+                               set_partitioned_rice_(
 #ifdef EXACT_RICE_BITS_CALCULATION
                                        residual,
-#else
-                                       abs_residual,
 #endif
                                        abs_residual_partition_sums+sum,
                                        raw_bits_per_partition+sum,
@@ -3975,35 +3914,6 @@ unsigned find_best_partition_order_(
                        }
                }
        }
-       else {
-               unsigned partition_order;
-               for(partition_order = min_partition_order; partition_order <= max_partition_order; partition_order++) {
-                       if(!
-                               set_partitioned_rice_(
-                                       abs_residual,
-#ifdef EXACT_RICE_BITS_CALCULATION
-                                       residual,
-#endif
-                                       residual_samples,
-                                       predictor_order,
-                                       rice_parameter,
-                                       rice_parameter_search_dist,
-                                       partition_order,
-                                       &private_->partitioned_rice_contents_extra[!best_parameters_index],
-                                       &residual_bits
-                               )
-                       )
-                       {
-                               FLAC__ASSERT(best_residual_bits != 0);
-                               break;
-                       }
-                       if(best_residual_bits == 0 || residual_bits < best_residual_bits) {
-                               best_residual_bits = residual_bits;
-                               best_parameters_index = !best_parameters_index;
-                               best_partitioned_rice->order = partition_order;
-                       }
-               }
-       }
 
        /*
         * We are allowed to de-const the pointer based on our special knowledge;
@@ -4020,7 +3930,7 @@ unsigned find_best_partition_order_(
 }
 
 void precompute_partition_info_sums_(
-       const FLAC__uint32 abs_residual[],
+       const FLAC__int32 residual[],
        FLAC__uint64 abs_residual_partition_sums[],
        unsigned residual_samples,
        unsigned predictor_order,
@@ -4034,7 +3944,7 @@ void precompute_partition_info_sums_(
 
        /* first do max_partition_order */
        for(partition_order = (int)max_partition_order; partition_order >= 0; partition_order--) {
-               FLAC__uint64 abs_residual_partition_sum;
+               FLAC__uint64 abs_residual_partition_sum; /* OPT: can reasonably be FLAC__uint32 for bps <= 17 and maybe higher */
                unsigned partition, partition_sample, partition_samples, residual_sample;
                const unsigned partitions = 1u << partition_order;
                const unsigned default_partition_samples = blocksize >> partition_order;
@@ -4046,8 +3956,18 @@ void precompute_partition_info_sums_(
                        if(partition == 0)
                                partition_samples -= predictor_order;
                        abs_residual_partition_sum = 0;
-                       for(partition_sample = 0; partition_sample < partition_samples; partition_sample++)
-                               abs_residual_partition_sum += abs_residual[residual_sample++];
+                       for(partition_sample = 0; partition_sample < partition_samples; partition_sample++, residual_sample++) {
+#if defined _MSC_VER && _MSC_VER <= 1200
+                               /* OPT: abs() may be faster for some compilers */
+                               abs_residual_partition_sum += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
+#else
+                               const FLAC__int32 r = residual[residual_sample];
+                               if(r < 0)
+                                       abs_residual_partition_sum -= r;
+                               else
+                                       abs_residual_partition_sum += r;
+#endif
+                       }
                        abs_residual_partition_sums[partition] = abs_residual_partition_sum;
                }
                to_partition = partitions;
@@ -4128,195 +4048,52 @@ void precompute_partition_info_escapes_(
        }
 }
 
-#undef VARIABLE_RICE_BITS
-#ifndef EXACT_RICE_BITS_CALCULATION
-#define VARIABLE_RICE_BITS(value, parameter) ((value) >> (parameter))
-#endif
-
+/*@@@@@@ overflow is a possible problem here for hi-res samples: the bit counts are accumulated and returned as plain unsigned values */
 #ifdef EXACT_RICE_BITS_CALCULATION
-static __inline unsigned count_rice_bits_(
+static __inline unsigned count_rice_bits_in_partition_(
        const unsigned rice_parameter,
        const unsigned partition_samples,
        const FLAC__int32 *residual
 )
 {
-       unsigned i, partition_bits = FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN;
+       unsigned i, partition_bits =
+               FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN +
+               (1+rice_parameter) * partition_samples /* 1 for unary stop bit + rice_parameter for the binary portion */
+       ;
        for(i = 0; i < partition_samples; i++)
-               partition_bits += FLAC__bitbuffer_rice_bits(residual[i], rice_parameter);
+               partition_bits += ( (FLAC__uint32)((residual[i]<<1)^(residual[i]>>31)) >> rice_parameter );
        return partition_bits;
 }
 #else
-static __inline unsigned count_rice_bits_(
+static __inline unsigned count_rice_bits_in_partition_(
        const unsigned rice_parameter,
        const unsigned partition_samples,
-       const FLAC__uint32 *abs_residual
+       const FLAC__uint64 abs_residual_partition_sum
 )
 {
-       const unsigned rice_parameter_estimate = rice_parameter-1;
-       unsigned i, partition_bits = FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN + (1+rice_parameter) * partition_samples;
-       for(i = 0; i < partition_samples; i++)
-               partition_bits += VARIABLE_RICE_BITS(abs_residual[i], rice_parameter_estimate);
-       return partition_bits;
+       return
+               FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN +
+               (1+rice_parameter) * partition_samples + /* 1 for unary stop bit + rice_parameter for the binary portion */
+               (
+                       rice_parameter?
+                               (unsigned)(abs_residual_partition_sum >> (rice_parameter-1)) /* rice_parameter-1 because the real coder sign-folds instead of using a sign bit */
+                               : (unsigned)(abs_residual_partition_sum << 1) /* can't shift by negative number, so reverse */
+               )
+               - (partition_samples >> 1)
+               /* -(partition_samples>>1) to subtract out extra contributions to the abs_residual_partition_sum.
+                * The actual number of bits used is closer to the sum for all i in the partition of abs(residual[i])>>(rice_parameter-1).
+                * By using the abs_residual_partition_sum, we also add in bits in the LSBs that would normally be shifted out.
+                * So the subtraction term tries to guess how many extra bits were contributed.
+                * If the LSBs are randomly distributed, this should average to 0.5 extra bits per sample.
+                */
+       ;
 }
 #endif
 
 FLAC__bool set_partitioned_rice_(
-       const FLAC__uint32 abs_residual[],
 #ifdef EXACT_RICE_BITS_CALCULATION
        const FLAC__int32 residual[],
 #endif
-       const unsigned residual_samples,
-       const unsigned predictor_order,
-       const unsigned suggested_rice_parameter,
-       const unsigned rice_parameter_search_dist,
-       const unsigned partition_order,
-       FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents,
-       unsigned *bits
-)
-{
-       unsigned rice_parameter, partition_bits;
-       unsigned bits_ = FLAC__ENTROPY_CODING_METHOD_TYPE_LEN + FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN;
-       unsigned *parameters;
-#ifdef ENABLE_RICE_PARAMETER_SEARCH
-       unsigned best_partition_bits, best_rice_parameter = 0;
-       unsigned min_rice_parameter, max_rice_parameter;
-#else
-       (void)rice_parameter_search_dist;
-#endif
-
-       FLAC__ASSERT(suggested_rice_parameter < FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER);
-
-       FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(partitioned_rice_contents, max(6, partition_order));
-       parameters = partitioned_rice_contents->parameters;
-
-       if(partition_order == 0) {
-#ifdef ENABLE_RICE_PARAMETER_SEARCH
-               best_partition_bits = 0xffffffff;
-               if(rice_parameter_search_dist) {
-                       if(suggested_rice_parameter < rice_parameter_search_dist)
-                               min_rice_parameter = 0;
-                       else
-                               min_rice_parameter = suggested_rice_parameter - rice_parameter_search_dist;
-                       max_rice_parameter = suggested_rice_parameter + rice_parameter_search_dist;
-                       if(max_rice_parameter >= FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER) {
-#ifdef DEBUG_VERBOSE
-                               fprintf(stderr, "clipping rice_parameter (%u -> %u) @2\n", max_rice_parameter, FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER - 1);
-#endif
-                               max_rice_parameter = FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER - 1;
-                       }
-               }
-               else
-                       min_rice_parameter = max_rice_parameter = suggested_rice_parameter;
-
-               for(rice_parameter = min_rice_parameter; rice_parameter <= max_rice_parameter; rice_parameter++) {
-#else
-                       rice_parameter = suggested_rice_parameter;
-#endif
-#ifdef EXACT_RICE_BITS_CALCULATION
-                       partition_bits = count_rice_bits_(rice_parameter, residual_samples, residual);
-#else
-                       partition_bits = count_rice_bits_(rice_parameter, residual_samples, abs_residual);
-#endif
-#ifdef ENABLE_RICE_PARAMETER_SEARCH
-                       if(partition_bits < best_partition_bits) {
-                               best_rice_parameter = rice_parameter;
-                               best_partition_bits = partition_bits;
-                       }
-               }
-               parameters[0] = best_rice_parameter;
-               bits_ += best_partition_bits;
-#else
-                       parameters[0] = rice_parameter;
-                       bits_ += partition_bits;
-#endif
-       }
-       else {
-               unsigned partition, residual_sample;
-               unsigned partition_samples;
-               FLAC__uint64 mean, k;
-               const unsigned partitions = 1u << partition_order;
-               for(partition = residual_sample = 0; partition < partitions; partition++) {
-                       partition_samples = (residual_samples+predictor_order) >> partition_order;
-                       if(partition == 0) {
-                               if(partition_samples <= predictor_order)
-                                       return false;
-                               else
-                                       partition_samples -= predictor_order;
-                       }
-                       mean = 0;
-                       {
-                               unsigned rs, ps;
-                               for(ps = 0, rs = residual_sample; ps < partition_samples; ps++)
-                                       mean += abs_residual[rs++];
-                       }
-                       /* we are basically calculating the size in bits of the
-                        * average residual magnitude in the partition:
-                        *   rice_parameter = floor(log2(mean/partition_samples))
-                        * 'mean' is not a good name for the variable, it is
-                        * actually the sum of magnitudes of all residual values
-                        * in the partition, so the actual mean is
-                        * mean/partition_samples
-                        */
-                       for(rice_parameter = 0, k = partition_samples; k < mean; rice_parameter++, k <<= 1)
-                               ;
-                       if(rice_parameter >= FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER) {
-#ifdef DEBUG_VERBOSE
-                               fprintf(stderr, "clipping rice_parameter (%u -> %u) @3\n", rice_parameter, FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER - 1);
-#endif
-                               rice_parameter = FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER - 1;
-                       }
-
-#ifdef ENABLE_RICE_PARAMETER_SEARCH
-                       best_partition_bits = 0xffffffff;
-                       if(rice_parameter_search_dist) {
-                               if(rice_parameter < rice_parameter_search_dist)
-                                       min_rice_parameter = 0;
-                               else
-                                       min_rice_parameter = rice_parameter - rice_parameter_search_dist;
-                               max_rice_parameter = rice_parameter + rice_parameter_search_dist;
-                               if(max_rice_parameter >= FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER) {
-#ifdef DEBUG_VERBOSE
-                                       fprintf(stderr, "clipping rice_parameter (%u -> %u) @4\n", max_rice_parameter, FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER - 1);
-#endif
-                                       max_rice_parameter = FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER - 1;
-                               }
-                       }
-                       else
-                               min_rice_parameter = max_rice_parameter = rice_parameter;
-
-                       for(rice_parameter = min_rice_parameter; rice_parameter <= max_rice_parameter; rice_parameter++) {
-#endif
-#ifdef EXACT_RICE_BITS_CALCULATION
-                               partition_bits = count_rice_bits_(rice_parameter, partition_samples, residual+residual_sample);
-#else
-                               partition_bits = count_rice_bits_(rice_parameter, partition_samples, abs_residual+residual_sample);
-#endif
-#ifdef ENABLE_RICE_PARAMETER_SEARCH
-                               if(partition_bits < best_partition_bits) {
-                                       best_rice_parameter = rice_parameter;
-                                       best_partition_bits = partition_bits;
-                               }
-                       }
-                       parameters[partition] = best_rice_parameter;
-                       bits_ += best_partition_bits;
-#else
-                               parameters[partition] = rice_parameter;
-                               bits_ += partition_bits;
-#endif
-                       residual_sample += partition_samples;
-               }
-       }
-
-       *bits = bits_;
-       return true;
-}
-
-FLAC__bool set_partitioned_rice_with_precompute_(
-#ifdef EXACT_RICE_BITS_CALCULATION
-       const FLAC__int32 residual[],
-#else
-       const FLAC__uint32 abs_residual[],
-#endif
        const FLAC__uint64 abs_residual_partition_sums[],
        const unsigned raw_bits_per_partition[],
        const unsigned residual_samples,
@@ -4370,9 +4147,9 @@ FLAC__bool set_partitioned_rice_with_precompute_(
                        rice_parameter = suggested_rice_parameter;
 #endif
 #ifdef EXACT_RICE_BITS_CALCULATION
-                       partition_bits = count_rice_bits_(rice_parameter, residual_samples, residual);
+                       partition_bits = count_rice_bits_in_partition_(rice_parameter, residual_samples, residual);
 #else
-                       partition_bits = count_rice_bits_(rice_parameter, residual_samples, abs_residual);
+                       partition_bits = count_rice_bits_in_partition_(rice_parameter, residual_samples, abs_residual_partition_sums[0]);
 #endif
                        if(partition_bits < best_partition_bits) {
                                best_rice_parameter = rice_parameter;
@@ -4444,9 +4221,9 @@ FLAC__bool set_partitioned_rice_with_precompute_(
                        for(rice_parameter = min_rice_parameter; rice_parameter <= max_rice_parameter; rice_parameter++) {
 #endif
 #ifdef EXACT_RICE_BITS_CALCULATION
-                               partition_bits = count_rice_bits_(rice_parameter, partition_samples, residual+residual_sample);
+                               partition_bits = count_rice_bits_in_partition_(rice_parameter, partition_samples, residual+residual_sample);
 #else
-                               partition_bits = count_rice_bits_(rice_parameter, partition_samples, abs_residual+residual_sample);
+                               partition_bits = count_rice_bits_in_partition_(rice_parameter, partition_samples, abs_residual_partition_sums[partition]);
 #endif
                                if(partition_bits < best_partition_bits) {
                                        best_rice_parameter = rice_parameter;