libFLAC : Remove FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap().
authorErik de Castro Lopo <erikd@mega-nerd.com>
Sun, 6 Jul 2014 10:27:28 +0000 (20:27 +1000)
committerErik de Castro Lopo <erikd@mega-nerd.com>
Sun, 6 Jul 2014 10:27:31 +0000 (20:27 +1000)
This function was un-used because it showed no speed improvement over the
C version. As a result the bitreader_read_from_client_() function can be
made static again.

Patch-from: lvqcl <lvqcl.mail@gmail.com>

src/libFLAC/Makefile.lite
src/libFLAC/bitreader.c
src/libFLAC/ia32/Makefile.am
src/libFLAC/ia32/bitreader_asm.nasm [deleted file]
src/libFLAC/include/private/bitreader.h
src/libFLAC/libFLAC_dynamic.vcproj
src/libFLAC/libFLAC_static.vcproj
src/libFLAC/stream_decoder.c

index a57b0bb..cbffc6f 100644 (file)
@@ -68,7 +68,6 @@ SRCS_S = \
 else
 ifeq ($(PROC),i386)
 SRCS_NASM = \
-       ia32/bitreader_asm.nasm \
        ia32/cpu_asm.nasm \
        ia32/fixed_asm.nasm \
        ia32/lpc_asm.nasm
index b7f8b17..d25d0b2 100644 (file)
@@ -120,8 +120,7 @@ static inline void crc16_update_word_(FLAC__BitReader *br, uint32_t word)
        br->crc16_align = 0;
 }
 
-/* would be static except it needs to be called by asm routines */
-FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br)
+static FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br)
 {
        unsigned start, end;
        size_t bytes;
index aca4561..f8d0692 100644 (file)
@@ -38,7 +38,6 @@ AM_CPPFLAGS = -I$(top_builddir) -I$(srcdir)/include -I$(top_srcdir)/include
 
 noinst_LTLIBRARIES = libFLAC-asm.la
 libFLAC_asm_la_SOURCES = \
-       bitreader_asm.nasm \
        cpu_asm.nasm \
        fixed_asm.nasm \
        lpc_asm.nasm \
diff --git a/src/libFLAC/ia32/bitreader_asm.nasm b/src/libFLAC/ia32/bitreader_asm.nasm
deleted file mode 100644 (file)
index 7db9152..0000000
+++ /dev/null
@@ -1,593 +0,0 @@
-;  vim:filetype=nasm ts=8
-
-;  libFLAC - Free Lossless Audio Codec library
-;  Copyright (C) 2001-2009  Josh Coalson
-;  Copyright (C) 2011-2013  Xiph.Org Foundation
-;
-;  Redistribution and use in source and binary forms, with or without
-;  modification, are permitted provided that the following conditions
-;  are met:
-;
-;  - Redistributions of source code must retain the above copyright
-;  notice, this list of conditions and the following disclaimer.
-;
-;  - Redistributions in binary form must reproduce the above copyright
-;  notice, this list of conditions and the following disclaimer in the
-;  documentation and/or other materials provided with the distribution.
-;
-;  - Neither the name of the Xiph.org Foundation nor the names of its
-;  contributors may be used to endorse or promote products derived from
-;  this software without specific prior written permission.
-;
-;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
-;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-%include "nasm.h"
-
-       data_section
-
-cextern FLAC__crc16_table              ; unsigned FLAC__crc16_table[256];
-cextern bitreader_read_from_client_    ; FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br);
-
-cglobal FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
-
-       code_section
-
-
-; **********************************************************************
-;
-; void FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter)
-;
-; Some details like assertions and other checking is performed by the caller.
-       ALIGN 16
-cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
-
-       ;ASSERT(0 != br);
-       ;ASSERT(0 != br->buffer);
-       ; WATCHOUT: code only works if sizeof(brword)==32; we can make things much faster with this assertion
-       ;ASSERT(FLAC__BITS_PER_WORD == 32);
-       ;ASSERT(parameter < 32);
-       ; the above two asserts also guarantee that the binary part never straddles more than 2 words, so we don't have to loop to read it
-
-       ;; peppered throughout the code at major checkpoints are keys like this as to where things are at that point in time
-       ;; [esp + 16]   unsigned parameter
-       ;; [esp + 12]   unsigned nvals
-       ;; [esp + 8]    int vals[]
-       ;; [esp + 4]    FLAC__BitReader *br
-       mov     eax, [esp + 12]         ; if(nvals == 0)
-       test    eax, eax
-       ja      .nvals_gt_0
-       mov     eax, 1                  ;   return true;
-       ret
-
-.nvals_gt_0:
-       push    ebp
-       push    ebx
-       push    esi
-       push    edi
-       sub     esp, 4
-       ;; [esp + 36]   unsigned parameter
-       ;; [esp + 32]   unsigned nvals
-       ;; [esp + 28]   int vals[]
-       ;; [esp + 24]   FLAC__BitReader *br
-       ;; [esp]        ucbits
-       mov     ebp, [esp + 24]         ; ebp <- br == br->buffer
-       mov     esi, [ebp + 16]         ; esi <- br->consumed_words (aka 'cwords' in the C version)
-       mov     ecx, [ebp + 20]         ; ecx <- br->consumed_bits  (aka 'cbits'  in the C version)
-       xor     edi, edi                ; edi <- 0  'uval'
-       ;; ecx          cbits
-       ;; esi          cwords
-       ;; edi          uval
-       ;; ebp          br
-       ;; [ebp]        br->buffer
-       ;; [ebp + 8]    br->words
-       ;; [ebp + 12]   br->bytes
-       ;; [ebp + 16]   br->consumed_words
-       ;; [ebp + 20]   br->consumed_bits
-       ;; [ebp + 24]   br->read_crc
-       ;; [ebp + 28]   br->crc16_align
-
-                                       ; ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
-       mov     eax, [ebp + 8]          ;   eax <- br->words
-       sub     eax, esi                ;   eax <- br->words-cwords
-       shl     eax, 2                  ;   eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD
-       add     eax, [ebp + 12]         ;   eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
-       shl     eax, 3                  ;   eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
-       sub     eax, ecx                ;   eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
-       mov     [esp], eax              ;   ucbits <- eax
-
-       ALIGN 16
-.val_loop:                             ; while(1) {
-
-       ;
-       ; read unary part
-       ;
-.unary_loop:                           ;   while(1) {
-       ;; ecx          cbits
-       ;; esi          cwords
-       ;; edi          uval
-       ;; ebp          br
-       cmp     esi, [ebp + 8]          ;     while(cwords < br->words)   /* if we've not consumed up to a partial tail word... */
-       jae     near .c1_next1
-.c1_loop:                              ;     {
-       mov     ebx, [ebp]
-       mov     eax, [ebx + 4*esi]      ;       b = br->buffer[cwords]
-       mov     edx, eax                ;       edx = br->buffer[cwords] (saved for later use)
-       shl     eax, cl                 ;       b = br->buffer[cwords] << cbits
-       test    eax, eax                ;         (still have to test since cbits may be 0, thus ZF not updated for shl eax,0)
-       jz      near .c1_next2          ;       if(b) {
-       bsr     ebx, eax
-       not     ebx
-       and     ebx, 31                 ;         ebx = 'i' = # of leading 0 bits in 'b' (eax)
-       add     ecx, ebx                ;         cbits += i;
-       add     edi, ebx                ;         uval += i;
-       add     ecx, byte 1             ;         cbits++; /* skip over stop bit */
-       test    ecx, ~31
-       jz      near .break1            ;         if(cbits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(cbits == FLAC__BITS_PER_WORD) */
-                                       ;           crc16_update_word_(br, br->buffer[cwords]);
-       push    edi                     ;               [need more registers]
-       bswap   edx                     ;               edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
-       mov     ecx, [ebp + 28]         ;               ecx <- br->crc16_align
-       mov     eax, [ebp + 24]         ;               ax <- br->read_crc (a.k.a. crc)
-%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
-       mov     edi, _FLAC__crc16_table
-%else
-%ifdef OBJ_FORMAT_elf
-       mov     edi, [esp + 16]         ;               saved ebx (GOT base)
-       lea     edi, [edi + FLAC__crc16_table wrt ..gotoff]
-%else
-       mov     edi, FLAC__crc16_table
-%endif
-%endif
-       ;; eax (ax)     crc a.k.a. br->read_crc
-       ;; ebx (bl)     intermediate result index into FLAC__crc16_table[]
-       ;; ecx          br->crc16_align
-       ;; edx          byteswapped brword to CRC
-       ;; esi          cwords
-       ;; edi          unsigned FLAC__crc16_table[]
-       ;; ebp          br
-       test    ecx, ecx                ;               switch(br->crc16_align) ...
-       jnz     .c0b4                   ;               [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
-.c0b0: xor     dl, ah                  ;               dl <- (crc>>8)^(word>>24)
-       movzx   ebx, dl
-       mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
-       shl     eax, 8                  ;               ax <- (crc<<8)
-       xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
-.c0b1: xor     dh, ah                  ;               dh <- (crc>>8)^((word>>16)&0xff))
-       movzx   ebx, dh
-       mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
-       shl     eax, 8                  ;               ax <- (crc<<8)
-       xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
-       shr     edx, 16
-.c0b2: xor     dl, ah                  ;               dl <- (crc>>8)^((word>>8)&0xff))
-       movzx   ebx, dl
-       mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
-       shl     eax, 8                  ;               ax <- (crc<<8)
-       xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
-.c0b3: xor     dh, ah                  ;               dh <- (crc>>8)^(word&0xff)
-       movzx   ebx, dh
-       mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
-       shl     eax, 8                  ;               ax <- (crc<<8)
-       xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
-       movzx   eax, ax
-       mov     [ebp + 24], eax         ;               br->read_crc <- crc
-       pop     edi
-
-       add     esi, byte 1             ;           cwords++;
-       xor     ecx, ecx                ;           cbits = 0;
-                                       ;         }
-       jmp     near .break1            ;         goto break1;
-       ;; this section relocated out of the way for performance
-.c0b4:
-       mov     [ebp + 28], dword 0     ;               br->crc16_align <- 0
-       cmp     ecx, 8
-       je      .c0b1
-       shr     edx, 16
-       cmp     ecx, 16
-       je      .c0b2
-       jmp     .c0b3
-
-       ;; this section relocated out of the way for performance
-.c1b4:
-       mov     [ebp + 28], dword 0     ;               br->crc16_align <- 0
-       cmp     ecx, 8
-       je      .c1b1
-       shr     edx, 16
-       cmp     ecx, 16
-       je      .c1b2
-       jmp     .c1b3
-
-.c1_next2:                             ;       } else {
-       ;; ecx          cbits
-       ;; edx          current brword 'b'
-       ;; esi          cwords
-       ;; edi          uval
-       ;; ebp          br
-       add     edi, 32
-       sub     edi, ecx                ;         uval += FLAC__BITS_PER_WORD - cbits;
-                                       ;         crc16_update_word_(br, br->buffer[cwords]);
-       push    edi                     ;               [need more registers]
-       bswap   edx                     ;               edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
-       mov     ecx, [ebp + 28]         ;               ecx <- br->crc16_align
-       mov     eax, [ebp + 24]         ;               ax <- br->read_crc (a.k.a. crc)
-%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
-       mov     edi, _FLAC__crc16_table
-%else
-%ifdef OBJ_FORMAT_elf
-       mov     edi, [esp + 16]         ;               saved ebx (GOT base)
-       lea     edi, [edi + FLAC__crc16_table wrt ..gotoff]
-%else
-       mov     edi, FLAC__crc16_table
-%endif
-%endif
-       ;; eax (ax)     crc a.k.a. br->read_crc
-       ;; ebx (bl)     intermediate result index into FLAC__crc16_table[]
-       ;; ecx          br->crc16_align
-       ;; edx          byteswapped brword to CRC
-       ;; esi          cwords
-       ;; edi          unsigned FLAC__crc16_table[]
-       ;; ebp          br
-       test    ecx, ecx                ;               switch(br->crc16_align) ...
-       jnz     .c1b4                   ;               [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
-.c1b0: xor     dl, ah                  ;               dl <- (crc>>8)^(word>>24)
-       movzx   ebx, dl
-       mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
-       shl     eax, 8                  ;               ax <- (crc<<8)
-       xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
-.c1b1: xor     dh, ah                  ;               dh <- (crc>>8)^((word>>16)&0xff))
-       movzx   ebx, dh
-       mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
-       shl     eax, 8                  ;               ax <- (crc<<8)
-       xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
-       shr     edx, 16
-.c1b2: xor     dl, ah                  ;               dl <- (crc>>8)^((word>>8)&0xff))
-       movzx   ebx, dl
-       mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
-       shl     eax, 8                  ;               ax <- (crc<<8)
-       xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
-.c1b3: xor     dh, ah                  ;               dh <- (crc>>8)^(word&0xff)
-       movzx   ebx, dh
-       mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
-       shl     eax, 8                  ;               ax <- (crc<<8)
-       xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
-       movzx   eax, ax
-       mov     [ebp + 24], eax         ;               br->read_crc <- crc
-       pop     edi
-
-       add     esi, byte 1             ;         cwords++;
-       xor     ecx, ecx                ;         cbits = 0;
-                                       ;         /* didn't find stop bit yet, have to keep going... */
-                                       ;       }
-
-       cmp     esi, [ebp + 8]          ;     } while(cwords < br->words)   /* if we've not consumed up to a partial tail word... */
-       jb      near .c1_loop
-
-.c1_next1:
-       ; at this point we've eaten up all the whole words; have to try
-       ; reading through any tail bytes before calling the read callback.
-       ; this is a repeat of the above logic adjusted for the fact we
-       ; don't have a whole word.  note though if the client is feeding
-       ; us data a byte at a time (unlikely), br->consumed_bits may not
-       ; be zero.
-       ;; ecx          cbits
-       ;; esi          cwords
-       ;; edi          uval
-       ;; ebp          br
-       mov     edx, [ebp + 12]         ;     edx <- br->bytes
-       shl     edx, 3                  ;     edx <- br->bytes*8
-       cmp     edx, ecx
-       jbe     .read1                  ;     if(br->bytes*8 > cbits) {  [NOTE: this case is rare so it doesn't have to be all that fast ]
-       mov     ebx, [ebp]
-                                       ;       edx <- const unsigned end = br->bytes * 8;
-       mov     eax, [ebx + 4*esi]      ;       b = br->buffer[cwords]
-       xchg    edx, ecx                ;       [edx <- cbits , ecx <- end]
-       mov     ebx, 0xffffffff         ;       ebx <- FLAC__WORD_ALL_ONES
-       shr     ebx, cl                 ;       ebx <- FLAC__WORD_ALL_ONES >> end
-       not     ebx                     ;       ebx <- ~(FLAC__WORD_ALL_ONES >> end)
-       xchg    edx, ecx                ;       [edx <- end , ecx <- cbits]
-       and     eax, ebx                ;       b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end));
-       shl     eax, cl                 ;       b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end)) << cbits;
-       test    eax, eax                ;         (still have to test since cbits may be 0, thus ZF not updated for shl eax,0)
-       jz      .c1_next3               ;       if(b) {
-       bsr     ebx, eax
-       not     ebx
-       and     ebx, 31                 ;         ebx = 'i' = # of leading 0 bits in 'b' (eax)
-       add     ecx, ebx                ;         cbits += i;
-       add     edi, ebx                ;         uval += i;
-       add     ecx, byte 1             ;         cbits++; /* skip over stop bit */
-       jmp     short .break1           ;         goto break1;
-.c1_next3:                             ;       } else {
-       sub     edi, ecx
-       add     edi, edx                ;         uval += end - cbits;
-       mov     ecx, edx                ;         cbits = end
-                                       ;         /* didn't find stop bit yet, have to keep going... */
-                                       ;       }
-                                       ;     }
-.read1:
-       ; flush registers and read; bitreader_read_from_client_() does
-       ; not touch br->consumed_bits at all but we still need to set
-       ; it in case it fails and we have to return false.
-       ;; ecx          cbits
-       ;; esi          cwords
-       ;; edi          uval
-       ;; ebp          br
-       mov     [ebp + 16], esi         ;     br->consumed_words = cwords;
-       mov     [ebp + 20], ecx         ;     br->consumed_bits = cbits;
-       push    ecx                     ;     /* save */
-       push    ebp                     ;     /* push br argument */
-%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
-       call    _bitreader_read_from_client_
-%else
-%ifdef OBJ_FORMAT_elf
-       mov     ebx, [esp + 20]         ;               saved ebx (GOT base)
-       call    bitreader_read_from_client_ wrt ..plt
-%else
-       call    bitreader_read_from_client_
-%endif
-%endif
-       pop     edx                     ;     /* discard, unused */
-       pop     ecx                     ;     /* restore */
-       mov     esi, [ebp + 16]         ;     cwords = br->consumed_words;
-                                       ;     ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
-       mov     ebx, [ebp + 8]          ;       ebx <- br->words
-       sub     ebx, esi                ;       ebx <- br->words-cwords
-       shl     ebx, 2                  ;       ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD
-       add     ebx, [ebp + 12]         ;       ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
-       shl     ebx, 3                  ;       ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
-       sub     ebx, ecx                ;       ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
-       add     ebx, edi                ;       ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits + uval
-                                       ;           + uval to offset our count by the # of unary bits already
-                                       ;           consumed before the read, because we will add these back
-                                       ;           in all at once at break1
-       mov     [esp], ebx              ;       ucbits <- ebx
-       test    eax, eax                ;     if(!bitreader_read_from_client_(br))
-       jnz     near .unary_loop
-       jmp     .end                    ;       return false; /* eax (the return value) is already 0 */
-                                       ;   } /* end while(1) unary part */
-
-       ALIGN 16
-.break1:
-       ;; ecx          cbits
-       ;; esi          cwords
-       ;; edi          uval
-       ;; ebp          br
-       ;; [esp]        ucbits
-       sub     [esp], edi              ;   ucbits -= uval;
-       sub     dword [esp], byte 1     ;   ucbits--; /* account for stop bit */
-
-       ;
-       ; read binary part
-       ;
-       mov     ebx, [esp + 36]         ;   ebx <- parameter
-       test    ebx, ebx                ;   if(parameter) {
-       jz      near .break2
-.read2:
-       cmp     [esp], ebx              ;     while(ucbits < parameter) {
-       jae     .c2_next1
-       ; flush registers and read; bitreader_read_from_client_() does
-       ; not touch br->consumed_bits at all but we still need to set
-       ; it in case it fails and we have to return false.
-       mov     [ebp + 16], esi         ;       br->consumed_words = cwords;
-       mov     [ebp + 20], ecx         ;       br->consumed_bits = cbits;
-       push    ecx                     ;       /* save */
-       push    ebx                     ;       /* save */
-       push    ebp                     ;       /* push br argument */
-%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
-       call    _bitreader_read_from_client_
-%else
-%ifdef OBJ_FORMAT_elf
-       mov     ebx, [esp + 24]         ;               saved ebx (GOT base)
-       call    bitreader_read_from_client_ wrt ..plt
-%else
-       call    bitreader_read_from_client_
-%endif
-%endif
-       pop     edx                     ;       /* discard, unused */
-       pop     ebx                     ;       /* restore */
-       pop     ecx                     ;       /* restore */
-       mov     esi, [ebp + 16]         ;       cwords = br->consumed_words;
-                                       ;       ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
-       mov     edx, [ebp + 8]          ;         edx <- br->words
-       sub     edx, esi                ;         edx <- br->words-cwords
-       shl     edx, 2                  ;         edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD
-       add     edx, [ebp + 12]         ;         edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
-       shl     edx, 3                  ;         edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
-       sub     edx, ecx                ;         edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
-       mov     [esp], edx              ;         ucbits <- edx
-       test    eax, eax                ;       if(!bitreader_read_from_client_(br))
-       jnz     .read2
-       jmp     .end                    ;         return false; /* eax (the return value) is already 0 */
-                                       ;     }
-.c2_next1:
-       ;; ebx          parameter
-       ;; ecx          cbits
-       ;; esi          cwords
-       ;; edi          uval
-       ;; ebp          br
-       ;; [esp]        ucbits
-       cmp     esi, [ebp + 8]          ;     if(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
-       jae     near .c2_next2
-       test    ecx, ecx                ;       if(cbits) {
-       jz      near .c2_next3          ;         /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
-       mov     eax, 32
-       mov     edx, [ebp]
-       sub     eax, ecx                ;         const unsigned n = FLAC__BITS_PER_WORD - cbits;
-       mov     edx, [edx + 4*esi]      ;         const brword word = br->buffer[cwords];
-       cmp     ebx, eax                ;         if(parameter < n) {
-       jae     .c2_next4
-                                       ;           uval <<= parameter;
-                                       ;           uval |= (word & (FLAC__WORD_ALL_ONES >> cbits)) >> (n-parameter);
-       shl     edx, cl
-       xchg    ebx, ecx
-       shld    edi, edx, cl
-       add     ebx, ecx                ;           cbits += parameter;
-       xchg    ebx, ecx                ;           ebx <- parameter, ecx <- cbits
-       jmp     .break2                 ;           goto break2;
-                                       ;         }
-.c2_next4:
-                                       ;         uval <<= n;
-                                       ;         uval |= word & (FLAC__WORD_ALL_ONES >> cbits);
-%if 1
-       rol     edx, cl                 ;            @@@@@@OPT: may be faster to use rol to save edx so we can restore it for CRC'ing
-                                       ;            @@@@@@OPT: or put parameter in ch instead and free up ebx completely again
-%else
-       shl     edx, cl
-%endif
-       xchg    eax, ecx
-       shld    edi, edx, cl
-       xchg    eax, ecx
-%if 1
-       ror     edx, cl                 ;            restored.
-%else
-       mov     edx, [ebp]
-       mov     edx, [edx + 4*esi]
-%endif
-                                       ;         crc16_update_word_(br, br->buffer[cwords]);
-       push    edi                     ;               [need more registers]
-       push    ebx                     ;               [need more registers]
-       push    eax                     ;               [need more registers]
-       bswap   edx                     ;               edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
-       mov     ecx, [ebp + 28]         ;               ecx <- br->crc16_align
-       mov     eax, [ebp + 24]         ;               ax <- br->read_crc (a.k.a. crc)
-%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
-       mov     edi, _FLAC__crc16_table
-%else
-%ifdef OBJ_FORMAT_elf
-       mov     edi, [esp + 24]         ;               saved ebx (GOT base)
-       lea     edi, [edi + FLAC__crc16_table wrt ..gotoff]
-%else
-       mov     edi, FLAC__crc16_table
-%endif
-%endif
-       ;; eax (ax)     crc a.k.a. br->read_crc
-       ;; ebx (bl)     intermediate result index into FLAC__crc16_table[]
-       ;; ecx          br->crc16_align
-       ;; edx          byteswapped brword to CRC
-       ;; esi          cwords
-       ;; edi          unsigned FLAC__crc16_table[]
-       ;; ebp          br
-       test    ecx, ecx                ;               switch(br->crc16_align) ...
-       jnz     .c2b4                   ;               [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
-.c2b0: xor     dl, ah                  ;               dl <- (crc>>8)^(word>>24)
-       movzx   ebx, dl
-       mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
-       shl     eax, 8                  ;               ax <- (crc<<8)
-       xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
-.c2b1: xor     dh, ah                  ;               dh <- (crc>>8)^((word>>16)&0xff))
-       movzx   ebx, dh
-       mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
-       shl     eax, 8                  ;               ax <- (crc<<8)
-       xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
-       shr     edx, 16
-.c2b2: xor     dl, ah                  ;               dl <- (crc>>8)^((word>>8)&0xff))
-       movzx   ebx, dl
-       mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
-       shl     eax, 8                  ;               ax <- (crc<<8)
-       xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
-.c2b3: xor     dh, ah                  ;               dh <- (crc>>8)^(word&0xff)
-       movzx   ebx, dh
-       mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
-       shl     eax, 8                  ;               ax <- (crc<<8)
-       xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
-       movzx   eax, ax
-       mov     [ebp + 24], eax         ;               br->read_crc <- crc
-       pop     eax
-       pop     ebx
-       pop     edi
-       add     esi, byte 1             ;         cwords++;
-       mov     ecx, ebx
-       sub     ecx, eax                ;         cbits = parameter - n;
-       jz      .break2                 ;         if(cbits) { /* parameter > n, i.e. if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
-                                       ;           uval <<= cbits;
-                                       ;           uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
-       mov     eax, [ebp]
-       mov     eax, [eax + 4*esi]
-       shld    edi, eax, cl
-                                       ;         }
-       jmp     .break2                 ;         goto break2;
-
-       ;; this section relocated out of the way for performance
-.c2b4:
-       mov     [ebp + 28], dword 0     ;               br->crc16_align <- 0
-       cmp     ecx, 8
-       je      .c2b1
-       shr     edx, 16
-       cmp     ecx, 16
-       je      .c2b2
-       jmp     .c2b3
-
-.c2_next3:                             ;       } else {
-       mov     ecx, ebx                ;         cbits = parameter;
-                                       ;         uval <<= cbits;
-                                       ;         uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
-       mov     eax, [ebp]
-       mov     eax, [eax + 4*esi]
-       shld    edi, eax, cl
-       jmp     .break2                 ;         goto break2;
-                                       ;       }
-.c2_next2:                             ;     } else {
-       ; in this case we're starting our read at a partial tail word;
-       ; the reader has guaranteed that we have at least 'parameter'
-       ; bits available to read, which makes this case simpler.
-                                       ;       uval <<= parameter;
-                                       ;       if(cbits) {
-                                       ;         /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
-                                       ;         uval |= (br->buffer[cwords] & (FLAC__WORD_ALL_ONES >> cbits)) >> (FLAC__BITS_PER_WORD-cbits-parameter);
-                                       ;         cbits += parameter;
-                                       ;         goto break2;
-                                       ;       } else {
-                                       ;         cbits = parameter;
-                                       ;         uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits);
-                                       ;         goto break2;
-                                       ;       }
-                                       ;       the above is much shorter in assembly:
-       mov     eax, [ebp]
-       mov     eax, [eax + 4*esi]      ;       eax <- br->buffer[cwords]
-       shl     eax, cl                 ;       eax <- br->buffer[cwords] << cbits
-       add     ecx, ebx                ;       cbits += parameter
-       xchg    ebx, ecx                ;       ebx <- cbits, ecx <- parameter
-       shld    edi, eax, cl            ;       uval <<= parameter <<< 'parameter' bits of tail word
-       xchg    ebx, ecx                ;       ebx <- parameter, ecx <- cbits
-                                       ;     }
-                                       ;   }
-.break2:
-       sub     [esp], ebx              ;   ucbits -= parameter;
-
-       ;
-       ; compose the value
-       ;
-       mov     ebx, [esp + 28]         ;   ebx <- vals
-       mov     edx, edi                ;   edx <- uval
-       and     edi, 1                  ;   edi <- uval & 1
-       shr     edx, 1                  ;   edx <- uval >> 1
-       neg     edi                     ;   edi <- -(int)(uval & 1)
-       xor     edx, edi                ;   edx <- (uval >> 1 ^ -(int)(uval & 1))
-       mov     [ebx], edx              ;   *vals <- edx
-       sub     dword [esp + 32], byte 1        ;   --nvals;
-       jz      .finished               ;   if(nvals == 0) /* jump to finish */
-       xor     edi, edi                ;   uval = 0;
-       add     dword [esp + 28], 4     ;   ++vals
-       jmp     .val_loop               ; }
-
-.finished:
-       mov     [ebp + 16], esi         ; br->consumed_words = cwords;
-       mov     [ebp + 20], ecx         ; br->consumed_bits = cbits;
-       mov     eax, 1
-.end:
-       add     esp, 4
-       pop     edi
-       pop     esi
-       pop     ebx
-       pop     ebp
-       ret
-
-; end
index 3bd1121..c43c335 100644 (file)
@@ -82,19 +82,10 @@ FLAC__bool FLAC__bitreader_read_byte_block_aligned_no_crc(FLAC__BitReader *br, F
 FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *val);
 FLAC__bool FLAC__bitreader_read_rice_signed(FLAC__BitReader *br, int *val, unsigned parameter);
 FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter);
-#ifndef FLAC__NO_ASM
-#  ifdef FLAC__CPU_IA32
-#    ifdef FLAC__HAS_NASM
-FLAC__bool FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter);
-#    endif
-#  endif
-#endif
 #if 0 /* UNUSED */
 FLAC__bool FLAC__bitreader_read_golomb_signed(FLAC__BitReader *br, int *val, unsigned parameter);
 FLAC__bool FLAC__bitreader_read_golomb_unsigned(FLAC__BitReader *br, unsigned *val, unsigned parameter);
 #endif
 FLAC__bool FLAC__bitreader_read_utf8_uint32(FLAC__BitReader *br, FLAC__uint32 *val, FLAC__byte *raw, unsigned *rawlen);
 FLAC__bool FLAC__bitreader_read_utf8_uint64(FLAC__BitReader *br, FLAC__uint64 *val, FLAC__byte *raw, unsigned *rawlen);
-
-FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br);
 #endif
index 0d4c4c4..b18cc99 100644 (file)
                        </File>\r
                </Filter>\r
                <File\r
-                       RelativePath=".\ia32\bitreader_asm.nasm"\r
-                       >\r
-                       <FileConfiguration\r
-                               Name="Debug|Win32"\r
-                               >\r
-                               <Tool\r
-                                       Name="VCCustomBuildTool"\r
-                                       CommandLine="nasm.exe -f win32 -d OBJ_FORMAT_win32 -i ia32/ ia32/bitreader_asm.nasm -o ia32/bitreader_asm.obj&#x0D;&#x0A;"\r
-                                       AdditionalDependencies="ia32/bitreader_asm.nasm;ia32/nasm.h"\r
-                                       Outputs="ia32/bitreader_asm.obj"\r
-                               />\r
-                       </FileConfiguration>\r
-                       <FileConfiguration\r
-                               Name="Release|Win32"\r
-                               >\r
-                               <Tool\r
-                                       Name="VCCustomBuildTool"\r
-                                       CommandLine="nasm.exe -f win32 -d OBJ_FORMAT_win32 -i ia32/ ia32/bitreader_asm.nasm -o ia32/bitreader_asm.obj&#x0D;&#x0A;"\r
-                                       AdditionalDependencies="ia32/bitreader_asm.nasm;ia32/nasm.h"\r
-                                       Outputs="ia32/bitreader_asm.obj"\r
-                               />\r
-                       </FileConfiguration>\r
-               </File>\r
-               <File\r
                        RelativePath=".\ia32\cpu_asm.nasm"\r
                        >\r
                        <FileConfiguration\r
index d6ad96f..ab69777 100644 (file)
                        </File>\r
                </Filter>\r
                <File\r
-                       RelativePath=".\ia32\bitreader_asm.nasm"\r
-                       >\r
-                       <FileConfiguration\r
-                               Name="Debug|Win32"\r
-                               >\r
-                               <Tool\r
-                                       Name="VCCustomBuildTool"\r
-                                       CommandLine="nasm.exe -f win32 -d OBJ_FORMAT_win32 -i ia32/ ia32/bitreader_asm.nasm -o ia32/bitreader_asm.obj&#x0D;&#x0A;"\r
-                                       AdditionalDependencies="ia32/bitreader_asm.nasm;ia32/nasm.h"\r
-                                       Outputs="ia32/bitreader_asm.obj"\r
-                               />\r
-                       </FileConfiguration>\r
-                       <FileConfiguration\r
-                               Name="Release|Win32"\r
-                               >\r
-                               <Tool\r
-                                       Name="VCCustomBuildTool"\r
-                                       CommandLine="nasm.exe -f win32 -d OBJ_FORMAT_win32 -i ia32/ ia32/bitreader_asm.nasm -o ia32/bitreader_asm.obj&#x0D;&#x0A;"\r
-                                       AdditionalDependencies="ia32/bitreader_asm.nasm;ia32/nasm.h"\r
-                                       Outputs="ia32/bitreader_asm.obj"\r
-                               />\r
-                       </FileConfiguration>\r
-               </File>\r
-               <File\r
                        RelativePath=".\ia32\cpu_asm.nasm"\r
                        >\r
                        <FileConfiguration\r
index 4ebb335..05deac0 100644 (file)
@@ -144,7 +144,6 @@ typedef struct FLAC__StreamDecoderPrivate {
        void (*local_lpc_restore_signal_16bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
        /* for use when the signal is <= 16 bits-per-sample, or <= 15 bits-per-sample on a side channel (which requires 1 extra bit), AND order <= 8: */
        void (*local_lpc_restore_signal_16bit_order8)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-       FLAC__bool (*local_bitreader_read_rice_signed_block)(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter);
        void *client_data;
        FILE *file; /* only used if FLAC__stream_decoder_init_file()/FLAC__stream_decoder_init_file() called, else NULL */
        FLAC__BitReader *input;
@@ -393,17 +392,12 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_(
        decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide;
        decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal;
        decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal;
-       decoder->private_->local_bitreader_read_rice_signed_block = FLAC__bitreader_read_rice_signed_block;
        /* now override with asm where appropriate */
 #ifndef FLAC__NO_ASM
        if(decoder->private_->cpuinfo.use_asm) {
 #ifdef FLAC__CPU_IA32
                FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32);
 #ifdef FLAC__HAS_NASM
-#if 0 /*@@@@@@ OPT: seems to be slower than FLAC__bitreader_read_rice_signed_block */
-               if(decoder->private_->cpuinfo.ia32.bswap)
-                       decoder->private_->local_bitreader_read_rice_signed_block = FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap;
-#endif
                decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_asm_ia32; /* OPT_IA32: was really necessary for GCC < 4.9 */
                if(decoder->private_->cpuinfo.ia32.mmx) {
                        decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
@@ -2747,7 +2741,7 @@ FLAC__bool read_residual_partitioned_rice_(FLAC__StreamDecoder *decoder, unsigne
                if(rice_parameter < pesc) {
                        partitioned_rice_contents->raw_bits[partition] = 0;
                        u = (partition_order == 0 || partition > 0)? partition_samples : partition_samples - predictor_order;
-                       if(!decoder->private_->local_bitreader_read_rice_signed_block(decoder->private_->input, residual + sample, u, rice_parameter))
+                       if(!FLAC__bitreader_read_rice_signed_block(decoder->private_->input, residual + sample, u, rice_parameter))
                                return false; /* read_callback_ sets the state for us */
                        sample += u;
                }