[04/10] dcadec: Use int32_to_float_fmul_scalar_array

Message ID 1373980994-30628-4-git-send-email-martin@martin.st
State Superseded
Headers show

Commit Message

Martin Storsjö July 16, 2013, 1:23 p.m.
From: Ben Avison <bavison@riscosopen.org>

---
 libavcodec/dcadec.c |   23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

Comments

Luca Barbato July 16, 2013, 3:18 p.m. | #1
On 07/16/2013 03:23 PM, Martin Storsjö wrote:
> From: Ben Avison <bavison@riscosopen.org>
> 
> ---
>  libavcodec/dcadec.c |   23 +++++++++++++++--------
>  1 file changed, 15 insertions(+), 8 deletions(-)
> 

I assume there is already a check for the buffer boundaries and it is
granted that it is padded to be a multiple of 8....
Ben Avison July 16, 2013, 4:24 p.m. | #2
On Tue, 16 Jul 2013 16:18:17 +0100, Luca Barbato <lu_zero@gentoo.org> wrote:
> I assume there is already a check for the buffer boundaries and it is
> granted that it is padded to be a multiple of 8....

Well, (**subband_samples) and block are forced to be multiples of 8 words
by the way they are defined.

It might make it clearer if I explain that there was originally a loop
from 0 to s->vq_start_subband[k], using l as an iterator variable. What I
did was to identify that there were three stages within that loop that
could be performed sequentially without reference to other iterations of
the loop, meaning that it could be split into 3 loops. Then the actual
iteration of the middle stage (which was just calling
int32_to_float_fmul_scalar) could be moved into a separate platform-
specific function, enabling better pipelining of data loads and stores.

Yes, there is an implicit assumption that
   s->vq_start_subband[k] <= DCA_SUBBANDS

The same assumption existed in the code I started from. But I would refer
you to lines ~633/634 of dcadec.c, which say

          if (s->vq_start_subband[i] > DCA_SUBBANDS)
              s->vq_start_subband[i] = DCA_SUBBANDS;

which, as far as I can tell, enforces this.

Ben

Patch

diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index 9b00d30..84d522d 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -1140,7 +1140,7 @@  static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
 
     /* FIXME */
     float (*subband_samples)[DCA_SUBBANDS][8] = s->subband_samples[block_index];
-    LOCAL_ALIGNED_16(int, block, [8]);
+    LOCAL_ALIGNED_16(int, block, [8 * DCA_SUBBANDS]);
 
     /*
      * Audio data
@@ -1153,6 +1153,8 @@  static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
         quant_step_table = lossy_quant_d;
 
     for (k = base_channel; k < s->prim_channels; k++) {
+        float rscale[DCA_SUBBANDS];
+
         if (get_bits_left(&s->gb) < 0)
             return AVERROR_INVALIDDATA;
 
@@ -1175,11 +1177,12 @@  static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
              * Extract bits from the bit stream
              */
             if (!abits) {
-                memset(subband_samples[k][l], 0, 8 * sizeof(subband_samples[0][0][0]));
+                rscale[l] = 0;
+                memset(block + 8 * l, 0, 8 * sizeof(block[0]));
             } else {
                 /* Deal with transients */
                 int sfi = s->transition_mode[k][l] && subsubframe >= s->transition_mode[k][l];
-                float rscale = quant_step_size * s->scale_factor[k][l][sfi] *
+                rscale[l] = quant_step_size * s->scale_factor[k][l][sfi] *
                                s->scalefactor_adj[k][sel];
 
                 if (abits >= 11 || !dca_smpl_bitalloc[abits].vlc[sel].table) {
@@ -1193,7 +1196,7 @@  static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
                         block_code1 = get_bits(&s->gb, size);
                         block_code2 = get_bits(&s->gb, size);
                         err = decode_blockcodes(block_code1, block_code2,
-                                                levels, block);
+                                                levels, block + 8 * l);
                         if (err) {
                             av_log(s->avctx, AV_LOG_ERROR,
                                    "ERROR: block code look-up failed\n");
@@ -1202,19 +1205,23 @@  static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
                     } else {
                         /* no coding */
                         for (m = 0; m < 8; m++)
-                            block[m] = get_sbits(&s->gb, abits - 3);
+                            block[8 * l + m] = get_sbits(&s->gb, abits - 3);
                     }
                 } else {
                     /* Huffman coded */
                     for (m = 0; m < 8; m++)
-                        block[m] = get_bitalloc(&s->gb,
+                        block[8 * l + m] = get_bitalloc(&s->gb,
                                                 &dca_smpl_bitalloc[abits], sel);
                 }
 
-                s->fmt_conv.int32_to_float_fmul_scalar(subband_samples[k][l],
-                                                       block, rscale, 8);
             }
+        }
+
+        s->fmt_conv.int32_to_float_fmul_scalar_array(&s->fmt_conv, subband_samples[k][0],
+                                                     block, rscale, 8 * s->vq_start_subband[k]);
 
+        for (l = 0; l < s->vq_start_subband[k]; l++) {
+            int m;
             /*
              * Inverse ADPCM if in prediction mode
              */