[3/4] arm: vp9itxfm: Reorder iadst16 coeffs

Message ID 1486643636-7432-3-git-send-email-martin@martin.st
State Committed
Commit 08074c092d8c97d71c5986e5325e97ffc956119d
Headers show

Commit Message

Martin Storsjö Feb. 9, 2017, 12:33 p.m.
This matches the order they are in the 16 bpp version.

There they are in this order, to make sure we access them in the
same order they are declared, easing loading only half of the
coefficients at a time.

This makes the 8 bpp version match the 16 bpp version better.
---
 libavcodec/arm/vp9itxfm_neon.S | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

Comments

Janne Grunau Feb. 23, 2017, 9:05 p.m. | #1
On 2017-02-09 14:33:55 +0200, Martin Storsjö wrote:
> This matches the order they are in the 16 bpp version.
> 
> There they are in this order, to make sure we access them in the
> same order they are declared, easing loading only half of the
> coefficients at a time.
> 
> This makes the 8 bpp version match the 16 bpp version better.
> ---
>  libavcodec/arm/vp9itxfm_neon.S | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
> index f74d542..c8eeb76 100644
> --- a/libavcodec/arm/vp9itxfm_neon.S
> +++ b/libavcodec/arm/vp9itxfm_neon.S
> @@ -37,8 +37,8 @@ idct_coeffs:
>  endconst
>  
>  const iadst16_coeffs, align=4
> -        .short  16364, 804, 15893, 3981, 14811, 7005, 13160, 9760
> -        .short  11003, 12140, 8423, 14053, 5520, 15426, 2404, 16207
> +        .short  16364, 804, 15893, 3981, 11003, 12140, 8423, 14053
> +        .short  14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207
>  endconst
>  
>  @ Do four 4x4 transposes, using q registers for the subtransposes that don't
> @@ -672,19 +672,19 @@ function iadst16
>          vld1.16         {q0-q1}, [r12,:128]
>  
>          mbutterfly_l    q3,  q2,  d31, d16, d0[1], d0[0] @ q3  = t1,   q2  = t0
> -        mbutterfly_l    q5,  q4,  d23, d24, d2[1], d2[0] @ q5  = t9,   q4  = t8
> +        mbutterfly_l    q5,  q4,  d23, d24, d1[1], d1[0] @ q5  = t9,   q4  = t8
>          butterfly_n     d31, d24, q3,  q5,  q6,  q5      @ d31 = t1a,  d24 = t9a
>          mbutterfly_l    q7,  q6,  d29, d18, d0[3], d0[2] @ q7  = t3,   q6  = t2
>          butterfly_n     d16, d23, q2,  q4,  q3,  q4      @ d16 = t0a,  d23 = t8a
>  
> -        mbutterfly_l    q3,  q2,  d21, d26, d2[3], d2[2] @ q3  = t11,  q2  = t10
> +        mbutterfly_l    q3,  q2,  d21, d26, d1[3], d1[2] @ q3  = t11,  q2  = t10
>          butterfly_n     d29, d26, q7,  q3,  q4,  q3      @ d29 = t3a,  d26 = t11a
> -        mbutterfly_l    q5,  q4,  d27, d20, d1[1], d1[0] @ q5  = t5,   q4  = t4
> +        mbutterfly_l    q5,  q4,  d27, d20, d2[1], d2[0] @ q5  = t5,   q4  = t4
>          butterfly_n     d18, d21, q6,  q2,  q3,  q2      @ d18 = t2a,  d21 = t10a
>  
>          mbutterfly_l    q7,  q6,  d19, d28, d3[1], d3[0] @ q7  = t13,  q6  = t12
>          butterfly_n     d20, d28, q5,  q7,  q2,  q7      @ d20 = t5a,  d28 = t13a
> -        mbutterfly_l    q3,  q2,  d25, d22, d1[3], d1[2] @ q3  = t7,   q2  = t6
> +        mbutterfly_l    q3,  q2,  d25, d22, d2[3], d2[2] @ q3  = t7,   q2  = t6
>          butterfly_n     d27, d19, q4,  q6,  q5,  q6      @ d27 = t4a,  d19 = t12a
>  
>          mbutterfly_l    q5,  q4,  d17, d30, d3[3], d3[2] @ q5  = t15,  q4  = t14

ok

Janne

Patch

diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index f74d542..c8eeb76 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -37,8 +37,8 @@  idct_coeffs:
 endconst
 
 const iadst16_coeffs, align=4
-        .short  16364, 804, 15893, 3981, 14811, 7005, 13160, 9760
-        .short  11003, 12140, 8423, 14053, 5520, 15426, 2404, 16207
+        .short  16364, 804, 15893, 3981, 11003, 12140, 8423, 14053
+        .short  14811, 7005, 13160, 9760, 5520, 15426, 2404, 16207
 endconst
 
 @ Do four 4x4 transposes, using q registers for the subtransposes that don't
@@ -672,19 +672,19 @@  function iadst16
         vld1.16         {q0-q1}, [r12,:128]
 
         mbutterfly_l    q3,  q2,  d31, d16, d0[1], d0[0] @ q3  = t1,   q2  = t0
-        mbutterfly_l    q5,  q4,  d23, d24, d2[1], d2[0] @ q5  = t9,   q4  = t8
+        mbutterfly_l    q5,  q4,  d23, d24, d1[1], d1[0] @ q5  = t9,   q4  = t8
         butterfly_n     d31, d24, q3,  q5,  q6,  q5      @ d31 = t1a,  d24 = t9a
         mbutterfly_l    q7,  q6,  d29, d18, d0[3], d0[2] @ q7  = t3,   q6  = t2
         butterfly_n     d16, d23, q2,  q4,  q3,  q4      @ d16 = t0a,  d23 = t8a
 
-        mbutterfly_l    q3,  q2,  d21, d26, d2[3], d2[2] @ q3  = t11,  q2  = t10
+        mbutterfly_l    q3,  q2,  d21, d26, d1[3], d1[2] @ q3  = t11,  q2  = t10
         butterfly_n     d29, d26, q7,  q3,  q4,  q3      @ d29 = t3a,  d26 = t11a
-        mbutterfly_l    q5,  q4,  d27, d20, d1[1], d1[0] @ q5  = t5,   q4  = t4
+        mbutterfly_l    q5,  q4,  d27, d20, d2[1], d2[0] @ q5  = t5,   q4  = t4
         butterfly_n     d18, d21, q6,  q2,  q3,  q2      @ d18 = t2a,  d21 = t10a
 
         mbutterfly_l    q7,  q6,  d19, d28, d3[1], d3[0] @ q7  = t13,  q6  = t12
         butterfly_n     d20, d28, q5,  q7,  q2,  q7      @ d20 = t5a,  d28 = t13a
-        mbutterfly_l    q3,  q2,  d25, d22, d1[3], d1[2] @ q3  = t7,   q2  = t6
+        mbutterfly_l    q3,  q2,  d25, d22, d2[3], d2[2] @ q3  = t7,   q2  = t6
         butterfly_n     d27, d19, q4,  q6,  q5,  q6      @ d27 = t4a,  d19 = t12a
 
         mbutterfly_l    q5,  q4,  d17, d30, d3[3], d3[2] @ q5  = t15,  q4  = t14