arm: vp9itxfm: Simplify txfm string comparisons

Message ID 1478990216-15083-1-git-send-email-martin@martin.st
State Committed
Commit 52d196fb30fb6628921b5f1b31e7bd11eb7e1d9a
Headers show

Commit Message

Martin Storsjö Nov. 12, 2016, 10:36 p.m.
---
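This simplification was suggested during the review of the aarch64 version: the nested per-transform .ifc checks on \txfm1 and \txfm2 are replaced by a single comparison of \txfm1\()_\txfm2 against idct_idct.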
---
 libavcodec/arm/vp9itxfm_neon.S | 45 ++++++++++--------------------------------
 1 file changed, 10 insertions(+), 35 deletions(-)

Comments

Janne Grunau Nov. 13, 2016, 9:23 p.m. | #1
On 2016-11-13 00:36:56 +0200, Martin Storsjö wrote:
> ---
> This comes from the review of the aarch64 version.
> ---
>  libavcodec/arm/vp9itxfm_neon.S | 45 ++++++++++--------------------------------
>  1 file changed, 10 insertions(+), 35 deletions(-)
> 
> diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
> index fca9836..cdb43b5 100644
> --- a/libavcodec/arm/vp9itxfm_neon.S
> +++ b/libavcodec/arm/vp9itxfm_neon.S
> @@ -258,8 +258,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_neon, export=1
>  .endif
>  
>          vmov.i16        q15, #0
> -.ifc \txfm1,idct
> -.ifc \txfm2,idct
> +.ifc \txfm1\()_\txfm2,idct_idct
>          cmp             r3,  #1
>          bne             1f
>          @ DC-only for idct/idct
> @@ -273,7 +272,6 @@ function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_neon, export=1
>          vmov            q3,  q2
>          b               2f
>  .endif
> -.endif
>  
>  1:
>          vld1.16         {d4-d7},  [r2,:128]
> @@ -386,29 +384,21 @@ function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
>          @ if only idct is involved.
>          @ The iadst also uses a few coefficients from
>          @ idct, so those always need to be loaded.
> -.ifc \txfm1,iadst
> -        movrel          r12, iadst8_coeffs
> -        vld1.16         {q1}, [r12,:128]!
> -        vpush           {q4-q7}
> +.ifc \txfm1\()_\txfm2,idct_idct
> +        movrel          r12, idct_coeffs
> +        vpush           {q4-q5}
>          vld1.16         {q0}, [r12,:128]
>  .else
> -.ifc \txfm2,iadst
>          movrel          r12, iadst8_coeffs
>          vld1.16         {q1}, [r12,:128]!
>          vpush           {q4-q7}
>          vld1.16         {q0}, [r12,:128]
> -.else
> -        movrel          r12, idct_coeffs
> -        vpush           {q4-q5}
> -        vld1.16         {q0}, [r12,:128]
> -.endif
>  .endif
>  
>          vmov.i16        q2, #0
>          vmov.i16        q3, #0
>  
> -.ifc \txfm1,idct
> -.ifc \txfm2,idct
> +.ifc \txfm1\()_\txfm2,idct_idct
>          cmp             r3,  #1
>          bne             1f
>          @ DC-only for idct/idct
> @@ -428,7 +418,6 @@ function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
>          vst1.16         {d4[0]}, [r2,:16]
>          b               2f
>  .endif
> -.endif
>  1:
>          vld1.16         {q8-q9},    [r2,:128]!
>          vld1.16         {q10-q11},  [r2,:128]!
> @@ -497,14 +486,10 @@ function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
>          vst1.8          {d10}, [r3,:64], r1
>          vst1.8          {d11}, [r3,:64], r1
>  
> -.ifc \txfm1,iadst
> -        vpop            {q4-q7}
> +.ifc \txfm1\()_\txfm2,idct_idct
> +        vpop            {q4-q5}
>  .else
> -.ifc \txfm2,iadst
>          vpop            {q4-q7}
> -.else
> -        vpop            {q4-q5}
> -.endif
>  .endif
>          bx              lr
>  endfunc
> @@ -798,20 +783,14 @@ itxfm16_1d_funcs iadst
>  
>  .macro itxfm_func16x16 txfm1, txfm2
>  function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
> -.ifc \txfm1,idct
> -.ifc \txfm2,idct
> +.ifc \txfm1\()_\txfm2,idct_idct
>          cmp             r3,  #1
>          beq             idct16x16_dc_add_neon
>  .endif
> -.endif
>          push            {r4-r7,lr}
> -.ifc \txfm1,iadst
> -        vpush           {q4-q7}
> -.else
> -.ifc \txfm2,iadst
> +.ifnc \txfm1\()_\txfm2,idct_idct
>          vpush           {q4-q7}
>  .endif
> -.endif
>          mov             r7,  sp
>  
>          @ Align the stack, allocate a temp buffer
> @@ -850,13 +829,9 @@ A       sub             sp,  sp,  #512
>  .endr
>  
>          mov             sp,  r7
> -.ifc \txfm1,iadst
> -        vpop            {q4-q7}
> -.else
> -.ifc \txfm2,iadst
> +.ifnc \txfm1\()_\txfm2,idct_idct
>          vpop            {q4-q7}
>  .endif
> -.endif
>          pop             {r4-r7,pc}
>  endfunc
>  .endm

ok

Janne

Patch

diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index fca9836..cdb43b5 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -258,8 +258,7 @@  function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_neon, export=1
 .endif
 
         vmov.i16        q15, #0
-.ifc \txfm1,idct
-.ifc \txfm2,idct
+.ifc \txfm1\()_\txfm2,idct_idct
         cmp             r3,  #1
         bne             1f
         @ DC-only for idct/idct
@@ -273,7 +272,6 @@  function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_neon, export=1
         vmov            q3,  q2
         b               2f
 .endif
-.endif
 
 1:
         vld1.16         {d4-d7},  [r2,:128]
@@ -386,29 +384,21 @@  function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
         @ if only idct is involved.
         @ The iadst also uses a few coefficients from
         @ idct, so those always need to be loaded.
-.ifc \txfm1,iadst
-        movrel          r12, iadst8_coeffs
-        vld1.16         {q1}, [r12,:128]!
-        vpush           {q4-q7}
+.ifc \txfm1\()_\txfm2,idct_idct
+        movrel          r12, idct_coeffs
+        vpush           {q4-q5}
         vld1.16         {q0}, [r12,:128]
 .else
-.ifc \txfm2,iadst
         movrel          r12, iadst8_coeffs
         vld1.16         {q1}, [r12,:128]!
         vpush           {q4-q7}
         vld1.16         {q0}, [r12,:128]
-.else
-        movrel          r12, idct_coeffs
-        vpush           {q4-q5}
-        vld1.16         {q0}, [r12,:128]
-.endif
 .endif
 
         vmov.i16        q2, #0
         vmov.i16        q3, #0
 
-.ifc \txfm1,idct
-.ifc \txfm2,idct
+.ifc \txfm1\()_\txfm2,idct_idct
         cmp             r3,  #1
         bne             1f
         @ DC-only for idct/idct
@@ -428,7 +418,6 @@  function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
         vst1.16         {d4[0]}, [r2,:16]
         b               2f
 .endif
-.endif
 1:
         vld1.16         {q8-q9},    [r2,:128]!
         vld1.16         {q10-q11},  [r2,:128]!
@@ -497,14 +486,10 @@  function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
         vst1.8          {d10}, [r3,:64], r1
         vst1.8          {d11}, [r3,:64], r1
 
-.ifc \txfm1,iadst
-        vpop            {q4-q7}
+.ifc \txfm1\()_\txfm2,idct_idct
+        vpop            {q4-q5}
 .else
-.ifc \txfm2,iadst
         vpop            {q4-q7}
-.else
-        vpop            {q4-q5}
-.endif
 .endif
         bx              lr
 endfunc
@@ -798,20 +783,14 @@  itxfm16_1d_funcs iadst
 
 .macro itxfm_func16x16 txfm1, txfm2
 function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
-.ifc \txfm1,idct
-.ifc \txfm2,idct
+.ifc \txfm1\()_\txfm2,idct_idct
         cmp             r3,  #1
         beq             idct16x16_dc_add_neon
 .endif
-.endif
         push            {r4-r7,lr}
-.ifc \txfm1,iadst
-        vpush           {q4-q7}
-.else
-.ifc \txfm2,iadst
+.ifnc \txfm1\()_\txfm2,idct_idct
         vpush           {q4-q7}
 .endif
-.endif
         mov             r7,  sp
 
         @ Align the stack, allocate a temp buffer
@@ -850,13 +829,9 @@  A       sub             sp,  sp,  #512
 .endr
 
         mov             sp,  r7
-.ifc \txfm1,iadst
-        vpop            {q4-q7}
-.else
-.ifc \txfm2,iadst
+.ifnc \txfm1\()_\txfm2,idct_idct
         vpop            {q4-q7}
 .endif
-.endif
         pop             {r4-r7,pc}
 endfunc
 .endm