aarch64: vp9itxfm: Use w3 instead of x3 for the int eob parameter

Message ID 1479470050-24546-1-git-send-email-martin@martin.st
State Committed
Commit 4d960a11855f4212eb3a4e470ce890db7f01df29
Headers show

Commit Message

Martin Storsjö Nov. 18, 2016, 11:54 a.m.
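The eob parameter is a 32-bit int, so the upper 32 bits of x3 are
unspecified on entry; comparing the full x3 register against 1 could
therefore miss the dc-only case. Compare w3 instead.

The clobbering tests in checkasm are only invoked when testing
correctness, so this bug didn't show up when benchmarking the
dc-only version.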
---
 libavcodec/aarch64/vp9itxfm_neon.S | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Comments

Janne Grunau Nov. 18, 2016, 7:12 p.m. | #1
On 2016-11-18 13:54:10 +0200, Martin Storsjö wrote:
> The clobbering tests in checkasm are only invoked when testing
> correctness, so this bug didn't show up when benchmarking the
> dc-only version.
> ---
>  libavcodec/aarch64/vp9itxfm_neon.S | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S
> index 9df0725..65406b9 100644
> --- a/libavcodec/aarch64/vp9itxfm_neon.S
> +++ b/libavcodec/aarch64/vp9itxfm_neon.S
> @@ -204,7 +204,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_neon, export=1
>  
>          movi            v31.8h, #0
>  .ifc \txfm1\()_\txfm2,idct_idct
> -        cmp             x3,  #1
> +        cmp             w3,  #1
>          b.ne            1f
>          // DC-only for idct/idct
>          ld1r            {v2.4h},  [x2]
> @@ -344,7 +344,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
>          movi            v5.16b, #0
>  
>  .ifc \txfm1\()_\txfm2,idct_idct
> -        cmp             x3,  #1
> +        cmp             w3,  #1
>          b.ne            1f
>          // DC-only for idct/idct
>          ld1r            {v2.4h},  [x2]
> @@ -722,7 +722,7 @@ itxfm16_1d_funcs iadst
>  .macro itxfm_func16x16 txfm1, txfm2
>  function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
>  .ifc \txfm1\()_\txfm2,idct_idct
> -        cmp             x3,  #1
> +        cmp             w3,  #1
>          b.eq            idct16x16_dc_add_neon
>  .endif
>          mov             x15, x30
> @@ -1074,7 +1074,7 @@ function idct32_1d_8x32_pass2_neon
>  endfunc
>  
>  function ff_vp9_idct_idct_32x32_add_neon, export=1
> -        cmp             x3,  #1
> +        cmp             w3,  #1
>          b.eq            idct32x32_dc_add_neon
>  
>          movrel          x10, idct_coeffs

ok

Janne

Patch

diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S
index 9df0725..65406b9 100644
--- a/libavcodec/aarch64/vp9itxfm_neon.S
+++ b/libavcodec/aarch64/vp9itxfm_neon.S
@@ -204,7 +204,7 @@  function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_neon, export=1
 
         movi            v31.8h, #0
 .ifc \txfm1\()_\txfm2,idct_idct
-        cmp             x3,  #1
+        cmp             w3,  #1
         b.ne            1f
         // DC-only for idct/idct
         ld1r            {v2.4h},  [x2]
@@ -344,7 +344,7 @@  function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
         movi            v5.16b, #0
 
 .ifc \txfm1\()_\txfm2,idct_idct
-        cmp             x3,  #1
+        cmp             w3,  #1
         b.ne            1f
         // DC-only for idct/idct
         ld1r            {v2.4h},  [x2]
@@ -722,7 +722,7 @@  itxfm16_1d_funcs iadst
 .macro itxfm_func16x16 txfm1, txfm2
 function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
 .ifc \txfm1\()_\txfm2,idct_idct
-        cmp             x3,  #1
+        cmp             w3,  #1
         b.eq            idct16x16_dc_add_neon
 .endif
         mov             x15, x30
@@ -1074,7 +1074,7 @@  function idct32_1d_8x32_pass2_neon
 endfunc
 
 function ff_vp9_idct_idct_32x32_add_neon, export=1
-        cmp             x3,  #1
+        cmp             w3,  #1
         b.eq            idct32x32_dc_add_neon
 
         movrel          x10, idct_coeffs