Message ID | 1479470050-24546-1-git-send-email-martin@martin.st |
---|---|
State | Committed |
Commit | 4d960a11855f4212eb3a4e470ce890db7f01df29 |
Headers | show |
On 2016-11-18 13:54:10 +0200, Martin Storsjö wrote:
> The clobbering tests in checkasm are only invoked when testing
> correctness, so this bug didn't show up when benchmarking the
> dc-only version.
> ---
>  libavcodec/aarch64/vp9itxfm_neon.S | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S
> index 9df0725..65406b9 100644
> --- a/libavcodec/aarch64/vp9itxfm_neon.S
> +++ b/libavcodec/aarch64/vp9itxfm_neon.S
> @@ -204,7 +204,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_neon, export=1
>
>          movi            v31.8h, #0
>  .ifc \txfm1\()_\txfm2,idct_idct
> -        cmp             x3, #1
> +        cmp             w3, #1
>          b.ne            1f
>          // DC-only for idct/idct
>          ld1r            {v2.4h}, [x2]
> @@ -344,7 +344,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
>          movi            v5.16b, #0
>
>  .ifc \txfm1\()_\txfm2,idct_idct
> -        cmp             x3, #1
> +        cmp             w3, #1
>          b.ne            1f
>          // DC-only for idct/idct
>          ld1r            {v2.4h}, [x2]
> @@ -722,7 +722,7 @@ itxfm16_1d_funcs iadst
>  .macro itxfm_func16x16 txfm1, txfm2
>  function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
>  .ifc \txfm1\()_\txfm2,idct_idct
> -        cmp             x3, #1
> +        cmp             w3, #1
>          b.eq            idct16x16_dc_add_neon
>  .endif
>          mov             x15, x30
> @@ -1074,7 +1074,7 @@ function idct32_1d_8x32_pass2_neon
>  endfunc
>
>  function ff_vp9_idct_idct_32x32_add_neon, export=1
> -        cmp             x3, #1
> +        cmp             w3, #1
>          b.eq            idct32x32_dc_add_neon
>
>          movrel          x10, idct_coeffs

ok

Janne
diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S
index 9df0725..65406b9 100644
--- a/libavcodec/aarch64/vp9itxfm_neon.S
+++ b/libavcodec/aarch64/vp9itxfm_neon.S
@@ -204,7 +204,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_4x4_add_neon, export=1

         movi            v31.8h, #0
 .ifc \txfm1\()_\txfm2,idct_idct
-        cmp             x3, #1
+        cmp             w3, #1
         b.ne            1f
         // DC-only for idct/idct
         ld1r            {v2.4h}, [x2]
@@ -344,7 +344,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
         movi            v5.16b, #0

 .ifc \txfm1\()_\txfm2,idct_idct
-        cmp             x3, #1
+        cmp             w3, #1
         b.ne            1f
         // DC-only for idct/idct
         ld1r            {v2.4h}, [x2]
@@ -722,7 +722,7 @@ itxfm16_1d_funcs iadst
 .macro itxfm_func16x16 txfm1, txfm2
 function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
 .ifc \txfm1\()_\txfm2,idct_idct
-        cmp             x3, #1
+        cmp             w3, #1
         b.eq            idct16x16_dc_add_neon
 .endif
         mov             x15, x30
@@ -1074,7 +1074,7 @@ function idct32_1d_8x32_pass2_neon
 endfunc

 function ff_vp9_idct_idct_32x32_add_neon, export=1
-        cmp             x3, #1
+        cmp             w3, #1
         b.eq            idct32x32_dc_add_neon

         movrel          x10, idct_coeffs