Message ID | 1479906058-22747-2-git-send-email-martin@martin.st |
---|---|
State | Superseded |
Headers | show |
On 2016-11-23 15:00:49 +0200, Martin Storsjö wrote: > --- > libavcodec/aarch64/vp9itxfm_neon.S | 2 -- > 1 file changed, 2 deletions(-) > > diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S > index 2dc6b75..21352f0 100644 > --- a/libavcodec/aarch64/vp9itxfm_neon.S > +++ b/libavcodec/aarch64/vp9itxfm_neon.S > @@ -1001,7 +1001,6 @@ function idct32_1d_8x32_pass2_neon > > idct16 > > - mov x9, #128 > .irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 > st1 {v\i\().8h}, [x2], x9 > .endr > @@ -1018,7 +1017,6 @@ function idct32_1d_8x32_pass2_neon > > idct32_odd > > - mov x9, #128 > .macro load_acc_store a, b, c, d, neg=0 > ld1 {v4.8h}, [x2], x9 > ld1 {v5.8h}, [x2], x9 Patch OK, although there are still many redundant mov x9 instructions left. It's enough to have it once in ff_vp9_idct_idct_32x32_add_neon (and a neg x7, x9) and use x7 instead of x9 in the neg=1 part of load_acc_store. The same applies to the 16x16 transforms. Janne
diff --git a/libavcodec/aarch64/vp9itxfm_neon.S b/libavcodec/aarch64/vp9itxfm_neon.S index 2dc6b75..21352f0 100644 --- a/libavcodec/aarch64/vp9itxfm_neon.S +++ b/libavcodec/aarch64/vp9itxfm_neon.S @@ -1001,7 +1001,6 @@ function idct32_1d_8x32_pass2_neon idct16 - mov x9, #128 .irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 st1 {v\i\().8h}, [x2], x9 .endr @@ -1018,7 +1017,6 @@ function idct32_1d_8x32_pass2_neon idct32_odd - mov x9, #128 .macro load_acc_store a, b, c, d, neg=0 ld1 {v4.8h}, [x2], x9 ld1 {v5.8h}, [x2], x9