Message ID | 1504170656-16065-1-git-send-email-martin@martin.st |
---|---|
State | Committed |
Commit | 228083aa31dc3c7475eb7d9e722e24558bcfde28 |
Headers | show |
On 31/08/2017 11:10, Martin Storsjö wrote: > In binutils 2.29, the behavior of the ADR instruction changed so that 1 is > added to the address of a Thumb function (previously nothing was added). This > allows the loaded address to be passed to a BLX instruction and the correct > mode change will occur. > > See: https://sourceware.org/bugzilla/show_bug.cgi?id=21458 > > By using adr with a label that isn't annotated as a thumb function, > we avoid the new behaviour in binutils 2.29 and get the same behaviour > as in prior releases, and as in other assemblers (ms armasm.exe, > clang's built in assembler). > --- > libavcodec/arm/h264idct_neon.S | 20 ++++++++++++-------- > 1 file changed, 12 insertions(+), 8 deletions(-) Looks less intrusive, I guess it should be ok even if feels a little brittle somehow. lu
On 2017-08-31 12:10:56 +0300, Martin Storsjö wrote: > In binutils 2.29, the behavior of the ADR instruction changed so that 1 is > added to the address of a Thumb function (previously nothing was added). This > allows the loaded address to be passed to a BLX instruction and the correct > mode change will occur. > > See: https://sourceware.org/bugzilla/show_bug.cgi?id=21458 > > By using adr with a label that isn't annotated as a thumb function, > we avoid the new behaviour in binutils 2.29 and get the same behaviour > as in prior releases, and as in other assemblers (ms armasm.exe, > clang's built in assembler). > --- > libavcodec/arm/h264idct_neon.S | 20 ++++++++++++-------- > 1 file changed, 12 insertions(+), 8 deletions(-) > > diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S > index f588f3e..b078cf2 100644 > --- a/libavcodec/arm/h264idct_neon.S > +++ b/libavcodec/arm/h264idct_neon.S > @@ -21,6 +21,7 @@ > #include "libavutil/arm/asm.S" > > function ff_h264_idct_add_neon, export=1 > +h264_idct_add_neon_nothumb: I thought of adding the symbol to the function macro but given that it's just two functions in single file adding the macros manually might be better. I don't expect it to be needed in many more places if any. > vld1.64 {d0-d3}, [r1,:128] > vmov.i16 q15, #0 > > @@ -73,6 +74,7 @@ function ff_h264_idct_add_neon, export=1 > endfunc > > function ff_h264_idct_dc_add_neon, export=1 > +h264_idct_dc_add_neon_nothumb: > mov r3, #0 > vld1.16 {d2[],d3[]}, [r1,:16] > strh r3, [r1] > @@ -113,8 +115,8 @@ function ff_h264_idct_add16_neon, export=1 > movne lr, #0 > cmp lr, #0 > ite ne > - adrne lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB > - adreq lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB > + adrne lr, h264_idct_dc_add_neon_nothumb + CONFIG_THUMB > + adreq lr, h264_idct_add_neon_nothumb + CONFIG_THUMB > blx lr > 2: subs ip, ip, #1 > add r1, r1, #32 > @@ -138,8 +140,8 @@ function ff_h264_idct_add16intra_neon, export=1 > cmp r8, #0 > ldrsh r8, [r1] > iteet ne > - adrne lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB > - adreq lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB > + adrne lr, h264_idct_add_neon_nothumb + CONFIG_THUMB > + adreq lr, h264_idct_dc_add_neon_nothumb + CONFIG_THUMB > cmpeq r8, #0 > blxne lr > subs ip, ip, #1 > @@ -166,8 +168,8 @@ function ff_h264_idct_add8_neon, export=1 > cmp r8, #0 > ldrsh r8, [r1] > iteet ne > - adrne lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB > - adreq lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB > + adrne lr, h264_idct_add_neon_nothumb + CONFIG_THUMB > + adreq lr, h264_idct_dc_add_neon_nothumb + CONFIG_THUMB > cmpeq r8, #0 > blxne lr > add r12, r12, #1 > @@ -267,6 +269,7 @@ endfunc > .endm > > function ff_h264_idct8_add_neon, export=1 > +h264_idct8_add_neon_nothumb: > vmov.i16 q3, #0 > vld1.16 {q8-q9}, [r1,:128] > vst1.16 {q3}, [r1,:128]! > @@ -328,6 +331,7 @@ function ff_h264_idct8_add_neon, export=1 > endfunc > > function ff_h264_idct8_dc_add_neon, export=1 > +h264_idct8_dc_add_neon_nothumb: > mov r3, #0 > vld1.16 {d30[],d31[]},[r1,:16] > strh r3, [r1] > @@ -388,8 +392,8 @@ function ff_h264_idct8_add4_neon, export=1 > movne lr, #0 > cmp lr, #0 > ite ne > - adrne lr, X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB > - adreq lr, X(ff_h264_idct8_add_neon) + CONFIG_THUMB > + adrne lr, h264_idct8_dc_add_neon_nothumb + CONFIG_THUMB > + adreq lr, h264_idct8_add_neon_nothumb + CONFIG_THUMB > blx lr > 2: subs r12, r12, #4 > add r1, r1, #128 patch ok Janne
diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S index f588f3e..b078cf2 100644 --- a/libavcodec/arm/h264idct_neon.S +++ b/libavcodec/arm/h264idct_neon.S @@ -21,6 +21,7 @@ #include "libavutil/arm/asm.S" function ff_h264_idct_add_neon, export=1 +h264_idct_add_neon_nothumb: vld1.64 {d0-d3}, [r1,:128] vmov.i16 q15, #0 @@ -73,6 +74,7 @@ function ff_h264_idct_add_neon, export=1 endfunc function ff_h264_idct_dc_add_neon, export=1 +h264_idct_dc_add_neon_nothumb: mov r3, #0 vld1.16 {d2[],d3[]}, [r1,:16] strh r3, [r1] @@ -113,8 +115,8 @@ function ff_h264_idct_add16_neon, export=1 movne lr, #0 cmp lr, #0 ite ne - adrne lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB - adreq lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB + adrne lr, h264_idct_dc_add_neon_nothumb + CONFIG_THUMB + adreq lr, h264_idct_add_neon_nothumb + CONFIG_THUMB blx lr 2: subs ip, ip, #1 add r1, r1, #32 @@ -138,8 +140,8 @@ function ff_h264_idct_add16intra_neon, export=1 cmp r8, #0 ldrsh r8, [r1] iteet ne - adrne lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB - adreq lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB + adrne lr, h264_idct_add_neon_nothumb + CONFIG_THUMB + adreq lr, h264_idct_dc_add_neon_nothumb + CONFIG_THUMB cmpeq r8, #0 blxne lr subs ip, ip, #1 @@ -166,8 +168,8 @@ function ff_h264_idct_add8_neon, export=1 cmp r8, #0 ldrsh r8, [r1] iteet ne - adrne lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB - adreq lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB + adrne lr, h264_idct_add_neon_nothumb + CONFIG_THUMB + adreq lr, h264_idct_dc_add_neon_nothumb + CONFIG_THUMB cmpeq r8, #0 blxne lr add r12, r12, #1 @@ -267,6 +269,7 @@ endfunc .endm function ff_h264_idct8_add_neon, export=1 +h264_idct8_add_neon_nothumb: vmov.i16 q3, #0 vld1.16 {q8-q9}, [r1,:128] vst1.16 {q3}, [r1,:128]! @@ -328,6 +331,7 @@ function ff_h264_idct8_add_neon, export=1 endfunc function ff_h264_idct8_dc_add_neon, export=1 +h264_idct8_dc_add_neon_nothumb: mov r3, #0 vld1.16 {d30[],d31[]},[r1,:16] strh r3, [r1] @@ -388,8 +392,8 @@ function ff_h264_idct8_add4_neon, export=1 movne lr, #0 cmp lr, #0 ite ne - adrne lr, X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB - adreq lr, X(ff_h264_idct8_add_neon) + CONFIG_THUMB + adrne lr, h264_idct8_dc_add_neon_nothumb + CONFIG_THUMB + adreq lr, h264_idct8_add_neon_nothumb + CONFIG_THUMB blx lr 2: subs r12, r12, #4 add r1, r1, #128