@@ -1666,6 +1666,7 @@ SYSTEM_FUNCS="
TOOLCHAIN_FEATURES="
as_arch_directive
+ as_eqv_directive
as_fpu_directive
as_func
as_object_arch
@@ -4435,6 +4436,9 @@ EOF
check_as <<EOF && enable as_arch_directive
.arch armv7-a
EOF
+ check_as <<EOF && enable as_eqv_directive
+.eqv foo, bar
+EOF
check_as <<EOF && enable as_fpu_directive
.fpu neon
EOF
@@ -20,6 +20,25 @@
#include "libavutil/arm/asm.S"
+# In binutils 2.29, the behavior of the ADR instruction changed so that 1 is
+# added to the address of a Thumb function (previously nothing was added).
+#
+# These .eqv are used to pre-calculate the correct address with +CONFIG_THUMB so
+# that ADR will work with both old and new versions binutils.
+#
+# See: https://sourceware.org/bugzilla/show_bug.cgi?id=21458
+#if HAVE_AS_EQV_DIRECTIVE
+.eqv eqv_ff_h264_idct_add_neon, X(ff_h264_idct_add_neon) + CONFIG_THUMB
+.eqv eqv_ff_h264_idct_dc_add_neon, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
+.eqv eqv_ff_h264_idct8_add_neon, X(ff_h264_idct8_add_neon) + CONFIG_THUMB
+.eqv eqv_ff_h264_idct8_dc_add_neon, X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB
+#else
+#define eqv_ff_h264_idct_add_neon X(ff_h264_idct_add_neon) + CONFIG_THUMB
+#define eqv_ff_h264_idct_dc_add_neon X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
+#define eqv_ff_h264_idct8_add_neon X(ff_h264_idct8_add_neon) + CONFIG_THUMB
+#define eqv_ff_h264_idct8_dc_add_neon X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB
+#endif
+
function ff_h264_idct_add_neon, export=1
vld1.64 {d0-d3}, [r1,:128]
vmov.i16 q15, #0
@@ -113,8 +132,8 @@ function ff_h264_idct_add16_neon, export=1
movne lr, #0
cmp lr, #0
ite ne
- adrne lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
- adreq lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB
+ adrne lr, eqv_ff_h264_idct_dc_add_neon
+ adreq lr, eqv_ff_h264_idct_add_neon
blx lr
2: subs ip, ip, #1
add r1, r1, #32
@@ -138,8 +157,8 @@ function ff_h264_idct_add16intra_neon, export=1
cmp r8, #0
ldrsh r8, [r1]
iteet ne
- adrne lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB
- adreq lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
+ adrne lr, eqv_ff_h264_idct_add_neon
+ adreq lr, eqv_ff_h264_idct_dc_add_neon
cmpeq r8, #0
blxne lr
subs ip, ip, #1
@@ -166,8 +185,8 @@ function ff_h264_idct_add8_neon, export=1
cmp r8, #0
ldrsh r8, [r1]
iteet ne
- adrne lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB
- adreq lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
+ adrne lr, eqv_ff_h264_idct_add_neon
+ adreq lr, eqv_ff_h264_idct_dc_add_neon
cmpeq r8, #0
blxne lr
add r12, r12, #1
@@ -388,8 +407,8 @@ function ff_h264_idct8_add4_neon, export=1
movne lr, #0
cmp lr, #0
ite ne
- adrne lr, X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB
- adreq lr, X(ff_h264_idct8_add_neon) + CONFIG_THUMB
+ adrne lr, eqv_ff_h264_idct8_dc_add_neon
+ adreq lr, eqv_ff_h264_idct8_add_neon
blx lr
2: subs r12, r12, #4
add r1, r1, #128
From: James Cowgill <jcowgill@debian.org> In binutils 2.29, the behavior of the ADR instruction changed so that 1 is added to the address of a Thumb function (previously nothing was added). This allows the loaded address to be passed to a BLX instruction and the correct mode change will occur. So that the behavior matches in binutils 2.29 and pre-2.29, use .eqv to pre-calculate the function address without the automatic +1 fixup. Then use these new symbols as the function addresses to be loaded. See: https://sourceware.org/bugzilla/show_bug.cgi?id=21458 The patch has modifications by Martin Storsjö, to fall back to plain defines, in case the assembler doesn't support .eqv directives. This makes sure it still works with old Apple tools with old binutils gas (since gas-preprocessor doesn't implement .eqv), and with the latest clang versions (where the assembler currently is capable of building all of our arm assembly without support from gas-preprocessor, but doesn't support .eqv). --- configure | 4 ++++ libavcodec/arm/h264idct_neon.S | 35 +++++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 8 deletions(-)