arm: Fix SIGBUS on ARM when compiled with binutils 2.29

Message ID 1504121220-19840-1-git-send-email-martin@martin.st
State Superseded
Headers show

Commit Message

Martin Storsjö Aug. 30, 2017, 7:27 p.m.
From: James Cowgill <jcowgill@debian.org>

In binutils 2.29, the behavior of the ADR instruction changed so that 1 is
added to the address of a Thumb function (previously nothing was added). This
allows the loaded address to be passed to a BLX instruction and the correct
mode change will occur.

So that the behavior matches in binutils 2.29 and pre-2.29, use .eqv to
pre-calculate the function address without the automatic +1 fixup. Then use
these new symbols as the function addresses to be loaded.

See: https://sourceware.org/bugzilla/show_bug.cgi?id=21458

The patch has modifications by Martin Storsjö, to fall back to
plain defines, in case the assembler doesn't support .eqv directives.

This makes sure it still works with old Apple tools with old binutils
gas (since gas-preprocessor doesn't implement .eqv), and with the latest
clang versions (where the assembler currently is capable of building
all of our arm assembly without support from gas-preprocessor, but
doesn't support .eqv).
---
 configure                      |  4 ++++
 libavcodec/arm/h264idct_neon.S | 35 +++++++++++++++++++++++++++--------
 2 files changed, 31 insertions(+), 8 deletions(-)

Comments

Luca Barbato Aug. 31, 2017, 8:07 a.m. | #1
On 30/08/2017 21:27, Martin Storsjö wrote:
> From: James Cowgill <jcowgill@debian.org>
> 
> In binutils 2.29, the behavior of the ADR instruction changed so that 1 is
> added to the address of a Thumb function (previously nothing was added). This
> allows the loaded address to be passed to a BLX instruction and the correct
> mode change will occur.
> 
> So that the behavior matches in binutils 2.29 and pre-2.29, use .eqv to
> pre-calculate the function address without the automatic +1 fixup. Then use
> these new symbols as the function addresses to be loaded.
> 
> See: https://sourceware.org/bugzilla/show_bug.cgi?id=21458
> 
> The patch has modifications by Martin Storsjö, to fall back to
> plain defines, in case the assembler doesn't support .eqv directives.
> 
> This makes sure it still works with old Apple tools with old binutils
> gas (since gas-preprocessor doesn't implement .eqv), and with the latest
> clang versions (where the assembler currently is capable of building
> all of our arm assembly without support from gas-preprocessor, but
> doesn't support .eqv).
> ---
>   configure                      |  4 ++++
>   libavcodec/arm/h264idct_neon.S | 35 +++++++++++++++++++++++++++--------
>   2 files changed, 31 insertions(+), 8 deletions(-)
> 

Probably ok.

Patch

diff --git a/configure b/configure
index 3e5784f..1307362 100755
--- a/configure
+++ b/configure
@@ -1666,6 +1666,7 @@  SYSTEM_FUNCS="
 
 TOOLCHAIN_FEATURES="
     as_arch_directive
+    as_eqv_directive
     as_fpu_directive
     as_func
     as_object_arch
@@ -4435,6 +4436,9 @@  EOF
     check_as <<EOF && enable as_arch_directive
 .arch armv7-a
 EOF
+    check_as <<EOF && enable as_eqv_directive
+.eqv foo, bar
+EOF
     check_as <<EOF && enable as_fpu_directive
 .fpu neon
 EOF
diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S
index f588f3e..706bdd3 100644
--- a/libavcodec/arm/h264idct_neon.S
+++ b/libavcodec/arm/h264idct_neon.S
@@ -20,6 +20,25 @@ 
 
 #include "libavutil/arm/asm.S"
 
+# In binutils 2.29, the behavior of the ADR instruction changed so that 1 is
+# added to the address of a Thumb function (previously nothing was added).
+#
+# These .eqv are used to pre-calculate the correct address with +CONFIG_THUMB so
+# that ADR will work with both old and new versions binutils.
+#
+# See: https://sourceware.org/bugzilla/show_bug.cgi?id=21458
+#if HAVE_AS_EQV_DIRECTIVE
+.eqv eqv_ff_h264_idct_add_neon,       X(ff_h264_idct_add_neon) + CONFIG_THUMB
+.eqv eqv_ff_h264_idct_dc_add_neon,    X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
+.eqv eqv_ff_h264_idct8_add_neon,      X(ff_h264_idct8_add_neon) + CONFIG_THUMB
+.eqv eqv_ff_h264_idct8_dc_add_neon,   X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB
+#else
+#define eqv_ff_h264_idct_add_neon     X(ff_h264_idct_add_neon) + CONFIG_THUMB
+#define eqv_ff_h264_idct_dc_add_neon  X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
+#define eqv_ff_h264_idct8_add_neon    X(ff_h264_idct8_add_neon) + CONFIG_THUMB
+#define eqv_ff_h264_idct8_dc_add_neon X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB
+#endif
+
 function ff_h264_idct_add_neon, export=1
         vld1.64         {d0-d3},  [r1,:128]
         vmov.i16        q15, #0
@@ -113,8 +132,8 @@  function ff_h264_idct_add16_neon, export=1
         movne           lr,  #0
         cmp             lr,  #0
         ite             ne
-        adrne           lr,  X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
-        adreq           lr,  X(ff_h264_idct_add_neon)    + CONFIG_THUMB
+        adrne           lr,  eqv_ff_h264_idct_dc_add_neon
+        adreq           lr,  eqv_ff_h264_idct_add_neon
         blx             lr
 2:      subs            ip,  ip,  #1
         add             r1,  r1,  #32
@@ -138,8 +157,8 @@  function ff_h264_idct_add16intra_neon, export=1
         cmp             r8,  #0
         ldrsh           r8,  [r1]
         iteet           ne
-        adrne           lr,  X(ff_h264_idct_add_neon)    + CONFIG_THUMB
-        adreq           lr,  X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
+        adrne           lr,  eqv_ff_h264_idct_add_neon
+        adreq           lr,  eqv_ff_h264_idct_dc_add_neon
         cmpeq           r8,  #0
         blxne           lr
         subs            ip,  ip,  #1
@@ -166,8 +185,8 @@  function ff_h264_idct_add8_neon, export=1
         cmp             r8,  #0
         ldrsh           r8,  [r1]
         iteet           ne
-        adrne           lr,  X(ff_h264_idct_add_neon)    + CONFIG_THUMB
-        adreq           lr,  X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
+        adrne           lr,  eqv_ff_h264_idct_add_neon
+        adreq           lr,  eqv_ff_h264_idct_dc_add_neon
         cmpeq           r8,  #0
         blxne           lr
         add             r12, r12, #1
@@ -388,8 +407,8 @@  function ff_h264_idct8_add4_neon, export=1
         movne           lr,  #0
         cmp             lr,  #0
         ite             ne
-        adrne           lr,  X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB
-        adreq           lr,  X(ff_h264_idct8_add_neon)    + CONFIG_THUMB
+        adrne           lr,  eqv_ff_h264_idct8_dc_add_neon
+        adreq           lr,  eqv_ff_h264_idct8_add_neon
         blx             lr
 2:      subs            r12, r12, #4
         add             r1,  r1,  #128