arm: Fix SIGBUS on ARM when compiled with binutils 2.29

Message ID 1504170656-16065-1-git-send-email-martin@martin.st
State Committed
Commit 228083aa31dc3c7475eb7d9e722e24558bcfde28
Headers show

Commit Message

Martin Storsjö Aug. 31, 2017, 9:10 a.m.
In binutils 2.29, the behavior of the ADR instruction changed so that 1 is
added to the address of a Thumb function (previously nothing was added). This
allows the loaded address to be passed to a BLX instruction and the correct
mode change will occur.

See: https://sourceware.org/bugzilla/show_bug.cgi?id=21458

By using adr with a label that isn't annotated as a thumb function,
we avoid the new behaviour in binutils 2.29 and get the same behaviour
as in prior releases, and as in other assemblers (ms armasm.exe,
clang's built in assembler).
---
 libavcodec/arm/h264idct_neon.S | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

Comments

Luca Barbato Aug. 31, 2017, 9:54 a.m. | #1
On 31/08/2017 11:10, Martin Storsjö wrote:
> In binutils 2.29, the behavior of the ADR instruction changed so that 1 is
> added to the address of a Thumb function (previously nothing was added). This
> allows the loaded address to be passed to a BLX instruction and the correct
> mode change will occur.
> 
> See: https://sourceware.org/bugzilla/show_bug.cgi?id=21458
> 
> By using adr with a label that isn't annotated as a thumb function,
> we avoid the new behaviour in binutils 2.29 and get the same behaviour
> as in prior releases, and as in other assemblers (ms armasm.exe,
> clang's built in assembler).
> ---
>   libavcodec/arm/h264idct_neon.S | 20 ++++++++++++--------
>   1 file changed, 12 insertions(+), 8 deletions(-)

Looks less intrusive, I guess it should be ok even if feels a little 
brittle somehow.

lu
Janne Grunau Sept. 1, 2017, 7:41 a.m. | #2
On 2017-08-31 12:10:56 +0300, Martin Storsjö wrote:
> In binutils 2.29, the behavior of the ADR instruction changed so that 1 is
> added to the address of a Thumb function (previously nothing was added). This
> allows the loaded address to be passed to a BLX instruction and the correct
> mode change will occur.
> 
> See: https://sourceware.org/bugzilla/show_bug.cgi?id=21458
> 
> By using adr with a label that isn't annotated as a thumb function,
> we avoid the new behaviour in binutils 2.29 and get the same behaviour
> as in prior releases, and as in other assemblers (ms armasm.exe,
> clang's built in assembler).
> ---
>  libavcodec/arm/h264idct_neon.S | 20 ++++++++++++--------
>  1 file changed, 12 insertions(+), 8 deletions(-)
> 
> diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S
> index f588f3e..b078cf2 100644
> --- a/libavcodec/arm/h264idct_neon.S
> +++ b/libavcodec/arm/h264idct_neon.S
> @@ -21,6 +21,7 @@
>  #include "libavutil/arm/asm.S"
>  
>  function ff_h264_idct_add_neon, export=1
> +h264_idct_add_neon_nothumb:

I thought of adding the symbol to the function macro but given that it's 
just two functions in single file adding the macros manually might be 
better. I don't expect it to be needed in many more places if any.

>          vld1.64         {d0-d3},  [r1,:128]
>          vmov.i16        q15, #0
>  
> @@ -73,6 +74,7 @@ function ff_h264_idct_add_neon, export=1
>  endfunc
>  
>  function ff_h264_idct_dc_add_neon, export=1
> +h264_idct_dc_add_neon_nothumb:
>          mov             r3,       #0
>          vld1.16         {d2[],d3[]}, [r1,:16]
>          strh            r3,       [r1]
> @@ -113,8 +115,8 @@ function ff_h264_idct_add16_neon, export=1
>          movne           lr,  #0
>          cmp             lr,  #0
>          ite             ne
> -        adrne           lr,  X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
> -        adreq           lr,  X(ff_h264_idct_add_neon)    + CONFIG_THUMB
> +        adrne           lr,  h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
> +        adreq           lr,  h264_idct_add_neon_nothumb    + CONFIG_THUMB
>          blx             lr
>  2:      subs            ip,  ip,  #1
>          add             r1,  r1,  #32
> @@ -138,8 +140,8 @@ function ff_h264_idct_add16intra_neon, export=1
>          cmp             r8,  #0
>          ldrsh           r8,  [r1]
>          iteet           ne
> -        adrne           lr,  X(ff_h264_idct_add_neon)    + CONFIG_THUMB
> -        adreq           lr,  X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
> +        adrne           lr,  h264_idct_add_neon_nothumb    + CONFIG_THUMB
> +        adreq           lr,  h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
>          cmpeq           r8,  #0
>          blxne           lr
>          subs            ip,  ip,  #1
> @@ -166,8 +168,8 @@ function ff_h264_idct_add8_neon, export=1
>          cmp             r8,  #0
>          ldrsh           r8,  [r1]
>          iteet           ne
> -        adrne           lr,  X(ff_h264_idct_add_neon)    + CONFIG_THUMB
> -        adreq           lr,  X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
> +        adrne           lr,  h264_idct_add_neon_nothumb    + CONFIG_THUMB
> +        adreq           lr,  h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
>          cmpeq           r8,  #0
>          blxne           lr
>          add             r12, r12, #1
> @@ -267,6 +269,7 @@ endfunc
>  .endm
>  
>  function ff_h264_idct8_add_neon, export=1
> +h264_idct8_add_neon_nothumb:
>          vmov.i16        q3,       #0
>          vld1.16         {q8-q9},  [r1,:128]
>          vst1.16         {q3},     [r1,:128]!
> @@ -328,6 +331,7 @@ function ff_h264_idct8_add_neon, export=1
>  endfunc
>  
>  function ff_h264_idct8_dc_add_neon, export=1
> +h264_idct8_dc_add_neon_nothumb:
>          mov             r3,       #0
>          vld1.16         {d30[],d31[]},[r1,:16]
>          strh            r3,       [r1]
> @@ -388,8 +392,8 @@ function ff_h264_idct8_add4_neon, export=1
>          movne           lr,  #0
>          cmp             lr,  #0
>          ite             ne
> -        adrne           lr,  X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB
> -        adreq           lr,  X(ff_h264_idct8_add_neon)    + CONFIG_THUMB
> +        adrne           lr,  h264_idct8_dc_add_neon_nothumb + CONFIG_THUMB
> +        adreq           lr,  h264_idct8_add_neon_nothumb    + CONFIG_THUMB
>          blx             lr
>  2:      subs            r12, r12, #4
>          add             r1,  r1,  #128

patch ok

Janne

Patch

diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S
index f588f3e..b078cf2 100644
--- a/libavcodec/arm/h264idct_neon.S
+++ b/libavcodec/arm/h264idct_neon.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/arm/asm.S"
 
 function ff_h264_idct_add_neon, export=1
+h264_idct_add_neon_nothumb:
         vld1.64         {d0-d3},  [r1,:128]
         vmov.i16        q15, #0
 
@@ -73,6 +74,7 @@  function ff_h264_idct_add_neon, export=1
 endfunc
 
 function ff_h264_idct_dc_add_neon, export=1
+h264_idct_dc_add_neon_nothumb:
         mov             r3,       #0
         vld1.16         {d2[],d3[]}, [r1,:16]
         strh            r3,       [r1]
@@ -113,8 +115,8 @@  function ff_h264_idct_add16_neon, export=1
         movne           lr,  #0
         cmp             lr,  #0
         ite             ne
-        adrne           lr,  X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
-        adreq           lr,  X(ff_h264_idct_add_neon)    + CONFIG_THUMB
+        adrne           lr,  h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
+        adreq           lr,  h264_idct_add_neon_nothumb    + CONFIG_THUMB
         blx             lr
 2:      subs            ip,  ip,  #1
         add             r1,  r1,  #32
@@ -138,8 +140,8 @@  function ff_h264_idct_add16intra_neon, export=1
         cmp             r8,  #0
         ldrsh           r8,  [r1]
         iteet           ne
-        adrne           lr,  X(ff_h264_idct_add_neon)    + CONFIG_THUMB
-        adreq           lr,  X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
+        adrne           lr,  h264_idct_add_neon_nothumb    + CONFIG_THUMB
+        adreq           lr,  h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
         cmpeq           r8,  #0
         blxne           lr
         subs            ip,  ip,  #1
@@ -166,8 +168,8 @@  function ff_h264_idct_add8_neon, export=1
         cmp             r8,  #0
         ldrsh           r8,  [r1]
         iteet           ne
-        adrne           lr,  X(ff_h264_idct_add_neon)    + CONFIG_THUMB
-        adreq           lr,  X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB
+        adrne           lr,  h264_idct_add_neon_nothumb    + CONFIG_THUMB
+        adreq           lr,  h264_idct_dc_add_neon_nothumb + CONFIG_THUMB
         cmpeq           r8,  #0
         blxne           lr
         add             r12, r12, #1
@@ -267,6 +269,7 @@  endfunc
 .endm
 
 function ff_h264_idct8_add_neon, export=1
+h264_idct8_add_neon_nothumb:
         vmov.i16        q3,       #0
         vld1.16         {q8-q9},  [r1,:128]
         vst1.16         {q3},     [r1,:128]!
@@ -328,6 +331,7 @@  function ff_h264_idct8_add_neon, export=1
 endfunc
 
 function ff_h264_idct8_dc_add_neon, export=1
+h264_idct8_dc_add_neon_nothumb:
         mov             r3,       #0
         vld1.16         {d30[],d31[]},[r1,:16]
         strh            r3,       [r1]
@@ -388,8 +392,8 @@  function ff_h264_idct8_add4_neon, export=1
         movne           lr,  #0
         cmp             lr,  #0
         ite             ne
-        adrne           lr,  X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB
-        adreq           lr,  X(ff_h264_idct8_add_neon)    + CONFIG_THUMB
+        adrne           lr,  h264_idct8_dc_add_neon_nothumb + CONFIG_THUMB
+        adreq           lr,  h264_idct8_add_neon_nothumb    + CONFIG_THUMB
         blx             lr
 2:      subs            r12, r12, #4
         add             r1,  r1,  #128