[1/2] checkasm: Check register clobbering on arm

Message ID 1451944908-16696-1-git-send-email-martin@martin.st
State Committed
Headers show

Commit Message

Martin Storsjö Jan. 4, 2016, 10:01 p.m.
Use two separate functions, depending on whether VFP/NEON is available.

This is set to require armv5te - it uses blx, which is only available
since armv5t, but we don't have a separate configure item for that.
(It also uses ldrd, which requires armv5te, but this could be avoided
if necessary.)
---
Use d8-d15 instead of q4-q7 for vpush/vpop/vldm, use Janne's suggestion
for doing the VFP register clobber checking, check FPSCR for changes
as well (except for the 5 topmost bits).
---
 tests/checkasm/arm/Makefile   |    1 +
 tests/checkasm/arm/checkasm.S |  144 +++++++++++++++++++++++++++++++++++++++++
 tests/checkasm/checkasm.c     |   11 ++++
 tests/checkasm/checkasm.h     |   13 ++++
 4 files changed, 169 insertions(+)
 create mode 100644 tests/checkasm/arm/Makefile
 create mode 100644 tests/checkasm/arm/checkasm.S

Comments

Janne Grunau Jan. 5, 2016, 8:50 a.m. | #1
On 2016-01-05 00:01:48 +0200, Martin Storsjö wrote:
> Use two separate functions, depending on whether VFP/NEON is available.
> 
> This is set to require armv5te - it uses blx, which is only available
> since armv5t, but we don't have a separate configure item for that.
> (It also uses ldrd, which requires armv5te, but this could be avoided
> if necessary.)
> ---
> Use d8-d15 instead of q4-q7 for vpush/vpop/vldm, use Janne's suggestion
> for doing the VFP register clobber checking, check FPSCR for changes
> as well (except for the 5 topmost bits).
> ---
>  tests/checkasm/arm/Makefile   |    1 +
>  tests/checkasm/arm/checkasm.S |  144 +++++++++++++++++++++++++++++++++++++++++
>  tests/checkasm/checkasm.c     |   11 ++++
>  tests/checkasm/checkasm.h     |   13 ++++
>  4 files changed, 169 insertions(+)
>  create mode 100644 tests/checkasm/arm/Makefile
>  create mode 100644 tests/checkasm/arm/checkasm.S
> 
> diff --git a/tests/checkasm/arm/Makefile b/tests/checkasm/arm/Makefile
> new file mode 100644
> index 0000000..55f2383
> --- /dev/null
> +++ b/tests/checkasm/arm/Makefile
> @@ -0,0 +1 @@
> +CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o
> diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S
> new file mode 100644
> index 0000000..6132d61
> --- /dev/null
> +++ b/tests/checkasm/arm/checkasm.S
> @@ -0,0 +1,144 @@
> +/****************************************************************************
> + * Assembly testing and benchmarking tool
> + * Copyright (c) 2015 Martin Storsjo
> + * Copyright (c) 2015 Janne Grunau
> + *
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
> + *****************************************************************************/
> +
> +#include "libavutil/arm/asm.S"
> +
> +const register_init
> +    .quad 0x21f86d66c8ca00ce
> +    .quad 0x75b6ba21077c48ad
> +    .quad 0xed56bb2dcb3c7736
> +    .quad 0x8bda43d3fd1a7e06
> +    .quad 0xb64a9c9e5d318408
> +    .quad 0xdf9a54b303f1d3a3
> +    .quad 0x4a75479abd64e097
> +    .quad 0x249214109d5d1c88
> +endconst
> +
> +const error_message
> +    .asciz "failed to preserve register"
> +endconst
> +
> +@ max number of args used by any asm function.
> +#define MAX_ARGS 15
> +
> +#define ARG_STACK 4*(MAX_ARGS - 2)
> +
> +.macro clobbercheck variant
> +.equ pushed, 4*9
> +function checkasm_checked_call_\variant, export=1
> +    push        {r4-r11, lr}
> +.ifc \variant, vfp
> +    vpush       {d8-d15}
> +    fmrx        r4,  FPSCR
> +    push        {r4}
> +.equ pushed, pushed + 16*4 + 4
> +.endif
> +
> +    movrel      r12, register_init
> +.ifc \variant, vfp
> +    vldm        r12, {d8-d15}
> +.endif
> +    ldm         r12, {r4-r11}
> +
> +    sub         sp,  sp,  #ARG_STACK
> +.equ pos, 0
> +.rept MAX_ARGS-2
> +    ldr         r12, [sp, #ARG_STACK + pushed + 8 + pos]
> +    str         r12, [sp, #pos]
> +.equ pos, pos + 4
> +.endr
> +
> +    mov         r12, r0
> +    mov         r0,  r2
> +    mov         r1,  r3
> +    ldrd        r2,  r3,  [sp, #ARG_STACK + pushed]
> +    blx         r12
> +    add         sp,  sp,  #ARG_STACK
> +
> +    push        {r0, r1}
> +    movrel      r12, register_init
> +    mov         r3,  #0
> +.ifc \variant, vfp
> +.macro check_reg_vfp, dreg, inc=8
> +    ldrd        r0,  r1,  [r12], #\inc
> +    vmov        r2,  lr,  \dreg
> +    eor         r0,  r2

Sorry for being lazy when I wrote that code in a mail; please use the
full three-register form here (e.g. "eor r0, r0, r2").

> +    eor         r1,  lr
> +    orr         r3,  r0
> +    orr         r3,  r1
> +.endm
> +
> +.irp n, 8, 9, 10, 11, 12, 13, 14
> +    check_reg_vfp d\n
> +.endr
> +    check_reg_vfp d15, -56
> +.purgem check_reg_vfp
> +
> +    fmrx        r0,  FPSCR
> +    ldr         r1,  [sp, #8]
> +    eor         r0,  r1
> +    @ Ignore changes in the topmost 5 bits
> +    mov32       r2,  0x07ffffff
> +    and         r0,  r2

Just shift left by 5 to drop the topmost 5 bits; bfc is unfortunately an armv6t2 instruction.

> +    orr         r3,  r0
> +.endif
> +
> +.macro check_reg reg1, reg2=
> +    ldrd        r0,  r1,  [r12], #8
> +    eor         r0,  r0, \reg1
> +    orr         r3,  r3, r0
> +.ifnb \reg2
> +    eor         r1,  r1, \reg2
> +    orr         r3,  r3, r1
> +.endif
> +.endm
> +    check_reg   r4,  r5
> +    check_reg   r6,  r7
> +@ r9 is a volatile register in the ios ABI
> +#ifdef __APPLE__
> +    check_reg   r8
> +#else
> +    check_reg   r8,  r9
> +#endif
> +    check_reg   r10, r11
> +.purgem check_reg
> +
> +    cmp         r3,  #0

The compare is not needed if the orr instructions in check_reg (or at
least the last one) set the flags, i.e. are written as orrs.

The rest remains ok

Janne

Patch

diff --git a/tests/checkasm/arm/Makefile b/tests/checkasm/arm/Makefile
new file mode 100644
index 0000000..55f2383
--- /dev/null
+++ b/tests/checkasm/arm/Makefile
@@ -0,0 +1 @@ 
+CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o
diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S
new file mode 100644
index 0000000..6132d61
--- /dev/null
+++ b/tests/checkasm/arm/checkasm.S
@@ -0,0 +1,172 @@
+/****************************************************************************
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2015 Martin Storsjo
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#include "libavutil/arm/asm.S"
+
+@ Reference values for the callee-saved registers: eight doublewords for
+@ d8-d15, the first eight words of which are also loaded into r4-r11.
+@ Aligned to 8 bytes because the values are read back with ldrd.
+const register_init, align=3
+    .quad 0x21f86d66c8ca00ce
+    .quad 0x75b6ba21077c48ad
+    .quad 0xed56bb2dcb3c7736
+    .quad 0x8bda43d3fd1a7e06
+    .quad 0xb64a9c9e5d318408
+    .quad 0xdf9a54b303f1d3a3
+    .quad 0x4a75479abd64e097
+    .quad 0x249214109d5d1c88
+endconst
+
+const error_message
+    .asciz "failed to preserve register"
+endconst
+
+@ max number of args used by any asm function.
+#define MAX_ARGS 15
+
+@ Bytes of stacked arguments copied for the tested function.
+#define ARG_STACK 4*(MAX_ARGS - 2)
+
+@ The argument area padded so that, together with the registers pushed on
+@ entry, it is a multiple of 8 bytes. This keeps sp 8-byte aligned, as the
+@ AAPCS requires at public interfaces, when the tested function is called.
+#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed)
+
+@ Emit checkasm_checked_call_<variant>(func, dummy, ...): load known values
+@ into the callee-saved registers, call func with the arguments that follow
+@ dummy, and call checkasm_fail_func if r4-r11 (r9 excepted on Apple) or,
+@ for the vfp variant, d8-d15 or FPSCR differ afterwards. The r0/r1 returned
+@ by func are handed back to the caller.
+.macro clobbercheck variant
+.equ pushed, 4*9
+function checkasm_checked_call_\variant, export=1
+    push        {r4-r11, lr}
+.ifc \variant, vfp
+    vpush       {d8-d15}
+    fmrx        r4,  FPSCR
+    push        {r4}
+.equ pushed, pushed + 16*4 + 4
+.endif
+
+    movrel      r12, register_init
+.ifc \variant, vfp
+    vldm        r12, {d8-d15}
+.endif
+    ldm         r12, {r4-r11}
+
+    @ Copy the stacked arguments to a fresh area below the saved registers.
+    sub         sp,  sp,  #ARG_STACK_A
+.equ pos, 0
+.rept MAX_ARGS-2
+    ldr         r12, [sp, #ARG_STACK_A + pushed + 8 + pos]
+    str         r12, [sp, #pos]
+.equ pos, pos + 4
+.endr
+
+    @ Drop func and dummy: the next four argument words go into r0-r3.
+    mov         r12, r0
+    mov         r0,  r2
+    mov         r1,  r3
+    ldrd        r2,  r3,  [sp, #ARG_STACK_A + pushed]
+    blx         r12
+    add         sp,  sp,  #ARG_STACK_A
+
+    @ Save the return value; r3 collects the differences found below.
+    push        {r0, r1}
+    movrel      r12, register_init
+    mov         r3,  #0
+.ifc \variant, vfp
+@ OR the difference between dreg and the doubleword at r12 into r3, then
+@ advance r12 by inc bytes.
+.macro check_reg_vfp, dreg, inc=8
+    ldrd        r0,  r1,  [r12], #\inc
+    vmov        r2,  lr,  \dreg
+    eor         r0,  r0,  r2
+    eor         r1,  r1,  lr
+    orr         r3,  r3,  r0
+    orr         r3,  r3,  r1
+.endm
+
+.irp n, 8, 9, 10, 11, 12, 13, 14
+    check_reg_vfp d\n
+.endr
+    @ The negative step rewinds r12 to the start of register_init.
+    check_reg_vfp d15, -56
+.purgem check_reg_vfp
+
+    fmrx        r0,  FPSCR
+    ldr         r1,  [sp, #8]
+    eor         r0,  r0,  r1
+    @ Ignore changes in the topmost 5 bits
+    orr         r3,  r3,  r0,  lsl #5
+.endif
+
+@ OR the difference between reg1 (and reg2, if given) and the words at r12
+@ into r3, setting the flags, and advance r12 by 8 bytes.
+.macro check_reg reg1, reg2=
+    ldrd        r0,  r1,  [r12], #8
+    eor         r0,  r0, \reg1
+    orrs        r3,  r3, r0
+.ifnb \reg2
+    eor         r1,  r1, \reg2
+    orrs        r3,  r3, r1
+.endif
+.endm
+    check_reg   r4,  r5
+    check_reg   r6,  r7
+@ r9 is a volatile register in the ios ABI
+#ifdef __APPLE__
+    check_reg   r8
+#else
+    check_reg   r8,  r9
+#endif
+    check_reg   r10, r11
+.purgem check_reg
+
+    @ The flags are still those of the last orrs: Z means nothing changed.
+    beq         0f
+
+    @ Pad sp to a multiple of 8 bytes for the call, if it is not already.
+.equ fail_pad, (pushed + 8) & 7
+    movrel      r0, error_message
+.if fail_pad
+    sub         sp,  sp,  #fail_pad
+.endif
+    blx         X(checkasm_fail_func)
+.if fail_pad
+    add         sp,  sp,  #fail_pad
+.endif
+0:
+    pop         {r0, r1}
+.ifc \variant, vfp
+    pop         {r2}
+    fmxr        FPSCR, r2
+    vpop        {d8-d15}
+.endif
+    pop         {r4-r11, pc}
+endfunc
+.endm
+
+#if HAVE_VFP || HAVE_NEON
+clobbercheck vfp
+#endif
+clobbercheck novfp
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index d6f8ffc..c75e431 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -53,6 +53,12 @@ 
 #define isatty(fd) 1
 #endif
 
+#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
+#include "libavutil/arm/cpu.h"
+
+void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp;
+#endif
+
 /* List of tests to invoke */
 static const struct {
     const char *name;
@@ -463,6 +469,11 @@  int main(int argc, char *argv[])
 {
     int i, seed, ret = 0;
 
+#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
+    if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags()))
+        checkasm_checked_call = checkasm_checked_call_vfp;
+#endif
+
     if (!tests[0].func || !cpus[0].flag) {
         fprintf(stderr, "checkasm: no tests to perform\n");
         return 0;
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 0bc66b9..2486cb5 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -116,6 +116,15 @@  void checkasm_stack_clobber(uint64_t clobber, ...);
                                              (void *)checkasm_checked_call;
 #define call_new(...) checked_call(func_new, __VA_ARGS__)
 #endif
+#elif ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
+/* Use a dummy argument, to offset the real parameters by 2, not only 1.
+ * This makes sure that potential 8-byte-alignment of parameters is kept the same
+ * even when the extra parameters have been removed. */
+void checkasm_checked_call_vfp(void *func, int dummy, ...);
+void checkasm_checked_call_novfp(void *func, int dummy, ...);
+extern void (*checkasm_checked_call)(void *func, int dummy, ...);
+#define declare_new(ret, ...) ret (*checked_call)(void *, int dummy, __VA_ARGS__) = (void *)checkasm_checked_call;
+#define call_new(...) checked_call(func_new, 0, __VA_ARGS__)
 #else
 #define declare_new(ret, ...)
 #define declare_new_emms(cpu_flags, ret, ...)
@@ -123,6 +132,10 @@  void checkasm_stack_clobber(uint64_t clobber, ...);
 #define call_new(...) ((func_type *)func_new)(__VA_ARGS__)
 #endif
 
+#ifndef declare_new_emms
+#define declare_new_emms(cpu_flags, ret, ...) declare_new(ret, __VA_ARGS__)
+#endif
+
 /* Benchmark the function */
 #ifdef AV_READ_TIME
 #define bench_new(...)\