[1/1] h264/x86: sign extend int stride in deblock functions

Message ID 20190127101841.25995-1-janne-libav@jannau.net
State Committed
Commit 156ea66c91b1986a87916f187216978d686725f6
Headers show
Series
  • [1/1] h264/x86: sign extend int stride in deblock functions
Related show

Commit Message

Janne Grunau Jan. 27, 2019, 10:18 a.m.
Fixes checkasm errors after adding the h264 deblock tests.
---
 libavcodec/x86/h264_deblock.asm       | 8 ++++++++
 libavcodec/x86/h264_deblock_10bit.asm | 9 +++++++++
 2 files changed, 17 insertions(+)

Comments

Diego Biurrun Jan. 27, 2019, 10:39 a.m. | #1
On Sun, Jan 27, 2019 at 11:18:41AM +0100, Janne Grunau wrote:
> Fixes checkasm errors after adding the h264 deblock tests.
> ---
>  libavcodec/x86/h264_deblock.asm       | 8 ++++++++
>  libavcodec/x86/h264_deblock_10bit.asm | 9 +++++++++
>  2 files changed, 17 insertions(+)

Shouldn't some int types be converted to ptrdiff_t instead?

Diego
Janne Grunau Jan. 27, 2019, 11:05 a.m. | #2
On 2019-01-27 11:39:13 +0100, Diego Biurrun wrote:
> On Sun, Jan 27, 2019 at 11:18:41AM +0100, Janne Grunau wrote:
> > Fixes checkasm errors after adding the h264 deblock tests.
> > ---
> >  libavcodec/x86/h264_deblock.asm       | 8 ++++++++
> >  libavcodec/x86/h264_deblock_10bit.asm | 9 +++++++++
> >  2 files changed, 17 insertions(+)
> 
> Shouldn't some int types be converted to ptrdiff_t instead?

that would be another possible solution but h264 seems to use int for 
stride consistently. So changing stride to ptrdiff_t is much more effort
and riskier since it touches more functions and architectures.

Janne
Martin Storsjö Jan. 27, 2019, 7:22 p.m. | #3
On Sun, 27 Jan 2019, Janne Grunau wrote:

> Fixes checkasm errors after adding the h264 deblock tests.
> ---
> libavcodec/x86/h264_deblock.asm       | 8 ++++++++
> libavcodec/x86/h264_deblock_10bit.asm | 9 +++++++++
> 2 files changed, 17 insertions(+)

Ok with me.

Yes, changing the prototypes to use ptrdiff_t instead of int would be 
good, but I think it's better to get tests back to green instead of 
blocking the fix by demanding the larger refactoring right now.

// Martin

Patch

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 33fd5a9dd7..4b9cf85d16 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -288,6 +288,7 @@  cextern pb_3
 ;-----------------------------------------------------------------------------
 %macro DEBLOCK_LUMA 0
 cglobal deblock_v_luma_8, 5,5,10
+    movsxdifnidn  r1, r1d
     movd    m8, [r4] ; tc0
     lea     r4, [r1*3]
     dec     r2d        ; alpha-1
@@ -335,6 +336,7 @@  cglobal deblock_v_luma_8, 5,5,10
 INIT_MMX cpuname
 cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
     movsxd r7,  r1d
+    movsxdifnidn  r1, r1d
     lea    r8,  [r7+r7*2]
     lea    r6,  [r0-4]
     lea    r5,  [r0-4+r8]
@@ -395,6 +397,7 @@  DEBLOCK_LUMA
 ;                         int8_t *tc0)
 ;-----------------------------------------------------------------------------
 cglobal deblock_%1_luma_8, 5,5,8,2*%2
+    movsxdifnidn  r1, r1d
     lea     r4, [r1*3]
     dec     r2     ; alpha-1
     neg     r4
@@ -445,6 +448,7 @@  cglobal deblock_%1_luma_8, 5,5,8,2*%2
 ;-----------------------------------------------------------------------------
 INIT_MMX cpuname
 cglobal deblock_h_luma_8, 0,5,8,0x60+12
+    movsxdifnidn  r1, r1d
     mov    r0, r0mp
     mov    r3, r1m
     lea    r4, [r3*3]
@@ -646,6 +650,7 @@  cglobal deblock_%1_luma_intra_8, 4,6,16,0x10
 %else
 cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
 %endif
+    movsxdifnidn  r1, r1d
     lea     r4, [r1*4]
     lea     r5, [r1*3] ; 3*stride
     dec     r2d        ; alpha-1
@@ -703,6 +708,7 @@  INIT_MMX cpuname
 ;-----------------------------------------------------------------------------
 cglobal deblock_h_luma_intra_8, 4,9,0,0x80
     movsxd r7,  r1d
+    movsxdifnidn  r1, r1d
     lea    r8,  [r7*3]
     lea    r6,  [r0-4]
     lea    r5,  [r0-4+r8]
@@ -782,6 +788,7 @@  DEBLOCK_LUMA_INTRA v8
 INIT_MMX mmxext
 
 %macro CHROMA_V_START 0
+    movsxdifnidn  r1, r1d
     dec    r2d      ; alpha-1
     dec    r3d      ; beta-1
     mov    t5, r0
@@ -790,6 +797,7 @@  INIT_MMX mmxext
 %endmacro
 
 %macro CHROMA_H_START 0
+    movsxdifnidn  r1, r1d
     dec    r2d
     dec    r3d
     sub    r0, 2
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index d049c62bf2..1a424b7f43 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -162,6 +162,7 @@  cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
     %define ms2 [rsp+mmsize*2]
     %define am  [rsp+mmsize*3]
     %define bm  [rsp+mmsize*4]
+    movsxdifnidn  r1, r1d
     SUB        rsp, pad
     shl        r2d, 2
     shl        r3d, 2
@@ -219,6 +220,7 @@  cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
     %define p2m [rsp+mmsize*4]
     %define am  [rsp+mmsize*5]
     %define bm  [rsp+mmsize*6]
+    movsxdifnidn  r1, r1d
     SUB        rsp, pad
     shl        r2d, 2
     shl        r3d, 2
@@ -349,6 +351,7 @@  cglobal deblock_v_luma_10, 5,5,15
     %define mask0 m7
     %define mask1 m10
     %define mask2 m11
+    movsxdifnidn  r1, r1d
     shl        r2d, 2
     shl        r3d, 2
     LOAD_AB    m12, m13, r2d, r3d
@@ -377,6 +380,7 @@  cglobal deblock_v_luma_10, 5,5,15
     REP_RET
 
 cglobal deblock_h_luma_10, 5,7,15
+    movsxdifnidn  r1, r1d
     shl        r2d, 2
     shl        r3d, 2
     LOAD_AB    m12, m13, r2d, r3d
@@ -492,6 +496,7 @@  DEBLOCK_LUMA_64
     CAT_XDEFINE t, i, [rsp+mmsize*(i-4)]
     %assign i i+1
 %endrep
+    movsxdifnidn  r1, r1d
     SUB    rsp, pad
 %endmacro
 
@@ -615,6 +620,7 @@  cglobal deblock_v_luma_intra_10, 4,7,16
     %define q2 m13
     %define aa m5
     %define bb m14
+    movsxdifnidn  r1, r1d
     lea     r4, [r1*4]
     lea     r5, [r1*3] ; 3*stride
     neg     r4
@@ -668,6 +674,7 @@  cglobal deblock_h_luma_intra_10, 4,7,16
     %define p3 m4
     %define spill [rsp]
     %assign pad 24-(stack_offset&15)
+    movsxdifnidn  r1, r1d
     SUB     rsp, pad
     lea     r4, [r1*4]
     lea     r5, [r1*3] ; 3*stride
@@ -852,6 +859,7 @@  DEBLOCK_LUMA_INTRA
 ;                             int8_t *tc0)
 ;-----------------------------------------------------------------------------
 cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
+    movsxdifnidn  r1, r1d
     mov         r5, r0
     sub         r0, r1
     sub         r0, r1
@@ -887,6 +895,7 @@  cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
 ;                                   int beta)
 ;-----------------------------------------------------------------------------
 cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
+    movsxdifnidn  r1, r1d
     mov         r4, r0
     sub         r0, r1
     sub         r0, r1