vp3: Change type of stride parameters to ptrdiff_t

Message ID 1472113830-20081-1-git-send-email-diego@biurrun.de
State New
Headers show

Commit Message

Diego Biurrun Aug. 25, 2016, 8:30 a.m.
Declaring the stride as ptrdiff_t spares SIMD-optimized functions from
manually sign-extending a 32-bit int stride before using it in pointer
arithmetic.

Also rename the "line_size" parameters to "stride" so the name is consistent everywhere.
---

Log message updated.

 libavcodec/arm/vp3dsp_init_arm.c |  6 +++---
 libavcodec/ppc/vp3dsp_altivec.c  |  4 ++--
 libavcodec/vp3dsp.c              | 20 ++++++++++----------
 libavcodec/vp3dsp.h              | 10 +++++-----
 libavcodec/x86/vp3dsp.asm        | 11 -----------
 libavcodec/x86/vp3dsp_init.c     | 15 +++++++--------
 6 files changed, 27 insertions(+), 39 deletions(-)

Comments

Martin Storsjo Aug. 25, 2016, 8:37 a.m. | #1
On Thu, 25 Aug 2016, Diego Biurrun wrote:

> This avoids SIMD-optimized functions having to sign-extend their
> stride argument manually to be able to do pointer arithmetic.
>
> Also adjust parameter names to be "stride" everywhere.
> ---
>
> Log message updated.
>
> libavcodec/arm/vp3dsp_init_arm.c |  6 +++---
> libavcodec/ppc/vp3dsp_altivec.c  |  4 ++--
> libavcodec/vp3dsp.c              | 20 ++++++++++----------
> libavcodec/vp3dsp.h              | 10 +++++-----
> libavcodec/x86/vp3dsp.asm        | 11 -----------
> libavcodec/x86/vp3dsp_init.c     | 15 +++++++--------
> 6 files changed, 27 insertions(+), 39 deletions(-)

Ok

// Martin

Patch

diff --git a/libavcodec/arm/vp3dsp_init_arm.c b/libavcodec/arm/vp3dsp_init_arm.c
index 11e1f1c..1c91434 100644
--- a/libavcodec/arm/vp3dsp_init_arm.c
+++ b/libavcodec/arm/vp3dsp_init_arm.c
@@ -23,9 +23,9 @@ 
 #include "libavutil/arm/cpu.h"
 #include "libavcodec/vp3dsp.h"
 
-void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, int16_t *data);
-void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);
-void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, int16_t *data);
+void ff_vp3_idct_put_neon(uint8_t *dest, ptrdiff_t stride, int16_t *data);
+void ff_vp3_idct_add_neon(uint8_t *dest, ptrdiff_t stride, int16_t *data);
+void ff_vp3_idct_dc_add_neon(uint8_t *dest, ptrdiff_t stride, int16_t *data);
 
 void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *);
 void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
diff --git a/libavcodec/ppc/vp3dsp_altivec.c b/libavcodec/ppc/vp3dsp_altivec.c
index 68e7102..1d907d7 100644
--- a/libavcodec/ppc/vp3dsp_altivec.c
+++ b/libavcodec/ppc/vp3dsp_altivec.c
@@ -114,7 +114,7 @@  static inline vec_s16 M16(vec_s16 a, vec_s16 C)
 #define ADD8(a) vec_add(a, eight)
 #define SHIFT4(a) vec_sra(a, four)
 
-static void vp3_idct_put_altivec(uint8_t *dst, int stride, int16_t block[64])
+static void vp3_idct_put_altivec(uint8_t *dst, ptrdiff_t stride, int16_t block[64])
 {
     vec_u8 t;
     IDCT_START
@@ -143,7 +143,7 @@  static void vp3_idct_put_altivec(uint8_t *dst, int stride, int16_t block[64])
     memset(block, 0, sizeof(*block) * 64);
 }
 
-static void vp3_idct_add_altivec(uint8_t *dst, int stride, int16_t block[64])
+static void vp3_idct_add_altivec(uint8_t *dst, ptrdiff_t stride, int16_t block[64])
 {
     LOAD_ZERO;
     vec_u8 t, vdst;
diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c
index ab65f2b..459441e 100644
--- a/libavcodec/vp3dsp.c
+++ b/libavcodec/vp3dsp.c
@@ -44,7 +44,7 @@ 
 
 #define M(a, b) (((a) * (b)) >> 16)
 
-static av_always_inline void idct(uint8_t *dst, int stride,
+static av_always_inline void idct(uint8_t *dst, ptrdiff_t stride,
                                   int16_t *input, int type)
 {
     int16_t *ip = input;
@@ -195,21 +195,21 @@  static av_always_inline void idct(uint8_t *dst, int stride,
     }
 }
 
-static void vp3_idct_put_c(uint8_t *dest /* align 8 */, int line_size,
+static void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                            int16_t *block /* align 16 */)
 {
-    idct(dest, line_size, block, 1);
+    idct(dest, stride, block, 1);
     memset(block, 0, sizeof(*block) * 64);
 }
 
-static void vp3_idct_add_c(uint8_t *dest /* align 8 */, int line_size,
+static void vp3_idct_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                            int16_t *block /* align 16 */)
 {
-    idct(dest, line_size, block, 2);
+    idct(dest, stride, block, 2);
     memset(block, 0, sizeof(*block) * 64);
 }
 
-static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, int line_size,
+static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
                               int16_t *block /* align 16 */)
 {
     int i, dc = (block[0] + 15) >> 5;
@@ -223,17 +223,17 @@  static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, int line_size,
         dest[5] = av_clip_uint8(dest[5] + dc);
         dest[6] = av_clip_uint8(dest[6] + dc);
         dest[7] = av_clip_uint8(dest[7] + dc);
-        dest   += line_size;
+        dest   += stride;
     }
     block[0] = 0;
 }
 
-static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
+static void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
                                 int *bounding_values)
 {
     unsigned char *end;
     int filter_value;
-    const int nstride = -stride;
+    const ptrdiff_t nstride = -stride;
 
     for (end = first_pixel + 8; first_pixel < end; first_pixel++) {
         filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) +
@@ -245,7 +245,7 @@  static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
     }
 }
 
-static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride,
+static void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
                                 int *bounding_values)
 {
     unsigned char *end;
diff --git a/libavcodec/vp3dsp.h b/libavcodec/vp3dsp.h
index 3099a7e..010f905 100644
--- a/libavcodec/vp3dsp.h
+++ b/libavcodec/vp3dsp.h
@@ -38,11 +38,11 @@  typedef struct VP3DSPContext {
                                  const uint8_t *b,
                                  ptrdiff_t stride, int h);
 
-    void (*idct_put)(uint8_t *dest, int line_size, int16_t *block);
-    void (*idct_add)(uint8_t *dest, int line_size, int16_t *block);
-    void (*idct_dc_add)(uint8_t *dest, int line_size, int16_t *block);
-    void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
-    void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
+    void (*idct_put)(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+    void (*idct_add)(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+    void (*idct_dc_add)(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+    void (*v_loop_filter)(uint8_t *src, ptrdiff_t stride, int *bounding_values);
+    void (*h_loop_filter)(uint8_t *src, ptrdiff_t stride, int *bounding_values);
 } VP3DSPContext;
 
 void ff_vp3dsp_init(VP3DSPContext *c, int flags);
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index fc8a047..8587741 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -104,9 +104,6 @@  SECTION .text
 
 INIT_MMX mmxext
 cglobal vp3_v_loop_filter, 3, 4
-%if ARCH_X86_64
-    movsxd        r1, r1d
-%endif
     mov           r3, r1
     neg           r1
     movq          m6, [r0+r1*2]
@@ -121,9 +118,6 @@  cglobal vp3_v_loop_filter, 3, 4
     RET
 
 cglobal vp3_h_loop_filter, 3, 4
-%if ARCH_X86_64
-    movsxd        r1, r1d
-%endif
     lea           r3, [r1*3]
 
     movd          m6, [r0     -2]
@@ -525,7 +519,6 @@  cglobal vp3_h_loop_filter, 3, 4
 cglobal vp3_idct_put, 3, 4, 9
     VP3_IDCT      r2
 
-    movsxdifnidn  r1, r1d
     mova          m4, [pb_80]
     lea           r3, [r1*3]
 %assign %%i 0
@@ -582,7 +575,6 @@  cglobal vp3_idct_put, 3, 4, 9
 cglobal vp3_idct_add, 3, 4, 9
     VP3_IDCT      r2
 
-    movsxdifnidn  r1, r1d
     lea           r3, [r1*3]
     pxor          m4, m4
 %if mmsize == 16
@@ -689,9 +681,6 @@  vp3_idct_funcs
 
 INIT_MMX mmxext
 cglobal vp3_idct_dc_add, 3, 4
-%if ARCH_X86_64
-    movsxd        r1, r1d
-%endif
     movsx         r3, word [r2]
     mov    word [r2], 0
     lea           r2, [r1*3]
diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c
index b320dc5..043e10f 100644
--- a/libavcodec/x86/vp3dsp_init.c
+++ b/libavcodec/x86/vp3dsp_init.c
@@ -25,18 +25,17 @@ 
 #include "libavcodec/vp3dsp.h"
 #include "config.h"
 
-void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block);
-void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block);
+void ff_vp3_idct_put_mmx(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+void ff_vp3_idct_add_mmx(uint8_t *dest, ptrdiff_t stride, int16_t *block);
 
-void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, int16_t *block);
-void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, int16_t *block);
+void ff_vp3_idct_put_sse2(uint8_t *dest, ptrdiff_t stride, int16_t *block);
+void ff_vp3_idct_add_sse2(uint8_t *dest, ptrdiff_t stride, int16_t *block);
 
-void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, int line_size,
-                               int16_t *block);
+void ff_vp3_idct_dc_add_mmxext(uint8_t *dest, ptrdiff_t stride, int16_t *block);
 
-void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride,
+void ff_vp3_v_loop_filter_mmxext(uint8_t *src, ptrdiff_t stride,
                                  int *bounding_values);
-void ff_vp3_h_loop_filter_mmxext(uint8_t *src, int stride,
+void ff_vp3_h_loop_filter_mmxext(uint8_t *src, ptrdiff_t stride,
                                  int *bounding_values);
 
 av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)