[4/6] vp8: Change type of stride parameters to ptrdiff_t

Message ID 1472036543-32662-5-git-send-email-diego@biurrun.de
State New
Headers show

Commit Message

Diego Biurrun Aug. 24, 2016, 11:02 a.m.
This avoids SIMD-optimized functions having to sign-extend their
line size argument manually to be able to do pointer arithmetic.
---
 libavcodec/arm/vp8dsp_armv6.S        | 24 ++++++++++++------------
 libavcodec/vp8.c                     | 17 +++++++++--------
 libavcodec/vp8.h                     |  4 ++--
 libavcodec/x86/vp8dsp.asm            | 12 ++++++------
 libavcodec/x86/vp8dsp_loopfilter.asm |  6 +++---
 tests/checkasm/vp8dsp.c              |  4 ++--
 6 files changed, 34 insertions(+), 33 deletions(-)

Comments

Martin Storsjo Aug. 24, 2016, 11:08 a.m. | #1
On Wed, 24 Aug 2016, Diego Biurrun wrote:

> This avoids SIMD-optimized functions having to sign-extend their
> line size argument manually to be able to do pointer arithmetic.

Actually, the parameter already is ptrdiff_t in most places, which is why 
this patch of yours actually isn't doing anything of what your commit 
message says (simplifying asm by avoiding sign extension - you never
actually touch the asm here).

This was already mostly done in bd66f073fe7286bd3c; only some of the
function signatures in comments were left behind unchanged.

The patch itself probably is fine, but please make sure that the commit 
message reflects what the patch actually does.

// Martin

Patch

diff --git a/libavcodec/arm/vp8dsp_armv6.S b/libavcodec/arm/vp8dsp_armv6.S
index 565361e..9eb9734 100644
--- a/libavcodec/arm/vp8dsp_armv6.S
+++ b/libavcodec/arm/vp8dsp_armv6.S
@@ -192,7 +192,7 @@  function ff_vp8_luma_dc_wht_dc_armv6, export=1
         bx              lr
 endfunc
 
-@ void vp8_idct_add(uint8_t *dst, int16_t block[16], int stride)
+@ void vp8_idct_add(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
 function ff_vp8_idct_add_armv6, export=1
         push            {r4-r12, lr}
         sub             sp,  sp,  #32
@@ -314,7 +314,7 @@  function ff_vp8_idct_add_armv6, export=1
         pop             {r4-r12, pc}
 endfunc
 
-@ void vp8_idct_dc_add(uint8_t *dst, int16_t block[16], int stride)
+@ void vp8_idct_dc_add(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
 function ff_vp8_idct_dc_add_armv6, export=1
         push            {r4-r6, lr}
         add             r6,  r0,  r2,  lsl #1
@@ -355,7 +355,7 @@  function ff_vp8_idct_dc_add_armv6, export=1
         pop             {r4-r6, pc}
 endfunc
 
-@ void vp8_idct_dc_add4uv(uint8_t *dst, int16_t block[4][16], int stride)
+@ void vp8_idct_dc_add4uv(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
 function ff_vp8_idct_dc_add4uv_armv6, export=1
         push            {r4, lr}
 
@@ -371,7 +371,7 @@  function ff_vp8_idct_dc_add4uv_armv6, export=1
         pop             {r4, pc}
 endfunc
 
-@ void vp8_idct_dc_add4y(uint8_t *dst, int16_t block[4][16], int stride)
+@ void vp8_idct_dc_add4y(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
 function ff_vp8_idct_dc_add4y_armv6, export=1
         push            {r4, lr}
 
@@ -455,7 +455,7 @@  endfunc
         eor             r5,  r5,  r2            @ *oq0 = u ^ 0x80
 .endm
 
-@ void vp8_v_loop_filter16_simple(uint8_t *dst, int stride, int flim)
+@ void vp8_v_loop_filter16_simple(uint8_t *dst, ptrdiff_t stride, int flim)
 function ff_vp8_v_loop_filter16_simple_armv6, export=1
         push            {r4-r11, lr}
 
@@ -866,7 +866,7 @@  function ff_vp8_v_loop_filter8uv_armv6, export=1
         b               vp8_v_loop_filter_armv6
 endfunc
 
-@ void vp8_h_loop_filter16_simple(uint8_t *dst, int stride, int flim)
+@ void vp8_h_loop_filter16_simple(uint8_t *dst, ptrdiff_t stride, int flim)
 function ff_vp8_h_loop_filter16_simple_armv6, export=1
         push            {r4-r11, lr}
         orr             r12, r2,  r2,  lsl #16
@@ -1113,8 +1113,8 @@  endfunc
 
 @ MC
 
-@ void put_vp8_pixels16(uint8_t *dst, int dststride, uint8_t *src,
-@                       int srcstride, int h, int mx, int my)
+@ void put_vp8_pixels16(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
+@                       ptrdiff_t srcstride, int h, int mx, int my)
 function ff_put_vp8_pixels16_armv6, export=1
         push            {r4-r11}
         ldr             r12, [sp, #32]          @ h
@@ -1137,8 +1137,8 @@  function ff_put_vp8_pixels16_armv6, export=1
         bx              lr
 endfunc
 
-@ void put_vp8_pixels8(uint8_t *dst, int dststride, uint8_t *src,
-@                      int srcstride, int h, int mx, int my)
+@ void put_vp8_pixels8(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
+@                      ptrdiff_t srcstride, int h, int mx, int my)
 function ff_put_vp8_pixels8_armv6, export=1
         push            {r4-r11}
         ldr             r12, [sp, #32]          @ h
@@ -1161,8 +1161,8 @@  function ff_put_vp8_pixels8_armv6, export=1
         bx              lr
 endfunc
 
-@ void put_vp8_pixels4(uint8_t *dst, int dststride, uint8_t *src,
-@                      int srcstride, int h, int mx, int my)
+@ void put_vp8_pixels4(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
+@                      ptrdiff_t srcstride, int h, int mx, int my)
 function ff_put_vp8_pixels4_armv6, export=1
         ldr             r12, [sp, #0]           @ h
         push            {r4-r6,lr}
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index cd7f692..546124c 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -426,7 +426,7 @@  static void copy_luma(AVFrame *dst, AVFrame *src, int width, int height)
 }
 
 static void fade(uint8_t *dst, uint8_t *src,
-                 int width, int height, int linesize,
+                 int width, int height, ptrdiff_t linesize,
                  int alpha, int beta)
 {
     int i, j;
@@ -1427,7 +1427,7 @@  void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
 static av_always_inline
 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
                       uint8_t *src_cb, uint8_t *src_cr,
-                      int linesize, int uvlinesize, int simple)
+                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
 {
     AV_COPY128(top_border, src_y + 15 * linesize);
     if (!simple) {
@@ -1438,7 +1438,7 @@  void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
 
 static av_always_inline
 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
-                    uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
+                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
                     int mb_y, int mb_width, int simple, int xchg)
 {
     uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
@@ -1591,7 +1591,8 @@  void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
         for (y = 0; y < 4; y++) {
             uint8_t *topright = ptr + 4 - s->linesize;
             for (x = 0; x < 4; x++) {
-                int copy = 0, linesize = s->linesize;
+                int copy = 0;
+                ptrdiff_t linesize = s->linesize;
                 uint8_t *dst = ptr + 4 * x;
                 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
 
@@ -1697,7 +1698,7 @@  void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
     uint8_t *src = ref->f->data[0];
 
     if (AV_RN32A(mv)) {
-        int src_linesize = linesize;
+        ptrdiff_t src_linesize = linesize;
 
         int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
         int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
@@ -2041,8 +2042,8 @@  void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
     int filter_level = f->filter_level;
     int inner_limit = f->inner_limit;
     int inner_filter = f->inner_filter;
-    int linesize = s->linesize;
-    int uvlinesize = s->uvlinesize;
+    ptrdiff_t linesize   = s->linesize;
+    ptrdiff_t uvlinesize = s->uvlinesize;
     static const uint8_t hev_thresh_lut[2][64] = {
         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -2128,7 +2129,7 @@  void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
     int filter_level = f->filter_level;
     int inner_limit  = f->inner_limit;
     int inner_filter = f->inner_filter;
-    int linesize     = s->linesize;
+    ptrdiff_t linesize = s->linesize;
 
     if (!filter_level)
         return;
diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index ba5e62a..65948e1 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -143,8 +143,8 @@  typedef struct VP8Context {
 
     uint16_t mb_width;   /* number of horizontal MB */
     uint16_t mb_height;  /* number of vertical MB */
-    int linesize;
-    int uvlinesize;
+    ptrdiff_t linesize;
+    ptrdiff_t uvlinesize;
 
     uint8_t keyframe;
     uint8_t deblock_filter;
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index e17d3b2..b0f6b83 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -156,8 +156,8 @@  SECTION .text
 ;-------------------------------------------------------------------------------
 ; subpel MC functions:
 ;
-; void ff_put_vp8_epel<size>_h<htap>v<vtap>_<opt>(uint8_t *dst, int deststride,
-;                                                 uint8_t *src, int srcstride,
+; void ff_put_vp8_epel<size>_h<htap>v<vtap>_<opt>(uint8_t *dst, ptrdiff_t deststride,
+;                                                 uint8_t *src, ptrdiff_t srcstride,
 ;                                                 int height,   int mx, int my);
 ;-------------------------------------------------------------------------------
 
@@ -884,7 +884,7 @@  cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
     REP_RET
 
 ;-----------------------------------------------------------------------------
-; void ff_vp8_idct_dc_add_<opt>(uint8_t *dst, int16_t block[16], int stride);
+; void ff_vp8_idct_dc_add_<opt>(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
 ;-----------------------------------------------------------------------------
 
 %macro ADD_DC 4
@@ -962,7 +962,7 @@  cglobal vp8_idct_dc_add, 3, 3, 6, dst, block, stride
     RET
 
 ;-----------------------------------------------------------------------------
-; void ff_vp8_idct_dc_add4y_<opt>(uint8_t *dst, int16_t block[4][16], int stride);
+; void ff_vp8_idct_dc_add4y_<opt>(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
 ;-----------------------------------------------------------------------------
 
 %if ARCH_X86_32
@@ -1035,7 +1035,7 @@  cglobal vp8_idct_dc_add4y, 3, 3, 6, dst, block, stride
     RET
 
 ;-----------------------------------------------------------------------------
-; void ff_vp8_idct_dc_add4uv_<opt>(uint8_t *dst, int16_t block[4][16], int stride);
+; void ff_vp8_idct_dc_add4uv_<opt>(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
 ;-----------------------------------------------------------------------------
 
 INIT_MMX mmx
@@ -1077,7 +1077,7 @@  cglobal vp8_idct_dc_add4uv, 3, 3, 0, dst, block, stride
     RET
 
 ;-----------------------------------------------------------------------------
-; void ff_vp8_idct_add_<opt>(uint8_t *dst, int16_t block[16], int stride);
+; void ff_vp8_idct_add_<opt>(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
 ;-----------------------------------------------------------------------------
 
 ; calculate %1=mul_35468(%1)-mul_20091(%2); %2=mul_20091(%1)+mul_35468(%2)
diff --git a/libavcodec/x86/vp8dsp_loopfilter.asm b/libavcodec/x86/vp8dsp_loopfilter.asm
index 5d792e8..9ffd83a 100644
--- a/libavcodec/x86/vp8dsp_loopfilter.asm
+++ b/libavcodec/x86/vp8dsp_loopfilter.asm
@@ -43,7 +43,7 @@  cextern pb_80
 SECTION .text
 
 ;-----------------------------------------------------------------------------
-; void ff_vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, int stride, int flim);
+; void ff_vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, ptrdiff_t stride, int flim);
 ;-----------------------------------------------------------------------------
 
 ; macro called with 7 mm register indexes as argument, and 4 regular registers
@@ -429,7 +429,7 @@  INIT_XMM sse4
 SIMPLE_LOOPFILTER h, 5
 
 ;-----------------------------------------------------------------------------
-; void ff_vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride,
+; void ff_vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] ptrdiff_t stride,
 ;                                               int flimE, int flimI, int hev_thr);
 ;-----------------------------------------------------------------------------
 
@@ -921,7 +921,7 @@  INNER_LOOPFILTER v,  8
 INNER_LOOPFILTER h,  8
 
 ;-----------------------------------------------------------------------------
-; void ff_vp8_h/v_loop_filter<size>_mbedge_<opt>(uint8_t *dst, [uint8_t *v,] int stride,
+; void ff_vp8_h/v_loop_filter<size>_mbedge_<opt>(uint8_t *dst, [uint8_t *v,] ptrdiff_t stride,
 ;                                                int flimE, int flimI, int hev_thr);
 ;-----------------------------------------------------------------------------
 
diff --git a/tests/checkasm/vp8dsp.c b/tests/checkasm/vp8dsp.c
index 0260d63..40e4061 100644
--- a/tests/checkasm/vp8dsp.c
+++ b/tests/checkasm/vp8dsp.c
@@ -171,7 +171,7 @@  static void check_idct_dc4(void)
     for (chroma = 0; chroma <= 1; chroma++) {
         void (*idct4dc)(uint8_t *, int16_t[4][16], ptrdiff_t) = chroma ? d.vp8_idct_dc_add4uv : d.vp8_idct_dc_add4y;
         if (check_func(idct4dc, "vp8_idct_dc_add4%s", chroma ? "uv" : "y")) {
-            int stride = chroma ? 8 : 16;
+            ptrdiff_t stride = chroma ? 8 : 16;
             int w      = chroma ? 2 : 4;
             for (i = 0; i < 4; i++) {
                 int blockx = 4 * (i % w);
@@ -365,7 +365,7 @@  static void randomize_loopfilter_buffers(int lineoff, int str,
 }
 
 // Fill the buffer with random pixels
-static void fill_loopfilter_buffers(uint8_t *buf, int stride, int w, int h)
+static void fill_loopfilter_buffers(uint8_t *buf, ptrdiff_t stride, int w, int h)
 {
     int x, y;
     for (y = 0; y < h; y++)