[06/14] rv34: Change type of array stride parameters to ptrdiff_t

Message ID 1474396595-14910-6-git-send-email-diego@biurrun.de
State New
Headers show

Commit Message

Diego Biurrun Sept. 20, 2016, 6:36 p.m.
This avoids SIMD-optimized functions having to sign-extend their
stride argument manually to be able to do pointer arithmetic.
---
 libavcodec/arm/rv34dsp_neon.S |  4 +--
 libavcodec/arm/rv40dsp_neon.S |  4 +--
 libavcodec/rv30.c             |  4 +--
 libavcodec/rv30dsp.c          | 64 +++++++++++++++++++++++++++++++++++--------
 libavcodec/rv34.c             | 10 ++++---
 libavcodec/rv34.h             |  2 +-
 libavcodec/rv40.c             |  2 +-
 libavcodec/rv40dsp.c          | 18 +++++++-----
 libavcodec/x86/rv34dsp.asm    |  4 +--
 libavcodec/x86/rv40dsp.asm    | 11 +++-----
 libavcodec/x86/rv40dsp_init.c |  4 +--
 11 files changed, 85 insertions(+), 42 deletions(-)

Comments

Martin Storsjo Sept. 29, 2016, 11:21 a.m. | #1
On Tue, 20 Sep 2016, Diego Biurrun wrote:

> This avoids SIMD-optimized functions having to sign-extend their
> stride argument manually to be able to do pointer arithmetic.
> ---
> libavcodec/arm/rv34dsp_neon.S |  4 +--
> libavcodec/arm/rv40dsp_neon.S |  4 +--
> libavcodec/rv30.c             |  4 +--
> libavcodec/rv30dsp.c          | 64 +++++++++++++++++++++++++++++++++++--------
> libavcodec/rv34.c             | 10 ++++---
> libavcodec/rv34.h             |  2 +-
> libavcodec/rv40.c             |  2 +-
> libavcodec/rv40dsp.c          | 18 +++++++-----
> libavcodec/x86/rv34dsp.asm    |  4 +--
> libavcodec/x86/rv40dsp.asm    | 11 +++-----
> libavcodec/x86/rv40dsp_init.c |  4 +--
> 11 files changed, 85 insertions(+), 42 deletions(-)

> diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c
> index 4ca5cc7..64d9f2e 100644
> --- a/libavcodec/rv40dsp.c
> +++ b/libavcodec/rv40dsp.c
> @@ -34,7 +34,8 @@
> #include "rv34dsp.h"
>
> #define RV40_LOWPASS(OPNAME, OP) \
> -static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
> +static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, \
> +                                           ptrdiff_t dstStride, ptrdiff_t srcStride, \
>                                                      const int h, const int C1, const int C2, const int SHIFT){\

Please fix the alignment of this line as well - I don't mind if you do it 
in the same patch.

> diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm
> index 77f6ddb..3a1f2b5 100644
> --- a/libavcodec/x86/rv40dsp.asm
> +++ b/libavcodec/x86/rv40dsp.asm
> @@ -77,14 +77,11 @@ SECTION .text
> ;-----------------------------------------------------------------------------
> ; subpel MC functions:
> ;
> -; void ff_[put|rv40]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, int deststride,
> -;                                          uint8_t *src, int srcstride,
> -;                                          int len, int m);
> +; void ff_[put|avg]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, ptrdiff_t deststride,
> +;                                         uint8_t *src, ptrdiff_t srcstride,
> +;                                         int len, ptrdiff_t m);
> ;----------------------------------------------------------------------
> %macro LOAD  2
> -%if WIN64
> -   movsxd   %1q, %1d
> -%endif
> %ifdef PIC
>    add      %1q, picregq
> %else
> @@ -438,7 +435,7 @@ FILTER_SSSE3  avg
>
> %endmacro
>
> -; void ff_rv40_weight_func_%1(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride)
> +; void ff_rv40_weight_func_%1(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)
> ; %1=size  %2=num of xmm regs
> ; The weights are FP0.14 notation of fractions depending on pts.
> ; For timebases without rounding error (i.e. PAL), the fractions

So, the only existing sign extension this actually changes is for the "int
m" parameter. I don't see the existing code doing sign extension for
srcstride or dststride anywhere. Isn't that a latent bug that you're
fixing silently?

In that case, please first explicitly fix the bug by introducing the right 
sign extensions (which is cherrypickable to release branches), then remove 
them in this patch.

// Martin
Diego Biurrun Sept. 29, 2016, 3:52 p.m. | #2
On Thu, Sep 29, 2016 at 02:21:17PM +0300, Martin Storsjö wrote:
> On Tue, 20 Sep 2016, Diego Biurrun wrote:
> 
> > This avoids SIMD-optimized functions having to sign-extend their
> > stride argument manually to be able to do pointer arithmetic.
> > ---
> > libavcodec/arm/rv34dsp_neon.S |  4 +--
> > libavcodec/arm/rv40dsp_neon.S |  4 +--
> > libavcodec/rv30.c             |  4 +--
> > libavcodec/rv30dsp.c          | 64 +++++++++++++++++++++++++++++++++++--------
> > libavcodec/rv34.c             | 10 ++++---
> > libavcodec/rv34.h             |  2 +-
> > libavcodec/rv40.c             |  2 +-
> > libavcodec/rv40dsp.c          | 18 +++++++-----
> > libavcodec/x86/rv34dsp.asm    |  4 +--
> > libavcodec/x86/rv40dsp.asm    | 11 +++-----
> > libavcodec/x86/rv40dsp_init.c |  4 +--
> > 11 files changed, 85 insertions(+), 42 deletions(-)
> > --- a/libavcodec/rv40dsp.c
> > +++ b/libavcodec/rv40dsp.c
> > @@ -34,7 +34,8 @@
> >
> > #define RV40_LOWPASS(OPNAME, OP) \
> > -static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
> > +static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, \
> > +                                           ptrdiff_t dstStride, ptrdiff_t srcStride, \
> >                                                      const int h, const int C1, const int C2, const int SHIFT){\
> 
> Please fix the alignment of this line as well - I don't mind if you do it 
> in the same patch.

Changed locally.

> > --- a/libavcodec/x86/rv40dsp.asm
> > +++ b/libavcodec/x86/rv40dsp.asm
> > @@ -77,14 +77,11 @@ SECTION .text
> > ;-----------------------------------------------------------------------------
> > ; subpel MC functions:
> > ;
> > -; void ff_[put|rv40]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, int deststride,
> > -;                                          uint8_t *src, int srcstride,
> > -;                                          int len, int m);
> > +; void ff_[put|avg]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, ptrdiff_t deststride,
> > +;                                         uint8_t *src, ptrdiff_t srcstride,
> > +;                                         int len, ptrdiff_t m);
> > ;----------------------------------------------------------------------
> > %macro LOAD  2
> > -%if WIN64
> > -   movsxd   %1q, %1d
> > -%endif
> > %ifdef PIC
> >    add      %1q, picregq
> > %else
> > @@ -438,7 +435,7 @@ FILTER_SSSE3  avg
> >
> > %endmacro
> >
> > -; void ff_rv40_weight_func_%1(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride)
> > +; void ff_rv40_weight_func_%1(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)
> > ; %1=size  %2=num of xmm regs
> > ; The weights are FP0.14 notation of fractions depending on pts.
> > ; For timebases without rounding error (i.e. PAL), the fractions
> 
> So, the only existing sign extension this actually changes is for the "int 
> m" parameter. I don't see where the existing code does sign extension for 
> srcstride nor dststride anywhere. Isn't that a latent bug, that you're 
> fixing silently?

All of the int strides are latent bugs. This file only does sign extension
for WIN64, for one function parameter. Possibly that was the only
environment that triggered an actual, visible bug, I don't know.

> In that case, please first explicitly fix the bug by introducing the right
> sign extensions (which is cherrypickable to release branches), then remove
> them in this patch.

This sounds like overkill to me as we don't know if any real-world
samples are affected. If any such cases crop up I'll gladly add the
necessary sign extension for release branches.

Diego
Janne Grunau Oct. 6, 2016, 7:39 a.m. | #3
On 2016-09-29 17:52:01 +0200, Diego Biurrun wrote:
> On Thu, Sep 29, 2016 at 02:21:17PM +0300, Martin Storsjö wrote:
> > On Tue, 20 Sep 2016, Diego Biurrun wrote:
> > 
> > > This avoids SIMD-optimized functions having to sign-extend their
> > > stride argument manually to be able to do pointer arithmetic.
> > > ---
> > > libavcodec/arm/rv34dsp_neon.S |  4 +--
> > > libavcodec/arm/rv40dsp_neon.S |  4 +--
> > > libavcodec/rv30.c             |  4 +--
> > > libavcodec/rv30dsp.c          | 64 +++++++++++++++++++++++++++++++++++--------
> > > libavcodec/rv34.c             | 10 ++++---
> > > libavcodec/rv34.h             |  2 +-
> > > libavcodec/rv40.c             |  2 +-
> > > libavcodec/rv40dsp.c          | 18 +++++++-----
> > > libavcodec/x86/rv34dsp.asm    |  4 +--
> > > libavcodec/x86/rv40dsp.asm    | 11 +++-----
> > > libavcodec/x86/rv40dsp_init.c |  4 +--
> > > 11 files changed, 85 insertions(+), 42 deletions(-)
> > > --- a/libavcodec/rv40dsp.c
> > > +++ b/libavcodec/rv40dsp.c
> > > @@ -34,7 +34,8 @@
> > >
> > > --- a/libavcodec/x86/rv40dsp.asm
> > > +++ b/libavcodec/x86/rv40dsp.asm
> > > @@ -77,14 +77,11 @@ SECTION .text
> > > ;-----------------------------------------------------------------------------
> > > ; subpel MC functions:
> > > ;
> > > -; void ff_[put|rv40]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, int deststride,
> > > -;                                          uint8_t *src, int srcstride,
> > > -;                                          int len, int m);
> > > +; void ff_[put|avg]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, ptrdiff_t deststride,
> > > +;                                         uint8_t *src, ptrdiff_t srcstride,
> > > +;                                         int len, ptrdiff_t m);
> > > ;----------------------------------------------------------------------
> > > %macro LOAD  2
> > > -%if WIN64
> > > -   movsxd   %1q, %1d
> > > -%endif
> > > %ifdef PIC
> > >    add      %1q, picregq
> > > %else
> > > @@ -438,7 +435,7 @@ FILTER_SSSE3  avg
> > >
> > > %endmacro
> > >
> > > -; void ff_rv40_weight_func_%1(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride)
> > > +; void ff_rv40_weight_func_%1(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)
> > > ; %1=size  %2=num of xmm regs
> > > ; The weights are FP0.14 notation of fractions depending on pts.
> > > ; For timebases without rounding error (i.e. PAL), the fractions
> > 
> > So, the only existing sign extension this actually changes is for the "int 
> > m" parameter. I don't see where the existing code does sign extension for 
> > srcstride nor dststride anywhere. Isn't that a latent bug, that you're 
> > fixing silently?
> 
> All of the int strides are latent bugs. This file only does sign extension
> for WIN64, for one function parameter. Possibly that was the only
> environment that triggered an actual, visible bug, I don't know.

The difference is that the m parameter is negative in some macro
instances. I'm not sure why it made a difference on Win64, but on
non-Windows 64-bit systems the code relies on the parameter already being
sign-extended to 64 bits in the register.

> > In that case, please first explicitly fix the bug by introducing the right
> > sign extensions (which is cherrypickable to release branches), then remove
> > them in this patch.
> 
> This sounds like overkill to me as we don't know if any real-world
> samples are affected. If any such cases creep up I'll gladly add the
> necessary sign extension for release branches.

Please fix this in a separate patch. It differs from the other stride ptrdiff_t 
changes and affects only x86.

Janne

Patch

diff --git a/libavcodec/arm/rv34dsp_neon.S b/libavcodec/arm/rv34dsp_neon.S
index a29123f..06747f4 100644
--- a/libavcodec/arm/rv34dsp_neon.S
+++ b/libavcodec/arm/rv34dsp_neon.S
@@ -67,7 +67,7 @@ 
         vsub.s32        q15, q14, q9    @ z0 - z3
 .endm
 
-/* void rv34_idct_add_c(uint8_t *dst, int stride, int16_t *block) */
+/* void rv34_idct_add_c(uint8_t *dst, ptrdiff_t stride, int16_t *block) */
 function ff_rv34_idct_add_neon, export=1
         mov             r3,  r0
         rv34_inv_transform   r2
@@ -119,7 +119,7 @@  function ff_rv34_inv_transform_noround_neon, export=1
         bx              lr
 endfunc
 
-/* void ff_rv34_idct_dc_add_neon(uint8_t *dst, int stride, int dc) */
+/* void ff_rv34_idct_dc_add_neon(uint8_t *dst, ptrdiff_t stride, int dc) */
 function ff_rv34_idct_dc_add_neon, export=1
         mov             r3,  r0
         vld1.32         {d28[]},  [r0,:32], r1
diff --git a/libavcodec/arm/rv40dsp_neon.S b/libavcodec/arm/rv40dsp_neon.S
index 6bd45eb..1facfb8 100644
--- a/libavcodec/arm/rv40dsp_neon.S
+++ b/libavcodec/arm/rv40dsp_neon.S
@@ -687,7 +687,7 @@  endfunc
 .endm
 
 /* void ff_rv40_weight_func_16_neon(uint8_t *dst, uint8_t *src1, uint8_t *src2,
-                                    int w1, int w2, int stride) */
+                                    int w1, int w2, ptrdiff_t stride) */
 function ff_rv40_weight_func_16_neon, export=1
         ldr             r12, [sp]
         vmov            d0,  r3,  r12
@@ -704,7 +704,7 @@  function ff_rv40_weight_func_16_neon, export=1
 endfunc
 
 /* void ff_rv40_weight_func_8_neon(uint8_t *dst, uint8_t *src1, uint8_t *src2,
-                                   int w1, int w2, int stride) */
+                                   int w1, int w2, ptrdiff_t stride) */
 function ff_rv40_weight_func_8_neon, export=1
         ldr             r12, [sp]
         vmov            d0,  r3,  r12
diff --git a/libavcodec/rv30.c b/libavcodec/rv30.c
index 7218fa3..d549eb7 100644
--- a/libavcodec/rv30.c
+++ b/libavcodec/rv30.c
@@ -125,8 +125,8 @@  static int rv30_decode_mb_info(RV34DecContext *r)
         return rv30_b_types[code];
 }
 
-static inline void rv30_weak_loop_filter(uint8_t *src, const int step,
-                                         const int stride, const int lim)
+static inline void rv30_weak_loop_filter(uint8_t *src, const ptrdiff_t step,
+                                         const ptrdiff_t stride, const int lim)
 {
     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
     int i, diff;
diff --git a/libavcodec/rv30dsp.c b/libavcodec/rv30dsp.c
index 50f4186..d76b374 100644
--- a/libavcodec/rv30dsp.c
+++ b/libavcodec/rv30dsp.c
@@ -31,7 +31,11 @@ 
 #include "rv34dsp.h"
 
 #define RV30_LOWPASS(OPNAME, OP) \
-static void OPNAME ## rv30_tpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, const int C1, const int C2){\
+static void OPNAME ## rv30_tpel8_h_lowpass(uint8_t *dst, const uint8_t *src,\
+                                           ptrdiff_t dstStride,\
+                                           ptrdiff_t srcStride,\
+                                           const int C1, const int C2)\
+{\
     const int h = 8;\
     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
     int i;\
@@ -50,7 +54,11 @@  static void OPNAME ## rv30_tpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int
     }\
 }\
 \
-static void OPNAME ## rv30_tpel8_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, const int C1, const int C2){\
+static void OPNAME ## rv30_tpel8_v_lowpass(uint8_t *dst, const uint8_t *src,\
+                                           ptrdiff_t dstStride,\
+                                           ptrdiff_t srcStride,\
+                                           const int C1, const int C2)\
+{\
     const int w = 8;\
     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
     int i;\
@@ -80,7 +88,10 @@  static void OPNAME ## rv30_tpel8_v_lowpass(uint8_t *dst, const uint8_t *src, int
     }\
 }\
 \
-static void OPNAME ## rv30_tpel8_hv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
+static void OPNAME ## rv30_tpel8_hv_lowpass(uint8_t *dst, const uint8_t *src,\
+                                            ptrdiff_t dstStride,\
+                                            ptrdiff_t srcStride)\
+{\
     const int w = 8;\
     const int h = 8;\
     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
@@ -99,7 +110,10 @@  static void OPNAME ## rv30_tpel8_hv_lowpass(uint8_t *dst, const uint8_t *src, in
     }\
 }\
 \
-static void OPNAME ## rv30_tpel8_hhv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
+static void OPNAME ## rv30_tpel8_hhv_lowpass(uint8_t *dst, const uint8_t *src,\
+                                             ptrdiff_t dstStride,\
+                                             ptrdiff_t srcStride)\
+{\
     const int w = 8;\
     const int h = 8;\
     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
@@ -118,7 +132,10 @@  static void OPNAME ## rv30_tpel8_hhv_lowpass(uint8_t *dst, const uint8_t *src, i
     }\
 }\
 \
-static void OPNAME ## rv30_tpel8_hvv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
+static void OPNAME ## rv30_tpel8_hvv_lowpass(uint8_t *dst, const uint8_t *src,\
+                                             ptrdiff_t dstStride,\
+                                             ptrdiff_t srcStride)\
+{\
     const int w = 8;\
     const int h = 8;\
     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
@@ -137,7 +154,10 @@  static void OPNAME ## rv30_tpel8_hvv_lowpass(uint8_t *dst, const uint8_t *src, i
     }\
 }\
 \
-static void OPNAME ## rv30_tpel8_hhvv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
+static void OPNAME ## rv30_tpel8_hhvv_lowpass(uint8_t *dst, const uint8_t *src,\
+                                              ptrdiff_t dstStride,\
+                                              ptrdiff_t srcStride)\
+{\
     const int w = 8;\
     const int h = 8;\
     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
@@ -155,7 +175,11 @@  static void OPNAME ## rv30_tpel8_hhvv_lowpass(uint8_t *dst, const uint8_t *src,
     }\
 }\
 \
-static void OPNAME ## rv30_tpel16_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, const int C1, const int C2){\
+static void OPNAME ## rv30_tpel16_v_lowpass(uint8_t *dst, const uint8_t *src,\
+                                            ptrdiff_t dstStride,\
+                                            ptrdiff_t srcStride,\
+                                            const int C1, const int C2)\
+{\
     OPNAME ## rv30_tpel8_v_lowpass(dst  , src  , dstStride, srcStride, C1, C2);\
     OPNAME ## rv30_tpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, C1, C2);\
     src += 8*srcStride;\
@@ -164,7 +188,11 @@  static void OPNAME ## rv30_tpel16_v_lowpass(uint8_t *dst, const uint8_t *src, in
     OPNAME ## rv30_tpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, C1, C2);\
 }\
 \
-static void OPNAME ## rv30_tpel16_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, const int C1, const int C2){\
+static void OPNAME ## rv30_tpel16_h_lowpass(uint8_t *dst, const uint8_t *src,\
+                                            ptrdiff_t dstStride,\
+                                            ptrdiff_t srcStride,\
+                                            const int C1, const int C2)\
+{\
     OPNAME ## rv30_tpel8_h_lowpass(dst  , src  , dstStride, srcStride, C1, C2);\
     OPNAME ## rv30_tpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, C1, C2);\
     src += 8*srcStride;\
@@ -173,7 +201,10 @@  static void OPNAME ## rv30_tpel16_h_lowpass(uint8_t *dst, const uint8_t *src, in
     OPNAME ## rv30_tpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, C1, C2);\
 }\
 \
-static void OPNAME ## rv30_tpel16_hv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
+static void OPNAME ## rv30_tpel16_hv_lowpass(uint8_t *dst, const uint8_t *src,\
+                                             ptrdiff_t dstStride,\
+                                             ptrdiff_t srcStride)\
+{\
     OPNAME ## rv30_tpel8_hv_lowpass(dst  , src  , dstStride, srcStride);\
     OPNAME ## rv30_tpel8_hv_lowpass(dst+8, src+8, dstStride, srcStride);\
     src += 8*srcStride;\
@@ -182,7 +213,10 @@  static void OPNAME ## rv30_tpel16_hv_lowpass(uint8_t *dst, const uint8_t *src, i
     OPNAME ## rv30_tpel8_hv_lowpass(dst+8, src+8, dstStride, srcStride);\
 }\
 \
-static void OPNAME ## rv30_tpel16_hhv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
+static void OPNAME ## rv30_tpel16_hhv_lowpass(uint8_t *dst, const uint8_t *src,\
+                                              ptrdiff_t dstStride,\
+                                              ptrdiff_t srcStride)\
+{\
     OPNAME ## rv30_tpel8_hhv_lowpass(dst  , src  , dstStride, srcStride);\
     OPNAME ## rv30_tpel8_hhv_lowpass(dst+8, src+8, dstStride, srcStride);\
     src += 8*srcStride;\
@@ -191,7 +225,10 @@  static void OPNAME ## rv30_tpel16_hhv_lowpass(uint8_t *dst, const uint8_t *src,
     OPNAME ## rv30_tpel8_hhv_lowpass(dst+8, src+8, dstStride, srcStride);\
 }\
 \
-static void OPNAME ## rv30_tpel16_hvv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
+static void OPNAME ## rv30_tpel16_hvv_lowpass(uint8_t *dst, const uint8_t *src,\
+                                              ptrdiff_t dstStride,\
+                                              ptrdiff_t srcStride)\
+{\
     OPNAME ## rv30_tpel8_hvv_lowpass(dst  , src  , dstStride, srcStride);\
     OPNAME ## rv30_tpel8_hvv_lowpass(dst+8, src+8, dstStride, srcStride);\
     src += 8*srcStride;\
@@ -200,7 +237,10 @@  static void OPNAME ## rv30_tpel16_hvv_lowpass(uint8_t *dst, const uint8_t *src,
     OPNAME ## rv30_tpel8_hvv_lowpass(dst+8, src+8, dstStride, srcStride);\
 }\
 \
-static void OPNAME ## rv30_tpel16_hhvv_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride){\
+static void OPNAME ## rv30_tpel16_hhvv_lowpass(uint8_t *dst, const uint8_t *src,\
+                                               ptrdiff_t dstStride,\
+                                               ptrdiff_t srcStride)\
+{\
     OPNAME ## rv30_tpel8_hhvv_lowpass(dst  , src  , dstStride, srcStride);\
     OPNAME ## rv30_tpel8_hhvv_lowpass(dst+8, src+8, dstStride, srcStride);\
     src += 8*srcStride;\
diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index 4220195..8a7bdee 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -42,7 +42,7 @@ 
 #include "rv34data.h"
 #include "rv34.h"
 
-static inline void ZERO8x2(void* dst, int stride)
+static inline void ZERO8x2(void* dst, ptrdiff_t stride)
 {
     fill_rectangle(dst,                 1, 2, stride, 0, 4);
     fill_rectangle(((uint8_t*)(dst))+4, 1, 2, stride, 0, 4);
@@ -953,7 +953,9 @@  static const int ittrans16[4] = {
 /**
  * Perform 4x4 intra prediction.
  */
-static void rv34_pred_4x4_block(RV34DecContext *r, uint8_t *dst, int stride, int itype, int up, int left, int down, int right)
+static void rv34_pred_4x4_block(RV34DecContext *r, uint8_t *dst,
+                                ptrdiff_t stride, int itype,
+                                int up, int left, int down, int right)
 {
     uint8_t *prev = dst - stride + 4;
     uint32_t topleft;
@@ -997,7 +999,7 @@  static inline int adjust_pred16(int itype, int up, int left)
 }
 
 static inline void rv34_process_block(RV34DecContext *r,
-                                      uint8_t *pdst, int stride,
+                                      uint8_t *pdst, ptrdiff_t stride,
                                       int fc, int sc, int q_dc, int q_ac)
 {
     MpegEncContext *s = &r->s;
@@ -1139,7 +1141,7 @@  static void rv34_output_intra(RV34DecContext *r, int8_t *intra_types, int cbp)
     }
 }
 
-static int is_mv_diff_gt_3(int16_t (*motion_val)[2], int step)
+static int is_mv_diff_gt_3(int16_t (*motion_val)[2], ptrdiff_t step)
 {
     int d;
     d = motion_val[0][0] - motion_val[-step][0];
diff --git a/libavcodec/rv34.h b/libavcodec/rv34.h
index 0ac24bf..b331722 100644
--- a/libavcodec/rv34.h
+++ b/libavcodec/rv34.h
@@ -86,7 +86,7 @@  typedef struct RV34DecContext{
     RV34DSPContext rdsp;
     int8_t *intra_types_hist;///< old block types, used for prediction
     int8_t *intra_types;     ///< block types
-    int    intra_types_stride;///< block types array stride
+    ptrdiff_t intra_types_stride; ///< block types array stride
     const uint8_t *luma_dc_quant_i;///< luma subblock DC quantizer for intraframes
     const uint8_t *luma_dc_quant_p;///< luma subblock DC quantizer for interframes
 
diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c
index 0da1312..c55199a 100644
--- a/libavcodec/rv40.c
+++ b/libavcodec/rv40.c
@@ -299,7 +299,7 @@  static const int neighbour_offs_x[4] = { 0,  0, -1, 0 };
 static const int neighbour_offs_y[4] = { 0, -1,  0, 1 };
 
 static void rv40_adaptive_loop_filter(RV34DSPContext *rdsp,
-                                      uint8_t *src, int stride, int dmode,
+                                      uint8_t *src, ptrdiff_t stride, int dmode,
                                       int lim_q1, int lim_p1,
                                       int alpha, int beta, int beta2,
                                       int chroma, int edge, int dir)
diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c
index 4ca5cc7..64d9f2e 100644
--- a/libavcodec/rv40dsp.c
+++ b/libavcodec/rv40dsp.c
@@ -34,7 +34,8 @@ 
 #include "rv34dsp.h"
 
 #define RV40_LOWPASS(OPNAME, OP) \
-static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
+static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, \
+                                           ptrdiff_t dstStride, ptrdiff_t srcStride, \
                                                      const int h, const int C1, const int C2, const int SHIFT){\
     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
     int i;\
@@ -53,7 +54,8 @@  static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int
     }\
 }\
 \
-static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
+static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, const uint8_t *src, \
+                                           ptrdiff_t dstStride, ptrdiff_t srcStride, \
                                            const int w, const int C1, const int C2, const int SHIFT){\
     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
     int i;\
@@ -85,7 +87,8 @@  static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, const uint8_t *src, int
     }\
 }\
 \
-static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
+static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, const uint8_t *src, \
+                                            ptrdiff_t dstStride, ptrdiff_t srcStride, \
                                             const int w, const int C1, const int C2, const int SHIFT){\
     OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
     OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
@@ -95,7 +98,8 @@  static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, const uint8_t *src, in
     OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, w-8, C1, C2, SHIFT);\
 }\
 \
-static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
+static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, const uint8_t *src, \
+                                            ptrdiff_t dstStride, ptrdiff_t srcStride, \
                                             const int h, const int C1, const int C2, const int SHIFT){\
     OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
     OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
@@ -427,7 +431,7 @@  static const uint8_t rv40_dither_r[16] = {
  * weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1
  */
 static av_always_inline void rv40_weak_loop_filter(uint8_t *src,
-                                                   const int step,
+                                                   const ptrdiff_t step,
                                                    const ptrdiff_t stride,
                                                    const int filter_p1,
                                                    const int filter_q1,
@@ -495,7 +499,7 @@  static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
 }
 
 static av_always_inline void rv40_strong_loop_filter(uint8_t *src,
-                                                     const int step,
+                                                     const ptrdiff_t step,
                                                      const ptrdiff_t stride,
                                                      const int alpha,
                                                      const int lims,
@@ -567,7 +571,7 @@  static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
 }
 
 static av_always_inline int rv40_loop_filter_strength(uint8_t *src,
-                                                      int step, ptrdiff_t stride,
+                                                      ptrdiff_t step, ptrdiff_t stride,
                                                       int beta, int beta2,
                                                       int edge,
                                                       int *p1, int *q1)
diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm
index 4d9c35b..8d21c78 100644
--- a/libavcodec/x86/rv34dsp.asm
+++ b/libavcodec/x86/rv34dsp.asm
@@ -63,7 +63,7 @@  rv34_idct dc
 %define IDCT_DC IDCT_DC_NOROUND
 rv34_idct dc_noround
 
-; ff_rv34_idct_dc_add_mmx(uint8_t *dst, int stride, int dc);
+; ff_rv34_idct_dc_add_mmx(uint8_t *dst, ptrdiff_t stride, int dc);
 INIT_MMX mmx
 cglobal rv34_idct_dc_add, 3, 3
     ; calculate DC
@@ -166,7 +166,7 @@  cglobal rv34_idct_add, 3,3,0, d, s, b
     COL_TRANSFORM  [dq+sq], mm7, mm0, mm4
     ret
 
-; ff_rv34_idct_dc_add_sse4(uint8_t *dst, int stride, int dc);
+; ff_rv34_idct_dc_add_sse4(uint8_t *dst, ptrdiff_t stride, int dc);
 INIT_XMM sse4
 cglobal rv34_idct_dc_add, 3, 3, 6
     ; load data
diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm
index 77f6ddb..3a1f2b5 100644
--- a/libavcodec/x86/rv40dsp.asm
+++ b/libavcodec/x86/rv40dsp.asm
@@ -77,14 +77,11 @@  SECTION .text
 ;-----------------------------------------------------------------------------
 ; subpel MC functions:
 ;
-; void ff_[put|rv40]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, int deststride,
-;                                          uint8_t *src, int srcstride,
-;                                          int len, int m);
+; void ff_[put|avg]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, ptrdiff_t deststride,
+;                                         uint8_t *src, ptrdiff_t srcstride,
+;                                         int len, ptrdiff_t m);
 ;----------------------------------------------------------------------
 %macro LOAD  2
-%if WIN64
-   movsxd   %1q, %1d
-%endif
 %ifdef PIC
    add      %1q, picregq
 %else
@@ -438,7 +435,7 @@  FILTER_SSSE3  avg
 
 %endmacro
 
-; void ff_rv40_weight_func_%1(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride)
+; void ff_rv40_weight_func_%1(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)
 ; %1=size  %2=num of xmm regs
 ; The weights are FP0.14 notation of fractions depending on pts.
 ; For timebases without rounding error (i.e. PAL), the fractions
diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c
index 7bf3ecd..3384585 100644
--- a/libavcodec/x86/rv40dsp_init.c
+++ b/libavcodec/x86/rv40dsp_init.c
@@ -107,11 +107,11 @@  static void OP ## rv40_qpel ##SIZE ##_mc ##PH ##PV ##OPT(uint8_t *dst,  \
 void ff_ ##OP ##rv40_qpel_h ##OPT(uint8_t *dst, ptrdiff_t dstStride,    \
                                   const uint8_t *src,                   \
                                   ptrdiff_t srcStride,                  \
-                                  int len, int m);                      \
+                                  int len, ptrdiff_t m);                \
 void ff_ ##OP ##rv40_qpel_v ##OPT(uint8_t *dst, ptrdiff_t dstStride,    \
                                   const uint8_t *src,                   \
                                   ptrdiff_t srcStride,                  \
-                                  int len, int m);                      \
+                                  int len, ptrdiff_t m);                \
 QPEL_FUNCS_DECL(OP, 0, 1, OPT)                                          \
 QPEL_FUNCS_DECL(OP, 0, 3, OPT)                                          \
 QPEL_FUNCS_DECL(OP, 1, 0, OPT)                                          \