vp78: Don't use AV_WN32A on possibly unaligned pointers

Message ID alpine.DEB.2.02.1404141148310.4401@cone.martin.st
State Committed
Headers show

Commit Message

Martin Storsjö April 14, 2014, 8:49 a.m.
On Mon, 14 Apr 2014, Kostya Shishkov wrote:

> On Mon, Apr 14, 2014 at 11:41:40AM +0300, Martin Storsjö wrote:
>> This hopefully fixes the vp7 fate test on sparc.
>> ---
>> None of the current vp8 fate tests trigger these codepaths.
>> ---
>>  libavcodec/vp8.c | 6 +++---
>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
>> index e73ee82..2445b2e 100644
>> --- a/libavcodec/vp8.c
>> +++ b/libavcodec/vp8.c
>> @@ -1151,7 +1151,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
>>              const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
>>                                             : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
>>              if (s->mb_layout == 1)
>> -                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
>> +                AV_WN32(mb->intra4x4_pred_mode_top, modes);
>>              else
>>                  AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
>>              AV_WN32A(s->intra4x4_pred_mode_left, modes);
>> @@ -2158,8 +2158,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
>>          s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
>>          for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
>>              if (mb_y == 0)
>> -                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
>> -                         DC_PRED * 0x01010101);
>> +                AV_WN32((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
>> +                        DC_PRED * 0x01010101);
>>              decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
>>                             prev_frame && prev_frame->seg_map ?
>>                             prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
>> --
>
> looks OK

Alternatively, we could make sure this array is aligned to 4 bytes, like 
this:


Does that sound like a better or worse solution to you?

// Martin

Comments

Kostya Shishkov April 14, 2014, 8:55 a.m. | #1
On Mon, Apr 14, 2014 at 11:49:08AM +0300, Martin Storsjö wrote:
> On Mon, 14 Apr 2014, Kostya Shishkov wrote:
> 
> >On Mon, Apr 14, 2014 at 11:41:40AM +0300, Martin Storsjö wrote:
> >>This hopefully fixes the vp7 fate test on sparc.
> >>---
> >>None of the current vp8 fate tests trigger these codepaths.
> >>---
> >> libavcodec/vp8.c | 6 +++---
> >> 1 file changed, 3 insertions(+), 3 deletions(-)
> >>
> >>diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
> >>index e73ee82..2445b2e 100644
> >>--- a/libavcodec/vp8.c
> >>+++ b/libavcodec/vp8.c
> >>@@ -1151,7 +1151,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
> >>             const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
> >>                                            : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
> >>             if (s->mb_layout == 1)
> >>-                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
> >>+                AV_WN32(mb->intra4x4_pred_mode_top, modes);
> >>             else
> >>                 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
> >>             AV_WN32A(s->intra4x4_pred_mode_left, modes);
> >>@@ -2158,8 +2158,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
> >>         s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
> >>         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
> >>             if (mb_y == 0)
> >>-                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
> >>-                         DC_PRED * 0x01010101);
> >>+                AV_WN32((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
> >>+                        DC_PRED * 0x01010101);
> >>             decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
> >>                            prev_frame && prev_frame->seg_map ?
> >>                            prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
> >>--
> >
> >looks OK
> 
> Alternatively, we could make sure this array is aligned to 4 bytes,
> like this:
> 
> diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
> index 9938905..d4a231f 100644
> --- a/libavcodec/vp8.h
> +++ b/libavcodec/vp8.h
> @@ -91,7 +91,7 @@ typedef struct VP8Macroblock {
>      uint8_t chroma_pred_mode;
>      uint8_t segment;
>      uint8_t intra4x4_pred_mode_mb[16];
> -    uint8_t intra4x4_pred_mode_top[4];
> +    DECLARE_ALIGNED(4, uint8_t, intra4x4_pred_mode_top)[4];
>      VP56mv mv;
>      VP56mv bmv[16];
>  } VP8Macroblock;
> 
> Does that sound like a better or worse solution to you?

worse IMO because it's not so obvious
Luca Barbato April 14, 2014, 9:37 a.m. | #2
On 14/04/14 10:49, Martin Storsjö wrote:
> On Mon, 14 Apr 2014, Kostya Shishkov wrote:
> 
>> On Mon, Apr 14, 2014 at 11:41:40AM +0300, Martin Storsjö wrote:
>>> This hopefully fixes the vp7 fate test on sparc.
>>> ---
>>> None of the current vp8 fate tests trigger these codepaths.
>>> ---
>>>  libavcodec/vp8.c | 6 +++---
>>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
>>> index e73ee82..2445b2e 100644
>>> --- a/libavcodec/vp8.c
>>> +++ b/libavcodec/vp8.c
>>> @@ -1151,7 +1151,7 @@ void decode_mb_mode(VP8Context *s,
>>> VP8Macroblock *mb, int mb_x, int mb_y,
>>>              const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
>>>                                             :
>>> vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
>>>              if (s->mb_layout == 1)
>>> -                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
>>> +                AV_WN32(mb->intra4x4_pred_mode_top, modes);
>>>              else
>>>                  AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
>>>              AV_WN32A(s->intra4x4_pred_mode_left, modes);
>>> @@ -2158,8 +2158,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext
>>> *avctx, VP8Frame *curframe,
>>>          s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
>>>          for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
>>>              if (mb_y == 0)
>>> -                AV_WN32A((mb - s->mb_width -
>>> 1)->intra4x4_pred_mode_top,
>>> -                         DC_PRED * 0x01010101);
>>> +                AV_WN32((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
>>> +                        DC_PRED * 0x01010101);
>>>              decode_mb_mode(s, mb, mb_x, mb_y,
>>> curframe->seg_map->data + mb_xy,
>>>                             prev_frame && prev_frame->seg_map ?
>>>                             prev_frame->seg_map->data + mb_xy : NULL,
>>> 1, is_vp7);
>>> -- 
>>
>> looks OK
> 
> Alternatively, we could make sure this array is aligned to 4 bytes, like
> this:
> 
> diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
> index 9938905..d4a231f 100644
> --- a/libavcodec/vp8.h
> +++ b/libavcodec/vp8.h
> @@ -91,7 +91,7 @@ typedef struct VP8Macroblock {
>      uint8_t chroma_pred_mode;
>      uint8_t segment;
>      uint8_t intra4x4_pred_mode_mb[16];
> -    uint8_t intra4x4_pred_mode_top[4];
> +    DECLARE_ALIGNED(4, uint8_t, intra4x4_pred_mode_top)[4];
>      VP56mv mv;
>      VP56mv bmv[16];
>  } VP8Macroblock;
> 
> Does that sound like a better or worse solution to you?

I doubt one or the other would impact performances, we might "quickly"
check one or the other and see if it influences.

lu
Martin Storsjö April 14, 2014, 11:44 a.m. | #3
On Mon, 14 Apr 2014, Luca Barbato wrote:

> On 14/04/14 10:49, Martin Storsjö wrote:
>> On Mon, 14 Apr 2014, Kostya Shishkov wrote:
>>
>>> On Mon, Apr 14, 2014 at 11:41:40AM +0300, Martin Storsjö wrote:
>>>> This hopefully fixes the vp7 fate test on sparc.
>>>> ---
>>>> None of the current vp8 fate tests trigger these codepaths.
>>>> ---
>>>>  libavcodec/vp8.c | 6 +++---
>>>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
>>>> index e73ee82..2445b2e 100644
>>>> --- a/libavcodec/vp8.c
>>>> +++ b/libavcodec/vp8.c
>>>> @@ -1151,7 +1151,7 @@ void decode_mb_mode(VP8Context *s,
>>>> VP8Macroblock *mb, int mb_x, int mb_y,
>>>>              const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
>>>>                                             :
>>>> vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
>>>>              if (s->mb_layout == 1)
>>>> -                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
>>>> +                AV_WN32(mb->intra4x4_pred_mode_top, modes);
>>>>              else
>>>>                  AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
>>>>              AV_WN32A(s->intra4x4_pred_mode_left, modes);
>>>> @@ -2158,8 +2158,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext
>>>> *avctx, VP8Frame *curframe,
>>>>          s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
>>>>          for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
>>>>              if (mb_y == 0)
>>>> -                AV_WN32A((mb - s->mb_width -
>>>> 1)->intra4x4_pred_mode_top,
>>>> -                         DC_PRED * 0x01010101);
>>>> +                AV_WN32((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
>>>> +                        DC_PRED * 0x01010101);
>>>>              decode_mb_mode(s, mb, mb_x, mb_y,
>>>> curframe->seg_map->data + mb_xy,
>>>>                             prev_frame && prev_frame->seg_map ?
>>>>                             prev_frame->seg_map->data + mb_xy : NULL,
>>>> 1, is_vp7);
>>>> --
>>>
>>> looks OK
>>
>> Alternatively, we could make sure this array is aligned to 4 bytes, like
>> this:
>>
>> diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
>> index 9938905..d4a231f 100644
>> --- a/libavcodec/vp8.h
>> +++ b/libavcodec/vp8.h
>> @@ -91,7 +91,7 @@ typedef struct VP8Macroblock {
>>      uint8_t chroma_pred_mode;
>>      uint8_t segment;
>>      uint8_t intra4x4_pred_mode_mb[16];
>> -    uint8_t intra4x4_pred_mode_top[4];
>> +    DECLARE_ALIGNED(4, uint8_t, intra4x4_pred_mode_top)[4];
>>      VP56mv mv;
>>      VP56mv bmv[16];
>>  } VP8Macroblock;
>>
>> Does that sound like a better or worse solution to you?
>
> I doubt one or the other would impact performances, we might "quickly"
> check one or the other and see if it influences.

Please let me know when you've benchmarked it to see if it makes any 
difference.

// Martin
Luca Barbato April 14, 2014, 1:14 p.m. | #4
On 14/04/14 13:44, Martin Storsjö wrote:
> On Mon, 14 Apr 2014, Luca Barbato wrote:
> 
>> On 14/04/14 10:49, Martin Storsjö wrote:
>>> On Mon, 14 Apr 2014, Kostya Shishkov wrote:
>>>
>>>> On Mon, Apr 14, 2014 at 11:41:40AM +0300, Martin Storsjö wrote:
>>>>> This hopefully fixes the vp7 fate test on sparc.
>>>>> ---
>>>>> None of the current vp8 fate tests trigger these codepaths.
>>>>> ---
>>>>>  libavcodec/vp8.c | 6 +++---
>>>>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
>>>>> index e73ee82..2445b2e 100644
>>>>> --- a/libavcodec/vp8.c
>>>>> +++ b/libavcodec/vp8.c
>>>>> @@ -1151,7 +1151,7 @@ void decode_mb_mode(VP8Context *s,
>>>>> VP8Macroblock *mb, int mb_x, int mb_y,
>>>>>              const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
>>>>>                                             :
>>>>> vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
>>>>>              if (s->mb_layout == 1)
>>>>> -                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
>>>>> +                AV_WN32(mb->intra4x4_pred_mode_top, modes);
>>>>>              else
>>>>>                  AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x,
>>>>> modes);
>>>>>              AV_WN32A(s->intra4x4_pred_mode_left, modes);
>>>>> @@ -2158,8 +2158,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext
>>>>> *avctx, VP8Frame *curframe,
>>>>>          s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
>>>>>          for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
>>>>>              if (mb_y == 0)
>>>>> -                AV_WN32A((mb - s->mb_width -
>>>>> 1)->intra4x4_pred_mode_top,
>>>>> -                         DC_PRED * 0x01010101);
>>>>> +                AV_WN32((mb - s->mb_width -
>>>>> 1)->intra4x4_pred_mode_top,
>>>>> +                        DC_PRED * 0x01010101);
>>>>>              decode_mb_mode(s, mb, mb_x, mb_y,
>>>>> curframe->seg_map->data + mb_xy,
>>>>>                             prev_frame && prev_frame->seg_map ?
>>>>>                             prev_frame->seg_map->data + mb_xy : NULL,
>>>>> 1, is_vp7);
>>>>> -- 
>>>>
>>>> looks OK
>>>
>>> Alternatively, we could make sure this array is aligned to 4 bytes, like
>>> this:
>>>
>>> diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
>>> index 9938905..d4a231f 100644
>>> --- a/libavcodec/vp8.h
>>> +++ b/libavcodec/vp8.h
>>> @@ -91,7 +91,7 @@ typedef struct VP8Macroblock {
>>>      uint8_t chroma_pred_mode;
>>>      uint8_t segment;
>>>      uint8_t intra4x4_pred_mode_mb[16];
>>> -    uint8_t intra4x4_pred_mode_top[4];
>>> +    DECLARE_ALIGNED(4, uint8_t, intra4x4_pred_mode_top)[4];
>>>      VP56mv mv;
>>>      VP56mv bmv[16];
>>>  } VP8Macroblock;
>>>
>>> Does that sound like a better or worse solution to you?
>>
>> I doubt one or the other would impact performances, we might "quickly"
>> check one or the other and see if it influences.
> 
> Please let me know when you've benchmarked it to see if it makes any
> difference.

I'll do tonight, not sure if I could get one of the arm boards set up to
test quickly since had been loads of time since I used it.

I'd say to flip a coin and push one in case I'm late.

lu
Ronald Bultje April 14, 2014, 1:43 p.m. | #5
Hi,

On Mon, Apr 14, 2014 at 4:49 AM, Martin Storsjö <martin@martin.st> wrote:

> On Mon, 14 Apr 2014, Kostya Shishkov wrote:
>
>  On Mon, Apr 14, 2014 at 11:41:40AM +0300, Martin Storsjö wrote:
>>
>>> This hopefully fixes the vp7 fate test on sparc.
>>> ---
>>> None of the current vp8 fate tests trigger these codepaths.
>>> ---
>>>  libavcodec/vp8.c | 6 +++---
>>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
>>> index e73ee82..2445b2e 100644
>>> --- a/libavcodec/vp8.c
>>> +++ b/libavcodec/vp8.c
>>> @@ -1151,7 +1151,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock
>>> *mb, int mb_x, int mb_y,
>>>              const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
>>>                                             :
>>> vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
>>>              if (s->mb_layout == 1)
>>> -                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
>>> +                AV_WN32(mb->intra4x4_pred_mode_top, modes);
>>>              else
>>>                  AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
>>>              AV_WN32A(s->intra4x4_pred_mode_left, modes);
>>> @@ -2158,8 +2158,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext
>>> *avctx, VP8Frame *curframe,
>>>          s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
>>>          for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
>>>              if (mb_y == 0)
>>> -                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
>>> -                         DC_PRED * 0x01010101);
>>> +                AV_WN32((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
>>> +                        DC_PRED * 0x01010101);
>>>              decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data +
>>> mb_xy,
>>>                             prev_frame && prev_frame->seg_map ?
>>>                             prev_frame->seg_map->data + mb_xy : NULL, 1,
>>> is_vp7);
>>> --
>>>
>>
>> looks OK
>>
>
> Alternatively, we could make sure this array is aligned to 4 bytes, like
> this:
>
> diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
> index 9938905..d4a231f 100644
> --- a/libavcodec/vp8.h
> +++ b/libavcodec/vp8.h
> @@ -91,7 +91,7 @@ typedef struct VP8Macroblock {
>      uint8_t chroma_pred_mode;
>      uint8_t segment;
>      uint8_t intra4x4_pred_mode_mb[16];
> -    uint8_t intra4x4_pred_mode_top[4];
> +    DECLARE_ALIGNED(4, uint8_t, intra4x4_pred_mode_top)[4];
>      VP56mv mv;
>      VP56mv bmv[16];
>  } VP8Macroblock;
>
> Does that sound like a better or worse solution to you?


I prefer this.

Ronald
Martin Storsjö April 14, 2014, 5:31 p.m. | #6
On Mon, 14 Apr 2014, Ronald S. Bultje wrote:

> Hi,
>
> On Mon, Apr 14, 2014 at 4:49 AM, Martin Storsjö <martin@martin.st> wrote:
>
>> On Mon, 14 Apr 2014, Kostya Shishkov wrote:
>>
>>  On Mon, Apr 14, 2014 at 11:41:40AM +0300, Martin Storsjö wrote:
>>>
>>>> This hopefully fixes the vp7 fate test on sparc.
>>>> ---
>>>> None of the current vp8 fate tests trigger these codepaths.
>>>> ---
>>>>  libavcodec/vp8.c | 6 +++---
>>>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
>>>> index e73ee82..2445b2e 100644
>>>> --- a/libavcodec/vp8.c
>>>> +++ b/libavcodec/vp8.c
>>>> @@ -1151,7 +1151,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock
>>>> *mb, int mb_x, int mb_y,
>>>>              const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
>>>>                                             :
>>>> vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
>>>>              if (s->mb_layout == 1)
>>>> -                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
>>>> +                AV_WN32(mb->intra4x4_pred_mode_top, modes);
>>>>              else
>>>>                  AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
>>>>              AV_WN32A(s->intra4x4_pred_mode_left, modes);
>>>> @@ -2158,8 +2158,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext
>>>> *avctx, VP8Frame *curframe,
>>>>          s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
>>>>          for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
>>>>              if (mb_y == 0)
>>>> -                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
>>>> -                         DC_PRED * 0x01010101);
>>>> +                AV_WN32((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
>>>> +                        DC_PRED * 0x01010101);
>>>>              decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data +
>>>> mb_xy,
>>>>                             prev_frame && prev_frame->seg_map ?
>>>>                             prev_frame->seg_map->data + mb_xy : NULL, 1,
>>>> is_vp7);
>>>> --
>>>>
>>>
>>> looks OK
>>>
>>
>> Alternatively, we could make sure this array is aligned to 4 bytes, like
>> this:
>>
>> diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
>> index 9938905..d4a231f 100644
>> --- a/libavcodec/vp8.h
>> +++ b/libavcodec/vp8.h
>> @@ -91,7 +91,7 @@ typedef struct VP8Macroblock {
>>      uint8_t chroma_pred_mode;
>>      uint8_t segment;
>>      uint8_t intra4x4_pred_mode_mb[16];
>> -    uint8_t intra4x4_pred_mode_top[4];
>> +    DECLARE_ALIGNED(4, uint8_t, intra4x4_pred_mode_top)[4];
>>      VP56mv mv;
>>      VP56mv bmv[16];
>>  } VP8Macroblock;
>>
>> Does that sound like a better or worse solution to you?
>
>
> I prefer this.

Ok, I'll push this version in a little while then - this one shouldn't 
have any relevant risk of slowdown at least.

// Martin
Ronald Bultje April 14, 2014, 7:15 p.m. | #7
Hi,

On Mon, Apr 14, 2014 at 1:31 PM, Martin Storsjö <martin@martin.st> wrote:

> On Mon, 14 Apr 2014, Ronald S. Bultje wrote:
>
>> On Mon, Apr 14, 2014 at 4:49 AM, Martin Storsjö <martin@martin.st> wrote:
>>
>>> On Mon, 14 Apr 2014, Kostya Shishkov wrote:
>>>  On Mon, Apr 14, 2014 at 11:41:40AM +0300, Martin Storsjö wrote:
>>>
>>>>
>>>>  This hopefully fixes the vp7 fate test on sparc.
>>>>> ---
>>>>> None of the current vp8 fate tests trigger these codepaths.
>>>>> ---
>>>>>  libavcodec/vp8.c | 6 +++---
>>>>>  1 file changed, 3 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
>>>>> index e73ee82..2445b2e 100644
>>>>> --- a/libavcodec/vp8.c
>>>>> +++ b/libavcodec/vp8.c
>>>>> @@ -1151,7 +1151,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock
>>>>> *mb, int mb_x, int mb_y,
>>>>>              const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
>>>>>                                             :
>>>>> vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
>>>>>              if (s->mb_layout == 1)
>>>>> -                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
>>>>> +                AV_WN32(mb->intra4x4_pred_mode_top, modes);
>>>>>              else
>>>>>                  AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x,
>>>>> modes);
>>>>>              AV_WN32A(s->intra4x4_pred_mode_left, modes);
>>>>> @@ -2158,8 +2158,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext
>>>>> *avctx, VP8Frame *curframe,
>>>>>          s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
>>>>>          for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
>>>>>              if (mb_y == 0)
>>>>> -                AV_WN32A((mb - s->mb_width -
>>>>> 1)->intra4x4_pred_mode_top,
>>>>> -                         DC_PRED * 0x01010101);
>>>>> +                AV_WN32((mb - s->mb_width -
>>>>> 1)->intra4x4_pred_mode_top,
>>>>> +                        DC_PRED * 0x01010101);
>>>>>              decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data
>>>>> +
>>>>> mb_xy,
>>>>>                             prev_frame && prev_frame->seg_map ?
>>>>>                             prev_frame->seg_map->data + mb_xy : NULL,
>>>>> 1,
>>>>> is_vp7);
>>>>> --
>>>>>
>>>>>
>>>> looks OK
>>>>
>>>>
>>> Alternatively, we could make sure this array is aligned to 4 bytes, like
>>> this:
>>>
>>> diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
>>> index 9938905..d4a231f 100644
>>> --- a/libavcodec/vp8.h
>>> +++ b/libavcodec/vp8.h
>>> @@ -91,7 +91,7 @@ typedef struct VP8Macroblock {
>>>      uint8_t chroma_pred_mode;
>>>      uint8_t segment;
>>>      uint8_t intra4x4_pred_mode_mb[16];
>>> -    uint8_t intra4x4_pred_mode_top[4];
>>> +    DECLARE_ALIGNED(4, uint8_t, intra4x4_pred_mode_top)[4];
>>>      VP56mv mv;
>>>      VP56mv bmv[16];
>>>  } VP8Macroblock;
>>>
>>> Does that sound like a better or worse solution to you?
>>>
>>
>>
>> I prefer this.
>>
>
> Ok, I'll push this version in a little while then - this one shouldn't
> have any relevant risk of slowdown at least.


Thanks.

It may be worth mentioning that there's precedence for these type of
solutions, look e.g. at the alignment for VP56mv:

https://www.ffmpeg.org/doxygen/trunk/vp56_8h_source.html#l00065
or
http://libav.org/doxygen/release/0.8/vp56_8h_source.html#l00037

Ronald

Patch

diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index 9938905..d4a231f 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -91,7 +91,7 @@  typedef struct VP8Macroblock {
      uint8_t chroma_pred_mode;
      uint8_t segment;
      uint8_t intra4x4_pred_mode_mb[16];
-    uint8_t intra4x4_pred_mode_top[4];
+    DECLARE_ALIGNED(4, uint8_t, intra4x4_pred_mode_top)[4];
      VP56mv mv;
      VP56mv bmv[16];
  } VP8Macroblock;