Message ID | 1552384248-23652-1-git-send-email-martin@martin.st |
---|---|
State | Committed |
Commit | 0676de935b1e81bc5b5698fef3e7d48ff2ea77ff |
Headers | show |
Series | |
Related | show |
On 12/03/2019 10:50, Martin Storsjö wrote: > Previously, the 420 version was used even for 422. > > This fixes occasional checkasm failures. > --- > libavcodec/arm/h264dsp_init_arm.c | 8 +++++++- > libavcodec/arm/h264dsp_neon.S | 19 +++++++++++++++++++ > 2 files changed, 26 insertions(+), 1 deletion(-) > > diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c > index 7afd350..617632c 100644 > --- a/libavcodec/arm/h264dsp_init_arm.c > +++ b/libavcodec/arm/h264dsp_init_arm.c > @@ -33,6 +33,8 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, > int beta, int8_t *tc0); > void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, > int beta, int8_t *tc0); > +void ff_h264_h_loop_filter_chroma422_neon(uint8_t *pix, int stride, int alpha, > + int beta, int8_t *tc0); > > void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height, > int log2_den, int weight, int offset); > @@ -76,7 +78,11 @@ static av_cold void h264dsp_init_neon(H264DSPContext *c, const int bit_depth, > c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; > c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon; > c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; > - c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; > + > + if (chroma_format_idc <= 1) > + c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; > + else > + c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon; > > c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon; > c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon; > diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S > index 5e75565..783e0f6 100644 > --- a/libavcodec/arm/h264dsp_neon.S > +++ b/libavcodec/arm/h264dsp_neon.S > @@ -237,6 +237,7 @@ function ff_h264_h_loop_filter_chroma_neon, export=1 > h264_loop_filter_start > > sub r0, r0, #2 > +h_loop_filter_chroma420: > vld1.32 {d18[0]}, 
[r0], r1 > vld1.32 {d16[0]}, [r0], r1 > vld1.32 {d0[0]}, [r0], r1 > @@ -271,6 +272,24 @@ function ff_h264_h_loop_filter_chroma_neon, export=1 > bx lr > endfunc > > +function ff_h264_h_loop_filter_chroma422_neon, export=1 > + h264_loop_filter_start > + push {r4, lr} > + add r4, r0, r1 > + add r1, r1, r1 > + sub r0, r0, #2 > + > + bl h_loop_filter_chroma420 > + > + ldr r12, [sp, #8] > + ldr r12, [r12] > + vmov.32 d24[0], r12 > + sub r0, r4, #2 > + > + bl h_loop_filter_chroma420 > + pop {r4, pc} > +endfunc > + > @ Biweighted prediction > > .macro biweight_16 macs, macd > Sure
```diff
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index 7afd350..617632c 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -33,6 +33,8 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
                                        int beta, int8_t *tc0);
 void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
                                        int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_chroma422_neon(uint8_t *pix, int stride, int alpha,
+                                          int beta, int8_t *tc0);
 
 void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height,
                                    int log2_den, int weight, int offset);
@@ -76,7 +78,11 @@ static av_cold void h264dsp_init_neon(H264DSPContext *c, const int bit_depth,
     c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
     c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
     c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
-    c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+
+    if (chroma_format_idc <= 1)
+        c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+    else
+        c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma422_neon;
 
     c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
     c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
index 5e75565..783e0f6 100644
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -237,6 +237,7 @@ function ff_h264_h_loop_filter_chroma_neon, export=1
         h264_loop_filter_start
 
         sub             r0, r0, #2
+h_loop_filter_chroma420:
         vld1.32         {d18[0]}, [r0], r1
         vld1.32         {d16[0]}, [r0], r1
         vld1.32         {d0[0]}, [r0], r1
@@ -271,6 +272,24 @@ function ff_h264_h_loop_filter_chroma_neon, export=1
         bx              lr
 endfunc
 
+function ff_h264_h_loop_filter_chroma422_neon, export=1
+        h264_loop_filter_start
+        push            {r4, lr}
+        add             r4, r0, r1
+        add             r1, r1, r1
+        sub             r0, r0, #2
+
+        bl              h_loop_filter_chroma420
+
+        ldr             r12, [sp, #8]
+        ldr             r12, [r12]
+        vmov.32         d24[0], r12
+        sub             r0, r4, #2
+
+        bl              h_loop_filter_chroma420
+        pop             {r4, pc}
+endfunc
+
 @ Biweighted prediction
 
 .macro biweight_16 macs, macd
```