[v2] avcodec/qsv: fix async support

Message ID 1532602989-23620-1-git-send-email-dmitry.v.rogozhkin@intel.com
State New
Headers show
Series
  • [v2] avcodec/qsv: fix async support
Related show

Commit Message

Rogozhkin, Dmitry V July 26, 2018, 11:03 a.m.
Current implementations of the qsv components handle the async level incorrectly: they
actually try to work at level async+1, stepping into MFX_WRN_DEVICE_BUSY and a polling
loop. This change addresses this misbehaviour.

Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Maxym Dmytrychenko <maxim.d33@gmail.com>
Cc: Zhong Li <zhong.li@intel.com>
---
 libavcodec/qsvdec.c       | 15 ++++++++++++---
 libavcodec/qsvdec_h2645.c |  4 ++--
 libavcodec/qsvdec_other.c |  2 +-
 libavcodec/qsvenc.c       | 17 +++++++++++++----
 libavcodec/qsvenc.h       |  2 +-
 5 files changed, 29 insertions(+), 11 deletions(-)

Comments

Maxym Dmytrychenko July 27, 2018, 7:01 a.m. | #1
thanks and will proceed with it

On Thu, Jul 26, 2018 at 9:06 PM Dmitry Rogozhkin <
dmitry.v.rogozhkin@intel.com> wrote:

> Current implementations of qsv components incorrectly work with async
> level, they
> actually try to work in async+1 level stepping into MFX_WRN_DEVICE_BUSY
> and polling
> loop. This change address this misbehaviour.
>
> Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> Cc: Maxym Dmytrychenko <maxim.d33@gmail.com>
> Cc: Zhong Li <zhong.li@intel.com>
> ---
>  libavcodec/qsvdec.c       | 15 ++++++++++++---
>  libavcodec/qsvdec_h2645.c |  4 ++--
>  libavcodec/qsvdec_other.c |  2 +-
>  libavcodec/qsvenc.c       | 17 +++++++++++++----
>  libavcodec/qsvenc.h       |  2 +-
>  5 files changed, 29 insertions(+), 11 deletions(-)
>
> diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
> index 32f1fe7..22e7a46 100644
> --- a/libavcodec/qsvdec.c
> +++ b/libavcodec/qsvdec.c
> @@ -110,6 +110,16 @@ static int qsv_init_session(AVCodecContext *avctx,
> QSVContext *q, mfxSession ses
>      return 0;
>  }
>
> +static inline unsigned int qsv_fifo_item_size(void)
> +{
> +    return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*);
> +}
> +
> +static inline unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
> +{
> +    return av_fifo_size(fifo) / qsv_fifo_item_size();
> +}
> +
>  static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
>  {
>      const AVPixFmtDescriptor *desc;
> @@ -125,8 +135,7 @@ static int qsv_decode_init(AVCodecContext *avctx,
> QSVContext *q)
>          return AVERROR_BUG;
>
>      if (!q->async_fifo) {
> -        q->async_fifo = av_fifo_alloc((1 + q->async_depth) *
> -                                      (sizeof(mfxSyncPoint*) +
> sizeof(QSVFrame*)));
> +        q->async_fifo = av_fifo_alloc(q->async_depth *
> qsv_fifo_item_size());
>          if (!q->async_fifo)
>              return AVERROR(ENOMEM);
>      }
> @@ -384,7 +393,7 @@ static int qsv_decode(AVCodecContext *avctx,
> QSVContext *q,
>          av_freep(&sync);
>      }
>
> -    if (!av_fifo_space(q->async_fifo) ||
> +    if ((qsv_fifo_size(q->async_fifo) >= q->async_depth) ||
>          (!avpkt->size && av_fifo_size(q->async_fifo))) {
>          AVFrame *src_frame;
>
> diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c
> index 831252f..d9d2318 100644
> --- a/libavcodec/qsvdec_h2645.c
> +++ b/libavcodec/qsvdec_h2645.c
> @@ -186,7 +186,7 @@ static void qsv_decode_flush(AVCodecContext *avctx)
>
>  #if CONFIG_HEVC_QSV_DECODER
>  static const AVOption hevc_options[] = {
> -    { "async_depth", "Internal parallelization depth, the higher the
> value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, {
> .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
> +    { "async_depth", "Internal parallelization depth, the higher the
> value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, {
> .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
>
>      { "load_plugin", "A user plugin to load in an internal session",
> OFFSET(load_plugin), AV_OPT_TYPE_INT, { .i64 = LOAD_PLUGIN_DEFAULT },
> LOAD_PLUGIN_NONE, LOAD_PLUGIN_HEVC_HW, VD, "load_plugin" },
>      { "none",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_NONE
> },    0, 0, VD, "load_plugin" },
> @@ -229,7 +229,7 @@ AVCodec ff_hevc_qsv_decoder = {
>
>  #if CONFIG_H264_QSV_DECODER
>  static const AVOption options[] = {
> -    { "async_depth", "Internal parallelization depth, the higher the
> value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, {
> .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
> +    { "async_depth", "Internal parallelization depth, the higher the
> value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, {
> .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
>      { NULL },
>  };
>
> diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c
> index 3c872dc..f6e08a2 100644
> --- a/libavcodec/qsvdec_other.c
> +++ b/libavcodec/qsvdec_other.c
> @@ -159,7 +159,7 @@ static void qsv_decode_flush(AVCodecContext *avctx)
>  #define OFFSET(x) offsetof(QSVOtherContext, x)
>  #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
>  static const AVOption options[] = {
> -    { "async_depth", "Internal parallelization depth, the higher the
> value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, {
> .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
> +    { "async_depth", "Internal parallelization depth, the higher the
> value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, {
> .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
>      { NULL },
>  };
>
> diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
> index 3ce5ffe..b08fa9d 100644
> --- a/libavcodec/qsvenc.c
> +++ b/libavcodec/qsvenc.c
> @@ -777,7 +777,7 @@ static int qsv_init_opaque_alloc(AVCodecContext
> *avctx, QSVEncContext *q)
>      mfxFrameSurface1 *surfaces;
>      int nb_surfaces, i;
>
> -    nb_surfaces = qsv->nb_opaque_surfaces + q->req.NumFrameSuggested +
> q->async_depth;
> +    nb_surfaces = qsv->nb_opaque_surfaces + q->req.NumFrameSuggested;
>
>      q->opaque_alloc_buf = av_buffer_allocz(sizeof(*surfaces) *
> nb_surfaces);
>      if (!q->opaque_alloc_buf)
> @@ -848,6 +848,16 @@ static int qsvenc_init_session(AVCodecContext *avctx,
> QSVEncContext *q)
>      return 0;
>  }
>
> +static inline unsigned int qsv_fifo_item_size(void)
> +{
> +    return sizeof(AVPacket) + sizeof(mfxSyncPoint*) +
> sizeof(mfxBitstream*);
> +}
> +
> +static inline unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
> +{
> +    return av_fifo_size(fifo) / qsv_fifo_item_size();
> +}
> +
>  int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q)
>  {
>      int iopattern = 0;
> @@ -856,8 +866,7 @@ int ff_qsv_enc_init(AVCodecContext *avctx,
> QSVEncContext *q)
>
>      q->param.AsyncDepth = q->async_depth;
>
> -    q->async_fifo = av_fifo_alloc((1 + q->async_depth) *
> -                                  (sizeof(AVPacket) +
> sizeof(mfxSyncPoint*) + sizeof(mfxBitstream*)));
> +    q->async_fifo = av_fifo_alloc(q->async_depth * qsv_fifo_item_size());
>      if (!q->async_fifo)
>          return AVERROR(ENOMEM);
>
> @@ -1214,7 +1223,7 @@ int ff_qsv_encode(AVCodecContext *avctx,
> QSVEncContext *q,
>      if (ret < 0)
>          return ret;
>
> -    if (!av_fifo_space(q->async_fifo) ||
> +    if ((qsv_fifo_size(q->async_fifo) >= q->async_depth) ||
>          (!frame && av_fifo_size(q->async_fifo))) {
>          AVPacket new_pkt;
>          mfxBitstream *bs;
> diff --git a/libavcodec/qsvenc.h b/libavcodec/qsvenc.h
> index bb175c5..9a185ce 100644
> --- a/libavcodec/qsvenc.h
> +++ b/libavcodec/qsvenc.h
> @@ -67,7 +67,7 @@
>  #endif
>
>  #define QSV_COMMON_OPTS \
> -{ "async_depth", "Maximum processing parallelism",
> OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT },
> 0, INT_MAX, VE },                          \
> +{ "async_depth", "Maximum processing parallelism",
> OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT },
> 1, INT_MAX, VE },                          \
>  { "avbr_accuracy",    "Accuracy of the AVBR ratecontrol",
> OFFSET(qsv.avbr_accuracy),    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE
> },                             \
>  { "avbr_convergence", "Convergence of the AVBR ratecontrol",
> OFFSET(qsv.avbr_convergence), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE
> },                             \
>  { "preset", NULL, OFFSET(qsv.preset), AV_OPT_TYPE_INT, { .i64 =
> MFX_TARGETUSAGE_BALANCED }, MFX_TARGETUSAGE_BEST_QUALITY,
> MFX_TARGETUSAGE_BEST_SPEED,   VE, "preset" }, \
> --
> 1.8.3.1
>
>

Patch

diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
index 32f1fe7..22e7a46 100644
--- a/libavcodec/qsvdec.c
+++ b/libavcodec/qsvdec.c
@@ -110,6 +110,16 @@  static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession ses
     return 0;
 }
 
+static inline unsigned int qsv_fifo_item_size(void)
+{
+    return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*);
+}
+
+static inline unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
+{
+    return av_fifo_size(fifo) / qsv_fifo_item_size();
+}
+
 static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
 {
     const AVPixFmtDescriptor *desc;
@@ -125,8 +135,7 @@  static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q)
         return AVERROR_BUG;
 
     if (!q->async_fifo) {
-        q->async_fifo = av_fifo_alloc((1 + q->async_depth) *
-                                      (sizeof(mfxSyncPoint*) + sizeof(QSVFrame*)));
+        q->async_fifo = av_fifo_alloc(q->async_depth * qsv_fifo_item_size());
         if (!q->async_fifo)
             return AVERROR(ENOMEM);
     }
@@ -384,7 +393,7 @@  static int qsv_decode(AVCodecContext *avctx, QSVContext *q,
         av_freep(&sync);
     }
 
-    if (!av_fifo_space(q->async_fifo) ||
+    if ((qsv_fifo_size(q->async_fifo) >= q->async_depth) ||
         (!avpkt->size && av_fifo_size(q->async_fifo))) {
         AVFrame *src_frame;
 
diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c
index 831252f..d9d2318 100644
--- a/libavcodec/qsvdec_h2645.c
+++ b/libavcodec/qsvdec_h2645.c
@@ -186,7 +186,7 @@  static void qsv_decode_flush(AVCodecContext *avctx)
 
 #if CONFIG_HEVC_QSV_DECODER
 static const AVOption hevc_options[] = {
-    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
+    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
 
     { "load_plugin", "A user plugin to load in an internal session", OFFSET(load_plugin), AV_OPT_TYPE_INT, { .i64 = LOAD_PLUGIN_DEFAULT }, LOAD_PLUGIN_NONE, LOAD_PLUGIN_HEVC_HW, VD, "load_plugin" },
     { "none",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_NONE },    0, 0, VD, "load_plugin" },
@@ -229,7 +229,7 @@  AVCodec ff_hevc_qsv_decoder = {
 
 #if CONFIG_H264_QSV_DECODER
 static const AVOption options[] = {
-    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
+    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
     { NULL },
 };
 
diff --git a/libavcodec/qsvdec_other.c b/libavcodec/qsvdec_other.c
index 3c872dc..f6e08a2 100644
--- a/libavcodec/qsvdec_other.c
+++ b/libavcodec/qsvdec_other.c
@@ -159,7 +159,7 @@  static void qsv_decode_flush(AVCodecContext *avctx)
 #define OFFSET(x) offsetof(QSVOtherContext, x)
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
-    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
+    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VD },
     { NULL },
 };
 
diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index 3ce5ffe..b08fa9d 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -777,7 +777,7 @@  static int qsv_init_opaque_alloc(AVCodecContext *avctx, QSVEncContext *q)
     mfxFrameSurface1 *surfaces;
     int nb_surfaces, i;
 
-    nb_surfaces = qsv->nb_opaque_surfaces + q->req.NumFrameSuggested + q->async_depth;
+    nb_surfaces = qsv->nb_opaque_surfaces + q->req.NumFrameSuggested;
 
     q->opaque_alloc_buf = av_buffer_allocz(sizeof(*surfaces) * nb_surfaces);
     if (!q->opaque_alloc_buf)
@@ -848,6 +848,16 @@  static int qsvenc_init_session(AVCodecContext *avctx, QSVEncContext *q)
     return 0;
 }
 
+static inline unsigned int qsv_fifo_item_size(void)
+{
+    return sizeof(AVPacket) + sizeof(mfxSyncPoint*) + sizeof(mfxBitstream*);
+}
+
+static inline unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
+{
+    return av_fifo_size(fifo) / qsv_fifo_item_size();
+}
+
 int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q)
 {
     int iopattern = 0;
@@ -856,8 +866,7 @@  int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q)
 
     q->param.AsyncDepth = q->async_depth;
 
-    q->async_fifo = av_fifo_alloc((1 + q->async_depth) *
-                                  (sizeof(AVPacket) + sizeof(mfxSyncPoint*) + sizeof(mfxBitstream*)));
+    q->async_fifo = av_fifo_alloc(q->async_depth * qsv_fifo_item_size());
     if (!q->async_fifo)
         return AVERROR(ENOMEM);
 
@@ -1214,7 +1223,7 @@  int ff_qsv_encode(AVCodecContext *avctx, QSVEncContext *q,
     if (ret < 0)
         return ret;
 
-    if (!av_fifo_space(q->async_fifo) ||
+    if ((qsv_fifo_size(q->async_fifo) >= q->async_depth) ||
         (!frame && av_fifo_size(q->async_fifo))) {
         AVPacket new_pkt;
         mfxBitstream *bs;
diff --git a/libavcodec/qsvenc.h b/libavcodec/qsvenc.h
index bb175c5..9a185ce 100644
--- a/libavcodec/qsvenc.h
+++ b/libavcodec/qsvenc.h
@@ -67,7 +67,7 @@ 
 #endif
 
 #define QSV_COMMON_OPTS \
-{ "async_depth", "Maximum processing parallelism", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VE },                          \
+{ "async_depth", "Maximum processing parallelism", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 1, INT_MAX, VE },                          \
 { "avbr_accuracy",    "Accuracy of the AVBR ratecontrol",    OFFSET(qsv.avbr_accuracy),    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },                             \
 { "avbr_convergence", "Convergence of the AVBR ratecontrol", OFFSET(qsv.avbr_convergence), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },                             \
 { "preset", NULL, OFFSET(qsv.preset), AV_OPT_TYPE_INT, { .i64 = MFX_TARGETUSAGE_BALANCED }, MFX_TARGETUSAGE_BEST_QUALITY, MFX_TARGETUSAGE_BEST_SPEED,   VE, "preset" }, \