[v4] qsvvpp: Fix to perform full init only when needed

Message ID 20180717160720.44196-1-maxim.d33@gmail.com
State New
Headers show
Series
  • [v4] qsvvpp: Fix to perform full init only when needed
Related show

Commit Message

Maxym Dmytrychenko July 17, 2018, 4:07 p.m.
Not used VPP sessions, like for hwupload/hwdownload handling,
can increase CPU utilization and this patch fixes it.
thank you,Joe, for the contribution.

Signed-off-by: Maxym Dmytrychenko <maxim.d33@gmail.com>
---
 libavutil/hwcontext_qsv.c | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

Comments

Mark Thompson July 17, 2018, 10:48 p.m. | #1
On 17/07/18 17:07, Maxym Dmytrychenko wrote:
> Not used VPP sessions, like for hwupload/hwdownload handling,
> can increase CPU utilization and this patch fixes it.
> thank you,Joe, for the contribution.
> 
> Signed-off-by: Maxym Dmytrychenko <maxim.d33@gmail.com>
> ---
>  libavutil/hwcontext_qsv.c | 38 +++++++++++++++++++++++++++++---------
>  1 file changed, 29 insertions(+), 9 deletions(-)
> 
> diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
> index b3eb4a3ea..390c3aac4 100644
> --- a/libavutil/hwcontext_qsv.c
> +++ b/libavutil/hwcontext_qsv.c
> @@ -18,6 +18,7 @@
>  
>  #include <stdint.h>
>  #include <string.h>
> +#include <stdatomic.h>
>  
>  #include <mfx/mfxvideo.h>
>  
> @@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {
>  
>  typedef struct QSVFramesContext {
>      mfxSession session_download;
> +    atomic_int session_download_init;
>      mfxSession session_upload;
> +    atomic_int session_upload_init;
>  
>      AVBufferRef *child_frames_ref;
>      mfxFrameSurface1 *surfaces_internal;
> @@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext *ctx)
>          MFXVideoVPP_Close(s->session_download);
>          MFXClose(s->session_download);
>      }
> -    s->session_download = NULL;
> +    s->session_download      = NULL;
> +    s->session_download_init = 0;
>  
>      if (s->session_upload) {
>          MFXVideoVPP_Close(s->session_upload);
>          MFXClose(s->session_upload);
>      }
> -    s->session_upload = NULL;
> +    s->session_upload      = NULL;
> +    s->session_upload_init = 0;
>  
>      av_freep(&s->mem_ids);
>      av_freep(&s->surface_ptrs);
> @@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext *ctx)
>              s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
>      }
>  
> -    ret = qsv_init_internal_session(ctx, &s->session_download, 0);
> -    if (ret < 0)
> -        return ret;
> -
> -    ret = qsv_init_internal_session(ctx, &s->session_upload, 1);
> -    if (ret < 0)
> -        return ret;
> +    s->session_download      = NULL;
> +    s->session_upload        = NULL;
> +    s->session_download_init = 0;
> +    s->session_upload_init   = 0;
>  
>      return 0;
>  }
> @@ -741,6 +743,15 @@ static int qsv_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
>      mfxSyncPoint sync = NULL;
>      mfxStatus err;
>  
> +    while (!s->session_download_init && !s->session_download) {
> +	if (atomic_fetch_add(&s->session_download_init, 1) == 0) {
> +            qsv_init_internal_session(ctx, &s->session_download, 0);
> +        }
> +	else {
> +            av_usleep(1);

This races - consider what happens if the other thread is preempted for more than 1µs, or if the initialisation itself takes more than that long.

You need to actually do some synchronisation here (e.g. with a 'once' variable) - with only atomic flags there is no way to guarantee that the other thread has finished unless you spin, which isn't acceptable.

> +        }
> +    }
> +
>      if (!s->session_download) {
>          if (s->child_frames_ref)
>              return qsv_transfer_data_child(ctx, dst, src);
> @@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
>      mfxSyncPoint sync = NULL;
>      mfxStatus err;
>  
> +    while (!s->session_upload_init && !s->session_upload) {
> +	if (atomic_fetch_add(&s->session_upload_init, 1) == 0) {
> +            qsv_init_internal_session(ctx, &s->session_upload, 1);
> +        }
> +	else {
> +            av_usleep(1);
> +        }
> +    }
> +
>      if (!s->session_upload) {
>          if (s->child_frames_ref)
>              return qsv_transfer_data_child(ctx, dst, src);
> 

Thanks,

- Mark
Maxym Dmytrychenko July 17, 2018, 11:25 p.m. | #2
On Wed, Jul 18, 2018 at 12:48 AM Mark Thompson <sw@jkqxz.net> wrote:

> On 17/07/18 17:07, Maxym Dmytrychenko wrote:
> > Not used VPP sessions, like for hwupload/hwdownload handling,
> > can increase CPU utilization and this patch fixes it.
> > thank you,Joe, for the contribution.
> >
> > Signed-off-by: Maxym Dmytrychenko <maxim.d33@gmail.com>
> > ---
> >  libavutil/hwcontext_qsv.c | 38 +++++++++++++++++++++++++++++---------
> >  1 file changed, 29 insertions(+), 9 deletions(-)
> >
> > diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
> > index b3eb4a3ea..390c3aac4 100644
> > --- a/libavutil/hwcontext_qsv.c
> > +++ b/libavutil/hwcontext_qsv.c
> > @@ -18,6 +18,7 @@
> >
> >  #include <stdint.h>
> >  #include <string.h>
> > +#include <stdatomic.h>
> >
> >  #include <mfx/mfxvideo.h>
> >
> > @@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {
> >
> >  typedef struct QSVFramesContext {
> >      mfxSession session_download;
> > +    atomic_int session_download_init;
> >      mfxSession session_upload;
> > +    atomic_int session_upload_init;
> >
> >      AVBufferRef *child_frames_ref;
> >      mfxFrameSurface1 *surfaces_internal;
> > @@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext
> *ctx)
> >          MFXVideoVPP_Close(s->session_download);
> >          MFXClose(s->session_download);
> >      }
> > -    s->session_download = NULL;
> > +    s->session_download      = NULL;
> > +    s->session_download_init = 0;
> >
> >      if (s->session_upload) {
> >          MFXVideoVPP_Close(s->session_upload);
> >          MFXClose(s->session_upload);
> >      }
> > -    s->session_upload = NULL;
> > +    s->session_upload      = NULL;
> > +    s->session_upload_init = 0;
> >
> >      av_freep(&s->mem_ids);
> >      av_freep(&s->surface_ptrs);
> > @@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext *ctx)
> >              s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
> >      }
> >
> > -    ret = qsv_init_internal_session(ctx, &s->session_download, 0);
> > -    if (ret < 0)
> > -        return ret;
> > -
> > -    ret = qsv_init_internal_session(ctx, &s->session_upload, 1);
> > -    if (ret < 0)
> > -        return ret;
> > +    s->session_download      = NULL;
> > +    s->session_upload        = NULL;
> > +    s->session_download_init = 0;
> > +    s->session_upload_init   = 0;
> >
> >      return 0;
> >  }
> > @@ -741,6 +743,15 @@ static int qsv_transfer_data_from(AVHWFramesContext
> *ctx, AVFrame *dst,
> >      mfxSyncPoint sync = NULL;
> >      mfxStatus err;
> >
> > +    while (!s->session_download_init && !s->session_download) {
> > +     if (atomic_fetch_add(&s->session_download_init, 1) == 0) {
> > +            qsv_init_internal_session(ctx, &s->session_download, 0);
> > +        }
> > +     else {
> > +            av_usleep(1);
>
> This races - consider what happens if the other thread is preempted for
> more than 1µs, or if the initialisation itself takes more than that long.
>
> You need to actually do some synchronisation here (e.g. with a 'once'
> variable) - with only atomic flags there is no way to guarantee that the
> other thread has finished unless you spin, which isn't acceptable.
>
> pthread_once was considered but we need to pass  s->session_download
adding mutex - can be overkill for each call of   qsv_transfer_data_*

what do you mean by 'once' variable?
dont really see it currently implemented somewhere...


> > +        }
> > +    }
> > +
> >      if (!s->session_download) {
> >          if (s->child_frames_ref)
> >              return qsv_transfer_data_child(ctx, dst, src);
> > @@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext
> *ctx, AVFrame *dst,
> >      mfxSyncPoint sync = NULL;
> >      mfxStatus err;
> >
> > +    while (!s->session_upload_init && !s->session_upload) {
> > +     if (atomic_fetch_add(&s->session_upload_init, 1) == 0) {
> > +            qsv_init_internal_session(ctx, &s->session_upload, 1);
> > +        }
> > +     else {
> > +            av_usleep(1);
> > +        }
> > +    }
> > +
> >      if (!s->session_upload) {
> >          if (s->child_frames_ref)
> >              return qsv_transfer_data_child(ctx, dst, src);
> >
>
> Thanks,
>
> - Mark
> _______________________________________________
> libav-devel mailing list
> libav-devel@libav.org
> https://lists.libav.org/mailman/listinfo/libav-devel


regards
Max
Mark Thompson July 17, 2018, 11:34 p.m. | #3
On 18/07/18 00:25, Maxym Dmytrychenko wrote:
> On Wed, Jul 18, 2018 at 12:48 AM Mark Thompson <sw@jkqxz.net> wrote:
> 
>> On 17/07/18 17:07, Maxym Dmytrychenko wrote:
>>> Not used VPP sessions, like for hwupload/hwdownload handling,
>>> can increase CPU utilization and this patch fixes it.
>>> thank you,Joe, for the contribution.
>>>
>>> Signed-off-by: Maxym Dmytrychenko <maxim.d33@gmail.com>
>>> ---
>>>  libavutil/hwcontext_qsv.c | 38 +++++++++++++++++++++++++++++---------
>>>  1 file changed, 29 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
>>> index b3eb4a3ea..390c3aac4 100644
>>> --- a/libavutil/hwcontext_qsv.c
>>> +++ b/libavutil/hwcontext_qsv.c
>>> @@ -18,6 +18,7 @@
>>>
>>>  #include <stdint.h>
>>>  #include <string.h>
>>> +#include <stdatomic.h>
>>>
>>>  #include <mfx/mfxvideo.h>
>>>
>>> @@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {
>>>
>>>  typedef struct QSVFramesContext {
>>>      mfxSession session_download;
>>> +    atomic_int session_download_init;
>>>      mfxSession session_upload;
>>> +    atomic_int session_upload_init;
>>>
>>>      AVBufferRef *child_frames_ref;
>>>      mfxFrameSurface1 *surfaces_internal;
>>> @@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext
>> *ctx)
>>>          MFXVideoVPP_Close(s->session_download);
>>>          MFXClose(s->session_download);
>>>      }
>>> -    s->session_download = NULL;
>>> +    s->session_download      = NULL;
>>> +    s->session_download_init = 0;
>>>
>>>      if (s->session_upload) {
>>>          MFXVideoVPP_Close(s->session_upload);
>>>          MFXClose(s->session_upload);
>>>      }
>>> -    s->session_upload = NULL;
>>> +    s->session_upload      = NULL;
>>> +    s->session_upload_init = 0;
>>>
>>>      av_freep(&s->mem_ids);
>>>      av_freep(&s->surface_ptrs);
>>> @@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext *ctx)
>>>              s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
>>>      }
>>>
>>> -    ret = qsv_init_internal_session(ctx, &s->session_download, 0);
>>> -    if (ret < 0)
>>> -        return ret;
>>> -
>>> -    ret = qsv_init_internal_session(ctx, &s->session_upload, 1);
>>> -    if (ret < 0)
>>> -        return ret;
>>> +    s->session_download      = NULL;
>>> +    s->session_upload        = NULL;
>>> +    s->session_download_init = 0;
>>> +    s->session_upload_init   = 0;
>>>
>>>      return 0;
>>>  }
>>> @@ -741,6 +743,15 @@ static int qsv_transfer_data_from(AVHWFramesContext
>> *ctx, AVFrame *dst,
>>>      mfxSyncPoint sync = NULL;
>>>      mfxStatus err;
>>>
>>> +    while (!s->session_download_init && !s->session_download) {
>>> +     if (atomic_fetch_add(&s->session_download_init, 1) == 0) {
>>> +            qsv_init_internal_session(ctx, &s->session_download, 0);
>>> +        }
>>> +     else {
>>> +            av_usleep(1);
>>
>> This races - consider what happens if the other thread is preempted for
>> more than 1µs, or if the initialisation itself takes more than that long.

(Apologies, I misread that the first time - with the spin loop it should only be a benign race on session_download, but it's still undefined behaviour by C11 and things like tsan will complain about it: read in the non-initialising thread against write in the initialising thread, after the flag has been set to 1.)

>>
>> You need to actually do some synchronisation here (e.g. with a 'once'
>> variable) - with only atomic flags there is no way to guarantee that the
>> other thread has finished unless you spin, which isn't acceptable.
>>
>> pthread_once was considered but we need to pass  s->session_download
> adding mutex - can be overkill for each call of   qsv_transfer_data_*
> 
> what do you mean by 'once' variable?
> dont really see it currently implemented somewhere...

See AVOnce.  It's used in a few places in this role, for example in hwcontext_d3d11va.c for the library loading.

>>> +        }
>>> +    }
>>> +
>>>      if (!s->session_download) {
>>>          if (s->child_frames_ref)
>>>              return qsv_transfer_data_child(ctx, dst, src);
>>> @@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext
>> *ctx, AVFrame *dst,
>>>      mfxSyncPoint sync = NULL;
>>>      mfxStatus err;
>>>
>>> +    while (!s->session_upload_init && !s->session_upload) {
>>> +     if (atomic_fetch_add(&s->session_upload_init, 1) == 0) {
>>> +            qsv_init_internal_session(ctx, &s->session_upload, 1);
>>> +        }
>>> +     else {
>>> +            av_usleep(1);
>>> +        }
>>> +    }
>>> +
>>>      if (!s->session_upload) {
>>>          if (s->child_frames_ref)
>>>              return qsv_transfer_data_child(ctx, dst, src);
>>>
Maxym Dmytrychenko July 17, 2018, 11:53 p.m. | #4
On Wed, Jul 18, 2018 at 1:34 AM Mark Thompson <sw@jkqxz.net> wrote:

> On 18/07/18 00:25, Maxym Dmytrychenko wrote:
> > On Wed, Jul 18, 2018 at 12:48 AM Mark Thompson <sw@jkqxz.net> wrote:
> >
> >> On 17/07/18 17:07, Maxym Dmytrychenko wrote:
> >>> Not used VPP sessions, like for hwupload/hwdownload handling,
> >>> can increase CPU utilization and this patch fixes it.
> >>> thank you,Joe, for the contribution.
> >>>
> >>> Signed-off-by: Maxym Dmytrychenko <maxim.d33@gmail.com>
> >>> ---
> >>>  libavutil/hwcontext_qsv.c | 38 +++++++++++++++++++++++++++++---------
> >>>  1 file changed, 29 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
> >>> index b3eb4a3ea..390c3aac4 100644
> >>> --- a/libavutil/hwcontext_qsv.c
> >>> +++ b/libavutil/hwcontext_qsv.c
> >>> @@ -18,6 +18,7 @@
> >>>
> >>>  #include <stdint.h>
> >>>  #include <string.h>
> >>> +#include <stdatomic.h>
> >>>
> >>>  #include <mfx/mfxvideo.h>
> >>>
> >>> @@ -56,7 +57,9 @@ typedef struct QSVDeviceContext {
> >>>
> >>>  typedef struct QSVFramesContext {
> >>>      mfxSession session_download;
> >>> +    atomic_int session_download_init;
> >>>      mfxSession session_upload;
> >>> +    atomic_int session_upload_init;
> >>>
> >>>      AVBufferRef *child_frames_ref;
> >>>      mfxFrameSurface1 *surfaces_internal;
> >>> @@ -146,13 +149,15 @@ static void qsv_frames_uninit(AVHWFramesContext
> >> *ctx)
> >>>          MFXVideoVPP_Close(s->session_download);
> >>>          MFXClose(s->session_download);
> >>>      }
> >>> -    s->session_download = NULL;
> >>> +    s->session_download      = NULL;
> >>> +    s->session_download_init = 0;
> >>>
> >>>      if (s->session_upload) {
> >>>          MFXVideoVPP_Close(s->session_upload);
> >>>          MFXClose(s->session_upload);
> >>>      }
> >>> -    s->session_upload = NULL;
> >>> +    s->session_upload      = NULL;
> >>> +    s->session_upload_init = 0;
> >>>
> >>>      av_freep(&s->mem_ids);
> >>>      av_freep(&s->surface_ptrs);
> >>> @@ -535,13 +540,10 @@ static int qsv_frames_init(AVHWFramesContext
> *ctx)
> >>>              s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
> >>>      }
> >>>
> >>> -    ret = qsv_init_internal_session(ctx, &s->session_download, 0);
> >>> -    if (ret < 0)
> >>> -        return ret;
> >>> -
> >>> -    ret = qsv_init_internal_session(ctx, &s->session_upload, 1);
> >>> -    if (ret < 0)
> >>> -        return ret;
> >>> +    s->session_download      = NULL;
> >>> +    s->session_upload        = NULL;
> >>> +    s->session_download_init = 0;
> >>> +    s->session_upload_init   = 0;
> >>>
> >>>      return 0;
> >>>  }
> >>> @@ -741,6 +743,15 @@ static int
> qsv_transfer_data_from(AVHWFramesContext
> >> *ctx, AVFrame *dst,
> >>>      mfxSyncPoint sync = NULL;
> >>>      mfxStatus err;
> >>>
> >>> +    while (!s->session_download_init && !s->session_download) {
> >>> +     if (atomic_fetch_add(&s->session_download_init, 1) == 0) {
> >>> +            qsv_init_internal_session(ctx, &s->session_download, 0);
> >>> +        }
> >>> +     else {
> >>> +            av_usleep(1);
> >>
> >> This races - consider what happens if the other thread is preempted for
> >> more than 1µs, or if the initialisation itself takes more than that
> long.
>
> (Apologies, I misread that the first time - with the spin loop it should
> only be a benign race on session_download, but it's still undefined
> behaviour by C11 and things like tsan will complain about it: read in the
> non-initialising thread against write in the initialising thread, after the
> flag has been set to 1.)
>
>
np,
let's fix it, sure.


> >>
> >> You need to actually do some synchronisation here (e.g. with a 'once'
> >> variable) - with only atomic flags there is no way to guarantee that the
> >> other thread has finished unless you spin, which isn't acceptable.
> >>
> >> pthread_once was considered but we need to pass  s->session_download
> > adding mutex - can be overkill for each call of   qsv_transfer_data_*
> >
> > what do you mean by 'once' variable?
> > dont really see it currently implemented somewhere...
>
> See AVOnce.  It's used in a few places in this role, for example in
> hwcontext_d3d11va.c for the library loading.
>
>
if I see it correct: (ret = ff_thread_once(&functions_loaded,
load_functions)
 it comes down to :
static inline int ff_thread_once(char *control, void (*routine)(void))
note - no params for routine(), where I need to pass current ( ctx,
&s->session_download )
that is the puzzle to solve now.

Also thinking if
#if HAVE_PTHREADS
would be needed.

pthread_mutex / pthread_cond seems to be the only option left,
anything else?


> >>> +        }
> >>> +    }
> >>> +
> >>>      if (!s->session_download) {
> >>>          if (s->child_frames_ref)
> >>>              return qsv_transfer_data_child(ctx, dst, src);
> >>> @@ -788,6 +799,15 @@ static int qsv_transfer_data_to(AVHWFramesContext
> >> *ctx, AVFrame *dst,
> >>>      mfxSyncPoint sync = NULL;
> >>>      mfxStatus err;
> >>>
> >>> +    while (!s->session_upload_init && !s->session_upload) {
> >>> +     if (atomic_fetch_add(&s->session_upload_init, 1) == 0) {
> >>> +            qsv_init_internal_session(ctx, &s->session_upload, 1);
> >>> +        }
> >>> +     else {
> >>> +            av_usleep(1);
> >>> +        }
> >>> +    }
> >>> +
> >>>      if (!s->session_upload) {
> >>>          if (s->child_frames_ref)
> >>>              return qsv_transfer_data_child(ctx, dst, src);
> >>>
> _______________________________________________
> libav-devel mailing list
> libav-devel@libav.org
> https://lists.libav.org/mailman/listinfo/libav-devel

Patch

diff --git a/libavutil/hwcontext_qsv.c b/libavutil/hwcontext_qsv.c
index b3eb4a3ea..390c3aac4 100644
--- a/libavutil/hwcontext_qsv.c
+++ b/libavutil/hwcontext_qsv.c
@@ -18,6 +18,7 @@ 
 
 #include <stdint.h>
 #include <string.h>
+#include <stdatomic.h>
 
 #include <mfx/mfxvideo.h>
 
@@ -56,7 +57,9 @@  typedef struct QSVDeviceContext {
 
 typedef struct QSVFramesContext {
     mfxSession session_download;
+    atomic_int session_download_init;
     mfxSession session_upload;
+    atomic_int session_upload_init;
 
     AVBufferRef *child_frames_ref;
     mfxFrameSurface1 *surfaces_internal;
@@ -146,13 +149,15 @@  static void qsv_frames_uninit(AVHWFramesContext *ctx)
         MFXVideoVPP_Close(s->session_download);
         MFXClose(s->session_download);
     }
-    s->session_download = NULL;
+    s->session_download      = NULL;
+    s->session_download_init = 0;
 
     if (s->session_upload) {
         MFXVideoVPP_Close(s->session_upload);
         MFXClose(s->session_upload);
     }
-    s->session_upload = NULL;
+    s->session_upload      = NULL;
+    s->session_upload_init = 0;
 
     av_freep(&s->mem_ids);
     av_freep(&s->surface_ptrs);
@@ -535,13 +540,10 @@  static int qsv_frames_init(AVHWFramesContext *ctx)
             s->mem_ids[i] = frames_hwctx->surfaces[i].Data.MemId;
     }
 
-    ret = qsv_init_internal_session(ctx, &s->session_download, 0);
-    if (ret < 0)
-        return ret;
-
-    ret = qsv_init_internal_session(ctx, &s->session_upload, 1);
-    if (ret < 0)
-        return ret;
+    s->session_download      = NULL;
+    s->session_upload        = NULL;
+    s->session_download_init = 0;
+    s->session_upload_init   = 0;
 
     return 0;
 }
@@ -741,6 +743,15 @@  static int qsv_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
     mfxSyncPoint sync = NULL;
     mfxStatus err;
 
+    while (!s->session_download_init && !s->session_download) {
+	if (atomic_fetch_add(&s->session_download_init, 1) == 0) {
+            qsv_init_internal_session(ctx, &s->session_download, 0);
+        }
+	else {
+            av_usleep(1);
+        }
+    }
+
     if (!s->session_download) {
         if (s->child_frames_ref)
             return qsv_transfer_data_child(ctx, dst, src);
@@ -788,6 +799,15 @@  static int qsv_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
     mfxSyncPoint sync = NULL;
     mfxStatus err;
 
+    while (!s->session_upload_init && !s->session_upload) {
+	if (atomic_fetch_add(&s->session_upload_init, 1) == 0) {
+            qsv_init_internal_session(ctx, &s->session_upload, 1);
+        }
+	else {
+            av_usleep(1);
+        }
+    }
+
     if (!s->session_upload) {
         if (s->child_frames_ref)
             return qsv_transfer_data_child(ctx, dst, src);