[1/2] add application private packet and frame side data

Message ID 1458613249-18995-1-git-send-email-stebbins@jetheaddev.com
State New
Headers show

Commit Message

John Stebbins March 22, 2016, 2:20 a.m.
From: John Stebbins <jstebbins@roamer.alpe-d-or.dyn-o-saur.com>

This side data is a better more flexible replacement for
AVCodecContext.reordered_opaque.  During decoding, it is added to an
AVPacket by the application, and provided back, reordered, to the
application in the decoded frame. It can contain anything the
application requires.
---
 libavcodec/avcodec.h | 7 +++++++
 libavcodec/utils.c   | 1 +
 libavcodec/version.h | 2 +-
 libavutil/frame.h    | 6 ++++++
 libavutil/version.h  | 2 +-
 5 files changed, 16 insertions(+), 2 deletions(-)

Comments

wm4 March 22, 2016, 8:36 a.m. | #1
On Mon, 21 Mar 2016 20:20:48 -0600
John Stebbins <stebbins@jetheaddev.com> wrote:

> From: John Stebbins <jstebbins@roamer.alpe-d-or.dyn-o-saur.com>
> 
> This side data is a better more flexible replacement for
> AVCodecContext.reordered_opaque.  During decoding, it is added to an
> AVPacket by the application, and provided back, reordered, to the
> application in the decoded frame. It can contain anything the
> application requires.
> ---
>  libavcodec/avcodec.h | 7 +++++++
>  libavcodec/utils.c   | 1 +
>  libavcodec/version.h | 2 +-
>  libavutil/frame.h    | 6 ++++++
>  libavutil/version.h  | 2 +-
>  5 files changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> index 0ff31a0..00566c6 100644
> --- a/libavcodec/avcodec.h
> +++ b/libavcodec/avcodec.h
> @@ -1184,6 +1184,13 @@ enum AVPacketSideDataType {
>       * This side data corresponds to the AVCPBProperties struct.
>       */
>      AV_PKT_DATA_CPB_PROPERTIES,
> +    /**
> +     ** This side data contains data that the application requires to
> +     ** be passed from the packet to the frame during decoding.  It is
> +     ** a more flexible replacement for AVCodecContext reordered_opaque
> +     **/
> +    AV_PKT_DATA_APP_PRIVATE,
> +
>  };
>  
>  typedef struct AVPacketSideData {
> diff --git a/libavcodec/utils.c b/libavcodec/utils.c
> index 866cdfc..56075e2 100644
> --- a/libavcodec/utils.c
> +++ b/libavcodec/utils.c
> @@ -538,6 +538,7 @@ int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame)
>          { AV_PKT_DATA_DISPLAYMATRIX, AV_FRAME_DATA_DISPLAYMATRIX },
>          { AV_PKT_DATA_STEREO3D,      AV_FRAME_DATA_STEREO3D },
>          { AV_PKT_DATA_AUDIO_SERVICE_TYPE, AV_FRAME_DATA_AUDIO_SERVICE_TYPE },
> +        { AV_PKT_DATA_APP_PRIVATE,   AV_FRAME_DATA_APP_PRIVATE },
>      };
>  
>      frame->color_primaries = avctx->color_primaries;
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index d5045fb..3b47f49 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -28,7 +28,7 @@
>  #include "libavutil/version.h"
>  
>  #define LIBAVCODEC_VERSION_MAJOR 57
> -#define LIBAVCODEC_VERSION_MINOR 15
> +#define LIBAVCODEC_VERSION_MINOR 16
>  #define LIBAVCODEC_VERSION_MICRO  0
>  
>  #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
> diff --git a/libavutil/frame.h b/libavutil/frame.h
> index a39a1ef..795001d 100644
> --- a/libavutil/frame.h
> +++ b/libavutil/frame.h
> @@ -92,6 +92,12 @@ enum AVFrameSideDataType {
>       * enum AVAudioServiceType defined in avcodec.h.
>       */
>      AV_FRAME_DATA_AUDIO_SERVICE_TYPE,
> +    /**
> +     ** This side data contains data that the application requires to
> +     ** be passed from the packet to the frame during decoding.  It is
> +     ** a more flexible replacement for AVCodecContext reordered_opaque
> +     **/
> +    AV_FRAME_DATA_APP_PRIVATE,
>  };
>  
>  enum AVActiveFormatDescription {
> diff --git a/libavutil/version.h b/libavutil/version.h
> index fdd27e3..186ebd8 100644
> --- a/libavutil/version.h
> +++ b/libavutil/version.h
> @@ -54,7 +54,7 @@
>   */
>  
>  #define LIBAVUTIL_VERSION_MAJOR 55
> -#define LIBAVUTIL_VERSION_MINOR  9
> +#define LIBAVUTIL_VERSION_MINOR 10
>  #define LIBAVUTIL_VERSION_MICRO  0
>  
>  #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \

As far as I understand it, not every decoder will be able to pass this
through correctly. (Hardware decoders, wrappers.)
Diego Biurrun March 22, 2016, 10:47 a.m. | #2
On Mon, Mar 21, 2016 at 08:20:48PM -0600, John Stebbins wrote:
> From: John Stebbins <jstebbins@roamer.alpe-d-or.dyn-o-saur.com>

Looks like your Git is misconfigured.

Diego
Luca Barbato March 22, 2016, 11:02 a.m. | #3
On 22/03/16 09:36, wm4 wrote:
> As far as I understand it, not every decoder will be able to pass this
> through correctly. (Hardware decoders, wrappers.)

At least some of those situation can be addressed, for the others I
guess working with who produces those third parties is needed.

lu
John Stebbins March 22, 2016, 2:59 p.m. | #4
On 03/22/2016 05:02 AM, Luca Barbato wrote:
> On 22/03/16 09:36, wm4 wrote:
>> As far as I understand it, not every decoder will be able to pass this
>> through correctly. (Hardware decoders, wrappers.)
> At least some of those situation can be addressed, for the others I
> guess working with who produces those third parties is needed.
>
>

Can we document were this wouldn't work?  Provide a list somewhere?  And try to fix cases where it doesn't work but
could be made to work?  I assume most of these hardware decoders must have special handling for pts that allows it to be
reordered and passed through.  Any decoder that can't do this is garbage and I have no interest in using.  My thought is
that as long as they can pass through a reordered "pts" they can be made to pass through anything by associating a cache
of data elements that are indexed by the "pts" fed to the hardware and then looked up when a frame with "pts" is
output.  I put pts in quotes because a sequence number could be inserted here instead, while the real pts is stored with
the cache of data elements and reattached to the frame at the output of the hardware decoder.
wm4 March 22, 2016, 3:28 p.m. | #5
On Tue, 22 Mar 2016 08:59:24 -0600
John Stebbins <stebbins@jetheaddev.com> wrote:

> On 03/22/2016 05:02 AM, Luca Barbato wrote:
> > On 22/03/16 09:36, wm4 wrote:  
> >> As far as I understand it, not every decoder will be able to pass this
> >> through correctly. (Hardware decoders, wrappers.)  
> > At least some of those situation can be addressed, for the others I
> > guess working with who produces those third parties is needed.
> >
> >  
> 
> Can we document were this wouldn't work?  Provide a list somewhere?  And try to fix cases where it doesn't work but
> could be made to work?  I assume most of these hardware decoders must have special handling for pts that allows it to be
> reordered and passed through.  Any decoder that can't do this is garbage and I have no interest in using.  My thought is
> that as long as they can pass through a reordered "pts" they can be made to pass through anything by associating a cache
> of data elements that are indexed by the "pts" fed to the hardware and then looked up when a frame with "pts" is
> output.  I put pts in quotes because a sequence number could be inserted here instead, while the real pts is stored with
> the cache of data elements and reattached to the frame at the output of the hardware decoder.
> 

Many hw decoder seem to be able to reorder PTS, but that's it. They
don't treat the PTS as opaque either, but expect it to be in a specific
fixed unit (like microseconds etc.). They also may try their own
(broken) attempts to fix broken timestamps, modifying the value as a
result. That's just my own quite limited experience.

Maybe decoders which support this could be marked with a codec flag.
John Stebbins March 22, 2016, 3:39 p.m. | #6
On 03/22/2016 09:28 AM, wm4 wrote:
> On Tue, 22 Mar 2016 08:59:24 -0600
> John Stebbins <stebbins@jetheaddev.com> wrote:
>
>> On 03/22/2016 05:02 AM, Luca Barbato wrote:
>>> On 22/03/16 09:36, wm4 wrote:  
>>>> As far as I understand it, not every decoder will be able to pass this
>>>> through correctly. (Hardware decoders, wrappers.)  
>>> At least some of those situation can be addressed, for the others I
>>> guess working with who produces those third parties is needed.
>>>
>>>  
>> Can we document were this wouldn't work?  Provide a list somewhere?  And try to fix cases where it doesn't work but
>> could be made to work?  I assume most of these hardware decoders must have special handling for pts that allows it to be
>> reordered and passed through.  Any decoder that can't do this is garbage and I have no interest in using.  My thought is
>> that as long as they can pass through a reordered "pts" they can be made to pass through anything by associating a cache
>> of data elements that are indexed by the "pts" fed to the hardware and then looked up when a frame with "pts" is
>> output.  I put pts in quotes because a sequence number could be inserted here instead, while the real pts is stored with
>> the cache of data elements and reattached to the frame at the output of the hardware decoder.
>>
> Many hw decoder seem to be able to reorder PTS, but that's it. They
> don't treat the PTS as opaque either, but expect it to be in a specific
> fixed unit (like microseconds etc.). They also may try their own
> (broken) attempts to fix broken timestamps, modifying the value as a
> result. That's just my own quite limited experience.
>
> Maybe decoders which support this could be marked with a codec flag.
>

That would be a good option. Although it would probably be easier to identify which *do not* support this and mark
them.  If others agree, I can add such a flag to the patch and start tracking down which decoders this does not work for.
Hendrik Leppkes March 22, 2016, 3:42 p.m. | #7
On Tue, Mar 22, 2016 at 3:59 PM, John Stebbins <stebbins@jetheaddev.com> wrote:
> On 03/22/2016 05:02 AM, Luca Barbato wrote:
>> On 22/03/16 09:36, wm4 wrote:
>>> As far as I understand it, not every decoder will be able to pass this
>>> through correctly. (Hardware decoders, wrappers.)
>> At least some of those situation can be addressed, for the others I
>> guess working with who produces those third parties is needed.
>>
>>
>
> Can we document were this wouldn't work?  Provide a list somewhere?  And try to fix cases where it doesn't work but
> could be made to work?  I assume most of these hardware decoders must have special handling for pts that allows it to be
> reordered and passed through.  Any decoder that can't do this is garbage and I have no interest in using.  My thought is
> that as long as they can pass through a reordered "pts" they can be made to pass through anything by associating a cache
> of data elements that are indexed by the "pts" fed to the hardware and then looked up when a frame with "pts" is
> output.  I put pts in quotes because a sequence number could be inserted here instead, while the real pts is stored with
> the cache of data elements and reattached to the frame at the output of the hardware decoder.
>

Honestly this sounds rather complicated and hacky, if its only
associated by PTS, then your user code could also do that, couldn't
it.

- Hendrik
John Stebbins March 22, 2016, 3:51 p.m. | #8
On 03/22/2016 09:42 AM, Hendrik Leppkes wrote:
> On Tue, Mar 22, 2016 at 3:59 PM, John Stebbins <stebbins@jetheaddev.com> wrote:
>> On 03/22/2016 05:02 AM, Luca Barbato wrote:
>>> On 22/03/16 09:36, wm4 wrote:
>>>> As far as I understand it, not every decoder will be able to pass this
>>>> through correctly. (Hardware decoders, wrappers.)
>>> At least some of those situation can be addressed, for the others I
>>> guess working with who produces those third parties is needed.
>>>
>>>
>> Can we document were this wouldn't work?  Provide a list somewhere?  And try to fix cases where it doesn't work but
>> could be made to work?  I assume most of these hardware decoders must have special handling for pts that allows it to be
>> reordered and passed through.  Any decoder that can't do this is garbage and I have no interest in using.  My thought is
>> that as long as they can pass through a reordered "pts" they can be made to pass through anything by associating a cache
>> of data elements that are indexed by the "pts" fed to the hardware and then looked up when a frame with "pts" is
>> output.  I put pts in quotes because a sequence number could be inserted here instead, while the real pts is stored with
>> the cache of data elements and reattached to the frame at the output of the hardware decoder.
>>
> Honestly this sounds rather complicated and hacky, if its only
> associated by PTS, then your user code could also do that, couldn't
> it.
>
>

Yes, it could.  But in the majority of cases, this "hacky" code path would not be executed.  Under most circumstances,
this information would be cleanly passed by the existing code in ff_decode_frame_props. I only extended an already in
place paradigm which also "breaks" under the circumstances being discussed.  TBH, I would be fine with using only
decoders where this can be made to cleanly work.  The "hack" was a suggestion, not a requirement for this to be useful.
Luca Barbato March 22, 2016, 3:54 p.m. | #9
On 22/03/16 16:39, John Stebbins wrote:

> That would be a good option. Although it would probably be easier to identify which *do not* support this and mark
> them.  If others agree, I can add such a flag to the patch and start tracking down which decoders this does not work for.

So it boils down to x264 and x265 for sure ^^;

lu
John Stebbins March 22, 2016, 4:12 p.m. | #10
On 03/22/2016 09:54 AM, Luca Barbato wrote:
> On 22/03/16 16:39, John Stebbins wrote:
>
>> That would be a good option. Although it would probably be easier to identify which *do not* support this and mark
>> them.  If others agree, I can add such a flag to the patch and start tracking down which decoders this does not work for.
> So it boils down to x264 and x265 for sure ^^;
>
>

My patch is providing a mechanism for the *decode* side of things (AVPacket to AVFrame).  It would be nice to have a
symmetric behaviour for encoding (AVFrame to AVPacket), but that's a different patch ;)

Some background.  There is information (chapter marks) that HandBrake attaches to video frames and passes all the way
through our pipeline from reader to muxer.   We do this for a couple of reasons.  First, chapter marks for DVD and BD
are not indexed by time but rather by position.  You know the time when the position is reached during playback..  When
we reach a chapter mark, we mark the video buffers with the new chapter.  The second reason is that multiple stages in
the pipeline can modify timestamps.  If we merely created an index of chapters when chapter marks are received, the
timestamps read at the front of the pipeline and placed in this index will not match the timestamps that reach the
muxer.  We currently have various "hacks" in place to make this all happen.  I'm hoping to remove such hacks.
Luca Barbato March 22, 2016, 4:21 p.m. | #11
On 22/03/16 17:12, John Stebbins wrote:
> My patch is providing a mechanism for the *decode* side of things (AVPacket to AVFrame).

Ah, then the scenario is much nicer even if not perfect =) (as in more
work needed).

lu

Patch

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 0ff31a0..00566c6 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -1184,6 +1184,13 @@  enum AVPacketSideDataType {
      * This side data corresponds to the AVCPBProperties struct.
      */
     AV_PKT_DATA_CPB_PROPERTIES,
+    /**
+     ** This side data contains data that the application requires to
+     ** be passed from the packet to the frame during decoding.  It is
+     ** a more flexible replacement for AVCodecContext reordered_opaque
+     **/
+    AV_PKT_DATA_APP_PRIVATE,
+
 };
 
 typedef struct AVPacketSideData {
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 866cdfc..56075e2 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -538,6 +538,7 @@  int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame)
         { AV_PKT_DATA_DISPLAYMATRIX, AV_FRAME_DATA_DISPLAYMATRIX },
         { AV_PKT_DATA_STEREO3D,      AV_FRAME_DATA_STEREO3D },
         { AV_PKT_DATA_AUDIO_SERVICE_TYPE, AV_FRAME_DATA_AUDIO_SERVICE_TYPE },
+        { AV_PKT_DATA_APP_PRIVATE,   AV_FRAME_DATA_APP_PRIVATE },
     };
 
     frame->color_primaries = avctx->color_primaries;
diff --git a/libavcodec/version.h b/libavcodec/version.h
index d5045fb..3b47f49 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -28,7 +28,7 @@ 
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR 57
-#define LIBAVCODEC_VERSION_MINOR 15
+#define LIBAVCODEC_VERSION_MINOR 16
 #define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
diff --git a/libavutil/frame.h b/libavutil/frame.h
index a39a1ef..795001d 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -92,6 +92,12 @@  enum AVFrameSideDataType {
      * enum AVAudioServiceType defined in avcodec.h.
      */
     AV_FRAME_DATA_AUDIO_SERVICE_TYPE,
+    /**
+     ** This side data contains data that the application requires to
+     ** be passed from the packet to the frame during decoding.  It is
+     ** a more flexible replacement for AVCodecContext reordered_opaque
+     **/
+    AV_FRAME_DATA_APP_PRIVATE,
 };
 
 enum AVActiveFormatDescription {
diff --git a/libavutil/version.h b/libavutil/version.h
index fdd27e3..186ebd8 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -54,7 +54,7 @@ 
  */
 
 #define LIBAVUTIL_VERSION_MAJOR 55
-#define LIBAVUTIL_VERSION_MINOR  9
+#define LIBAVUTIL_VERSION_MINOR 10
 #define LIBAVUTIL_VERSION_MICRO  0
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \