[1/3] movenc: Add an option for enabling negative CTS offsets

Message ID 20170219212217.80933-1-martin@martin.st
State Superseded
Headers show

Commit Message

Martin Storsjö Feb. 19, 2017, 9:22 p.m.
This reduces the need for an edit list; streams that start with
e.g. dts=-1, pts=0 can be encoded as dts=0, pts=0 (which is valid
in mov/mp4) by shifting the dts values of all packets forward.
This avoids the need for edit lists for such streams (while they
still are needed for audio streams with encoder delay).
---
 libavformat/movenc.c | 24 ++++++++++++++++++++----
 libavformat/movenc.h |  2 ++
 2 files changed, 22 insertions(+), 4 deletions(-)

Comments

Jan Ekström Feb. 22, 2017, 7:25 p.m. | #1
On Sun, Feb 19, 2017 at 11:22 PM, Martin Storsjö <martin@martin.st> wrote:
> This reduces the need for an edit list; streams that start with
> e.g. dts=-1, pts=0 can be encoded as dts=0, pts=0 (which is valid
> in mov/mp4) by shifting the dts values of all packets forward.
> This avoids the need for edit lists for such streams (while they
> still are needed for audio streams with encoder delay).
> ---

Cool stuff. Did some general testing and it seems to work nicely!

Thus, LGTM from me.

Jan
Yusuke Nakamura Feb. 22, 2017, 11:50 p.m. | #2
2017-02-20 6:22 GMT+09:00 Martin Storsjö <martin@martin.st>:

> This reduces the need for an edit list; streams that start with
> e.g. dts=-1, pts=0 can be encoded as dts=0, pts=0 (which is valid
> in mov/mp4) by shifting the dts values of all packets forward.
> This avoids the need for edit lists for such streams (while they
> still are needed for audio streams with encoder delay).
> ---
>  libavformat/movenc.c | 24 ++++++++++++++++++++----
>  libavformat/movenc.h |  2 ++
>  2 files changed, 22 insertions(+), 4 deletions(-)
>
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index 840190d..713c145 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -62,6 +62,7 @@ static const AVOption options[] = {
>      { "delay_moov", "Delay writing the initial moov until the first
> fragment is cut, or until the first fragment flush", 0, AV_OPT_TYPE_CONST,
> {.i64 = FF_MOV_FLAG_DELAY_MOOV}, INT_MIN, INT_MAX,
> AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
>      { "global_sidx", "Write a global sidx index at the start of the
> file", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_GLOBAL_SIDX}, INT_MIN,
> INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
>      { "skip_trailer", "Skip writing the mfra/tfra/mfro trailer for
> fragmented files", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_SKIP_TRAILER},
> INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
> +    { "negative_cts_offsets", "Use negative CTS offsets (reducing the
> need for edit lists)", 0, AV_OPT_TYPE_CONST, {.i64 =
> FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS}, INT_MIN, INT_MAX,
> AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
>      FF_RTP_FLAG_OPTS(MOVMuxContext, rtp_flags),
>      { "skip_iods", "Skip writing iods atom.", offsetof(MOVMuxContext,
> iods_skip), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
>      { "iods_audio_profile", "iods audio profile atom.",
> offsetof(MOVMuxContext, iods_audio_profile), AV_OPT_TYPE_INT, {.i64 = -1},
> -1, 255, AV_OPT_FLAG_ENCODING_PARAM},
> @@ -1163,8 +1164,9 @@ static int mov_write_stsd_tag(AVFormatContext *s,
> AVIOContext *pb, MOVTrack *tra
>      return update_size(pb, pos);
>  }
>
> -static int mov_write_ctts_tag(AVIOContext *pb, MOVTrack *track)
> +static int mov_write_ctts_tag(AVFormatContext *s, AVIOContext *pb,
> MOVTrack *track)
>  {
> +    MOVMuxContext *mov = s->priv_data;
>      MOVStts *ctts_entries;
>      uint32_t entries = 0;
>      uint32_t atom_size;
> @@ -1188,7 +1190,11 @@ static int mov_write_ctts_tag(AVIOContext *pb,
> MOVTrack *track)
>      atom_size = 16 + (entries * 8);
>      avio_wb32(pb, atom_size); /* size */
>      ffio_wfourcc(pb, "ctts");
> -    avio_wb32(pb, 0); /* version & flags */
> +    if (mov->flags & FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS)
> +        avio_w8(pb, 1); /* version */
>

ctts version 1 is only defined for the iso4 and later ISOBMFF brands.


> +    else
> +        avio_w8(pb, 0); /* version */
> +    avio_wb24(pb, 0); /* flags */
>      avio_wb32(pb, entries); /* entry count */
>      for (i = 0; i < entries; i++) {
>          avio_wb32(pb, ctts_entries[i].count);
> @@ -1273,7 +1279,7 @@ static int mov_write_stbl_tag(AVFormatContext *s,
> AVIOContext *pb, MOVTrack *tra
>          mov_write_stss_tag(pb, track, MOV_PARTIAL_SYNC_SAMPLE);
>      if (track->par->codec_type == AVMEDIA_TYPE_VIDEO &&
>          track->flags & MOV_TRACK_CTTS && track->entry)
> -        mov_write_ctts_tag(pb, track);
> +        mov_write_ctts_tag(s, pb, track);
>      mov_write_stsc_tag(pb, track);
>      mov_write_stsz_tag(pb, track);
>      mov_write_stco_tag(pb, track);
> @@ -2594,7 +2600,10 @@ static int mov_write_trun_tag(AVIOContext *pb,
> MOVMuxContext *mov,
>
>      avio_wb32(pb, 0); /* size placeholder */
>      ffio_wfourcc(pb, "trun");
> -    avio_w8(pb, 0); /* version */
> +    if (mov->flags & FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS)
> +        avio_w8(pb, 1); /* version */
> +    else
> +        avio_w8(pb, 0); /* version */
>      avio_wb24(pb, flags);
>
>      avio_wb32(pb, end - first); /* sample count */
> @@ -3729,6 +3738,12 @@ static int mov_write_packet(AVFormatContext *s,
> AVPacket *pkt)
>              mov->flags &= ~FF_MOV_FLAG_FRAG_DISCONT;
>          }
>
> +        if (mov->flags & FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS) {
> +            if (trk->dts_shift == AV_NOPTS_VALUE)
> +                trk->dts_shift = pkt->pts - pkt->dts;
>

Do you care about the issue of negative composition time offsets on an early
flush of movie fragments? Reordering of leading samples could confuse
demuxers, because the first sample has a non-zero cts and the subsequent
samples are not examined. This can occur when starting to remux from an
Open-GOP boundary (also, don't forget that AVC and HEVC can output P or B
pictures before an IDR picture).


> +            pkt->dts += trk->dts_shift;
> +        }
> +
>          if (!pkt->size) {
>              if (trk->start_dts == AV_NOPTS_VALUE && trk->frag_discont) {
>                  trk->start_dts = pkt->dts;
> @@ -4095,6 +4110,7 @@ static int mov_write_header(AVFormatContext *s)
>          track->start_dts  = AV_NOPTS_VALUE;
>          track->start_cts  = AV_NOPTS_VALUE;
>          track->end_pts    = AV_NOPTS_VALUE;
> +        track->dts_shift  = AV_NOPTS_VALUE;
>          if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
>              if (track->tag == MKTAG('m','x','3','p') || track->tag ==
> MKTAG('m','x','3','n') ||
>                  track->tag == MKTAG('m','x','4','p') || track->tag ==
> MKTAG('m','x','4','n') ||
> diff --git a/libavformat/movenc.h b/libavformat/movenc.h
> index f4ed188..008f467 100644
> --- a/libavformat/movenc.h
> +++ b/libavformat/movenc.h
> @@ -107,6 +107,7 @@ typedef struct MOVTrack {
>      int64_t     start_cts;
>      int64_t     end_pts;
>      int         end_reliable;
> +    int64_t     dts_shift;
>
>      int         hint_track;   ///< the track that hints this track, -1 if
> no hint track is set
>      int         src_track;    ///< the track that this hint track
> describes
> @@ -195,6 +196,7 @@ typedef struct MOVMuxContext {
>  #define FF_MOV_FLAG_DELAY_MOOV            (1 << 13)
>  #define FF_MOV_FLAG_GLOBAL_SIDX           (1 << 14)
>  #define FF_MOV_FLAG_SKIP_TRAILER          (1 << 15)
> +#define FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS  (1 << 16)
>
>  int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt);
>
> --
> 2.10.1 (Apple Git-78)
>
> _______________________________________________
> libav-devel mailing list
> libav-devel@libav.org
> https://lists.libav.org/mailman/listinfo/libav-devel
Martin Storsjö Feb. 23, 2017, 11:27 a.m. | #3
On Thu, 23 Feb 2017, Yusuke Nakamura wrote:

> 2017-02-20 6:22 GMT+09:00 Martin Storsjö <martin@martin.st>:
>
>> This reduces the need for an edit list; streams that start with
>> e.g. dts=-1, pts=0 can be encoded as dts=0, pts=0 (which is valid
>> in mov/mp4) by shifting the dts values of all packets forward.
>> This avoids the need for edit lists for such streams (while they
>> still are needed for audio streams with encoder delay).
>> ---
>>  libavformat/movenc.c | 24 ++++++++++++++++++++----
>>  libavformat/movenc.h |  2 ++
>>  2 files changed, 22 insertions(+), 4 deletions(-)
>>
>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
>> index 840190d..713c145 100644
>> --- a/libavformat/movenc.c
>> +++ b/libavformat/movenc.c
>> @@ -62,6 +62,7 @@ static const AVOption options[] = {
>>      { "delay_moov", "Delay writing the initial moov until the first
>> fragment is cut, or until the first fragment flush", 0, AV_OPT_TYPE_CONST,
>> {.i64 = FF_MOV_FLAG_DELAY_MOOV}, INT_MIN, INT_MAX,
>> AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
>>      { "global_sidx", "Write a global sidx index at the start of the
>> file", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_GLOBAL_SIDX}, INT_MIN,
>> INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
>>      { "skip_trailer", "Skip writing the mfra/tfra/mfro trailer for
>> fragmented files", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_SKIP_TRAILER},
>> INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
>> +    { "negative_cts_offsets", "Use negative CTS offsets (reducing the
>> need for edit lists)", 0, AV_OPT_TYPE_CONST, {.i64 =
>> FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS}, INT_MIN, INT_MAX,
>> AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
>>      FF_RTP_FLAG_OPTS(MOVMuxContext, rtp_flags),
>>      { "skip_iods", "Skip writing iods atom.", offsetof(MOVMuxContext,
>> iods_skip), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
>>      { "iods_audio_profile", "iods audio profile atom.",
>> offsetof(MOVMuxContext, iods_audio_profile), AV_OPT_TYPE_INT, {.i64 = -1},
>> -1, 255, AV_OPT_FLAG_ENCODING_PARAM},
>> @@ -1163,8 +1164,9 @@ static int mov_write_stsd_tag(AVFormatContext *s,
>> AVIOContext *pb, MOVTrack *tra
>>      return update_size(pb, pos);
>>  }
>>
>> -static int mov_write_ctts_tag(AVIOContext *pb, MOVTrack *track)
>> +static int mov_write_ctts_tag(AVFormatContext *s, AVIOContext *pb,
>> MOVTrack *track)
>>  {
>> +    MOVMuxContext *mov = s->priv_data;
>>      MOVStts *ctts_entries;
>>      uint32_t entries = 0;
>>      uint32_t atom_size;
>> @@ -1188,7 +1190,11 @@ static int mov_write_ctts_tag(AVIOContext *pb,
>> MOVTrack *track)
>>      atom_size = 16 + (entries * 8);
>>      avio_wb32(pb, atom_size); /* size */
>>      ffio_wfourcc(pb, "ctts");
>> -    avio_wb32(pb, 0); /* version & flags */
>> +    if (mov->flags & FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS)
>> +        avio_w8(pb, 1); /* version */
>>
>
> ctts ver. 1 is defined in iso4 or later isobmff brands.

Thanks, will change so that we declare iso4 as major brand if this flag is 
set (unless some other option is set that requires declaring iso5).

>> +    else
>> +        avio_w8(pb, 0); /* version */
>> +    avio_wb24(pb, 0); /* flags */
>>      avio_wb32(pb, entries); /* entry count */
>>      for (i = 0; i < entries; i++) {
>>          avio_wb32(pb, ctts_entries[i].count);
>> @@ -1273,7 +1279,7 @@ static int mov_write_stbl_tag(AVFormatContext *s,
>> AVIOContext *pb, MOVTrack *tra
>>          mov_write_stss_tag(pb, track, MOV_PARTIAL_SYNC_SAMPLE);
>>      if (track->par->codec_type == AVMEDIA_TYPE_VIDEO &&
>>          track->flags & MOV_TRACK_CTTS && track->entry)
>> -        mov_write_ctts_tag(pb, track);
>> +        mov_write_ctts_tag(s, pb, track);
>>      mov_write_stsc_tag(pb, track);
>>      mov_write_stsz_tag(pb, track);
>>      mov_write_stco_tag(pb, track);
>> @@ -2594,7 +2600,10 @@ static int mov_write_trun_tag(AVIOContext *pb,
>> MOVMuxContext *mov,
>>
>>      avio_wb32(pb, 0); /* size placeholder */
>>      ffio_wfourcc(pb, "trun");
>> -    avio_w8(pb, 0); /* version */
>> +    if (mov->flags & FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS)
>> +        avio_w8(pb, 1); /* version */
>> +    else
>> +        avio_w8(pb, 0); /* version */
>>      avio_wb24(pb, flags);
>>
>>      avio_wb32(pb, end - first); /* sample count */
>> @@ -3729,6 +3738,12 @@ static int mov_write_packet(AVFormatContext *s,
>> AVPacket *pkt)
>>              mov->flags &= ~FF_MOV_FLAG_FRAG_DISCONT;
>>          }
>>
>> +        if (mov->flags & FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS) {
>> +            if (trk->dts_shift == AV_NOPTS_VALUE)
>> +                trk->dts_shift = pkt->pts - pkt->dts;
>>
>
> Do you care about the issue of negative composition time offsets on an early
> flush of movie fragments? Reordering of leading samples could confuse
> demuxers, because the first sample has a non-zero cts and the subsequent
> samples are not examined. This can occur when starting to remux from an
> Open-GOP boundary (also, don't forget that AVC and HEVC can output P or B
> pictures before an IDR picture).

Good point - I hadn't thought about that. In those cases, we won't get
exactly the desired result. On the other hand, I don't have a better
idea for a heuristic that would do the right thing either. So I'd declare
that as a known limitation (or just recommend not enabling this flag at
all for those cases).

// Martin

Patch

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 840190d..713c145 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -62,6 +62,7 @@  static const AVOption options[] = {
     { "delay_moov", "Delay writing the initial moov until the first fragment is cut, or until the first fragment flush", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_DELAY_MOOV}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "global_sidx", "Write a global sidx index at the start of the file", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_GLOBAL_SIDX}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "skip_trailer", "Skip writing the mfra/tfra/mfro trailer for fragmented files", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_SKIP_TRAILER}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
+    { "negative_cts_offsets", "Use negative CTS offsets (reducing the need for edit lists)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     FF_RTP_FLAG_OPTS(MOVMuxContext, rtp_flags),
     { "skip_iods", "Skip writing iods atom.", offsetof(MOVMuxContext, iods_skip), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
     { "iods_audio_profile", "iods audio profile atom.", offsetof(MOVMuxContext, iods_audio_profile), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 255, AV_OPT_FLAG_ENCODING_PARAM},
@@ -1163,8 +1164,9 @@  static int mov_write_stsd_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
     return update_size(pb, pos);
 }
 
-static int mov_write_ctts_tag(AVIOContext *pb, MOVTrack *track)
+static int mov_write_ctts_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track)
 {
+    MOVMuxContext *mov = s->priv_data;
     MOVStts *ctts_entries;
     uint32_t entries = 0;
     uint32_t atom_size;
@@ -1188,7 +1190,11 @@  static int mov_write_ctts_tag(AVIOContext *pb, MOVTrack *track)
     atom_size = 16 + (entries * 8);
     avio_wb32(pb, atom_size); /* size */
     ffio_wfourcc(pb, "ctts");
-    avio_wb32(pb, 0); /* version & flags */
+    if (mov->flags & FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS)
+        avio_w8(pb, 1); /* version */
+    else
+        avio_w8(pb, 0); /* version */
+    avio_wb24(pb, 0); /* flags */
     avio_wb32(pb, entries); /* entry count */
     for (i = 0; i < entries; i++) {
         avio_wb32(pb, ctts_entries[i].count);
@@ -1273,7 +1279,7 @@  static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
         mov_write_stss_tag(pb, track, MOV_PARTIAL_SYNC_SAMPLE);
     if (track->par->codec_type == AVMEDIA_TYPE_VIDEO &&
         track->flags & MOV_TRACK_CTTS && track->entry)
-        mov_write_ctts_tag(pb, track);
+        mov_write_ctts_tag(s, pb, track);
     mov_write_stsc_tag(pb, track);
     mov_write_stsz_tag(pb, track);
     mov_write_stco_tag(pb, track);
@@ -2594,7 +2600,10 @@  static int mov_write_trun_tag(AVIOContext *pb, MOVMuxContext *mov,
 
     avio_wb32(pb, 0); /* size placeholder */
     ffio_wfourcc(pb, "trun");
-    avio_w8(pb, 0); /* version */
+    if (mov->flags & FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS)
+        avio_w8(pb, 1); /* version */
+    else
+        avio_w8(pb, 0); /* version */
     avio_wb24(pb, flags);
 
     avio_wb32(pb, end - first); /* sample count */
@@ -3729,6 +3738,12 @@  static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
             mov->flags &= ~FF_MOV_FLAG_FRAG_DISCONT;
         }
 
+        if (mov->flags & FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS) {
+            if (trk->dts_shift == AV_NOPTS_VALUE)
+                trk->dts_shift = pkt->pts - pkt->dts;
+            pkt->dts += trk->dts_shift;
+        }
+
         if (!pkt->size) {
             if (trk->start_dts == AV_NOPTS_VALUE && trk->frag_discont) {
                 trk->start_dts = pkt->dts;
@@ -4095,6 +4110,7 @@  static int mov_write_header(AVFormatContext *s)
         track->start_dts  = AV_NOPTS_VALUE;
         track->start_cts  = AV_NOPTS_VALUE;
         track->end_pts    = AV_NOPTS_VALUE;
+        track->dts_shift  = AV_NOPTS_VALUE;
         if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
             if (track->tag == MKTAG('m','x','3','p') || track->tag == MKTAG('m','x','3','n') ||
                 track->tag == MKTAG('m','x','4','p') || track->tag == MKTAG('m','x','4','n') ||
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index f4ed188..008f467 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -107,6 +107,7 @@  typedef struct MOVTrack {
     int64_t     start_cts;
     int64_t     end_pts;
     int         end_reliable;
+    int64_t     dts_shift;
 
     int         hint_track;   ///< the track that hints this track, -1 if no hint track is set
     int         src_track;    ///< the track that this hint track describes
@@ -195,6 +196,7 @@  typedef struct MOVMuxContext {
 #define FF_MOV_FLAG_DELAY_MOOV            (1 << 13)
 #define FF_MOV_FLAG_GLOBAL_SIDX           (1 << 14)
 #define FF_MOV_FLAG_SKIP_TRAILER          (1 << 15)
+#define FF_MOV_FLAG_NEGATIVE_CTS_OFFSETS  (1 << 16)
 
 int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt);