[1/4] movenc: factor initial_padding into edit lists

Message ID 20170224202416.13106-1-stebbins@jetheaddev.com
State New
Headers show

Commit Message

John Stebbins Feb. 24, 2017, 8:24 p.m.
initial_padding was getting added to the edit list indirectly due to
initial negative dts.  But in cases where the audio is delayed,
all or part of initial_padding would be unaccounted for.  This patch
makes initial_padding explicit.
---
 libavformat/movenc.c | 60 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 43 insertions(+), 17 deletions(-)

Comments

Martin Storsjö Feb. 24, 2017, 9:16 p.m. | #1
On Fri, 24 Feb 2017, John Stebbins wrote:

> initial_padding was getting added to the edit list indirectly due to
> initial negative dts.  But in cases where the audio is delayed,
> all or part of initial_padding would be unaccounted for.  This patch
> makes initial_padding explicit.
> ---
> libavformat/movenc.c | 60 +++++++++++++++++++++++++++++++++++++---------------
> 1 file changed, 43 insertions(+), 17 deletions(-)
>
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index 840190d..4d351b7 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -1637,9 +1637,31 @@ static int mov_write_edts_tag(AVIOContext *pb, MOVMuxContext *mov,
>                                       track->timescale, AV_ROUND_UP);
>     int version = duration < INT32_MAX ? 0 : 1;
>     int entry_size, entry_count, size;
> -    int64_t delay, start_ct = track->start_cts;
> -    delay = av_rescale_rnd(track->start_dts + start_ct, MOV_TIMESCALE,
> -                           track->timescale, AV_ROUND_DOWN);
> +    int64_t delay;
> +    int64_t mediatime;
> +    int64_t skip = 0;
> +
> +    delay = track->start_dts + track->start_cts;
> +
> +    if (track->par->codec_type == AVMEDIA_TYPE_AUDIO &&
> +        track->par->initial_padding > 0) {
> +        /* Adjust delay so that initial_padding gets recorded in the
> +         * MediaTime of an edit list entry even in the case that
> +         * delay is positive. I.e. we don't want initial_padding to be
> +         * absorbed and hidden in the delay. MediaTime must contain
> +         * initial_padding in order to know where the actual media
> +         * timeline begins. A player should drop samples until MediaTime
> +         * is reached */
> +        delay += av_rescale_rnd(track->par->initial_padding, track->timescale,
> +                                track->par->sample_rate, AV_ROUND_UP);
> +        skip = av_rescale_rnd(track->par->initial_padding,
> +                              track->timescale,
> +                              track->par->sample_rate, AV_ROUND_DOWN);
> +    }
> +    /* rescale delay, this was not done earlier to minimize rounding errors */
> +    delay = av_rescale_rnd(delay, MOV_TIMESCALE,
> +                           track->timescale, AV_ROUND_NEAR_INF);
> +
>     version |= delay < INT32_MAX ? 0 : 1;
>
>     entry_size = (version == 1) ? 20 : 12;
> @@ -1670,33 +1692,35 @@ static int mov_write_edts_tag(AVIOContext *pb, MOVMuxContext *mov,
>         }
>         avio_wb32(pb, 0x00010000);
>     } else {
> -        /* Avoid accidentally ending up with start_ct = -1 which has got a
> -         * special meaning. Normally start_ct should end up positive or zero
> -         * here, but use FFMIN in case dts is a a small positive integer
> -         * rounded to 0 when represented in MOV_TIMESCALE units. */
> -        start_ct  = -FFMIN(track->start_dts, 0);
> -        /* Note, this delay is calculated from the pts of the first sample,
> -         * ensuring that we don't reduce the duration for cases with
> -         * dts<0 pts=0. */
> -        duration += delay;
> +        /* Skip the larger of initial_padding or the initial -PTS. */
> +        skip = FFMAX(skip, -track->start_dts - track->start_cts);
>     }
> +    mediatime = skip + track->start_cts;
> +
> +    /* skip is the duration of the media segment that will be dropped
> +     * during playback when an edit entry is applied.  The edit entry
> +     * duration must be reduced by this amount. */
> +    duration -= av_rescale_rnd(skip, MOV_TIMESCALE,
> +                               track->timescale, AV_ROUND_UP);
>
>     /* For fragmented files, we don't know the full length yet. Setting
>      * duration to 0 allows us to only specify the offset, including
>      * the rest of the content (from all future fragments) without specifying
>      * an explicit duration. */
> -    if (mov->flags & FF_MOV_FLAG_FRAGMENT)
> +    if (mov->flags & FF_MOV_FLAG_FRAGMENT || duration < 0)
>         duration = 0;
> 
> -    /* duration */
> +    /* add edit entry that defines the presentation time of the first
> +     * sample to render during playback and the duration of the segment */
>     if (version == 1) {
>         avio_wb64(pb, duration);
> -        avio_wb64(pb, start_ct);
> +        avio_wb64(pb, mediatime);
>     } else {
>         avio_wb32(pb, duration);
> -        avio_wb32(pb, start_ct);
> +        avio_wb32(pb, mediatime);
>     }
>     avio_wb32(pb, 0x00010000);
> +
>     return size;
> }
> 
> @@ -1806,7 +1830,9 @@ static int mov_write_trak_tag(AVIOContext *pb, MOVMuxContext *mov,
>     mov_write_tkhd_tag(pb, mov, track, st);
>     if (track->start_dts != AV_NOPTS_VALUE &&
>         (track->mode == MODE_PSP || track->flags & MOV_TRACK_CTTS ||
> -        track->start_dts || is_clcp_track(track))) {
> +        track->start_dts || is_clcp_track(track) ||
> +        (track->par->codec_type == AVMEDIA_TYPE_AUDIO &&
> +         track->par->initial_padding > 0))) {
>         if (mov->use_editlist)
>             mov_write_edts_tag(pb, mov, track);  // PSP Movies require edts box
>         else if ((track->entry && track->cluster[0].dts) || track->mode == MODE_PSP || is_clcp_track(track))
> -- 
> 2.9.3

Ok

// Martin

Patch

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 840190d..4d351b7 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1637,9 +1637,31 @@  static int mov_write_edts_tag(AVIOContext *pb, MOVMuxContext *mov,
                                       track->timescale, AV_ROUND_UP);
     int version = duration < INT32_MAX ? 0 : 1;
     int entry_size, entry_count, size;
-    int64_t delay, start_ct = track->start_cts;
-    delay = av_rescale_rnd(track->start_dts + start_ct, MOV_TIMESCALE,
-                           track->timescale, AV_ROUND_DOWN);
+    int64_t delay;
+    int64_t mediatime;
+    int64_t skip = 0;
+
+    delay = track->start_dts + track->start_cts;
+
+    if (track->par->codec_type == AVMEDIA_TYPE_AUDIO &&
+        track->par->initial_padding > 0) {
+        /* Adjust delay so that initial_padding gets recorded in the
+         * MediaTime of an edit list entry even in the case that
+         * delay is positive. I.e. we don't want initial_padding to be
+         * absorbed and hidden in the delay. MediaTime must contain
+         * initial_padding in order to know where the actual media
+         * timeline begins. A player should drop samples until MediaTime
+         * is reached */
+        delay += av_rescale_rnd(track->par->initial_padding, track->timescale,
+                                track->par->sample_rate, AV_ROUND_UP);
+        skip = av_rescale_rnd(track->par->initial_padding,
+                              track->timescale,
+                              track->par->sample_rate, AV_ROUND_DOWN);
+    }
+    /* rescale delay, this was not done earlier to minimize rounding errors */
+    delay = av_rescale_rnd(delay, MOV_TIMESCALE,
+                           track->timescale, AV_ROUND_NEAR_INF);
+
     version |= delay < INT32_MAX ? 0 : 1;
 
     entry_size = (version == 1) ? 20 : 12;
@@ -1670,33 +1692,35 @@  static int mov_write_edts_tag(AVIOContext *pb, MOVMuxContext *mov,
         }
         avio_wb32(pb, 0x00010000);
     } else {
-        /* Avoid accidentally ending up with start_ct = -1 which has got a
-         * special meaning. Normally start_ct should end up positive or zero
-         * here, but use FFMIN in case dts is a a small positive integer
-         * rounded to 0 when represented in MOV_TIMESCALE units. */
-        start_ct  = -FFMIN(track->start_dts, 0);
-        /* Note, this delay is calculated from the pts of the first sample,
-         * ensuring that we don't reduce the duration for cases with
-         * dts<0 pts=0. */
-        duration += delay;
+        /* Skip the larger of initial_padding or the initial -PTS. */
+        skip = FFMAX(skip, -track->start_dts - track->start_cts);
     }
+    mediatime = skip + track->start_cts;
+
+    /* skip is the duration of the media segment that will be dropped
+     * during playback when an edit entry is applied.  The edit entry
+     * duration must be reduced by this amount. */
+    duration -= av_rescale_rnd(skip, MOV_TIMESCALE,
+                               track->timescale, AV_ROUND_UP);
 
     /* For fragmented files, we don't know the full length yet. Setting
      * duration to 0 allows us to only specify the offset, including
      * the rest of the content (from all future fragments) without specifying
      * an explicit duration. */
-    if (mov->flags & FF_MOV_FLAG_FRAGMENT)
+    if (mov->flags & FF_MOV_FLAG_FRAGMENT || duration < 0)
         duration = 0;
 
-    /* duration */
+    /* add edit entry that defines the presentation time of the first
+     * sample to render during playback and the duration of the segment */
     if (version == 1) {
         avio_wb64(pb, duration);
-        avio_wb64(pb, start_ct);
+        avio_wb64(pb, mediatime);
     } else {
         avio_wb32(pb, duration);
-        avio_wb32(pb, start_ct);
+        avio_wb32(pb, mediatime);
     }
     avio_wb32(pb, 0x00010000);
+
     return size;
 }
 
@@ -1806,7 +1830,9 @@  static int mov_write_trak_tag(AVIOContext *pb, MOVMuxContext *mov,
     mov_write_tkhd_tag(pb, mov, track, st);
     if (track->start_dts != AV_NOPTS_VALUE &&
         (track->mode == MODE_PSP || track->flags & MOV_TRACK_CTTS ||
-        track->start_dts || is_clcp_track(track))) {
+        track->start_dts || is_clcp_track(track) ||
+        (track->par->codec_type == AVMEDIA_TYPE_AUDIO &&
+         track->par->initial_padding > 0))) {
         if (mov->use_editlist)
             mov_write_edts_tag(pb, mov, track);  // PSP Movies require edts box
         else if ((track->entry && track->cluster[0].dts) || track->mode == MODE_PSP || is_clcp_track(track))