movenc: factor initial_padding into edit lists

Message ID 20170219222310.25201-1-stebbins@jetheaddev.com
State New
Headers show

Commit Message

John Stebbins Feb. 19, 2017, 10:23 p.m.
initial_padding was getting added to the edit list indirectly due to
initial negative dts.  But in cases where the audio is delayed,
all or part of initial_padding would be unaccounted for.  This patch
makes initial_padding explicit.
---
 libavformat/movenc.c | 53 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 15 deletions(-)

Comments

John Stebbins Feb. 20, 2017, 5:05 p.m. | #1
On 02/19/2017 03:23 PM, John Stebbins wrote:
> initial_padding was getting added to the edit list indirectly due to
> initial negative dts.  But in cases where the audio is delayed,
> all or part of initial_padding would be unaccounted for.  This patch
> makes initial_padding explicit.
> ---
>  libavformat/movenc.c | 53 +++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 38 insertions(+), 15 deletions(-)
>
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index 689291d..1f29716 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -1698,9 +1698,28 @@ static int mov_write_edts_tag(AVIOContext *pb, MOVMuxContext *mov,
>                                        track->timescale, AV_ROUND_UP);
>      int version = duration < INT32_MAX ? 0 : 1;
>      int entry_size, entry_count, size;
> -    int64_t delay, start_ct = track->start_cts;
> -    delay = av_rescale_rnd(track->start_dts + start_ct, MOV_TIMESCALE,
> +    int64_t delay;
> +    int64_t mediatime;
> +    int64_t skip = 0;
> +
> +    delay = av_rescale_rnd(track->start_dts + track->start_cts, MOV_TIMESCALE,
>                             track->timescale, AV_ROUND_DOWN);
> +
> +    if (track->par->codec_type == AVMEDIA_TYPE_AUDIO &&
> +        track->par->initial_padding > 0) {
> +        /* Adjust delay so that initial_padding gets recorded in the
> +         * MediaTime of an edit list entry even in the case that
> +         * delay is positive. I.e. we don't want initial_padding to be
> +         * absorbed and hidden in the delay. MediaTime must contain
> +         * initial_padding in order to know where the actual media
> +         * timeline begins. A player should drop samples until MediaTime
> +         * is reached */
> +        delay += av_rescale_rnd(track->par->initial_padding, MOV_TIMESCALE,
> +                                track->par->sample_rate, AV_ROUND_DOWN);
> +        skip = av_rescale_rnd(track->par->initial_padding,
> +                              track->timescale,
> +                              track->par->sample_rate, AV_ROUND_DOWN);
> +    }
>      version |= delay < INT32_MAX ? 0 : 1;
>  
>      entry_size = (version == 1) ? 20 : 12;
> @@ -1731,33 +1750,37 @@ static int mov_write_edts_tag(AVIOContext *pb, MOVMuxContext *mov,
>          }
>          avio_wb32(pb, 0x00010000);
>      } else {
> -        /* Avoid accidentally ending up with start_ct = -1 which has got a
> -         * special meaning. Normally start_ct should end up positive or zero
> -         * here, but use FFMIN in case dts is a a small positive integer
> -         * rounded to 0 when represented in MOV_TIMESCALE units. */
> -        start_ct  = -FFMIN(track->start_dts, 0);
> -        /* Note, this delay is calculated from the pts of the first sample,
> -         * ensuring that we don't reduce the duration for cases with
> -         * dts<0 pts=0. */
> -        duration += delay;
> +        /* Avoid accidentally ending up with mediatime = -1 which has got a
> +         * special meaning. skip and -track->start_dts are guaranteed to be
> +         * positive here, so it is not possible mediatime to be -1 */
> +        skip = FFMAX(skip, -track->start_dts);

I was doing some additional testing (or rather I recruited someone to help me test ;) and found an error in my math. 
The above calculation should be

skip = FFMAX(skip, -track->start_dts - track->start_cts);

Sending an update to fix this.

Patch

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 689291d..1f29716 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1698,9 +1698,28 @@  static int mov_write_edts_tag(AVIOContext *pb, MOVMuxContext *mov,
                                       track->timescale, AV_ROUND_UP);
     int version = duration < INT32_MAX ? 0 : 1;
     int entry_size, entry_count, size;
-    int64_t delay, start_ct = track->start_cts;
-    delay = av_rescale_rnd(track->start_dts + start_ct, MOV_TIMESCALE,
+    int64_t delay;
+    int64_t mediatime;
+    int64_t skip = 0;
+
+    delay = av_rescale_rnd(track->start_dts + track->start_cts, MOV_TIMESCALE,
                            track->timescale, AV_ROUND_DOWN);
+
+    if (track->par->codec_type == AVMEDIA_TYPE_AUDIO &&
+        track->par->initial_padding > 0) {
+        /* Adjust delay so that initial_padding gets recorded in the
+         * MediaTime of an edit list entry even in the case that
+         * delay is positive. I.e. we don't want initial_padding to be
+         * absorbed and hidden in the delay. MediaTime must contain
+         * initial_padding in order to know where the actual media
+         * timeline begins. A player should drop samples until MediaTime
+         * is reached */
+        delay += av_rescale_rnd(track->par->initial_padding, MOV_TIMESCALE,
+                                track->par->sample_rate, AV_ROUND_DOWN);
+        skip = av_rescale_rnd(track->par->initial_padding,
+                              track->timescale,
+                              track->par->sample_rate, AV_ROUND_DOWN);
+    }
     version |= delay < INT32_MAX ? 0 : 1;
 
     entry_size = (version == 1) ? 20 : 12;
@@ -1731,33 +1750,37 @@  static int mov_write_edts_tag(AVIOContext *pb, MOVMuxContext *mov,
         }
         avio_wb32(pb, 0x00010000);
     } else {
-        /* Avoid accidentally ending up with start_ct = -1 which has got a
-         * special meaning. Normally start_ct should end up positive or zero
-         * here, but use FFMIN in case dts is a a small positive integer
-         * rounded to 0 when represented in MOV_TIMESCALE units. */
-        start_ct  = -FFMIN(track->start_dts, 0);
-        /* Note, this delay is calculated from the pts of the first sample,
-         * ensuring that we don't reduce the duration for cases with
-         * dts<0 pts=0. */
-        duration += delay;
+        /* Avoid accidentally ending up with mediatime = -1 which has got a
+         * special meaning. skip and -track->start_dts are guaranteed to be
+         * positive here, so it is not possible mediatime to be -1 */
+        skip = FFMAX(skip, -track->start_dts);
     }
+    mediatime = skip + track->start_cts;
+
+    /* skip is the duration of the media segment that will be dropped
+     * during playback when an edit entry is applied.  The edit entry
+     * duration must be reduced by this amount. */
+    duration -= av_rescale_rnd(skip, MOV_TIMESCALE,
+                               track->timescale, AV_ROUND_UP);
 
     /* For fragmented files, we don't know the full length yet. Setting
      * duration to 0 allows us to only specify the offset, including
      * the rest of the content (from all future fragments) without specifying
      * an explicit duration. */
-    if (mov->flags & FF_MOV_FLAG_FRAGMENT)
+    if (mov->flags & FF_MOV_FLAG_FRAGMENT || duration < 0)
         duration = 0;
 
-    /* duration */
+    /* add edit entry that defines the presentation time of the first
+     * sample to render during playback and the duration of the segment */
     if (version == 1) {
         avio_wb64(pb, duration);
-        avio_wb64(pb, start_ct);
+        avio_wb64(pb, mediatime);
     } else {
         avio_wb32(pb, duration);
-        avio_wb32(pb, start_ct);
+        avio_wb32(pb, mediatime);
     }
     avio_wb32(pb, 0x00010000);
+
     return size;
 }