[3/4] mov: fix edit list issue that can cause A/V desync

Message ID 20170224202416.13106-3-stebbins@jetheaddev.com
State New
Headers show

Commit Message

John Stebbins Feb. 24, 2017, 8:24 p.m.
Only the first entry in the edit list was factored into the time_offset
of the first sample in a track.  But when there is a delay (empty edit
entry) the mediatime from the second entry must also be factored into
the time_offset.
---
 libavformat/isom.h |  2 ++
 libavformat/mov.c  | 30 ++++++++++++++++++++++--------
 2 files changed, 24 insertions(+), 8 deletions(-)

Comments

Robert Krüger Feb. 25, 2017, 12:17 p.m. | #1
On Fri, Feb 24, 2017 at 9:24 PM, John Stebbins <stebbins@jetheaddev.com>
wrote:

> Only the first entry in the edit list was factored into the time_offset
> of the first sample in a track.  But when there is a delay (empty edit
> entry) the mediatime from the second entry must also be factored into
> the time_offset.
> ---
>  libavformat/isom.h |  2 ++
>  libavformat/mov.c  | 30 ++++++++++++++++++++++--------
>  2 files changed, 24 insertions(+), 8 deletions(-)
>
> diff --git a/libavformat/isom.h b/libavformat/isom.h
> index 8cc5ab7..7c345e9 100644
> --- a/libavformat/isom.h
> +++ b/libavformat/isom.h
> @@ -125,6 +125,8 @@ typedef struct MOVStreamContext {
>      int *keyframes;
>      int time_scale;
>      int64_t time_offset;  ///< time offset of the first edit list entry
> +    int64_t time_offset_delay;
> +    int64_t time_offset_skip;
>      int current_sample;
>      unsigned int bytes_per_frame;
>      unsigned int samples_per_frame;
> diff --git a/libavformat/mov.c b/libavformat/mov.c
> index 5c9f85c..1657647 100644
> --- a/libavformat/mov.c
> +++ b/libavformat/mov.c
> @@ -2323,10 +2323,10 @@ static void mov_build_index(MOVContext *mov,
> AVStream *st)
>      uint64_t stream_size = 0;
>
>      /* adjust first dts according to edit list */
> -    if (sc->time_offset && mov->time_scale > 0) {
> -        if (sc->time_offset < 0)
> -            sc->time_offset = av_rescale(sc->time_offset, sc->time_scale,
> mov->time_scale);
> -        current_dts = -sc->time_offset;
> +    if (mov->time_scale > 0) {
> +        sc->time_offset = av_rescale(sc->time_offset_delay,
> sc->time_scale,
> +                                     mov->time_scale) -
> sc->time_offset_skip;
> +        current_dts = sc->time_offset;
>      }
>
>      /* only use old uncompressed audio chunk demuxing when stts specifies
> it */
> @@ -2999,6 +2999,10 @@ static int mov_read_trun(MOVContext *c, AVIOContext
> *pb, MOVAtom atom)
>      sc = st->priv_data;
>      if (sc->pseudo_stream_id+1 != frag->stsd_id)
>          return 0;
> +    if (c->time_scale > 0) {
> +        sc->time_offset = av_rescale(sc->time_offset_delay,
> sc->time_scale,
> +                                     c->time_scale) -
> sc->time_offset_skip;
> +    }
>      avio_r8(pb); /* version */
>      flags = avio_rb24(pb);
>      entries = avio_rb32(pb);
> @@ -3029,7 +3033,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext
> *pb, MOVAtom atom)
>      }
>      if (flags & MOV_TRUN_DATA_OFFSET)        data_offset        =
> avio_rb32(pb);
>      if (flags & MOV_TRUN_FIRST_SAMPLE_FLAGS) first_sample_flags =
> avio_rb32(pb);
> -    dts    = sc->track_end - sc->time_offset;
> +    dts    = sc->track_end + sc->time_offset;
>      offset = frag->base_data_offset + data_offset;
>      distance = 0;
>      av_log(c->fc, AV_LOG_TRACE, "first sample flags 0x%x\n",
> first_sample_flags);
> @@ -3069,7 +3073,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext
> *pb, MOVAtom atom)
>          return AVERROR_EOF;
>
>      frag->implicit_offset = offset;
> -    st->duration = sc->track_end = dts + sc->time_offset;
> +    st->duration = sc->track_end = dts - sc->time_offset;
>      return 0;
>  }
>
> @@ -3152,6 +3156,7 @@ static int mov_read_elst(MOVContext *c, AVIOContext
> *pb, MOVAtom atom)
>  {
>      MOVStreamContext *sc;
>      int i, edit_count, version;
> +    int time_offset_done = 0;
>
>      if (c->fc->nb_streams < 1)
>          return 0;
> @@ -3175,8 +3180,17 @@ static int mov_read_elst(MOVContext *c, AVIOContext
> *pb, MOVAtom atom)
>              time     = (int32_t)avio_rb32(pb); /* media time */
>          }
>          avio_rb32(pb); /* Media rate */
> -        if (i == 0 && time >= -1) {
> -            sc->time_offset = time != -1 ? time : -duration;
> +        if (!time_offset_done) {
> +            if (time == -1) {
> +                /* delay is in movie timescale */
> +                sc->time_offset_delay += duration;
> +            } else if (time >= 0) {
> +                /* samples to skip is in track timescale */
> +                sc->time_offset_skip = time;
> +                time_offset_done = 1;
> +            }
> +            /* timescales may not be known yet, so we can not compute
> +             * a single combined time_offset yet */
>          }
>      }
>
> --
> 2.9.3
>

do you by any chance know if that fixes an issue with open-gop XDCAM? I
haven't gotten to making a bug report but the recent big edit list changes
have introduced a regression there for us (causing a/v desync when remuxing
such files). I will probably test this again anyway but was curious if that
was the motivation for the patch.
John Stebbins Feb. 25, 2017, 6:23 p.m. | #2
On 02/25/2017 05:17 AM, Robert Krüger wrote:
> On Fri, Feb 24, 2017 at 9:24 PM, John Stebbins <stebbins@jetheaddev.com>
> wrote:
>
>> Only the first entry in the edit list was factored into the time_offset
>> of the first sample in a track.  But when there is a delay (empty edit
>> entry) the mediatime from the second entry must also be factored into
>> the time_offset.
>> ---
>>  libavformat/isom.h |  2 ++
>>  libavformat/mov.c  | 30 ++++++++++++++++++++++--------
>>  2 files changed, 24 insertions(+), 8 deletions(-)
>>
>> diff --git a/libavformat/isom.h b/libavformat/isom.h
>> index 8cc5ab7..7c345e9 100644
>> --- a/libavformat/isom.h
>> +++ b/libavformat/isom.h
>> @@ -125,6 +125,8 @@ typedef struct MOVStreamContext {
>>      int *keyframes;
>>      int time_scale;
>>      int64_t time_offset;  ///< time offset of the first edit list entry
>> +    int64_t time_offset_delay;
>> +    int64_t time_offset_skip;
>>      int current_sample;
>>      unsigned int bytes_per_frame;
>>      unsigned int samples_per_frame;
>> diff --git a/libavformat/mov.c b/libavformat/mov.c
>> index 5c9f85c..1657647 100644
>> --- a/libavformat/mov.c
>> +++ b/libavformat/mov.c
>> @@ -2323,10 +2323,10 @@ static void mov_build_index(MOVContext *mov,
>> AVStream *st)
>>      uint64_t stream_size = 0;
>>
>>      /* adjust first dts according to edit list */
>> -    if (sc->time_offset && mov->time_scale > 0) {
>> -        if (sc->time_offset < 0)
>> -            sc->time_offset = av_rescale(sc->time_offset, sc->time_scale,
>> mov->time_scale);
>> -        current_dts = -sc->time_offset;
>> +    if (mov->time_scale > 0) {
>> +        sc->time_offset = av_rescale(sc->time_offset_delay,
>> sc->time_scale,
>> +                                     mov->time_scale) -
>> sc->time_offset_skip;
>> +        current_dts = sc->time_offset;
>>      }
>>
>>      /* only use old uncompressed audio chunk demuxing when stts specifies
>> it */
>> @@ -2999,6 +2999,10 @@ static int mov_read_trun(MOVContext *c, AVIOContext
>> *pb, MOVAtom atom)
>>      sc = st->priv_data;
>>      if (sc->pseudo_stream_id+1 != frag->stsd_id)
>>          return 0;
>> +    if (c->time_scale > 0) {
>> +        sc->time_offset = av_rescale(sc->time_offset_delay,
>> sc->time_scale,
>> +                                     c->time_scale) -
>> sc->time_offset_skip;
>> +    }
>>      avio_r8(pb); /* version */
>>      flags = avio_rb24(pb);
>>      entries = avio_rb32(pb);
>> @@ -3029,7 +3033,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext
>> *pb, MOVAtom atom)
>>      }
>>      if (flags & MOV_TRUN_DATA_OFFSET)        data_offset        =
>> avio_rb32(pb);
>>      if (flags & MOV_TRUN_FIRST_SAMPLE_FLAGS) first_sample_flags =
>> avio_rb32(pb);
>> -    dts    = sc->track_end - sc->time_offset;
>> +    dts    = sc->track_end + sc->time_offset;
>>      offset = frag->base_data_offset + data_offset;
>>      distance = 0;
>>      av_log(c->fc, AV_LOG_TRACE, "first sample flags 0x%x\n",
>> first_sample_flags);
>> @@ -3069,7 +3073,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext
>> *pb, MOVAtom atom)
>>          return AVERROR_EOF;
>>
>>      frag->implicit_offset = offset;
>> -    st->duration = sc->track_end = dts + sc->time_offset;
>> +    st->duration = sc->track_end = dts - sc->time_offset;
>>      return 0;
>>  }
>>
>> @@ -3152,6 +3156,7 @@ static int mov_read_elst(MOVContext *c, AVIOContext
>> *pb, MOVAtom atom)
>>  {
>>      MOVStreamContext *sc;
>>      int i, edit_count, version;
>> +    int time_offset_done = 0;
>>
>>      if (c->fc->nb_streams < 1)
>>          return 0;
>> @@ -3175,8 +3180,17 @@ static int mov_read_elst(MOVContext *c, AVIOContext
>> *pb, MOVAtom atom)
>>              time     = (int32_t)avio_rb32(pb); /* media time */
>>          }
>>          avio_rb32(pb); /* Media rate */
>> -        if (i == 0 && time >= -1) {
>> -            sc->time_offset = time != -1 ? time : -duration;
>> +        if (!time_offset_done) {
>> +            if (time == -1) {
>> +                /* delay is in movie timescale */
>> +                sc->time_offset_delay += duration;
>> +            } else if (time >= 0) {
>> +                /* samples to skip is in track timescale */
>> +                sc->time_offset_skip = time;
>> +                time_offset_done = 1;
>> +            }
>> +            /* timescales may not be known yet, so we can not compute
>> +             * a single combined time_offset yet */
>>          }
>>      }
>>
>> --
>> 2.9.3
>>
> do you by any chance know if that fixes an issue with open-gop XDCAM? I
> haven't gotten to making a bug report but the recent big edit list changes
> have introduced a regression there for us (causing a/v desync when remuxing
> such files). I will probably test this again anyway but was curious if that
> was the motivation for the patch.
> _______________________________________________
>

Off hand, I don't know.  I spotted this when debugging an A/V sync issue while re-encoding an mp4 where the video track
was delayed by 160ms and the first video frame has 320ms dts to pts difference (i.e. the first cts was 320ms).  The
320ms difference was not accounted for because only the first edit list entry for the 160ms delay was processed.

Patch

diff --git a/libavformat/isom.h b/libavformat/isom.h
index 8cc5ab7..7c345e9 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -125,6 +125,8 @@  typedef struct MOVStreamContext {
     int *keyframes;
     int time_scale;
     int64_t time_offset;  ///< time offset of the first edit list entry
+    int64_t time_offset_delay;
+    int64_t time_offset_skip;
     int current_sample;
     unsigned int bytes_per_frame;
     unsigned int samples_per_frame;
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 5c9f85c..1657647 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -2323,10 +2323,10 @@  static void mov_build_index(MOVContext *mov, AVStream *st)
     uint64_t stream_size = 0;
 
     /* adjust first dts according to edit list */
-    if (sc->time_offset && mov->time_scale > 0) {
-        if (sc->time_offset < 0)
-            sc->time_offset = av_rescale(sc->time_offset, sc->time_scale, mov->time_scale);
-        current_dts = -sc->time_offset;
+    if (mov->time_scale > 0) {
+        sc->time_offset = av_rescale(sc->time_offset_delay, sc->time_scale,
+                                     mov->time_scale) - sc->time_offset_skip;
+        current_dts = sc->time_offset;
     }
 
     /* only use old uncompressed audio chunk demuxing when stts specifies it */
@@ -2999,6 +2999,10 @@  static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     sc = st->priv_data;
     if (sc->pseudo_stream_id+1 != frag->stsd_id)
         return 0;
+    if (c->time_scale > 0) {
+        sc->time_offset = av_rescale(sc->time_offset_delay, sc->time_scale,
+                                     c->time_scale) - sc->time_offset_skip;
+    }
     avio_r8(pb); /* version */
     flags = avio_rb24(pb);
     entries = avio_rb32(pb);
@@ -3029,7 +3033,7 @@  static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     }
     if (flags & MOV_TRUN_DATA_OFFSET)        data_offset        = avio_rb32(pb);
     if (flags & MOV_TRUN_FIRST_SAMPLE_FLAGS) first_sample_flags = avio_rb32(pb);
-    dts    = sc->track_end - sc->time_offset;
+    dts    = sc->track_end + sc->time_offset;
     offset = frag->base_data_offset + data_offset;
     distance = 0;
     av_log(c->fc, AV_LOG_TRACE, "first sample flags 0x%x\n", first_sample_flags);
@@ -3069,7 +3073,7 @@  static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return AVERROR_EOF;
 
     frag->implicit_offset = offset;
-    st->duration = sc->track_end = dts + sc->time_offset;
+    st->duration = sc->track_end = dts - sc->time_offset;
     return 0;
 }
 
@@ -3152,6 +3156,7 @@  static int mov_read_elst(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     MOVStreamContext *sc;
     int i, edit_count, version;
+    int time_offset_done = 0;
 
     if (c->fc->nb_streams < 1)
         return 0;
@@ -3175,8 +3180,17 @@  static int mov_read_elst(MOVContext *c, AVIOContext *pb, MOVAtom atom)
             time     = (int32_t)avio_rb32(pb); /* media time */
         }
         avio_rb32(pb); /* Media rate */
-        if (i == 0 && time >= -1) {
-            sc->time_offset = time != -1 ? time : -duration;
+        if (!time_offset_done) {
+            if (time == -1) {
+                /* delay is in movie timescale */
+                sc->time_offset_delay += duration;
+            } else if (time >= 0) {
+                /* samples to skip is in track timescale */
+                sc->time_offset_skip = time;
+                time_offset_done = 1;
+            }
+            /* timescales may not be known yet, so we can not compute
+             * a single combined time_offset yet */
         }
     }