[2/4] movenc: write AAC pre-roll sample group info

Message ID 20170224202416.13106-2-stebbins@jetheaddev.com
State New
Headers show

Commit Message

John Stebbins Feb. 24, 2017, 8:24 p.m.
This recordes the number of AAC frames that must be decoded before valid
audio is decoded.

It also signals to Apple tools that the encoder delay is explicitly set
in the edit list.  If the roll sample group is omitted, Apply tools will
apply an implicit rule to remove encoder delay which would result in
removing the delay twice.
---
 libavformat/movenc.c  | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/ref/fate/movenc | 32 +++++++++++++-------------
 2 files changed, 79 insertions(+), 16 deletions(-)

Comments

Martin Storsjö Feb. 24, 2017, 9:21 p.m. | #1
On Fri, 24 Feb 2017, John Stebbins wrote:

> This recordes the number of AAC frames that must be decoded before valid
> audio is decoded.
>
> It also signals to Apple tools that the encoder delay is explicitly set
> in the edit list.  If the roll sample group is omitted, Apply tools will
> apply an implicit rule to remove encoder delay which would result in
> removing the delay twice.
> ---
> libavformat/movenc.c  | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
> tests/ref/fate/movenc | 32 +++++++++++++-------------
> 2 files changed, 79 insertions(+), 16 deletions(-)
>
> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
> index 4d351b7..a26dcf7 100644
> --- a/libavformat/movenc.c
> +++ b/libavformat/movenc.c
> @@ -136,6 +136,54 @@ static int mov_write_stco_tag(AVIOContext *pb, MOVTrack *track)
>     return update_size(pb, pos);
> }
> 
> +static int mov_write_sgpd_tag(AVIOContext *pb, int16_t roll)
> +{
> +    int64_t pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +
> +    ffio_wfourcc(pb, "sgpd");
> +    avio_w8(pb, 1); /* version */
> +    avio_w8(pb, 0); /* flags (1) */
> +    avio_wb16(pb, 0); /* flags (2) */
> +    ffio_wfourcc(pb, "roll"); /* grouping type */
> +    avio_wb32(pb, 2); /* table entry length */
> +    avio_wb32(pb, 1); /* table entry count */
> +
> +    /* table data, roll distance
> +     * i.e. number of audio frames to pre-roll after a seek */
> +    avio_wb16(pb, roll);
> +
> +    return update_size(pb, pos);
> +}
> +
> +static int mov_write_sbgp_tag(AVIOContext *pb, MOVTrack *track)
> +{
> +    int count = 0;
> +    int i;
> +    int64_t pos;
> +
> +    for (i = 0; i < track->entry; i++)
> +    {
> +        count += track->cluster[i].entries;
> +    }

The starting brace should be on the same line as for, or omitted

> +
> +    pos = avio_tell(pb);
> +    avio_wb32(pb, 0); /* size */
> +
> +    ffio_wfourcc(pb, "sbgp"); /* atom name */
> +    avio_wb32(pb, 0); /* version & flags */
> +    ffio_wfourcc(pb, "roll"); /* grouping type */
> +    avio_wb32(pb, 1); /* table entry count */
> +
> +    /* table data */
> +    avio_wb32(pb, count);
> +    /* sgpd table index, index values are 1 based
> +     * we write 'roll' sample group at index 1 */
> +    avio_wb32(pb, 1);
> +
> +    return update_size(pb, pos);
> +}
> +
> /* Sample size atom */
> static int mov_write_stsz_tag(AVIOContext *pb, MOVTrack *track)
> {
> @@ -1260,6 +1308,7 @@ static int mov_write_dref_tag(AVIOContext *pb)
> 
> static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track)
> {
> +    MOVMuxContext *mov = s->priv_data;
>     int64_t pos = avio_tell(pb);
>     avio_wb32(pb, 0); /* size */
>     ffio_wfourcc(pb, "stbl");
> @@ -1277,6 +1326,20 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>     mov_write_stsc_tag(pb, track);
>     mov_write_stsz_tag(pb, track);
>     mov_write_stco_tag(pb, track);
> +
> +    if (track->par->codec_id == AV_CODEC_ID_AAC && mov->use_editlist &&
> +        track->start_dts != AV_NOPTS_VALUE &&
> +        (track->start_dts < 0 || track->par->initial_padding > 0)) {
> +        /* Add sgpd and sbgp tags for AAC tracks.
> +         * Apple documentation says they use this as a flag to indicate
> +         * that AAC encoder delay is explicitely set in the edit list.
> +         * If this is omitted, Apple tools will apply an implicit
> +         * rule to remove encoder delay samples, which results in
> +         * removal of 2 * delay and A/V desync. */
> +        mov_write_sgpd_tag(pb, -1);

The magic value -1 could probably be described here. From the code in 
mov_write_sgpd_tag, it seems like it would be a normal actual value, but 
this seems like a special case that isn't explained elsewhere.

Otherwise this seems ok.

// Martin
John Stebbins Feb. 24, 2017, 9:52 p.m. | #2
On 02/24/2017 02:21 PM, Martin Storsjö wrote:
> On Fri, 24 Feb 2017, John Stebbins wrote:
>
>> This recordes the number of AAC frames that must be decoded before valid
>> audio is decoded.
>>
>> It also signals to Apple tools that the encoder delay is explicitly set
>> in the edit list.  If the roll sample group is omitted, Apply tools will
>> apply an implicit rule to remove encoder delay which would result in
>> removing the delay twice.
>> ---
>> libavformat/movenc.c  | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
>> tests/ref/fate/movenc | 32 +++++++++++++-------------
>> 2 files changed, 79 insertions(+), 16 deletions(-)
>>
>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
>> index 4d351b7..a26dcf7 100644
>> --- a/libavformat/movenc.c
>> +++ b/libavformat/movenc.c
>> @@ -136,6 +136,54 @@ static int mov_write_stco_tag(AVIOContext *pb, MOVTrack *track)
>>     return update_size(pb, pos);
>> }
>>
>> +static int mov_write_sgpd_tag(AVIOContext *pb, int16_t roll)
>> +{
>> +    int64_t pos = avio_tell(pb);
>> +    avio_wb32(pb, 0); /* size */
>> +
>> +    ffio_wfourcc(pb, "sgpd");
>> +    avio_w8(pb, 1); /* version */
>> +    avio_w8(pb, 0); /* flags (1) */
>> +    avio_wb16(pb, 0); /* flags (2) */
>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>> +    avio_wb32(pb, 2); /* table entry length */
>> +    avio_wb32(pb, 1); /* table entry count */
>> +
>> +    /* table data, roll distance
>> +     * i.e. number of audio frames to pre-roll after a seek */
>> +    avio_wb16(pb, roll);
>> +
>> +    return update_size(pb, pos);
>> +}
>> +
>> +static int mov_write_sbgp_tag(AVIOContext *pb, MOVTrack *track)
>> +{
>> +    int count = 0;
>> +    int i;
>> +    int64_t pos;
>> +
>> +    for (i = 0; i < track->entry; i++)
>> +    {
>> +        count += track->cluster[i].entries;
>> +    }
> The starting brace should be on the same line as for, or omitted

Oops, I know this, but habits are hard to break.

>> +
>> +    pos = avio_tell(pb);
>> +    avio_wb32(pb, 0); /* size */
>> +
>> +    ffio_wfourcc(pb, "sbgp"); /* atom name */
>> +    avio_wb32(pb, 0); /* version & flags */
>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>> +    avio_wb32(pb, 1); /* table entry count */
>> +
>> +    /* table data */
>> +    avio_wb32(pb, count);
>> +    /* sgpd table index, index values are 1 based
>> +     * we write 'roll' sample group at index 1 */
>> +    avio_wb32(pb, 1);
>> +
>> +    return update_size(pb, pos);
>> +}
>> +
>> /* Sample size atom */
>> static int mov_write_stsz_tag(AVIOContext *pb, MOVTrack *track)
>> {
>> @@ -1260,6 +1308,7 @@ static int mov_write_dref_tag(AVIOContext *pb)
>>
>> static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track)
>> {
>> +    MOVMuxContext *mov = s->priv_data;
>>     int64_t pos = avio_tell(pb);
>>     avio_wb32(pb, 0); /* size */
>>     ffio_wfourcc(pb, "stbl");
>> @@ -1277,6 +1326,20 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>>     mov_write_stsc_tag(pb, track);
>>     mov_write_stsz_tag(pb, track);
>>     mov_write_stco_tag(pb, track);
>> +
>> +    if (track->par->codec_id == AV_CODEC_ID_AAC && mov->use_editlist &&
>> +        track->start_dts != AV_NOPTS_VALUE &&
>> +        (track->start_dts < 0 || track->par->initial_padding > 0)) {
>> +        /* Add sgpd and sbgp tags for AAC tracks.
>> +         * Apple documentation says they use this as a flag to indicate
>> +         * that AAC encoder delay is explicitely set in the edit list.
>> +         * If this is omitted, Apple tools will apply an implicit
>> +         * rule to remove encoder delay samples, which results in
>> +         * removal of 2 * delay and A/V desync. */
>> +        mov_write_sgpd_tag(pb, -1);
> The magic value -1 could probably be described here. From the code in 
> mov_write_sgpd_tag, it seems like it would be a normal actual value, but 
> this seems like a special case that isn't explained elsewhere.
>
> Otherwise this seems ok.
>
>

The parameter "roll" is documented where it is used in mov_write_sgpd_tag.  But I can explain it here as well.

Ideally, the roll value should be passed in via AVCodecParameters so that it wouldn't need to be hard coded for AAC like
this (and we could easily set it for other codecs).  I looked into what it would take.  mov wants this value in frames.
matroska has a similar feature SeekPreRoll that is specified in nanoseconds, So to be generally useful we should add
both a preroll duration and frame duration to AVCodecParameters to satisfy the needs of various containers.   Thoughts
on this?  Should I look into generalizing what I've done here?

I took the short-cut because the more general solution is an API change and also more of an enhancement.  This patch is
more of a bug fix for getting synchronization right when using Apple tools on files generated by libav.
Martin Storsjö Feb. 24, 2017, 10:13 p.m. | #3
On Fri, 24 Feb 2017, John Stebbins wrote:

> On 02/24/2017 02:21 PM, Martin Storsjö wrote:
>> On Fri, 24 Feb 2017, John Stebbins wrote:
>>
>>> This recordes the number of AAC frames that must be decoded before valid
>>> audio is decoded.
>>>
>>> It also signals to Apple tools that the encoder delay is explicitly set
>>> in the edit list.  If the roll sample group is omitted, Apply tools will
>>> apply an implicit rule to remove encoder delay which would result in
>>> removing the delay twice.
>>> ---
>>> libavformat/movenc.c  | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>> tests/ref/fate/movenc | 32 +++++++++++++-------------
>>> 2 files changed, 79 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
>>> index 4d351b7..a26dcf7 100644
>>> --- a/libavformat/movenc.c
>>> +++ b/libavformat/movenc.c
>>> @@ -136,6 +136,54 @@ static int mov_write_stco_tag(AVIOContext *pb, MOVTrack *track)
>>>     return update_size(pb, pos);
>>> }
>>>
>>> +static int mov_write_sgpd_tag(AVIOContext *pb, int16_t roll)
>>> +{
>>> +    int64_t pos = avio_tell(pb);
>>> +    avio_wb32(pb, 0); /* size */
>>> +
>>> +    ffio_wfourcc(pb, "sgpd");
>>> +    avio_w8(pb, 1); /* version */
>>> +    avio_w8(pb, 0); /* flags (1) */
>>> +    avio_wb16(pb, 0); /* flags (2) */
>>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>>> +    avio_wb32(pb, 2); /* table entry length */
>>> +    avio_wb32(pb, 1); /* table entry count */
>>> +
>>> +    /* table data, roll distance
>>> +     * i.e. number of audio frames to pre-roll after a seek */
>>> +    avio_wb16(pb, roll);
>>> +
>>> +    return update_size(pb, pos);
>>> +}
>>> +
>>> +static int mov_write_sbgp_tag(AVIOContext *pb, MOVTrack *track)
>>> +{
>>> +    int count = 0;
>>> +    int i;
>>> +    int64_t pos;
>>> +
>>> +    for (i = 0; i < track->entry; i++)
>>> +    {
>>> +        count += track->cluster[i].entries;
>>> +    }
>> The starting brace should be on the same line as for, or omitted
>
> Oops, I know this, but habits are hard to break.
>
>>> +
>>> +    pos = avio_tell(pb);
>>> +    avio_wb32(pb, 0); /* size */
>>> +
>>> +    ffio_wfourcc(pb, "sbgp"); /* atom name */
>>> +    avio_wb32(pb, 0); /* version & flags */
>>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>>> +    avio_wb32(pb, 1); /* table entry count */
>>> +
>>> +    /* table data */
>>> +    avio_wb32(pb, count);
>>> +    /* sgpd table index, index values are 1 based
>>> +     * we write 'roll' sample group at index 1 */
>>> +    avio_wb32(pb, 1);
>>> +
>>> +    return update_size(pb, pos);
>>> +}
>>> +
>>> /* Sample size atom */
>>> static int mov_write_stsz_tag(AVIOContext *pb, MOVTrack *track)
>>> {
>>> @@ -1260,6 +1308,7 @@ static int mov_write_dref_tag(AVIOContext *pb)
>>>
>>> static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track)
>>> {
>>> +    MOVMuxContext *mov = s->priv_data;
>>>     int64_t pos = avio_tell(pb);
>>>     avio_wb32(pb, 0); /* size */
>>>     ffio_wfourcc(pb, "stbl");
>>> @@ -1277,6 +1326,20 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>>>     mov_write_stsc_tag(pb, track);
>>>     mov_write_stsz_tag(pb, track);
>>>     mov_write_stco_tag(pb, track);
>>> +
>>> +    if (track->par->codec_id == AV_CODEC_ID_AAC && mov->use_editlist &&
>>> +        track->start_dts != AV_NOPTS_VALUE &&
>>> +        (track->start_dts < 0 || track->par->initial_padding > 0)) {
>>> +        /* Add sgpd and sbgp tags for AAC tracks.
>>> +         * Apple documentation says they use this as a flag to indicate
>>> +         * that AAC encoder delay is explicitely set in the edit list.
>>> +         * If this is omitted, Apple tools will apply an implicit
>>> +         * rule to remove encoder delay samples, which results in
>>> +         * removal of 2 * delay and A/V desync. */
>>> +        mov_write_sgpd_tag(pb, -1);
>> The magic value -1 could probably be described here. From the code in
>> mov_write_sgpd_tag, it seems like it would be a normal actual value, but
>> this seems like a special case that isn't explained elsewhere.
>>
>> Otherwise this seems ok.
>>
>>
>
> The parameter "roll" is documented where it is used in 
> mov_write_sgpd_tag.  But I can explain it here as well.

Yes, there I see that it says "number of audio frames to pre-roll after a 
seek". But what does -1 frames mean?

// Martin
John Stebbins Feb. 24, 2017, 10:16 p.m. | #4
On 02/24/2017 03:13 PM, Martin Storsjö wrote:
> On Fri, 24 Feb 2017, John Stebbins wrote:
>
>> On 02/24/2017 02:21 PM, Martin Storsjö wrote:
>>> On Fri, 24 Feb 2017, John Stebbins wrote:
>>>
>>>> This recordes the number of AAC frames that must be decoded before valid
>>>> audio is decoded.
>>>>
>>>> It also signals to Apple tools that the encoder delay is explicitly set
>>>> in the edit list.  If the roll sample group is omitted, Apply tools will
>>>> apply an implicit rule to remove encoder delay which would result in
>>>> removing the delay twice.
>>>> ---
>>>> libavformat/movenc.c  | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>>> tests/ref/fate/movenc | 32 +++++++++++++-------------
>>>> 2 files changed, 79 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
>>>> index 4d351b7..a26dcf7 100644
>>>> --- a/libavformat/movenc.c
>>>> +++ b/libavformat/movenc.c
>>>> @@ -136,6 +136,54 @@ static int mov_write_stco_tag(AVIOContext *pb, MOVTrack *track)
>>>>     return update_size(pb, pos);
>>>> }
>>>>
>>>> +static int mov_write_sgpd_tag(AVIOContext *pb, int16_t roll)
>>>> +{
>>>> +    int64_t pos = avio_tell(pb);
>>>> +    avio_wb32(pb, 0); /* size */
>>>> +
>>>> +    ffio_wfourcc(pb, "sgpd");
>>>> +    avio_w8(pb, 1); /* version */
>>>> +    avio_w8(pb, 0); /* flags (1) */
>>>> +    avio_wb16(pb, 0); /* flags (2) */
>>>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>>>> +    avio_wb32(pb, 2); /* table entry length */
>>>> +    avio_wb32(pb, 1); /* table entry count */
>>>> +
>>>> +    /* table data, roll distance
>>>> +     * i.e. number of audio frames to pre-roll after a seek */
>>>> +    avio_wb16(pb, roll);
>>>> +
>>>> +    return update_size(pb, pos);
>>>> +}
>>>> +
>>>> +static int mov_write_sbgp_tag(AVIOContext *pb, MOVTrack *track)
>>>> +{
>>>> +    int count = 0;
>>>> +    int i;
>>>> +    int64_t pos;
>>>> +
>>>> +    for (i = 0; i < track->entry; i++)
>>>> +    {
>>>> +        count += track->cluster[i].entries;
>>>> +    }
>>> The starting brace should be on the same line as for, or omitted
>> Oops, I know this, but habits are hard to break.
>>
>>>> +
>>>> +    pos = avio_tell(pb);
>>>> +    avio_wb32(pb, 0); /* size */
>>>> +
>>>> +    ffio_wfourcc(pb, "sbgp"); /* atom name */
>>>> +    avio_wb32(pb, 0); /* version & flags */
>>>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>>>> +    avio_wb32(pb, 1); /* table entry count */
>>>> +
>>>> +    /* table data */
>>>> +    avio_wb32(pb, count);
>>>> +    /* sgpd table index, index values are 1 based
>>>> +     * we write 'roll' sample group at index 1 */
>>>> +    avio_wb32(pb, 1);
>>>> +
>>>> +    return update_size(pb, pos);
>>>> +}
>>>> +
>>>> /* Sample size atom */
>>>> static int mov_write_stsz_tag(AVIOContext *pb, MOVTrack *track)
>>>> {
>>>> @@ -1260,6 +1308,7 @@ static int mov_write_dref_tag(AVIOContext *pb)
>>>>
>>>> static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track)
>>>> {
>>>> +    MOVMuxContext *mov = s->priv_data;
>>>>     int64_t pos = avio_tell(pb);
>>>>     avio_wb32(pb, 0); /* size */
>>>>     ffio_wfourcc(pb, "stbl");
>>>> @@ -1277,6 +1326,20 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>>>>     mov_write_stsc_tag(pb, track);
>>>>     mov_write_stsz_tag(pb, track);
>>>>     mov_write_stco_tag(pb, track);
>>>> +
>>>> +    if (track->par->codec_id == AV_CODEC_ID_AAC && mov->use_editlist &&
>>>> +        track->start_dts != AV_NOPTS_VALUE &&
>>>> +        (track->start_dts < 0 || track->par->initial_padding > 0)) {
>>>> +        /* Add sgpd and sbgp tags for AAC tracks.
>>>> +         * Apple documentation says they use this as a flag to indicate
>>>> +         * that AAC encoder delay is explicitely set in the edit list.
>>>> +         * If this is omitted, Apple tools will apply an implicit
>>>> +         * rule to remove encoder delay samples, which results in
>>>> +         * removal of 2 * delay and A/V desync. */
>>>> +        mov_write_sgpd_tag(pb, -1);
>>> The magic value -1 could probably be described here. From the code in
>>> mov_write_sgpd_tag, it seems like it would be a normal actual value, but
>>> this seems like a special case that isn't explained elsewhere.
>>>
>>> Otherwise this seems ok.
>>>
>>>
>> The parameter "roll" is documented where it is used in 
>> mov_write_sgpd_tag.  But I can explain it here as well.
> Yes, there I see that it says "number of audio frames to pre-roll after a 
> seek". But what does -1 frames mean?
>
>

Ah, ok.  Would it be clearer like this? "roll is an offset in frames that should be applied when seeking in order to
pre-roll data to the decoder"
Martin Storsjö Feb. 25, 2017, 10:09 a.m. | #5
On Fri, 24 Feb 2017, John Stebbins wrote:

> On 02/24/2017 03:13 PM, Martin Storsjö wrote:
>> On Fri, 24 Feb 2017, John Stebbins wrote:
>>
>>> On 02/24/2017 02:21 PM, Martin Storsjö wrote:
>>>> On Fri, 24 Feb 2017, John Stebbins wrote:
>>>>
>>>>> This recordes the number of AAC frames that must be decoded before valid
>>>>> audio is decoded.
>>>>>
>>>>> It also signals to Apple tools that the encoder delay is explicitly set
>>>>> in the edit list.  If the roll sample group is omitted, Apply tools will
>>>>> apply an implicit rule to remove encoder delay which would result in
>>>>> removing the delay twice.
>>>>> ---
>>>>> libavformat/movenc.c  | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>> tests/ref/fate/movenc | 32 +++++++++++++-------------
>>>>> 2 files changed, 79 insertions(+), 16 deletions(-)
>>>>>
>>>>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
>>>>> index 4d351b7..a26dcf7 100644
>>>>> --- a/libavformat/movenc.c
>>>>> +++ b/libavformat/movenc.c
>>>>> @@ -136,6 +136,54 @@ static int mov_write_stco_tag(AVIOContext *pb, MOVTrack *track)
>>>>>     return update_size(pb, pos);
>>>>> }
>>>>>
>>>>> +static int mov_write_sgpd_tag(AVIOContext *pb, int16_t roll)
>>>>> +{
>>>>> +    int64_t pos = avio_tell(pb);
>>>>> +    avio_wb32(pb, 0); /* size */
>>>>> +
>>>>> +    ffio_wfourcc(pb, "sgpd");
>>>>> +    avio_w8(pb, 1); /* version */
>>>>> +    avio_w8(pb, 0); /* flags (1) */
>>>>> +    avio_wb16(pb, 0); /* flags (2) */
>>>>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>>>>> +    avio_wb32(pb, 2); /* table entry length */
>>>>> +    avio_wb32(pb, 1); /* table entry count */
>>>>> +
>>>>> +    /* table data, roll distance
>>>>> +     * i.e. number of audio frames to pre-roll after a seek */
>>>>> +    avio_wb16(pb, roll);
>>>>> +
>>>>> +    return update_size(pb, pos);
>>>>> +}
>>>>> +
>>>>> +static int mov_write_sbgp_tag(AVIOContext *pb, MOVTrack *track)
>>>>> +{
>>>>> +    int count = 0;
>>>>> +    int i;
>>>>> +    int64_t pos;
>>>>> +
>>>>> +    for (i = 0; i < track->entry; i++)
>>>>> +    {
>>>>> +        count += track->cluster[i].entries;
>>>>> +    }
>>>> The starting brace should be on the same line as for, or omitted
>>> Oops, I know this, but habits are hard to break.
>>>
>>>>> +
>>>>> +    pos = avio_tell(pb);
>>>>> +    avio_wb32(pb, 0); /* size */
>>>>> +
>>>>> +    ffio_wfourcc(pb, "sbgp"); /* atom name */
>>>>> +    avio_wb32(pb, 0); /* version & flags */
>>>>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>>>>> +    avio_wb32(pb, 1); /* table entry count */
>>>>> +
>>>>> +    /* table data */
>>>>> +    avio_wb32(pb, count);
>>>>> +    /* sgpd table index, index values are 1 based
>>>>> +     * we write 'roll' sample group at index 1 */
>>>>> +    avio_wb32(pb, 1);
>>>>> +
>>>>> +    return update_size(pb, pos);
>>>>> +}
>>>>> +
>>>>> /* Sample size atom */
>>>>> static int mov_write_stsz_tag(AVIOContext *pb, MOVTrack *track)
>>>>> {
>>>>> @@ -1260,6 +1308,7 @@ static int mov_write_dref_tag(AVIOContext *pb)
>>>>>
>>>>> static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track)
>>>>> {
>>>>> +    MOVMuxContext *mov = s->priv_data;
>>>>>     int64_t pos = avio_tell(pb);
>>>>>     avio_wb32(pb, 0); /* size */
>>>>>     ffio_wfourcc(pb, "stbl");
>>>>> @@ -1277,6 +1326,20 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>>>>>     mov_write_stsc_tag(pb, track);
>>>>>     mov_write_stsz_tag(pb, track);
>>>>>     mov_write_stco_tag(pb, track);
>>>>> +
>>>>> +    if (track->par->codec_id == AV_CODEC_ID_AAC && mov->use_editlist &&
>>>>> +        track->start_dts != AV_NOPTS_VALUE &&
>>>>> +        (track->start_dts < 0 || track->par->initial_padding > 0)) {
>>>>> +        /* Add sgpd and sbgp tags for AAC tracks.
>>>>> +         * Apple documentation says they use this as a flag to indicate
>>>>> +         * that AAC encoder delay is explicitely set in the edit list.
>>>>> +         * If this is omitted, Apple tools will apply an implicit
>>>>> +         * rule to remove encoder delay samples, which results in
>>>>> +         * removal of 2 * delay and A/V desync. */
>>>>> +        mov_write_sgpd_tag(pb, -1);
>>>> The magic value -1 could probably be described here. From the code in
>>>> mov_write_sgpd_tag, it seems like it would be a normal actual value, but
>>>> this seems like a special case that isn't explained elsewhere.
>>>>
>>>> Otherwise this seems ok.
>>>>
>>>>
>>> The parameter "roll" is documented where it is used in
>>> mov_write_sgpd_tag.  But I can explain it here as well.
>> Yes, there I see that it says "number of audio frames to pre-roll after a
>> seek". But what does -1 frames mean?
>>
>>
>
> Ah, ok.  Would it be clearer like this? "roll is an offset in frames that should be applied when seeking in order to
> pre-roll data to the decoder"

I see - then it makes more sense. I guess this means in practice it won't 
ever be positive?

// Martin
John Stebbins Feb. 25, 2017, 6:13 p.m. | #6
On 02/25/2017 03:09 AM, Martin Storsjö wrote:
> On Fri, 24 Feb 2017, John Stebbins wrote:
>
>> On 02/24/2017 03:13 PM, Martin Storsjö wrote:
>>> On Fri, 24 Feb 2017, John Stebbins wrote:
>>>
>>>> On 02/24/2017 02:21 PM, Martin Storsjö wrote:
>>>>> On Fri, 24 Feb 2017, John Stebbins wrote:
>>>>>
>>>>>> This recordes the number of AAC frames that must be decoded before valid
>>>>>> audio is decoded.
>>>>>>
>>>>>> It also signals to Apple tools that the encoder delay is explicitly set
>>>>>> in the edit list.  If the roll sample group is omitted, Apply tools will
>>>>>> apply an implicit rule to remove encoder delay which would result in
>>>>>> removing the delay twice.
>>>>>> ---
>>>>>> libavformat/movenc.c  | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>> tests/ref/fate/movenc | 32 +++++++++++++-------------
>>>>>> 2 files changed, 79 insertions(+), 16 deletions(-)
>>>>>>
>>>>>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
>>>>>> index 4d351b7..a26dcf7 100644
>>>>>> --- a/libavformat/movenc.c
>>>>>> +++ b/libavformat/movenc.c
>>>>>> @@ -136,6 +136,54 @@ static int mov_write_stco_tag(AVIOContext *pb, MOVTrack *track)
>>>>>>     return update_size(pb, pos);
>>>>>> }
>>>>>>
>>>>>> +static int mov_write_sgpd_tag(AVIOContext *pb, int16_t roll)
>>>>>> +{
>>>>>> +    int64_t pos = avio_tell(pb);
>>>>>> +    avio_wb32(pb, 0); /* size */
>>>>>> +
>>>>>> +    ffio_wfourcc(pb, "sgpd");
>>>>>> +    avio_w8(pb, 1); /* version */
>>>>>> +    avio_w8(pb, 0); /* flags (1) */
>>>>>> +    avio_wb16(pb, 0); /* flags (2) */
>>>>>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>>>>>> +    avio_wb32(pb, 2); /* table entry length */
>>>>>> +    avio_wb32(pb, 1); /* table entry count */
>>>>>> +
>>>>>> +    /* table data, roll distance
>>>>>> +     * i.e. number of audio frames to pre-roll after a seek */
>>>>>> +    avio_wb16(pb, roll);
>>>>>> +
>>>>>> +    return update_size(pb, pos);
>>>>>> +}
>>>>>> +
>>>>>> +static int mov_write_sbgp_tag(AVIOContext *pb, MOVTrack *track)
>>>>>> +{
>>>>>> +    int count = 0;
>>>>>> +    int i;
>>>>>> +    int64_t pos;
>>>>>> +
>>>>>> +    for (i = 0; i < track->entry; i++)
>>>>>> +    {
>>>>>> +        count += track->cluster[i].entries;
>>>>>> +    }
>>>>> The starting brace should be on the same line as for, or omitted
>>>> Oops, I know this, but habits are hard to break.
>>>>
>>>>>> +
>>>>>> +    pos = avio_tell(pb);
>>>>>> +    avio_wb32(pb, 0); /* size */
>>>>>> +
>>>>>> +    ffio_wfourcc(pb, "sbgp"); /* atom name */
>>>>>> +    avio_wb32(pb, 0); /* version & flags */
>>>>>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>>>>>> +    avio_wb32(pb, 1); /* table entry count */
>>>>>> +
>>>>>> +    /* table data */
>>>>>> +    avio_wb32(pb, count);
>>>>>> +    /* sgpd table index, index values are 1 based
>>>>>> +     * we write 'roll' sample group at index 1 */
>>>>>> +    avio_wb32(pb, 1);
>>>>>> +
>>>>>> +    return update_size(pb, pos);
>>>>>> +}
>>>>>> +
>>>>>> /* Sample size atom */
>>>>>> static int mov_write_stsz_tag(AVIOContext *pb, MOVTrack *track)
>>>>>> {
>>>>>> @@ -1260,6 +1308,7 @@ static int mov_write_dref_tag(AVIOContext *pb)
>>>>>>
>>>>>> static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track)
>>>>>> {
>>>>>> +    MOVMuxContext *mov = s->priv_data;
>>>>>>     int64_t pos = avio_tell(pb);
>>>>>>     avio_wb32(pb, 0); /* size */
>>>>>>     ffio_wfourcc(pb, "stbl");
>>>>>> @@ -1277,6 +1326,20 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>>>>>>     mov_write_stsc_tag(pb, track);
>>>>>>     mov_write_stsz_tag(pb, track);
>>>>>>     mov_write_stco_tag(pb, track);
>>>>>> +
>>>>>> +    if (track->par->codec_id == AV_CODEC_ID_AAC && mov->use_editlist &&
>>>>>> +        track->start_dts != AV_NOPTS_VALUE &&
>>>>>> +        (track->start_dts < 0 || track->par->initial_padding > 0)) {
>>>>>> +        /* Add sgpd and sbgp tags for AAC tracks.
>>>>>> +         * Apple documentation says they use this as a flag to indicate
>>>>>> +         * that AAC encoder delay is explicitely set in the edit list.
>>>>>> +         * If this is omitted, Apple tools will apply an implicit
>>>>>> +         * rule to remove encoder delay samples, which results in
>>>>>> +         * removal of 2 * delay and A/V desync. */
>>>>>> +        mov_write_sgpd_tag(pb, -1);
>>>>> The magic value -1 could probably be described here. From the code in
>>>>> mov_write_sgpd_tag, it seems like it would be a normal actual value, but
>>>>> this seems like a special case that isn't explained elsewhere.
>>>>>
>>>>> Otherwise this seems ok.
>>>>>
>>>>>
>>>> The parameter "roll" is documented where it is used in
>>>> mov_write_sgpd_tag.  But I can explain it here as well.
>>> Yes, there I see that it says "number of audio frames to pre-roll after a
>>> seek". But what does -1 frames mean?
>>>
>>>
>> Ah, ok.  Would it be clearer like this? "roll is an offset in frames that should be applied when seeking in order to
>> pre-roll data to the decoder"
> I see - then it makes more sense. I guess this means in practice it won't 
> ever be positive?
>
>

ISO 14496-12 says, "A positive value indicates the number of samples after the
sample that is a group member that must be decoded such that at the last of these recovery is
complete, i.e. the last sample is correct. A negative value indicates the number of samples before the
sample that is a group member that must be decoded in order for recovery to be complete at the
marked sample."

I should probably just put the above description in the comment since it's quite clear.

Apple documentation only talks about the negative variation. And since the objective of this patch is to fix a sync
problem with Apple tools, I stuck to what the apple documentation says. I could test it with a positive value to see
what the apple tools do in this case if you have a preference for writing this with a positive value.
Yusuke Nakamura Feb. 25, 2017, 6:45 p.m. | #7
2017-02-25 19:09 GMT+09:00 Martin Storsjö <martin@martin.st>:

> On Fri, 24 Feb 2017, John Stebbins wrote:
>
> On 02/24/2017 03:13 PM, Martin Storsjö wrote:
>>
>>> On Fri, 24 Feb 2017, John Stebbins wrote:
>>>
>>> On 02/24/2017 02:21 PM, Martin Storsjö wrote:
>>>>
>>>>> On Fri, 24 Feb 2017, John Stebbins wrote:
>>>>>
>>>>> This recordes the number of AAC frames that must be decoded before
>>>>>> valid
>>>>>> audio is decoded.
>>>>>>
>>>>>> It also signals to Apple tools that the encoder delay is explicitly
>>>>>> set
>>>>>> in the edit list.  If the roll sample group is omitted, Apply tools
>>>>>> will
>>>>>> apply an implicit rule to remove encoder delay which would result in
>>>>>> removing the delay twice.
>>>>>> ---
>>>>>> libavformat/movenc.c  | 63 ++++++++++++++++++++++++++++++
>>>>>> +++++++++++++++++++++
>>>>>> tests/ref/fate/movenc | 32 +++++++++++++-------------
>>>>>> 2 files changed, 79 insertions(+), 16 deletions(-)
>>>>>>
>>>>>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
>>>>>> index 4d351b7..a26dcf7 100644
>>>>>> --- a/libavformat/movenc.c
>>>>>> +++ b/libavformat/movenc.c
>>>>>> @@ -136,6 +136,54 @@ static int mov_write_stco_tag(AVIOContext *pb,
>>>>>> MOVTrack *track)
>>>>>>     return update_size(pb, pos);
>>>>>> }
>>>>>>
>>>>>> +static int mov_write_sgpd_tag(AVIOContext *pb, int16_t roll)
>>>>>> +{
>>>>>> +    int64_t pos = avio_tell(pb);
>>>>>> +    avio_wb32(pb, 0); /* size */
>>>>>> +
>>>>>> +    ffio_wfourcc(pb, "sgpd");
>>>>>> +    avio_w8(pb, 1); /* version */
>>>>>> +    avio_w8(pb, 0); /* flags (1) */
>>>>>> +    avio_wb16(pb, 0); /* flags (2) */
>>>>>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>>>>>> +    avio_wb32(pb, 2); /* table entry length */
>>>>>> +    avio_wb32(pb, 1); /* table entry count */
>>>>>> +
>>>>>> +    /* table data, roll distance
>>>>>> +     * i.e. number of audio frames to pre-roll after a seek */
>>>>>> +    avio_wb16(pb, roll);
>>>>>> +
>>>>>> +    return update_size(pb, pos);
>>>>>> +}
>>>>>> +
>>>>>> +static int mov_write_sbgp_tag(AVIOContext *pb, MOVTrack *track)
>>>>>> +{
>>>>>> +    int count = 0;
>>>>>> +    int i;
>>>>>> +    int64_t pos;
>>>>>> +
>>>>>> +    for (i = 0; i < track->entry; i++)
>>>>>> +    {
>>>>>> +        count += track->cluster[i].entries;
>>>>>> +    }
>>>>>>
>>>>> The starting brace should be on the same line as for, or omitted
>>>>>
>>>> Oops, I know this, but habits are hard to break.
>>>>
>>>> +
>>>>>> +    pos = avio_tell(pb);
>>>>>> +    avio_wb32(pb, 0); /* size */
>>>>>> +
>>>>>> +    ffio_wfourcc(pb, "sbgp"); /* atom name */
>>>>>> +    avio_wb32(pb, 0); /* version & flags */
>>>>>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>>>>>> +    avio_wb32(pb, 1); /* table entry count */
>>>>>> +
>>>>>> +    /* table data */
>>>>>> +    avio_wb32(pb, count);
>>>>>> +    /* sgpd table index, index values are 1 based
>>>>>> +     * we write 'roll' sample group at index 1 */
>>>>>> +    avio_wb32(pb, 1);
>>>>>> +
>>>>>> +    return update_size(pb, pos);
>>>>>> +}
>>>>>> +
>>>>>> /* Sample size atom */
>>>>>> static int mov_write_stsz_tag(AVIOContext *pb, MOVTrack *track)
>>>>>> {
>>>>>> @@ -1260,6 +1308,7 @@ static int mov_write_dref_tag(AVIOContext *pb)
>>>>>>
>>>>>> static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb,
>>>>>> MOVTrack *track)
>>>>>> {
>>>>>> +    MOVMuxContext *mov = s->priv_data;
>>>>>>     int64_t pos = avio_tell(pb);
>>>>>>     avio_wb32(pb, 0); /* size */
>>>>>>     ffio_wfourcc(pb, "stbl");
>>>>>> @@ -1277,6 +1326,20 @@ static int mov_write_stbl_tag(AVFormatContext
>>>>>> *s, AVIOContext *pb, MOVTrack *tra
>>>>>>     mov_write_stsc_tag(pb, track);
>>>>>>     mov_write_stsz_tag(pb, track);
>>>>>>     mov_write_stco_tag(pb, track);
>>>>>> +
>>>>>> +    if (track->par->codec_id == AV_CODEC_ID_AAC && mov->use_editlist
>>>>>> &&
>>>>>> +        track->start_dts != AV_NOPTS_VALUE &&
>>>>>> +        (track->start_dts < 0 || track->par->initial_padding > 0)) {
>>>>>> +        /* Add sgpd and sbgp tags for AAC tracks.
>>>>>> +         * Apple documentation says they use this as a flag to
>>>>>> indicate
>>>>>> +         * that AAC encoder delay is explicitely set in the edit
>>>>>> list.
>>>>>> +         * If this is omitted, Apple tools will apply an implicit
>>>>>> +         * rule to remove encoder delay samples, which results in
>>>>>> +         * removal of 2 * delay and A/V desync. */
>>>>>> +        mov_write_sgpd_tag(pb, -1);
>>>>>>
>>>>> The magic value -1 could probably be described here. From the code in
>>>>> mov_write_sgpd_tag, it seems like it would be a normal actual value,
>>>>> but
>>>>> this seems like a special case that isn't explained elsewhere.
>>>>>
>>>>> Otherwise this seems ok.
>>>>>
>>>>>
>>>>> The parameter "roll" is documented where it is used in
>>>> mov_write_sgpd_tag.  But I can explain it here as well.
>>>>
>>> Yes, there I see that it says "number of audio frames to pre-roll after a
>>> seek". But what does -1 frames mean?
>>>
>>>
>>>
>> Ah, ok.  Would it be clearer like this? "roll is an offset in frames that
>> should be applied when seeking in order to
>> pre-roll data to the decoder"
>>
>
> I see - then it makes more sense. I guess this means in practice it won't
> ever be positive?

I've never seen positive values of roll_distances other than video tracks
(which indicates gradual decoder refresh at recovery point SEI), but I
found the following descriptions in the latest mpeg file format meeting
report.
1.4.10m39988 Proposal for Consistent Definitions of Audio Sync Samples and
Audio Roll Groups

*Disposition: accepted in principle*

We decide to correct the PreRollEntry and the roll_distance. We keep it a
signed value, but restrict it to positive in the PreRollEntry. Into the
part 12 corrigendum. Editors to phrase with the authors.


>
>
> // Martin
> _______________________________________________
> libav-devel mailing list
> libav-devel@libav.org
> https://lists.libav.org/mailman/listinfo/libav-devel
>
Martin Storsjö Feb. 25, 2017, 7 p.m. | #8
On Sat, 25 Feb 2017, John Stebbins wrote:

> On 02/25/2017 03:09 AM, Martin Storsjö wrote:
>> On Fri, 24 Feb 2017, John Stebbins wrote:
>>
>>> On 02/24/2017 03:13 PM, Martin Storsjö wrote:
>>>> On Fri, 24 Feb 2017, John Stebbins wrote:
>>>>
>>>>> On 02/24/2017 02:21 PM, Martin Storsjö wrote:
>>>>>> On Fri, 24 Feb 2017, John Stebbins wrote:
>>>>>>
>>>>>>> This recordes the number of AAC frames that must be decoded before valid
>>>>>>> audio is decoded.
>>>>>>>
>>>>>>> It also signals to Apple tools that the encoder delay is explicitly set
>>>>>>> in the edit list.  If the roll sample group is omitted, Apply tools will
>>>>>>> apply an implicit rule to remove encoder delay which would result in
>>>>>>> removing the delay twice.
>>>>>>> ---
>>>>>>> libavformat/movenc.c  | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>> tests/ref/fate/movenc | 32 +++++++++++++-------------
>>>>>>> 2 files changed, 79 insertions(+), 16 deletions(-)
>>>>>>>
>>>>>>> diff --git a/libavformat/movenc.c b/libavformat/movenc.c
>>>>>>> index 4d351b7..a26dcf7 100644
>>>>>>> --- a/libavformat/movenc.c
>>>>>>> +++ b/libavformat/movenc.c
>>>>>>> @@ -136,6 +136,54 @@ static int mov_write_stco_tag(AVIOContext *pb, MOVTrack *track)
>>>>>>>     return update_size(pb, pos);
>>>>>>> }
>>>>>>>
>>>>>>> +static int mov_write_sgpd_tag(AVIOContext *pb, int16_t roll)
>>>>>>> +{
>>>>>>> +    int64_t pos = avio_tell(pb);
>>>>>>> +    avio_wb32(pb, 0); /* size */
>>>>>>> +
>>>>>>> +    ffio_wfourcc(pb, "sgpd");
>>>>>>> +    avio_w8(pb, 1); /* version */
>>>>>>> +    avio_w8(pb, 0); /* flags (1) */
>>>>>>> +    avio_wb16(pb, 0); /* flags (2) */
>>>>>>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>>>>>>> +    avio_wb32(pb, 2); /* table entry length */
>>>>>>> +    avio_wb32(pb, 1); /* table entry count */
>>>>>>> +
>>>>>>> +    /* table data, roll distance
>>>>>>> +     * i.e. number of audio frames to pre-roll after a seek */
>>>>>>> +    avio_wb16(pb, roll);
>>>>>>> +
>>>>>>> +    return update_size(pb, pos);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int mov_write_sbgp_tag(AVIOContext *pb, MOVTrack *track)
>>>>>>> +{
>>>>>>> +    int count = 0;
>>>>>>> +    int i;
>>>>>>> +    int64_t pos;
>>>>>>> +
>>>>>>> +    for (i = 0; i < track->entry; i++)
>>>>>>> +    {
>>>>>>> +        count += track->cluster[i].entries;
>>>>>>> +    }
>>>>>> The starting brace should be on the same line as for, or omitted
>>>>> Oops, I know this, but habits are hard to break.
>>>>>
>>>>>>> +
>>>>>>> +    pos = avio_tell(pb);
>>>>>>> +    avio_wb32(pb, 0); /* size */
>>>>>>> +
>>>>>>> +    ffio_wfourcc(pb, "sbgp"); /* atom name */
>>>>>>> +    avio_wb32(pb, 0); /* version & flags */
>>>>>>> +    ffio_wfourcc(pb, "roll"); /* grouping type */
>>>>>>> +    avio_wb32(pb, 1); /* table entry count */
>>>>>>> +
>>>>>>> +    /* table data */
>>>>>>> +    avio_wb32(pb, count);
>>>>>>> +    /* sgpd table index, index values are 1 based
>>>>>>> +     * we write 'roll' sample group at index 1 */
>>>>>>> +    avio_wb32(pb, 1);
>>>>>>> +
>>>>>>> +    return update_size(pb, pos);
>>>>>>> +}
>>>>>>> +
>>>>>>> /* Sample size atom */
>>>>>>> static int mov_write_stsz_tag(AVIOContext *pb, MOVTrack *track)
>>>>>>> {
>>>>>>> @@ -1260,6 +1308,7 @@ static int mov_write_dref_tag(AVIOContext *pb)
>>>>>>>
>>>>>>> static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track)
>>>>>>> {
>>>>>>> +    MOVMuxContext *mov = s->priv_data;
>>>>>>>     int64_t pos = avio_tell(pb);
>>>>>>>     avio_wb32(pb, 0); /* size */
>>>>>>>     ffio_wfourcc(pb, "stbl");
>>>>>>> @@ -1277,6 +1326,20 @@ static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
>>>>>>>     mov_write_stsc_tag(pb, track);
>>>>>>>     mov_write_stsz_tag(pb, track);
>>>>>>>     mov_write_stco_tag(pb, track);
>>>>>>> +
>>>>>>> +    if (track->par->codec_id == AV_CODEC_ID_AAC && mov->use_editlist &&
>>>>>>> +        track->start_dts != AV_NOPTS_VALUE &&
>>>>>>> +        (track->start_dts < 0 || track->par->initial_padding > 0)) {
>>>>>>> +        /* Add sgpd and sbgp tags for AAC tracks.
>>>>>>> +         * Apple documentation says they use this as a flag to indicate
>>>>>>> +         * that AAC encoder delay is explicitely set in the edit list.
>>>>>>> +         * If this is omitted, Apple tools will apply an implicit
>>>>>>> +         * rule to remove encoder delay samples, which results in
>>>>>>> +         * removal of 2 * delay and A/V desync. */
>>>>>>> +        mov_write_sgpd_tag(pb, -1);
>>>>>> The magic value -1 could probably be described here. From the code in
>>>>>> mov_write_sgpd_tag, it seems like it would be a normal actual value, but
>>>>>> this seems like a special case that isn't explained elsewhere.
>>>>>>
>>>>>> Otherwise this seems ok.
>>>>>>
>>>>>>
>>>>> The parameter "roll" is documented where it is used in
>>>>> mov_write_sgpd_tag.  But I can explain it here as well.
>>>> Yes, there I see that it says "number of audio frames to pre-roll after a
>>>> seek". But what does -1 frames mean?
>>>>
>>>>
>>> Ah, ok.  Would it be clearer like this? "roll is an offset in frames that should be applied when seeking in order to
>>> pre-roll data to the decoder"
>> I see - then it makes more sense. I guess this means in practice it won't
>> ever be positive?
>>
>>
>
> ISO 14496-12 says, "A positive value indicates the number of samples after the
> sample that is a group member that must be decoded such that at the last of these recovery is
> complete, i.e. the last sample is correct. A negative value indicates the number of samples before the
> sample that is a group member that must be decoded in order for recovery to be complete at the
> marked sample."
>
> I should probably just put the above description in the comment since it's quite clear.
>
> Apple documentation only talks about the negative variation. And since the objective of this patch is to fix a sync
> problem with Apple tools, I stuck to what the apple documentation says. I could test it with a positive value to see
> what the apple tools do in this case if you have a preference for writing this with a positive value.

No, I don't have any preference for that - I just found it confusing. The 
description above makes it clear though.

// Martin

Patch

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 4d351b7..a26dcf7 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -136,6 +136,54 @@  static int mov_write_stco_tag(AVIOContext *pb, MOVTrack *track)
     return update_size(pb, pos);
 }
 
+static int mov_write_sgpd_tag(AVIOContext *pb, int16_t roll)
+{
+    int64_t pos = avio_tell(pb);
+    avio_wb32(pb, 0); /* size */
+
+    ffio_wfourcc(pb, "sgpd");
+    avio_w8(pb, 1); /* version */
+    avio_w8(pb, 0); /* flags (1) */
+    avio_wb16(pb, 0); /* flags (2) */
+    ffio_wfourcc(pb, "roll"); /* grouping type */
+    avio_wb32(pb, 2); /* table entry length */
+    avio_wb32(pb, 1); /* table entry count */
+
+    /* table data, roll distance
+     * i.e. number of audio frames to pre-roll after a seek */
+    avio_wb16(pb, roll);
+
+    return update_size(pb, pos);
+}
+
+static int mov_write_sbgp_tag(AVIOContext *pb, MOVTrack *track)
+{
+    int count = 0;
+    int i;
+    int64_t pos;
+
+    for (i = 0; i < track->entry; i++)
+    {
+        count += track->cluster[i].entries;
+    }
+
+    pos = avio_tell(pb);
+    avio_wb32(pb, 0); /* size */
+
+    ffio_wfourcc(pb, "sbgp"); /* atom name */
+    avio_wb32(pb, 0); /* version & flags */
+    ffio_wfourcc(pb, "roll"); /* grouping type */
+    avio_wb32(pb, 1); /* table entry count */
+
+    /* table data */
+    avio_wb32(pb, count);
+    /* sgpd table index, index values are 1 based
+     * we write 'roll' sample group at index 1 */
+    avio_wb32(pb, 1);
+
+    return update_size(pb, pos);
+}
+
 /* Sample size atom */
 static int mov_write_stsz_tag(AVIOContext *pb, MOVTrack *track)
 {
@@ -1260,6 +1308,7 @@  static int mov_write_dref_tag(AVIOContext *pb)
 
 static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *track)
 {
+    MOVMuxContext *mov = s->priv_data;
     int64_t pos = avio_tell(pb);
     avio_wb32(pb, 0); /* size */
     ffio_wfourcc(pb, "stbl");
@@ -1277,6 +1326,20 @@  static int mov_write_stbl_tag(AVFormatContext *s, AVIOContext *pb, MOVTrack *tra
     mov_write_stsc_tag(pb, track);
     mov_write_stsz_tag(pb, track);
     mov_write_stco_tag(pb, track);
+
+    if (track->par->codec_id == AV_CODEC_ID_AAC && mov->use_editlist &&
+        track->start_dts != AV_NOPTS_VALUE &&
+        (track->start_dts < 0 || track->par->initial_padding > 0)) {
+        /* Add sgpd and sbgp tags for AAC tracks.
+         * Apple documentation says they use this as a flag to indicate
+         * that AAC encoder delay is explicitely set in the edit list.
+         * If this is omitted, Apple tools will apply an implicit
+         * rule to remove encoder delay samples, which results in
+         * removal of 2 * delay and A/V desync. */
+        mov_write_sgpd_tag(pb, -1);
+        mov_write_sbgp_tag(pb, track);
+    }
+
     return update_size(pb, pos);
 }
 
diff --git a/tests/ref/fate/movenc b/tests/ref/fate/movenc
index a0a1700..05a171d 100644
--- a/tests/ref/fate/movenc
+++ b/tests/ref/fate/movenc
@@ -4,10 +4,10 @@  write_data len 496, time 1000000, type sync atom moof
 write_data len 110, time nopts, type trailer atom -
 07cee26b35b140ae50268c3083e2d880 2449 non-empty-moov
 write_data len 36, time nopts, type header atom ftyp
-write_data len 2135, time nopts, type header atom -
+write_data len 2189, time nopts, type header atom -
 write_data len 616, time 966667, type sync atom moof
 write_data len 110, time nopts, type trailer atom -
-c3c47c2c9566cb410e0832c37f4f8527 2897 non-empty-moov-elst
+6548a2ff72e337dfc1da35482ff6d244 2951 non-empty-moov-elst
 write_data len 36, time nopts, type header atom ftyp
 write_data len 2055, time nopts, type header atom -
 write_data len 616, time 1000000, type sync atom moof
@@ -44,11 +44,11 @@  write_data len 500, time 0, type sync atom moof
 write_data len 496, time 1000000, type sync atom moof
 write_data len 148, time nopts, type trailer atom -
 89ea214e1d1079556164664eea9a7884 2327 delay-moov
-write_data len 1255, time nopts, type header atom ftyp
+write_data len 1309, time nopts, type header atom ftyp
 write_data len 620, time -33333, type sync atom moof
 write_data len 616, time 966667, type sync atom moof
 write_data len 148, time nopts, type trailer atom -
-8fd78a5a91d73a735da53ac02f844177 2639 delay-moov-elst
+795a3f0c76a0e9bc4ac1e3405e1d42cd 2693 delay-moov-elst
 write_data len 1183, time nopts, type header atom ftyp
 write_data len 596, time 0, type sync atom moof
 write_data len 67, time nopts, type trailer atom -
@@ -97,39 +97,39 @@  fdc08ccfb9ca1a7a63a8e82a99e28e83 1207 delay-moov-elst-init-discont
 write_data len 704, time 966667, type sync atom sidx
 41afdc44b0e376fae49a730afe0c53c2 704 delay-moov-elst-second-frag-discont
 write_data len 110, time nopts, type trailer atom -
-write_data len 1243, time nopts, type header atom ftyp
-57c113cd2baf7b231355eee6980fb6b5 1243 delay-moov-elst-signal-init
+write_data len 1297, time nopts, type header atom ftyp
+9b76e16f7f7835b18b461ac2797f10eb 1297 delay-moov-elst-signal-init
 write_data len 708, time -33333, type sync atom sidx
 write_data len 704, time 966667, type sync atom sidx
 13b8487a4f004ec9f1db543aee1e5e18 704 delay-moov-elst-signal-second-frag
 write_data len 148, time nopts, type trailer atom -
-write_data len 1243, time nopts, type header atom ftyp
-57c113cd2baf7b231355eee6980fb6b5 1243 delay-moov-elst-signal-init-discont
+write_data len 1297, time nopts, type header atom ftyp
+9b76e16f7f7835b18b461ac2797f10eb 1297 delay-moov-elst-signal-init-discont
 write_data len 704, time 966667, type sync atom sidx
 13b8487a4f004ec9f1db543aee1e5e18 704 delay-moov-elst-signal-second-frag-discont
 write_data len 110, time nopts, type trailer atom -
-write_data len 1243, time nopts, type header atom ftyp
+write_data len 1297, time nopts, type header atom ftyp
 write_data len 1552, time -333333, type sync atom sidx
 write_data len 704, time 5166667, type sync atom sidx
 write_data len 148, time nopts, type trailer atom -
-5e676152714f9478b5f74ce67cd7ed60 3647 vfr
-write_data len 1243, time nopts, type header atom ftyp
+2d775976518508c7ea633cbe64cd75d2 3701 vfr
+write_data len 1297, time nopts, type header atom ftyp
 write_data len 1552, time -333333, type sync atom sidx
 write_data len 704, time 5166667, type sync atom sidx
 write_data len 148, time nopts, type trailer atom -
-5e676152714f9478b5f74ce67cd7ed60 3647 vfr-noduration
-write_data len 1255, time nopts, type header atom ftyp
+2d775976518508c7ea633cbe64cd75d2 3701 vfr-noduration
+write_data len 1309, time nopts, type header atom ftyp
 write_data len 1500, time -333333, type sync atom moof
 write_data len 620, time nopts, type unknown atom -
 write_data len 1500, time 9666667, type sync atom moof
 write_data len 664, time nopts, type unknown atom -
 write_data len 148, time nopts, type trailer atom -
-03766894d839e5fcb1edb88498d812f7 5687 large_frag
-write_data len 1255, time nopts, type header atom ftyp
+a4b05f1a8273dc7bb4f46744717f9ca9 5741 large_frag
+write_data len 1309, time nopts, type header atom ftyp
 write_data len 508, time -33333, type sync atom moof
 write_data len 372, time 800000, type boundary atom moof
 write_data len 328, time 1266667, type boundary atom moof
 write_data len 476, time 1566667, type sync atom moof
 write_data len 340, time 2233333, type boundary atom moof
 write_data len 262, time nopts, type trailer atom -
-a4280bdc23af2c4334ec2da3fa946d3a 3541 vfr-noduration-interleave
+e605de9fc384d64aacee5ea6844539e6 3595 vfr-noduration-interleave