movdec: Parse the sdtp atom

Message ID 1329596013-65883-1-git-send-email-martin@martin.st
State Superseded
Headers show

Commit Message

Martin Storsjö Feb. 18, 2012, 8:13 p.m.
Set keyframe flags based on this atom, too.

This atom contains info about which samples are keyframes in
some files where it isn't indicated in any other way. (Smooth
streaming fragments produced by Wowza and ismv files produced
by Sorenson Squeeze don't indicate any difference at all between
keyframes and nonkeyframes via sample flags.)
---
Updated the subject line, added defines for the bitfields.

 libavformat/isom.h |   11 +++++++++++
 libavformat/mov.c  |   38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 0 deletions(-)

Comments

Yusuke Nakamura Feb. 18, 2012, 9:49 p.m. | #1
2012/2/19 Martin Storsjö <martin@martin.st>

> Set keyframe flags based on this atom, too.
>
> This atom contains info about which samples are keyframes in
> some files where it isn't indicated in any other way. (Smooth
> streaming fragments produced by Wowza and ismv files produced
> by Sorenson Squeeze don't indicate any difference at all between
> keyframes and nonkeyframes via sample flags.)
> ---
> Updated the subject line, added defines for the bitfields.
>
>  libavformat/isom.h |   11 +++++++++++
>  libavformat/mov.c  |   38 ++++++++++++++++++++++++++++++++++++++
>  2 files changed, 49 insertions(+), 0 deletions(-)
>
> diff --git a/libavformat/isom.h b/libavformat/isom.h
> index 32c4b3f..fd457fa 100644
> --- a/libavformat/isom.h
> +++ b/libavformat/isom.h
> @@ -77,6 +77,7 @@ typedef struct {
>     unsigned duration;
>     unsigned size;
>     unsigned flags;
> +    unsigned trun_entries;
>  } MOVFragment;
>
>  typedef struct {
> @@ -181,6 +182,16 @@ void ff_mp4_parse_es_descr(AVIOContext *pb, int *es_id);
>  #define MOV_FRAG_SAMPLE_FLAG_DEPENDS_NO                0x02000000
>  #define MOV_FRAG_SAMPLE_FLAG_DEPENDS_YES               0x01000000
>
> +#define MOV_SDTP_DEPENDS_MASK    0x30
> +#define MOV_SDTP_DEPENDS_YES     0x10
> +#define MOV_SDTP_DEPENDS_NO      0x20
> +#define MOV_SDTP_DEPENDED_MASK   0x0c
> +#define MOV_SDTP_DEPENDED_NO     0x04
> +#define MOV_SDTP_DEPENDED_YES    0x08
> +#define MOV_SDTP_REDUNDANCY_MASK 0x03
> +#define MOV_SDTP_REDUNDANCY_YES  0x01
> +#define MOV_SDTP_REDUNDANCY_NO   0x02
> +
>  int ff_mov_read_esds(AVFormatContext *fc, AVIOContext *pb, MOVAtom atom);
>  enum CodecID ff_mov_get_lpcm_codec_id(int bps, int flags);
>
> diff --git a/libavformat/mov.c b/libavformat/mov.c
> index fbc7223..c9e400e 100644
> --- a/libavformat/mov.c
> +++ b/libavformat/mov.c
> @@ -2177,6 +2177,7 @@ static int mov_read_tfhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
>                      avio_rb32(pb) : trex->size;
>     frag->flags    = flags & MOV_TFHD_DEFAULT_FLAGS ?
>                      avio_rb32(pb) : trex->flags;
> +    frag->trun_entries = 0;
>     av_dlog(c->fc, "frag flags 0x%x\n", frag->flags);
>     return 0;
>  }
> @@ -2295,10 +2296,46 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
>         sc->data_size += sample_size;
>     }
>     frag->moof_offset = offset;
> +    frag->trun_entries = entries;
>     st->duration = sc->track_end = dts + sc->time_offset;
>     return 0;
>  }
>
> +static int mov_read_sdtp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
> +{
> +    MOVFragment *frag = &c->fragment;
> +    AVStream *st = NULL;
> +    int i;
> +
> +    /* This seems to be used in ismv files, where the entries normally are
> +     * 0x28 for audio samples, 0x24 for video keyframes and 0x14 for video
> +     * nonkeyframes. For some ismv files, this is the only way of identifying
> +     * keyframes.
> +     */
> +
> +    for (i = 0; i < c->fc->nb_streams && !st; i++)
> +        if (c->fc->streams[i]->id == frag->track_id)
> +            st = c->fc->streams[i];
> +    if (!st) {
> +        av_log(c->fc, AV_LOG_ERROR,
> +               "could not find corresponding track id %d\n", frag->track_id);
> +        return AVERROR_INVALIDDATA;
> +    }
> +    avio_r8(pb); /* version */
> +    avio_rb24(pb); /* flags */
> +
> +    if (frag->trun_entries == atom.size - 4 &&
> +        st->nb_index_entries >= frag->trun_entries) {
> +        for (i = 0; i < frag->trun_entries; i++) {
> +            int entry = st->nb_index_entries - frag->trun_entries + i;
> +            AVIndexEntry *ie = &st->index_entries[entry];
> +            if (avio_r8(pb) & MOV_SDTP_DEPENDS_NO)
> +                ie->flags |= AVINDEX_KEYFRAME;
> +        }
> +    }
> +    return 0;
> +}
> +
>  /* this atom should be null (from specs), but some buggy files put the 'moov' atom inside it... */
>  /* like the files created with Adobe Premiere 5.0, for samples see */
>  /* http://graphics.tudelft.nl/~wouter/publications/soundtests/ */
> @@ -2458,6 +2495,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
>  { MKTAG('c','h','a','p'), mov_read_chap },
>  { MKTAG('t','r','e','x'), mov_read_trex },
>  { MKTAG('t','r','u','n'), mov_read_trun },
> +{ MKTAG('s','d','t','p'), mov_read_sdtp },
>  { MKTAG('u','d','t','a'), mov_read_default },
>  { MKTAG('w','a','v','e'), mov_read_wave },
>  { MKTAG('e','s','d','s'), mov_read_esds },
> --
> 1.7.3.1
>
> _______________________________________________
> libav-devel mailing list
> libav-devel@libav.org
> https://lists.libav.org/mailman/listinfo/libav-devel
>


[mov,mp4,m4a,3gp,3g2,mj2 @ 03d22600] could not find corresponding track id 0
[mov,mp4,m4a,3gp,3g2,mj2 @ 03d22600] error reading header: -1094995529

This patch fails to open any ISOBMFF/QTFF file with an sdtp atom under the stbl atom.

sample_depends_on = 2 doesn't mean a random access point (e.g. an H.264
stream can contain non-RAP I-pictures).
How do you distinguish it from the issue for some ismv files?
Martin Storsjö Feb. 18, 2012, 10:15 p.m. | #2
On Sun, 19 Feb 2012, Yusuke Nakamura wrote:

> 2012/2/19 Martin Storsjö <martin@martin.st>
> 
> 
> [mov,mp4,m4a,3gp,3g2,mj2 @ 03d22600] could not find corresponding track id 0
> [mov,mp4,m4a,3gp,3g2,mj2 @ 03d22600] error reading header: -1094995529
> 
> This patch fails to open any ISOBMFF/QTFF with sdtp atom under stbl atom.

Ah, I'll fix that. Currently, this patch only uses this atom if it is 
within a traf, but it should just ignore it if it is found elsewhere. Can 
you share such a file?

> sample_depends_on = 2 doesn't mean random accessible point (e.g. H.264
> stream can contain non-RAP I-pictures).

Hmm, the spec says "2: this sample does not depend on others (I picture);" 
- doesn't that imply that it is randomly accessible as it doesn't depend 
on anything else?

> How do you distinguish it from the issue for some ismv files?

The issue with the ismv files is that normally(?) keyframes in fragmented 
mp4 files are indicated via the sample flags.

For ismv files, the sample flags fields seem to follow some unwritten 
definition. Most ismv files have the sample flags 0x8002 for audio 
samples, 0x4002 for video keyframes and 0x4001 for non keyframes. 
(According to the specs, all these bits are part of 
sample_degradation_priority, but that really doesn't make any sense.) Some 
ismv files have the sample flags 0x4001 for all video samples, and some 
ismv files don't include sample flags at all.

All these files include sdtp atoms though, which seem to mark the 
keyframes just fine, where audio samples are marked as 0x28 
(sample_depends_on=2, sample_is_depended_on=2), video keyframes as 0x24 
(sample_depends_on=2, sample_is_depended_on=1) and video nonkeyframes as 
0x14 (sample_depends_on=1, sample_is_depended_on=1).

Thus, setting the keyframe flag if sample_depends_on=2 seemed like a safe 
solution to me.

// Martin
Yusuke Nakamura Feb. 18, 2012, 11:02 p.m. | #3
2012/2/19 Martin Storsjö <martin@martin.st>

> On Sun, 19 Feb 2012, Yusuke Nakamura wrote:
>
>  sample_depends_on = 2 doesn't mean random accessible point (e.g. H.264
>> stream can contain non-RAP I-pictures).
>>
>
> Hmm, the spec says "2: this sample does not depend on others (I picture);"
> - doesn't that imply that it is randomly accessible as it doesn't depend on
> anything else?
>
>
Let's say I is IDR-picture, i is non-IDR I-picture and P is P-picture,
and consider a coded video sequence I[0]P[1]i[2]P[3] .
Here, P[3] depends on P[1] and P[1] depends on I[0].
Then if you start decoding at i[2], P[3] cannot be decoded correctly.
Therefore, i[2] can have sample_depends_on=2 but is not a random access
sample.
Martin Storsjö Feb. 18, 2012, 11:17 p.m. | #4
On Sun, 19 Feb 2012, Yusuke Nakamura wrote:

> Let's say I is IDR-picture, i is non-IDR I-picture and P is P-picture,
> and consider a coded video sequence I[0]P[1]i[2]P[3] .
> Here, P[3] depends on P[1] and P[1] depends on I[0].
> Then if you start to decode i[2], P[3] cannot be decodable correctly.
> Therefore, i[2] can have sample_depends_on=2 but is not random accessible
> sample.

Ah, I see.

Do you have any better suggestion on how to set the keyframe flag for that 
kind of ismv file, where this is the only indication of them? 
Without it, stream copy normally fails, and seeking fails.

// Martin

Patch

diff --git a/libavformat/isom.h b/libavformat/isom.h
index 32c4b3f..fd457fa 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -77,6 +77,7 @@  typedef struct {
     unsigned duration;
     unsigned size;
     unsigned flags;
+    unsigned trun_entries;
 } MOVFragment;
 
 typedef struct {
@@ -181,6 +182,16 @@  void ff_mp4_parse_es_descr(AVIOContext *pb, int *es_id);
 #define MOV_FRAG_SAMPLE_FLAG_DEPENDS_NO                0x02000000
 #define MOV_FRAG_SAMPLE_FLAG_DEPENDS_YES               0x01000000
 
+#define MOV_SDTP_DEPENDS_MASK    0x30
+#define MOV_SDTP_DEPENDS_YES     0x10
+#define MOV_SDTP_DEPENDS_NO      0x20
+#define MOV_SDTP_DEPENDED_MASK   0x0c
+#define MOV_SDTP_DEPENDED_NO     0x04
+#define MOV_SDTP_DEPENDED_YES    0x08
+#define MOV_SDTP_REDUNDANCY_MASK 0x03
+#define MOV_SDTP_REDUNDANCY_YES  0x01
+#define MOV_SDTP_REDUNDANCY_NO   0x02
+
 int ff_mov_read_esds(AVFormatContext *fc, AVIOContext *pb, MOVAtom atom);
 enum CodecID ff_mov_get_lpcm_codec_id(int bps, int flags);
 
diff --git a/libavformat/mov.c b/libavformat/mov.c
index fbc7223..c9e400e 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -2177,6 +2177,7 @@  static int mov_read_tfhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                      avio_rb32(pb) : trex->size;
     frag->flags    = flags & MOV_TFHD_DEFAULT_FLAGS ?
                      avio_rb32(pb) : trex->flags;
+    frag->trun_entries = 0;
     av_dlog(c->fc, "frag flags 0x%x\n", frag->flags);
     return 0;
 }
@@ -2295,10 +2296,46 @@  static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         sc->data_size += sample_size;
     }
     frag->moof_offset = offset;
+    frag->trun_entries = entries;
     st->duration = sc->track_end = dts + sc->time_offset;
     return 0;
 }
 
+static int mov_read_sdtp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    MOVFragment *frag = &c->fragment;
+    AVStream *st = NULL;
+    int i;
+
+    /* This seems to be used in ismv files, where the entries normally are
+     * 0x28 for audio samples, 0x24 for video keyframes and 0x14 for video
+     * nonkeyframes. For some ismv files, this is the only way of identifying
+     * keyframes.
+     */
+
+    for (i = 0; i < c->fc->nb_streams && !st; i++)
+        if (c->fc->streams[i]->id == frag->track_id)
+            st = c->fc->streams[i];
+    if (!st) {
+        av_log(c->fc, AV_LOG_ERROR,
+               "could not find corresponding track id %d\n", frag->track_id);
+        return AVERROR_INVALIDDATA;
+    }
+    avio_r8(pb); /* version */
+    avio_rb24(pb); /* flags */
+
+    if (frag->trun_entries == atom.size - 4 &&
+        st->nb_index_entries >= frag->trun_entries) {
+        for (i = 0; i < frag->trun_entries; i++) {
+            int entry = st->nb_index_entries - frag->trun_entries + i;
+            AVIndexEntry *ie = &st->index_entries[entry];
+            if (avio_r8(pb) & MOV_SDTP_DEPENDS_NO)
+                ie->flags |= AVINDEX_KEYFRAME;
+        }
+    }
+    return 0;
+}
+
 /* this atom should be null (from specs), but some buggy files put the 'moov' atom inside it... */
 /* like the files created with Adobe Premiere 5.0, for samples see */
 /* http://graphics.tudelft.nl/~wouter/publications/soundtests/ */
@@ -2458,6 +2495,7 @@  static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('c','h','a','p'), mov_read_chap },
 { MKTAG('t','r','e','x'), mov_read_trex },
 { MKTAG('t','r','u','n'), mov_read_trun },
+{ MKTAG('s','d','t','p'), mov_read_sdtp },
 { MKTAG('u','d','t','a'), mov_read_default },
 { MKTAG('w','a','v','e'), mov_read_wave },
 { MKTAG('e','s','d','s'), mov_read_esds },