[RFC] avcodec: Add an option for trimming out the audio encoder preroll from the content itself

Message ID 20170515111653.4592-1-martin@martin.st
State New
Headers show

Commit Message

Martin Storsjö May 15, 2017, 11:16 a.m.
This avoids extending the encoded audio, while compromising the
content of the start of the audio stream.

This allows e.g. encoding audio without shifting data forwards, for
containers that lack signalling of the preroll (or for e.g. avoiding
edit lists in mp4, to ease conformance with the DASH-IF interoperability
guidelines).

This is only implemented in avcodec_encode_audio2 right now, not
in avcodec_send_frame (no encoders currently implement the
send_frame function).
---
 libavcodec/avcodec.h       |  2 ++
 libavcodec/encode.c        | 74 +++++++++++++++++++++++++++++++++++++++++++++-
 libavcodec/internal.h      |  9 ++++++
 libavcodec/options_table.h |  1 +
 libavcodec/utils.c         | 28 ++++++++++++++++++
 5 files changed, 113 insertions(+), 1 deletion(-)

Comments

Luca Barbato May 15, 2017, 1:37 p.m. | #1
On 5/15/17 1:16 PM, Martin Storsjö wrote:
> +        struct AudioFrameBuffer *cur_buffer, *next_buffer;
> +        cur_buffer  = &avctx->internal->audio_frames[avctx->internal->cur_audio_frame];
> +        next_buffer = &avctx->internal->audio_frames[!avctx->internal->cur_audio_frame];
> +        if (frame) {
> +            if (avctx->internal->samples_to_skip >= frame->nb_samples) {
> +                avctx->internal->samples_to_skip -= frame->nb_samples;
> +                av_packet_unref(avpkt);
> +                av_init_packet(avpkt);
> +                return 0;
> +            }
> +            if (avctx->internal->samples_to_skip || cur_buffer->nb_samples) {
> +                int src_offset = 0;
> +                int samples;
> +
> +                if (avctx->internal->samples_to_skip) {
> +                    src_offset = avctx->internal->samples_to_skip;
> +                    avctx->internal->samples_to_skip = 0;
> +                }
> +
> +                if (cur_buffer->nb_samples == 0) {
> +                    cur_buffer->pts = frame->pts + av_rescale_q(src_offset, avctx->time_base, (AVRational){ 1, avctx->sample_rate });
> +                }
> +                samples = FFMIN(avctx->frame_size - cur_buffer->nb_samples,
> +                                frame->nb_samples - src_offset);
> +                av_samples_copy(cur_buffer->data, frame->extended_data, cur_buffer->nb_samples, src_offset, samples, avctx->channels, avctx->sample_fmt);
> +                cur_buffer->nb_samples += samples;
> +                src_offset += samples;
> +                if (cur_buffer->nb_samples != avctx->frame_size) {
> +                    av_packet_unref(avpkt);
> +                    av_init_packet(avpkt);
> +                    return 0;
> +                }
> +                tmp2               = *frame;
> +                tmp2.extended_data = cur_buffer->data;
> +                tmp2.nb_samples    = avctx->frame_size;
> +                tmp2.pts           = cur_buffer->pts;
> +                memcpy(tmp2.data, tmp2.extended_data,
> +                       FFMIN(AV_NUM_DATA_POINTERS, avctx->channels) * sizeof(uint8_t*));
> +
> +                avctx->internal->cur_audio_frame = !avctx->internal->cur_audio_frame;
> +                next_buffer->nb_samples = 0;
> +
> +                if (src_offset < frame->nb_samples) {
> +                    samples = FFMIN(frame->nb_samples - src_offset, avctx->frame_size); // This should always be less than avctx->frame_size
> +                    next_buffer->pts = frame->pts + av_rescale_q(src_offset, avctx->time_base, (AVRational){ 1, avctx->sample_rate });
> +                    av_samples_copy(next_buffer->data, frame->extended_data, next_buffer->nb_samples, src_offset, samples, avctx->channels, avctx->sample_fmt);
> +                    next_buffer->nb_samples += samples;
> +                }
> +
> +                frame = &tmp2;
> +            }
> +        }
> +        if (!frame && cur_buffer->nb_samples > 0) {
> +            memset(&tmp2, 0, sizeof(tmp2));
> +            tmp2.linesize[0]    = cur_buffer->linesize[0];
> +            tmp2.extended_data  = cur_buffer->data;
> +            tmp2.nb_samples     = cur_buffer->nb_samples;
> +            tmp2.format         = avctx->sample_fmt;
> +            tmp2.sample_rate    = avctx->sample_rate;
> +            tmp2.channel_layout = avctx->channel_layout;
> +            tmp2.pts = cur_buffer->pts;
> +            memcpy(tmp2.data, tmp2.extended_data,
> +                   FFMIN(AV_NUM_DATA_POINTERS, avctx->channels) * sizeof(uint8_t*));
> +            cur_buffer->nb_samples = 0;
> +            frame = &tmp2;
> +        }

This should be a standalone function; the same goes for the one in utils.

Besides that, I'd look again at how you are feeding out the last samples,
since I'm not sure it is the best way to do that.

lu

Patch

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 162f1abe4b..eec7eb5e76 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2725,6 +2725,8 @@  typedef struct AVCodecContext {
      *             AVCodecContext.get_format callback)
      */
     int hwaccel_flags;
+
+    int trim_preroll;
 } AVCodecContext;
 
 /**
diff --git a/libavcodec/encode.c b/libavcodec/encode.c
index 9bb7ae5bde..241b8448b4 100644
--- a/libavcodec/encode.c
+++ b/libavcodec/encode.c
@@ -18,8 +18,11 @@ 
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <string.h>
+
 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
+#include "libavutil/common.h"
 #include "libavutil/frame.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
@@ -92,7 +95,7 @@  int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
                                               const AVFrame *frame,
                                               int *got_packet_ptr)
 {
-    AVFrame tmp;
+    AVFrame tmp, tmp2;
     AVFrame *padded_frame = NULL;
     int ret;
     int user_packet = !!avpkt->data;
@@ -133,6 +136,75 @@  int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
             avctx->audio_service_type = *(enum AVAudioServiceType*)sd->data;
     }
 
+    if (avctx->trim_preroll) {
+        struct AudioFrameBuffer *cur_buffer, *next_buffer;
+        cur_buffer  = &avctx->internal->audio_frames[avctx->internal->cur_audio_frame];
+        next_buffer = &avctx->internal->audio_frames[!avctx->internal->cur_audio_frame];
+        if (frame) {
+            if (avctx->internal->samples_to_skip >= frame->nb_samples) {
+                avctx->internal->samples_to_skip -= frame->nb_samples;
+                av_packet_unref(avpkt);
+                av_init_packet(avpkt);
+                return 0;
+            }
+            if (avctx->internal->samples_to_skip || cur_buffer->nb_samples) {
+                int src_offset = 0;
+                int samples;
+
+                if (avctx->internal->samples_to_skip) {
+                    src_offset = avctx->internal->samples_to_skip;
+                    avctx->internal->samples_to_skip = 0;
+                }
+
+                if (cur_buffer->nb_samples == 0) {
+                    cur_buffer->pts = frame->pts + av_rescale_q(src_offset, avctx->time_base, (AVRational){ 1, avctx->sample_rate });
+                }
+                samples = FFMIN(avctx->frame_size - cur_buffer->nb_samples,
+                                frame->nb_samples - src_offset);
+                av_samples_copy(cur_buffer->data, frame->extended_data, cur_buffer->nb_samples, src_offset, samples, avctx->channels, avctx->sample_fmt);
+                cur_buffer->nb_samples += samples;
+                src_offset += samples;
+                if (cur_buffer->nb_samples != avctx->frame_size) {
+                    av_packet_unref(avpkt);
+                    av_init_packet(avpkt);
+                    return 0;
+                }
+                tmp2               = *frame;
+                tmp2.extended_data = cur_buffer->data;
+                tmp2.nb_samples    = avctx->frame_size;
+                tmp2.pts           = cur_buffer->pts;
+                memcpy(tmp2.data, tmp2.extended_data,
+                       FFMIN(AV_NUM_DATA_POINTERS, avctx->channels) * sizeof(uint8_t*));
+
+                avctx->internal->cur_audio_frame = !avctx->internal->cur_audio_frame;
+                next_buffer->nb_samples = 0;
+
+                if (src_offset < frame->nb_samples) {
+                    samples = FFMIN(frame->nb_samples - src_offset, avctx->frame_size); // This should always be less than avctx->frame_size
+                    next_buffer->pts = frame->pts + av_rescale_q(src_offset, avctx->time_base, (AVRational){ 1, avctx->sample_rate });
+                    av_samples_copy(next_buffer->data, frame->extended_data, next_buffer->nb_samples, src_offset, samples, avctx->channels, avctx->sample_fmt);
+                    next_buffer->nb_samples += samples;
+                }
+
+                frame = &tmp2;
+            }
+        }
+        if (!frame && cur_buffer->nb_samples > 0) {
+            memset(&tmp2, 0, sizeof(tmp2));
+            tmp2.linesize[0]    = cur_buffer->linesize[0];
+            tmp2.extended_data  = cur_buffer->data;
+            tmp2.nb_samples     = cur_buffer->nb_samples;
+            tmp2.format         = avctx->sample_fmt;
+            tmp2.sample_rate    = avctx->sample_rate;
+            tmp2.channel_layout = avctx->channel_layout;
+            tmp2.pts = cur_buffer->pts;
+            memcpy(tmp2.data, tmp2.extended_data,
+                   FFMIN(AV_NUM_DATA_POINTERS, avctx->channels) * sizeof(uint8_t*));
+            cur_buffer->nb_samples = 0;
+            frame = &tmp2;
+        }
+    }
+
     /* check for valid frame size */
     if (frame) {
         if (avctx->codec->capabilities & AV_CODEC_CAP_SMALL_LAST_FRAME) {
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index 403fb4a090..d363f08011 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -180,6 +180,15 @@  typedef struct AVCodecInternal {
      * of the packet (that should be submitted in the next decode call */
     size_t compat_decode_partial_size;
     AVFrame *compat_decode_frame;
+
+    int samples_to_skip;
+    struct AudioFrameBuffer {
+       uint8_t **data;
+       int *linesize;
+       int64_t pts;
+       int nb_samples;
+    } audio_frames[2];
+    int cur_audio_frame;
 } AVCodecInternal;
 
 struct AVCodecDefault {
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 925ef376f3..a015bb609b 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -419,6 +419,7 @@  static const AVOption avcodec_options[] = {
 {"side_data_only_packets", NULL, OFFSET(side_data_only_packets), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, A|V|E },
 #endif
 {"apply_cropping", NULL, OFFSET(apply_cropping), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, V | D },
+{"trim_preroll", NULL, OFFSET(trim_preroll), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, A | E },
 {NULL},
 };
 
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index bc421f67f8..304034ccbc 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -646,6 +646,27 @@  FF_ENABLE_DEPRECATION_WARNINGS
         }
     }
 
+    if (avctx->trim_preroll) {
+        int i;
+        for (i = 0; i < 2; i++) {
+            avctx->internal->audio_frames[i].data = av_malloc_array(avctx->channels, sizeof(uint8_t *));
+            avctx->internal->audio_frames[i].linesize = av_malloc_array(avctx->channels, sizeof(int *));
+            if (!avctx->internal->audio_frames[i].data ||
+                !avctx->internal->audio_frames[i].linesize) {
+                ret = AVERROR(ENOMEM);
+                goto free_and_end;
+            }
+            ret = av_samples_alloc(avctx->internal->audio_frames[i].data,
+                                   avctx->internal->audio_frames[i].linesize,
+                                   avctx->channels, avctx->frame_size,
+                                   avctx->sample_fmt, 0);
+            if (ret < 0)
+                goto free_and_end;
+        }
+        avctx->internal->samples_to_skip = avctx->initial_padding;
+        avctx->initial_padding = 0;
+    }
+
     if (av_codec_is_decoder(avctx->codec)) {
         /* validate channel layout from the decoder */
         if (avctx->channel_layout) {
@@ -761,6 +782,13 @@  av_cold int avcodec_close(AVCodecContext *avctx)
 
         ff_decode_bsfs_uninit(avctx);
 
+        for (i = 0; i < 2; i++) {
+            if (avctx->internal->audio_frames[i].data)
+                av_free(avctx->internal->audio_frames[i].data[0]);
+            av_free(avctx->internal->audio_frames[i].data);
+            av_free(avctx->internal->audio_frames[i].linesize);
+        }
+
         av_freep(&avctx->internal);
     }