[v2,3/3] omx: Add support for zerocopy input of frames

Message ID 1460029429-26222-3-git-send-email-martin@martin.st
State Superseded
Headers show

Commit Message

Martin Storsjö April 7, 2016, 11:43 a.m.
This can only be used if the input data happens to be laid out
exactly correctly.

This might not be supported on all encoders, so only enable it
with an option, but enable it automatically on raspberry pi,
where it is known to be supported.
---
Updated for using imgutils.h.
---
 libavcodec/omx.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 82 insertions(+), 3 deletions(-)

Comments

Mark Thompson April 9, 2016, 7:28 p.m. | #1
On 07/04/16 12:43, Martin Storsjö wrote:
> ...
> @@ -679,6 +698,7 @@ static int omx_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
>      if (frame) {
>          uint8_t *dst[4];
>          int linesize[4];
> +        int need_copy;
>          pthread_mutex_lock(&s->input_mutex);
>          while (!s->num_free_in_buffers)
>              pthread_cond_wait(&s->input_cond, &s->input_mutex);
> @@ -686,7 +706,64 @@ static int omx_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
>          pthread_mutex_unlock(&s->input_mutex);
>  
>          buffer->nFilledLen = av_image_fill_arrays(dst, linesize, buffer->pBuffer, avctx->pix_fmt, s->stride, s->plane_size, 1);
> -        av_image_copy(dst, linesize, (const uint8_t**) frame->data, frame->linesize, avctx->pix_fmt, avctx->width, avctx->height);
> +
> +        if (s->input_zerocopy) {
> +            int error = 0;
> +            // We release the previous buffer attached to the OMX buffer header
> +            // only here when we want to reuse it; we could also release it
> +            // in the emptyBufferDone callback.

Doing it in the EmptyBufferDone callback does seem preferable.  I'm imagining deadlock possibilities if the source doesn't have many buffers and waits for one to become available.

> +            if (buffer->pAppPrivate) {
> +                if (buffer->pOutputPortPrivate)
> +                    av_free(buffer->pAppPrivate);
> +                else
> +                    av_frame_free((AVFrame**)&buffer->pAppPrivate);
> +                buffer->pAppPrivate = NULL;
> +            }
> +            // YUV420P only, for now
> +            if (frame->buf[0] && !frame->buf[1] && !frame->buf[2] &&

Not sure this part of the check is useful (the user could be filling buf[n] with whatever referencing information they want).

> +                frame->linesize[0] == linesize[0] &&
> +                frame->linesize[1] == linesize[1] &&
> +                frame->data[0] + s->plane_size*linesize[0] == frame->data[1] &&
> +                frame->data[1] + s->plane_size/2*linesize[1] == frame->data[2]) {
> +                // If the input frame happens to have all planes stored contiguously,
> +                // with the right strides, just clone the frame and set the OMX
> +                // buffer header to point to it
> +                AVFrame *local = av_frame_clone(frame);
> +                if (!local) {
> +                    error = 1;
> +                } else {
> +                    buffer->pAppPrivate = local;
> +                    buffer->pOutputPortPrivate = NULL;
> +                    buffer->pBuffer = local->data[0];
> +                    need_copy = 0;
> +                }
> +            } else {
> +                // If not, we need to allocate a new buffer with the right
> +                // size and copy the input frame into it.
> +                uint8_t *buf = av_malloc(av_image_get_buffer_size(avctx->pix_fmt, s->stride, s->plane_size, 1));
> +                if (!buf) {
> +                    error = 1;
> +                } else {
> +                    buffer->pAppPrivate = buf;
> +                    buffer->pOutputPortPrivate = (void*) 1;
> +                    buffer->pBuffer = buf;
> +                    need_copy = 1;
> +                    buffer->nFilledLen = av_image_fill_arrays(dst, linesize, buffer->pBuffer, avctx->pix_fmt, s->stride, s->plane_size, 1);
> +                }
> +            }
> +            if (error) {
> +                // If we failed, return the buffer to the queue so it's not
> +                // lost
> +                pthread_mutex_lock(&s->input_mutex);
> +                s->free_in_buffers[s->num_free_in_buffers++] = buffer;
> +                pthread_mutex_unlock(&s->input_mutex);
> +                return AVERROR(ENOMEM);
> +            }
> +        } else {
> +            need_copy = 1;
> +        }
> +        if (need_copy)
> +            av_image_copy(dst, linesize, (const uint8_t**) frame->data, frame->linesize, avctx->pix_fmt, avctx->width, avctx->height);
>          buffer->nFlags = OMX_BUFFERFLAG_ENDOFFRAME;
>          buffer->nOffset = 0;
>          // Convert the timestamps to microseconds; some encoders can ignore
> @@ -770,9 +847,11 @@ static av_cold int omx_encode_end(AVCodecContext *avctx)
>  
>  #define OFFSET(x) offsetof(OMXCodecContext, x)
>  #define VDE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_ENCODING_PARAM
> +#define VE  AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
>  static const AVOption options[] = {
>      { "omx_libname", "OpenMAX library name", OFFSET(libname), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VDE },
>      { "omx_libprefix", "OpenMAX library prefix", OFFSET(libprefix), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VDE },
> +    { "zerocopy", "Try to avoid copying input frames if possible", OFFSET(input_zerocopy), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
>      { NULL }
>  };
>  

Otherwise looks good.

Thanks,

- Mark
Martin Storsjö April 9, 2016, 8:36 p.m. | #2
On Sat, 9 Apr 2016, Mark Thompson wrote:

> On 07/04/16 12:43, Martin Storsjö wrote:
>> ...
>> @@ -679,6 +698,7 @@ static int omx_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
>>      if (frame) {
>>          uint8_t *dst[4];
>>          int linesize[4];
>> +        int need_copy;
>>          pthread_mutex_lock(&s->input_mutex);
>>          while (!s->num_free_in_buffers)
>>              pthread_cond_wait(&s->input_cond, &s->input_mutex);
>> @@ -686,7 +706,64 @@ static int omx_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
>>          pthread_mutex_unlock(&s->input_mutex);
>>
>>          buffer->nFilledLen = av_image_fill_arrays(dst, linesize, buffer->pBuffer, avctx->pix_fmt, s->stride, s->plane_size, 1);
>> -        av_image_copy(dst, linesize, (const uint8_t**) frame->data, frame->linesize, avctx->pix_fmt, avctx->width, avctx->height);
>> +
>> +        if (s->input_zerocopy) {
>> +            int error = 0;
>> +            // We release the previous buffer attached to the OMX buffer header
>> +            // only here when we want to reuse it; we could also release it
>> +            // in the emptyBufferDone callback.
>
> Doing it in the EmptyBufferDone callback does seem preferable.  I'm imagining deadlock possibilities if the source doesn't have many buffers and waits for one to become available.
>
>> +            if (buffer->pAppPrivate) {
>> +                if (buffer->pOutputPortPrivate)
>> +                    av_free(buffer->pAppPrivate);
>> +                else
>> +                    av_frame_free((AVFrame**)&buffer->pAppPrivate);
>> +                buffer->pAppPrivate = NULL;
>> +            }
>> +            // YUV420P only, for now
>> +            if (frame->buf[0] && !frame->buf[1] && !frame->buf[2] &&
>
> Not sure this part of the check is useful (the user could be filling buf[n] with whatever referencing information they want).

Right - yes, I don't think it's necessary given how it currently works.

// Martin

Patch

diff --git a/libavcodec/omx.c b/libavcodec/omx.c
index da73762..2206c97 100644
--- a/libavcodec/omx.c
+++ b/libavcodec/omx.c
@@ -232,6 +232,8 @@  typedef struct OMXCodecContext {
 
     uint8_t *output_buf;
     int output_buf_size;
+
+    int input_zerocopy;
 } OMXCodecContext;
 
 static OMX_ERRORTYPE event_handler(OMX_HANDLETYPE component, OMX_PTR app_data, OMX_EVENTTYPE event, OMX_U32 data1, OMX_U32 data2, OMX_PTR event_data)
@@ -500,8 +502,14 @@  static av_cold int omx_component_init(AVCodecContext *avctx, const char *role)
     s->done_out_buffers   = av_mallocz(sizeof(OMX_BUFFERHEADERTYPE*) * s->num_out_buffers);
     if (!s->in_buffer_headers || !s->free_in_buffers || !s->out_buffer_headers || !s->done_out_buffers)
         return AVERROR(ENOMEM);
-    for (i = 0; i < s->num_in_buffers && err == OMX_ErrorNone; i++)
-        err = OMX_AllocateBuffer(s->handle, &s->in_buffer_headers[i],  s->in_port,  s, in_port_params.nBufferSize);
+    for (i = 0; i < s->num_in_buffers && err == OMX_ErrorNone; i++) {
+        if (s->input_zerocopy)
+            err = OMX_UseBuffer(s->handle, &s->in_buffer_headers[i], s->in_port, s, in_port_params.nBufferSize, NULL);
+        else
+            err = OMX_AllocateBuffer(s->handle, &s->in_buffer_headers[i],  s->in_port,  s, in_port_params.nBufferSize);
+        if (err == OMX_ErrorNone)
+            s->in_buffer_headers[i]->pAppPrivate = s->in_buffer_headers[i]->pOutputPortPrivate = NULL;
+    }
     CHECK(err);
     s->num_in_buffers = i;
     for (i = 0; i < s->num_out_buffers && err == OMX_ErrorNone; i++)
@@ -548,6 +556,13 @@  static av_cold void cleanup(OMXCodecContext *s)
             s->num_free_in_buffers--;
             memmove(&s->free_in_buffers[0], &s->free_in_buffers[1], s->num_free_in_buffers * sizeof(OMX_BUFFERHEADERTYPE*));
             pthread_mutex_unlock(&s->input_mutex);
+            if (s->input_zerocopy && buffer->pAppPrivate) {
+                if (buffer->pOutputPortPrivate)
+                    av_free(buffer->pAppPrivate);
+                else
+                    av_frame_free((AVFrame**)&buffer->pAppPrivate);
+                buffer->pBuffer = NULL;
+            }
             OMX_FreeBuffer(s->handle, s->in_port, buffer);
         }
         for (i = 0; i < s->num_out_buffers; i++) {
@@ -589,6 +604,10 @@  static av_cold int omx_encode_init(AVCodecContext *avctx)
     const char *role;
     OMX_BUFFERHEADERTYPE *buffer;
 
+#if CONFIG_RPI_OMX
+    s->input_zerocopy = 1;
+#endif
+
     if ((ret = omx_init(avctx, s->libname, s->libprefix)) < 0)
         return ret;
 
@@ -679,6 +698,7 @@  static int omx_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     if (frame) {
         uint8_t *dst[4];
         int linesize[4];
+        int need_copy;
         pthread_mutex_lock(&s->input_mutex);
         while (!s->num_free_in_buffers)
             pthread_cond_wait(&s->input_cond, &s->input_mutex);
@@ -686,7 +706,64 @@  static int omx_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         pthread_mutex_unlock(&s->input_mutex);
 
         buffer->nFilledLen = av_image_fill_arrays(dst, linesize, buffer->pBuffer, avctx->pix_fmt, s->stride, s->plane_size, 1);
-        av_image_copy(dst, linesize, (const uint8_t**) frame->data, frame->linesize, avctx->pix_fmt, avctx->width, avctx->height);
+
+        if (s->input_zerocopy) {
+            int error = 0;
+            // We release the previous buffer attached to the OMX buffer header
+            // only here when we want to reuse it; we could also release it
+            // in the emptyBufferDone callback.
+            if (buffer->pAppPrivate) {
+                if (buffer->pOutputPortPrivate)
+                    av_free(buffer->pAppPrivate);
+                else
+                    av_frame_free((AVFrame**)&buffer->pAppPrivate);
+                buffer->pAppPrivate = NULL;
+            }
+            // YUV420P only, for now
+            if (frame->buf[0] && !frame->buf[1] && !frame->buf[2] &&
+                frame->linesize[0] == linesize[0] &&
+                frame->linesize[1] == linesize[1] &&
+                frame->data[0] + s->plane_size*linesize[0] == frame->data[1] &&
+                frame->data[1] + s->plane_size/2*linesize[1] == frame->data[2]) {
+                // If the input frame happens to have all planes stored contiguously,
+                // with the right strides, just clone the frame and set the OMX
+                // buffer header to point to it
+                AVFrame *local = av_frame_clone(frame);
+                if (!local) {
+                    error = 1;
+                } else {
+                    buffer->pAppPrivate = local;
+                    buffer->pOutputPortPrivate = NULL;
+                    buffer->pBuffer = local->data[0];
+                    need_copy = 0;
+                }
+            } else {
+                // If not, we need to allocate a new buffer with the right
+                // size and copy the input frame into it.
+                uint8_t *buf = av_malloc(av_image_get_buffer_size(avctx->pix_fmt, s->stride, s->plane_size, 1));
+                if (!buf) {
+                    error = 1;
+                } else {
+                    buffer->pAppPrivate = buf;
+                    buffer->pOutputPortPrivate = (void*) 1;
+                    buffer->pBuffer = buf;
+                    need_copy = 1;
+                    buffer->nFilledLen = av_image_fill_arrays(dst, linesize, buffer->pBuffer, avctx->pix_fmt, s->stride, s->plane_size, 1);
+                }
+            }
+            if (error) {
+                // If we failed, return the buffer to the queue so it's not
+                // lost
+                pthread_mutex_lock(&s->input_mutex);
+                s->free_in_buffers[s->num_free_in_buffers++] = buffer;
+                pthread_mutex_unlock(&s->input_mutex);
+                return AVERROR(ENOMEM);
+            }
+        } else {
+            need_copy = 1;
+        }
+        if (need_copy)
+            av_image_copy(dst, linesize, (const uint8_t**) frame->data, frame->linesize, avctx->pix_fmt, avctx->width, avctx->height);
         buffer->nFlags = OMX_BUFFERFLAG_ENDOFFRAME;
         buffer->nOffset = 0;
         // Convert the timestamps to microseconds; some encoders can ignore
@@ -770,9 +847,11 @@  static av_cold int omx_encode_end(AVCodecContext *avctx)
 
 #define OFFSET(x) offsetof(OMXCodecContext, x)
 #define VDE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+#define VE  AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
     { "omx_libname", "OpenMAX library name", OFFSET(libname), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VDE },
     { "omx_libprefix", "OpenMAX library prefix", OFFSET(libprefix), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VDE },
+    { "zerocopy", "Try to avoid copying input frames if possible", OFFSET(input_zerocopy), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
     { NULL }
 };