cpu: split flag checks per arch in av_cpu_max_align()

Message ID 20170828145252.5260-1-jamrial@gmail.com
State New
Headers show

Commit Message

James Almer Aug. 28, 2017, 2:52 p.m.
AV_CPU_FLAG_MMX      == AV_CPU_FLAG_ARMV6 == AV_CPU_FLAG_ALTIVEC
AV_CPU_FLAG_3DNOWEXT == AV_CPU_FLAG_NEON
AV_CPU_FLAG_SSE      == AV_CPU_FLAG_VFP

Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavutil/cpu.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

Comments

wm4 Aug. 28, 2017, 3:08 p.m. | #1
On Mon, 28 Aug 2017 11:52:52 -0300
James Almer <jamrial@gmail.com> wrote:

> AV_CPU_FLAG_MMX      == AV_CPU_FLAG_ARMV6 == AV_CPU_FLAG_ALTIVEC
> AV_CPU_FLAG_3DNOWEXT == AV_CPU_FLAG_NEON
> AV_CPU_FLAG_SSE      == AV_CPU_FLAG_VFP
> 
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavutil/cpu.c | 13 +++++++++++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/libavutil/cpu.c b/libavutil/cpu.c
> index 5aef6af21..da857cd2c 100644
> --- a/libavutil/cpu.c
> +++ b/libavutil/cpu.c
> @@ -20,6 +20,7 @@
>  #include <stdint.h>
>  #include <stdatomic.h>
>  
> +#include "attributes.h"
>  #include "cpu.h"
>  #include "cpu_internal.h"
>  #include "config.h"
> @@ -184,12 +185,20 @@ int av_cpu_count(void)
>  
>  size_t av_cpu_max_align(void)
>  {
> -    int flags = av_get_cpu_flags();
> +    int av_unused flags = av_get_cpu_flags();
>  
> +#if ARCH_ARM || ARCH_AARCH64
> +    if (flags & AV_CPU_FLAG_NEON)
> +        return 16;
> +#elif ARCH_PPC
> +    if (flags & AV_CPU_FLAG_ALTIVEC)
> +        return 16;
> +#elif ARCH_X86
>      if (flags & AV_CPU_FLAG_AVX)
>          return 32;
> -    if (flags & (AV_CPU_FLAG_ALTIVEC | AV_CPU_FLAG_SSE | AV_CPU_FLAG_NEON))
> +    if (flags & AV_CPU_FLAG_SSE)
>          return 16;
> +#endif
>  
>      return 8;
>  }

Wouldn't it be better to make the values disjoint?
James Almer Aug. 28, 2017, 3:53 p.m. | #2
On 8/28/2017 12:08 PM, wm4 wrote:
> On Mon, 28 Aug 2017 11:52:52 -0300
> James Almer <jamrial@gmail.com> wrote:
> 
>> AV_CPU_FLAG_MMX      == AV_CPU_FLAG_ARMV6 == AV_CPU_FLAG_ALTIVEC
>> AV_CPU_FLAG_3DNOWEXT == AV_CPU_FLAG_NEON
>> AV_CPU_FLAG_SSE      == AV_CPU_FLAG_VFP
>>
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>  libavutil/cpu.c | 13 +++++++++++--
>>  1 file changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/libavutil/cpu.c b/libavutil/cpu.c
>> index 5aef6af21..da857cd2c 100644
>> --- a/libavutil/cpu.c
>> +++ b/libavutil/cpu.c
>> @@ -20,6 +20,7 @@
>>  #include <stdint.h>
>>  #include <stdatomic.h>
>>  
>> +#include "attributes.h"
>>  #include "cpu.h"
>>  #include "cpu_internal.h"
>>  #include "config.h"
>> @@ -184,12 +185,20 @@ int av_cpu_count(void)
>>  
>>  size_t av_cpu_max_align(void)
>>  {
>> -    int flags = av_get_cpu_flags();
>> +    int av_unused flags = av_get_cpu_flags();
>>  
>> +#if ARCH_ARM || ARCH_AARCH64
>> +    if (flags & AV_CPU_FLAG_NEON)
>> +        return 16;
>> +#elif ARCH_PPC
>> +    if (flags & AV_CPU_FLAG_ALTIVEC)
>> +        return 16;
>> +#elif ARCH_X86
>>      if (flags & AV_CPU_FLAG_AVX)
>>          return 32;
>> -    if (flags & (AV_CPU_FLAG_ALTIVEC | AV_CPU_FLAG_SSE | AV_CPU_FLAG_NEON))
>> +    if (flags & AV_CPU_FLAG_SSE)
>>          return 16;
>> +#endif
>>  
>>      return 8;
>>  }
> 
> Wouldn't it be better to make the values disjoint?

That requires an ABI break and is not a good solution, especially when
it's only a "problem" in a function like this one outside of arch
specific folders.

Something like

size_t av_cpu_max_align(void)
{
    if (ARCH_AARCH64)
        return ff_get_cpu_max_align_aarch64();
    if (ARCH_ARM)
        return ff_get_cpu_max_align_arm();
    if (ARCH_PPC)
        return ff_get_cpu_max_align_ppc();
    if (ARCH_X86)
        return ff_get_cpu_max_align_x86();
    return 8;
}

Would be more in line with the rest of the codebase, but seems overkill
to me. I can send such an implementation anyway if that's preferred in
any case.
wm4 Aug. 28, 2017, 4:41 p.m. | #3
On Mon, 28 Aug 2017 12:53:09 -0300
James Almer <jamrial@gmail.com> wrote:

> On 8/28/2017 12:08 PM, wm4 wrote:
> > On Mon, 28 Aug 2017 11:52:52 -0300
> > James Almer <jamrial@gmail.com> wrote:
> >   
> >> AV_CPU_FLAG_MMX      == AV_CPU_FLAG_ARMV6 == AV_CPU_FLAG_ALTIVEC
> >> AV_CPU_FLAG_3DNOWEXT == AV_CPU_FLAG_NEON
> >> AV_CPU_FLAG_SSE      == AV_CPU_FLAG_VFP
> >>
> >> Signed-off-by: James Almer <jamrial@gmail.com>
> >> ---
> >>  libavutil/cpu.c | 13 +++++++++++--
> >>  1 file changed, 11 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/libavutil/cpu.c b/libavutil/cpu.c
> >> index 5aef6af21..da857cd2c 100644
> >> --- a/libavutil/cpu.c
> >> +++ b/libavutil/cpu.c
> >> @@ -20,6 +20,7 @@
> >>  #include <stdint.h>
> >>  #include <stdatomic.h>
> >>  
> >> +#include "attributes.h"
> >>  #include "cpu.h"
> >>  #include "cpu_internal.h"
> >>  #include "config.h"
> >> @@ -184,12 +185,20 @@ int av_cpu_count(void)
> >>  
> >>  size_t av_cpu_max_align(void)
> >>  {
> >> -    int flags = av_get_cpu_flags();
> >> +    int av_unused flags = av_get_cpu_flags();
> >>  
> >> +#if ARCH_ARM || ARCH_AARCH64
> >> +    if (flags & AV_CPU_FLAG_NEON)
> >> +        return 16;
> >> +#elif ARCH_PPC
> >> +    if (flags & AV_CPU_FLAG_ALTIVEC)
> >> +        return 16;
> >> +#elif ARCH_X86
> >>      if (flags & AV_CPU_FLAG_AVX)
> >>          return 32;
> >> -    if (flags & (AV_CPU_FLAG_ALTIVEC | AV_CPU_FLAG_SSE | AV_CPU_FLAG_NEON))
> >> +    if (flags & AV_CPU_FLAG_SSE)
> >>          return 16;
> >> +#endif
> >>  
> >>      return 8;
> >>  }  
> > 
> > Wouldn't it be better to make the values disjoint?  
> 
> That requires an ABI break and is not a good solution, especially when
> it's only a "problem" in a function like this one outside of arch
> specific folders.

Libav recently had an ABI bump. Should still be possible.

I had the same problem when trying to use this function from outside
Libav - I couldn't know on which arch Libav thinks it is, without
duplicating its configure check.
James Almer Aug. 28, 2017, 5:11 p.m. | #4
On 8/28/2017 1:41 PM, wm4 wrote:
> On Mon, 28 Aug 2017 12:53:09 -0300
> James Almer <jamrial@gmail.com> wrote:
> 
>> On 8/28/2017 12:08 PM, wm4 wrote:
>>> On Mon, 28 Aug 2017 11:52:52 -0300
>>> James Almer <jamrial@gmail.com> wrote:
>>>   
>>>> AV_CPU_FLAG_MMX      == AV_CPU_FLAG_ARMV6 == AV_CPU_FLAG_ALTIVEC
>>>> AV_CPU_FLAG_3DNOWEXT == AV_CPU_FLAG_NEON
>>>> AV_CPU_FLAG_SSE      == AV_CPU_FLAG_VFP
>>>>
>>>> Signed-off-by: James Almer <jamrial@gmail.com>
>>>> ---
>>>>  libavutil/cpu.c | 13 +++++++++++--
>>>>  1 file changed, 11 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/libavutil/cpu.c b/libavutil/cpu.c
>>>> index 5aef6af21..da857cd2c 100644
>>>> --- a/libavutil/cpu.c
>>>> +++ b/libavutil/cpu.c
>>>> @@ -20,6 +20,7 @@
>>>>  #include <stdint.h>
>>>>  #include <stdatomic.h>
>>>>  
>>>> +#include "attributes.h"
>>>>  #include "cpu.h"
>>>>  #include "cpu_internal.h"
>>>>  #include "config.h"
>>>> @@ -184,12 +185,20 @@ int av_cpu_count(void)
>>>>  
>>>>  size_t av_cpu_max_align(void)
>>>>  {
>>>> -    int flags = av_get_cpu_flags();
>>>> +    int av_unused flags = av_get_cpu_flags();
>>>>  
>>>> +#if ARCH_ARM || ARCH_AARCH64
>>>> +    if (flags & AV_CPU_FLAG_NEON)
>>>> +        return 16;
>>>> +#elif ARCH_PPC
>>>> +    if (flags & AV_CPU_FLAG_ALTIVEC)
>>>> +        return 16;
>>>> +#elif ARCH_X86
>>>>      if (flags & AV_CPU_FLAG_AVX)
>>>>          return 32;
>>>> -    if (flags & (AV_CPU_FLAG_ALTIVEC | AV_CPU_FLAG_SSE | AV_CPU_FLAG_NEON))
>>>> +    if (flags & AV_CPU_FLAG_SSE)
>>>>          return 16;
>>>> +#endif
>>>>  
>>>>      return 8;
>>>>  }  
>>>
>>> Wouldn't it be better to make the values disjoint?  
>>
>> That requires an ABI break and is not a good solution, especially when
>> it's only a "problem" in a function like this one outside of arch
>> specific folders.
> 
> Libav recently had an ABI bump. Should still be possible.

That was five months ago.

> 
> I had the same problem when trying to use this function from outside
> Libav - I couldn't know on which arch Libav thinks it is, without
> duplicating its configure check.

This patch should solve that, returning the correct alignment depending
on the arch the library was built for.

In any case, there are 31 bits available for flags and x86 alone uses
23, and that's without AVX512. What you're asking for is kinda
constricting and the supposed benefits doubtful.
James Almer Sept. 13, 2017, 8:11 p.m. | #5
On 8/28/2017 2:11 PM, James Almer wrote:
> On 8/28/2017 1:41 PM, wm4 wrote:
>> On Mon, 28 Aug 2017 12:53:09 -0300
>> James Almer <jamrial@gmail.com> wrote:
>>
>>> On 8/28/2017 12:08 PM, wm4 wrote:
>>>> On Mon, 28 Aug 2017 11:52:52 -0300
>>>> James Almer <jamrial@gmail.com> wrote:
>>>>   
>>>>> AV_CPU_FLAG_MMX      == AV_CPU_FLAG_ARMV6 == AV_CPU_FLAG_ALTIVEC
>>>>> AV_CPU_FLAG_3DNOWEXT == AV_CPU_FLAG_NEON
>>>>> AV_CPU_FLAG_SSE      == AV_CPU_FLAG_VFP
>>>>>
>>>>> Signed-off-by: James Almer <jamrial@gmail.com>
>>>>> ---
>>>>>  libavutil/cpu.c | 13 +++++++++++--
>>>>>  1 file changed, 11 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/libavutil/cpu.c b/libavutil/cpu.c
>>>>> index 5aef6af21..da857cd2c 100644
>>>>> --- a/libavutil/cpu.c
>>>>> +++ b/libavutil/cpu.c
>>>>> @@ -20,6 +20,7 @@
>>>>>  #include <stdint.h>
>>>>>  #include <stdatomic.h>
>>>>>  
>>>>> +#include "attributes.h"
>>>>>  #include "cpu.h"
>>>>>  #include "cpu_internal.h"
>>>>>  #include "config.h"
>>>>> @@ -184,12 +185,20 @@ int av_cpu_count(void)
>>>>>  
>>>>>  size_t av_cpu_max_align(void)
>>>>>  {
>>>>> -    int flags = av_get_cpu_flags();
>>>>> +    int av_unused flags = av_get_cpu_flags();
>>>>>  
>>>>> +#if ARCH_ARM || ARCH_AARCH64
>>>>> +    if (flags & AV_CPU_FLAG_NEON)
>>>>> +        return 16;
>>>>> +#elif ARCH_PPC
>>>>> +    if (flags & AV_CPU_FLAG_ALTIVEC)
>>>>> +        return 16;
>>>>> +#elif ARCH_X86
>>>>>      if (flags & AV_CPU_FLAG_AVX)
>>>>>          return 32;
>>>>> -    if (flags & (AV_CPU_FLAG_ALTIVEC | AV_CPU_FLAG_SSE | AV_CPU_FLAG_NEON))
>>>>> +    if (flags & AV_CPU_FLAG_SSE)
>>>>>          return 16;
>>>>> +#endif
>>>>>  
>>>>>      return 8;
>>>>>  }  
>>>>
>>>> Wouldn't it be better to make the values disjoint?  
>>>
>>> That requires an ABI break and is not a good solution, especially when
>>> it's only a "problem" in a function like this one outside of arch
>>> specific folders.
>>
>> Libav recently had an ABI bump. Should still be possible.
> 
> That was five months ago.
> 
>>
>> I had the same problem when trying to use this function from outside
>> Libav - I couldn't know on which arch Libav thinks it is, without
>> duplicating its configure check.
> 
> This patch should solve that, returning the correct alignment depending
> on the arch the library was built for.
> 
> In any case, there are 31 bits available for flags and x86 alone uses
> 23, and that's without AVX512. What you're asking for is kinda
> constricting and the supposed benefits doubtful.

Ping.
Diego Biurrun Sept. 14, 2017, 9:07 a.m. | #6
On Mon, Aug 28, 2017 at 12:53:09PM -0300, James Almer wrote:
> On 8/28/2017 12:08 PM, wm4 wrote:
> > On Mon, 28 Aug 2017 11:52:52 -0300
> > James Almer <jamrial@gmail.com> wrote:
> >> --- a/libavutil/cpu.c
> >> +++ b/libavutil/cpu.c
> >> @@ -184,12 +185,20 @@ int av_cpu_count(void)
> >>  
> >>  size_t av_cpu_max_align(void)
> >>  {
> >> -    int flags = av_get_cpu_flags();
> >> +    int av_unused flags = av_get_cpu_flags();
> >>  
> >> +#if ARCH_ARM || ARCH_AARCH64
> >> +    if (flags & AV_CPU_FLAG_NEON)
> >> +        return 16;
> >> +#elif ARCH_PPC
> >> +    if (flags & AV_CPU_FLAG_ALTIVEC)
> >> +        return 16;
> >> +#elif ARCH_X86
> >>      if (flags & AV_CPU_FLAG_AVX)
> >>          return 32;
> >> -    if (flags & (AV_CPU_FLAG_ALTIVEC | AV_CPU_FLAG_SSE | AV_CPU_FLAG_NEON))
> >> +    if (flags & AV_CPU_FLAG_SSE)
> >>          return 16;
> >> +#endif
> >>  
> >>      return 8;
> >>  }
> 
> Something like
> 
> size_t av_cpu_max_align(void)
> {
>     if (ARCH_AARCH64)
>         return ff_get_cpu_max_align_aarch64();
>     if (ARCH_ARM)
>         return ff_get_cpu_max_align_arm();
>     if (ARCH_PPC)
>         return ff_get_cpu_max_align_ppc();
>     if (ARCH_X86)
>         return ff_get_cpu_max_align_x86();
>     return 8;
> }
> 
> Would be more in line with the rest of the codebase, but seems overkill
> to me. I can send such an implementation anyway if that's preferred in
> any case.

That would be much better. Your original patch is IMO a step in the wrong
direction. Arch-specific code should not litter the C code in the first
place and your patch produces an ifdef maze on top of that.

Diego
James Almer Sept. 14, 2017, 7:37 p.m. | #7
On 9/14/2017 6:07 AM, Diego Biurrun wrote:
> On Mon, Aug 28, 2017 at 12:53:09PM -0300, James Almer wrote:
>> On 8/28/2017 12:08 PM, wm4 wrote:
>>> On Mon, 28 Aug 2017 11:52:52 -0300
>>> James Almer <jamrial@gmail.com> wrote:
>>>> --- a/libavutil/cpu.c
>>>> +++ b/libavutil/cpu.c
>>>> @@ -184,12 +185,20 @@ int av_cpu_count(void)
>>>>  
>>>>  size_t av_cpu_max_align(void)
>>>>  {
>>>> -    int flags = av_get_cpu_flags();
>>>> +    int av_unused flags = av_get_cpu_flags();
>>>>  
>>>> +#if ARCH_ARM || ARCH_AARCH64
>>>> +    if (flags & AV_CPU_FLAG_NEON)
>>>> +        return 16;
>>>> +#elif ARCH_PPC
>>>> +    if (flags & AV_CPU_FLAG_ALTIVEC)
>>>> +        return 16;
>>>> +#elif ARCH_X86
>>>>      if (flags & AV_CPU_FLAG_AVX)
>>>>          return 32;
>>>> -    if (flags & (AV_CPU_FLAG_ALTIVEC | AV_CPU_FLAG_SSE | AV_CPU_FLAG_NEON))
>>>> +    if (flags & AV_CPU_FLAG_SSE)
>>>>          return 16;
>>>> +#endif
>>>>  
>>>>      return 8;
>>>>  }
>>
>> Something like
>>
>> size_t av_cpu_max_align(void)
>> {
>>     if (ARCH_AARCH64)
>>         return ff_get_cpu_max_align_aarch64();
>>     if (ARCH_ARM)
>>         return ff_get_cpu_max_align_arm();
>>     if (ARCH_PPC)
>>         return ff_get_cpu_max_align_ppc();
>>     if (ARCH_X86)
>>         return ff_get_cpu_max_align_x86();
>>     return 8;
>> }
>>
>> Would be more in line with the rest of the codebase, but seems overkill
>> to me. I can send such an implementation anyway if that's preferred in
>> any case.
> 
> That would be much better. Your original patch is IMO a step in the wrong
> direction. Arch-specific code should not litter the C code in the first
> place and your patch produces an ifdef maze on top of that.

There's a precedent in av_parse_cpu_flags(), which is why
av_cpu_max_align() was written this way i presume. But in any case sure,
I'll split it across arch folders.

> 
> Diego
> _______________________________________________
> libav-devel mailing list
> libav-devel@libav.org
> https://lists.libav.org/mailman/listinfo/libav-devel
>

Patch

diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 5aef6af21..da857cd2c 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -20,6 +20,7 @@ 
 #include <stdint.h>
 #include <stdatomic.h>
 
+#include "attributes.h"
 #include "cpu.h"
 #include "cpu_internal.h"
 #include "config.h"
@@ -184,12 +185,20 @@  int av_cpu_count(void)
 
 size_t av_cpu_max_align(void)
 {
-    int flags = av_get_cpu_flags();
+    int av_unused flags = av_get_cpu_flags();
 
+#if ARCH_ARM || ARCH_AARCH64
+    if (flags & AV_CPU_FLAG_NEON)
+        return 16;
+#elif ARCH_PPC
+    if (flags & AV_CPU_FLAG_ALTIVEC)
+        return 16;
+#elif ARCH_X86
     if (flags & AV_CPU_FLAG_AVX)
         return 32;
-    if (flags & (AV_CPU_FLAG_ALTIVEC | AV_CPU_FLAG_SSE | AV_CPU_FLAG_NEON))
+    if (flags & AV_CPU_FLAG_SSE)
         return 16;
+#endif
 
     return 8;
 }