checkasm: Test sub-idcts for hevc

Message ID 20170412115352.53231-1-martin@martin.st
State Deferred
Headers show

Commit Message

Martin Storsjö April 12, 2017, 11:53 a.m.
This allows testing and benchmarking use of the col_limit parameter
in the idct implementations.
---
I can't say I really understand the logic for setting the col_limit
parameter based on the range on where non-zero coefficients are set,
but this set up at least works with the reference C version of the
idct. (I.e., it gives the same result both if passing the real
col_limit value or block_size, i.e. it doesn't miss anything.)
---
 tests/checkasm/hevc_idct.c | 46 +++++++++++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 15 deletions(-)

Comments

Luca Barbato April 12, 2017, 12:15 p.m. | #1
On 12/04/2017 13:53, Martin Storsjö wrote:
> This allows testing and benchmarking use of the col_limit parameter
> in the idct implementations.
> ---
> I can't say I really understand the logic for setting the col_limit
> parameter based on the range on where non-zero coefficients are set,
> but this set up at least works with the reference C version of the
> idct. (I.e., it gives the same result both if passing the real
> col_limit value or block_size, i.e. it doesn't miss anything.)
> ---
>  tests/checkasm/hevc_idct.c | 46 +++++++++++++++++++++++++++++++---------------
>  1 file changed, 31 insertions(+), 15 deletions(-)
> 

Probably fine.
Martin Storsjö April 12, 2017, 7:06 p.m. | #2
On Wed, 12 Apr 2017, Martin Storsjö wrote:

> This allows testing and benchmarking use of the col_limit parameter
> in the idct implementations.
> ---
> I can't say I really understand the logic for setting the col_limit
> parameter based on the range on where non-zero coefficients are set,
> but this set up at least works with the reference C version of the
> idct. (I.e., it gives the same result both if passing the real
> col_limit value or block_size, i.e. it doesn't miss anything.)
> ---
> tests/checkasm/hevc_idct.c | 46 +++++++++++++++++++++++++++++++---------------
> 1 file changed, 31 insertions(+), 15 deletions(-)
>
> diff --git a/tests/checkasm/hevc_idct.c b/tests/checkasm/hevc_idct.c
> index dd4dc0d064..f8cf4de9f1 100644
> --- a/tests/checkasm/hevc_idct.c
> +++ b/tests/checkasm/hevc_idct.c
> @@ -26,35 +26,51 @@
> 
> #include "checkasm.h"
> 
> -#define randomize_buffers(buf, size)            \
> +#define randomize_buffers(buf, size, sub)       \
>     do {                                        \
> -        int j;                                  \
> +        int j, k;                               \
>         for (j = 0; j < size; j++) {            \
> -            int16_t r = rnd();                  \
> -            AV_WN16A(buf + j, r);               \
> +            for (k = 0; k < size; k++) {        \
> +                int16_t r = 0;                  \
> +                if (j < sub && k < sub)         \
> +                    r = rnd();                  \
> +                AV_WN16A(buf + j * size + k, r);\
> +            }                                   \
>         }                                       \
>     } while (0)
> 
> static void check_idct(HEVCDSPContext h, int bit_depth)
> {
> -    int i;
> +    int i, sub;
>     LOCAL_ALIGNED(32, int16_t, coeffs0, [32 * 32]);
>     LOCAL_ALIGNED(32, int16_t, coeffs1, [32 * 32]);
>
>     for (i = 2; i <= 5; i++) {
>         int block_size = 1 << i;
>         int size = block_size * block_size;
> -        int col_limit = block_size;
> +        int col_limit;
>         declare_func(void, int16_t *coeffs, int col_limit);
> 
> -        randomize_buffers(coeffs0, size);
> -        memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);
> -        if (check_func(h.idct[i - 2], "hevc_idct_%dx%d_%d", block_size, block_size, bit_depth)) {
> -            call_ref(coeffs0, col_limit);
> -            call_new(coeffs1, col_limit);
> -            if (memcmp(coeffs0, coeffs1, sizeof(*coeffs0) * size))
> -                fail();
> -            bench_new(coeffs1, col_limit);
> +        for (sub = 1; sub <= block_size; sub < 4 ? (sub <<= 1) : (sub += 4)) {
> +            int max_xy = sub - 1;
> +            randomize_buffers(coeffs0, block_size, sub);
> +            memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);
> +
> +            col_limit = max_xy + max_xy + 4; // last_significant_coeff_x + last_significant_coeff_y + 4

This logic doesn't really match what the real decoder does, so I'd rather 
postpone and skip this patch for now, until someone can actually clarify 
the exact definition of what the col_limit parameter.

// Martin

Patch

diff --git a/tests/checkasm/hevc_idct.c b/tests/checkasm/hevc_idct.c
index dd4dc0d064..f8cf4de9f1 100644
--- a/tests/checkasm/hevc_idct.c
+++ b/tests/checkasm/hevc_idct.c
@@ -26,35 +26,51 @@ 
 
 #include "checkasm.h"
 
-#define randomize_buffers(buf, size)            \
+#define randomize_buffers(buf, size, sub)       \
     do {                                        \
-        int j;                                  \
+        int j, k;                               \
         for (j = 0; j < size; j++) {            \
-            int16_t r = rnd();                  \
-            AV_WN16A(buf + j, r);               \
+            for (k = 0; k < size; k++) {        \
+                int16_t r = 0;                  \
+                if (j < sub && k < sub)         \
+                    r = rnd();                  \
+                AV_WN16A(buf + j * size + k, r);\
+            }                                   \
         }                                       \
     } while (0)
 
 static void check_idct(HEVCDSPContext h, int bit_depth)
 {
-    int i;
+    int i, sub;
     LOCAL_ALIGNED(32, int16_t, coeffs0, [32 * 32]);
     LOCAL_ALIGNED(32, int16_t, coeffs1, [32 * 32]);
 
     for (i = 2; i <= 5; i++) {
         int block_size = 1 << i;
         int size = block_size * block_size;
-        int col_limit = block_size;
+        int col_limit;
         declare_func(void, int16_t *coeffs, int col_limit);
 
-        randomize_buffers(coeffs0, size);
-        memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);
-        if (check_func(h.idct[i - 2], "hevc_idct_%dx%d_%d", block_size, block_size, bit_depth)) {
-            call_ref(coeffs0, col_limit);
-            call_new(coeffs1, col_limit);
-            if (memcmp(coeffs0, coeffs1, sizeof(*coeffs0) * size))
-                fail();
-            bench_new(coeffs1, col_limit);
+        for (sub = 1; sub <= block_size; sub < 4 ? (sub <<= 1) : (sub += 4)) {
+            int max_xy = sub - 1;
+            randomize_buffers(coeffs0, block_size, sub);
+            memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);
+
+            col_limit = max_xy + max_xy + 4; // last_significant_coeff_x + last_significant_coeff_y + 4
+            if (max_xy < 4)
+                col_limit = FFMIN(4, col_limit);
+            else if (max_xy < 8)
+                col_limit = FFMIN(8, col_limit);
+            else if (max_xy < 12)
+                col_limit = FFMIN(24, col_limit);
+
+            if (check_func(h.idct[i - 2], "hevc_idct_%dx%d_sub%d_%d", block_size, block_size, sub, bit_depth)) {
+                call_ref(coeffs0, col_limit);
+                call_new(coeffs1, col_limit);
+                if (memcmp(coeffs0, coeffs1, sizeof(*coeffs0) * size))
+                    fail();
+                bench_new(coeffs1, col_limit);
+            }
         }
     }
 }
@@ -70,7 +86,7 @@  static void check_idct_dc(HEVCDSPContext h, int bit_depth)
         int size = block_size * block_size;
         declare_func_emms(AV_CPU_FLAG_MMXEXT, void, int16_t *coeffs);
 
-        randomize_buffers(coeffs0, size);
+        randomize_buffers(coeffs0, block_size, 1);
         memcpy(coeffs1, coeffs0, sizeof(*coeffs0) * size);
 
         if (check_func(h.idct_dc[i - 2], "hevc_idct_%dx%d_dc_%d", block_size, block_size, bit_depth)) {