[18/19] aarch64: vp8: Skip saturating in shrn in ff_vp8_idct_add_neon

Message ID 1549012378-32118-18-git-send-email-martin@martin.st
State Committed
Commit 49f9c4272c4029b57ff300d908ba03c6332fc9c4
Headers show
Series
  • [01/19] libavcodec: vp8 neon optimizations for aarch64
Related show

Commit Message

Martin Storsjö Feb. 1, 2019, 9:12 a.m.
The original arm version didn't do saturation here. This probably
doesn't make any difference for performance, but reduces the
differences between the arm and aarch64 implementations.
---
 libavcodec/aarch64/vp8dsp_neon.S | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Patch

diff --git a/libavcodec/aarch64/vp8dsp_neon.S b/libavcodec/aarch64/vp8dsp_neon.S
index 139b380..cac4558 100644
--- a/libavcodec/aarch64/vp8dsp_neon.S
+++ b/libavcodec/aarch64/vp8dsp_neon.S
@@ -92,8 +92,8 @@  function ff_vp8_idct_add_neon, export=1
         smull           v27.4s, v3.4h,  v4.h[0]
         sqdmulh         v20.4h, v1.4h,  v4.h[1]
         sqdmulh         v23.4h, v3.4h,  v4.h[1]
-        sqshrn          v21.4h, v26.4s, #16
-        sqshrn          v22.4h, v27.4s, #16
+        shrn            v21.4h, v26.4s, #16
+        shrn            v22.4h, v27.4s, #16
         add             v21.4h, v21.4h, v1.4h
         add             v22.4h, v22.4h, v3.4h
 
@@ -117,8 +117,8 @@  function ff_vp8_idct_add_neon, export=1
         st1             {v29.16b},  [x1]
         sqdmulh         v21.4h,     v1.4h,  v4.h[1]
         sqdmulh         v23.4h,     v3.4h,  v4.h[1]
-        sqshrn          v20.4h,     v26.4s, #16
-        sqshrn          v22.4h,     v27.4s, #16
+        shrn            v20.4h,     v26.4s, #16
+        shrn            v22.4h,     v27.4s, #16
         add             v20.4h,     v20.4h, v1.4h
         add             v22.4h,     v22.4h, v3.4h
         add             v16.4h,     v0.4h,  v2.4h