OpenShot Audio Library | OpenShotAudio  0.3.2
juce_neon_SIMDNativeOps.h
1 /*
2  ==============================================================================
3 
4  This file is part of the JUCE library.
5  Copyright (c) 2017 - ROLI Ltd.
6 
7  JUCE is an open source library subject to commercial or open-source
8  licensing.
9 
10  By using JUCE, you agree to the terms of both the JUCE 5 End-User License
11  Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
12  27th April 2017).
13 
14  End User License Agreement: www.juce.com/juce-5-licence
15  Privacy Policy: www.juce.com/juce-5-privacy-policy
16 
17  Or: You may also use this code under the terms of the GPL v3 (see
18  www.gnu.org/licenses).
19 
20  JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
21  EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
22  DISCLAIMED.
23 
24  ==============================================================================
25 */
26 
27 namespace juce
28 {
29 namespace dsp
30 {
31 
32 #ifndef DOXYGEN
33 
34 #if JUCE_GCC && (__GNUC__ >= 6)
35  #pragma GCC diagnostic push
36  #pragma GCC diagnostic ignored "-Wignored-attributes"
37 #endif
38 
// Helper macros for the aligned constant tables used by the SIMDNativeOps
// specialisations.
//
// DECLARE_NEON_SIMD_CONST (type, name)
//     declares a static const array called `name` holding 16 / sizeof (type)
//     elements (i.e. 16 bytes in total) with 16-byte alignment.
//
// DEFINE_NEON_SIMD_CONST (type, class_type, name)
//     spells the matching out-of-class definition, qualified with
//     SIMDNativeOps<class_type>; the initialiser is supplied at the point of use.
//
// The two branches differ only in the compiler-specific alignment syntax.
#ifndef _MSC_VER
 #define DECLARE_NEON_SIMD_CONST(type, name) \
    static const type name [16 / sizeof (type)] __attribute__((aligned(16)))

 #define DEFINE_NEON_SIMD_CONST(type, class_type, name) \
    const type SIMDNativeOps<class_type>:: name [16 / sizeof (type)] __attribute__((aligned(16)))

#else
 #define DECLARE_NEON_SIMD_CONST(type, name) \
    static __declspec(align(16)) const type name [16 / sizeof (type)]

 #define DEFINE_NEON_SIMD_CONST(type, class_type, name) \
    __declspec(align(16)) const type SIMDNativeOps<class_type>:: name [16 / sizeof (type)]

#endif
54 
/** Primary template for the native SIMD operation set of a scalar type.
    It is declared but never defined here; each supported scalar type gets
    its own explicit specialisation.
*/
template <typename ScalarType> struct SIMDNativeOps;
57 
58 //==============================================================================
63 template <>
64 struct SIMDNativeOps<uint32_t>
65 {
66  //==============================================================================
67  using vSIMDType = uint32x4_t;
68  using fb = SIMDFallbackOps<uint32_t, vSIMDType>;
69 
70  //==============================================================================
71  DECLARE_NEON_SIMD_CONST (uint32_t, kAllBitsSet);
72 
73  //==============================================================================
74  static forcedinline vSIMDType expand (uint32_t s) noexcept { return vdupq_n_u32 (s); }
75  static forcedinline vSIMDType load (const uint32_t* a) noexcept { return vld1q_u32 (a); }
76  static forcedinline void store (vSIMDType value, uint32_t* a) noexcept { vst1q_u32 (a, value); }
77  static forcedinline uint32_t get (vSIMDType v, size_t i) noexcept { return v[i]; }
78  static forcedinline vSIMDType set (vSIMDType v, size_t i, uint32_t s) noexcept { v[i] = s; return v; }
79  static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u32 (a, b); }
80  static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u32 (a, b); }
81  static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u32 (a, b); }
82  static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u32 (a, b); }
83  static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u32 (a, b); }
84  static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u32 (a, b); }
85  static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u32 (b, a); }
86  static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u32 ((uint32_t*) kAllBitsSet)); }
87  static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u32 (a, b); }
88  static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u32 (a, b); }
89  static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_u32 (a, b); }
90  static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (sum (notEqual (a, b)) == 0); }
91  static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
92  static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u32 (a, b); }
93  static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u32 (a, b); }
94  static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u32 (a, b, c); }
95  static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }
96 
97  static forcedinline uint32_t sum (vSIMDType a) noexcept
98  {
99  auto rr = vadd_u32 (vget_high_u32 (a), vget_low_u32 (a));
100  return vget_lane_u32 (vpadd_u32 (rr, rr), 0);
101  }
102 };
103 
104 //==============================================================================
109 template <>
110 struct SIMDNativeOps<int32_t>
111 {
112  //==============================================================================
113  using vSIMDType = int32x4_t;
114  using fb = SIMDFallbackOps<int32_t, vSIMDType>;
115 
116  //==============================================================================
117  DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
118 
119  //==============================================================================
120  static forcedinline vSIMDType expand (int32_t s) noexcept { return vdupq_n_s32 (s); }
121  static forcedinline vSIMDType load (const int32_t* a) noexcept { return vld1q_s32 (a); }
122  static forcedinline void store (vSIMDType value, int32_t* a) noexcept { vst1q_s32 (a, value); }
123  static forcedinline int32_t get (vSIMDType v, size_t i) noexcept { return v[i]; }
124  static forcedinline vSIMDType set (vSIMDType v, size_t i, int32_t s) noexcept { v[i] = s; return v; }
125  static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s32 (a, b); }
126  static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s32 (a, b); }
127  static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s32 (a, b); }
128  static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s32 (a, b); }
129  static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s32 (a, b); }
130  static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s32 (a, b); }
131  static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s32 (b, a); }
132  static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s32 ((int32_t*) kAllBitsSet)); }
133  static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s32 (a, b); }
134  static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s32 (a, b); }
135  static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_s32 (a, b); }
136  static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (sum (notEqual (a, b)) == 0); }
137  static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
138  static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s32 (a, b); }
139  static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s32 (a, b); }
140  static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s32 (a, b, c); }
141  static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }
142 
143  static forcedinline int32_t sum (vSIMDType a) noexcept
144  {
145  auto rr = vadd_s32 (vget_high_s32 (a), vget_low_s32 (a));
146  rr = vpadd_s32 (rr, rr);
147  return vget_lane_s32 (rr, 0);
148  }
149 };
150 
151 //==============================================================================
156 template <>
157 struct SIMDNativeOps<int8_t>
158 {
159  //==============================================================================
160  using vSIMDType = int8x16_t;
161  using fb = SIMDFallbackOps<int8_t, vSIMDType>;
162 
163  //==============================================================================
164  DECLARE_NEON_SIMD_CONST (int8_t, kAllBitsSet);
165 
166  //==============================================================================
167  static forcedinline vSIMDType expand (int8_t s) noexcept { return vdupq_n_s8 (s); }
168  static forcedinline vSIMDType load (const int8_t* a) noexcept { return vld1q_s8 (a); }
169  static forcedinline void store (vSIMDType value, int8_t* a) noexcept { vst1q_s8 (a, value); }
170  static forcedinline int8_t get (vSIMDType v, size_t i) noexcept { return v[i]; }
171  static forcedinline vSIMDType set (vSIMDType v, size_t i, int8_t s) noexcept { v[i] = s; return v; }
172  static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s8 (a, b); }
173  static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s8 (a, b); }
174  static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s8 (a, b); }
175  static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s8 (a, b); }
176  static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s8 (a, b); }
177  static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s8 (a, b); }
178  static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s8 (b, a); }
179  static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s8 ((int8_t*) kAllBitsSet)); }
180  static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s8 (a, b); }
181  static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s8 (a, b); }
182  static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_s8 (a, b); }
183  static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
184  static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s8 (a, b); }
185  static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s8 (a, b); }
186  static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<int32_t>::sum ((SIMDNativeOps<int32_t>::vSIMDType) notEqual (a, b)) == 0); }
187  static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s8 (a, b, c); }
188  static forcedinline int8_t sum (vSIMDType a) noexcept { return fb::sum (a); }
189  static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }
190 };
191 
192 //==============================================================================
197 template <>
198 struct SIMDNativeOps<uint8_t>
199 {
200  //==============================================================================
201  using vSIMDType = uint8x16_t;
202  using fb = SIMDFallbackOps<uint8_t, vSIMDType>;
203 
204  //==============================================================================
205  DECLARE_NEON_SIMD_CONST (uint8_t, kAllBitsSet);
206 
207  //==============================================================================
208  static forcedinline vSIMDType expand (uint8_t s) noexcept { return vdupq_n_u8 (s); }
209  static forcedinline vSIMDType load (const uint8_t* a) noexcept { return vld1q_u8 (a); }
210  static forcedinline void store (vSIMDType value, uint8_t* a) noexcept { vst1q_u8 (a, value); }
211  static forcedinline uint8_t get (vSIMDType v, size_t i) noexcept { return v[i]; }
212  static forcedinline vSIMDType set (vSIMDType v, size_t i, uint8_t s) noexcept { v[i] = s; return v; }
213  static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u8 (a, b); }
214  static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u8 (a, b); }
215  static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u8 (a, b); }
216  static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u8 (a, b); }
217  static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u8 (a, b); }
218  static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u8 (a, b); }
219  static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u8 (b, a); }
220  static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u8 ((uint8_t*) kAllBitsSet)); }
221  static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u8 (a, b); }
222  static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u8 (a, b); }
223  static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_u8 (a, b); }
224  static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
225  static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u8 (a, b); }
226  static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u8 (a, b); }
227  static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum ((SIMDNativeOps<uint32_t>::vSIMDType) notEqual (a, b)) == 0); }
228  static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u8 (a, b, c); }
229  static forcedinline uint8_t sum (vSIMDType a) noexcept { return fb::sum (a); }
230  static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }
231 };
232 
233 //==============================================================================
238 template <>
239 struct SIMDNativeOps<int16_t>
240 {
241  //==============================================================================
242  using vSIMDType = int16x8_t;
243  using fb = SIMDFallbackOps<int16_t, vSIMDType>;
244 
245  //==============================================================================
246  DECLARE_NEON_SIMD_CONST (int16_t, kAllBitsSet);
247 
248  //==============================================================================
249  static forcedinline vSIMDType expand (int16_t s) noexcept { return vdupq_n_s16 (s); }
250  static forcedinline vSIMDType load (const int16_t* a) noexcept { return vld1q_s16 (a); }
251  static forcedinline void store (vSIMDType value, int16_t* a) noexcept { vst1q_s16 (a, value); }
252  static forcedinline int16_t get (vSIMDType v, size_t i) noexcept { return v[i]; }
253  static forcedinline vSIMDType set (vSIMDType v, size_t i, int16_t s) noexcept { v[i] = s; return v; }
254  static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s16 (a, b); }
255  static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s16 (a, b); }
256  static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_s16 (a, b); }
257  static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s16 (a, b); }
258  static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s16 (a, b); }
259  static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s16 (a, b); }
260  static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s16 (b, a); }
261  static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s16 ((int16_t*) kAllBitsSet)); }
262  static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_s16 (a, b); }
263  static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_s16 (a, b); }
264  static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_s16 (a, b); }
265  static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
266  static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_s16 (a, b); }
267  static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_s16 (a, b); }
268  static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<int32_t>::sum ((SIMDNativeOps<int32_t>::vSIMDType) notEqual (a, b)) == 0); }
269  static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_s16 (a, b, c); }
270  static forcedinline int16_t sum (vSIMDType a) noexcept { return fb::sum (a); }
271  static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }
272 };
273 
274 
275 //==============================================================================
280 template <>
281 struct SIMDNativeOps<uint16_t>
282 {
283  //==============================================================================
284  using vSIMDType = uint16x8_t;
285  using fb = SIMDFallbackOps<uint16_t, vSIMDType>;
286 
287  //==============================================================================
288  DECLARE_NEON_SIMD_CONST (uint16_t, kAllBitsSet);
289 
290  //==============================================================================
291  static forcedinline vSIMDType expand (uint16_t s) noexcept { return vdupq_n_u16 (s); }
292  static forcedinline vSIMDType load (const uint16_t* a) noexcept { return vld1q_u16 (a); }
293  static forcedinline void store (vSIMDType value, uint16_t* a) noexcept { vst1q_u16 (a, value); }
294  static forcedinline uint16_t get (vSIMDType v, size_t i) noexcept { return v[i]; }
295  static forcedinline vSIMDType set (vSIMDType v, size_t i, uint16_t s) noexcept { v[i] = s; return v; }
296  static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u16 (a, b); }
297  static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u16 (a, b); }
298  static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_u16 (a, b); }
299  static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u16 (a, b); }
300  static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u16 (a, b); }
301  static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u16 (a, b); }
302  static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u16 (b, a); }
303  static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u16 ((uint16_t*) kAllBitsSet)); }
304  static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_u16 (a, b); }
305  static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_u16 (a, b); }
306  static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_u16 (a, b); }
307  static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
308  static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_u16 (a, b); }
309  static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_u16 (a, b); }
310  static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum ((SIMDNativeOps<uint32_t>::vSIMDType) notEqual (a, b)) == 0); }
311  static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_u16 (a, b, c); }
312  static forcedinline uint16_t sum (vSIMDType a) noexcept { return fb::sum (a); }
313  static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }
314 };
315 
316 //==============================================================================
321 template <>
322 struct SIMDNativeOps<int64_t>
323 {
324  //==============================================================================
325  using vSIMDType = int64x2_t;
326  using fb = SIMDFallbackOps<int64_t, vSIMDType>;
327 
328  //==============================================================================
329  DECLARE_NEON_SIMD_CONST (int64_t, kAllBitsSet);
330 
331  //==============================================================================
332  static forcedinline vSIMDType expand (int64_t s) noexcept { return vdupq_n_s64 (s); }
333  static forcedinline vSIMDType load (const int64_t* a) noexcept { return vld1q_s64 (a); }
334  static forcedinline void store (vSIMDType value, int64_t* a) noexcept { vst1q_s64 (a, value); }
335  static forcedinline int64_t get (vSIMDType v, size_t i) noexcept { return v[i]; }
336  static forcedinline vSIMDType set (vSIMDType v, size_t i, int64_t s) noexcept { v[i] = s; return v; }
337  static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_s64 (a, b); }
338  static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_s64 (a, b); }
339  static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return fb::mul (a, b); }
340  static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_s64 (a, b); }
341  static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_s64 (a, b); }
342  static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_s64 (a, b); }
343  static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_s64 (b, a); }
344  static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_s64 ((int64_t*) kAllBitsSet)); }
345  static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); }
346  static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); }
347  static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); }
348  static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
349  static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
350  static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
351  static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<int32_t>::sum ((SIMDNativeOps<int32_t>::vSIMDType) notEqual (a, b)) == 0); }
352  static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
353  static forcedinline int64_t sum (vSIMDType a) noexcept { return fb::sum (a); }
354  static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }
355 };
356 
357 
358 //==============================================================================
363 template <>
364 struct SIMDNativeOps<uint64_t>
365 {
366  //==============================================================================
367  using vSIMDType = uint64x2_t;
368  using fb = SIMDFallbackOps<uint64_t, vSIMDType>;
369 
370  //==============================================================================
371  DECLARE_NEON_SIMD_CONST (uint64_t, kAllBitsSet);
372 
373  //==============================================================================
374  static forcedinline vSIMDType expand (uint64_t s) noexcept { return vdupq_n_u64 (s); }
375  static forcedinline vSIMDType load (const uint64_t* a) noexcept { return vld1q_u64 (a); }
376  static forcedinline void store (vSIMDType value, uint64_t* a) noexcept { vst1q_u64 (a, value); }
377  static forcedinline uint64_t get (vSIMDType v, size_t i) noexcept { return v[i]; }
378  static forcedinline vSIMDType set (vSIMDType v, size_t i, uint64_t s) noexcept { v[i] = s; return v; }
379  static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_u64 (a, b); }
380  static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_u64 (a, b); }
381  static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return fb::mul (a, b); }
382  static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return vandq_u64 (a, b); }
383  static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return vorrq_u64 (a, b); }
384  static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return veorq_u64 (a, b); }
385  static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return vbicq_u64 (b, a); }
386  static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_u64 ((uint64_t*) kAllBitsSet)); }
387  static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); }
388  static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); }
389  static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); }
390  static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
391  static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
392  static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
393  static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum ((SIMDNativeOps<uint32_t>::vSIMDType) notEqual (a, b)) == 0); }
394  static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
395  static forcedinline uint64_t sum (vSIMDType a) noexcept { return fb::sum (a); }
396  static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return a; }
397 };
398 
399  //==============================================================================
404 template <>
405 struct SIMDNativeOps<float>
406 {
407  //==============================================================================
408  using vSIMDType = float32x4_t;
409  using vMaskType = uint32x4_t;
410  using fb = SIMDFallbackOps<float, vSIMDType>;
411 
412  //==============================================================================
413  DECLARE_NEON_SIMD_CONST (int32_t, kAllBitsSet);
414  DECLARE_NEON_SIMD_CONST (int32_t, kEvenHighBit);
415  DECLARE_NEON_SIMD_CONST (float, kOne);
416 
417  //==============================================================================
418  static forcedinline vSIMDType expand (float s) noexcept { return vdupq_n_f32 (s); }
419  static forcedinline vSIMDType load (const float* a) noexcept { return vld1q_f32 (a); }
420  static forcedinline float get (vSIMDType v, size_t i) noexcept { return v[i]; }
421  static forcedinline vSIMDType set (vSIMDType v, size_t i, float s) noexcept { v[i] = s; return v; }
422  static forcedinline void store (vSIMDType value, float* a) noexcept { vst1q_f32 (a, value); }
423  static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return vaddq_f32 (a, b); }
424  static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return vsubq_f32 (a, b); }
425  static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return vmulq_f32 (a, b); }
426  static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vandq_u32 ((vMaskType) a, (vMaskType) b); }
427  static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vorrq_u32 ((vMaskType) a, (vMaskType) b); }
428  static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) veorq_u32 ((vMaskType) a, (vMaskType) b); }
429  static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vbicq_u32 ((vMaskType) b, (vMaskType) a); }
430  static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return bit_notand (a, vld1q_f32 ((float*) kAllBitsSet)); }
431  static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return vminq_f32 (a, b); }
432  static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return vmaxq_f32 (a, b); }
433  static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vceqq_f32 (a, b); }
434  static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return bit_not (equal (a, b)); }
435  static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgtq_f32 (a, b); }
436  static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return (vSIMDType) vcgeq_f32 (a, b); }
437  static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return (SIMDNativeOps<uint32_t>::sum ((SIMDNativeOps<uint32_t>::vSIMDType) notEqual (a, b)) == 0); }
438  static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return vmlaq_f32 (a, b, c); }
439  static forcedinline vSIMDType dupeven (vSIMDType a) noexcept { return fb::shuffle<(0 << 0) | (0 << 2) | (2 << 4) | (2 << 6)> (a); }
440  static forcedinline vSIMDType dupodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (1 << 2) | (3 << 4) | (3 << 6)> (a); }
441  static forcedinline vSIMDType swapevenodd (vSIMDType a) noexcept { return fb::shuffle<(1 << 0) | (0 << 2) | (3 << 4) | (2 << 6)> (a); }
442  static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return add (fb::shuffle<(2 << 0) | (3 << 2) | (0 << 4) | (1 << 6)> (a), a); }
443  static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return vcvtq_f32_s32 (vcvtq_s32_f32 (a)); }
444 
445  //==============================================================================
446  static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept
447  {
448  vSIMDType rr_ir = mul (a, dupeven (b));
449  vSIMDType ii_ri = mul (swapevenodd (a), dupodd (b));
450  return add (rr_ir, bit_xor (ii_ri, vld1q_f32 ((float*) kEvenHighBit)));
451  }
452 
453  static forcedinline float sum (vSIMDType a) noexcept
454  {
455  auto rr = vadd_f32 (vget_high_f32 (a), vget_low_f32 (a));
456  return vget_lane_f32 (vpadd_f32 (rr, rr), 0);
457  }
458 };
459 
460 //==============================================================================
466 template <>
467 struct SIMDNativeOps<double>
468 {
469  //==============================================================================
470  using vSIMDType = struct { double v[2]; };
471  using fb = SIMDFallbackOps<double, vSIMDType>;
472 
473  static forcedinline vSIMDType expand (double s) noexcept { return {{s, s}}; }
474  static forcedinline vSIMDType load (const double* a) noexcept { return {{a[0], a[1]}}; }
475  static forcedinline void store (vSIMDType v, double* a) noexcept { a[0] = v.v[0]; a[1] = v.v[1]; }
476  static forcedinline double get (vSIMDType v, size_t i) noexcept { return v.v[i]; }
477  static forcedinline vSIMDType set (vSIMDType v, size_t i, double s) noexcept { v.v[i] = s; return v; }
478  static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept { return {{a.v[0] + b.v[0], a.v[1] + b.v[1]}}; }
479  static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept { return {{a.v[0] - b.v[0], a.v[1] - b.v[1]}}; }
480  static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept { return {{a.v[0] * b.v[0], a.v[1] * b.v[1]}}; }
481  static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept { return fb::bit_and (a, b); }
482  static forcedinline vSIMDType bit_or (vSIMDType a, vSIMDType b) noexcept { return fb::bit_or (a, b); }
483  static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept { return fb::bit_xor (a, b); }
484  static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return fb::bit_notand (a, b); }
485  static forcedinline vSIMDType bit_not (vSIMDType a) noexcept { return fb::bit_not (a); }
486  static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept { return fb::min (a, b); }
487  static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept { return fb::max (a, b); }
488  static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept { return fb::equal (a, b); }
489  static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept { return fb::notEqual (a, b); }
490  static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThan (a, b); }
491  static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return fb::greaterThanOrEqual (a, b); }
492  static forcedinline bool allEqual (vSIMDType a, vSIMDType b) noexcept { return fb::allEqual (a, b); }
493  static forcedinline vSIMDType multiplyAdd (vSIMDType a, vSIMDType b, vSIMDType c) noexcept { return fb::multiplyAdd (a, b, c); }
494  static forcedinline vSIMDType cmplxmul (vSIMDType a, vSIMDType b) noexcept { return fb::cmplxmul (a, b); }
495  static forcedinline double sum (vSIMDType a) noexcept { return fb::sum (a); }
496  static forcedinline vSIMDType oddevensum (vSIMDType a) noexcept { return a; }
497  static forcedinline vSIMDType truncate (vSIMDType a) noexcept { return fb::truncate (a); }
498 };
499 
500 #endif
501 
502 #if JUCE_GCC && (__GNUC__ >= 6)
503  #pragma GCC diagnostic pop
504 #endif
505 
506 } // namespace dsp
507 } // namespace juce