Grok 10.0.3
test_util-inl.h
Go to the documentation of this file.
1// Copyright 2019 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// Target-specific helper functions for use by *_test.cc.
17
18#include <inttypes.h>
19#include <stdint.h>
20
21#include "hwy/base.h"
22#include "hwy/print-inl.h"
23#include "hwy/tests/hwy_gtest.h"
24#include "hwy/tests/test_util.h"
25
26// Per-target include guard
27#if defined(HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_) == \
28 defined(HWY_TARGET_TOGGLE)
29#ifdef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
30#undef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
31#else
32#define HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
33#endif
34
36namespace hwy {
37namespace HWY_NAMESPACE {
38
39// Compare expected vector to vector.
40// HWY_INLINE works around a Clang SVE compiler bug where all but the first
41// 128 bits (the NEON register) of actual are zero.
42template <class D, typename T = TFromD<D>, class V = Vec<D>>
43HWY_INLINE void AssertVecEqual(D d, const T* expected, VecArg<V> actual,
44 const char* filename, const int line) {
45 const size_t N = Lanes(d);
46 auto actual_lanes = AllocateAligned<T>(N);
47 Store(actual, d, actual_lanes.get());
48
49 const auto info = hwy::detail::MakeTypeInfo<T>();
50 const char* target_name = hwy::TargetName(HWY_TARGET);
51 hwy::detail::AssertArrayEqual(info, expected, actual_lanes.get(), N,
52 target_name, filename, line);
53}
54
55// Compare expected vector to vector.
56// HWY_INLINE works around a Clang SVE compiler bug where all but the first
57// 128 bits (the NEON register) of actual are zero.
58template <class D, typename T = TFromD<D>, class V = Vec<D>>
60 const char* filename, int line) {
61 auto expected_lanes = AllocateAligned<T>(Lanes(d));
62 Store(expected, d, expected_lanes.get());
63 AssertVecEqual(d, expected_lanes.get(), actual, filename, line);
64}
65
66// Only checks the valid mask elements (those whose index < Lanes(d)).
67template <class D>
69 const char* filename, int line) {
70 // lvalues prevented MSAN failure in farm_sve.
71 const Vec<D> va = VecFromMask(d, a);
72 const Vec<D> vb = VecFromMask(d, b);
73 AssertVecEqual(d, va, vb, filename, line);
74
75 const char* target_name = hwy::TargetName(HWY_TARGET);
76 AssertEqual(CountTrue(d, a), CountTrue(d, b), target_name, filename, line);
77 AssertEqual(AllTrue(d, a), AllTrue(d, b), target_name, filename, line);
78 AssertEqual(AllFalse(d, a), AllFalse(d, b), target_name, filename, line);
79
80 const size_t N = Lanes(d);
81#if HWY_TARGET == HWY_SCALAR
82 const Rebind<uint8_t, D> d8;
83#else
85#endif
86 const size_t N8 = Lanes(d8);
87 auto bits_a = AllocateAligned<uint8_t>(HWY_MAX(8, N8));
88 auto bits_b = AllocateAligned<uint8_t>(HWY_MAX(8, N8));
89 memset(bits_a.get(), 0, N8);
90 memset(bits_b.get(), 0, N8);
91 const size_t num_bytes_a = StoreMaskBits(d, a, bits_a.get());
92 const size_t num_bytes_b = StoreMaskBits(d, b, bits_b.get());
93 AssertEqual(num_bytes_a, num_bytes_b, target_name, filename, line);
94 size_t i = 0;
95 // First check whole bytes (if that many elements are still valid)
96 for (; i < N / 8; ++i) {
97 if (bits_a[i] != bits_b[i]) {
98 fprintf(stderr, "Mismatch in byte %" PRIu64 ": %d != %d\n",
99 static_cast<uint64_t>(i), bits_a[i], bits_b[i]);
100 Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
101 Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
102 hwy::Abort(filename, line, "Masks not equal");
103 }
104 }
105 // Then the valid bit(s) in the last byte.
106 const size_t remainder = N % 8;
107 if (remainder != 0) {
108 const int mask = (1 << remainder) - 1;
109 const int valid_a = bits_a[i] & mask;
110 const int valid_b = bits_b[i] & mask;
111 if (valid_a != valid_b) {
112 fprintf(stderr, "Mismatch in last byte %" PRIu64 ": %d != %d\n",
113 static_cast<uint64_t>(i), valid_a, valid_b);
114 Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
115 Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
116 hwy::Abort(filename, line, "Masks not equal");
117 }
118 }
119}
120
121// Only sets valid elements (those whose index < Lanes(d)). This helps catch
122// tests that are not masking off the (undefined) upper mask elements.
123//
124// TODO(janwas): with HWY_NOINLINE GCC zeros the upper half of AVX2 masks.
125template <class D>
127 return FirstN(d, Lanes(d));
128}
129
130template <class D>
132 const auto zero = Zero(RebindToSigned<D>());
133 return RebindMask(d, Lt(zero, zero));
134}
135
// Assertion macros for tests. Each one forwards to the corresponding Assert*
// helper and appends the call site (__FILE__, __LINE__). The #ifndef keeps the
// macros from being redefined when this header is included again (it has a
// per-target include guard, so repeated inclusion is expected).
#ifndef HWY_ASSERT_EQ

// Two scalar values.
#define HWY_ASSERT_EQ(expected, actual)                                     \
  hwy::AssertEqual(expected, actual, hwy::TargetName(HWY_TARGET), __FILE__, \
                   __LINE__)

// Two arrays of `count` elements.
#define HWY_ASSERT_ARRAY_EQ(expected, actual, count)                          \
  hwy::AssertArrayEqual(expected, actual, count, hwy::TargetName(HWY_TARGET), \
                        __FILE__, __LINE__)

// Two strings.
#define HWY_ASSERT_STRING_EQ(expected, actual)                          \
  hwy::AssertStringEqual(expected, actual, hwy::TargetName(HWY_TARGET), \
                         __FILE__, __LINE__)

// Expected lanes (array or vector) versus a vector. AssertVecEqual is called
// unqualified, so it is looked up from the scope in which the macro is used.
#define HWY_ASSERT_VEC_EQ(d, expected, actual) \
  AssertVecEqual(d, expected, actual, __FILE__, __LINE__)

// Two masks; AssertMaskEqual is likewise called unqualified.
#define HWY_ASSERT_MASK_EQ(d, expected, actual) \
  AssertMaskEqual(d, expected, actual, __FILE__, __LINE__)

#endif  // HWY_ASSERT_EQ
157
158namespace detail {
159
160// Helpers for instantiating tests with combinations of lane types / counts.
161
162// Calls Test for each CappedTag<T, N> where N is in [kMinLanes, kMul * kMinArg]
163// and the resulting Lanes() is in [min_lanes, max_lanes]. The upper bound
164// is required to ensure capped vectors remain extendable. Implemented by
165// recursively halving kMul until it is zero.
166template <typename T, size_t kMul, size_t kMinArg, class Test>
168 static void Do(size_t min_lanes, size_t max_lanes) {
170
171 // If we already don't have enough lanes, stop.
172 const size_t lanes = Lanes(d);
173 if (lanes < min_lanes) return;
174
175 if (lanes <= max_lanes) {
176 Test()(T(), d);
177 }
178 ForeachCappedR<T, kMul / 2, kMinArg, Test>::Do(min_lanes, max_lanes);
179 }
180};
181
182// Base case to stop the recursion.
183template <typename T, size_t kMinArg, class Test>
184struct ForeachCappedR<T, 0, kMinArg, Test> {
185 static void Do(size_t, size_t) {}
186};
187
188#if HWY_HAVE_SCALABLE
189
190template <typename T>
191constexpr int MinPow2() {
192 // Highway follows RVV LMUL in that the smallest fraction is 1/8th (encoded
193 // as kPow2 == -3). The fraction also must not result in zero lanes for the
194 // smallest possible vector size, which is 128 bits even on RISC-V (with the
195 // application processor profile).
196 return HWY_MAX(-3, -static_cast<int>(CeilLog2(16 / sizeof(T))));
197}
198
199// Iterates kPow2 upward through +3.
200template <typename T, int kPow2, int kAddPow2, class Test>
201struct ForeachShiftR {
202 static void Do(size_t min_lanes) {
203 const ScalableTag<T, kPow2 + kAddPow2> d;
204
205 // Precondition: [kPow2, 3] + kAddPow2 is a valid fraction of the minimum
206 // vector size, so we always have enough lanes, except ForGEVectors.
207 if (Lanes(d) >= min_lanes) {
208 Test()(T(), d);
209 } else {
210 fprintf(stderr, "%d lanes < %d: T=%d pow=%d\n",
211 static_cast<int>(Lanes(d)), static_cast<int>(min_lanes),
212 static_cast<int>(sizeof(T)), kPow2 + kAddPow2);
213 HWY_ASSERT(min_lanes != 1);
214 }
215
216 ForeachShiftR<T, kPow2 + 1, kAddPow2, Test>::Do(min_lanes);
217 }
218};
219
220// Base case to stop the recursion.
221template <typename T, int kAddPow2, class Test>
222struct ForeachShiftR<T, 4, kAddPow2, Test> {
223 static void Do(size_t) {}
224};
225#else
226// ForeachCappedR already handled all possible sizes.
227#endif // HWY_HAVE_SCALABLE
228
229} // namespace detail
230
231// These 'adapters' call a test for all possible N or kPow2 subject to
232// constraints such as "vectors must be extendable" or "vectors >= 128 bits".
233// They may be called directly, or via For*Types. Note that for an adapter C,
234// `C<Test>(T())` does not call the test - the correct invocation is
235// `C<Test>()(T())`, or preferably `ForAllTypes(C<Test>())`. We check at runtime
236// that operator() is called to prevent such bugs. Note that this is not
237// thread-safe, but that is fine because C are typically local variables.
238
239// Calls Test for all power of two N in [1, Lanes(d) >> kPow2]. This is for
240// ops that widen their input, e.g. Combine (not supported by HWY_SCALAR).
241template <class Test, int kPow2 = 1>
243 mutable bool called_ = false;
244
245 public:
247 if (!called_) {
248 HWY_ABORT("Test is incorrect, ensure operator() is called");
249 }
250 }
251
252 template <typename T>
253 void operator()(T /*unused*/) const {
254 called_ = true;
255 constexpr size_t kMaxCapped = HWY_LANES(T);
256 // Skip CappedTag that are already full vectors.
257 const size_t max_lanes = Lanes(ScalableTag<T>()) >> kPow2;
258 (void)kMaxCapped;
259 (void)max_lanes;
260#if HWY_TARGET == HWY_SCALAR
261 // not supported
262#else
263 detail::ForeachCappedR<T, (kMaxCapped >> kPow2), 1, Test>::Do(1, max_lanes);
264#if HWY_TARGET == HWY_RVV
265 // For each [MinPow2, 3 - kPow2]; counter is [MinPow2 + kPow2, 3].
266 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, -kPow2, Test>::Do(1);
267#elif HWY_HAVE_SCALABLE
268 // For each [MinPow2, 0 - kPow2]; counter is [MinPow2 + kPow2 + 3, 3].
269 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -kPow2 - 3,
270 Test>::Do(1);
271#endif
272#endif // HWY_SCALAR
273 }
274};
275
276// Calls Test for all power of two N in [1 << kPow2, Lanes(d)]. This is for ops
277// that narrow their input, e.g. UpperHalf.
278template <class Test, int kPow2 = 1>
280 mutable bool called_ = false;
281
282 public:
284 if (!called_) {
285 HWY_ABORT("Test is incorrect, ensure operator() is called");
286 }
287 }
288
289 template <typename T>
290 void operator()(T /*unused*/) const {
291 called_ = true;
292 constexpr size_t kMinLanes = size_t{1} << kPow2;
293 constexpr size_t kMaxCapped = HWY_LANES(T);
294 // For shrinking, an upper limit is unnecessary.
295 constexpr size_t max_lanes = kMaxCapped;
296
297 (void)kMinLanes;
298 (void)max_lanes;
299 (void)max_lanes;
300#if HWY_TARGET == HWY_SCALAR
301 // not supported
302#else
303 detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
304 kMinLanes, max_lanes);
305#if HWY_TARGET == HWY_RVV
306 // For each [MinPow2 + kPow2, 3]; counter is [MinPow2 + kPow2, 3].
307 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, 0, Test>::Do(
308 kMinLanes);
309#elif HWY_HAVE_SCALABLE
310 // For each [MinPow2 + kPow2, 0]; counter is [MinPow2 + kPow2 + 3, 3].
311 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -3, Test>::Do(
312 kMinLanes);
313#endif
314#endif // HWY_TARGET == HWY_SCALAR
315 }
316};
317
318// Calls Test for all supported power of two vectors of at least kMinBits.
319// Examples: AES or 64x64 require 128 bits, casts may require 64 bits.
320template <size_t kMinBits, class Test>
322 mutable bool called_ = false;
323
324 public:
326 if (!called_) {
327 HWY_ABORT("Test is incorrect, ensure operator() is called");
328 }
329 }
330
331 template <typename T>
332 void operator()(T /*unused*/) const {
333 called_ = true;
334 constexpr size_t kMaxCapped = HWY_LANES(T);
335 constexpr size_t kMinLanes = kMinBits / 8 / sizeof(T);
336 // An upper limit is unnecessary.
337 constexpr size_t max_lanes = kMaxCapped;
338 (void)max_lanes;
339#if HWY_TARGET == HWY_SCALAR
340 (void)kMinLanes; // not supported
341#else
342 detail::ForeachCappedR<T, HWY_LANES(T) / kMinLanes, kMinLanes, Test>::Do(
343 kMinLanes, max_lanes);
344#if HWY_TARGET == HWY_RVV
345 // Can be 0 (handled below) if kMinBits > 128.
346 constexpr size_t kRatio = 128 / kMinBits;
347 constexpr int kMinPow2 =
348 kRatio == 0 ? 0 : -static_cast<int>(CeilLog2(kRatio));
349 // For each [kMinPow2, 3]; counter is [kMinPow2, 3].
350 detail::ForeachShiftR<T, kMinPow2, 0, Test>::Do(kMinLanes);
351#elif HWY_HAVE_SCALABLE
352 // Can be 0 (handled below) if kMinBits > 128.
353 constexpr size_t kRatio = 128 / kMinBits;
354 constexpr int kMinPow2 =
355 kRatio == 0 ? 0 : -static_cast<int>(CeilLog2(kRatio));
356 // For each [kMinPow2, 0]; counter is [kMinPow2 + 3, 3].
357 detail::ForeachShiftR<T, kMinPow2 + 3, -3, Test>::Do(kMinLanes);
358#endif
359#endif // HWY_TARGET == HWY_SCALAR
360 }
361};
362
363template <class Test>
365
366// Calls Test for all N that can be promoted (not the same as Extendable because
367// HWY_SCALAR has one lane). Also used for ZipLower, but not ZipUpper.
368template <class Test, int kPow2 = 1>
370 mutable bool called_ = false;
371
372 public:
374 if (!called_) {
375 HWY_ABORT("Test is incorrect, ensure operator() is called");
376 }
377 }
378
379 template <typename T>
380 void operator()(T /*unused*/) const {
381 called_ = true;
382 constexpr size_t kFactor = size_t{1} << kPow2;
383 static_assert(kFactor >= 2 && kFactor * sizeof(T) <= sizeof(uint64_t), "");
384 constexpr size_t kMaxCapped = HWY_LANES(T);
385 constexpr size_t kMinLanes = kFactor;
386 // Skip CappedTag that are already full vectors.
387 const size_t max_lanes = Lanes(ScalableTag<T>()) >> kPow2;
388 (void)kMaxCapped;
389 (void)kMinLanes;
390 (void)max_lanes;
391#if HWY_TARGET == HWY_SCALAR
393#else
394 // TODO(janwas): call Extendable if kMinLanes check not required?
395 detail::ForeachCappedR<T, (kMaxCapped >> kPow2), 1, Test>::Do(kMinLanes,
396 max_lanes);
397#if HWY_TARGET == HWY_RVV
398 // For each [MinPow2, 3 - kPow2]; counter is [MinPow2 + kPow2, 3].
399 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, -kPow2, Test>::Do(
400 kMinLanes);
401#elif HWY_HAVE_SCALABLE
402 // For each [MinPow2, 0 - kPow2]; counter is [MinPow2 + kPow2 + 3, 3].
403 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -kPow2 - 3,
404 Test>::Do(kMinLanes);
405#endif
406#endif // HWY_SCALAR
407 }
408};
409
410// Calls Test for all N that can be demoted (not the same as Shrinkable because
411// HWY_SCALAR has one lane).
412template <class Test, int kPow2 = 1>
414 mutable bool called_ = false;
415
416 public:
418 if (!called_) {
419 HWY_ABORT("Test is incorrect, ensure operator() is called");
420 }
421 }
422
423 template <typename T>
424 void operator()(T /*unused*/) const {
425 called_ = true;
426 constexpr size_t kMinLanes = size_t{1} << kPow2;
427 constexpr size_t kMaxCapped = HWY_LANES(T);
428 // For shrinking, an upper limit is unnecessary.
429 constexpr size_t max_lanes = kMaxCapped;
430
431 (void)kMinLanes;
432 (void)max_lanes;
433 (void)max_lanes;
434#if HWY_TARGET == HWY_SCALAR
436#else
437 detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
438 kMinLanes, max_lanes);
439
440// TODO(janwas): call Extendable if kMinLanes check not required?
441#if HWY_TARGET == HWY_RVV
442 // For each [MinPow2 + kPow2, 3]; counter is [MinPow2 + kPow2, 3].
443 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, 0, Test>::Do(
444 kMinLanes);
445#elif HWY_HAVE_SCALABLE
446 // For each [MinPow2 + kPow2, 0]; counter is [MinPow2 + kPow2 + 3, 3].
447 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -3, Test>::Do(
448 kMinLanes);
449#endif
450#endif // HWY_TARGET == HWY_SCALAR
451 }
452};
453
454// For LowerHalf/Quarter.
455template <class Test, int kPow2 = 1>
457 mutable bool called_ = false;
458
459 public:
461 if (!called_) {
462 HWY_ABORT("Test is incorrect, ensure operator() is called");
463 }
464 }
465
466 template <typename T>
467 void operator()(T /*unused*/) const {
468 called_ = true;
469#if HWY_TARGET == HWY_SCALAR
471#else
472 constexpr size_t kMinLanes = size_t{1} << kPow2;
473 // For shrinking, an upper limit is unnecessary.
474 constexpr size_t kMaxCapped = HWY_LANES(T);
475 detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
476 kMinLanes, kMaxCapped);
477
478// TODO(janwas): call Extendable if kMinLanes check not required?
479#if HWY_TARGET == HWY_RVV
480 // For each [MinPow2 + kPow2, 3]; counter is [MinPow2 + kPow2, 3].
481 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, 0, Test>::Do(
482 kMinLanes);
483#elif HWY_HAVE_SCALABLE
484 // For each [MinPow2 + kPow2, 0]; counter is [MinPow2 + kPow2 + 3, 3].
485 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -3, Test>::Do(
486 kMinLanes);
487#endif
488#endif // HWY_TARGET == HWY_SCALAR
489 }
490};
491
492// Calls Test for all power of two N in [1, Lanes(d)]. This is the default
493// for ops that do not narrow nor widen their input, nor require 128 bits.
494template <class Test>
496 mutable bool called_ = false;
497
498 public:
500 if (!called_) {
501 HWY_ABORT("Test is incorrect, ensure operator() is called");
502 }
503 }
504
505 template <typename T>
506 void operator()(T t) const {
507 called_ = true;
508#if HWY_TARGET == HWY_SCALAR
509 (void)t;
511#else
513#endif
514 }
515};
516
517// Type lists to shorten call sites:
518
// Each helper below invokes `func` once per lane type in its list, passing a
// value-initialized instance of that type. The call order is fixed and is the
// order in which the calls are written.

// Signed integers, narrowest first. int64_t only if HWY_HAVE_INTEGER64.
template <class Func>
void ForSignedTypes(const Func& func) {
  func(int8_t{});
  func(int16_t{});
  func(int32_t{});
#if HWY_HAVE_INTEGER64
  func(int64_t{});
#endif
}

// Unsigned integers, narrowest first. uint64_t only if HWY_HAVE_INTEGER64.
template <class Func>
void ForUnsignedTypes(const Func& func) {
  func(uint8_t{});
  func(uint16_t{});
  func(uint32_t{});
#if HWY_HAVE_INTEGER64
  func(uint64_t{});
#endif
}

// All signed integer types, followed by all unsigned integer types.
template <class Func>
void ForIntegerTypes(const Func& func) {
  ForSignedTypes(func);
  ForUnsignedTypes(func);
}

// float, plus double if HWY_HAVE_FLOAT64.
template <class Func>
void ForFloatTypes(const Func& func) {
  func(float{});
#if HWY_HAVE_FLOAT64
  func(double{});
#endif
}

// All integer types, followed by all floating-point types.
template <class Func>
void ForAllTypes(const Func& func) {
  ForIntegerTypes(func);
  ForFloatTypes(func);
}
558
// Type lists restricted to particular lane widths. Naming: U = unsigned,
// I = signed, F = floating-point, followed by the lane width(s) in bits.
// Each helper invokes `func` once per type with a value-initialized instance;
// within one width the order is unsigned, signed, then (if listed) float.

// uint8_t, int8_t.
template <class Func>
void ForUI8(const Func& func) {
  func(uint8_t{});
  func(int8_t{});
}

// uint16_t, int16_t.
template <class Func>
void ForUI16(const Func& func) {
  func(uint16_t{});
  func(int16_t{});
}

// The 16-bit integers, plus float16_t if HWY_HAVE_FLOAT16.
template <class Func>
void ForUIF16(const Func& func) {
  ForUI16(func);
#if HWY_HAVE_FLOAT16
  func(float16_t{});
#endif
}

// uint32_t, int32_t.
template <class Func>
void ForUI32(const Func& func) {
  func(uint32_t{});
  func(int32_t{});
}

// The 32-bit integers, plus float.
template <class Func>
void ForUIF32(const Func& func) {
  ForUI32(func);
  func(float{});
}

// uint64_t, int64_t; does nothing unless HWY_HAVE_INTEGER64.
template <class Func>
void ForUI64(const Func& func) {
#if HWY_HAVE_INTEGER64
  func(uint64_t{});
  func(int64_t{});
#endif
}

// The 64-bit integers (if available), plus double if HWY_HAVE_FLOAT64.
template <class Func>
void ForUIF64(const Func& func) {
  ForUI64(func);
#if HWY_HAVE_FLOAT64
  func(double{});
#endif
}

// 32-bit integers, then 64-bit integers.
template <class Func>
void ForUI3264(const Func& func) {
  ForUI32(func);
  ForUI64(func);
}

// 32-bit integers and float, then 64-bit integers and double.
template <class Func>
void ForUIF3264(const Func& func) {
  ForUIF32(func);
  ForUIF64(func);
}

// 16-bit, then 32-bit, then 64-bit integers.
template <class Func>
void ForUI163264(const Func& func) {
  ForUI16(func);
  ForUI3264(func);
}

// 16-bit, then 32-bit, then 64-bit integers, each width followed by its
// floating-point type where available.
template <class Func>
void ForUIF163264(const Func& func) {
  ForUIF16(func);
  ForUIF3264(func);
}
630
// Returns the trip count to use for a test loop that would ideally run
// `max_reps` times. Slower configurations get a reduced count so that they
// still finish quickly: 1/32 for RVV, 1/8 for debug builds, 1/4 for Arm. The
// result is never less than 2, to ensure some diversity.
constexpr size_t AdjustedReps(size_t max_reps) {
#if HWY_ARCH_RVV
  return (max_reps / 32 > 2) ? (max_reps / 32) : 2;
#elif HWY_IS_DEBUG_BUILD
  return (max_reps / 8 > 2) ? (max_reps / 8) : 2;
#elif HWY_ARCH_ARM
  return (max_reps / 4 > 2) ? (max_reps / 4) : 2;
#else
  return (max_reps > 2) ? max_reps : 2;
#endif
}
644
// Log2 variant of the trip-count adjustment: the caller's loop will run
// 1 << (return value) times. Slower configurations get a smaller exponent
// (minus 4 for RVV, minus 1 for debug builds and Arm). If `max_pow2` is too
// small for the reduction to be subtracted, it is returned unchanged.
constexpr size_t AdjustedLog2Reps(size_t max_pow2) {
#if HWY_ARCH_RVV
  return (max_pow2 >= 4) ? (max_pow2 - 4) : max_pow2;
#elif HWY_IS_DEBUG_BUILD || HWY_ARCH_ARM
  return (max_pow2 >= 1) ? (max_pow2 - 1) : max_pow2;
#else
  return max_pow2;
#endif
}
658
659// NOLINTNEXTLINE(google-readability-namespace-comments)
660} // namespace HWY_NAMESPACE
661} // namespace hwy
663
664#endif // per-target include guard
#define HWY_MAX(a, b)
Definition: base.h:126
#define HWY_NOINLINE
Definition: base.h:63
#define HWY_MIN(a, b)
Definition: base.h:125
#define HWY_ABORT(format,...)
Definition: base.h:141
#define HWY_INLINE
Definition: base.h:62
#define HWY_ASSERT(condition)
Definition: base.h:145
Definition: test_util-inl.h:413
~ForDemoteVectors()
Definition: test_util-inl.h:417
void operator()(T) const
Definition: test_util-inl.h:424
bool called_
Definition: test_util-inl.h:414
Definition: test_util-inl.h:242
void operator()(T) const
Definition: test_util-inl.h:253
bool called_
Definition: test_util-inl.h:243
~ForExtendableVectors()
Definition: test_util-inl.h:246
Definition: test_util-inl.h:321
bool called_
Definition: test_util-inl.h:322
~ForGEVectors()
Definition: test_util-inl.h:325
void operator()(T) const
Definition: test_util-inl.h:332
Definition: test_util-inl.h:456
~ForHalfVectors()
Definition: test_util-inl.h:460
bool called_
Definition: test_util-inl.h:457
void operator()(T) const
Definition: test_util-inl.h:467
Definition: test_util-inl.h:495
bool called_
Definition: test_util-inl.h:496
void operator()(T t) const
Definition: test_util-inl.h:506
~ForPartialVectors()
Definition: test_util-inl.h:499
Definition: test_util-inl.h:369
~ForPromoteVectors()
Definition: test_util-inl.h:373
bool called_
Definition: test_util-inl.h:370
void operator()(T) const
Definition: test_util-inl.h:380
Definition: test_util-inl.h:279
void operator()(T) const
Definition: test_util-inl.h:290
bool called_
Definition: test_util-inl.h:280
~ForShrinkableVectors()
Definition: test_util-inl.h:283
#define HWY_TARGET
Definition: detect_targets.h:341
d
Definition: rvv-inl.h:1742
V VecArg
Definition: ops/shared-inl.h:306
HWY_API Mask128< TTo, N > RebindMask(Simd< TTo, N, 0 > dto, Mask128< TFrom, N > m)
Definition: arm_neon-inl.h:2189
constexpr size_t AdjustedReps(size_t max_reps)
Definition: test_util-inl.h:633
HWY_API bool AllTrue(const Full128< T > d, const Mask128< T > m)
Definition: arm_neon-inl.h:5305
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6309
void ForUIF32(const Func &func)
Definition: test_util-inl.h:586
void ForUI163264(const Func &func)
Definition: test_util-inl.h:620
HWY_API Mask128< T, N > FirstN(const Simd< T, N, 0 > d, size_t num)
Definition: arm_neon-inl.h:2409
HWY_API size_t StoreMaskBits(Simd< T, N, 0 >, const Mask128< T, N > mask, uint8_t *bits)
Definition: arm_neon-inl.h:5290
void ForUIF3264(const Func &func)
Definition: test_util-inl.h:614
void ForUIF163264(const Func &func)
Definition: test_util-inl.h:626
constexpr size_t AdjustedLog2Reps(size_t max_pow2)
Definition: test_util-inl.h:646
typename detail::CappedTagChecker< T, kLimit >::type CappedTag
Definition: ops/shared-inl.h:172
void ForUI32(const Func &func)
Definition: test_util-inl.h:580
void ForAllTypes(const Func &func)
Definition: test_util-inl.h:554
Rebind< MakeSigned< TFromD< D > >, D > RebindToSigned
Definition: ops/shared-inl.h:198
void ForFloatTypes(const Func &func)
Definition: test_util-inl.h:546
void Print(const D d, const char *caption, VecArg< V > v, size_t lane_u=0, size_t max_lanes=7)
Definition: print-inl.h:39
HWY_NOINLINE void AssertMaskEqual(D d, VecArg< Mask< D > > a, VecArg< Mask< D > > b, const char *filename, int line)
Definition: test_util-inl.h:68
HWY_API size_t CountTrue(Full128< T >, const Mask128< T > mask)
Definition: arm_neon-inl.h:5269
HWY_API Vec128< T, N > VecFromMask(Simd< T, N, 0 > d, const Mask128< T, N > v)
Definition: arm_neon-inl.h:2182
HWY_INLINE void AssertVecEqual(D d, const T *expected, VecArg< V > actual, const char *filename, const int line)
Definition: test_util-inl.h:43
void ForIntegerTypes(const Func &func)
Definition: test_util-inl.h:540
HWY_API constexpr size_t Lanes(Simd< T, N, kPow2 >)
Definition: arm_sve-inl.h:236
HWY_API Vec128< T, N > Load(Simd< T, N, 0 > d, const T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2706
HWY_INLINE Mask< D > MaskFalse(const D d)
Definition: test_util-inl.h:131
void ForUI8(const Func &func)
Definition: test_util-inl.h:560
void ForUI3264(const Func &func)
Definition: test_util-inl.h:608
typename detail::ScalableTagChecker< T, kPow2 >::type ScalableTag
Definition: ops/shared-inl.h:161
HWY_API bool AllFalse(const Simd< T, N, 0 > d, const Mask128< T, N > m)
Definition: arm_neon-inl.h:5299
void ForUIF64(const Func &func)
Definition: test_util-inl.h:600
void ForUI16(const Func &func)
Definition: test_util-inl.h:566
typename D::template Rebind< T > Rebind
Definition: ops/shared-inl.h:195
HWY_API Vec128< T, N > Zero(Simd< T, N, 0 > d)
Definition: arm_neon-inl.h:1011
void ForUI64(const Func &func)
Definition: test_util-inl.h:592
void ForSignedTypes(const Func &func)
Definition: test_util-inl.h:520
void ForUIF16(const Func &func)
Definition: test_util-inl.h:572
typename D::template Repartition< T > Repartition
Definition: ops/shared-inl.h:206
decltype(MaskFromVec(Zero(D()))) Mask
Definition: generic_ops-inl.h:38
N
Definition: rvv-inl.h:1742
HWY_INLINE Mask< D > MaskTrue(const D d)
Definition: test_util-inl.h:126
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2882
void ForUnsignedTypes(const Func &func)
Definition: test_util-inl.h:530
decltype(Zero(D())) Vec
Definition: generic_ops-inl.h:32
HWY_TEST_DLLEXPORT void AssertArrayEqual(const TypeInfo &info, const void *expected_void, const void *actual_void, size_t N, const char *target_name, const char *filename, int line)
Definition: aligned_allocator.h:27
static HWY_MAYBE_UNUSED const char * TargetName(uint32_t target)
Definition: targets.h:77
FuncOutput(*)(const void *, FuncInput) Func
Definition: nanobenchmark.h:105
constexpr size_t CeilLog2(TI x)
Definition: base.h:777
HWY_INLINE void AssertEqual(const T expected, const T actual, const char *target_name, const char *filename, int line, size_t lane=0)
Definition: test_util.h:151
HWY_DLLEXPORT HWY_NORETURN void int line
Definition: base.h:848
#define HWY_LANES(T)
Definition: set_macros-inl.h:85
#define HWY_NAMESPACE
Definition: set_macros-inl.h:82
static void Do(size_t, size_t)
Definition: test_util-inl.h:185
Definition: test_util-inl.h:167
static void Do(size_t min_lanes, size_t max_lanes)
Definition: test_util-inl.h:168
Definition: base.h:246
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()