Vector Optimized Library of Kernels 3.0.0
Architecture-tuned implementations of math kernels
volk_sse_intrinsics.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2015 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10/*
11 * This file holds helper routines composed from SSE intrinsics.
12 * They should be used in VOLK kernels to avoid copy-pasted code.
13 */
14
15#ifndef INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
16#define INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
17#include <xmmintrin.h>
18
/*
 * Squared magnitude of four interleaved complex floats.
 *
 * Each input register is read as two (I, Q) pairs: lanes 0 and 2 hold the
 * I components, lanes 1 and 3 hold the Q components.
 *
 * Returns { I^2 + Q^2 } for the two pairs of cplxValue1 (result lanes 0, 1)
 * followed by the two pairs of cplxValue2 (result lanes 2, 3).
 */
static inline __m128 _mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
{
    __m128 iValue, qValue;
    // Gather the even lanes of both inputs: i1 i2 i3 i4
    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    // Gather the odd lanes of both inputs: q1 q2 q3 q4
    qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
    iValue = _mm_mul_ps(iValue, iValue); // Square the I values
    qValue = _mm_mul_ps(qValue, qValue); // Square the Q values
    return _mm_add_ps(iValue, qValue);   // I^2 + Q^2 per lane
}
30
31static inline __m128 _mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2)
32{
33 return _mm_sqrt_ps(_mm_magnitudesquared_ps(cplxValue1, cplxValue2));
34}
35
36static inline __m128 _mm_scaled_norm_dist_ps_sse(const __m128 symbols0,
37 const __m128 symbols1,
38 const __m128 points0,
39 const __m128 points1,
40 const __m128 scalar)
41{
42 // calculate scalar * |x - y|^2
43 const __m128 diff0 = _mm_sub_ps(symbols0, points0);
44 const __m128 diff1 = _mm_sub_ps(symbols1, points1);
45 const __m128 norms = _mm_magnitudesquared_ps(diff0, diff1);
46 return _mm_mul_ps(norms, scalar);
47}
48
/*
 * NOTE(review): the opening line of this definition (storage class, return
 * type and function name) is missing from this listing -- the embedded
 * numbering jumps from 48 to 50. Only the parameter list and the body are
 * visible here; restore the missing line from the original header.
 *
 * Per lane, the visible body computes and returns
 *     sq_acc + rec * (aux * val - acc)^2
 * The by-value parameter aux is reused as scratch for the intermediate
 * results.
 */
50 __m128 sq_acc, __m128 acc, __m128 val, __m128 rec, __m128 aux)
51{
52 aux = _mm_mul_ps(aux, val); // aux * val
53 aux = _mm_sub_ps(aux, acc); // aux * val - acc
54 aux = _mm_mul_ps(aux, aux); // (aux * val - acc)^2
55 aux = _mm_mul_ps(aux, rec); // rec * (aux * val - acc)^2
56 return _mm_add_ps(sq_acc, aux); // add the term to sq_acc
57}
58
59#endif /* INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_ */