Vector Optimized Library of Kernels
3.0.0
Architecture-tuned implementations of math kernels
volk_sse_intrinsics.h
Go to the documentation of this file.
1
/* -*- c++ -*- */
2
/*
3
* Copyright 2015 Free Software Foundation, Inc.
4
*
5
* This file is part of VOLK
6
*
7
* SPDX-License-Identifier: LGPL-3.0-or-later
8
*/
9
10
/*
11
* This file is intended to hold helper functions built from SSE intrinsics.
12
* They should be used in VOLK kernels to avoid copy-pasta.
13
*/
14
15
#ifndef INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
16
#define INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
17
#include <xmmintrin.h>
18
19
/*
 * Compute the squared magnitude of four complex values held in two SSE
 * registers.
 *
 * cplxValue1 and cplxValue2 are each read as two (I, Q) pairs: lanes 0 and 2
 * are taken as I values and lanes 1 and 3 as Q values.
 *
 * Returns a vector whose lanes are I*I + Q*Q for, in order, the two pairs of
 * cplxValue1 followed by the two pairs of cplxValue2.
 */
static inline __m128 _mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
{
    /* Gather the even lanes of both inputs: i1 i2 i3 i4 */
    const __m128 inPhase =
        _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    /* Gather the odd lanes of both inputs: q1 q2 q3 q4 */
    const __m128 quadrature =
        _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));

    const __m128 inPhaseSq = _mm_mul_ps(inPhase, inPhase);          /* I^2 */
    const __m128 quadratureSq = _mm_mul_ps(quadrature, quadrature); /* Q^2 */

    return _mm_add_ps(inPhaseSq, quadratureSq); /* I^2 + Q^2 */
}
30
31
static
inline
__m128
_mm_magnitude_ps
(
__m128
cplxValue1,
__m128
cplxValue2)
32
{
33
return
_mm_sqrt_ps
(
_mm_magnitudesquared_ps
(cplxValue1, cplxValue2));
34
}
35
36
static
inline
__m128
_mm_scaled_norm_dist_ps_sse
(
const
__m128
symbols0,
37
const
__m128
symbols1,
38
const
__m128
points0,
39
const
__m128
points1,
40
const
__m128
scalar)
41
{
42
// calculate scalar * |x - y|^2
43
const
__m128
diff0 =
_mm_sub_ps
(symbols0, points0);
44
const
__m128
diff1 =
_mm_sub_ps
(symbols1, points1);
45
const
__m128
norms =
_mm_magnitudesquared_ps
(diff0, diff1);
46
return
_mm_mul_ps
(norms, scalar);
47
}
48
49
/*
 * Add a scaled squared difference to an accumulator, lane by lane.
 *
 * Returns sq_acc + rec * (aux * val - acc)^2.
 *
 * aux is passed by value, so the caller's copy is not modified.
 */
static inline __m128 _mm_accumulate_square_sum_ps(
    __m128 sq_acc, __m128 acc, __m128 val, __m128 rec, __m128 aux)
{
    const __m128 scaled = _mm_mul_ps(aux, val);          /* aux * val */
    const __m128 delta = _mm_sub_ps(scaled, acc);        /* aux * val - acc */
    const __m128 deltaSq = _mm_mul_ps(delta, delta);     /* (aux * val - acc)^2 */
    const __m128 weighted = _mm_mul_ps(deltaSq, rec);    /* rec * (...)^2 */
    return _mm_add_ps(sq_acc, weighted);
}
58
59
#endif
/* INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_ */
include
volk
volk_sse_intrinsics.h
Generated by
1.9.4