, , SIMD. (xyxyxyxyxyxy...), ,
// Split the interleaved point records into two separate coordinate arrays:
// x[i] receives points[i].x and y[i] receives points[i].y.
// The index uses the container's own size type rather than `unsigned`, so the
// comparison against points.size() cannot wrap when the size type is wider
// than unsigned int.
// NOTE(review): assumes x and y can hold at least points.size() elements;
// their declarations are not visible here -- confirm at the call site.
for (decltype(points.size()) i = 0; i < points.size(); ++i) {
    x[i] = points[i].x;
    y[i] = points[i].y;
}
(xxxxxxxx.... yyyyyyy...). .
, . aka . SSE, , , xxxxyyyyxxxxyyyy....
SIMD. Intel SVML, . AMD libm, , .
.
SIMD Agner Fog Vector Class Library (VCL). , Intel, AMD. , Eigen, , , Eigen, . . SSE AVX float (VCL AVX AVX).
#include <vectorclass.h>
#include <vectormath_trig.h>
// A block of eight 2-D points stored coordinate-wise: the eight x values
// first, followed by the eight y values.
struct Point2DBlock {
    float x[8];  // x coordinates of the eight points in this block
    float y[8];  // y coordinates, index-matched to x
};
int main(void) {
const int nblocks = 10;
Point2DBlock aosoa[nblocks];
float ox = 0.0f, oy = 0.0f;
Vec8f vox = ox, voy = oy;
for(int i=0; i<nblocks; i++) {
Vec8f dx = Vec8f().load(aosoa[i].x) - vox;
Vec8f dy = Vec8f().load(aosoa[i].y) - voy;
Vec8f d = sqrt(dx*dx + dy*dy);
Vec8f az = atan2(dy,dx);
}
}
hypot. VCL, .
// Lane-wise Euclidean norm sqrt(x*x + y*y), computed in scaled form.
//
// The larger magnitude is factored out, so the value that gets squared is a
// ratio in [0, 1]; this avoids the overflow/underflow that squaring the raw
// inputs can cause.
//
// Lanes whose two magnitudes are equal use a ratio of exactly 1 instead of
// dividing. For ordinary values this is the same result the division gives,
// but it also covers (0, 0) and (inf, inf), where 0/0 and inf/inf would
// otherwise turn the result into NaN; those lanes now yield 0 and inf.
//
// x, y: the two components, eight lanes each.
// Returns: the eight norms.
static inline Vec8f hypot(Vec8f const &x, Vec8f const &y) {
    Vec8f const ax = abs(x);
    Vec8f const ay = abs(y);
    Vec8f const lo = min(ax, ay);
    Vec8f const hi = max(ax, ay);
    // The division is still evaluated for every lane; select() just discards
    // its result where lo == hi.
    Vec8f const ratio = select(lo == hi, Vec8f(1.0f), lo / hi);
    return hi * sqrt(1.0f + ratio * ratio);
}
Edit:
, . , . VCL - .
#include <vectorclass.h>
#include <vectormath_trig.h>
int main(void) {
const int npoints=80;
float points[2*npoints];
float ox = 0.0, oy = 0.0;
Vec8f vox = ox, voy = oy;
for(int i=0; i<npoints; i+=16) {
Vec8f l1 = Vec8f().load(&points[i+0]);
Vec8f l2 = Vec8f().load(&points[i+8]);
Vec8f dx = blend8f<0, 2, 4, 6, 8, 10, 12, 14>(l1,l2) - vox;
Vec8f dy = blend8f<1, 3, 5, 7, 9, 11, 13, 15>(l1,l2) - voy;
Vec8f d = sqrt(dx*dx + dy*dy);
Vec8f az = atan2(dy,dx);
}
}