Most efficient way to convert uint32 vector to float vector?

x86 does not have an SSE instruction to convert from unsigned int32 to floating point. What would be the most efficient sequence of instructions to achieve this?

EDIT: To clarify, I want a vectorized version of the following scalar operation:

unsigned int x = ...
float res = (float)x;

EDIT2: Here is a naive algorithm for performing the conversion on a scalar:

unsigned int x = ...
float bias = 0.f;
if (x > 0x7fffffff) {
    bias = (float)0x80000000;
    x -= 0x80000000;
}
res = signed_convert(x) + bias;
3 answers

The naive algorithm in your question does not give correctly rounded results: it suffers from double rounding on some inputs. For example, if x is 0x88000081, the correctly rounded conversion to float is 2281701632.0f, but the naive algorithm produces 2281701376.0f instead.
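For reference, here is a minimal scalar test (my own, not from the answer) that reproduces the problem on a typical x86-64 compiler; the question's two-step conversion is inlined for this one input:

#include <stdio.h>

int main(void)
{
    unsigned int x = 0x88000081u;

    /* Direct conversion: a single, correctly rounded rounding step. */
    float direct = (float)x;

    /* The question's approach for x > 0x7fffffff: subtract the bias, convert as
       signed, add the bias back as a float. This rounds twice. */
    float naive = (float)(int)(x - 0x80000000u) + 2147483648.0f;

    printf("direct = %.1f, naive = %.1f\n", direct, naive);
    /* prints: direct = 2281701632.0, naive = 2281701376.0 */
    return 0;
}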

Off the top of my head, you can do an exact conversion as follows (this is a first attempt, so it is probably possible to save an instruction somewhere):

movdqa   xmm1,  xmm0    // make a copy of x
psrld    xmm0,  16      // high 16 bits of x
pand     xmm1, [mask]   // low 16 bits of x
orps     xmm0, [onep39] // float(2^39 + high 16 bits of x)
cvtdq2ps xmm1, xmm1     // float(low 16 bits of x)
subps    xmm0, [onep39] // float(high 16 bits of x)
addps    xmm0,  xmm1    // float(x)

where the constants are:

mask:   0000ffff 0000ffff 0000ffff 0000ffff
onep39: 53000000 53000000 53000000 53000000

This works by converting the high and low 16-bit halves of x separately. Each half fits easily in the 24-bit significand of a float, so both conversions are exact: the low half goes through cvtdq2ps directly, while the high half is ORed into the low mantissa bits of 2^39 (the onep39 constant) and then has 2^39 subtracted back out, leaving float(x & 0xffff0000) with no rounding. Only the final addps rounds, so the whole sequence produces the correctly rounded conversion.

Finally, note that if your inputs are known to fit in 31 bits (i.e., they are less than 2^31), none of this is necessary and a single cvtdq2ps does the job. And if you do not need a correctly rounded result, the simpler biased conversion from your question may be good enough; just be aware that it can be off by one ulp, as in the example above.
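If you prefer intrinsics, the sequence above transliterates roughly as follows (my own sketch, not part of the original answer; the name cvt_epu32_ps is made up):

#include <emmintrin.h>  // SSE2

static inline __m128 cvt_epu32_ps(__m128i x)
{
    const __m128i mask   = _mm_set1_epi32(0x0000ffff);  // low 16 bits
    const __m128i onep39 = _mm_set1_epi32(0x53000000);  // bit pattern of 2^39f

    __m128i hi  = _mm_srli_epi32(x, 16);                      // high 16 bits of x
    __m128i lo  = _mm_and_si128(x, mask);                     // low 16 bits of x
    __m128  fhi = _mm_or_ps(_mm_castsi128_ps(hi),
                            _mm_castsi128_ps(onep39));        // 2^39 + (x & 0xffff0000)
    fhi         = _mm_sub_ps(fhi, _mm_castsi128_ps(onep39));  // float(x & 0xffff0000), exact
    __m128  flo = _mm_cvtepi32_ps(lo);                        // float(x & 0x0000ffff), exact
    return _mm_add_ps(fhi, flo);                              // the only rounding step
}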


This one comes from Apple's AltiVec-to-SSE migration materials (http://developer.apple.com):

#include <emmintrin.h>  // SSE2

inline __m128 _mm_ctf_epu32(const __m128i v)
{
    const __m128 two16 = _mm_set1_ps(0x1.0p16f);

    // Avoid double rounding by doing two exact conversions
    // of high and low 16-bit segments
    const __m128i hi = _mm_srli_epi32(v, 16);
    const __m128i lo = _mm_srli_epi32(_mm_slli_epi32(v, 16), 16);
    const __m128 fHi = _mm_mul_ps(_mm_cvtepi32_ps(hi), two16);
    const __m128 fLo = _mm_cvtepi32_ps(lo);

    // do single rounding according to current rounding mode
    return _mm_add_ps(fHi, fLo);
}
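
A quick sanity check, assuming _mm_ctf_epu32 above is in scope (values chosen so the last lane exercises rounding up to 2^32):

#include <stdio.h>

int main(void)
{
    __m128i v = _mm_setr_epi32((int)0x88000081u, 0, 1, (int)0xffffffffu);
    __m128  f = _mm_ctf_epu32(v);

    float out[4];
    _mm_storeu_ps(out, f);
    printf("%.1f %.1f %.1f %.1f\n", out[0], out[1], out[2], out[3]);
    /* prints: 2281701632.0 0.0 1.0 4294967296.0 */
    return 0;
}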

This was not available when you asked, but AVX-512F added vcvtudq2ps, which converts unsigned 32-bit integers to float directly.
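
With intrinsics that is _mm512_cvtepu32_ps (AVX-512F), plus 128- and 256-bit forms under AVX-512VL; a minimal sketch (the function names are my own):

#include <immintrin.h>

/* One correctly rounded instruction per vector; compile with AVX-512F
   support (and AVX-512VL for the 128-bit form). */
__m512 cvt_u32x16_to_ps(__m512i v) { return _mm512_cvtepu32_ps(v); }
__m128 cvt_u32x4_to_ps (__m128i v) { return _mm_cvtepu32_ps(v); }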

