, , , . , AVX. AVX2 ymm ( ), xmm-, vinsertf128 , . , , , xmm VEX ( "v" , ).
, , dword, . , 0f 1f.
Assuming the mask is in eax, first broadcast it to every dword lane:
; Broadcast the mask held in eax to all four dword lanes of xmm0.
vmovd xmm0, eax             ; lane 0 of xmm0 = eax
vpshufd xmm0, xmm0, 0       ; shuffle control 0: every destination lane reads source lane 0
Extract the bits:
; Isolate one mask bit per dword lane.
; The high half must be computed first: xmm0 still holds the unmodified
; broadcast at that point. The low-half AND overwrites xmm0 in place, so doing
; it first would leave only bits 0-3 and the high-half AND would always be 0.
vpand xmm1, xmm0, [high_mask]   ; xmm1 = broadcast & {16, 32, 64, 128}
vpand xmm0, xmm0, [low_mask]    ; xmm0 = broadcast & {1, 2, 4, 8}
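where low_mask holds the dwords 1, 2, 4, 8 and high_mask holds 16, 32, 64, 128 (this is memory order, lowest lane first; with _mm_set_epi32 the arguments must be written in the reverse order)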
Form the masks:
; Turn each dword lane into an all-ones / all-zeros mask by comparing it with 0.
vpxor xmm2, xmm2, xmm2      ; xmm2 = 0 (comparison operand)
vpcmpgtd xmm0, xmm0, xmm2   ; lane = 0xFFFFFFFF where the lane is > 0 (signed), else 0
vpcmpgtd xmm1, xmm1, xmm2   ; same comparison for the second register
Merge:
vinsertf128 ymm0, ymm0, xmm1, 1   ; upper 128 bits of ymm0 = xmm1; lower 128 bits keep xmm0's lanes
Convert to 0f or 1f:
; Bitwise AND of each lane with the constant at [ones].
; Assuming [ones] holds eight packed 1.0f values, all-ones lanes become 1.0f
; and all-zeros lanes stay 0.0f.
vandps ymm0, ymm0, [ones]
where ones is 8 copies of the float 1.0.
, , . , .
intrinsics, , ( ). , , VEX, .
// broadcast: copy mask into every 32-bit lane of both 128-bit halves
__m128i low = _mm_set1_epi32(mask);
__m128i high = _mm_set1_epi32(mask);
// extract bits: each lane keeps exactly one bit of mask.
// _mm_set_epi32 lists its arguments from the highest lane down to lane 0,
// so lane 0 of low is ANDed with 1 and lane 0 of high with 16.
low = _mm_and_si128(low, _mm_set_epi32(8, 4, 2, 1));
high = _mm_and_si128(high, _mm_set_epi32(128, 64, 32, 16));
// form masks: lanes that are > 0 become all-ones, the others all-zeros
low = _mm_cmpgt_epi32(low, _mm_setzero_si128());
high = _mm_cmpgt_epi32(high, _mm_setzero_si128());
// reinterpret as float vectors; the casts only change the type.
// The upper 128 bits of low2 are undefined until the insert below fills them.
__m256 low2 = _mm256_castps128_ps256(_mm_castsi128_ps(low));
__m128 high2 = _mm_castsi128_ps(high);
// merge: high2 becomes the upper 128 bits, low2 supplies the lower 128 bits
__m256 total = _mm256_insertf128_ps(low2, high2, 1);
// convert to 0f or 1f: all-ones AND 1.0f gives 1.0f, all-zeros gives 0.0f
total = _mm256_and_ps(total, _mm256_set1_ps(1.0f));
GCC, . vbroadcastss set1 ( vpshufd), , ( , int ).
With AVX2 the whole computation can be done directly on 256-bit integer vectors:
// broadcast mask into all eight 32-bit lanes
__m256i x = _mm256_set1_epi32(mask);
// keep one bit per lane; the arguments run from lane 7 down to lane 0,
// so lane 0 is ANDed with 1 and lane 7 with 128
x = _mm256_and_si256(x, _mm256_set_epi32(128, 64, 32, 16, 8, 4, 2, 1));
// lanes that are > 0 become all-ones, the others all-zeros
x = _mm256_cmpgt_epi32(x, _mm256_setzero_si256());
// 0x3F800000 is the IEEE-754 single-precision bit pattern of 1.0f
x = _mm256_and_si256(x, _mm256_set1_epi32(0x3F800000));
// reinterpret the integer lanes as floats: each lane is now 1.0f or 0.0f
return _mm256_castsi256_ps(x);