, gcc . , , asm , 30% c. , . intrinsics asm - armcc ( ), .
, c (-):
for (i=arr_size; i<arr_size; i -= 4)
{
val1 ^= my_array[0];
val2 ^= my_array[1];
val1 ^= my_array[2];
val2 ^= my_array[3];
my_array += 4;
}
- . , neon asm, ( , ).
NEON asm ( , , , )
/*
 * C prototype:
 *     unsigned xor_array_64(const void *data, int dataSize);
 *
 * XORs together every 32-bit word of a buffer and returns the result.
 *
 * Target: 32-bit ARM with NEON, GNU as syntax.
 * In:     r0 = data      (must be word (4-byte) aligned, as VLDM requires)
 *         r1 = dataSize  (in bytes; a positive multiple of 64)
 * Out:    r0 = XOR of all dataSize/4 words
 * Clobb:  r1, q0-q3, q8-q11, flags
 *
 * The buffer is consumed in 64-byte blocks. q0-q3 hold four independent
 * 128-bit accumulators that are folded down to one word at the end.
 */
        .equ    XOR64_BLOCK_BYTES, 0x40         @ bytes consumed per VLDM
        .equ    XOR64_PREFETCH_AHEAD, 0xC0      @ prefetch distance in bytes

xor_array_64:
        vldm    r0!, {d0-d7}                    @ q0-q3 = first block (accumulators)
        subs    r1, r1, #XOR64_BLOCK_BYTES      @ r1 = bytes still unread
        ble     1f                              @ single-block buffer: nothing to loop over
0:
        pld     [r0, #XOR64_PREFETCH_AHEAD]     @ prefetch hint for upcoming blocks
        vldm    r0!, {d16-d23}                  @ q8-q11 = next block
        veor    q0, q0, q8
        veor    q1, q1, q9
        veor    q2, q2, q10
        veor    q3, q3, q11
        subs    r1, r1, #XOR64_BLOCK_BYTES
        bgt     0b                              @ loop only while data remains; bge
                                                @ ran once more at r1 == 0 and read
                                                @ a block past the end of the buffer
1:
        @ Fold 4 x 128 bits -> 128 -> 64 -> 32 bits.
        veor    q0, q0, q1
        veor    q2, q2, q3
        veor    q0, q0, q2
        veor    d0, d0, d1                      @ d0 = low half ^ high half of q0
        vmov    r0, r1, d0                      @ r0 = d0[31:0], r1 = d0[63:32]
        eor     r0, r0, r1                      @ combine both lanes into the result
        bx      lr