If I have the following C ++ code to compare two 128 bit unsigned integers, with inline amd-64 asm:
struct uint128_t {
uint64_t lo, hi;
};
inline bool operator< (const uint128_t &a, const uint128_t &b)
{
uint64_t temp;
bool result;
__asm__(
"cmpq %3, %2;"
"sbbq %4, %1;"
"setc %0;"
:
"=r,1,2"(result),
"=r,r,r"(temp)
:
"r,r,r"(a.lo),
"emr,emr,emr"(b.lo),
"emr,emr,emr"(b.hi),
"1"(a.hi));
return result;
}
Then it will be installed very efficiently, but with one drawback. The return value is executed through the "interface" of the general register with the value 0 or 1. This adds two or three extra additional instructions and distracts from the comparison operation, which otherwise would have been completely optimized. The generated code will look something like this:
mov r10, [r14]
mov r11, [r14+8]
cmp r10, [r15]
sbb r11, [r15+8]
setc al
movzx eax, al
test eax, eax
jnz is_lessthan
If I use "sbb% 0,% 0" with a return value of "int" instead of "setc% 0" with a return value of "bool", there are two additional instructions:
mov r10, [r14]
mov r11, [r14+8]
cmp r10, [r15]
sbb r11, [r15+8]
sbb eax, eax
test eax, eax
jnz is_lessthan
I want it:
mov r10, [r14]
mov r11, [r14+8]
cmp r10, [r15]
sbb r11, [r15+8]
jc is_lessthan
GCC asm , . , , . "" .
, GCC ( Intel ++, asm), , ?
, - , ?