An interesting challenge! Here is my solution; I hope I did not violate any of the restrictions by mistake:
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Saturating doubling: returns 2*v, or INT32_MIN / INT32_MAX when the exact
 * result does not fit in an int32_t. Branch-free: both candidate results are
 * computed and one of them is selected with a bit mask.
 */
static inline int32_t x2(int32_t v)
{
    // Work on an unsigned copy: signed overflow is undefined, unsigned wraps.
    uint32_t uv = (uint32_t)v;

    // Candidate 1: "multiply" by shifting.
    uint32_t doubled = uv << 1;

    // Candidate 2: clamp to the extreme value that has the same sign as v.
    uint32_t neg = uv >> 31;            // 1 if negative, 0 otherwise
    uint32_t bigval = UINT32_MAX >> 1;  // 0x7fffffff
    uint32_t clamped = bigval + neg;    // 0x80000000 if neg, 0x7fffffff otherwise

    // Doubling is exact when bit 30 equals the sign bit. The *signed* value is
    // shifted on purpose: with an arithmetic shift, v>>31 is 0 or -1 and
    // v>>30 equals it only when the top two bits agree.
    // NOTE: right-shifting a negative value is implementation-defined in C;
    // GCC documents it as sign extension, which this relies on.
    uint32_t ok = !((v >> 31) ^ (v >> 30));  // 1 if no overflow, 0 otherwise
    uint32_t mask = ~ok + 1u;                // 0xffffffff if ok, 0x00000000 otherwise

    // Select by masking one candidate with ones and the other with zeroes.
    // Converting the result back to int32_t is implementation-defined for
    // values above INT32_MAX (modular on two's-complement compilers).
    return (int32_t)((mask & doubled) | (~mask & clamped));
}

/*
 * Test helper: runs x2(val), prints a progress line once every 0x4000000
 * values, and reports any result that differs from `expect`.
 */
static inline void check(int32_t val, int32_t expect)
{
    int32_t actual = x2(val);

    if ((val & 0x3ffffff) == 0) {
        printf("0x%08" PRIx32 "...\n", (uint32_t)val);
    }
    if (actual != expect) {
        printf("val=%" PRId32 ", expected=%" PRId32 ", actual=%" PRId32 "\n",
               val, expect, actual);
    }
}

/* Exhaustively checks x2() over the whole int32_t range. */
int main(void)
{
    int32_t v = INT32_MIN;

    printf("checking negative clamp...\n");
    for (; v < -0x40000000; ++v) {
        check(v, INT32_MIN);
    }

    printf("checking straight double...\n");
    for (; v < 0x40000000; ++v) {
        check(v, 2 * v);
    }

    printf("checking positive clamp...\n");
    for (; v < INT32_MAX; ++v) {
        check(v, INT32_MAX);
    }
    // Checked outside the loop so the counter is never incremented past
    // INT32_MAX (which would be signed overflow).
    check(INT32_MAX, INT32_MAX);

    printf("All done!\n");
    return 0;
}
And it works fine:
gcc -std=c99 -O2 -Wall -Werror -Wextra -pedantic bounded.c -o bounded && ./bounded
checking negative clamp...
0x80000000...
0x84000000...
0x88000000...
0x8c000000...
0x90000000...
0x94000000...
0x98000000...
0x9c000000...
0xa0000000...
0xa4000000...
0xa8000000...
0xac000000...
0xb0000000...
0xb4000000...
0xb8000000...
0xbc000000...
checking straight double...
0xc0000000...
0xc4000000...
0xc8000000...
0xcc000000...
0xd0000000...
0xd4000000...
0xd8000000...
0xdc000000...
0xe0000000...
0xe4000000...
0xe8000000...
0xec000000...
0xf0000000...
0xf4000000...
0xf8000000...
0xfc000000...
0x00000000...
0x04000000...
0x08000000...
0x0c000000...
0x10000000...
0x14000000...
0x18000000...
0x1c000000...
0x20000000...
0x24000000...
0x28000000...
0x2c000000...
0x30000000...
0x34000000...
0x38000000...
0x3c000000...
checking positive clamp...
0x40000000...
0x44000000...
0x48000000...
0x4c000000...
0x50000000...
0x54000000...
0x58000000...
0x5c000000...
0x60000000...
0x64000000...
0x68000000...
0x6c000000...
0x70000000...
0x74000000...
0x78000000...
0x7c000000...
All done!
Using this convenient interactive compiler, we can get the disassembly for different platforms. Annotated ARM64 build:
x2(int):
        asr     w1, w0, 30              # w1 = v >> 30
        cmp     w1, w0, asr 31          # compare w1 to (v>>31)
        csetm   w1, eq                  # w1 = eq ? -1 : 0
                                        # --- so w1 is "mask"
        mov     w2, 2147483647          # w2 = 0x7fffffff
        mvn     w3, w1                  # w3 = ~w1
                                        # --- so w3 is ~mask
        add     w2, w2, w0, lsr 31      # w2 = w2 + (v>>31)
                                        # --- so w2 is "clamped"
        and     w2, w3, w2              # w2 = w3 & w2
        and     w0, w1, w0, lsl 1       # w0 = w1 & (v << 1)
        orr     w0, w2, w0              # w0 = w2 | w0
        ret                             # return w0
It looks pretty efficient to me. It's pretty sweet that the "doubling" is never stored in a register of its own - it is simply done as a shift applied to the input operand of one of the `and` instructions.