Using:
/**
 * Read the x86 time-stamp counter, preceded by a CPUID instruction.
 *
 * CPUID is issued first so that earlier instructions have completed before
 * the counter is sampled (CPUID is documented as a serializing instruction).
 * EAX and ECX are explicitly zeroed before CPUID so the same leaf/sub-leaf is
 * queried on every call; leaving them unset makes CPUID query whatever leaf
 * the registers happen to hold.
 *
 * Declared `static inline` so the function also links when the compiler
 * chooses not to inline it (a plain C99/C11 `inline` definition provides no
 * external definition).
 *
 * @return the 64-bit counter value (EDX:EAX as returned by RDTSC).
 */
static inline uint64_t rdtsc(void)
{
    uint32_t cycles_low = 0;  /* in: CPUID leaf 0;     out: TSC bits 31..0  */
    uint32_t cycles_high;     /*                       out: TSC bits 63..32 */
    uint32_t subleaf = 0;     /* in: CPUID sub-leaf 0; overwritten by CPUID */

    /*
     * RDTSC leaves its result in EDX:EAX, so bind the outputs to those
     * registers directly instead of copying them with extra MOVs.
     * CPUID also overwrites EBX and ECX: ECX is covered by the "+c" operand,
     * RBX by the clobber list. The "memory" clobber keeps the compiler from
     * moving loads/stores across the timestamp.
     */
    __asm__ volatile("cpuid\n\t"
                     "rdtsc"
                     : "+a"(cycles_low), "=d"(cycles_high), "+c"(subleaf)
                     :
                     : "rbx", "memory");

    return ((uint64_t)cycles_high << 32) | cycles_low;
}
Stream 1 runs:
while(globalIndex < COUNT) { while(globalIndex %2 == 0 && globalIndex < COUNT) ; cycles[globalIndex][0] = rdtsc(); cycles[globalIndex][1] = cpuToBindTo; __sync_add_and_fetch(&globalIndex,1); }
Stream 2 runs:
while(globalIndex < COUNT) { while(globalIndex %2 == 1 && globalIndex < COUNT) ; cycles[globalIndex][0] = rdtsc(); cycles[globalIndex][1] = cpuToBindTo; __sync_add_and_fetch(&globalIndex,1); }
I see
CPU  rdtsc()             t1-t0
11 = 5023231563212740    990
03 = 5023231563213730    310
11 = 5023231563214040    990
03 = 5023231563215030    310
11 = 5023231563215340    990
03 = 5023231563216330    310
11 = 5023231563216640    990
03 = 5023231563217630    310
11 = 5023231563217940    990
03 = 5023231563218930    310
11 = 5023231563219240    990
03 = 5023231563220230    310
11 = 5023231563220540    990
03 = 5023231563221530    310
11 = 5023231563221840    990
03 = 5023231563222830    310
11 = 5023231563223140    990
03 = 5023231563224130    310
11 = 5023231563224440    990
03 = 5023231563225430    310
11 = 5023231563225740    990
03 = 5023231561739842    310
11 = 5023231561740152    990
03 = 5023231561741142    310
11 = 5023231561741452    12458
03 = 5023231561753910    458
11 = 5023231561754368    1154
03 = 5023231561755522    318
11 = 5023231561755840    982
03 = 5023231561756822    310
11 = 5023231561757132    990
03 = 5023231561758122    310
11 = 5023231561758432    990
03 = 5023231561759422    310
I'm not sure how I got a pong of 12458, but mainly I was wondering why I see 310-990-310 instead of 650-650-650. I thought the TSC should be synchronized across all cores. My CPU's constant_tsc flag is set.
source share