`movnti` can apparently sustain a throughput of one store per clock cycle when repeatedly writing to the same address.
, movnti , , . ( WC SSE4.1 NT, NT.)
, NT NT , , DRAM.
DDR DRAM . movnti 4B, , . //, , . . .
, , . , , .
, , 4 ( ), , . 100k ( perf).
, Core2 E6600 (Merom/Conroe) DDR2 533MHz , / , 0.113846 . 266 007 .
Test code (repeated `movnti` stores to the same address):
;-----------------------------------------------------------------------
; Microbenchmark: repeated 32-bit non-temporal stores (movnti) to one
; fixed address.
; Syntax: NASM/YASM (Intel operand order), 64-bit ELF, entry at _start.
; Exit:   raw `syscall` with eax = __NR_exit and edi = 0.
; Note:   eax is never initialised before the loop, so the stored value
;         is whatever eax holds at entry; only the store itself matters.
;-----------------------------------------------------------------------
SYS_EXIT        equ     60              ; __NR_exit
ARRAY_BYTES     equ     81920           ; size of the .bss buffer

section .text
global _start
_start:
        xor     ecx, ecx                ; counter = 0; first dec wraps to 0xFFFFFFFF
.store_loop:
        movnti  [array], eax            ; NT store, same address every iteration
        dec     ecx
        jnz     .store_loop             ; 2^32 iterations

        xor     edi, edi                ; exit status 0
        mov     eax, SYS_EXIT
        syscall                         ; exit(0)

section .bss
array:  resb    ARRAY_BYTES
(`asm-link` is a small wrapper script; the `+` lines below show the `yasm` and `ld` commands it runs.)
$ asm-link movnti-same-address.asm
+ yasm -felf64 -Worphan-labels -gdwarf2 movnti-same-address.asm
+ ld -o movnti-same-address movnti-same-address.o
$ perf stat -e task-clock,cycles,instructions ./movnti-same-address
Performance counter stats for './movnti-same-address':
1835.056710 task-clock (msec)
4,398,731,563 cycles
12,891,491,495 instructions
1.843642514 seconds time elapsed
Running one instance on its own, then two instances in parallel:
$ time ./movnti-same-address; time ./movnti-same-address & time ./movnti-same-address &
real 0m1.844s / user 0m1.828s
[1] 12523
[2] 12524
peter@tesla:~/src/SO$
real 0m1.855s / user 0m1.824s
real 0m1.984s / user 0m1.808s
SMP ( ), . 10- Xeon 10 ( ), , . ( , , , .)
zx485 uop count , .
, CPU , - . , , , , , IPC .
P.S - ?
, . (1 ). , insn, dec/jg cmp.
"" , , . "", , , - "".
. , - , , .