Add vs mul (IA32-Assembly)

I know that add is faster than mul.

I want to know how to use add instead of mul in the following code to make it more efficient.

Code example:

            # Compute (x1 - x2) * (y1 - y2) from four dword values read at
            # [ebp + 8], [ebp + 12], [ebp + 16] and [ebp + 20].
            # NOTE(review): ebx and ecx are overwritten and no save/restore is visible
            # in this fragment - confirm the enclosing routine preserves ebx if its
            # calling convention requires that.
            mov eax, [ebp + 8]              # eax = x1
            mov ecx, [ebp + 12]             # ecx = x2
            mov edx, [ebp + 16]             # edx = y1
            mov ebx, [ebp + 20]             # ebx = y2

            sub eax,ecx                     # eax = x1 - x2
            sub edx,ebx                     # edx = y1 - y2

            mul edx                         # edx:eax = eax * edx (unsigned); eax = low 32 bits of (x1-x2)*(y1-y2), edx = high 32 bits
+5
source share
5 answers

add is faster than mul, but if you want to multiply two arbitrary values, mul is much faster than any loop that repeatedly executes add.

, , mul. (, 2), , , , . - .

+12

, , x86.

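If one operand is a constant known in advance, write it as a sum of powers of two, 2 ^ p + 2 ^ q + ... + 2 ^ r, and compute x * 2 ^ p + x * 2 ^ q + ... + x * 2 ^ r, where p, q, ..., r are the positions of the set bits of the constant. This is done with left shifts and adds: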

;  Multiply x by the constant 2^p + 2^q + 2^r using shifts and adds.
;  p, q, r are assemble-time constants; the shift counts q-r and p-q
;  require p > q > r.
;  x in EDX (destroyed: EDX ends up holding x*2^p)
;  product to EAX
xor  eax,eax ; running sum = 0
shl  edx,r ; edx = x*2^r
add  eax,edx ; sum = x*2^r
shl  edx,q-r ; edx = x*2^q (shifted r bits already, q-r more)
add  eax,edx ; sum += x*2^q
shl  edx,p-q ; edx = x*2^p
add  eax,edx ; sum += x*2^p

, 4 , , . 10 , , .

9:

; Multiply by 9 = 2^3 + 2^0: x in EDX (destroyed), product to EAX.
mov  eax,edx ; eax = x*2^0; replaces xor eax,eax / shl edx,0 / add eax,edx
shl  edx,3 ; edx = x*2^3
add  eax,edx ; eax = x + 8*x = 9*x

; 2 .

, LEA (load effective address) . LEA, , CPU.

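LEA can be used as a "multiply-and-add" instruction: it computes t = 2 ^ k * x + y for k = 1, 2, 3 (see the Intel reference manual), where t, x and y are registers. With x == y, and with the forms that omit one of the terms, a single LEA multiplies x by 1, 2, 3, 4, 5, 8 or 9; x and y are left unchanged, and the result can go to any register (here, t). For example, multiplying by 9: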

lea  eax,[edx*8+edx]  ; eax = 8*edx + edx = 9*edx, edx unchanged; takes 1 clock (author's figure)

LEA , :

; Multiply edx by 44 = (5*2 + 1) * 4 with three chained LEAs; result in eax, edx unchanged.
lea  eax,[edx*4+edx] ; eax = 5 * edx
lea  eax,[eax*2+edx] ; eax = 2*(5*edx) + edx = 11 * edx
lea  eax,[eax*4] ; eax = 4*(11*edx) = 44 * edx

/, 1,2,3,4,5,8 9. , , 3-4 .

(, SHL/SUB/NEG/MOV) , LEA . 31:

; Multiply edx by 31 = 32 - 1; result in eax, edx unchanged.
lea  eax,[4*edx] ; eax = 4*edx
lea  eax,[8*eax]  ; eax = 8*(4*edx) = 32*edx
sub  eax,edx; eax = 32*edx - edx = 31*edx ; 3 clocks

LEA :

; Multiply edx by 31 using LEA only; result in eax, edx unchanged.
lea  eax,[edx*4+edx] ; eax = 5*edx
lea  eax,[edx*2+eax] ; eax = 2*edx + 5*edx = 7*edx
lea  eax,[eax*2+edx] ; eax = 2*(7*edx) + edx = 15*edx
lea  eax,[eax*2+edx] ; eax = 2*(15*edx) + edx = 31*edx ; 4 clocks

, .

LEA, SHL, SUB, NEG, MOV - case zero clocks, , exeuction . , . , , ( 5 ), , , ( ).

PARLANSE, A [i ], . , , ; , , .

, 1 10000. 5-6 . , PARLANSE .

+9

, add , a mul. , add :

Multiply by 2:
    add eax,eax          ; eax = 2 * x (adding a register to itself doubles it)
Multiply by 4:
    add eax,eax          ; eax = 2 * x
    add eax,eax          ; eax = 4 * x
Multiply by 8:
    add eax,eax          ; eax = 2 * x
    add eax,eax          ; eax = 4 * x
    add eax,eax          ; eax = 8 * x

. , . , , , . -, , 6502, Zilog z80 RCA1802: -)

-, :

Multiply by 9:
    push ebx              ; ebx is borrowed as scratch; keep the caller's value
    mov  ebx,eax          ; ebx = x (copy of the original multiplicand)
    add  eax,eax          ; eax = 2x
    add  eax,eax          ; eax = 4x
    add  eax,eax          ; eax = 8x
    add  eax,ebx          ; eax = 8x + x = 9x
    pop  ebx              ; hand the caller's ebx back

, . , , . , "" , .

, , , , . .


add , , ax bx ax. .

; Driver: multiply 7 by 5 through the MULT routine, then halt.
START:  MOV    AX, 0007    ; AX = first factor
        MOV    BX, 0005    ; BX = second factor
        CALL   MULT        ; MULT leaves the product in AX
        HLT                ; Stop.

;-----------------------------------------------------------------------
; MULT - multiply two 16-bit values using only shifts and adds.
; In:    AX = multiplicand, BX = multiplier
; Out:   AX = low 16 bits of AX * BX
; Saves: BX, CX (pushed/popped here); DX is never touched
; Clobb: flags
; Method: for each set bit of BX, lowest first, add the correspondingly
;         doubled multiplicand into the accumulator CX.
;-----------------------------------------------------------------------
MULT:   PUSH   BX          ; Preserve BX (consumed bit by bit below).
        PUSH   CX          ; Preserve CX (used as the accumulator).

        XOR    CX,CX       ; CX = 0, running product.

        TEST   BX,BX       ; Multiplying by zero: product is already 0.
        JZ     FIN

MORE:   TEST   BX, 0001    ; Is lowest multiplier bit 1? (BX left unchanged)
        JZ     NOADD       ; No, do not add.
        ADD    CX,AX       ; Yes, add the current multiple of the multiplicand.

NOADD:  SHL    AX,1        ; Double the multiplicand for the next bit position.
        SHR    BX,1        ; Drop the bit just handled; ZF set when none remain.
        JNZ    MORE        ; Keep going until no more bits in BX.

FIN:    MOV    AX,CX       ; Return the product in AX.

        POP    CX          ; Restore registers (reverse push order) and return.
        POP    BX
        RET

, 123, 456, :

    123 x 6
+  1230 x 5
+ 12300 x 4

, / . , ( , ).

x86 (8086, DEBUG - , XP), , . - : -)

+4

, - MUL - , !

, , (, ), ( ) SHL ADD - :

1280 x 1024 - 1280 .

1280 = 1024 + 256 = 2 ^ 10 + 2 ^ 8

y * 1280 = y * (2 ^ 10) + y * (2 ^ 8) = ADD (SHL y, 10), (SHL y, 8)

... , , , , .

0

All Articles