Thanks to a hint from Xiaolei Zhu, I now know that gfortran will use fused multiply-add (FMA) instructions when optimizing sum(a*b). For example, using this code:
! Minimal demonstration: evaluates the reduction sum(a * b) over two small
! default-real arrays so that the instructions gfortran emits for it can be
! inspected in the compiled binary.
program test
  implicit none
  ! Default real is kept on purpose: the disassembly being inspected consists
  ! of scalar single-precision ("ss") instructions.
  real, dimension(7) :: a, b

  a = (/ 2.0, 3.0, 5.0, 7.0, 11.0, 13.0, 17.0 /)
  b = (/ 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0 /)

  ! Element-wise product followed by a sum reduction; this is the expression
  ! whose generated code is of interest.
  print *, sum(a * b)
end program test
Compiling with `f95 sum.f95 -o sum -O3 -march=core-avx2` and then running `objdump -d sum | grep vfmadd` gives:
  40088b: c4 e2 71 99 44 24 30    vfmadd132ss 0x30(%rsp),%xmm1,%xmm0
  400892: c4 e2 69 b9 44 24 34    vfmadd231ss 0x34(%rsp),%xmm2,%xmm0
  400899: c4 e2 61 b9 44 24 38    vfmadd231ss 0x38(%rsp),%xmm3,%xmm0
  4008a0: c4 e2 59 b9 44 24 3c    vfmadd231ss 0x3c(%rsp),%xmm4,%xmm0
  4008a7: c4 e2 51 b9 44 24 40    vfmadd231ss 0x40(%rsp),%xmm5,%xmm0
  4008ae: c4 e2 49 b9 44 24 44    vfmadd231ss 0x44(%rsp),%xmm6,%xmm0
  4008b5: c4 e2 41 b9 44 24 48    vfmadd231ss 0x48(%rsp),%xmm7,%xmm0
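So, for these 7-element arrays, gfortran unrolled the reduction and emitted seven scalar fused multiply-add instructions (one vfmadd132ss followed by six vfmadd231ss).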