Beware of wasting a lot of time thinking about optimization details that the compiler will simply take care of for you.
Here are four implementations of what I understand the OP's problem to be, along with the code generated by gcc 4.8 with --std=c++11 -O3 -S.
Declarations:
#include <algorithm>
#include <vector>

// Element type used by the clear_relevant* examples. Only `relevant` is
// written by those functions; the other members are padding around it so the
// target field sits in the middle of the object (the assembly listings store
// to offset 4 and advance by 16 bytes per element).
struct T {
  int irrelevant;   // not touched by the examples
  int relevant;     // the field each example resets to 0
  double trailing;  // not touched by the examples
};
Explicit loop implementations, taken roughly from the answers and comments provided by the OP. Both produce identical machine code, apart from the labels.
.cfi_startproc movq (%rdi), %rsi void clear_relevant(std::vector<T>* vecp) { movq 8(%rdi), %rcx for(unsigned i=0; i<vecp->size(); i++) { xorl %edx, %edx vecp->at(i).relevant = 0; xorl %eax, %eax } subq %rsi, %rcx } sarq $4, %rcx testq %rcx, %rcx je .L1 .p2align 4,,10 .p2align 3 .L5: void clear_relevant2(std::vector<T>* vecp) { salq $4, %rdx std::vector<T>& vec = *vecp; addl $1, %eax auto s = vec.size(); movl $0, 4(%rsi,%rdx) for (unsigned i = 0; i < s; ++i) { movl %eax, %edx vec[i].relevant = 0; cmpq %rcx, %rdx } jb .L5 } .L1: rep ret .cfi_endproc
Two other versions, one using the range-based for syntax and the other using std::for_each. There is a slight difference in the generated code for the two versions (other than the labels):
.cfi_startproc movq 8(%rdi), %rdx movq (%rdi), %rax cmpq %rax, %rdx je .L17 void clear_relevant3(std::vector<T>* vecp) { .p2align 4,,10 for (auto& p : *vecp) p.relevant = 0; .p2align 3 } .L21: movl $0, 4(%rax) addq $16, %rax cmpq %rax, %rdx jne .L21 .L17: rep ret .cfi_endproc .cfi_startproc movq 8(%rdi), %rdx movq (%rdi), %rax cmpq %rdx, %rax void clear_relevant4(std::vector<T>* vecp) { je .L12 std::for_each(vecp->begin(), vecp->end(), .p2align 4,,10 [](T& o){o.relevant=0;}); .p2align 3 } .L16: movl $0, 4(%rax) addq $16, %rax cmpq %rax, %rdx jne .L16 .L12: rep ret .cfi_endproc