How hard is this to do? You can configure it to copy a 4-byte word at a time, which may be slightly faster on some 32-bit systems:
void fast_unpack(char* rgba, const char* rgb, const int count) {
if(count==0)
return;
for(int i=count; --i; rgba+=4, rgb+=3) {
*(uint32_t*)(void*)rgba = *(const uint32_t*)(const void*)rgb;
}
for(int j=0; j<3; ++j) {
rgba[j] = rgb[j];
}
}
- , rgb . , SSE, 4 . , , , FP, . , , , , , , , .
, , , x86. :
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <time.h>
void fast_unpack(char* rgba, const char* rgb, const int count) {
if(count==0)
return;
for(int i=count; --i; rgba+=4, rgb+=3) {
*(uint32_t*)(void*)rgba = *(const uint32_t*)(const void*)rgb;
}
for(int j=0; j<3; ++j) {
rgba[j] = rgb[j];
}
}
void simple_unpack(char* rgba, const char* rgb, const int count) {
for(int i=0; i<count; ++i) {
for(int j=0; j<3; ++j) {
rgba[j] = rgb[j];
}
rgba += 4;
rgb += 3;
}
}
int main() {
const int count = 512*512;
const int N = 10000;
char* src = (char*)malloc(count * 3);
char* dst = (char*)malloc(count * 4);
clock_t c0, c1;
double t;
printf("Image size = %d bytes\n", count);
printf("Number of iterations = %d\n", N);
printf("Testing simple unpack....");
c0 = clock();
for(int i=0; i<N; ++i) {
simple_unpack(dst, src, count);
}
c1 = clock();
printf("Done\n");
t = (double)(c1 - c0) / (double)CLOCKS_PER_SEC;
printf("Elapsed time: %lf\nAverage time: %lf\n", t, t/N);
printf("Testing tricky unpack....");
c0 = clock();
for(int i=0; i<N; ++i) {
fast_unpack(dst, src, count);
}
c1 = clock();
printf("Done\n");
t = (double)(c1 - c0) / (double)CLOCKS_PER_SEC;
printf("Elapsed time: %lf\nAverage time: %lf\n", t, t/N);
return 0;
}
( g++ -O3):
= 262144
= 10000
....
: 3.830000
: 0,000383
....
: 2.390000
: 0,000239
, , 40% .