I have an ARM Cortex-A8 with NEON. I optimized my code using NEON intrinsics, but when I compile it, I get a strange error that I don't know how to fix.
I am trying to compile the following code (PART 1) using CodeSourcery (PART 2) on my host, and I get this strange error (PART 3). Am I doing something wrong here? Can anyone else compile this and see whether they get the same compilation error?
The strange part is that if I comment out the else if(step_size == 4) part of the code, the error disappears. Unfortunately, my optimization is not complete without it, so I must keep it.
At first, I thought it was a problem with the CodeSourcery compiler (on my host), so I compiled the program directly on my target (my target runs Ubuntu). I used gcc there and, again, I get the same error; when I comment out the else if(step_size == 4) part, the error disappears.
Help!
PART 1
#include<stdio.h> #include"arm_neon.h" #define IMAGE_HEIGHT 480 #define IMAGE_WIDTH 640 float32_t integral_image[IMAGE_HEIGHT][IMAGE_WIDTH]; float32x4_t box_area_compute3(int, int , int , int , unsigned int , float); inline int min(int, int); int main() { box_area_compute3(1, 1, 4, 4, 2, 0); return 0; } float32x4_t box_area_compute3(int row, int col, int num_rows, int num_cols, unsigned int step_size, float three) { unsigned int height = IMAGE_HEIGHT; unsigned int width = IMAGE_WIDTH; int temp_row = row + num_rows; int temp_col = col + num_cols; int r1 = (min(row, height))- 1 ; int r2 = (min(temp_row, height)) - 1; int c1 = (min(col, width)) - 1; int c2 = (min(temp_col, width)) - 1; float32x4_t v128_areas; if(step_size == 2) { float32x4x2_t top_left, top_right, bottom_left, bottom_right; top_left = vld2q_f32((float32_t *)integral_image[r1] + c1); top_right = vld2q_f32((float32_t *)integral_image[r1] + c2); bottom_left = vld2q_f32((float32_t *)integral_image[r2] + c1); bottom_right = vld2q_f32((float32_t *)integral_image[r2] + c2); v128_areas = vsubq_f32(vsubq_f32(vaddq_f32(top_left.val[0], bottom_right.val[0]), top_right.val[0]), bottom_left.val[0]); } else if(step_size == 4) { float32x4x4_t top_left, top_right, bottom_left, bottom_right; top_left = vld4q_f32((float32_t *)integral_image[r1] + c1); top_right = vld4q_f32((float32_t *)integral_image[r1] + c2); bottom_left = vld4q_f32((float32_t *)integral_image[r2] + c1); bottom_right = vld4q_f32((float32_t *)integral_image[r2] + c2); v128_areas = vsubq_f32(vsubq_f32(vaddq_f32(top_left.val[0], bottom_right.val[0]), top_right.val[0]), bottom_left.val[0]); } if(three == 3.0) v128_areas = vmulq_n_f32(v128_areas, three); return v128_areas; } inline int min(int X, int Y) { return (X < Y ? X : Y); }
PART 2
arm-none-linux-gnueabi-gcc -O0 -g3 -Wall -c -fmessage-length=0 -fcommon -MMD -MP -MF"main.d" -MT"main.d" -mcpu=cortex-a8 -marm -mfloat-abi=hard -mfpu=neon-vfpv4 -o"main.o" "../main.c"
PART 3
../main.c: In function 'box_area_compute3': ../main.c:65: error: unable to find a register to spill in class 'GENERAL_REGS' ../main.c:65: error: this is the insn: (insn 226 225 227 5 c:\program files\codesourcery\sourcery g++\bin\../lib/gcc/arm-none-linux-gnueabi/4.4.1/include/arm_neon.h:9863 (parallel [ (set (reg:XI 148 [ D.17028 ]) (unspec:XI [ (mem:XI (reg:SI 3 r3 [301]) [0 S64 A64]) (reg:XI 148 [ D.17028 ]) (unspec:V4SF [ (const_int 0 [0x0]) ] 191) ] 111)) (set (reg:SI 3 r3 [301]) (plus:SI (reg:SI 3 r3 [301]) (const_int 32 [0x20]))) ]) 1605 {neon_vld4qav4sf} (nil)) ../main.c:65: confused by earlier errors, bailing out cs-make: *** [main.o] Error 1