I have the following bit of inline ARM assembly. It works in a debug build but crashes in a release build with iPhone SDK 3.1. The problem is the add instructions, where I increment the addresses held in the C variables output and x by 4 bytes; this is supposed to be the size of a float. I think that at some stage when I increment I am overwriting something. Can anyone say what the best way to handle this is?
我有下面这段内联 ARM 汇编代码,它在调试构建中可以正常工作,但在 iPhone SDK 3.1 的发布构建中会崩溃。问题出在 add 指令上:我用它们把 C 变量 output 和 x 中的地址增加 4 个字节,也就是一个 float 的大小。我认为在某个阶段递增时覆盖了某些内容,谁能告诉我处理这个问题的最佳方法?
Thanks
谢谢
The C code that the asm is replacing (sum is a float; output and x point to floats):
asm 要替换的 C 代码如下(sum 是 float,output 和 x 指向 float):
/* Reference C loop that the inline asm replaces: accumulates into sum the
 * products of output[0..count-1] with the floats read through x, advancing x
 * by one element per iteration. */
for (int i = 0; i < count; i++) {
    sum += output[i] * (*x++);
}
/* Inline VFP assembly meant to replace the C multiply-accumulate loop.
 * Operands (all declared input-only "r"):
 *   %0 = &sum, %1 = output, %2 = x, %3 = count.
 * NOTE(review): with both add instructions commented out, neither pointer is
 * advanced, so every iteration reloads the same two floats.
 * NOTE(review): the loop tests its counter only after the first pass, so
 * count == 0 would wrap the counter instead of skipping the loop. */
asm volatile(
/* Alignment directive emitted into the instruction stream. */
".align 4 \n\t"
/* r4 = loop counter, copied from count (%3). */
"mov r4,%3 \n\t"
/* s0 = running total, loaded from the address in %0 (&sum). */
"flds s0,[%0] \n\t"
/* Loop head (local numeric label). */
"0: \n\t"
/* s1 = float at the address in %2 (x). */
"flds s1,[%2] \n\t"
/* NOTE(review): %3 is count, not a pointer, and it is an input-only operand;
 * the compiler assumes input-only operands are unchanged by the asm. */
//"add %3,%3,#4 \n\t"
/* s2 = float at the address in %1 (output). */
"flds s2,[%1] \n\t"
/* NOTE(review): %2 is x and is also input-only; writing to it here is not
 * visible to the compiler. */
//"add %2,%2,#4 \n\t"
/* Decrement the counter and set the condition flags for the bne below. */
"subs r4,r4, #1 \n\t"
/* s0 += s1 * s2 */
"fmacs s0, s1, s2 \n\t"
/* Repeat while the counter is non-zero. */
"bne 0b \n\t"
/* Store the accumulated total back through %0 (&sum). */
"fsts s0,[%0] \n\t"
/* No output operands are declared; the result is written through memory. */
:
: "r" (&sum), "r" (output), "r" (x),"r" (count)
/* Clobbers: r4 (counter), condition flags, memory, and the VFP scratch
 * registers. r0 is listed although no instruction above names it. */
: "r0","r4","cc", "memory",
"s0","s1","s2"
);
1 个解决方案
#1
0
Did you mean to add 4 to %1 rather than %3? Adding to %3 could cause damage if that register is used again after your function.
你是想给 %1 加 4,而不是给 %3 加 4 吗?如果该寄存器在你的函数之后再次被使用,对 %3 做加法可能会造成破坏。
asm volatile( ".align 4 \n\t" "mov r4,%3 \n\t" "flds s0,[%0] \n\t" "0: \n\t" "flds s1,[%2] \n\t" "add %2,%2,#4 \n\t" "flds s2,[%1] \n\t" "add %1,%1,#4 \n\t" "subs r4,r4, #1 \n\t" "fmacs s0, s1, s2 \n\t" "bne 0b \n\t" "fsts s0,[%0] \n\t" : : "r" (&sum), "r" (output), "r" (x),"r" (count) : "r0","r4","cc", "memory", "s0","s1","s2" );
#1
0
Did you mean to add 4 to %1 rather than %3? Adding to %3 could cause damage if that register is used again after your function.
你是想给 %1 加 4,而不是给 %3 加 4 吗?如果该寄存器在你的函数之后再次被使用,对 %3 做加法可能会造成破坏。
asm volatile( ".align 4 \n\t" "mov r4,%3 \n\t" "flds s0,[%0] \n\t" "0: \n\t" "flds s1,[%2] \n\t" "add %2,%2,#4 \n\t" "flds s2,[%1] \n\t" "add %1,%1,#4 \n\t" "subs r4,r4, #1 \n\t" "fmacs s0, s1, s2 \n\t" "bne 0b \n\t" "fsts s0,[%0] \n\t" : : "r" (&sum), "r" (output), "r" (x),"r" (count) : "r0","r4","cc", "memory", "s0","s1","s2" );