Actions
|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Toolchain, asm parameters numbering and optimisation flags
Hello,
I'm trying to optimize the Speex codec for the ColdFire CPU. Using the CodeSourcery toolchain, I am seeing what I think is strange behaviour from GCC. I've attached the C source code, followed by the results of:
m68k-uclinux-gcc -Wa,--register-prefix-optional -Wa,-memac -Wa,-mcpu=5208 -mcpu=5208 -O2 -S -o test.O2.s test.c
and
m68k-uclinux-gcc -Wa,--register-prefix-optional -Wa,-memac -Wa,-mcpu=5208 -mcpu=5208 -O0 -S -o test.O0.s test.c
With the -O2 flag, only the D0 register is used: it serves both as a temporary data holder and as the result holder. With the -O0 flag, as you can see on line 37 of test.O0.s, the same register is used for both the "len" parameter and the "sum" result variable.
I'm really lost. I only have experience with ARM inline asm. I suspect it's a very simple problem, but after days of searching I can't find either a solution or an explanation.
Does anyone have a hint, please? :-)
Thanks in advance.
1 unsigned int inner_prod(const unsigned int *x, const unsigned int *y, int len)
2 {
3 unsigned int sum, t1, t2, t3;
4 __asm__ (
5 "asr.l #2, %3;\n"
6 "\tbeq 1f;\n"
7 "\tmove.l #0, %%d3;\n"
8 "\tmove.l %%d3, %%macsr;\n"
9
10 "\t0:;\n"
11 "\tmove.l #0, %%acc0;\n"
12
13 "\tmove.w (%1)+, %4;\n"
14 "\tmove.w (%2)+, %5;\n"
15 "\tmac.l %4, %5, %%acc0;\n"
16 "\tmove.w (%1)+, %4;\n"
17 "\tmove.w (%2)+, %5;\n"
18 "\tmac.l %4, %5, %%acc0;\n"
19 "\tmove.w (%1)+, %4;\n"
20 "\tmove.w (%2)+, %5;\n"
21 "\tmac.l %4, %5, %%acc0;\n"
22 "\tmove.w (%1)+, %4;\n"
23 "\tmove.w (%2)+, %5;\n"
24 "\tmac.l %4, %5, %%acc0;\n"
25
26 "\tmove.l %%acc0, %6;\n"
27 "\tasr.l #6, %6;\n"
28
29 "\tadd.l %6, %0;\n"
30
31 "\tsub.l #1, %3;\n"
32 "\tbne 0b;\n"
33
34 "\t1:;\n"
35 : "=d" (sum)
36 : "a"(x), "a"(y), "d"(len), "d"(t1), "d"(t2), "d"(t3)
37 :"cc"
38 );
39 return sum;
40 }
41
42
43 int main() {
44 unsigned int tab1[10];
45 unsigned int tab2[10];
46 unsigned int r;
47 r = inner_prod(tab1,tab2,2);
48 }
Attachment: test.O0.s
Attachment: test.O2.s
|
|||||||
|
||||||||