[arm-gnu] Optimization problems with ARM GCC 4.5.1 2010.09-50
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[arm-gnu] Optimization problems with ARM GCC 4.5.1 2010.09-50




Building the boot loader U-Boot for ARM with

gcc version 4.5.1 (Sourcery G++ Lite 2010.09-50)

we found that it optimizes away all HW accesses declared with 'volatile' (see test code [1]):

00000000 <main>:
   0:    e3a00000     mov    r0, #0
   4:    e12fff1e     bx    lr

Using gcc version 4.3.3 (Sourcery G++ Lite 2009q1-203) results in correct code, though:

00000000 <main>:
   0:    e59f300c     ldr    r3, [pc, #12]    ; 14 <main+0x14>
   4:    e5932028     ldr    r2, [r3, #40]    ; 0x28
   8:    e5930028     ldr    r0, [r3, #40]    ; 0x28
   c:    e0620000     rsb    r0, r2, r0
  10:    e12fff1e     bx    lr
  14:    48318000

We then found that adding an additional "__asm__ __volatile__ ("" : : : "memory")" results in correct code with 4.5.1, too (see the alternative readl(c) in the test code [1]).


While it first seemed that adding the "__asm__ __volatile__ ("" : : : "memory")" to all reads is the fix for the optimization issue we saw with 2010.09-50, we found another problem:

In a routine accessing a NAND device, the compiler adds an additional "ldrb r3, [r3]" which breaks the access to the NAND device (the HW forbids reading in this state):

static void omap_nand_hwcontrol(struct mtd_info *mtd, int32_t cmd,
    uint32_t ctrl)
{
 register struct nand_chip *this = mtd->priv;

...

 if (cmd != -1)
({ __asm__ __volatile__ ("" : : : "memory"); (*(volatile unsigned char *)(this->IO_ADDR_W) = (cmd)); });
}

results in

    if (cmd != NAND_CMD_NONE)
  84:    e3710001     cmn    r1, #1
  88:    012fff1e     bxeq    lr
        writeb(cmd, this->IO_ADDR_W);
  8c:    e5933004     ldr    r3, [r3, #4]
  90:    e20110ff     and    r1, r1, #255    ; 0xff
  94:    e5c31000     strb    r1, [r3]
  98:    e5d33000     ldrb    r3, [r3]
  9c:    e12fff1e     bx    lr

Please note the additional ldrb at offset 98. If we manually replace it with a nop in the binary, everything works fine. We think that there is no reason for the compiler to add the additional ldrb, and it looks like a compiler issue to us. The compiler options used for this are listed in [2].


This results in two questions:

1) Why does gcc version 4.5.1 (Sourcery G++ Lite 2010.09-50) optimize away HW read accesses declared with 'volatile', while older compilers handle them correctly? Is there another way to stop 4.5.1 from optimizing away volatile accesses?

2) Why does gcc version 4.5.1 (Sourcery G++ Lite 2010.09-50) add an additional read access ('ldrb') when only a write ('strb') is wanted and written in the C code?


Many thanks and best regards

Dirk


[1]

arm-none-linux-gnueabi-gcc -Wall -O2 -c foo.c -o foo.o
arm-none-linux-gnueabi-objdump -D foo.o > foo.dis

-- foo.c --
/*
 * Layout of the timer register block that main() accesses at 0x48318000.
 *
 * The trailing comment on each field gives its byte offset from the start
 * of the block, followed by the author's access annotation (presumably
 * r = read-only, rw = read/write -- confirm against the hardware manual).
 * With a 4-byte unsigned int, res[0xc] fills 0x04..0x0f so that tiocp_cfg
 * lands at 0x10; tcrr at 0x28 corresponds to the "[r3, #40]" loads in the
 * disassembly quoted above.
 */
struct gptimer {
    unsigned int tidr;    /* 0x00 r */
    unsigned char res[0xc];
    unsigned int tiocp_cfg;    /* 0x10 rw */
    unsigned int tistat;    /* 0x14 r */
    unsigned int tisr;    /* 0x18 rw */
    unsigned int tier;    /* 0x1c rw */
    unsigned int twer;    /* 0x20 rw */
    unsigned int tclr;    /* 0x24 rw */
    unsigned int tcrr;    /* 0x28 rw */
    unsigned int tldr;    /* 0x2c rw */
    unsigned int ttgr;    /* 0x30 rw */
    unsigned int twpc;    /* 0x34 r*/
    unsigned int tmar;    /* 0x38 rw*/
    unsigned int tcar1;    /* 0x3c r */
    unsigned int tcicr;    /* 0x40 rw */
    unsigned int tcar2;    /* 0x44 r */
};


/*
 * Compiler-level barrier only: the asm template is empty, so no instruction
 * is emitted. The "memory" clobber tells GCC that memory may have changed,
 * so it must not keep memory values cached in registers or move memory
 * accesses across this point.
 */
#define dmb()          __asm__ __volatile__ ("" : : : "memory")
/* Barrier used after an I/O read; here simply an alias for dmb(). */
#define __iormb()      dmb()

/* Read an unsigned int from address a through a volatile-qualified lvalue. */
#define __arch_getl(a)      (*(volatile unsigned int *)(a))
/* Variant under test: plain volatile read with no barrier. */
#define readl(a)      __arch_getl(a)
/*
 * Alternative variant (disabled): the same volatile read followed by
 * __iormb(). Uses a GCC statement expression to yield the value read.
 */
//#define readl(c) ({ unsigned int __v = __arch_getl(c); __iormb(); __v; })

/*
 * Minimal reproducer: reads the tcrr field twice through readl() and
 * returns the difference between the second and the first value read.
 * Because both reads go through a volatile-qualified lvalue, the compiler
 * is expected to emit two separate loads.
 */
int main(void) {

  /* Fixed address used as the base of the timer register block. */
  struct gptimer *gpt1_base = (struct gptimer *)0x48318000;
  unsigned int cdiff, cstart, cend;

  /* First volatile read of tcrr. */
  cstart = readl(&gpt1_base->tcrr);

  /* Second volatile read of the same field; must not be merged with the first. */
  cend = readl(&gpt1_base->tcrr);

  /* Unsigned subtraction: the difference wraps modulo 2^N if cend < cstart. */
  cdiff = cend - cstart;

  return cdiff;

}
-- foo.c --

[2]

arm-none-linux-gnueabi-gcc -g -Os -fno-common -ffixed-r8 -msoft-float -fno-builtin -ffreestanding -nostdinc -pipe -marm -mabi=aapcs-linux -mno-thumb-interwork -march=armv5 -Wall -Wstrict-prototypes -fno-stack-protector -o omap_gpmc.o omap_gpmc.c -c