Back to Top

一个O3带来的数据对齐问题

测试使用的 GCC 版本为 4.8.5, 分别使用 `cc -O2 -g test.c` 和 `cc -O3 -g test.c` 编译, 并将 ARRAY_SIZE 分别 define 为 17 和 18.

/*test.c*/
/*
#define ARRAY_SIZE 17
*/
/* Number of ints in struct data; the experiment is run with both 17 and 18. */
#define ARRAY_SIZE 18

/*
 * Fixed-size int array wrapped in a struct. Its only member is an int array,
 * so the struct has the alignment requirement of int.
 */
struct data{
   int array[ARRAY_SIZE];
};

/*
 * Fill data->array with consecutive integers starting at 1.
 *
 * Stores i into array[i - 1] for i = 1 .. ARRAY_SIZE - 1, i.e. array[k] = k + 1
 * for k in [0, ARRAY_SIZE - 2]. The last element, array[ARRAY_SIZE - 1], is
 * never written.
 *
 * data: destination; it is written through as int objects with no alignment
 *       check, so it must point to storage suitably aligned for struct data.
 *
 * Always returns 0.
 */
int data_init(struct data *data)
{
    int i;
    /* Starts at 1, so only ARRAY_SIZE - 1 elements are stored. */
    for (i = 1; i < ARRAY_SIZE; i++){
        data->array[i - 1] = i;
    }
    return 0;
}

/*
 * Reproducer driver: runs data_init() twice, first on the start of a heap
 * buffer and then on the address one byte past it, printing the pointer
 * before and after each call.
 *
 * NOTE(review): no #include lines appear in this listing; malloc/free and
 * printf are declared in <stdlib.h> and <stdio.h>.
 * NOTE(review): the malloc() result is used without a NULL check.
 */
int main (void)
{
    /* 64 spare bytes so the struct still fits when the start is shifted by i. */
    char *buffer = (char *)malloc(sizeof(struct data) + 64);
    int i;
    for (i = 0; i < 2; i++) {
        /*
         * i == 0: the address returned by malloc, which is suitably aligned
         * for any object type.
         * i == 1: one byte past it, hence not aligned for int / struct data.
         */
        char* pos = buffer + i;
        printf("befroe====%p\n", pos);
        /* The cast hides the misalignment from the compiler. */
        data_init((struct data *)pos);
        printf("after====%p\n", pos);
    }
    free(buffer);
    return 0;
}

| Crash? (ARRAY_SIZE) | -O2 | -O3 |
|---------------------|-----|-----|
| 17                  | NO  | NO  |
| 18                  | NO  | YES |

可以看到, 当启用O3优化, 且ARRAY_SIZE大于等于18时,程序会crash.

对比汇编代码可以看到

;启用`O3` `ARRAY_SIZE`等于`17` 时
; objdump -S a.out > size_17.txt
    for (i = 1; i < ARRAY_SIZE; i++){
        data->array[i - 1] = i;
  40052b:	c7 43 18 07 00 00 00 	movl   $0x7,0x18(%rbx)
  400532:	c7 43 1c 08 00 00 00 	movl   $0x8,0x1c(%rbx)
  400539:	48 83 c3 01          	add    $0x1,%rbx
  40053d:	c7 43 1f 09 00 00 00 	movl   $0x9,0x1f(%rbx)
  400544:	c7 43 23 0a 00 00 00 	movl   $0xa,0x23(%rbx)
  40054b:	c7 43 27 0b 00 00 00 	movl   $0xb,0x27(%rbx)
  400552:	c7 43 2b 0c 00 00 00 	movl   $0xc,0x2b(%rbx)
  400559:	c7 43 2f 0d 00 00 00 	movl   $0xd,0x2f(%rbx)
  400560:	c7 43 33 0e 00 00 00 	movl   $0xe,0x33(%rbx)
  400567:	c7 43 37 0f 00 00 00 	movl   $0xf,0x37(%rbx)
  40056e:	c7 43 3b 10 00 00 00 	movl   $0x10,0x3b(%rbx)
;启用`O3` `ARRAY_SIZE`等于`18` 时
; objdump -S a.out > size_18.txt
    for (i = 1; i < ARRAY_SIZE; i++){
  40053e:	b9 04 00 00 00       	mov    $0x4,%ecx
  400543:	44 8d 51 02          	lea    0x2(%rcx),%r10d
  400547:	44 8d 49 01          	lea    0x1(%rcx),%r9d
  40054b:	44 8d 59 03          	lea    0x3(%rcx),%r11d
  40054f:	b8 11 00 00 00       	mov    $0x11,%eax
  400554:	44 89 54 24 08       	mov    %r10d,0x8(%rsp)
  400559:	44 89 4c 24 04       	mov    %r9d,0x4(%rsp)
  40055e:	29 d0                	sub    %edx,%eax
  400560:	66 0f 6e 4c 24 08    	movd   0x8(%rsp),%xmm1
  400566:	44 89 5c 24 0c       	mov    %r11d,0xc(%rsp)
  40056b:	89 d2                	mov    %edx,%edx
  40056d:	89 4c 24 08          	mov    %ecx,0x8(%rsp)
  400571:	66 0f 6e 54 24 0c    	movd   0xc(%rsp),%xmm2
  400577:	48 8d 14 93          	lea    (%rbx,%rdx,4),%rdx
  40057b:	66 0f 6e 44 24 08    	movd   0x8(%rsp),%xmm0
  400581:	89 c6                	mov    %eax,%esi
  400583:	66 0f 6e 5c 24 04    	movd   0x4(%rsp),%xmm3
  400589:	66 0f 62 ca          	punpckldq %xmm2,%xmm1
  40058d:	c1 ee 02             	shr    $0x2,%esi
  400590:	66 0f 62 c3          	punpckldq %xmm3,%xmm0
  400594:	83 fe 04             	cmp    $0x4,%esi
  400597:	8d 3c b5 00 00 00 00 	lea    0x0(,%rsi,4),%edi
  40059e:	66 0f 6c c1          	punpcklqdq %xmm1,%xmm0
  4005a2:	66 0f 6f 0d a6 03 00 	movdqa 0x3a6(%rip),%xmm1        # 400950 <__dso_handle+0x28>
  4005a9:	00 
        data->array[i - 1] = i;
  4005aa:	66 0f 7f 02          	movdqa %xmm0,(%rdx)
  4005ae:	66 0f fe c8          	paddd  %xmm0,%xmm1
  4005b2:	66 0f 7f 4a 10       	movdqa %xmm1,0x10(%rdx)
  4005b7:	66 0f 6f 05 91 03 00 	movdqa 0x391(%rip),%xmm0        # 400950 <__dso_handle+0x28>
  4005be:	00 
  4005bf:	66 0f fe c1          	paddd  %xmm1,%xmm0
  4005c3:	66 0f 6f 0d 85 03 00 	movdqa 0x385(%rip),%xmm1        # 400950 <__dso_handle+0x28>
  4005ca:	00 
  4005cb:	66 0f 7f 42 20       	movdqa %xmm0,0x20(%rdx)
  4005d0:	66 0f fe c8          	paddd  %xmm0,%xmm1
  4005d4:	75 05                	jne    4005db <main+0x10b>
  4005d6:	66 0f 7f 4a 30       	movdqa %xmm1,0x30(%rdx)
  4005db:	01 f9                	add    %edi,%ecx
  4005dd:	41 29 f8             	sub    %edi,%r8d
  4005e0:	39 f8                	cmp    %edi,%eax
  4005e2:	74 25                	je     400609 <main+0x139>
  4005e4:	8d 41 ff             	lea    -0x1(%rcx),%eax

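主要区别是: ARRAY_SIZE 为 18 时, 编译器使用 SSE 指令 movdqa 对循环做了向量化优化. 文档中写道, 该指令的内存操作数需要 16 字节对齐: "the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated." 而 main 中第二次循环传入的指针是 buffer + 1, 该地址连 int 的 4 字节对齐都不满足, 编译器生成的代码无法把它调整到 16 字节边界, 于是 movdqa 触发异常, 程序 crash.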

参考文档:A bug story: data alignment on x86