测试使用的 GCC 版本为 4.8.5。分别使用 `cc -O2 -g test.c` 和 `cc -O3 -g test.c` 编译，并将 `ARRAY_SIZE` 分别 `#define` 为 17 和 18 进行测试。
/*test.c*/
/*
#define ARRAY_SIZE 17
*/
/* Number of int elements in struct data's array. The experiment switches this
 * between 18 (active) and 17 (the commented-out definition above). */
#define ARRAY_SIZE 18
/* Payload type for the experiment: a fixed-size array of ARRAY_SIZE ints. */
struct data{
int array[ARRAY_SIZE];
};
/*
 * Fill data->array with ascending values: array[k] = k + 1 for
 * k = 0 .. ARRAY_SIZE - 2.
 *
 * The loop starts at i = 1 and stores to index i - 1, so the last element,
 * array[ARRAY_SIZE - 1], is never written.
 *
 * data: destination structure; every store goes through it as an int access.
 * Returns 0 unconditionally.
 */
int data_init(struct data *data)
{
int i;
for (i = 1; i < ARRAY_SIZE; i++){
data->array[i - 1] = i;
}
return 0;
}
/*
 * Driver for the alignment experiment: calls data_init() twice, first on the
 * pointer returned by malloc() and then on that pointer advanced by one byte.
 *
 * NOTE(review): this listing shows no #include for <stdio.h>/<stdlib.h>, so
 * malloc/free/printf are used without visible declarations; the malloc result
 * is also not checked for NULL before use.
 * Returns 0.
 */
int main (void)
{
/* 64 spare bytes beyond sizeof(struct data), so the writes that start at
 * buffer + 1 still land inside the allocation. */
char *buffer = (char *)malloc(sizeof(struct data) + 64);
int i;
for (i = 0; i < 2; i++) {
/* i == 0: pos is the malloc() result itself.
 * i == 1: pos is one byte past it, i.e. at an odd offset from the start of
 * the allocation. */
char* pos = buffer + i;
/* "befroe" is presumably a typo for "before"; left as-is because it is
 * program output. */
printf("befroe====%p\n", pos);
/* The char pointer is cast to struct data * regardless of its offset; on the
 * second pass data_init() therefore performs int stores through a pointer
 * that is one byte off the allocation start. */
data_init((struct data *)pos);
printf("after====%p\n", pos);
}
free(buffer);
return 0;
}
| Crash? | -O2 | -O3 |
| ------ | --- | --- |
| 17 | NO | NO |
| 18 | NO | YES |
可以看到，当启用 `-O3` 优化，且 `ARRAY_SIZE` 大于等于 18 时，程序会 crash。
对比汇编代码可以看到
;启用`O3` `ARRAY_SIZE`等于`17` 时
; objdump -S a.out > size_17.txt
for (i = 1; i < ARRAY_SIZE; i++){
data->array[i - 1] = i;
40052b: c7 43 18 07 00 00 00 movl $0x7,0x18(%rbx)
400532: c7 43 1c 08 00 00 00 movl $0x8,0x1c(%rbx)
400539: 48 83 c3 01 add $0x1,%rbx
40053d: c7 43 1f 09 00 00 00 movl $0x9,0x1f(%rbx)
400544: c7 43 23 0a 00 00 00 movl $0xa,0x23(%rbx)
40054b: c7 43 27 0b 00 00 00 movl $0xb,0x27(%rbx)
400552: c7 43 2b 0c 00 00 00 movl $0xc,0x2b(%rbx)
400559: c7 43 2f 0d 00 00 00 movl $0xd,0x2f(%rbx)
400560: c7 43 33 0e 00 00 00 movl $0xe,0x33(%rbx)
400567: c7 43 37 0f 00 00 00 movl $0xf,0x37(%rbx)
40056e: c7 43 3b 10 00 00 00 movl $0x10,0x3b(%rbx)
;启用`O3` `ARRAY_SIZE`等于`18` 时
; objdump -S a.out > size_18.txt
for (i = 1; i < ARRAY_SIZE; i++){
40053e: b9 04 00 00 00 mov $0x4,%ecx
400543: 44 8d 51 02 lea 0x2(%rcx),%r10d
400547: 44 8d 49 01 lea 0x1(%rcx),%r9d
40054b: 44 8d 59 03 lea 0x3(%rcx),%r11d
40054f: b8 11 00 00 00 mov $0x11,%eax
400554: 44 89 54 24 08 mov %r10d,0x8(%rsp)
400559: 44 89 4c 24 04 mov %r9d,0x4(%rsp)
40055e: 29 d0 sub %edx,%eax
400560: 66 0f 6e 4c 24 08 movd 0x8(%rsp),%xmm1
400566: 44 89 5c 24 0c mov %r11d,0xc(%rsp)
40056b: 89 d2 mov %edx,%edx
40056d: 89 4c 24 08 mov %ecx,0x8(%rsp)
400571: 66 0f 6e 54 24 0c movd 0xc(%rsp),%xmm2
400577: 48 8d 14 93 lea (%rbx,%rdx,4),%rdx
40057b: 66 0f 6e 44 24 08 movd 0x8(%rsp),%xmm0
400581: 89 c6 mov %eax,%esi
400583: 66 0f 6e 5c 24 04 movd 0x4(%rsp),%xmm3
400589: 66 0f 62 ca punpckldq %xmm2,%xmm1
40058d: c1 ee 02 shr $0x2,%esi
400590: 66 0f 62 c3 punpckldq %xmm3,%xmm0
400594: 83 fe 04 cmp $0x4,%esi
400597: 8d 3c b5 00 00 00 00 lea 0x0(,%rsi,4),%edi
40059e: 66 0f 6c c1 punpcklqdq %xmm1,%xmm0
4005a2: 66 0f 6f 0d a6 03 00 movdqa 0x3a6(%rip),%xmm1 # 400950 <__dso_handle+0x28>
4005a9: 00
data->array[i - 1] = i;
4005aa: 66 0f 7f 02 movdqa %xmm0,(%rdx)
4005ae: 66 0f fe c8 paddd %xmm0,%xmm1
4005b2: 66 0f 7f 4a 10 movdqa %xmm1,0x10(%rdx)
4005b7: 66 0f 6f 05 91 03 00 movdqa 0x391(%rip),%xmm0 # 400950 <__dso_handle+0x28>
4005be: 00
4005bf: 66 0f fe c1 paddd %xmm1,%xmm0
4005c3: 66 0f 6f 0d 85 03 00 movdqa 0x385(%rip),%xmm1 # 400950 <__dso_handle+0x28>
4005ca: 00
4005cb: 66 0f 7f 42 20 movdqa %xmm0,0x20(%rdx)
4005d0: 66 0f fe c8 paddd %xmm0,%xmm1
4005d4: 75 05 jne 4005db <main+0x10b>
4005d6: 66 0f 7f 4a 30 movdqa %xmm1,0x30(%rdx)
4005db: 01 f9 add %edi,%ecx
4005dd: 41 29 f8 sub %edi,%r8d
4005e0: 39 f8 cmp %edi,%eax
4005e2: 74 25 je 400609 <main+0x139>
4005e4: 8d 41 ff lea -0x1(%rcx),%eax
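主要区别是：`ARRAY_SIZE` 为 18 时，`-O3` 使用了 `movdqa` 汇编指令进行（向量化）优化。Intel 文档中写道，该指令的内存操作数需要 16 字节对齐：“the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.” 编译器假定 `struct data *` 指向的地址至少按 `int`（4 字节）对齐，并据此先处理若干个元素，使后续地址达到 16 字节对齐；而第二次循环传入的是 `buffer + 1`，每次前进 4 字节永远无法对齐到 16 字节边界，于是 `movdqa` 触发异常，程序 crash。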
参考文档:A bug story: data alignment on x86