首页 > 代码库 > 原子变量的性能问题

原子变量的性能问题

#include <stdio.h>
#include <sys/time.h>

/* Number of increments performed by each timed loop. */
enum { ITERATIONS = 1000000 };

/* Microseconds per second; folds the tv_sec difference into microseconds. */
enum { USEC_PER_SEC = 1000000 };

/*
 * Return the interval from *start to *end in microseconds.
 *
 * The arithmetic is done in long long so that the result matches the
 * "%lld" conversion used by the callers regardless of how wide time_t
 * and suseconds_t are on the target platform.
 */
static long long elapsed_us(const struct timeval *start,
                            const struct timeval *end)
{
    long long sec = (long long)end->tv_sec - (long long)start->tv_sec;
    long long usec = (long long)end->tv_usec - (long long)start->tv_usec;

    return sec * USEC_PER_SEC + usec;
}

/*
 * Micro-benchmark: time ITERATIONS plain increments of a volatile int
 * against ITERATIONS __sync_fetch_and_add() increments of an int, and
 * print each elapsed time in microseconds.
 *
 * Always returns 0.
 */
int main(void)
{
    /*
     * volatile forces a real load/store for every m++ so the compiler
     * cannot delete the loop. The counter is initialized because reading
     * an uninitialized automatic variable is undefined behavior.
     */
    volatile int m = 0;
    struct timeval start;
    struct timeval end;

    gettimeofday(&start, NULL);
    for (int i = 0; i < ITERATIONS; i++) {
        m++;
    }
    gettimeofday(&end, NULL);
    printf("add cost %lldus\n", elapsed_us(&start, &end));

    /* Initialized for the same reason as m: the atomic add reads it first. */
    int n = 0;

    gettimeofday(&start, NULL);
    for (int i = 0; i < ITERATIONS; i++) {
        __sync_fetch_and_add(&n, 1);
    }
    gettimeofday(&end, NULL);
    printf("atomic cost %lldus\n", elapsed_us(&start, &end));

    return 0;
}

之所以用 volatile 修饰 m,是为了阻止编译器对 m++ 做优化:否则在 -O2 下,由于 m 的结果没有被使用,整个循环会被直接消除,也就测不出耗时了。

使用O2编译并查看性能:

$ gcc -O2 -std=c99 -o perf atomic_perf.c
$ ./perf
add cost 2638us
atomic cost 8510us

  可见如果你的变量压根不会被多线程访问,并且对性能极度苛刻的话,还是不要用原子变量了吧。因为在有些平台上“A full memory barrier is created when this function is invoked”。

 

可以通过下面的方法看到m++和原子操作的汇编之间的区别:

$ gcc -O2 -std=c99 -g -c atomic_perf.c
$ objdump -Sl atomic_perf.o

atomic_perf.o:     file format elf64-x86-64

Disassembly of section .text:

0000000000000000 <main>:
main():
/home/admin/jinxin/test/atomic_perf.c:5
#include <stdio.h>
#include <sys/time.h>
int main()
{
   0: 55                    push   %rbp
/home/admin/jinxin/test/atomic_perf.c:9
    volatile int m;
    struct timeval start;
    gettimeofday(&start, NULL);
   1: 31 f6                 xor    %esi,%esi
/home/admin/jinxin/test/atomic_perf.c:5
   3: 53                    push   %rbx
   4: 48 83 ec 38           sub    $0x38,%rsp
/home/admin/jinxin/test/atomic_perf.c:9
   8: 48 8d 6c 24 10        lea    0x10(%rsp),%rbp
   d: 48 89 ef              mov    %rbp,%rdi
  10: e8 00 00 00 00        callq  15 <main+0x15>
  15: 31 d2                 xor    %edx,%edx
/home/admin/jinxin/test/atomic_perf.c:11
    for (int i = 0; i < 1000000; i++) {
        m++;
  17: 8b 44 24 2c           mov    0x2c(%rsp),%eax
/home/admin/jinxin/test/atomic_perf.c:10
  1b: 83 c2 01              add    $0x1,%edx
/home/admin/jinxin/test/atomic_perf.c:11
  1e: 83 c0 01              add    $0x1,%eax
/home/admin/jinxin/test/atomic_perf.c:10
  21: 81 fa 40 42 0f 00     cmp    $0xf4240,%edx
/home/admin/jinxin/test/atomic_perf.c:11
  27: 89 44 24 2c           mov    %eax,0x2c(%rsp)
/home/admin/jinxin/test/atomic_perf.c:10
  2b: 75 ea                 jne    17 <main+0x17>
/home/admin/jinxin/test/atomic_perf.c:14
    }
    struct timeval end;
    gettimeofday(&end, NULL);
  2d: 31 f6                 xor    %esi,%esi
  2f: 48 89 e7              mov    %rsp,%rdi
  32: e8 00 00 00 00        callq  37 <main+0x37>
/home/admin/jinxin/test/atomic_perf.c:16
    printf("add cost %lldus\n", (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec));
  37: 48 8b 04 24           mov    (%rsp),%rax
  3b: 48 2b 44 24 10        sub    0x10(%rsp),%rax
  40: bf 00 00 00 00        mov    $0x0,%edi
  45: 48 8b 74 24 08        mov    0x8(%rsp),%rsi
  4a: 48 2b 74 24 18        sub    0x18(%rsp),%rsi
  4f: 48 69 c0 40 42 0f 00  imul   $0xf4240,%rax,%rax
  56: 48 01 c6              add    %rax,%rsi
  59: 31 c0                 xor    %eax,%eax
  5b: e8 00 00 00 00        callq  60 <main+0x60>
/home/admin/jinxin/test/atomic_perf.c:19
    int n;
    gettimeofday(&start, NULL);
  60: 31 f6                 xor    %esi,%esi
  62: 48 89 ef              mov    %rbp,%rdi
  65: e8 00 00 00 00        callq  6a <main+0x6a>
  6a: 48 8d 54 24 28        lea    0x28(%rsp),%rdx
  6f: 31 c0                 xor    %eax,%eax
/home/admin/jinxin/test/atomic_perf.c:21
    for (int i = 0; i < 1000000; i++) {
        __sync_fetch_and_add(&n, 1);
  71: f0 83 02 01           lock addl $0x1,(%rdx)
/home/admin/jinxin/test/atomic_perf.c:20
  75: 83 c0 01              add    $0x1,%eax
  78: 3d 40 42 0f 00        cmp    $0xf4240,%eax
  7d: 75 f2                 jne    71 <main+0x71>
/home/admin/jinxin/test/atomic_perf.c:23
    }
    gettimeofday(&end, NULL);
  7f: 48 89 e7              mov    %rsp,%rdi
  82: 31 f6                 xor    %esi,%esi
  84: e8 00 00 00 00        callq  89 <main+0x89>
/home/admin/jinxin/test/atomic_perf.c:24
    printf("atomic cost %lldus\n", (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec));
  89: 48 8b 04 24           mov    (%rsp),%rax
  8d: 48 2b 44 24 10        sub    0x10(%rsp),%rax
  92: bf 00 00 00 00        mov    $0x0,%edi
  97: 48 8b 74 24 08        mov    0x8(%rsp),%rsi
  9c: 48 2b 74 24 18        sub    0x18(%rsp),%rsi
  a1: 48 69 c0 40 42 0f 00  imul   $0xf4240,%rax,%rax
  a8: 48 01 c6              add    %rax,%rsi
  ab: 31 c0                 xor    %eax,%eax
  ad: e8 00 00 00 00        callq  b2 <main+0xb2>
/home/admin/jinxin/test/atomic_perf.c:27
    return 0;
}
  b2: 48 83 c4 38           add    $0x38,%rsp
  b6: 31 c0                 xor    %eax,%eax
  b8: 5b                    pop    %rbx
  b9: 5d                    pop    %rbp
  ba: c3                    retq

  

原子变量的性能问题