首页 > 代码库 > 《coredump问题原理探究》Linux x86版6.4节虚函数
《coredump问题原理探究》Linux x86版6.4节虚函数
在上一节已经探究了类的成员变量的排列,现在看一下虚函数表和成员变量的排列及虚函数之间的排列.
先看一个例子:
1 #include <stdio.h> 2 class xuzhina_dump_c06_s3 3 { 4 private: 5 int m_a; 6 public: 7 xuzhina_dump_c06_s3() { m_a = 0; } 8 virtual void inc() { m_a++; } 9 virtual void dec() { m_a--; } 10 virtual void print() 11 { 12 printf( "%d\n", m_a ); 13 } 14 }; 15 16 int main() 17 { 18 xuzhina_dump_c06_s3* test = new xuzhina_dump_c06_s3; 19 if ( test != NULL ) 20 { 21 test->inc(); 22 test->inc(); 23 test->print(); 24 } 25 return 0; 26 }
汇编代码:
(gdb) disassemble main Dump of assembler code for function main: 0x08048560 <+0>: push %ebp 0x08048561 <+1>: mov %esp,%ebp 0x08048563 <+3>: push %ebx 0x08048564 <+4>: and $0xfffffff0,%esp 0x08048567 <+7>: sub $0x20,%esp 0x0804856a <+10>: movl $0x8,(%esp) 0x08048571 <+17>: call 0x8048450 <_Znwj@plt> 0x08048576 <+22>: mov %eax,%ebx 0x08048578 <+24>: mov %ebx,(%esp) 0x0804857b <+27>: call 0x80485cc <_ZN19xuzhina_dump_c06_s3C2Ev> 0x08048580 <+32>: mov %ebx,0x1c(%esp) 0x08048584 <+36>: cmpl $0x0,0x1c(%esp) 0x08048589 <+41>: je 0x80485c1 <main+97> 0x0804858b <+43>: mov 0x1c(%esp),%eax 0x0804858f <+47>: mov (%eax),%eax 0x08048591 <+49>: mov (%eax),%eax 0x08048593 <+51>: mov 0x1c(%esp),%edx 0x08048597 <+55>: mov %edx,(%esp) 0x0804859a <+58>: call *%eax 0x0804859c <+60>: mov 0x1c(%esp),%eax 0x080485a0 <+64>: mov (%eax),%eax 0x080485a2 <+66>: mov (%eax),%eax 0x080485a4 <+68>: mov 0x1c(%esp),%edx 0x080485a8 <+72>: mov %edx,(%esp) 0x080485ab <+75>: call *%eax 0x080485ad <+77>: mov 0x1c(%esp),%eax 0x080485b1 <+81>: mov (%eax),%eax 0x080485b3 <+83>: add $0x8,%eax 0x080485b6 <+86>: mov (%eax),%eax 0x080485b8 <+88>: mov 0x1c(%esp),%edx 0x080485bc <+92>: mov %edx,(%esp) 0x080485bf <+95>: call *%eax 0x080485c1 <+97>: mov $0x0,%eax 0x080485c6 <+102>: mov -0x4(%ebp),%ebx 0x080485c9 <+105>: leave 0x080485ca <+106>: ret End of assembler dump.
由上面代码可知,执行完了构造函数,test的m_a的值,会变成0。且由上面汇编可以看到this指针在调用构造函数前后,是放在ebx寄存器。
在0x08048578,0x08048580都打断点,看一下this指针所指向的地址是不是这样的结果。
(gdb) tbreak *0x08048578 Temporary breakpoint 1 at 0x8048578 (gdb) tbreak *0x08048580 Temporary breakpoint 2 at 0x8048580 (gdb) r Starting program: /home/buckxu/work/6/3/xuzhina_dump_c6_s3 Temporary breakpoint 1, 0x08048578 in main () (gdb) x /4x $ebx 0x804a008: 0x00000000 0x00000000 0x00000000 0x00020ff1 (gdb) c Continuing. Temporary breakpoint 2, 0x08048580 in main () (gdb) x /4x $ebx 0x804a008: 0x080486d0 0x00000000 0x00000000 0x00020ff1
非常奇怪,按照上一节的内容,地址0x804a008处应该存放m_a,并被初始化为0。究竟类xuzhina_dump_c06_s3的构造函数做了什么事情?而0x080486d0又是什么东西?
看一下类xuzhina_dump_c06_s3的构造函数:
(gdb) disassemble _ZN19xuzhina_dump_c06_s3C2Ev Dump of assembler code for function _ZN19xuzhina_dump_c06_s3C2Ev: 0x080485cc <+0>: push %ebp 0x080485cd <+1>: mov %esp,%ebp 0x080485cf <+3>: mov 0x8(%ebp),%eax 0x080485d2 <+6>: movl $0x80486d0,(%eax) 0x080485d8 <+12>: mov 0x8(%ebp),%eax 0x080485db <+15>: movl $0x0,0x4(%eax) 0x080485e2 <+22>: pop %ebp 0x080485e3 <+23>: ret End of assembler dump.
由构造函数的汇编可知,0x80486d0这个值是在构造函数中设置的,但还不清楚是什么东西。而
0x080485d8 <+12>: mov 0x8(%ebp),%eax 0x080485db <+15>: movl $0x0,0x4(%eax)
却刚好对应了
7 xuzhina_dump_c06_s3() { m_a = 0; }
也就是说,类xuzhina_dump_c06_s3的第一个成员变量m_a放在this指针偏移4字节的地方,而不是偏移0处。那么0x80486d0是什么东西,占了m_a的位置呢?
重新看一下main函数的汇编:
(gdb) disassemble main Dump of assembler code for function main: 0x08048560 <+0>: push %ebp 0x08048561 <+1>: mov %esp,%ebp 0x08048563 <+3>: push %ebx 0x08048564 <+4>: and $0xfffffff0,%esp 0x08048567 <+7>: sub $0x20,%esp 0x0804856a <+10>: movl $0x8,(%esp) 0x08048571 <+17>: call 0x8048450 <_Znwj@plt> 0x08048576 <+22>: mov %eax,%ebx 0x08048578 <+24>: mov %ebx,(%esp) 0x0804857b <+27>: call 0x80485cc <_ZN19xuzhina_dump_c06_s3C2Ev> => 0x08048580 <+32>: mov %ebx,0x1c(%esp) 0x08048584 <+36>: cmpl $0x0,0x1c(%esp) 0x08048589 <+41>: je 0x80485c1 <main+97> 0x0804858b <+43>: mov 0x1c(%esp),%eax 0x0804858f <+47>: mov (%eax),%eax 0x08048591 <+49>: mov (%eax),%eax 0x08048593 <+51>: mov 0x1c(%esp),%edx 0x08048597 <+55>: mov %edx,(%esp) 0x0804859a <+58>: call *%eax 0x0804859c <+60>: mov 0x1c(%esp),%eax 0x080485a0 <+64>: mov (%eax),%eax 0x080485a2 <+66>: mov (%eax),%eax 0x080485a4 <+68>: mov 0x1c(%esp),%edx 0x080485a8 <+72>: mov %edx,(%esp) 0x080485ab <+75>: call *%eax 0x080485ad <+77>: mov 0x1c(%esp),%eax 0x080485b1 <+81>: mov (%eax),%eax 0x080485b3 <+83>: add $0x8,%eax 0x080485b6 <+86>: mov (%eax),%eax 0x080485b8 <+88>: mov 0x1c(%esp),%edx 0x080485bc <+92>: mov %edx,(%esp) 0x080485bf <+95>: call *%eax 0x080485c1 <+97>: mov $0x0,%eax 0x080485c6 <+102>: mov -0x4(%ebp),%ebx 0x080485c9 <+105>: leave 0x080485ca <+106>: ret End of assembler dump.
由
0x0804857b <+27>: call 0x80485cc <_ZN19xuzhina_dump_c06_s3C2Ev> 0x08048580 <+32>: mov %ebx,0x1c(%esp)
可知,esp+0x1c用来存放this指针。
再看一下这几段指令:
0x0804858b <+43>: mov 0x1c(%esp),%eax 0x0804858f <+47>: mov (%eax),%eax 0x08048591 <+49>: mov (%eax),%eax 0x08048593 <+51>: mov 0x1c(%esp),%edx 0x08048597 <+55>: mov %edx,(%esp) 0x0804859a <+58>: call *%eax 0x0804859c <+60>: mov 0x1c(%esp),%eax 0x080485a0 <+64>: mov (%eax),%eax 0x080485a2 <+66>: mov (%eax),%eax 0x080485a4 <+68>: mov 0x1c(%esp),%edx 0x080485a8 <+72>: mov %edx,(%esp) 0x080485ab <+75>: call *%eax 0x080485ad <+77>: mov 0x1c(%esp),%eax 0x080485b1 <+81>: mov (%eax),%eax 0x080485b3 <+83>: add $0x8,%eax 0x080485b6 <+86>: mov (%eax),%eax 0x080485b8 <+88>: mov 0x1c(%esp),%edx 0x080485bc <+92>: mov %edx,(%esp) 0x080485bf <+95>: call *%eax
由于是顺序结构,可知,这三段指令刚好对应
21 test->inc(); 22 test->inc(); 23 test->print();
分析一下第三段汇编:
0x080485ad <+77>: mov 0x1c(%esp),%eax 0x080485b1 <+81>: mov (%eax),%eax 0x080485b3 <+83>: add $0x8,%eax 0x080485b6 <+86>: mov (%eax),%eax 0x080485b8 <+88>: mov 0x1c(%esp),%edx 0x080485bc <+92>: mov %edx,(%esp) 0x080485bf <+95>: call *%eax
可见eax正好是放着print这个虚函数的指针。而这个指针最终是由esp+0x1c来取得。由
0x080485ad <+77>: mov 0x1c(%esp),%eax 0x080485b1 <+81>: mov (%eax),%eax
可知,它正好是从this指针所指向对象的第一个成员(偏移0处)取出来的,也就是说,这个成员是虚函数表指针。根据上面的分析,可知这个虚函数表指针的值是0x80486d0。来验证一下,它是不是虚函数表指针。
(gdb) x /4x 0x80486d0 0x80486d0 <_ZTV19xuzhina_dump_c06_s3+8>: 0x080485e4 0x080485f8 0x0804860c 0x75783931 (gdb) shell c++filt _ZTV19xuzhina_dump_c06_s3 vtable for xuzhina_dump_c06_s3 (gdb) info symbol 0x080485e4 xuzhina_dump_c06_s3::inc() in section .text of /home/buckxu/work/6/3/xuzhina_dump_c6_s3 (gdb) info symbol 0x080485f8 xuzhina_dump_c06_s3::dec() in section .text of /home/buckxu/work/6/3/xuzhina_dump_c6_s3 (gdb) info symbol 0x0804860c xuzhina_dump_c06_s3::print() in section .text of /home/buckxu/work/6/3/xuzhina_dump_c6_s3
可见,0x80486d0所指向的正是虚函数表(即符号_ZTV19xuzhina_dump_c06_s3偏移8字节处,也就是第一个虚函数表项的位置),且里面的表项顺序正好和虚函数的声明顺序一样。
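由上面分析,test所指向的对象的内存布局如下图(对象偏移0处是虚函数表指针,值为0x80486d0;偏移4处是成员变量m_a;虚函数表中依次存放inc、dec、print三个虚函数的地址0x080485e4、0x080485f8、0x0804860c):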
《coredump问题原理探究》Linux x86版6.4节虚函数