首页 > 代码库 > 《coredump问题原理探究》Linux x86版6.4节虚函数

《coredump问题原理探究》Linux x86版6.4节虚函数

在上一节已经探究了类的成员变量的排列,现在看一下虚函数表和成员变量的排列及虚函数之间的排列.

先看一个例子:

  1	 #include <stdio.h>
  2	 class xuzhina_dump_c06_s3
  3	 {
  4	     private:
  5	         int m_a;
  6	     public:
  7	         xuzhina_dump_c06_s3() { m_a = 0; }
  8	         virtual void inc() { m_a++; }
  9	         virtual void dec() { m_a--; }
 10	         virtual void print()
 11	         {
 12	             printf( "%d\n", m_a );
 13	         }
 14	 };
 15 
 16	 int main()
 17	 {
 18	     xuzhina_dump_c06_s3* test = new xuzhina_dump_c06_s3;
 19	     if ( test != NULL )
 20	     {
 21	         test->inc();
 22	         test->inc();
 23	         test->print();
 24	     }
 25	     return 0;
 26	 }


汇编代码:

(gdb) disassemble main
Dump of assembler code for function main:
   0x08048560 <+0>:     push   %ebp
   0x08048561 <+1>:     mov    %esp,%ebp
   0x08048563 <+3>:     push   %ebx
   0x08048564 <+4>:     and    $0xfffffff0,%esp
   0x08048567 <+7>:     sub    $0x20,%esp
   0x0804856a <+10>:    movl   $0x8,(%esp)
   0x08048571 <+17>:    call   0x8048450 <_Znwj@plt>
   0x08048576 <+22>:    mov    %eax,%ebx
   0x08048578 <+24>:    mov    %ebx,(%esp)
   0x0804857b <+27>:    call   0x80485cc <_ZN19xuzhina_dump_c06_s3C2Ev>
   0x08048580 <+32>:    mov    %ebx,0x1c(%esp)
   0x08048584 <+36>:    cmpl   $0x0,0x1c(%esp)
   0x08048589 <+41>:    je     0x80485c1 <main+97>
   0x0804858b <+43>:    mov    0x1c(%esp),%eax
   0x0804858f <+47>:    mov    (%eax),%eax
   0x08048591 <+49>:    mov    (%eax),%eax
   0x08048593 <+51>:    mov    0x1c(%esp),%edx
   0x08048597 <+55>:    mov    %edx,(%esp)
   0x0804859a <+58>:    call   *%eax
   0x0804859c <+60>:    mov    0x1c(%esp),%eax
   0x080485a0 <+64>:    mov    (%eax),%eax
   0x080485a2 <+66>:    mov    (%eax),%eax
   0x080485a4 <+68>:    mov    0x1c(%esp),%edx
   0x080485a8 <+72>:    mov    %edx,(%esp)
   0x080485ab <+75>:    call   *%eax
   0x080485ad <+77>:    mov    0x1c(%esp),%eax
   0x080485b1 <+81>:    mov    (%eax),%eax
   0x080485b3 <+83>:    add    $0x8,%eax
   0x080485b6 <+86>:    mov    (%eax),%eax
   0x080485b8 <+88>:    mov    0x1c(%esp),%edx
   0x080485bc <+92>:    mov    %edx,(%esp)
   0x080485bf <+95>:    call   *%eax
   0x080485c1 <+97>:    mov    $0x0,%eax
   0x080485c6 <+102>:   mov    -0x4(%ebp),%ebx
   0x080485c9 <+105>:   leave  
   0x080485ca <+106>:   ret    
End of assembler dump.


由上面代码可知,执行完了构造函数,test的m_a的值,会变成0。且由上面汇编可以看到this指针在调用构造函数前后,是放在ebx寄存器。

在0x08048578,0x08048580都打断点,看一下this指针所指向的地址是不是这样的结果。

(gdb) tbreak *0x08048578
Temporary breakpoint 1 at 0x8048578
(gdb) tbreak *0x08048580
Temporary breakpoint 2 at 0x8048580
(gdb) r
Starting program: /home/buckxu/work/6/3/xuzhina_dump_c6_s3 

Temporary breakpoint 1, 0x08048578 in main ()
(gdb) x /4x $ebx
0x804a008:      0x00000000      0x00000000      0x00000000      0x00020ff1
(gdb) c
Continuing.

Temporary breakpoint 2, 0x08048580 in main ()
(gdb) x /4x $ebx
0x804a008:      0x080486d0      0x00000000      0x00000000      0x00020ff1


非常奇怪,按照上一节的内容,地址0x804a008应该存放m_a,会初始化为0.究竟类xuzhina_dump_c06_s3的构造函数做了什么事情?而0x080486d0是什么东西来的?

看一下类xuzhina_dump_c06_s3的构造函数:

(gdb) disassemble _ZN19xuzhina_dump_c06_s3C2Ev
Dump of assembler code for function _ZN19xuzhina_dump_c06_s3C2Ev:
   0x080485cc <+0>:     push   %ebp
   0x080485cd <+1>:     mov    %esp,%ebp
   0x080485cf <+3>:     mov    0x8(%ebp),%eax
   0x080485d2 <+6>:     movl   $0x80486d0,(%eax)
   0x080485d8 <+12>:    mov    0x8(%ebp),%eax
   0x080485db <+15>:    movl   $0x0,0x4(%eax)
   0x080485e2 <+22>:    pop    %ebp
   0x080485e3 <+23>:    ret    
End of assembler dump.


由构造函数的汇编可知,0x80486d0这个值是在构造函数中设置的,但还不清楚它是什么东西。而

   0x080485d8 <+12>:    mov    0x8(%ebp),%eax
   0x080485db <+15>:    movl   $0x0,0x4(%eax)

却刚好对应了

  7	         xuzhina_dump_c06_s3() { m_a = 0; }

也就是说,类xuzhina_dump_c06_s3的第一个成员变量m_a放在this指针偏移4个字节的地方(即this+0x4),而不是this指针所指向的起始位置。那么0x80486d0是什么东西,占了m_a原来的位置(this+0x0)呢?

重新看一下main函数的汇编:

(gdb) disassemble main
Dump of assembler code for function main:
   0x08048560 <+0>:     push   %ebp
   0x08048561 <+1>:     mov    %esp,%ebp
   0x08048563 <+3>:     push   %ebx
   0x08048564 <+4>:     and    $0xfffffff0,%esp
   0x08048567 <+7>:     sub    $0x20,%esp
   0x0804856a <+10>:    movl   $0x8,(%esp)
   0x08048571 <+17>:    call   0x8048450 <_Znwj@plt>
   0x08048576 <+22>:    mov    %eax,%ebx
   0x08048578 <+24>:    mov    %ebx,(%esp)
   0x0804857b <+27>:    call   0x80485cc <_ZN19xuzhina_dump_c06_s3C2Ev>
=> 0x08048580 <+32>:    mov    %ebx,0x1c(%esp)
   0x08048584 <+36>:    cmpl   $0x0,0x1c(%esp)
   0x08048589 <+41>:    je     0x80485c1 <main+97>
   0x0804858b <+43>:    mov    0x1c(%esp),%eax
   0x0804858f <+47>:    mov    (%eax),%eax
   0x08048591 <+49>:    mov    (%eax),%eax
   0x08048593 <+51>:    mov    0x1c(%esp),%edx
   0x08048597 <+55>:    mov    %edx,(%esp)
   0x0804859a <+58>:    call   *%eax
   0x0804859c <+60>:    mov    0x1c(%esp),%eax
   0x080485a0 <+64>:    mov    (%eax),%eax
   0x080485a2 <+66>:    mov    (%eax),%eax
   0x080485a4 <+68>:    mov    0x1c(%esp),%edx
   0x080485a8 <+72>:    mov    %edx,(%esp)
   0x080485ab <+75>:    call   *%eax
   0x080485ad <+77>:    mov    0x1c(%esp),%eax
   0x080485b1 <+81>:    mov    (%eax),%eax
   0x080485b3 <+83>:    add    $0x8,%eax
   0x080485b6 <+86>:    mov    (%eax),%eax
   0x080485b8 <+88>:    mov    0x1c(%esp),%edx
   0x080485bc <+92>:    mov    %edx,(%esp)
   0x080485bf <+95>:    call   *%eax
   0x080485c1 <+97>:    mov    $0x0,%eax
   0x080485c6 <+102>:   mov    -0x4(%ebp),%ebx
   0x080485c9 <+105>:   leave  
   0x080485ca <+106>:   ret    
End of assembler dump.

   0x0804857b <+27>:    call   0x80485cc <_ZN19xuzhina_dump_c06_s3C2Ev>
   0x08048580 <+32>:    mov    %ebx,0x1c(%esp)

可知,esp+0x1c用来存放this指针。

再看一下这几段指令:

   0x0804858b <+43>:    mov    0x1c(%esp),%eax
   0x0804858f <+47>:    mov    (%eax),%eax
   0x08048591 <+49>:    mov    (%eax),%eax
   0x08048593 <+51>:    mov    0x1c(%esp),%edx
   0x08048597 <+55>:    mov    %edx,(%esp)
   0x0804859a <+58>:    call   *%eax

   0x0804859c <+60>:    mov    0x1c(%esp),%eax
   0x080485a0 <+64>:    mov    (%eax),%eax
   0x080485a2 <+66>:    mov    (%eax),%eax
   0x080485a4 <+68>:    mov    0x1c(%esp),%edx
   0x080485a8 <+72>:    mov    %edx,(%esp)
   0x080485ab <+75>:    call   *%eax

   0x080485ad <+77>:    mov    0x1c(%esp),%eax
   0x080485b1 <+81>:    mov    (%eax),%eax
   0x080485b3 <+83>:    add    $0x8,%eax
   0x080485b6 <+86>:    mov    (%eax),%eax
   0x080485b8 <+88>:    mov    0x1c(%esp),%edx
   0x080485bc <+92>:    mov    %edx,(%esp)
   0x080485bf <+95>:    call   *%eax

由于是顺序结构,可知,这三段指令刚好对应

 21	         test->inc();
 22	         test->inc();
 23	         test->print();

分析一下第三段汇编:

   0x080485ad <+77>:    mov    0x1c(%esp),%eax
   0x080485b1 <+81>:    mov    (%eax),%eax
   0x080485b3 <+83>:    add    $0x8,%eax
   0x080485b6 <+86>:    mov    (%eax),%eax
   0x080485b8 <+88>:    mov    0x1c(%esp),%edx
   0x080485bc <+92>:    mov    %edx,(%esp)
   0x080485bf <+95>:    call   *%eax

可见eax正好是放着print这个虚函数的指针。而这个指针最终是由esp+0x1c来取得。由

   0x080485ad <+77>:    mov    0x1c(%esp),%eax
   0x080485b1 <+81>:    mov    (%eax),%eax

可知,这个值正好是从this指针所指向对象的第一个成员(偏移0处)取出来的,再以它为基址加上0x8才取到print的函数指针。也就是说,这个成员是虚函数表指针。根据上面的分析,可知这个虚函数表指针的值是0x80486d0。来验证一下,它是不是虚函数表指针。

(gdb) x /4x 0x80486d0
0x80486d0 <_ZTV19xuzhina_dump_c06_s3+8>:        0x080485e4      0x080485f8      0x0804860c      0x75783931
(gdb) shell c++filt _ZTV19xuzhina_dump_c06_s3
vtable for xuzhina_dump_c06_s3
(gdb) info symbol 0x080485e4
xuzhina_dump_c06_s3::inc() in section .text of /home/buckxu/work/6/3/xuzhina_dump_c6_s3
(gdb) info symbol 0x080485f8
xuzhina_dump_c06_s3::dec() in section .text of /home/buckxu/work/6/3/xuzhina_dump_c6_s3
(gdb) info symbol 0x0804860c
xuzhina_dump_c06_s3::print() in section .text of /home/buckxu/work/6/3/xuzhina_dump_c6_s3

可见,0x80486d0所指向的正是虚函数表,且里面的表项顺序(inc、dec、print)正好和虚函数的声明顺序一样。

 

由上面分析,test所指向的对象的内存布局如下图:

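(原文此处为图片,已缺失;以下根据上文分析重建)

对象(this = 0x804a008):
  +0x0  虚函数表指针 = 0x80486d0
  +0x4  m_a

虚函数表(0x80486d0):
  +0x0  0x080485e4  inc()
  +0x4  0x080485f8  dec()
  +0x8  0x0804860c  print()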

《coredump问题原理探究》Linux x86版6.4节虚函数