首页 > 代码库 > ddr不稳定导致1-bit error引起kernel panic(Unable to handle kernel paging request )

ddr不稳定导致1-bit error引起kernel panic(Unable to handle kernel paging request )

Panic log:

[  939.136378] c0 11060 (kworker/u8:5) binder: release 29969:29969 transaction 41327 out, still active
[  951.095433] c1 1026 (Binder_6) binder: 644:1026 transaction failed 29189, size 18336-0
[  951.103360] c1 1026 (Binder_6) binder: send failed reply for transaction 41327, target dead
[  952.103111] c1 5588 (concur_s_opchan) Unable to handle kernel paging request at virtual address 8000000000
[  952.125168] c1 5588 (concur_s_opchan) pgd = ffffffc0039be000
[  952.132150] [8000000000] *pgd=000000001e588003, *pmd=000000001937f003, *pte=0000000000000000
[  952.175000] c1 5588 (concur_s_opchan) Internal error: Oops: 96000004 [#1] PREEMPT SMP
[  952.182791] Modules linked in: hwmap runcase_sysfs(O) audiostub cidatattydev gs_modem ccinetdev cci_datastub citty iml_module seh cploaddev msocketk tzdd galcore(O)
[  952.197659] c1 5588 (concur_s_opchan) CPU: 1 PID: 5588 Comm: concur_s_opchan Tainted: G           O 3.10.33 #1
[  952.207604] c1 5588 (concur_s_opchan) task: ffffffc02e106180 ti: ffffffc009124000 task.ti: ffffffc009124000
[  952.217311] c1 5588 (concur_s_opchan) PC is at __rb_insert_augmented+0xe8/0x1e0
[  952.224595] c1 5588 (concur_s_opchan) LR is at anon_vma_interval_tree_insert+0x94/0x9c
[  952.232475] c1 5588 (concur_s_opchan) pc : [<ffffffc000307d50>] lr : [<ffffffc000164488>] pstate: 60000145
[  952.242068] c1 5588 (concur_s_opchan) sp : ffffffc009127c50
[  952.247612] R29: ffffffc009127c50 R28: ffffffc0008fee20
[  952.252927] R27: ffffffc021d92a10 R26: ffffffc000acbf10
[  952.258271] R25: ffffffc000c1d5c0 R24: ffffffc00c90c5f8
[  952.263602] R23: ffffffc021d92a10 R22: ffffffc0299e0c80
[  952.268935] R21: ffffffc021d92a38 R20: ffffffc02d7bca50
[  952.274251] R19: ffffffc021e15a60 R18: 0000000000000000
[  952.279566] R17: 0000000000000000 R16: ffffffc00009eae4
[  952.284881] R15: 0000000000000000 R14: ffffffc0299e0d10
[  952.290197] R13: 0000000000000000 R12: 0000000000000000
[  952.295512] R11: 0000000000000000 R10: 0000000000000000
[  952.300828] R9 : 000000000000058b R8 : ffffffc0098c35e0
[  952.306143] R7 : ffffffc021e15a60 R6 : 0000000000000010
[  952.311459] R5 : ffffffc021e15a61 R4 : ffffffc021e15a61
[  952.316793] R3 : ffffffc0098c35e0 R2 : ffffffc000163ed0
[  952.322109] R1 : 0000008000000000 R0 : ffffffc021e15b60
[  952.327426] c1 5588 (concur_s_opchan)
[  952.331172] c1 5588 (concur_s_opchan)
[  952.331172] PC: ffffffc000307cd0:

   Analysis:

  The log shows that the system panicked while accessing the invalid virtual address 0x8000000000. Where does this address come from?

  Disassemble function __rb_insert_augmented:

ffffffc000307c68 <__rb_insert_augmented>:
ffffffc000307c68:  a9bc7bfd  stp   x29, x30, [sp,#-64]!
ffffffc000307c6c:  910003fd  mov   x29, sp
ffffffc000307c70:  f90013f5  str   x21, [sp,#32]
ffffffc000307c74:  a90153f3  stp   x19, x20, [sp,#16]
ffffffc000307c78:  f9400003  ldr   x3, [x0]
ffffffc000307c7c:  aa0103f5  mov   x21, x1
ffffffc000307c80:  b4000be3  cbz   x3, ffffffc000307dfc <__rb_insert_augmented+0x194>
ffffffc000307c84:  f9400073  ldr   x19, [x3]
ffffffc000307c88:  37000253  tbnz  w19, #0, ffffffc000307cd0 <__rb_insert_augmented+0x68>
ffffffc000307c8c:  f9400664  ldr   x4, [x19,#8]
ffffffc000307c90:  b2400265  orr   x5, x19, #0x1
ffffffc000307c94:  eb04007f  cmp   x3, x4
ffffffc000307c98:  aa1303e7  mov   x7, x19
ffffffc000307c9c:  54000540  b.eq  ffffffc000307d44 <__rb_insert_augmented+0xdc>
ffffffc000307ca0:  b4000204  cbz   x4, ffffffc000307ce0 <__rb_insert_augmented+0x78>
ffffffc000307ca4:  f9400086  ldr   x6, [x4]
ffffffc000307ca8:  370001c6  tbnz  w6, #0, ffffffc000307ce0 <__rb_insert_augmented+0x78>
ffffffc000307cac:  f9000085  str   x5, [x4]
ffffffc000307cb0:  f9000065  str   x5, [x3]
ffffffc000307cb4:  f9400263  ldr   x3, [x19]
ffffffc000307cb8:  927ef463  and   x3, x3, #0xfffffffffffffffc
ffffffc000307cbc:  f9000263  str   x3, [x19]
ffffffc000307cc0:  b40009c3  cbz   x3, ffffffc000307df8 <__rb_insert_augmented+0x190>
ffffffc000307cc4:  f9400073  ldr   x19, [x3]
ffffffc000307cc8:  aa0703e0  mov   x0, x7
ffffffc000307ccc:  3607fe13  tbz   w19, #0, ffffffc000307c8c <__rb_insert_augmented+0x24>
ffffffc000307cd0:  a94153f3  ldp   x19, x20, [sp,#16]
ffffffc000307cd4:  f94013f5  ldr   x21, [sp,#32]
ffffffc000307cd8:  a8c47bfd  ldp   x29, x30, [sp],#64
ffffffc000307cdc:  d65f03c0  ret
ffffffc000307ce0:  f9400474  ldr   x20, [x3,#8]
ffffffc000307ce4:  eb00029f  cmp   x20, x0
ffffffc000307ce8:  54000640  b.eq  ffffffc000307db0 <__rb_insert_augmented+0x148>
ffffffc000307cec:  aa0303e4  mov   x4, x3
ffffffc000307cf0:  f9000a74  str   x20, [x19,#16]
ffffffc000307cf4:  f9000473  str   x19, [x3,#8]
ffffffc000307cf8:  b4000074  cbz   x20, ffffffc000307d04 <__rb_insert_augmented+0x9c>
ffffffc000307cfc:  b2400260  orr   x0, x19, #0x1
ffffffc000307d00:  f9000280  str   x0, [x20]
ffffffc000307d04:  f9400260  ldr   x0, [x19]
ffffffc000307d08:  f9000060  str   x0, [x3]
ffffffc000307d0c:  f9000264  str   x4, [x19]
ffffffc000307d10:  927ef400  and   x0, x0, #0xfffffffffffffffc
ffffffc000307d14:  b40003e0  cbz   x0, ffffffc000307d90 <__rb_insert_augmented+0x128>
ffffffc000307d18:  f9400801  ldr   x1, [x0,#16]
ffffffc000307d1c:  eb01027f  cmp   x19, x1
ffffffc000307d20:  54000680  b.eq  ffffffc000307df0 <__rb_insert_augmented+0x188>
ffffffc000307d24:  f9000403  str   x3, [x0,#8]
ffffffc000307d28:  aa1303e0  mov   x0, x19
ffffffc000307d2c:  aa0303e1  mov   x1, x3
ffffffc000307d30:  d63f0040  blr   x2
ffffffc000307d34:  a94153f3  ldp   x19, x20, [sp,#16]
ffffffc000307d38:  f94013f5  ldr   x21, [sp,#32]
ffffffc000307d3c:  a8c47bfd  ldp   x29, x30, [sp],#64
ffffffc000307d40:  d65f03c0  ret
ffffffc000307d44:  f9400a61  ldr   x1, [x19,#16]
ffffffc000307d48:  b2400264  orr   x4, x19, #0x1
ffffffc000307d4c:  b4000061  cbz   x1, ffffffc000307d58 <__rb_insert_augmented+0xf0>
ffffffc000307d50:  f9400025  ldr   x5, [x1]        <-- faulting instruction (PC = __rb_insert_augmented+0xe8)
ffffffc000307d54:  36000225  tbz   w5, #0, ffffffc000307d98 <__rb_insert_augmented+0x130>
ffffffc000307d58:  f9400874  ldr   x20, [x3,#16]

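  The instruction at ffffffc000307d50 (`ldr x5, [x1]`, i.e. the PC reported in the log, __rb_insert_augmented+0xe8) caused the panic. It loads the memory content at the address held in X1 into X5. X1 equals 0x8000000000, which is not a valid kernel-space address, so the access faults and the kernel panics.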

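  X1 is loaded from [x19,#16] by the instruction at ffffffc000307d44 (`ldr x1, [x19,#16]`). Comparing with the source code, X19 holds the address of a struct rb_node (its value, ffffffc021e15a60, can be read from R19 in the panic log), so we can use crash to inspect that node.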

 

crash> struct rb_node ffffffc021e15a60    // address taken from X19
struct rb_node {
  __rb_parent_color = 18446743799310610977,
  rb_right = 0xffffffc0098c35e0,
  rb_left = 0x8000000000
}

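  So X1 holds the value of rb_left (offset 16 in struct rb_node). That field should be either a normal kernel-space address or 0 (NULL). The value 0x8000000000 has exactly one bit set (bit 39), so I think it is most likely the result of a one-bit memory error: the field should have been 0, and a single bit was flipped.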

ddr不稳定导致1-bit error引起kernel panic(Unable to handle kernel paging request )