1.开启 Kdump
1.1安装必要的软件包
确保 crash 和 kernel-debuginfo 已经安装。
1.根据当前系统内核版本下载 kernel-debuginfo 和 kernel-debuginfo-common 包(安装在第 3 步进行;URL 中的 8.4 需与实际系统版本一致):
wget https://dl.rockylinux.org/vault/rocky/8.4/BaseOS/x86_64/debug/tree/Packages/kernel-debuginfo-$(uname -r).rpm
wget https://dl.rockylinux.org/vault/rocky/8.4/BaseOS/x86_64/debug/tree/Packages/kernel-debuginfo-common-x86_64-$(uname -r).rpm
2.使用 dnf 安装 crash 软件包
dnf install crash
3.安装 kernel-debuginfo 及其相关包
rpm -ivh kernel-debuginfo-common-x86_64-$(uname -r).rpm
rpm -ivh kernel-debuginfo-$(uname -r).rpm
2.分配 Kdump 保留内存
2.1在 /etc/default/grub 文件中设置 crashkernel 参数:
#默认已经带有 crashkernel=auto 参数,如需调整保留内存大小,可以修改该值
GRUB_CMDLINE_LINUX="crashkernel=auto resume=/dev/mapper/rl-swap rd.lvm.lv=rl/root rd.lvm.lv=rl/swap"
补充:这里的 auto 表示系统会根据物理内存大小自动计算保留内存;也可以手动指定一个固定值,例如 crashkernel=128M,high、crashkernel=256M,high 等。如果设置成固定值,建议按物理内存大小选择:
1) 1 GB 到 4 GB 内存设置成 160 M
2) 4 GB 到 64 GB 内存设置成 192 M
3) 64 GB 到 1 TB 内存设置成 256 M
4) 大于 1 TB 内存设置成 512 M
2.2更新 GRUB 配置并重启使更改生效:
grub2-mkconfig -o /boot/grub2/grub.cfg
reboot
2.3检查当前内核的内存使用情况,用于估算转储文件大小(实际为 Kdump 保留的内存大小可通过 cat /sys/kernel/kexec_crash_size 查看):
[root@localhost ~]# makedumpfile --mem-usage /proc/kcore
TYPE PAGES EXCLUDABLE DESCRIPTION
----------------------------------------------------------------------
ZERO 19232 yes Pages filled with zero
NON_PRI_CACHE 338834 yes Cache pages without private flag
PRI_CACHE 1 yes Cache pages with private flag
USER 14030 yes User process pages
FREE 21913 yes Free pages
KERN_DATA 65679 no Dumpable kernel data
page size: 4096
Total pages on system: 459689
Total size on system: 1882886144 Byte
3.触发 Kdump
模拟内核崩溃以测试 Kdump 是否正确配置:
echo 1 > /proc/sys/kernel/sysrq
echo c > /proc/sysrq-trigger
#执行后内核会立即崩溃,Kdump 保存 vmcore 之后机器会自动重启
4.分析 Kdump 生成的内核崩溃信息
4.1查看存放内核崩溃转储的目录:
[root@localhost ~]# ls /var/crash/
127.0.0.1-2024-10-28-12:20:58
4.2确认带调试信息的内核映像文件 vmlinux 已存在(如果不存在,可能是前面提到的 kernel-debuginfo RPM 没有安装成功)
[root@localhost ~]# find / -name vmlinux
/sys/kernel/btf/vmlinux
/usr/lib/debug/usr/lib/modules/4.18.0-305.3.1.el8_4.x86_64/vmlinux #这个才是
4.3分析Kdump生成的内核崩溃信息
[root@localhost ~]# crash /usr/lib/debug/usr/lib/modules/4.18.0-305.3.1.el8_4.x86_64/vmlinux /var/crash/127.0.0.1-2024-10-28-12\:20\:58/vmcore
crash 8.0.4-2.el8
Copyright (C) 2002-2022 Red Hat, Inc.
Copyright (C) 2004, 2005, 2006, 2010 IBM Corporation
Copyright (C) 1999-2006 Hewlett-Packard Co
Copyright (C) 2005, 2006, 2011, 2012 Fujitsu Limited
Copyright (C) 2006, 2007 VA Linux Systems Japan K.K.
Copyright (C) 2005, 2011, 2020-2022 NEC Corporation
Copyright (C) 1999, 2002, 2007 Silicon Graphics, Inc.
Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc.
Copyright (C) 2015, 2021 VMware, Inc.
This program is free software, covered by the GNU General Public License,
and you are welcome to change it and/or distribute copies of it under
certain conditions. Enter "help copying" to see the conditions.
This program has absolutely no warranty. Enter "help warranty" for details.
GNU gdb (GDB) 10.2
Copyright (C) 2021 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-pc-linux-gnu".
Type "show configuration" for configuration details.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
KERNEL: /usr/lib/debug/usr/lib/modules/4.18.0-305.3.1.el8_4.x86_64/vmlinux [TAINTED]
DUMPFILE: /var/crash/127.0.0.1-2024-10-28-12:20:58/vmcore [PARTIAL DUMP]
CPUS: 2
DATE: Mon Oct 28 04:20:56 EDT 2024
UPTIME: 00:05:01
LOAD AVERAGE: 0.13, 0.15, 0.06
TASKS: 179
NODENAME: localhost.localdomain
RELEASE: 4.18.0-305.3.1.el8_4.x86_64
VERSION: #1 SMP Thu Jun 17 07:52:48 UTC 2021
MACHINE: x86_64 (3110 Mhz)
MEMORY: 1.5 GB
PANIC: "sysrq: SysRq : Trigger a crash"
PID: 1415
COMMAND: "bash"
TASK: ffff9ee34eec4800 [THREAD_INFO: ffff9ee34eec4800]
CPU: 0
STATE: TASK_RUNNING (SYSRQ)
5.Crash基本命令
log命令:查看崩溃时内核日志缓冲区(相当于 dmesg)的内容
crash> log
[ 0.000000] Linux version 4.18.0-305.3.1.el8_4.x86_64 (mockbuild@ord1-prod-x86build003.svc.aws.rockylinux.org) (gcc version 8.4.1 20200928 (Red Hat 8.4.1-1) (GCC)) #1 SMP Thu Jun 17 07:52:48 UTC 2021
[ 0.000000] Command line: BOOT_IMAGE=(hd0,msdos1)/vmlinuz-4.18.0-305.3.1.el8_4.x86_64 root=/dev/mapper/rl-root ro crashkernel=auto resume=/dev/mapper/rl-swap rd.lvm.lv=rl/root rd.lvm.lv=rl/swap
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers'
[ 0.000000] x86/fpu: Supporting XSAVE feature 0x200: 'Protection Keys User registers'
[ 0.000000] x86/fpu: xstate_offset[2]: 576, xstate_sizes[2]: 256
[ 0.000000] x86/fpu: xstate_offset[9]: 832, xstate_sizes[9]: 8
[ 0.000000] x86/fpu: Enabled xstate features 0x207, context size is 840 bytes, using 'compacted' format.
[ 0.000000] BIOS-provided physical RAM map:
[ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009ebff] usable
[ 0.000000] BIOS-e820: [mem 0x000000000009ec00-0x000000000009ffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000000dc000-0x00000000000fffff] reserved
[ 0.000000] BIOS-e820: [mem 0x0000000000100000-0x0000000060edffff] usable
[ 0.000000] BIOS-e820: [mem 0x0000000060ee0000-0x0000000060efefff] ACPI data
[ 0.000000] BIOS-e820: [mem 0x0000000060eff000-0x0000000060efffff] ACPI NVS
[ 0.000000] BIOS-e820: [mem 0x0000000060f00000-0x0000000060ffffff] usable
[ 0.000000] BIOS-e820: [mem 0x00000000f0000000-0x00000000f7ffffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000fec00000-0x00000000fec0ffff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000fee00000-0x00000000fee00fff] reserved
[ 0.000000] BIOS-e820: [mem 0x00000000fffe0000-0x00000000ffffffff] reserved
bt命令:查看触发崩溃的进程的内核调用栈(backtrace)
crash> bt
PID: 1415 TASK: ffff9ee34eec4800 CPU: 0 COMMAND: "bash"
#0 [ffffb6f640d0fb98] machine_kexec at ffffffff9906156e
#1 [ffffb6f640d0fbf0] __crash_kexec at ffffffff9918f99d
#2 [ffffb6f640d0fcb8] crash_kexec at ffffffff9919088d
#3 [ffffb6f640d0fcd0] oops_end at ffffffff9902434d
#4 [ffffb6f640d0fcf0] no_context at ffffffff9907262f
#5 [ffffb6f640d0fd48] __bad_area_nosemaphore at ffffffff9907298c
#6 [ffffb6f640d0fd90] do_page_fault at ffffffff99073267
#7 [ffffb6f640d0fdc0] page_fault at ffffffff99a010fe
[exception RIP: sysrq_handle_crash+18]
RIP: ffffffff99590ac2 RSP: ffffb6f640d0fe78 RFLAGS: 00010246
RAX: ffffffff99590ab0 RBX: 0000000000000063 RCX: 0000000000000000
RDX: 0000000000000000 RSI: ffff9ee3526167c8 RDI: 0000000000000063
RBP: 0000000000000007 R8: 00000000000005dc R9: 0000000000aaaaaa
R10: 0000000000000000 R11: ffffb6f64133fbc0 R12: 0000000000000000
R13: 0000000000000000 R14: ffffffff99eab8e0 R15: 0000000000000000
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#8 [ffffb6f640d0fe78] __handle_sysrq.cold.11 at ffffffff99591658
#9 [ffffb6f640d0fea8] write_sysrq_trigger at ffffffff9959151b
ps命令:查看崩溃时系统中的进程列表(行首的 > 表示当时正在某个 CPU 上运行的任务)
crash> ps
PID PPID CPU TASK ST %MEM VSZ RSS COMM
0 0 0 ffffffff9a618840 RU 0.0 0 0 [swapper/0]
> 0 0 1 ffff9ee3600f1800 RU 0.0 0 0 [swapper/1]
1 0 1 ffff9ee360090000 IN 0.7 183592 11044 systemd
2 0 1 ffff9ee360091800 IN 0.0 0 0 [kthreadd]
3 2 0 ffff9ee360093000 ID 0.0 0 0 [rcu_gp]
4 2 0 ffff9ee360096000 ID 0.0 0 0 [rcu_par_gp]
5 2 0 ffff9ee360094800 ID 0.0 0 0 [kworker/0:0]
6 2 0 ffff9ee3600ec800 ID 0.0 0 0 [kworker/0:0H]
7 2 0 ffff9ee3600e8000 ID 0.0 0 0 [kworker/0:1]
8 2 1 ffff9ee3600e9800 ID 0.0 0 0 [kworker/u256:0]
9 2 0 ffff9ee3600eb000 ID 0.0 0 0 [mm_percpu_wq]
10 2 0 ffff9ee3600ee000 IN 0.0 0 0 [ksoftirqd/0]
11 2 0 ffff9ee3600f6000 ID 0.0 0 0 [rcu_sched]
12 2 0 ffff9ee3600f4800 IN 0.0 0 0 [migration/0]
13 2 0 ffff9ee3600f0000 IN 0.0 0 0 [watchdog/0]
14 2 0 ffff9ee35207e000 IN 0.0 0 0 [cpuhp/0]
15 2 1 ffff9ee35207c800 IN 0.0 0 0 [cpuhp/1]
16 2 1 ffff9ee352078000 IN 0.0 0 0 [watchdog/1]
17 2 1 ffff9ee352079800 IN 0.0 0 0 [migration/1]
18 2 1 ffff9ee35207b000 IN 0.0 0 0 [ksoftirqd/1]
19 2 1 ffff9ee352089800 ID 0.0 0 0 [kworker/1:0]
20 2 1 ffff9ee35208b000 ID 0.0 0 0 [kworker/1:0H]
22 2 0 ffff9ee35208c800 IN 0.0 0 0 [kdevtmpfs]
23 2 1 ffff9ee352088000 ID 0.0 0 0 [netns]
24 2 1 ffff9ee3523b9800 IN 0.0 0 0 [kauditd]
25 2 0 ffff9ee3523bb000 ID 0.0 0 0 [kworker/0:2]