一个bug引发的linux smp 血案(二)

时间:2022-09-20 07:30:30

http://blog.chinaunix.net/xmlrpc.php?r=blog/article&uid=25171069&id=3212735

前一篇文章分析了cpu1在online/offline切换过程中出现soft lockup、进而触发watchdog reset的问题,解决方案是禁止抢占。今天分析linux smp相关的另一个bug,log如下:

环境如下:高通芯片平台(双核), linux 3.0 version
<3>[ 34.570251,0] mdm6600_ctrl: modem already powered down.
<0>[ 34.587890,0] Restarting system with command ''.
<2>[ 34.609497,1] CPU1: stopping
...
...
<3>[ 81.664855,0] BUG: soft lockup - CPU#0 stuck for 42s! [qe:153]
<4>[ 81.671112,0] Modules linked in: vpnclient btwilink wl12xx mac80211 cfg80211 compat evfwd
<4>[ 81.671112,0] 
<4>[ 81.682312,0] Pid: 153, comm: qe
<3>[ 81.687469,0] GIC mask = 90, Priority of IRQ(4~7) = a080a0a0
<4>[ 81.687469,0] CPU: 0 Tainted: G W (3.0.8-eng-g4a0bba0 #1)
<4>[ 81.687469,0] PC is at generic_exec_single+0x78/0x9c
<4>[ 81.705963,0] LR is at arch_send_call_function_single_ipi+0x3c/0x40
<4>[ 81.705963,0] pc : [<c00e3a1c>] lr : [<c005f6e4>] psr: 20000013
<4>[ 81.712738,0] sp : de889d10 ip : de889d00 fp : de889d3c
<4>[ 81.712738,0] r10: 00000001 r9 : c1403d48 r8 : 013b1000
<4>[ 81.725463,0] r7 : c1403d40 r6 : 00000001 r5 : c1403d40 r4 : de889d54
<4>[ 81.725463,0] r3 : 00000001 r2 : fa241000 r1 : 00000005 r0 : c06eed1c
...
...
<4>[ 82.652008,0] [<c005b9c4>] (show_regs+0x0/0x50) from [<c00f5d40>] (watchdog_timer_fn+0x174/0x1c8)
<4>[ 82.652008,0] r5:de888000 r4:c0051a64
<4>[ 82.675384,0] [<c00f5bcc>] (watchdog_timer_fn+0x0/0x1c8) from [<c00d006c>] (__run_hrtimer+0x7c/0x270)
<4>[ 82.675384,0] [<c00cfff0>] (__run_hrtimer+0x0/0x270) from [<c00d0514>] (hrtimer_interrupt+0x108/0x32c)
<4>[ 82.675384,0] [<c00d040c>] (hrtimer_interrupt+0x0/0x32c) from [<c00535c0>] (do_local_timer+0xac/0xd0)
<4>[ 82.675384,0] [<c0053514>] (do_local_timer+0x0/0xd0) from [<c005a008>] (__irq_svc+0x48/0xe4)
<4>[ 82.675384,0] Exception stack(0xde889cc8 to 0xde889d10)
<4>[ 82.719818,0] 9cc0: c06eed1c 00000005 fa241000 00000001 de889d54 c1403d40
<4>[ 82.719818,0] 9ce0: 00000001 c1403d40 013b1000 c1403d48 00000001 de889d3c de889d00 de889d10
<4>[ 82.719818,0] 9d00: c005f6e4 c00e3a1c 20000013 ffffffff
<4>[ 82.719818,0] r9:de888000 r8:00000003 r7:00000004 r6:0000001d r5:fa240100
<4>[ 82.750823,0] r4:ffffffff
<4>[ 82.750823,0] [<c00e39a4>] (generic_exec_single+0x0/0x9c) from [<c00e3ea8>] (smp_call_function_single+0x27c/0x2a4)
<4>[ 82.754058,0] [<c00e3c2c>] (smp_call_function_single+0x0/0x2a4) from [<c00e4160>] (smp_call_function_many+0x290/0x2e8)
<4>[ 82.754058,0] [<c00e3ed0>] (smp_call_function_many+0x0/0x2e8) from [<c00e41fc>] (smp_call_function+0x44/0x70)
<4>[ 82.776611,0] [<c00e41b8>] (smp_call_function+0x0/0x70) from [<c00e4258>] (on_each_cpu+0x30/0xe8)
<4>[ 82.776611,0] r5:c018c44c r4:de888000
<4>[ 82.776611,0] [<c00e4228>] (on_each_cpu+0x0/0xe8) from [<c018c0e0>] (invalidate_bh_lrus+0x20/0x24)
<4>[ 82.810516,0] [<c018c0c0>] (invalidate_bh_lrus+0x0/0x24) from [<c0192ce0>] (kill_bdev+0x28/0x40)
<4>[ 82.810516,0] [<c0192cb8>] (kill_bdev+0x0/0x40) from [<c0193b28>] (__blkdev_put+0x68/0x180)
<4>[ 82.810516,0] r5:00000000 r4:df402a80
这个bug出现在tester执行adb reboot(Android系统)重启设备时:系统在重启过程中发生死锁,soft lockup现场如下:
<3>[ 81.664855,0] BUG: soft lockup - CPU#0 stuck for 42s! [qe:153]
<4>[ 81.671112,0] Modules linked in: vpnclient btwilink wl12xx mac80211 cfg80211 compat evfwd
<4>[ 81.671112,0] 
<4>[ 81.682312,0] Pid: 153, comm: qe
<3>[ 81.687469,0] GIC mask = 90, Priority of IRQ(4~7) = a080a0a0
<4>[ 81.687469,0] CPU: 0 Tainted: G W (3.0.8-eng-g4a0bba0 #1)
<4>[ 81.687469,0] PC is at generic_exec_single+0x78/0x9c
<4>[ 81.705963,0] LR is at arch_send_call_function_single_ipi+0x3c/0x40
<4>[ 81.705963,0] pc : [<c00e3a1c>] lr : [<c005f6e4>] psr: 20000013
<4>[ 81.712738,0] sp : de889d10 ip : de889d00 fp : de889d3c
<4>[ 81.712738,0] r10: 00000001 r9 : c1403d48 r8 : 013b1000
<4>[ 81.725463,0] r7 : c1403d40 r6 : 00000001 r5 : c1403d40 r4 : de889d54
<4>[ 81.725463,0] r3 : 00000001 r2 : fa241000 r1 : 00000005 r0 : c06eed1c
=============================================================================================
soft lockup现场的PC指向generic_exec_single(+0x78处,LR是arch_send_call_function_single_ipi的返回地址,说明ipi已经发出,cpu0应该是停在后面的等待循环里)。首先分析重启(restart)流程:
kernel_restart -->
machine_restart -->
machine_shutdown-->
smp_send_stop-->
看smp_send_stop的实现:
void smp_send_stop(void)
{
cpumask_t mask = cpu_online_map;
cpu_clear(smp_processor_id(), mask);
send_ipi_message(&mask, IPI_CPU_STOP);
}
这里cpu0会把IPI_CPU_STOP中断发送给除自己以外的所有online cpu,在这个双核平台上也就是cpu1。

cpu1 接收到IPI_CPU_STOP中断之后会有什么样子的运行流程呢?

arch/arm/kernel/smp.c
do_IPI-->
handle_IPI

点击(此处)折叠或打开

  1. void handle_IPI(int ipinr, struct pt_regs *regs)
  2. {
  3.         unsigned int cpu = smp_processor_id();
  4.         struct pt_regs *old_regs = set_irq_regs(regs);

  5.         if (ipinr >= IPI_CPU_START && ipinr < IPI_CPU_START + NR_IPI)
  6.                 __inc_irq_stat(cpu, ipi_irqs[ipinr - IPI_CPU_START]);

  7.         switch (ipinr) {
  8.         case IPI_CPU_START:
  9.                 /* Wake up from WFI/WFE using SGI */
  10.                 break;
  11.         case IPI_TIMER:
  12.                 ipi_timer();
  13.                 break;

  14.         case IPI_RESCHEDULE:
  15.                 scheduler_ipi();
  16.                 break;

  17.         case IPI_CALL_FUNC:
  18.                 generic_smp_call_function_interrupt();
  19.                 break;

  20.         case IPI_CALL_FUNC_SINGLE:
  21.                 generic_smp_call_function_single_interrupt();
  22.                 break;

  23.         case IPI_CPU_STOP:  <===走这个分支
  24.                 ipi_cpu_stop(cpu);
  25.                 break;

  26.         case IPI_CPU_BACKTRACE:
  27.                 ipi_cpu_backtrace(cpu, regs);
  28.                 break;

  29.         default:
  30.                 printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
  31.                        cpu, ipinr);
  32.                 break;
  33.         }
  34.         set_irq_regs(old_regs);
  35. }
调用ipi_cpu_stop

点击(此处)折叠或打开

  1. static void ipi_cpu_stop(unsigned int cpu)
  2. {
  3.         if (system_state == SYSTEM_BOOTING ||
  4.             system_state == SYSTEM_RUNNING) {
  5.                 raw_spin_lock(&stop_lock);
  6.                 printk(KERN_CRIT "CPU%u: stopping\n", cpu);
  7.                 dump_stack();
  8.                 raw_spin_unlock(&stop_lock);
  9.         }

  10.         set_cpu_online(cpu, false);

  11.         local_fiq_disable();
  12.         local_irq_disable();

  13.         while (1)
  14.                 cpu_relax();
  15. }
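这个函数主要做三件事情(如果system_state还是SYSTEM_BOOTING或SYSTEM_RUNNING,会先在stop_lock保护下打印"CPUx: stopping"并dump_stack):
1.  把本cpu的online状态清掉(set_cpu_online(cpu, false))
2.  禁fiq
3.  禁irq
然后就让cpu进入一个死循环。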

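这里看似都没有什么问题,但是假设出现下面的时序:cpu0发出IPI_CPU_STOP之后,cpu1进入ipi_cpu_stop开始停止,但还没有执行到set_cpu_online(cpu, false),所以在cpu0看来cpu1仍然是online的;这时cpu0上又发出一个ipi function call给cpu1(log中是qe进程经invalidate_bh_lrus --> on_each_cpu发出的)。而cpu1正处在IPI_CPU_STOP的处理过程中,随后还会关掉fiq/irq并进入死循环,再也不会响应新的ipi中断,于是cpu0会一直死等cpu1去完成这个function call,就形成了死锁。等的过程如下: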
smp_call_function -->
smp_call_function_many-->
smp_call_function_single-->
generic_exec_single



点击(此处)折叠或打开

  1. void generic_exec_single(int cpu, struct call_single_data *data, int wait)
  2. {
  3.     struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
  4.     unsigned long flags;
  5.     int ipi;

  6.     raw_spin_lock_irqsave(&dst->lock, flags);
  7.     ipi = list_empty(&dst->list);
  8.     list_add_tail(&data->list, &dst->list);
  9.     raw_spin_unlock_irqrestore(&dst->lock, flags);

  10.     /*
  11.      * The list addition should be visible before sending the IPI
  12.      * handler locks the list to pull the entry off it because of
  13.      * normal cache coherency rules implied by spinlocks.
  14.      *
  15.      * If IPIs can go out of order to the cache coherency protocol
  16.      * in an architecture, sufficient synchronisation should be added
  17.      * to arch code to make it appear to obey cache coherency WRT
  18.      * locking and barrier primitives. Generic code isn't really
  19.      * equipped to do the right thing...
  20.      */
  21.     if (ipi)
  22.         arch_send_call_function_single_ipi(cpu);

  23.     if (wait)  <===这里就是等的动作。wait是调用者传进来的参数:为0就不等,直接返回;非0就会在csd_lock_wait里一直等到data->flags中的CSD_FLAG_LOCK被清掉。
  24.         csd_lock_wait(data);
  25. }

看看csd_lock_wait实现如下:
static void csd_lock_wait(struct call_single_data *data)
{
while (data->flags & CSD_FLAG_LOCK)
cpu_relax();
}
csd_lock_wait就是忙等data->flags里的CSD_FLAG_LOCK标志被清除。这个标志要等目标cpu处理完这次function call之后才会被清掉,而cpu1已经不再响应ipi,标志永远不会被清除,所以cpu0会一直死等下去。

解决方案是:在machine_restart一开始就关掉本cpu(cpu0)的中断,这样重启流程在cpu0上不会再被中断或抢占,也就不会有别的代码在smp_send_stop前后向cpu1发第二次ipi中断:
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -291,6 +291,7 @@ void machine_power_off(void)
 
 void machine_restart(char *cmd)
 {
+       local_irq_disable();
        machine_shutdown();
        arm_pm_restart(reboot_mode, cmd);
 }