多线程之问题的解决及使用GDB调试多线程多进程的手段(九)

时间:2020-12-09 16:41:42

1. 待解决问题

[root@localhost ~]# ./example
thread started...
parent about to fork...
preparing locks...
parent unlocking locks...
parent returned from fork
child unlocking locks...
child returned from fork

[root@localhost ~]# ./example >temp.txt
[root@localhost ~]# cat temp.txt
thread started...
parent about to fork...
preparing locks...
parent unlocking locks...
parent returned from fork
thread started...
parent about to fork...
preparing locks...
child unlocking locks...
child returned from fork

2. 分析问题

  可以看到输出到文件的话,有些信息打印了两次。这是标准I/O(stdio)中line buffer(行缓冲)和full buffer(全缓冲)的锅,当然帮凶是fork时子进程对父进程资源的继承机制。其实问题到这里就解决了,但这只是笔者从原理上分析得出的结论,还需要用实验去证实。
  为此笔者对代码做一下修改:1. 将标准输出指向文件,方便GDB调试时查看结果。 2. 简化代码逻辑,能够说明本文目的即可。 3. 本文的重点将是如何分析问题并解决问题,而非问题本身。

3. 解决问题

3.1 修改代码看看结果

[root@localhost ~]# vim 11_9.c 
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <err.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
void prepare(void) // fork "prepare" handler (registered through pthread_atfork in main): only prints a marker line
{
    fputs("preparing locks...\n", stdout); // goes through the stdio buffer of stdout; no explicit flush here
}
void * thr_fn(void *arg) // Thread start routine: prints one line, flushes stdout, then blocks in pause(); arg is not used.
{
printf("thread started...\n");
fflush(stdout);// explicit flush, added for the debugging session: pushes the line out of the stdio buffer right away
pause();
return(0);
}
int main(void) // Demo: text left in the stdio buffer when fork() is called ends up in data.txt twice (once per process).
{
    int rc; // error number returned by the pthread_* calls (named rc so it does not shadow err() from <err.h>)
    pid_t pid;
    pthread_t tid;
    int fd;
    char * buffer="hi, it's a flag to control flow\n";
    // Redirect standard output to a file (one of several possible ways): with fd 1 closed, open() is expected to reuse it.
    close(1);
    if((fd=open("data.txt",O_RDWR|O_CREAT|O_TRUNC,S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0)
        err(1,"error in creating a file"); // err() appends strerror(errno) and the newline itself
    // pthread_atfork is used only to print a message here; its usual lock-handling purpose is ignored.
    if ((rc = pthread_atfork(prepare, NULL, NULL)) != 0)
        errx(1, "can’t install fork handlers: %s", strerror(rc)); // pthread calls return the error number directly
    if ((rc = pthread_create(&tid, NULL, thr_fn, 0)) != 0)
        errx(1, "can’t create thread: %s", strerror(rc));
    sleep(2);
    // write() bypasses the stdio buffer, which makes the difference from the printf() calls below easy to see.
    write(STDOUT_FILENO,buffer,strlen(buffer));
    printf("parent about to fork....\n");
    if ((pid = fork()) < 0)
        err(1,"fork failed"); // report errno as well
    else if (pid == 0)
        printf("child returned from fork\n");
    else
        printf("parent returned from fork\n");
    exit(0);
}

  运行结果如下:

[root@localhost ~]# gcc -pthread -o 11_9 11_9.c
[root@localhost ~]# ./11_9
[root@localhost ~]# cat data.txt
thread started...
hi, it's a flag to control flow
parent about to fork....
preparing locks...
parent returned from fork
parent about to fork....
preparing locks...
child returned from fork

  可以看到thread started...和hi, it's...各输出了一次,而parent about to fork....和preparing locks...各输出了两次,所以到这里已经很明确了,笔者通过修改后的代码验证了最上面的理论分析。但是这还不够,因为还没有做GDB的跟踪。

3.2 多线程及多进程GDB跟踪

  首先重新编译以增加调试信息。本程序正好可以在fork前跟踪另一个线程,在fork后跟踪另一个进程。

3.2.1 跟踪线程:
[root@localhost ~]# gcc -pthread -o 11_9 11_9.c -g
[root@localhost ~]# gdb
GNU gdb (GDB) Fedora 8.0.1-33.fc27
Copyright (C) 2017 Free Software Foundation, Inc.
...........//简化一些信息,笔者没有用截图。
(gdb) file 11_9
Reading symbols from 11_9...done.
(gdb) break 14
Breakpoint 1 at 0x4008f4: file 11_9.c, line 14.
(gdb) break main
Breakpoint 2 at 0x400921: file 11_9.c, line 25.
(gdb) r
Starting program: /root/11_9
Missing separate debuginfos, use: dnf debuginfo-install glibc-2.26-20.fc27.x86_64
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".

Breakpoint 2, main () at 11_9.c:25
25 char * buffer="hi, it's a flag to control flow\n";
(gdb) set scheduler-locking on
(gdb) c
Continuing.
[New Thread 0x7ffff77d2700 (LWP 8431)]
[Switching to Thread 0x7ffff77d2700 (LWP 8431)]

Thread 2 "11_9" hit Breakpoint 1, thr_fn (arg=0x0) at 11_9.c:14
14 printf("thread started...\n");
(gdb) info thread
Id Target Id Frame
1 Thread 0x7ffff7fd5740 (LWP 8427) "11_9" 0x00007ffff78aead0 in nanosleep () from /lib64/libc.so.6
* 2 Thread 0x7ffff77d2700 (LWP 8431) "11_9" thr_fn (arg=0x0) at 11_9.c:14
(gdb) next
15 fflush(stdout);//这里专门针对DeBUG
(gdb)
//-------------------------------------------------------------------
//此时在另一个终端看看data.txt文件的结果,以上fflush语句还未执行
[root@localhost ~]# cat data.txt
//什么都没有
(gdb) next
16 pause();
(gdb)
//-----------------------------------------------------------------
//此时再次在另一个终端观察结果,发现文件里面有了输出。所以因为fflush的缘故,刷新了标准输出。
[root@localhost ~]# cat data.txt
thread started...
[root@localhost ~]#
//笔者强调下GDB没有退出过,所以还是接着上面的操作来的
(gdb) thread 1
[Switching to thread 1 (Thread 0x7ffff7fd5740 (LWP 8427))]
#0 0x00007ffff78aead0 in nanosleep () from /lib64/libc.so.6
(gdb) break 36
Breakpoint 3 at 0x4009dc: file 11_9.c, line 36.
(gdb) c
Continuing.

Thread 1 "11_9" hit Breakpoint 3, main () at 11_9.c:36
36 write(STDOUT_FILENO,buffer,strlen(buffer));
(gdb) next
37 printf("parent about to fork....\n");
(gdb) next
38 if ((pid = fork()) < 0)
(gdb)
//-----------------------------------------------------------------
//在另一个终端看看结果
[root@localhost ~]# cat data.txt
thread started...
hi, it's a flag to control flow

  以上跟踪调试的结果很好地和笔者的分析吻合:头一个输出使用了fflush刷新缓冲区,另一个输出使用了低层次的I/O write,所以它们立刻出现在了文件中;而已经执行过的printf("parent about to fork....\n")还没有出现在文件里,说明它仍停留在标准I/O的缓冲区中。其实到这里本文已经可以结尾了,但是多进程的部分做个最后的验证吧。

3.2.2 跟踪进程:
//笔者强调下GDB没有退出过,所以还是接着上面的操作来的
(gdb) set follow-fork-mode child
(gdb) set detach-on-fork off
(gdb) break 41
Breakpoint 4 at 0x400a2e: file 11_9.c, line 41.
(gdb) c
Continuing.
[New process 8512]
Reading symbols from /root/11_9...done.
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
[Switching to Thread 0x7ffff7fd5740 (LWP 8512)]

Thread 2.1 "11_9" hit Breakpoint 4, main () at 11_9.c:41
41 printf("child returned from fork\n");
Missing separate debuginfos, use: dnf debuginfo-install glibc-2.26-20.fc27.x86_64
(gdb) info inferiors
Num Description Executable
1 process 8427 /root/11_9
* 2 process 8512 /root/11_9
(gdb) next
44 exit(0);
(gdb) next
[Inferior 2 (process 8512) exited normally]
(gdb)
//-----------------------------------------------------------------
//注意这里让子进程结束了
[root@localhost ~]# cat data.txt
thread started...
hi, it's a flag to control flow
parent about to fork....
preparing locks...
child returned from fork
[root@localhost ~]#

//子进程因为exit(0),所以刷新了缓冲区,这里就可以看到子进程的输出了。注意`parent about to fork....`和`preparing locks...`这两行是在子进程被创建之前写入缓冲区的内容,子进程继承了这份缓冲区的副本,并在退出时将其刷新到了文件。
(gdb) break 43
Breakpoint 5 at 0x400a3a: /root/11_9.c:43. (2 locations)
(gdb) c
Continuing.

Thread 1 "11_9" hit Breakpoint 5, main () at 11_9.c:43
43 printf("parent returned from fork\n");
(gdb) next
44 exit(0);
(gdb) next
[Thread 0x7ffff77d2700 (LWP 8431) exited]
[Inferior 1 (process 8427) exited normally]
(gdb) q
[root@localhost ~]#
//-----------------------------------------------------------------
//注意这里让父进程结束了
[root@localhost ~]# cat data.txt
thread started...
hi, it's a flag to control flow
parent about to fork....
preparing locks...
child returned from fork
parent about to fork....
preparing locks...
parent returned from fork
[root@localhost ~]#

  至此,整个过程完全复现。所以再重复一下,造成文章开头两次输出存在差别的根本原因是:

  • 标准IO函数会有默认buffer的存在,输出到文件的话是full buffer,而输出到终端的话是line buffer
  • fork子进程会继承大部分父进程的资源,其中就包括打开的文件描述符,还有标准IO 函数所分配的buffer区域!

4. 写在最后

  本文笔者没有系统介绍GDB调试多线程和多进程的用法,因为本文的重点在于如何分析并解决一个问题,而非某个工具的使用。本文的程序也并不具有实用性,只是笔者根据问题修改出来的代码。例如pthread_atfork本来的目的是保证多线程程序在fork时锁状态的一致性,这里并没有体现这一作用。如果对于笔者以上论述有任何疑问,请及时指正。