GroupBlog

ref: https://www.bilibili.com/video/BV1kt411z7ND

pthread_create

#include <pthread.h>

// @param 
// thread: pointer to the variable that receives the new thread's ID
// attr: pointer to thread attributes (NULL selects the defaults)
// start_routine：函数指针，指向线程创建后要调用的函数
// arg：要传给线程函数的参数

int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
					void *(*start_routine) (void *), void *arg);

Compile and link with -pthread.

pthread_join

       int pthread_join(pthread_t thread, void **retval);
       
       Compile and link with -pthread.

DESCRIPTION
       The  pthread_join()  function  waits  for  the  thread specified by
       thread to terminate.  If that thread has already  terminated,  then
       pthread_join() returns immediately.  **The thread specified by thread
       must be joinable.**

       If retval is not NULL, then pthread_join() copies the  exit  status
       of  the  target thread (i.e., the value that the target thread sup‐
       plied to pthread_exit(3)) into the location pointed to  by  retval.
       If  the target thread was canceled, then PTHREAD_CANCELED is placed
       in the location pointed to by retval.

example1.c

main函数这个线程开始执行，创建了一条子线程myfunc，但myfunc还没来得及执行完，main函数进程就结束了，因此看不到example1.c打印myfunc中的HelloWorld。

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

/*
 * Thread entry point: writes one greeting line to stdout and finishes.
 *
 * args: unused.
 * Returns NULL.
 */
void *myfunc(void *args)
{
        (void)args;     /* no input is needed */

        fputs("Hello World\n", stdout);

        return NULL;
}


// Demonstrates the pitfall of not waiting for a child thread: main starts
// myfunc in a new thread and then returns right away.
int main()
{
   
        pthread_t th;
        // A function name used as a value is a pointer to that function.
        pthread_create(&th, NULL, myfunc, NULL);        // start child thread th running myfunc

        // Deliberately no pthread_join here: returning from main ends the
        // whole process, so myfunc may never get the chance to print.
        return 0;
}

解决方法：使用join进行等待，等待所有子线程结束后再结束main线程

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

/*
 * Thread entry point: writes one greeting line to stdout and finishes.
 *
 * args: unused.
 * Returns NULL.
 */
void *myfunc(void *args)
{
        (void)args;     /* no input is needed */

        fputs("Hello World\n", stdout);

        return NULL;
}


/*
 * Starts myfunc in a child thread and waits for it to finish before
 * main returns.
 */
int main()
{
        pthread_t th;
        /* A function name is already a pointer to the function. */
        void *(*entry)(void *) = myfunc;

        pthread_create(&th, NULL, entry, NULL);

        /* Block until th has terminated; its exit status is not needed. */
        pthread_join(th, NULL);

        return 0;
}

example3.c

从本例可以看出，两条线程的执行进度并不一致：输出的先后顺序由系统调度决定，每次运行都可能不同

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

/*
 * Thread entry point: prints the integers 1 through 50, one per line.
 *
 * args: unused.
 * Returns NULL.
 */
void *myfunc(void *args)
{
        (void)args;

        int n = 0;
        while (n < 50) {
                n++;
                printf("%d\n", n);
        }

        return NULL;
}


/*
 * Runs myfunc in two child threads and waits for both.
 */
int main()
{
        pthread_t workers[2];

        /* Start every worker before joining any of them, so they overlap. */
        for (int k = 0; k < 2; k++) {
                pthread_create(&workers[k], NULL, myfunc, NULL);
        }

        /* Join in creation order. */
        for (int k = 0; k < 2; k++) {
                pthread_join(workers[k], NULL);
        }

        return 0;
}

example4.c

为了更加详细的区分不同的数字是谁打印出来的，这里我们给myfunc函数传参

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

/*
 * Thread entry point: prints "<label>: <n>" for n = 1..50, one per line.
 *
 * args: NUL-terminated string used as the label in front of every number.
 * Returns NULL.
 */
void *myfunc(void *args)
{
        const char *label = args;

        for (int count = 1; count <= 50; count++) {
                printf("%s: %d\n", label, count);
        }

        return NULL;
}


/*
 * Runs myfunc in two child threads, giving each its own label so the
 * printed lines show which thread produced them.
 */
int main()
{
        /* The fourth argument of pthread_create is handed to myfunc as args. */
        char *labels[2] = { " th1", " th2" };
        pthread_t workers[2];

        for (int k = 0; k < 2; k++) {
                pthread_create(&workers[k], NULL, myfunc, labels[k]);
        }

        for (int k = 0; k < 2; k++) {
                pthread_join(workers[k], NULL);
        }

        return 0;
}

example5.c

用两条线程对一个含5000个随机数的数组求和：前2500个数字由th1累加，后2500个数字由th2累加，最后在main函数中把两条线程的结果相加

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

/*
 * Work description for one summing thread.
 *
 * first:  index of the first element to add (inclusive)
 * last:   index one past the final element to add (exclusive)
 * result: written by myfunc with the sum of arr[first] .. arr[last - 1]
 */
typedef struct {
        int first;
        int last;
        int result;
}MY_ARGS;

/* Input values; myfunc only reads this array. */
int arr[5000];

/* Per-thread totals, filled in by main after the threads have been joined. */
int s1 = 0, s2 = 0;

/*
 * Thread entry point: sums arr[first] .. arr[last - 1] and stores the total
 * in the caller's MY_ARGS.
 *
 * args: pointer to a MY_ARGS; `first` and `last` are read, `result` is written.
 *       An empty range (first >= last) yields a result of 0.
 * Returns NULL; the sum is delivered through args->result.
 */
void* myfunc(void* args){
        MY_ARGS *range = args;
        /* Must start at zero: an uninitialised local has an indeterminate value. */
        int s = 0;

        for(int i = range->first; i < range->last; i++){
                s += arr[i];
        }
        range->result = s;

        return NULL;
}


int main()
{
   
        for(int i = 0 ; i < 5000; i++){
   
                arr[i] = rand() % 50;
        }

        /*
        for(int i = 0; i < 5000; i++){
                printf("arr[%d]: %d\n",i ,arr[i]);
        }
        */

        pthread_t th1;
        pthread_t th2;

        MY_ARGS my_args1 = {
   0, 2500, 0};
        MY_ARGS my_args2 = {
   2500, 5000, 0};

        // 第四个参数是往myfunc中传参的参数
        pthread_create(&th1, NULL, myfunc, &my_args1);  // 创建一条子线程th，执行myfunc函数
        pthread_create(&th2, NULL, myfunc, &my_args2);

        pthread_join(th1, NULL);
        pthread_join(th2, NULL);


        s1 = my_args1.result;
        s2 = my_args2.result;
        printf("s1: %d\n", s1);
        printf("s2: %d\n", s2);
        printf("s1 + s2: %d\n", s1 + s2);


        return 0;

}

example6.c

如果两条线程往同一个全局变量里加？ – Race Condition :

A race condition is an undesirable(不良的) situation that occurs when a device or system attempts to perform two or more operations at the same time, but the operations must be done in the proper sequence in order to be done correctly.

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

/* Index range [first, last) that one thread is asked to add up. */
typedef struct {
        int first;
        int last;
} MY_ARGS;

int arr[5000];  /* input values */
int s = 0;      /* one running total that every thread adds into */

/*
 * Thread entry point: adds arr[first] .. arr[last - 1] directly into the
 * global s, one element at a time.
 *
 * The update of s is not synchronised; this example exists to show what
 * happens when two threads write to the same global.
 *
 * args: pointer to a MY_ARGS giving the range to add.
 * Returns NULL.
 */
void *myfunc(void *args)
{
        const MY_ARGS *range = args;
        int pos = range->first;

        while (pos < range->last) {
                s += arr[pos];
                pos++;
        }

        return NULL;
}


int main()
{
   
        for(int i = 0 ; i < 5000; i++){
   
                arr[i] = rand() % 50;
        }

        /*
        for(int i = 0; i < 5000; i++){
                printf("arr[%d]: %d\n",i ,arr[i]);
        }
        */

        pthread_t th1;
        pthread_t th2;

        MY_ARGS my_args1 = {
   0, 2500};
        MY_ARGS my_args2 = {
   2500, 5000};

        // 第四个参数是往myfunc中传参的参数
        pthread_create(&th1, NULL, myfunc, &my_args1);  // 创建一条子线程th，执行myfunc函数
        pthread_create(&th2, NULL, myfunc, &my_args2);

        pthread_join(th1, NULL);
        pthread_join(th2, NULL);


        printf("s: %d\n", s);


        return 0;

}

结果好像与example5.c一致，但是当线程加和数字量一大呢？

example7.c – race condition

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

/* Counter shared by all threads; left unprotected on purpose in this example. */
int s = 0;

/*
 * Thread entry point: increments the global s one million times.
 *
 * The increment is not synchronised. When two threads run this at once,
 * updates can be lost, which is the race condition this example demonstrates.
 *
 * args: unused.
 * Returns NULL. The explicit return matters: a void * function must not
 * fall off its end, because the thread library takes the return value as
 * the thread's exit status.
 */
void* myfunc(void* args){
        (void)args;

        for(int i = 0; i < 1000000; i++){
                s++;
        }

        return NULL;
}

/*
 * Runs myfunc in two threads at once, then prints the expected and the
 * actual value of the shared counter s.
 */
int main()
{
        pthread_t workers[2];

        /* Start both workers before joining either, so they overlap. */
        for (int k = 0; k < 2; k++) {
                pthread_create(&workers[k], NULL, myfunc, NULL);
        }

        /* s is only read once both workers have finished. */
        for (int k = 0; k < 2; k++) {
                pthread_join(workers[k], NULL);
        }

        printf("s理想值应为2000000\n");
        printf("%d\n", s);
}

输出：

[briansun@briansun multithreading]$ gcc example7.c -lpthread -o example7
[briansun@briansun multithreading]$ ./example7 
s理想值应为2000000
1117439
[briansun@briansun multithreading]$ ./example7 
s理想值应为2000000
1116611
[briansun@briansun multithreading]$ ./example7 
s理想值应为2000000
1083574
[briansun@briansun multithreading]$ ./example7 
s理想值应为2000000
1120933
[briansun@briansun multithreading]$ ./example7 
s理想值应为2000000
1217610

为什么？

s++实际上会被计算机分为三步处理：

tmp = read(s)（把s的值从内存读入寄存器）

tmp = tmp + 1（在寄存器中加1）

write(s, tmp)（把结果写回内存中的s）

![图片](https://s3.ax1x.com/2020/12/20/rahAte.md.png)

把s++分解成这三步之后就能看出问题：两条线程所谓“同时”执行s++时，很可能线程A刚把0加成1、还没来得及写回，线程B就读到了旧值0并同样加到1；随后A写入1，B也写入1。虽然执行了两次++操作，但从结果上看s只增加了一次

以上的这种情况就是race condition

What is race condition?

Defined by Wikipedia

A race condition arises in software when a computer program, to operate properly, depends on the sequence or timing of the program’s processes or threads. Critical(临界的) race conditions cause invalid execution and software bugs. Critical race conditions often happen when the processes or threads depend on some shared state. Operations upon shared states are done(执行) in critical sections(临界区段*) that must be mutually exclusive.（加锁） Failure to obey this rule can corrupt(破坏) the shared state.

*临界区段：在并行计算中，同时接入共享的资源会导致不可预估或出现错误的行为，因此，程序中访问共享资源的部分需要以避免并发访问的方式加以保护。（加锁）

example8.c 解决race condition – 代码段加锁

锁：用来锁一段代码，当某段代码被加锁后，多条线程使用这段代码时只允许一条线程使用。

当一条线程抢到这段代码的锁时，它就拥有了这段代码的执行权；在它解锁之前，其他线程执行到加锁处时会被阻塞，必须等持有锁的线程解锁后才能继续执行

man

初始化一个锁

pthread_mutex_t

int pthread_mutex_init(pthread_mutex_t *restrict mutex, const pthread_mutexattr_t *restrict attr);

代码段加锁

int pthread_mutex_lock(pthread_mutex_t *mutex);

代码段解锁

int pthread_mutex_unlock(pthread_mutex_t *mutex);

销毁一个锁

int pthread_mutex_destroy(pthread_mutex_t *mutex);

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

/* Guards every access to s made by myfunc; must be initialised before use. */
pthread_mutex_t lock;

/* Counter shared by all threads. */
int s = 0;

/*
 * Thread entry point: increments the global s one million times, taking
 * `lock` around each individual increment.
 *
 * args: unused.
 * Returns NULL. The explicit return matters: a void * function must not
 * fall off its end, because the thread library takes the return value as
 * the thread's exit status.
 */
void* myfunc(void* args){
	(void)args;

	for(int i = 0; i < 1000000; i++){
		/* Only one thread at a time may execute the increment. */
		pthread_mutex_lock(&lock);
		s++;
		pthread_mutex_unlock(&lock);
	}

	return NULL;
}

/*
 * Runs myfunc in two threads that share the counter s under `lock`, then
 * prints the expected and the actual value of s.
 *
 * Returns 0 on success, 1 if the mutex could not be initialised.
 */
int main()
{
	pthread_t th1;
	pthread_t th2;

	/*
	 * The mutex has to be initialised before any thread that locks it is
	 * created; initialising it afterwards lets a thread lock an
	 * uninitialised mutex.
	 */
	if (pthread_mutex_init(&lock, NULL) != 0) {
		fprintf(stderr, "pthread_mutex_init failed\n");
		return 1;
	}

	pthread_create(&th1, NULL, myfunc, NULL);
	pthread_create(&th2, NULL, myfunc, NULL);

	pthread_join(th1, NULL);
	pthread_join(th2, NULL);

	/* Both workers have been joined, so the mutex is no longer in use. */
	pthread_mutex_destroy(&lock);

	printf("s理想值应为2000000\n");
	printf("实际值：%d\n", s);

	return 0;
}

问题所在：很慢

$ time ./example8
s理想值应为2000000
实际值：2000000

real    0m0.131s
user    0m0.178s
sys     0m0.076s

由于每次s++都会加锁解锁，加锁总共加了2000000次，也是需要时间的

更好的解决方案

如果把加锁解锁放在for循环外面呢？

s理想值应为2000000
实际值：2000000

real    0m0.016s
user    0m0.016s
sys     0m0.000s

是快了很多，但是有必要吗？为什么不直接让一条线程执行两次myfunc，还省去四次加锁解锁的时间

解决方案：让两条线程把需要加和的数据存储在两个变量里，最终再对这两个变量进行最终加和，即example5.c