关于 I/O multiplexing:
—————-这里简单介绍一下,详细请参考UNP/ linux/UNIX系统编程手册下册
其实“I/O多路复用”这个坑爹翻译可能是这个概念在中文里面如此难理解的原因。所谓的I/O多路复用在英文中其实叫 I/O multiplexing,基本是这个意思:
I/O multiplexing 这里面的 multiplexing 指的其实是在单个线程中,通过记录、跟踪每一个 socket(I/O流)的状态来同时管理多个I/O流。发明它的原因,是尽可能地提高服务器的吞吐能力。
epoll和read 都是阻塞的
read 和 epoll等阻塞的内容不一样,前者阻塞在整个IO调用上,后者阻塞在等待IO事件通知(多路复用就是多个文件句柄的多个状态变更通知)
select poll epoll不管是单路还是多路,都需要有个线程在那阻塞等事件通知,通知有数据了,就调用你的回调函数; 对用户程序来说,就是一个模拟的AIO。
就一个单刀多置开关,哪搞事情,就连哪。
第一套:select:
struct fd_set { __fd_mask_bits[N] };
先来几个结构体,存放要监听的读写,异常事件。
(1) fd_set read_fds; //读
fd_set write_fds; //写
(2) FD_ZERO( &read_fds );
FD_ZERO( &write_fds );
listenfd = socket(...); bind(listenfd, ...); listen(listenfd, ...); //listen() 只返回 0 或 -1,监听描述符来自 socket()
(3)
while(1)
FD_SET(listenfd, &read_fds);
FD_SET(connfd, &read_fds);
FD_SET(connfd, &write_fds);
int select(int nfds 最大文件描述符加1, fd_set *readfds 可读, fd_set *writefds 可写, fd_set *exceptfds 异常, struct timeval *timeout 超时时间);
if FD_ISSET(connfd, &read_fds) //发生读事件
if FD_ISSET(connfd, &write_fds) //发生写事件
相关函数, 宏定义:
FD_ZERO(fd_set *fdset); //清空集合
FD_SET(int fd, fd_set *fdset); //将一个给定的文件描述符加入集合之中
FD_CLR(int fd, fd_set *fdset); //将一个给定的文件描述符从集合中删除
int FD_ISSET(int fd, fd_set *fdset); // 检查指定的文件描述符是否在集合中(select 返回后仍在集合中,即表示该描述符已就绪)
select回射服务器:
#include <iostream>
#include<mynet.h>
using namespace std;
#define BUF_SIZE 1024
#define LISTEN_MAX 1000
/*
 * TCP echo server multiplexed with select().
 *
 * Listens on 127.0.0.1:10086. Connected descriptors are kept in client[]
 * (-1 marks a free slot) and mirrored in allset; whatever a client sends is
 * written straight back to it.
 *
 * Returns 1 when the listening socket cannot be set up or select() fails;
 * the event loop has no other exit.
 */
int main() {
    cout << "TCP select 回射服务器!" << endl;

    int listenfd = socket(AF_INET, SOCK_STREAM, 0);
    if (listenfd < 0) {
        printf("socket failed\n");
        return 1;
    }
    int resuse = 1;
    setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, &resuse, sizeof(resuse));

    struct sockaddr_in servaddr;
    bzero(&servaddr, sizeof(servaddr));
    servaddr.sin_family = AF_INET;
    servaddr.sin_port = htons(10086);
    inet_pton(AF_INET, "127.0.0.1", &servaddr.sin_addr);
    if (bind(listenfd, (struct sockaddr*)&servaddr, sizeof(servaddr)) < 0 ||
        listen(listenfd, LISTEN_MAX) < 0) {
        printf("bind/listen failed\n");
        close(listenfd);
        return 1;
    }

    int client[FD_SETSIZE];      /* connected client fds, -1 == free slot */
    for (int i = 0; i < FD_SETSIZE; i++) {
        client[i] = -1;
    }
    int maxfd = listenfd;        /* highest descriptor handed to select() */
    int maxi = -1;               /* highest index in client[] ever used */

    fd_set rset, allset;
    FD_ZERO(&allset);
    FD_SET(listenfd, &allset);

    char buf[BUF_SIZE];

    while (1) {
        /* select() overwrites its argument, so work on a copy every round. */
        rset = allset;
        printf("selecting\n");
        int nready = select(maxfd + 1, &rset, NULL, NULL, NULL);
        printf("%d\n", nready);
        printf("selected\n");
        if (nready < 0) {
            /* rset is unspecified after a failure; do not inspect it. */
            printf("select failed\n");
            break;
        }

        if (FD_ISSET(listenfd, &rset)) {
            struct sockaddr_in cliaddr;
            socklen_t cli_len = sizeof(cliaddr);
            int connfd = accept(listenfd, (struct sockaddr*)&cliaddr, &cli_len);
            if (connfd < 0) {
                printf("accept failed\n");
            } else {
                printf("connfd: %d", connfd);
                printf("Get new one client\n");

                /* Look for a free slot across the whole table, not just up to maxfd. */
                int slot = 0;
                while (slot < FD_SETSIZE && client[slot] >= 0) {
                    slot++;
                }
                if (slot == FD_SETSIZE || connfd >= FD_SETSIZE) {
                    /* No room, or the fd cannot be represented in an fd_set. */
                    printf("too many clients\n");
                    close(connfd);
                } else {
                    client[slot] = connfd;
                    FD_SET(connfd, &allset);
                    if (connfd > maxfd)
                        maxfd = connfd;
                    if (slot > maxi)
                        maxi = slot;
                }
            }
            if (--nready <= 0)
                continue;
        }

        for (int i = 0; i <= maxi; i++) {
            int sockfd = client[i];
            if (sockfd < 0 || !FD_ISSET(sockfd, &rset))
                continue;

            bool drop = false;
            ssize_t n = read(sockfd, buf, sizeof(buf));
            if (n <= 0) {
                /* 0 == orderly shutdown, <0 == error: either way the client is gone. */
                printf("lost a client\n");
                drop = true;
            } else {
                /* write() may accept fewer bytes than asked; loop until all are echoed. */
                ssize_t sent = 0;
                while (sent < n) {
                    ssize_t w = write(sockfd, buf + sent, n - sent);
                    if (w <= 0) {
                        drop = true;
                        break;
                    }
                    sent += w;
                }
            }
            if (drop) {
                close(sockfd);
                FD_CLR(sockfd, &allset);
                client[i] = -1;
            }

            if (--nready <= 0)
                break;
        }
    }

    /* Only reached when select() failed: release every descriptor. */
    for (int i = 0; i <= maxi; i++) {
        if (client[i] >= 0)
            close(client[i]);
    }
    close(listenfd);
    return 1;
}
第二套:poll
fcntl(fd, F_SETFL, O_NONBLOCK)
pollfd fds[]; //要监听的事件集合
struct pollfd {
int fd; /* file descriptor */
short events; /* requested events */
short revents; /* returned events */
};
fds[0].fd = listenfd;
fds[0].events = POLLIN | POLLERR; //监听fd 只关注读取、错误事件,不可能有写事件
fds[0].revents = 0;
while(1)
{
ret = poll(fds, user_counter+1, -1)
//int poll(struct pollfd *fds, nfds_t nfds, int timeout);
for(i = 0; i <= user_counter; i++) //遍历 fds[0..user_counter]
{
if(fds[i].fd == listenfd && fds[i].revents & POLLIN)
{
/*
*/
if fds[i].revents & POLLIN //读事件
if fds[i].revents & POLLOUT //写事件
}
poll 回射服务器:
/*************************************************************************
> File Name: server.cpp
> Author: dulun
> Mail: dulun@xiyoulinux.org
> Created Time: 2016年07月19日 星期二 11时26分40秒
************************************************************************/
#include<iostream>
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<algorithm>
#include<sys/socket.h>
#include<sys/types.h>
#include<netinet/in.h>
#include<arpa/inet.h>
#include<assert.h>
#include<fcntl.h>
#include<poll.h>
#include<errno.h>
#include<unistd.h>
using namespace std;
#define USER_LIMIT 100
#define BUFFER_SIZE 64
#define FD_LIMIT 65535
/*
 * Per-connection state. The poll server keeps a table of these and indexes
 * it by the connection's file descriptor.
 */
struct client_data
{
    struct sockaddr_in address;   /* peer address stored when the connection is accepted */
    char *write_buff;             /* data queued for sending to this client; NULL when nothing is pending */
    char buf[BUFFER_SIZE];        /* buffer recv() fills with this client's incoming data */
};
/*
 * Put fd into non-blocking mode by adding O_NONBLOCK to its status flags.
 * Returns the flags as they were before the change (the F_GETFL result),
 * so a caller can restore them later.
 */
int setnonblocking( int fd )
{
    const int previous_flags = fcntl(fd, F_GETFL);
    fcntl(fd, F_SETFL, previous_flags | O_NONBLOCK);
    return previous_flags;
}
int main()
{
const char *ip = "127.0.0.1";
int port = 10086;
struct sockaddr_in address;
bzero(&address, sizeof(address));
address.sin_family = AF_INET;
inet_pton(AF_INET, ip, &address.sin_addr);
address.sin_port = htons(port);
int listenfd = socket(PF_INET, SOCK_STREAM, 0);
int resuse = 1;
setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, &resuse, sizeof(resuse));
assert(listenfd >= 0);
int ret;
ret = bind(listenfd, (struct sockaddr*)&address, sizeof(address)); //命名套接字。
assert(ret != -1);
ret = listen(listenfd, USER_LIMIT); //监听
assert(ret != -1);
client_data * users = new client_data[FD_LIMIT];
pollfd fds[USER_LIMIT+1];
int user_counter = 0;
for(int i = 1; i <= USER_LIMIT; i++)
{
fds[i].fd = -1;
fds[i].events = 0;
}
//初始化第一个
fds[0].fd = listenfd;
fds[0].events = POLLIN | POLLERR;
fds[0].revents = 0;
while(1)
{
ret = poll( fds, user_counter+1, -1 );
if(ret < 0)
{
printf("POLL failed\n");
break;
}
for(int i = 0; i < user_counter+1; i++)
{
if((fds[i].fd == listenfd) && (fds[i].revents & POLLIN) )
{
struct sockaddr_in client_address;
socklen_t client_addrlength = sizeof(client_data);
int connfd = accept(listenfd, (struct sockaddr*)&client_address, &client_addrlength);
printf("accept\n");
if(connfd < 0)
{
printf("errno is : %d \n", errno);
continue;
}
if(user_counter >= USER_LIMIT)
{
const char * info = "too many users\n";
printf("%s", info);
send(connfd, info, strlen(info), 0);
close(connfd);
continue;
}
user_counter++;
users[connfd].address = client_address;
setnonblocking(connfd);
fds[user_counter].fd = connfd;
fds[user_counter].events = POLLIN | POLLERR;
fds[user_counter].revents = 0;
printf("comes a new user, now have %d users\n", user_counter);
}
else if(fds[i].revents & POLLERR) //ERROR , fds[i].revent & POLLERR 为真,表示,发生错误,下同
{
printf("get an error form %d\n", fds[i].fd);
char errors[100];
memset(errors, 0, sizeof(errors));
socklen_t length = sizeof(errors);
if( getsockopt(fds[i].fd, SOL_SOCKET, SO_ERROR, &errors, &length) < 0)
{
printf("get socket option failed\n");
}
continue;
}
else if(fds[i].revents & POLLRDHUP)
{
//users[fds[i].fd] = users[fds[user_counter].fd];
close(fds[i].fd);
//fds[i] = fds[]
printf("a client left\n");
sleep(1);
}
else if(fds[i].revents & POLLIN)
{
int connfd = fds[i].fd;
memset(users[connfd].buf, 0, BUFFER_SIZE);
ret = recv(connfd, users[connfd].buf, BUFFER_SIZE-1, 0);
send(connfd, users[connfd].buf, BUFFER_SIZE-1, 0);
if(ret == 0)
{
close(connfd);
printf("one client left\n");
user_counter--;
i--;
continue;
}
printf("get %d bytes of client data %s from %d \n", ret, users[connfd].buf, connfd);
//sleep(10000); //test!
if(ret < 0)
{
if(errno != EAGAIN)
{
close(connfd);
users[fds[i].fd] = users[fds[user_counter].fd];
fds[i] = fds[user_counter];
i--;
user_counter--;
}
}
else if( ret == 0 )
{
}
else{
for(int j = 1; j <= user_counter; j++)
{
if(fds[j].fd == connfd) //不等于自己,不给自己设置读写事件
{
continue;
}
//fds[j].events |= ~POLLIN; //取消读事件
fds[j].events |= POLLOUT; //设置写事件,下次while循环,直接进下一个else if
users[fds[j].fd].write_buff = users[connfd].buf;
}
}
}
else if(fds[i].revents & POLLOUT) //监听到写事件(次数:user_counter-1)
{
int connfd = fds[i].fd;
if( !users[connfd].write_buff )
{
continue;
}
ret = send(connfd, users[connfd].write_buff, strlen(users[connfd].write_buff)-1, 0); //往过发
users[connfd].write_buff = NULL; //清缓存
// fds[i].events |= ~POLLOUT; //取消写时间
// fds[i].events |= POLLIN; //设置读事件
}
}
}
delete []users;
close(listenfd);
return 0;
}
poll零拷贝客户端:
(splice + pipe 重定向标准输入到套接字描述符)
/*************************************************************************
> File Name: client.c
> Author: dulun
> Mail: dulun@xiyoulinux.org
> Created Time: 2016年07月19日 星期二 11时01分09秒
************************************************************************/
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<unistd.h>
#include<netinet/in.h>
#include<arpa/inet.h>
#include<sys/types.h>
#include<sys/socket.h>
#include<poll.h>
#include<fcntl.h>
#include<assert.h>
#define BUFFER_SIZE 64
/*
 * Interactive client for the server on 127.0.0.1:10086.
 *
 * poll() watches standard input and the socket. Text arriving from the
 * server is printed; text typed on stdin is moved to the socket through a
 * pipe with splice(), so it never passes through a user-space buffer.
 *
 * Returns 1 if the connection cannot be established, 0 otherwise.
 */
int main()
{
    const char * ip = "127.0.0.1";
    int port = 10086;

    struct sockaddr_in server_address;
    bzero( &server_address, sizeof(server_address) );
    server_address.sin_family = AF_INET;
    inet_pton( AF_INET, ip, &server_address.sin_addr );
    server_address.sin_port = htons(port);

    int sockfd = socket( PF_INET, SOCK_STREAM, 0 );
    assert(sockfd >= 0);

    if( connect(sockfd, (struct sockaddr*)&server_address, sizeof(server_address)) < 0 )
    {
        printf("connetcion failed\n");
        close(sockfd);
        return 1;
    }
    printf("connetc over!\n");

    struct pollfd fds[2];
    /* 0: standard input */
    fds[0].fd = 0;
    fds[0].events = POLLIN;
    fds[0].revents = 0;              /* filled in by the kernel */
    /* 1: the connected socket */
    fds[1].fd = sockfd;
    fds[1].events = POLLIN | POLLRDHUP;  /* readable, or peer closed */
    fds[1].revents = 0;

    char read_buf[BUFFER_SIZE];      /* receive buffer */
    int pipefd[2];
    int ret = pipe(pipefd);
    assert(ret != -1);

    int running = 1;
    while(running)
    {
        ret = poll(fds, 2, -1);      /* block until one of the two fds is ready */
        if(ret < 0)
        {
            printf("poll failed\n");
            break;
        }

        if(fds[1].revents & (POLLRDHUP | POLLHUP | POLLERR))
        {
            printf("server close the connetcion\n");
            break;
        }
        else if( fds[1].revents & POLLIN )
        {
            memset( read_buf, 0, BUFFER_SIZE );
            ssize_t got = recv(fds[1].fd, read_buf, BUFFER_SIZE - 1, 0);
            if(got <= 0)
            {
                /* 0 == server closed, <0 == receive error: nothing more to read. */
                printf("server close the connetcion\n");
                break;
            }
            printf("%s\n", read_buf);
        }

        if(fds[0].revents & POLLIN)
        {
            /* stdin -> pipe write end */
            ssize_t queued = splice( 0, NULL, pipefd[1], NULL, 32768, SPLICE_F_MORE | SPLICE_F_MOVE);
            if(queued < 0)
            {
                printf("splice failed\n");
                break;
            }
            if(queued == 0)
            {
                break;               /* end of input: stop instead of spinning in poll() */
            }
            /* pipe read end -> socket; repeat until everything queued has left the pipe */
            while(queued > 0)
            {
                ssize_t moved = splice( pipefd[0], NULL, sockfd, NULL, queued, SPLICE_F_MORE | SPLICE_F_MOVE);
                if(moved <= 0)
                {
                    printf("splice failed\n");
                    running = 0;
                    break;
                }
                queued -= moved;
            }
        }
        else if(fds[0].revents & (POLLHUP | POLLERR))
        {
            break;                   /* stdin was closed without any data pending */
        }
    }

    close(pipefd[0]);
    close(pipefd[1]);
    close(sockfd);
    return 0;
}
第三套 epoll:
传统的select/poll另一个致命弱点就是:当你拥有一个很大的socket集合时,由于网络延时,任一时间只有部分的socket是“活跃”的,但是select/poll每次调用都会线性扫描全部的集合,导致效率随集合大小线性下降,即 O(n)。
但是epoll不存在这个问题,它只会对”活跃”的socket进行 操作—这是因为在内核实现中epoll是根据每个fd上面的callback函数实现的。那么,只有”活跃”的socket才会主动的去调用 callback函数,其他idle状态socket则不会,在这点上,epoll实现了一个”伪”AIO,因为这时候推动力在os内核。在一些 benchmark中,如果所有的socket基本上都是活跃的—比如一个高速LAN环境,epoll并不比select/poll有什么效率,相 反,如果过多使用epoll_ctl,效率相比还有稍微的下降。但是一旦使用idle connections模拟WAN环境,epoll的效率就远在select/poll之上了。
要使用epoll只需要这三个系统调 用:epoll_create(2), epoll_ctl(2), epoll_wait(2)。
epoll_create(2) creates an epoll instance and returns a file descriptor referring to that instance.
(The more recent epoll_create1(2) extends the functionality of epoll_create(2).)
//epoll_create(系统调用)创建一个 epoll 实例,并且返回一个指向该实例的文件描述符
Interest in particular file descriptors is then registered via epoll_ctl(2). The set of file descriptors currently registered on an epoll instance is sometimes called an epoll set.
//(对感兴趣的特定文件描述符,通过 epoll_ctl(系统调用)进行注册);当前注册在某个 epoll 实例上的文件描述符集合,有时被称为 epoll set
epoll_wait(2) waits for I/O events, blocking the calling thread if no events are currently available.
//epoll_wait(系统调用)等待 I/O 事件;如果当前没有事件可用,则阻塞调用它的线程
结构体:
epoll_event events[]
epollfd = epoll_create(1) //size 参数必须大于 0(传 0 会失败并返回 EINVAL);返回指向内核中 epoll 实例的文件描述符
epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event); //加入一个描述符到内核监听队列
基本使用逻辑
(a) 使用epoll_create()函数创建epoll文件描述符;其size参数最初用于提示可管理的最大socket描述符数目,自Linux 2.6.8起该值被忽略,但必须大于0。
(b) 创建与epoll关联的接收线程,应用程序可以创建多个接收线程来处理epoll上的读通知事件,线程的数量依赖于程序的具体需要。
(c) 创建一个侦听socket描述符ListenSock;将该描述符设定为非阻塞模式,调用Listen()函数在套接字上侦听有无新的连接请求,
在 epoll_event结构中设置要处理的事件类型EPOLLIN,工作方式为 EPOLLET(边缘触发),以提高工作效率,同时使用epoll_ctl()注册事件,最后启动网络监视线程。
(d) 网络监视线程启动循环,epoll_wait()等待epoll事件发生。
(e) 如果epoll事件表明有新的连接请求,则调用accept()函数,将用户socket描述符添加到epoll_data联合体,同时设定该描述符为非 阻塞,并在epoll_event结构中设置要处理的事件类型为读和写,工作方式为epoll_ET.
(f) 如果epoll事件表明socket描述符上有数据可读,则将该socket描述符加入可读队列,通知接收线程读入数据,并将接收到的数据放入到接收数据 的链表中,经逻辑处理后,将反馈的数据包放入到发送数据链表中,等待由发送线程发送。
epoll 回射服务器:
/*************************************************************************
> File Name: server2.cpp
> Author: dulun
> Mail: dulun@xiyoulinux.org
> Created Time: 2016年07月20日 星期三 09时12分28秒
************************************************************************/
#include<iostream>
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<algorithm>
#include<sys/socket.h>
#include<sys/types.h>
#include<netinet/in.h>
#include<arpa/inet.h>
#include<assert.h>
#include<errno.h>
#include<unistd.h>
#include<fcntl.h>
#include<sys/epoll.h>
#include<pthread.h>
#define MAX_EVENT_NUMBER 1024
#define TCP_BUFFER_SIZE 512
#define UDP_BUFFER_SIZE 1024
/*
 * Add O_NONBLOCK to the status flags of fd.
 * Returns the flag word that was in effect before the change.
 */
int setnonblocking(int fd)
{
    int saved = fcntl(fd, F_GETFL);
    fcntl(fd, F_SETFL, saved | O_NONBLOCK);
    return saved;
}
/*
 * Register fd with the epoll instance epollfd for edge-triggered read
 * readiness (EPOLLIN | EPOLLET), then switch fd to non-blocking mode.
 */
void addfd(int epollfd, int fd)
{
    epoll_event registration;
    registration.events = EPOLLIN | EPOLLET;
    registration.data.fd = fd;
    epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &registration);

    setnonblocking(fd);
}
/*
 * epoll()-based echo server that serves TCP and UDP on 127.0.0.1:10086.
 *
 * Every descriptor is registered through addfd(), i.e. edge-triggered and
 * non-blocking. An edge-triggered event is delivered once per readiness
 * change, so each handler keeps accepting / reading until the call reports
 * that nothing more is available.
 */
int main()
{
    const char * ip = "127.0.0.1";
    int port = 10086;
    int ret = 0;

    /* Zero the address before filling it in; bind() must not see stack garbage. */
    struct sockaddr_in address;
    memset(&address, 0, sizeof(address));
    address.sin_family = AF_INET;
    inet_pton(AF_INET, ip, &address.sin_addr);
    address.sin_port = htons(port);

    int listenfd = socket(PF_INET, SOCK_STREAM, 0);
    assert(listenfd >= 0);
    int resuse = 1;
    setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, &resuse, sizeof(resuse));

    ret = bind(listenfd, (struct sockaddr*)&address, sizeof(address));
    assert(ret != -1);

    ret = listen(listenfd, 5);
    assert(ret != -1);

    /* The UDP socket shares the same address and port. */
    int udpfd = socket(PF_INET, SOCK_DGRAM, 0);
    assert(udpfd >= 0);

    ret = bind(udpfd, (struct sockaddr *)&address, sizeof(address));
    assert(ret != -1);

    epoll_event events[MAX_EVENT_NUMBER];
    int epollfd = epoll_create(5);
    assert(epollfd >= 0);
    addfd(epollfd, listenfd);
    addfd(epollfd, udpfd);

    while(1)
    {
        int number = epoll_wait(epollfd, events, MAX_EVENT_NUMBER, -1);
        if(number < 0)
        {
            if(errno == EINTR)
            {
                continue;   /* interrupted by a signal: wait again */
            }
            printf("epoll failed");
            break;
        }

        for(int i = 0; i < number; i++)
        {
            int sockfd = events[i].data.fd;
            if(sockfd == listenfd)
            {
                /* Several connections may be queued behind one edge: accept them all. */
                while(1)
                {
                    struct sockaddr_in client_address;
                    socklen_t client_addrelength = sizeof(client_address);
                    int connfd = accept(listenfd, (struct sockaddr*)&client_address, &client_addrelength);
                    if(connfd < 0)
                    {
                        break;   /* queue drained (EAGAIN) or accept failed */
                    }
                    addfd(epollfd, connfd);
                }
            }
            else if(sockfd == udpfd)
            {
                char buf[UDP_BUFFER_SIZE];
                /* Drain every datagram that is already queued. */
                while(1)
                {
                    memset(buf, 0, sizeof(buf));
                    struct sockaddr_in client_address;
                    socklen_t client_addresslength = sizeof(client_address);

                    ret = recvfrom(udpfd, buf, UDP_BUFFER_SIZE-1, 0, (struct sockaddr*)&client_address, &client_addresslength);
                    if(ret < 0)
                    {
                        break;   /* nothing left to read, or a receive error */
                    }
                    if(ret > 0)
                    {
                        /* Echo only the bytes that were actually received. */
                        sendto(udpfd, buf, ret, 0, (struct sockaddr *) &client_address, client_addresslength);
                    }
                }
            }
            else if(events[i].events & EPOLLIN)
            {
                char buf[TCP_BUFFER_SIZE];
                while(1)
                {
                    memset(buf, 0, sizeof(buf));
                    ret = recv(sockfd, buf, TCP_BUFFER_SIZE - 1, 0);
                    if(ret < 0)
                    {
                        if(errno == EAGAIN || errno == EWOULDBLOCK)
                        {
                            break;   /* socket drained */
                        }
                        if(errno == EINTR)
                        {
                            continue;
                        }
                        close(sockfd);
                        break;
                    }
                    if(ret == 0)
                    {
                        /* Peer closed: stop here so the fd is closed exactly once. */
                        close(sockfd);
                        break;
                    }
                    /* Echo; MSG_NOSIGNAL keeps a vanished peer from raising SIGPIPE. */
                    send(sockfd, buf, ret, MSG_NOSIGNAL);
                }
            }
            else{
                /* Only EPOLLIN is registered, so this is an error/hang-up on a client. */
                printf("somethin else happened \n");
                close(sockfd);
            }
        }
    }

    close(epollfd);
    close(udpfd);
    close(listenfd);
    return 0;
}