epoll 解析

首页 > 代码库 > epoll 解析

2024-11-05 16:08:39 210人阅读

概述

epoll 实际上是 poll 的一种改进，它可以处理大批量的句柄。而 poll 又是select 的一种改进。在select 中对所打开的文件描述符个数有一定的限制，该限制由FD_SETSIZE 设置（一般为 1024 或 2048），而且内核中的select 的实现是采用轮询来处理文件描述符集，因此效率低。当文件描述符集中的某个描述符处于可读、可写或异常状态时，select 采用内存拷贝方法通知用户空间。因此，在select 模型中文件描述符个数受限且效率低的问题就很明显。为了解决select 对文件描述符个数的限制，采用了 poll 模型，但是 poll 依然不能解决 select 的效率问题。所以，最终epoll 模型重新对poll 模型进行改进。

epoll 的优点如下所示：

处理大批量文件句柄：一个进程可以处理大批量的文件句柄，可处理文件描述符的个数远大于 2048；
高效率：内核实现中 epoll 是根据每个描述符上面的回调函数实现的，并且只有处于活动状态的套接字才会主动调用该回调函数，其他不活动的套接字并不会去调用，因此，epoll 不必扫描整个文件描述符集，只需要扫描处于活动状态的文件描述符，所以大大提高了效率。
加快内核与用户的消息传递：epoll_wait 只把已就绪的事件拷贝到用户空间，而不必像 select/poll 那样在每次调用时来回拷贝整个文件描述符集。（注意：常见的“内核与用户空间 mmap 同一块内存”的说法与 Linux 内核的实际实现不符，内核的 epoll 实现并没有使用 mmap。）
内核微调：可以根据运行时所需内存动态调整内存大小。

epoll 系统调用

调用函数 epoll_create 创建 epoll 文件描述符，该函数原型如下：

/* epoll 系统调用函数 */

#include <sys/epoll.h>

/*
 * 函数功能：创建epoll文件描述符；
 * 返回值：若成功则返回新创建的文件描述符，否则出错返回-1；
 * 函数原型：
 */
int epoll_create(int size);
/*
 * 参数size是epoll的最大文件描述符个数；
 * 在新的系统内核中size已经不被使用（自 Linux 2.6.8 起被忽略），但仍必须传入大于0的值；
 */

当创建好 epoll 文件描述符之后，接下来对需要监听的相关套接字描述符进行操作，由epoll 操作函数epoll_ctl 实现，其原型如下：

/*
 * 函数功能：操作某个epoll文件描述符；
 * 返回值：若成功则返回0，否则出错返回-1；
 * 函数原型：
 */
int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
/*
 * 参数：
 * epfd：由epoll_create创建的epoll文件描述符；
 * fd：是关联的文件描述符；
 * op：是操作方式，有以下三种操作方式：
 *      EPOLL_CTL_ADD   将fd注册到epfd中；
 *      EPOLL_CTL_MOD   修改已在epfd中注册的fd事件；
 *      EPOLL_CTL_DEL   将fd从epfd中删除；
 *
 * event：指向struct epoll_event 结构，表示需要监听fd的某种事件；
 */
/* struct epoll_event 结构体定义如下 */
/* User data attached to a monitored descriptor.  The members share storage,
   so a caller uses one of them.  The examples in this file store the
   monitored descriptor in `fd` when registering it and read it back from
   the events filled in by epoll_wait. */
typedef union epoll_data {
void        *ptr;    /* pointer-typed user data */
int          fd;     /* descriptor-typed user data (the member used by the examples here) */
uint32_t     u32;    /* 32-bit integer user data */
uint64_t     u64;    /* 64-bit integer user data */
} epoll_data_t;

/* One epoll event record: passed to epoll_ctl to describe what to monitor
   on a descriptor, and filled in by epoll_wait to report what happened. */
struct epoll_event {
uint32_t     events;      /* Epoll events: bit mask of EPOLL* flags, e.g. EPOLLIN | EPOLLET */
epoll_data_t data;        /* User data variable */
};
/*
 * 其中events有如下的取值：
 *  EPOLLIN         表示对应的文件描述符可读；
 *  EPOLLOUT        表示对应的文件描述符可写；
 *  EPOLLPRI        表示对应的文件描述符有紧急数据可读；
 *  EPOLLERR        表示对应的文件描述符发生错误；
 *  EPOLLHUP        表示对应的文件描述符被挂断；
 *  EPOLLET         表示将EPOLL设置为边缘触发模式(Edge Triggered)；
 *  EPOLLONESHOT    表示只监听一次事件，当监听此次事件完成，若想继续监听，则需再次调用 epoll_ctl（使用 EPOLL_CTL_MOD）为该套接字描述符重新设置要监听的事件；
 */

例如：

struct epoll_event ev;
/* 设置与要处理的事件相关的文件描述符 */
ev.data.fd=listenfd;
/* 设置要处理的事件类型 */
ev.events=EPOLLIN|EPOLLET;
/* 注册epoll事件 */
epoll_ctl(epfd,EPOLL_CTL_ADD,listenfd,&ev);

经过上面的操作之后，等待某些事情的发生由函数 epoll_wait 实现，其原型如下：

/*
 * 函数功能：收集在epoll监听事件中已发生的事件；
 * 返回值：若成功则返回所发生的事件数，否则出错返回-1；
 * 函数原型：
 */
int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout);
/*
 * 参数：
 * epfd：由epoll_create创建的epoll文件描述符；
 * events：指向epoll_event结构体，用于保存已发生的事件；
 * maxevents：每次能处理的最大事件数；
 * timeout：等待IO 事件发生的超时时间，单位为毫秒：-1表示一直阻塞，直到有事件发生才返回；0表示非阻塞，即立即返回；大于0表示最多等待指定的毫秒数；
 */

epoll 工作模式

epoll 有两种工作模式：

LT(level triggered)：水平触发是缺省的工作方式，并且同时支持 blocking 和 non-blocking socket。内核告诉你一个文件描述符是否就绪了，可以对这个就绪的fd 进行IO操作。若不进行任何操作（或者数据没有处理完），内核还是会继续通知。
ET(edge-triggered)：边缘触发是高速工作方式，仅当状态发生变化时才获得通知，应配合 non-blocking socket 使用。对于同一批数据内核只通知一次，因此用户收到通知后必须完整地处理事件（例如循环读取直到 read 返回 EAGAIN），否则剩余的数据不会再触发通知。

具体实例可参考文章：How to use epoll? A complete example in C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/epoll.h>
#include <errno.h>

#define MAXEVENTS 64

/* Switch descriptor SFD to non-blocking mode by OR-ing O_NONBLOCK into its
   current file status flags.

   Returns 0 on success.  If either fcntl call fails, the error is reported
   with perror ("fcntl") and -1 is returned.  */
static int
make_socket_non_blocking (int sfd)
{
  int current_flags = fcntl (sfd, F_GETFL, 0);

  if (current_flags != -1
      && fcntl (sfd, F_SETFL, current_flags | O_NONBLOCK) != -1)
    return 0;

  /* Either the query or the update failed.  Nothing ran in between, so
     errno still belongs to the failing fcntl call.  */
  perror ("fcntl");
  return -1;
}

/* Create a TCP socket bound to PORT on the wildcard address.

   PORT is a service name or decimal port number passed to getaddrinfo.
   Every address getaddrinfo returns (IPv4 and IPv6) is tried in order
   until one can be both created and bound.

   Returns the bound (not yet listening) socket descriptor, which the
   caller owns and must close.  Returns -1 after printing a diagnostic to
   stderr if name resolution fails or no address could be bound.  */
static int
create_and_bind (char *port)
{
  struct addrinfo hints;
  struct addrinfo *result, *rp;
  int s, sfd = -1;

  memset (&hints, 0, sizeof hints);
  hints.ai_family = AF_UNSPEC;     /* Return IPv4 and IPv6 choices */
  hints.ai_socktype = SOCK_STREAM; /* We want a TCP socket */
  hints.ai_flags = AI_PASSIVE;     /* All interfaces */

  s = getaddrinfo (NULL, port, &hints, &result);
  if (s != 0)
    {
      fprintf (stderr, "getaddrinfo: %s\n", gai_strerror (s));
      return -1;
    }

  for (rp = result; rp != NULL; rp = rp->ai_next)
    {
      sfd = socket (rp->ai_family, rp->ai_socktype, rp->ai_protocol);
      if (sfd == -1)
        continue;

      if (bind (sfd, rp->ai_addr, rp->ai_addrlen) == 0)
        {
          /* We managed to bind successfully! */
          break;
        }

      /* This candidate did not work; drop it and mark that we currently
         hold no usable descriptor.  */
      close (sfd);
      sfd = -1;
    }

  /* Release the address list on every path.  The success/failure decision
     below relies on sfd, not on rp, because rp points into the list that
     has just been freed.  */
  freeaddrinfo (result);

  if (sfd == -1)
    {
      fprintf (stderr, "Could not bind\n");
      return -1;
    }

  return sfd;
}

/* Entry point of a single-threaded, edge-triggered epoll server.

   Usage: PROGRAM PORT

   The program binds a listening TCP socket to PORT, registers it with an
   epoll instance, and then loops forever: new connections are accepted
   and registered, and everything clients send is copied to standard
   output.  A connection is closed on end-of-file or on a read error.

   Unrecoverable setup or runtime errors are reported and the process is
   terminated with abort ().  The event loop has no exit condition, so the
   cleanup code after it is only there for completeness.  */
int
main (int argc, char *argv[])
{
  int sfd, s;
  int efd;
  struct epoll_event event;
  struct epoll_event *events;

  if (argc != 2)
    {
      fprintf (stderr, "Usage: %s [port]\n", argv[0]);
      exit (EXIT_FAILURE);
    }

  sfd = create_and_bind (argv[1]);
  if (sfd == -1)
    abort ();

  s = make_socket_non_blocking (sfd);
  if (s == -1)
    abort ();

  s = listen (sfd, SOMAXCONN);
  if (s == -1)
    {
      perror ("listen");
      abort ();
    }

  efd = epoll_create1 (0);
  if (efd == -1)
    {
      perror ("epoll_create");
      abort ();
    }

  /* Zero the whole record first: only the `fd` member of the data union
     is assigned below, which would otherwise leave the remaining bytes
     indeterminate when the structure is handed to epoll_ctl.  */
  memset (&event, 0, sizeof event);
  event.data.fd = sfd;
  event.events = EPOLLIN | EPOLLET;
  s = epoll_ctl (efd, EPOLL_CTL_ADD, sfd, &event);
  if (s == -1)
    {
      perror ("epoll_ctl");
      abort ();
    }

  /* Buffer where events are returned */
  events = calloc (MAXEVENTS, sizeof *events);
  if (events == NULL)
    {
      perror ("calloc");
      abort ();
    }

  /* The event loop */
  while (1)
    {
      int n, i;

      n = epoll_wait (efd, events, MAXEVENTS, -1);
      if (n == -1)
        {
          /* A signal interrupting the wait is harmless; anything else
             would make this loop spin forever, so stop instead.  */
          if (errno == EINTR)
            continue;

          perror ("epoll_wait");
          abort ();
        }

      for (i = 0; i < n; i++)
        {
          if ((events[i].events & EPOLLERR) ||
              (events[i].events & EPOLLHUP) ||
              (!(events[i].events & EPOLLIN)))
            {
              /* An error has occurred on this fd, or the socket is not
                 ready for reading (why were we notified then?) */
              fprintf (stderr, "epoll error\n");
              close (events[i].data.fd);
              continue;
            }

          else if (sfd == events[i].data.fd)
            {
              /* We have a notification on the listening socket, which
                 means one or more incoming connections. */
              while (1)
                {
                  /* sockaddr_storage is large enough for any address
                     family.  A plain struct sockaddr is too small for
                     IPv6 peers, and accept would then report a length
                     larger than the buffer handed to getnameinfo.  */
                  struct sockaddr_storage in_addr;
                  socklen_t in_len;
                  int infd;
                  char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];

                  in_len = sizeof in_addr;
                  infd = accept (sfd, (struct sockaddr *) &in_addr, &in_len);
                  if (infd == -1)
                    {
                      if ((errno == EAGAIN) ||
                          (errno == EWOULDBLOCK))
                        {
                          /* We have processed all incoming
                             connections. */
                          break;
                        }

                      /* Transient failures: keep draining the queue.  In
                         edge-triggered mode the connections still
                         pending would not be reported again.  */
                      if ((errno == EINTR) ||
                          (errno == ECONNABORTED))
                        continue;

                      perror ("accept");
                      break;
                    }

                  s = getnameinfo ((struct sockaddr *) &in_addr, in_len,
                                   hbuf, sizeof hbuf,
                                   sbuf, sizeof sbuf,
                                   NI_NUMERICHOST | NI_NUMERICSERV);
                  if (s == 0)
                    {
                      printf("Accepted connection on descriptor %d "
                             "(host=%s, port=%s)\n", infd, hbuf, sbuf);
                    }

                  /* Make the incoming socket non-blocking and add it to the
                     list of fds to monitor. */
                  s = make_socket_non_blocking (infd);
                  if (s == -1)
                    abort ();

                  event.data.fd = infd;
                  event.events = EPOLLIN | EPOLLET;
                  s = epoll_ctl (efd, EPOLL_CTL_ADD, infd, &event);
                  if (s == -1)
                    {
                      perror ("epoll_ctl");
                      abort ();
                    }
                }
              continue;
            }
          else
            {
              /* We have data on the fd waiting to be read. Read and
                 display it. We must read whatever data is available
                 completely, as we are running in edge-triggered mode
                 and won't get a notification again for the same
                 data. */
              int done = 0;

              while (1)
                {
                  ssize_t count;
                  ssize_t written;
                  char buf[512];

                  count = read (events[i].data.fd, buf, sizeof buf);
                  if (count == -1)
                    {
                      /* An interrupted read carries no information about
                         the connection; simply try again.  */
                      if (errno == EINTR)
                        continue;

                      /* If errno == EAGAIN, that means we have read all
                         data. So go back to the main loop. */
                      if ((errno != EAGAIN) &&
                          (errno != EWOULDBLOCK))
                        {
                          perror ("read");
                          done = 1;
                        }
                      break;
                    }
                  else if (count == 0)
                    {
                      /* End of file. The remote has closed the
                         connection. */
                      done = 1;
                      break;
                    }

                  /* Write the buffer to standard output.  write may
                     accept fewer bytes than requested, so keep going
                     until the whole chunk has been emitted.  */
                  written = 0;
                  while (written < count)
                    {
                      ssize_t w = write (1, buf + written,
                                         (size_t) (count - written));
                      if (w == -1)
                        {
                          if (errno == EINTR)
                            continue;

                          perror ("write");
                          abort ();
                        }
                      written += w;
                    }
                }

              if (done)
                {
                  printf ("Closed connection on descriptor %d\n",
                          events[i].data.fd);

                  /* Closing the descriptor will make epoll remove it
                     from the set of descriptors which are monitored. */
                  close (events[i].data.fd);
                }
            }
        }
    }

  free (events);

  close (efd);
  close (sfd);

  return EXIT_SUCCESS;
}

参考资料：

《epoll详解》

《Epoll详解及源码分析》

epoll 解析

声明：以上内容来自用户投稿及互联网公开渠道收集整理发布，本网站不拥有所有权，未作人工编辑处理，也不承担相关法律责任，若内容有误或涉及侵权可进行投诉：投诉/举报工作人员会在5个工作日内联系你，一经查实，本站将立刻删除涉嫌侵权内容。

联系
我们

首页 > 代码库 > epoll 解析

epoll 解析

概述

epoll 系统调用

epoll 工作模式

看完仍有疑问？有类似问题直接问程序猿