Nginx Parsing HTTP Package、header/post/files/args Sourcecode Analysis

时间:2023-03-09 18:41:27
Nginx Parsing HTTP Package、header/post/files/args Sourcecode Analysis

catalog

1. Nginx源码结构
2. HTTP Request Header解析流程
3. HTTP Request Body解析流程

1. Nginx源码结构

1. core:Nginx的核心源代码,包括常用数据结构的实现以及Nginx内核实现的核心代码
2. event:Nginx事件驱动模型,以及定时器的实现相关代码
3. http:Nginx实现http服务器相关的代码
4. mail:Nginx实现邮件代理服务器相关的代码
5. misc:辅助代码,测试C++头的兼容性,以及对Google_PerfTools的支持
6. os:不同体系结构所提供的系统函数的封装,提供对外统一的系统调用接口

本文的关注重点在于nginx的http模块。http目录和event目录一样,同样包含了存放模块实现源码的modules目录,以及一些负责结构定义、初始化、网络连接的建立、管理、关闭,以及数据报解析、服务器组管理等功能的源码文件。modules目录下的文件实现了HTTP模块的功能

Relevant Link:

http://blog.****.net/chenhanzhun/article/details/42742097

2. HTTP Request Header解析流程

Method SP Request-URI SP HTTP-Version CRLF
general-header
request-header
entity-header

关于HTTP请求报文的详细格式,请参阅另一篇文章

http://www.cnblogs.com/LittleHann/p/5057295.html

0x1: HTTP包接收事件响应

nginx是基于异步事件响应模型的HTTP Server,nginx在初始化阶段,具体是在init process阶段的ngx_event_process_init函数中会为每一个监听套接字分配一个连接结构(ngx_connection_t),并将该连接结构的读事件成员(read)的事件处理函数设置为ngx_event_accept,并且如果没有使用accept互斥锁的话,在这个函数中会将该读事件挂载到nginx的事件处理模型上(poll或者epoll等),反之则会等到init process阶段结束,在工作进程的事件处理循环中,某个进程抢到了accept锁才能挂载该读事件
\nginx-1.7.4\src\event\ngx_event.c

/*
 * Per-worker initialisation of the event subsystem.
 *
 * Visible steps, in order:
 *   1. decide whether the accept mutex is used;
 *   2. initialise the timer subsystem and the selected event module;
 *   3. optionally arm an interval timer (timer_resolution) and size the
 *      fd -> connection table;
 *   4. allocate the connection / read-event / write-event arrays and link
 *      all connections into the free list;
 *   5. bind one connection to every listening socket, set its read handler
 *      to the accept handler and, unless the accept mutex is in use,
 *      register the read event with the event mechanism right away.
 *
 * Returns NGX_OK on success and NGX_ERROR on any allocation or
 * registration failure.  A failing event-module init() terminates the
 * process with exit(2).
 *
 * NOTE: the numeric literals in this excerpt were lost during extraction;
 * they are restored here from the upstream nginx 1.7.4 source.
 */
static ngx_int_t
ngx_event_process_init(ngx_cycle_t *cycle)
{
    ngx_uint_t           m, i;
    ngx_event_t         *rev, *wev;
    ngx_listening_t     *ls;
    ngx_connection_t    *c, *next, *old;
    ngx_core_conf_t     *ccf;
    ngx_event_conf_t    *ecf;
    ngx_event_module_t  *module;

    ccf = (ngx_core_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_core_module);
    ecf = ngx_event_get_conf(cycle->conf_ctx, ngx_event_core_module);

    /* the accept mutex only makes sense with several worker processes */
    if (ccf->master && ccf->worker_processes > 1 && ecf->accept_mutex) {
        ngx_use_accept_mutex = 1;
        ngx_accept_mutex_held = 0;
        ngx_accept_mutex_delay = ecf->accept_mutex_delay;

    } else {
        ngx_use_accept_mutex = 0;
    }

#if (NGX_WIN32)

    /*
     * disable accept mutex on win32 as it may cause deadlock if
     * grabbed by a process which can't accept connections
     */

    ngx_use_accept_mutex = 0;

#endif

#if (NGX_THREADS)
    ngx_posted_events_mutex = ngx_mutex_init(cycle->log, 0);
    if (ngx_posted_events_mutex == NULL) {
        return NGX_ERROR;
    }
#endif

    /* initialise the timer subsystem (the article describes it as the
     * red-black tree that manages all timers) */
    if (ngx_event_timer_init(cycle->log) == NGX_ERROR) {
        return NGX_ERROR;
    }

    /* initialise the event module selected by the configuration */
    for (m = 0; ngx_modules[m]; m++) {
        if (ngx_modules[m]->type != NGX_EVENT_MODULE) {
            continue;
        }

        if (ngx_modules[m]->ctx_index != ecf->use) {
            continue;
        }

        module = ngx_modules[m]->ctx;

        if (module->actions.init(cycle, ngx_timer_resolution) != NGX_OK) {
            /* fatal */
            exit(2);
        }

        break;
    }

#if !(NGX_WIN32)

    if (ngx_timer_resolution && !(ngx_event_flags & NGX_USE_TIMER_EVENT)) {
        struct sigaction  sa;
        struct itimerval  itv;

        ngx_memzero(&sa, sizeof(struct sigaction));
        sa.sa_handler = ngx_timer_signal_handler;
        sigemptyset(&sa.sa_mask);

        if (sigaction(SIGALRM, &sa, NULL) == -1) {
            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                          "sigaction(SIGALRM) failed");
            return NGX_ERROR;
        }

        /* ngx_timer_resolution is in milliseconds: split into s + us */
        itv.it_interval.tv_sec = ngx_timer_resolution / 1000;
        itv.it_interval.tv_usec = (ngx_timer_resolution % 1000) * 1000;
        itv.it_value.tv_sec = ngx_timer_resolution / 1000;
        itv.it_value.tv_usec = (ngx_timer_resolution % 1000) * 1000;

        /* a setitimer() failure is logged but deliberately not fatal */
        if (setitimer(ITIMER_REAL, &itv, NULL) == -1) {
            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                          "setitimer() failed");
        }
    }

    if (ngx_event_flags & NGX_USE_FD_EVENT) {
        struct rlimit  rlmt;

        if (getrlimit(RLIMIT_NOFILE, &rlmt) == -1) {
            ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                          "getrlimit(RLIMIT_NOFILE) failed");
            return NGX_ERROR;
        }

        cycle->files_n = (ngx_uint_t) rlmt.rlim_cur;

        cycle->files = ngx_calloc(sizeof(ngx_connection_t *) * cycle->files_n,
                                  cycle->log);
        if (cycle->files == NULL) {
            return NGX_ERROR;
        }
    }

#endif

    cycle->connections =
        ngx_alloc(sizeof(ngx_connection_t) * cycle->connection_n, cycle->log);
    if (cycle->connections == NULL) {
        return NGX_ERROR;
    }

    c = cycle->connections;

    cycle->read_events = ngx_alloc(sizeof(ngx_event_t) * cycle->connection_n,
                                   cycle->log);
    if (cycle->read_events == NULL) {
        return NGX_ERROR;
    }

    rev = cycle->read_events;
    for (i = 0; i < cycle->connection_n; i++) {
        rev[i].closed = 1;
        rev[i].instance = 1;
#if (NGX_THREADS)
        rev[i].lock = &c[i].lock;
        rev[i].own_lock = &c[i].lock;
#endif
    }

    cycle->write_events = ngx_alloc(sizeof(ngx_event_t) * cycle->connection_n,
                                    cycle->log);
    if (cycle->write_events == NULL) {
        return NGX_ERROR;
    }

    wev = cycle->write_events;
    for (i = 0; i < cycle->connection_n; i++) {
        wev[i].closed = 1;
#if (NGX_THREADS)
        wev[i].lock = &c[i].lock;
        wev[i].own_lock = &c[i].lock;
#endif
    }

    /*
     * Walk the connection array backwards, chaining every entry through
     * its "data" member so that c[0] ends up as the head of the free list.
     */
    i = cycle->connection_n;
    next = NULL;

    do {
        i--;

        c[i].data = next;
        c[i].read = &cycle->read_events[i];
        c[i].write = &cycle->write_events[i];
        c[i].fd = (ngx_socket_t) -1;

        next = &c[i];

#if (NGX_THREADS)
        c[i].lock = 0;
#endif
    } while (i);

    cycle->free_connections = next;
    cycle->free_connection_n = cycle->connection_n;

    /* for each listening socket: take one connection structure */

    ls = cycle->listening.elts;
    for (i = 0; i < cycle->listening.nelts; i++) {

        c = ngx_get_connection(ls[i].fd, cycle->log);

        if (c == NULL) {
            return NGX_ERROR;
        }

        c->log = &ls[i].log;

        c->listening = &ls[i];
        ls[i].connection = c;

        rev = c->read;

        rev->log = c->log;
        /* mark this read event as a "new connection" (accept) event */
        rev->accept = 1;

#if (NGX_HAVE_DEFERRED_ACCEPT)
        rev->deferred_accept = ls[i].deferred_accept;
#endif

        if (!(ngx_event_flags & NGX_USE_IOCP_EVENT)) {
            if (ls[i].previous) {

                /*
                 * delete the old accept events that were bound to
                 * the old cycle read events array
                 */

                old = ls[i].previous->connection;

                if (ngx_del_event(old->read, NGX_READ_EVENT, NGX_CLOSE_EVENT)
                    == NGX_ERROR)
                {
                    return NGX_ERROR;
                }

                old->fd = (ngx_socket_t) -1;
            }
        }

#if (NGX_WIN32)

        if (ngx_event_flags & NGX_USE_IOCP_EVENT) {
            ngx_iocp_conf_t  *iocpcf;

            rev->handler = ngx_event_acceptex;

            if (ngx_use_accept_mutex) {
                continue;
            }

            if (ngx_add_event(rev, 0, NGX_IOCP_ACCEPT) == NGX_ERROR) {
                return NGX_ERROR;
            }

            ls[i].log.handler = ngx_acceptex_log_error;

            iocpcf = ngx_event_get_conf(cycle->conf_ctx, ngx_iocp_module);
            if (ngx_event_post_acceptex(&ls[i], iocpcf->post_acceptex)
                == NGX_ERROR)
            {
                return NGX_ERROR;
            }

        } else {
            rev->handler = ngx_event_accept;

            if (ngx_use_accept_mutex) {
                continue;
            }

            if (ngx_add_event(rev, NGX_READ_EVENT, 0) == NGX_ERROR) {
                return NGX_ERROR;
            }
        }

#else

        /* the read handler of a listening connection is the accept handler */
        rev->handler = ngx_event_accept;

        /*
         * with the accept mutex the listening event is not registered here;
         * per the article, a worker registers it later, once it has
         * acquired the mutex
         */
        if (ngx_use_accept_mutex) {
            continue;
        }

        /* otherwise register the listening socket right away */
        if (ngx_event_flags & NGX_USE_RTSIG_EVENT) {
            if (ngx_add_conn(c) == NGX_ERROR) {
                return NGX_ERROR;
            }

        } else {
            if (ngx_add_event(rev, NGX_READ_EVENT, 0) == NGX_ERROR) {
                return NGX_ERROR;
            }
        }

#endif

    }

    return NGX_OK;
}

处理流程如下

1. 一个用户在浏览器的地址栏内输入一个域名,并且域名解析服务器将该域名解析到一台由nginx监听的服务器上,浏览器会构造对应格式的HTTP请求报文,发送给目标Nginx Server
2. 当一个Nginx工作进程在某个时刻将监听事件挂载上事件处理模型之后,nginx就可以正式地接收并处理客户端发来的请求了
3. nginx的事件处理模型接收到这个读事件之后,会迅速交由之前注册好的事件处理函数ngx_event_accept来处理

\nginx-1.7.4\src\event\ngx_event_accept.c

/*
 * Read-event handler of a listening socket: accept pending connections.
 *
 * For every accepted socket the function obtains a connection structure,
 * creates its memory pool, copies the peer address, sets up a
 * per-connection log, installs the I/O callbacks, marks the initial
 * readiness of the read/write events and finally hands the connection to
 * the listening socket's handler (ls->handler).
 *
 * The loop repeats while ev->available is non-zero.  On accept() errors it
 * either retries (ECONNABORTED, or ENOSYS from accept4) or returns; on
 * EMFILE/ENFILE it additionally disables accept events.
 *
 * NOTE: the numeric literals in this excerpt were lost during extraction;
 * they are restored here from the upstream nginx 1.7.4 source.
 */
void
ngx_event_accept(ngx_event_t *ev)
{
    socklen_t          socklen;
    ngx_err_t          err;
    ngx_log_t         *log;
    ngx_uint_t         level;
    ngx_socket_t       s;
    ngx_event_t       *rev, *wev;
    ngx_listening_t   *ls;
    ngx_connection_t  *c, *lc;
    ngx_event_conf_t  *ecf;
    u_char             sa[NGX_SOCKADDRLEN];
#if (NGX_HAVE_ACCEPT4)
    static ngx_uint_t  use_accept4 = 1;
#endif

    /* timer fired after accept events were disabled: re-enable them */
    if (ev->timedout) {
        if (ngx_enable_accept_events((ngx_cycle_t *) ngx_cycle) != NGX_OK) {
            return;
        }

        ev->timedout = 0;
    }

    ecf = ngx_event_get_conf(ngx_cycle->conf_ctx, ngx_event_core_module);

    if (ngx_event_flags & NGX_USE_RTSIG_EVENT) {
        ev->available = 1;

    } else if (!(ngx_event_flags & NGX_USE_KQUEUE_EVENT)) {
        ev->available = ecf->multi_accept;
    }

    lc = ev->data;
    ls = lc->listening;
    ev->ready = 0;

    ngx_log_debug2(NGX_LOG_DEBUG_EVENT, ev->log, 0,
                   "accept on %V, ready: %d", &ls->addr_text, ev->available);

    do {
        socklen = NGX_SOCKADDRLEN;

        /* take one connection off the listening socket */
#if (NGX_HAVE_ACCEPT4)
        if (use_accept4) {
            s = accept4(lc->fd, (struct sockaddr *) sa, &socklen,
                        SOCK_NONBLOCK);
        } else {
            s = accept(lc->fd, (struct sockaddr *) sa, &socklen);
        }
#else
        s = accept(lc->fd, (struct sockaddr *) sa, &socklen);
#endif

        if (s == (ngx_socket_t) -1) {
            err = ngx_socket_errno;

            if (err == NGX_EAGAIN) {
                ngx_log_debug0(NGX_LOG_DEBUG_EVENT, ev->log, err,
                               "accept() not ready");
                return;
            }

            level = NGX_LOG_ALERT;

            if (err == NGX_ECONNABORTED) {
                level = NGX_LOG_ERR;

            } else if (err == NGX_EMFILE || err == NGX_ENFILE) {
                level = NGX_LOG_CRIT;
            }

#if (NGX_HAVE_ACCEPT4)
            ngx_log_error(level, ev->log, err,
                          use_accept4 ? "accept4() failed" : "accept() failed");

            /* accept4() is not implemented: fall back to accept() for good */
            if (use_accept4 && err == NGX_ENOSYS) {
                use_accept4 = 0;
                ngx_inherited_nonblocking = 0;
                continue;
            }
#else
            ngx_log_error(level, ev->log, err, "accept() failed");
#endif

            if (err == NGX_ECONNABORTED) {
                if (ngx_event_flags & NGX_USE_KQUEUE_EVENT) {
                    ev->available--;
                }

                if (ev->available) {
                    continue;
                }
            }

            /* out of file descriptors: stop accepting for a while */
            if (err == NGX_EMFILE || err == NGX_ENFILE) {
                if (ngx_disable_accept_events((ngx_cycle_t *) ngx_cycle)
                    != NGX_OK)
                {
                    return;
                }

                if (ngx_use_accept_mutex) {
                    if (ngx_accept_mutex_held) {
                        ngx_shmtx_unlock(&ngx_accept_mutex);
                        ngx_accept_mutex_held = 0;
                    }

                    ngx_accept_disabled = 1;

                } else {
                    ngx_add_timer(ev, ecf->accept_mutex_delay);
                }
            }

            return;
        }

#if (NGX_STAT_STUB)
        (void) ngx_atomic_fetch_add(ngx_stat_accepted, 1);
#endif

        /* becomes positive once fewer than 1/8 of the connections are free */
        ngx_accept_disabled = ngx_cycle->connection_n / 8
                              - ngx_cycle->free_connection_n;

        /* bind a connection structure to the newly accepted socket */
        c = ngx_get_connection(s, ev->log);

        if (c == NULL) {
            if (ngx_close_socket(s) == -1) {
                ngx_log_error(NGX_LOG_ALERT, ev->log, ngx_socket_errno,
                              ngx_close_socket_n " failed");
            }

            return;
        }

#if (NGX_STAT_STUB)
        (void) ngx_atomic_fetch_add(ngx_stat_active, 1);
#endif

        c->pool = ngx_create_pool(ls->pool_size, ev->log);
        if (c->pool == NULL) {
            ngx_close_accepted_connection(c);
            return;
        }

        c->sockaddr = ngx_palloc(c->pool, socklen);
        if (c->sockaddr == NULL) {
            ngx_close_accepted_connection(c);
            return;
        }

        ngx_memcpy(c->sockaddr, sa, socklen);

        /* per-connection log structure, filled from ls->log below */
        log = ngx_palloc(c->pool, sizeof(ngx_log_t));
        if (log == NULL) {
            ngx_close_accepted_connection(c);
            return;
        }

        /* set a blocking mode for aio and non-blocking mode for others */

        if (ngx_inherited_nonblocking) {
            if (ngx_event_flags & NGX_USE_AIO_EVENT) {
                if (ngx_blocking(s) == -1) {
                    ngx_log_error(NGX_LOG_ALERT, ev->log, ngx_socket_errno,
                                  ngx_blocking_n " failed");
                    ngx_close_accepted_connection(c);
                    return;
                }
            }

        } else {
            if (!(ngx_event_flags & (NGX_USE_AIO_EVENT|NGX_USE_RTSIG_EVENT))) {
                if (ngx_nonblocking(s) == -1) {
                    ngx_log_error(NGX_LOG_ALERT, ev->log, ngx_socket_errno,
                                  ngx_nonblocking_n " failed");
                    ngx_close_accepted_connection(c);
                    return;
                }
            }
        }

        *log = ls->log;

        /* install the I/O callbacks of the connection */
        c->recv = ngx_recv;
        c->send = ngx_send;
        c->recv_chain = ngx_recv_chain;
        c->send_chain = ngx_send_chain;

        c->log = log;
        c->pool->log = log;

        c->socklen = socklen;
        c->listening = ls;

        /*
         * The local address is taken from the listening structure, i.e. it
         * is the configured listen address.  Per the article, that may be a
         * wildcard, in which case the real local address is resolved later.
         */
        c->local_sockaddr = ls->sockaddr;
        c->local_socklen = ls->socklen;

        c->unexpected_eof = 1;

#if (NGX_HAVE_UNIX_DOMAIN)
        if (c->sockaddr->sa_family == AF_UNIX) {
            c->tcp_nopush = NGX_TCP_NOPUSH_DISABLED;
            c->tcp_nodelay = NGX_TCP_NODELAY_DISABLED;
#if (NGX_SOLARIS)
            /* Solaris's sendfilev() supports AF_NCA, AF_INET, and AF_INET6 */
            c->sendfile = 0;
#endif
        }
#endif

        rev = c->read;
        wev = c->write;

        /* a freshly accepted connection is assumed to be writable */
        wev->ready = 1;

        if (ngx_event_flags & (NGX_USE_AIO_EVENT|NGX_USE_RTSIG_EVENT)) {
            /* rtsig, aio, iocp */
            /* mark the READ event ready as well */
            rev->ready = 1;
        }

        /*
         * deferred accept: per the article, data has already arrived on
         * the connection, so the read event is marked ready
         */
        if (ev->deferred_accept) {
            rev->ready = 1;
#if (NGX_HAVE_KQUEUE)
            rev->available = 1;
#endif
        }

        rev->log = log;
        wev->log = log;

        /*
         * TODO: MT: - ngx_atomic_fetch_add()
         *             or protection by critical section or light mutex
         *
         * TODO: MP: - allocated in a shared memory
         *           - ngx_atomic_fetch_add()
         *             or protection by critical section or light mutex
         */

        c->number = ngx_atomic_fetch_add(ngx_connection_counter, 1);

#if (NGX_STAT_STUB)
        (void) ngx_atomic_fetch_add(ngx_stat_handled, 1);
#endif

#if (NGX_THREADS)
        rev->lock = &c->lock;
        wev->lock = &c->lock;
        rev->own_lock = &c->lock;
        wev->own_lock = &c->lock;
#endif

        if (ls->addr_ntop) {
            /* render the peer address as text into c->addr_text */
            c->addr_text.data = ngx_pnalloc(c->pool, ls->addr_text_max_len);
            if (c->addr_text.data == NULL) {
                ngx_close_accepted_connection(c);
                return;
            }

            c->addr_text.len = ngx_sock_ntop(c->sockaddr, c->socklen,
                                             c->addr_text.data,
                                             ls->addr_text_max_len, 0);
            if (c->addr_text.len == 0) {
                ngx_close_accepted_connection(c);
                return;
            }
        }

#if (NGX_DEBUG)
        {

        ngx_str_t             addr;
        struct sockaddr_in   *sin;
        ngx_cidr_t           *cidr;
        ngx_uint_t            i;
        u_char                text[NGX_SOCKADDR_STRLEN];
#if (NGX_HAVE_INET6)
        struct sockaddr_in6  *sin6;
        ngx_uint_t            n;
#endif

        /* raise the log level if the peer matches a debug_connection entry */
        cidr = ecf->debug_connection.elts;
        for (i = 0; i < ecf->debug_connection.nelts; i++) {
            if (cidr[i].family != (ngx_uint_t) c->sockaddr->sa_family) {
                goto next;
            }

            switch (cidr[i].family) {

#if (NGX_HAVE_INET6)
            case AF_INET6:
                sin6 = (struct sockaddr_in6 *) c->sockaddr;
                for (n = 0; n < 16; n++) {
                    if ((sin6->sin6_addr.s6_addr[n]
                         & cidr[i].u.in6.mask.s6_addr[n])
                        != cidr[i].u.in6.addr.s6_addr[n])
                    {
                        goto next;
                    }
                }
                break;
#endif

#if (NGX_HAVE_UNIX_DOMAIN)
            case AF_UNIX:
                break;
#endif

            default: /* AF_INET */
                sin = (struct sockaddr_in *) c->sockaddr;
                if ((sin->sin_addr.s_addr & cidr[i].u.in.mask)
                    != cidr[i].u.in.addr)
                {
                    goto next;
                }
                break;
            }

            log->log_level = NGX_LOG_DEBUG_CONNECTION|NGX_LOG_DEBUG_ALL;
            break;

        next:
            continue;
        }

        if (log->log_level & NGX_LOG_DEBUG_EVENT) {
            addr.data = text;
            addr.len = ngx_sock_ntop(c->sockaddr, c->socklen, text,
                                     NGX_SOCKADDR_STRLEN, 1);

            ngx_log_debug3(NGX_LOG_DEBUG_EVENT, log, 0,
                           "*%uA accept: %V fd:%d", c->number, &addr, s);
        }

        }
#endif

        if (ngx_add_conn && (ngx_event_flags & NGX_USE_EPOLL_EVENT) == 0) {
            if (ngx_add_conn(c) == NGX_ERROR) {
                ngx_close_accepted_connection(c);
                return;
            }
        }

        log->data = NULL;
        log->handler = NULL;

        /* hand the connection over to the protocol-level handler */
        ls->handler(c);

        if (ngx_event_flags & NGX_USE_KQUEUE_EVENT) {
            ev->available--;
        }

    } while (ev->available);
}

0x2: HTTP Header解析

Nginx将连接的读事件的处理函数设置为ngx_http_process_request_line函数,这个函数用来解析请求行
\nginx-1.7.4\src\http\ngx_http_request.c

/*
 * Read-event handler that reads and parses the HTTP request line.
 *
 * Because the request line may arrive in several pieces, the function is
 * re-entered on every read event: it reads whatever is available, feeds it
 * to ngx_http_parse_request_line() and
 *   - on NGX_OK records the request line, method name, protocol and URI,
 *     selects the virtual server if the URI carried a host, and then either
 *     processes the request directly (versions below HTTP/1.0 have no
 *     headers) or switches the read handler to
 *     ngx_http_process_request_headers();
 *   - on a parse error finalizes the request with 400 Bad Request;
 *   - on NGX_AGAIN with a full buffer asks for a larger header buffer and
 *     finalizes with 414 Request-URI Too Large if none can be provided.
 *
 * NOTE: the numeric literals in this excerpt were lost during extraction;
 * they are restored here from the upstream nginx 1.7.4 source.
 */
static void
ngx_http_process_request_line(ngx_event_t *rev)
{
    ssize_t              n;
    ngx_int_t            rc, rv;
    ngx_str_t            host;
    ngx_connection_t    *c;
    ngx_http_request_t  *r;

    c = rev->data;
    r = c->data;

    ngx_log_debug0(NGX_LOG_DEBUG_HTTP, rev->log, 0,
                   "http process request line");

    /* the handler is also invoked on timeout: close request and connection */
    if (rev->timedout) {
        ngx_log_error(NGX_LOG_INFO, c->log, NGX_ETIMEDOUT, "client timed out");
        c->timedout = 1;
        ngx_http_close_request(r, NGX_HTTP_REQUEST_TIME_OUT);
        return;
    }

    rc = NGX_AGAIN;

    for ( ;; ) {

        if (rc == NGX_AGAIN) {
            /* need more input: read into r->header_in */
            n = ngx_http_read_request_header(r);

            if (n == NGX_AGAIN || n == NGX_ERROR) {
                return;
            }
        }

        /* run the request-line state machine over the buffered data */
        rc = ngx_http_parse_request_line(r, r->header_in);

        if (rc == NGX_OK) {

            /* the request line has been parsed successfully */

            r->request_line.len = r->request_end - r->request_start;
            r->request_line.data = r->request_start;
            r->request_length = r->header_in->pos - r->request_start;

            ngx_log_debug1(NGX_LOG_DEBUG_HTTP, c->log, 0,
                           "http request line: \"%V\"", &r->request_line);

            /* method_end points at the last method character, hence + 1 */
            r->method_name.len = r->method_end - r->request_start + 1;
            r->method_name.data = r->request_line.data;

            if (r->http_protocol.data) {
                r->http_protocol.len = r->request_end - r->http_protocol.data;
            }

            /* per the article this fills r->uri, r->args and r->exten */
            if (ngx_http_process_request_uri(r) != NGX_OK) {
                return;
            }

            /* absolute URI: the request line itself carried a host */
            if (r->host_start && r->host_end) {

                host.len = r->host_end - r->host_start;
                host.data = r->host_start;

                rc = ngx_http_validate_host(&host, r->pool, 0);

                if (rc == NGX_DECLINED) {
                    ngx_log_error(NGX_LOG_INFO, c->log, 0,
                                  "client sent invalid host in request line");
                    ngx_http_finalize_request(r, NGX_HTTP_BAD_REQUEST);
                    return;
                }

                if (rc == NGX_ERROR) {
                    ngx_http_close_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR);
                    return;
                }

                if (ngx_http_set_virtual_server(r, &host) == NGX_ERROR) {
                    return;
                }

                /* remember the host taken from the request line */
                r->headers_in.server = host;
            }

            /* below HTTP/1.0 (i.e. HTTP/0.9) there are no request headers */
            if (r->http_version < NGX_HTTP_VERSION_10) {

                /*
                 * no host was found in the request line: select the
                 * virtual server using the (empty) server name
                 */
                if (r->headers_in.server.len == 0
                    && ngx_http_set_virtual_server(r, &r->headers_in.server)
                       == NGX_ERROR)
                {
                    return;
                }

                /* nothing more to read: process the request right away */
                ngx_http_process_request(r);
                return;
            }


            if (ngx_list_init(&r->headers_in.headers, r->pool, 20,
                              sizeof(ngx_table_elt_t))
                != NGX_OK)
            {
                ngx_http_close_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR);
                return;
            }

            c->log->action = "reading client request headers";

            /* from now on read events are handled by the header parser */
            rev->handler = ngx_http_process_request_headers;
            ngx_http_process_request_headers(rev);

            return;
        }

        if (rc != NGX_AGAIN) {

            /* there was error while a request line parsing */

            ngx_log_error(NGX_LOG_INFO, c->log, 0,
                          ngx_http_client_errors[rc - NGX_HTTP_CLIENT_ERROR]);
            ngx_http_finalize_request(r, NGX_HTTP_BAD_REQUEST);
            return;
        }

        /* NGX_AGAIN: a request line parsing is still incomplete */

        /*
         * pos == end means the buffer is full (as opposed to "not enough
         * data received yet").  Per the article, the first buffer is sized
         * by client_header_buffer_size and the larger ones by
         * large_client_header_buffers; see the nginx documentation for the
         * defaults.
         */
        if (r->header_in->pos == r->header_in->end) {

            rv = ngx_http_alloc_large_header_buffer(r, 1);

            if (rv == NGX_ERROR) {
                ngx_http_close_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR);
                return;
            }

            /* the request line does not fit into a large buffer: 414 */
            if (rv == NGX_DECLINED) {
                r->request_line.len = r->header_in->end - r->request_start;
                r->request_line.data = r->request_start;

                ngx_log_error(NGX_LOG_INFO, c->log, 0,
                              "client sent too long URI");
                ngx_http_finalize_request(r, NGX_HTTP_REQUEST_URI_TOO_LARGE);
                return;
            }
        }
    }
}

1. 读取HTTP请求行(第一行)

ngx_http_read_request_header
\nginx-1.7.4\src\http\ngx_http_request.c

/*
 * Make request-header bytes available in r->header_in.
 *
 * Returns
 *   > 0        number of unparsed bytes now available in the buffer;
 *   NGX_AGAIN  nothing to read yet; a header timeout timer has been armed
 *              (if not already set) and the read event has been handed to
 *              ngx_handle_read_event();
 *   NGX_ERROR  the client closed the connection, recv failed, or the read
 *              event could not be handled; the request has already been
 *              finalized or closed.
 *
 * NOTE: the numeric literals in this excerpt were lost during extraction;
 * they are restored here from the upstream nginx 1.7.4 source.
 */
static ssize_t
ngx_http_read_request_header(ngx_http_request_t *r)
{
    ssize_t                    n;
    ngx_event_t               *rev;
    ngx_connection_t          *c;
    ngx_http_core_srv_conf_t  *cscf;

    c = r->connection;
    rev = c->read;

    /*
     * The caller may get here several times per request: if the buffer
     * still holds unparsed data, return it without touching the socket.
     */
    n = r->header_in->last - r->header_in->pos;

    if (n > 0) {
        return n;
    }

    /* read as much as the remaining buffer space allows */
    if (rev->ready) {
        n = c->recv(c, r->header_in->last,
                    r->header_in->end - r->header_in->last);
    } else {
        /* read event not ready: nothing to read for now */
        n = NGX_AGAIN;
    }

    if (n == NGX_AGAIN) {
        /* arm the header timeout once (client_header_timeout) */
        if (!rev->timer_set) {
            cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
            ngx_add_timer(rev, cscf->client_header_timeout);
        }

        /* hand the read event to the event mechanism */
        if (ngx_handle_read_event(rev, 0) != NGX_OK) {
            ngx_http_close_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR);
            return NGX_ERROR;
        }

        return NGX_AGAIN;
    }

    /* 0 bytes: the peer closed the connection before sending a request */
    if (n == 0) {
        ngx_log_error(NGX_LOG_INFO, c->log, 0,
                      "client prematurely closed connection");
    }

    if (n == 0 || n == NGX_ERROR) {
        c->error = 1;
        c->log->action = "reading client request headers";

        ngx_http_finalize_request(r, NGX_HTTP_BAD_REQUEST);
        return NGX_ERROR;
    }

    r->header_in->last += n;

    return n;
}

2. 解析HTTP请求行

rc = ngx_http_parse_request_line(r, r->header_in);
\Nginx\nginx-1.7.4\src\http\ngx_http_parse.c

/*
 * Parse the HTTP request line ("Method SP Request-URI SP HTTP-Version CRLF")
 * held in buffer b with a resumable finite state machine.
 *
 * The parser copies nothing: it only records pointers into the buffer
 * (request_start, method_end, schema/host/port/uri/args boundaries,
 * http_protocol.data, request_end) plus a few flags (complex_uri,
 * quoted_uri, plus_in_uri, space_in_uri) in the request structure.
 *
 * Returns
 *   NGX_OK                            the whole line was parsed; b->pos
 *                                     points past the terminating LF;
 *   NGX_AGAIN                         the buffer ran out; the state is
 *                                     saved in r->state and parsing resumes
 *                                     at b->pos on the next call;
 *   NGX_HTTP_PARSE_INVALID_METHOD,
 *   NGX_HTTP_PARSE_INVALID_REQUEST,
 *   NGX_HTTP_PARSE_INVALID_09_METHOD  the line is malformed.
 *
 * `usual` is a bitmap of ordinary URI characters defined elsewhere in the
 * same source file.
 *
 * NOTE: the numeric literals in this excerpt were lost during extraction;
 * they are restored here from the upstream nginx 1.7.4 source.
 */
ngx_int_t
ngx_http_parse_request_line(ngx_http_request_t *r, ngx_buf_t *b)
{
    u_char  c, ch, *p, *m;
    /* states of the finite state machine */
    enum {
        sw_start = 0,
        sw_method,
        sw_spaces_before_uri,
        sw_schema,
        sw_schema_slash,
        sw_schema_slash_slash,
        sw_host_start,
        sw_host,
        sw_host_end,
        sw_host_ip_literal,
        sw_port,
        sw_host_http_09,
        sw_after_slash_in_uri,
        sw_check_uri,
        sw_check_uri_http_09,
        sw_uri,
        sw_http_09,
        sw_http_H,
        sw_http_HT,
        sw_http_HTT,
        sw_http_HTTP,
        sw_first_major_digit,
        sw_major_digit,
        sw_first_minor_digit,
        sw_minor_digit,
        sw_spaces_after_digit,
        sw_almost_done
    } state;

    state = r->state;

    for (p = b->pos; p < b->last; p++) {
        ch = *p;

        switch (state) {

        /* HTTP methods: GET, HEAD, POST */
        case sw_start:
            r->request_start = p;

            /* CR/LF in front of the request line are skipped */
            if (ch == CR || ch == LF) {
                break;
            }

            /* a method may consist of 'A'..'Z' and '_' only */
            if ((ch < 'A' || ch > 'Z') && ch != '_') {
                return NGX_HTTP_PARSE_INVALID_METHOD;
            }

            state = sw_method;
            break;

        case sw_method:
            if (ch == ' ') {
                r->method_end = p - 1;
                m = r->request_start;

                /* recognise the method, dispatching on its length */
                switch (p - m) {

                case 3:
                    if (ngx_str3_cmp(m, 'G', 'E', 'T', ' ')) {
                        r->method = NGX_HTTP_GET;
                        break;
                    }

                    if (ngx_str3_cmp(m, 'P', 'U', 'T', ' ')) {
                        r->method = NGX_HTTP_PUT;
                        break;
                    }

                    break;

                case 4:
                    if (m[1] == 'O') {

                        if (ngx_str3Ocmp(m, 'P', 'O', 'S', 'T')) {
                            r->method = NGX_HTTP_POST;
                            break;
                        }

                        if (ngx_str3Ocmp(m, 'C', 'O', 'P', 'Y')) {
                            r->method = NGX_HTTP_COPY;
                            break;
                        }

                        if (ngx_str3Ocmp(m, 'M', 'O', 'V', 'E')) {
                            r->method = NGX_HTTP_MOVE;
                            break;
                        }

                        if (ngx_str3Ocmp(m, 'L', 'O', 'C', 'K')) {
                            r->method = NGX_HTTP_LOCK;
                            break;
                        }

                    } else {

                        if (ngx_str4cmp(m, 'H', 'E', 'A', 'D')) {
                            r->method = NGX_HTTP_HEAD;
                            break;
                        }
                    }

                    break;

                case 5:
                    if (ngx_str5cmp(m, 'M', 'K', 'C', 'O', 'L')) {
                        r->method = NGX_HTTP_MKCOL;
                        break;
                    }

                    if (ngx_str5cmp(m, 'P', 'A', 'T', 'C', 'H')) {
                        r->method = NGX_HTTP_PATCH;
                        break;
                    }

                    if (ngx_str5cmp(m, 'T', 'R', 'A', 'C', 'E')) {
                        r->method = NGX_HTTP_TRACE;
                        break;
                    }

                    break;

                case 6:
                    if (ngx_str6cmp(m, 'D', 'E', 'L', 'E', 'T', 'E')) {
                        r->method = NGX_HTTP_DELETE;
                        break;
                    }

                    if (ngx_str6cmp(m, 'U', 'N', 'L', 'O', 'C', 'K')) {
                        r->method = NGX_HTTP_UNLOCK;
                        break;
                    }

                    break;

                case 7:
                    if (ngx_str7_cmp(m, 'O', 'P', 'T', 'I', 'O', 'N', 'S', ' '))
                    {
                        r->method = NGX_HTTP_OPTIONS;
                    }

                    break;

                case 8:
                    if (ngx_str8cmp(m, 'P', 'R', 'O', 'P', 'F', 'I', 'N', 'D'))
                    {
                        r->method = NGX_HTTP_PROPFIND;
                    }

                    break;

                case 9:
                    if (ngx_str9cmp(m,
                            'P', 'R', 'O', 'P', 'P', 'A', 'T', 'C', 'H'))
                    {
                        r->method = NGX_HTTP_PROPPATCH;
                    }

                    break;
                }

                state = sw_spaces_before_uri;
                break;
            }

            if ((ch < 'A' || ch > 'Z') && ch != '_') {
                return NGX_HTTP_PARSE_INVALID_METHOD;
            }

            break;

        /* space* before URI */
        case sw_spaces_before_uri:

            if (ch == '/') {
                r->uri_start = p;
                state = sw_after_slash_in_uri;
                break;
            }

            /* "| 0x20" folds ASCII letters to lower case */
            c = (u_char) (ch | 0x20);
            if (c >= 'a' && c <= 'z') {
                r->schema_start = p;
                state = sw_schema;
                break;
            }

            switch (ch) {
            case ' ':
                break;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        case sw_schema:

            c = (u_char) (ch | 0x20);
            if (c >= 'a' && c <= 'z') {
                break;
            }

            switch (ch) {
            case ':':
                r->schema_end = p;
                state = sw_schema_slash;
                break;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        case sw_schema_slash:
            switch (ch) {
            case '/':
                state = sw_schema_slash_slash;
                break;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        case sw_schema_slash_slash:
            switch (ch) {
            case '/':
                state = sw_host_start;
                break;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        /* host part of an absolute URI, e.g. http://www.target.com:80 */
        case sw_host_start:

            r->host_start = p;

            if (ch == '[') {
                state = sw_host_ip_literal;
                break;
            }

            state = sw_host;

            /* fall through */

        case sw_host:

            c = (u_char) (ch | 0x20);
            if (c >= 'a' && c <= 'z') {
                break;
            }

            if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') {
                break;
            }

            /* fall through */

        case sw_host_end:

            r->host_end = p;

            switch (ch) {
            case ':':
                state = sw_port;
                break;
            case '/':
                r->uri_start = p;
                state = sw_after_slash_in_uri;
                break;
            case ' ':
                /*
                 * use single "/" from request line to preserve pointers,
                 * if request line will be copied to large client buffer
                 */
                r->uri_start = r->schema_end + 1;
                r->uri_end = r->schema_end + 2;
                state = sw_host_http_09;
                break;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        case sw_host_ip_literal:

            if (ch >= '0' && ch <= '9') {
                break;
            }

            c = (u_char) (ch | 0x20);
            if (c >= 'a' && c <= 'z') {
                break;
            }

            /* further characters accepted inside "[...]" */
            switch (ch) {
            case ':':
                break;
            case ']':
                state = sw_host_end;
                break;
            case '-':
            case '.':
            case '_':
            case '~':
                /* unreserved */
                break;
            case '!':
            case '$':
            case '&':
            case '\'':
            case '(':
            case ')':
            case '*':
            case '+':
            case ',':
            case ';':
            case '=':
                /* sub-delims */
                break;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        case sw_port:
            if (ch >= '0' && ch <= '9') {
                break;
            }

            switch (ch) {
            case '/':
                r->port_end = p;
                r->uri_start = p;
                state = sw_after_slash_in_uri;
                break;
            case ' ':
                r->port_end = p;
                /*
                 * use single "/" from request line to preserve pointers,
                 * if request line will be copied to large client buffer
                 */
                r->uri_start = r->schema_end + 1;
                r->uri_end = r->schema_end + 2;
                state = sw_host_http_09;
                break;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        /* space+ after "http://host[:port] " */
        case sw_host_http_09:
            switch (ch) {
            case ' ':
                break;
            case CR:
                r->http_minor = 9;
                state = sw_almost_done;
                break;
            case LF:
                r->http_minor = 9;
                goto done;
            case 'H':
                r->http_protocol.data = p;
                state = sw_http_H;
                break;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;


        /* check "/.", "//", "%", and "\" (Win32) in URI */
        case sw_after_slash_in_uri:

            /* bitmap lookup: 32 characters per 32-bit word of usual[] */
            if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
                state = sw_check_uri;
                break;
            }

            switch (ch) {
            case ' ':
                r->uri_end = p;
                state = sw_check_uri_http_09;
                break;
            case CR:
                r->uri_end = p;
                r->http_minor = 9;
                state = sw_almost_done;
                break;
            case LF:
                r->uri_end = p;
                r->http_minor = 9;
                goto done;
            case '.':
                r->complex_uri = 1;
                state = sw_uri;
                break;
            case '%':
                r->quoted_uri = 1;
                state = sw_uri;
                break;
            case '/':
                r->complex_uri = 1;
                state = sw_uri;
                break;
#if (NGX_WIN32)
            case '\\':
                r->complex_uri = 1;
                state = sw_uri;
                break;
#endif
            case '?':
                r->args_start = p + 1;
                state = sw_uri;
                break;
            case '#':
                r->complex_uri = 1;
                state = sw_uri;
                break;
            case '+':
                r->plus_in_uri = 1;
                break;
            case '\0':
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            default:
                state = sw_check_uri;
                break;
            }
            break;

        /* check "/", "%" and "\" (Win32) in URI */
        case sw_check_uri:

            if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
                break;
            }

            switch (ch) {
            case '/':
#if (NGX_WIN32)
                if (r->uri_ext == p) {
                    r->complex_uri = 1;
                    state = sw_uri;
                    break;
                }
#endif
                r->uri_ext = NULL;
                state = sw_after_slash_in_uri;
                break;
            case '.':
                r->uri_ext = p + 1;
                break;
            case ' ':
                r->uri_end = p;
                state = sw_check_uri_http_09;
                break;
            case CR:
                r->uri_end = p;
                r->http_minor = 9;
                state = sw_almost_done;
                break;
            case LF:
                r->uri_end = p;
                r->http_minor = 9;
                goto done;
#if (NGX_WIN32)
            case '\\':
                r->complex_uri = 1;
                state = sw_after_slash_in_uri;
                break;
#endif
            case '%':
                r->quoted_uri = 1;
                state = sw_uri;
                break;
            case '?':
                r->args_start = p + 1;
                state = sw_uri;
                break;
            case '#':
                r->complex_uri = 1;
                state = sw_uri;
                break;
            case '+':
                r->plus_in_uri = 1;
                break;
            case '\0':
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        /* space+ after URI */
        case sw_check_uri_http_09:
            switch (ch) {
            case ' ':
                break;
            case CR:
                r->http_minor = 9;
                state = sw_almost_done;
                break;
            case LF:
                r->http_minor = 9;
                goto done;
            case 'H':
                r->http_protocol.data = p;
                state = sw_http_H;
                break;
            default:
                /* the space was part of the URI: re-examine this char */
                r->space_in_uri = 1;
                state = sw_check_uri;
                p--;
                break;
            }
            break;


        /* URI */
        case sw_uri:

            if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
                break;
            }

            switch (ch) {
            case ' ':
                r->uri_end = p;
                state = sw_http_09;
                break;
            case CR:
                r->uri_end = p;
                r->http_minor = 9;
                state = sw_almost_done;
                break;
            case LF:
                r->uri_end = p;
                r->http_minor = 9;
                goto done;
            case '#':
                r->complex_uri = 1;
                break;
            case '\0':
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        /* space+ after URI */
        case sw_http_09:
            switch (ch) {
            case ' ':
                break;
            case CR:
                r->http_minor = 9;
                state = sw_almost_done;
                break;
            case LF:
                r->http_minor = 9;
                goto done;
            case 'H':
                r->http_protocol.data = p;
                state = sw_http_H;
                break;
            default:
                /* the space was part of the URI: re-examine this char */
                r->space_in_uri = 1;
                state = sw_uri;
                p--;
                break;
            }
            break;

        /* the literal "HTTP/" followed by the version number */
        case sw_http_H:
            switch (ch) {
            case 'T':
                state = sw_http_HT;
                break;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        case sw_http_HT:
            switch (ch) {
            case 'T':
                state = sw_http_HTT;
                break;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        case sw_http_HTT:
            switch (ch) {
            case 'P':
                state = sw_http_HTTP;
                break;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        case sw_http_HTTP:
            switch (ch) {
            case '/':
                state = sw_first_major_digit;
                break;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        /* first digit of major HTTP version */
        case sw_first_major_digit:
            if (ch < '1' || ch > '9') {
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }

            r->http_major = ch - '0';
            state = sw_major_digit;
            break;

        /* major HTTP version or dot */
        case sw_major_digit:
            if (ch == '.') {
                state = sw_first_minor_digit;
                break;
            }

            if (ch < '0' || ch > '9') {
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }

            r->http_major = r->http_major * 10 + ch - '0';
            break;

        /* first digit of minor HTTP version */
        case sw_first_minor_digit:
            if (ch < '0' || ch > '9') {
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }

            r->http_minor = ch - '0';
            state = sw_minor_digit;
            break;

        /* minor HTTP version or end of request line */
        case sw_minor_digit:
            if (ch == CR) {
                state = sw_almost_done;
                break;
            }

            if (ch == LF) {
                goto done;
            }

            if (ch == ' ') {
                state = sw_spaces_after_digit;
                break;
            }

            if (ch < '0' || ch > '9') {
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }

            r->http_minor = r->http_minor * 10 + ch - '0';
            break;

        case sw_spaces_after_digit:
            switch (ch) {
            case ' ':
                break;
            case CR:
                state = sw_almost_done;
                break;
            case LF:
                goto done;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
            break;

        /* end of request line */
        case sw_almost_done:
            /* p is at the char after CR, so the line ends at p - 1 */
            r->request_end = p - 1;
            switch (ch) {
            case LF:
                goto done;
            default:
                return NGX_HTTP_PARSE_INVALID_REQUEST;
            }
        }
    }

    /* buffer exhausted: remember where and in which state to resume */
    b->pos = p;
    r->state = state;

    return NGX_AGAIN;

done:

    b->pos = p + 1;

    /* bare-LF terminated line: sw_almost_done never set request_end */
    if (r->request_end == NULL) {
        r->request_end = p;
    }

    /* encode the version as major * 1000 + minor */
    r->http_version = r->http_major * 1000 + r->http_minor;
    r->state = sw_start;

    /* a version value of 9 (HTTP/0.9) is accepted with GET only */
    if (r->http_version == 9 && r->method != NGX_HTTP_GET) {
        return NGX_HTTP_PARSE_INVALID_09_METHOD;
    }

    return NGX_OK;
}

这个函数根据http协议规范中对请求行的定义实现了一个有限状态机,经过这个状态机,nginx会记录请求行中的请求方法(Method),请求uri以及http协议版本在缓冲区中的起始位置,在解析过程中还会记录一些其他有用的信息,以便后面的处理过程中使用。如果解析请求行的过程中没有产生任何问题,该函数会返回NGX_OK;如果请求行不满足协议规范,该函数会立即终止解析过程,并返回相应错误号;如果缓冲区数据不够,该函数返回NGX_AGAIN。在整个解析http请求的状态机中始终遵循着两条重要的原则

1. 减少内存拷贝和回溯
内存拷贝是一个相对比较昂贵的操作,大量的内存拷贝会带来较低的运行时效率。nginx在需要做内存拷贝的地方尽量只拷贝内存的起始和结束地址而不是内存本身,这样做的话仅仅只需要两个赋值操作而已,大大降低了开销,当然这样带来的影响是后续的操作不能修改内存本身,如果修改的话,会影响到所有引用到该内存区间的地方,所以必须很小心的管理,必要的时候需要拷贝一份
2. nginx中最能体现这一思想的数据结构是ngx_buf_t,它用来表示nginx中的缓存。在很多情况下,只需要将一块内存的起始地址和结束地址分别保存在它的pos和last成员中,再将它的memory标志置1,即可表示一块不能修改的内存区间;在另外一些需要一块能够修改的缓存的情形中,则必须分配一块所需大小的内存并保存其起始地址,再将ngx_buf_t的temporary标志置1,表示这是一块能够被修改的内存区域

3. 读取HTTP请求头Header字段

如果是1.0或者更新的http协议,接下来要做的就是读取请求头了
\nginx-1.7.4\src\http\ngx_http_request.c

/*
 * Read-event handler that reads and parses the HTTP request line.
 *
 * The request line may arrive in several pieces, so this function is
 * installed as the read handler and may be entered several times for one
 * request.  On a timed-out event the request is closed.  Otherwise it
 * loops: read more data, run the request-line state machine, and react to
 * the parser result:
 *
 *   NGX_OK     - record the request line, method name, protocol and URI,
 *                validate an optional host taken from the request line,
 *                then either process an HTTP/0.9 request right away or
 *                switch the handler to ngx_http_process_request_headers();
 *   NGX_AGAIN  - more data is needed; grow the header buffer when full;
 *   other      - log the matching client error and finalize with 400.
 *
 * NOTE (from the original article): nginx measures request time from the
 * first received packet, whereas Apache starts after the whole request
 * line has been received.  The code that stores start_sec/start_msec is
 * not part of this function.
 */
static void
ngx_http_process_request_line(ngx_event_t *rev)
{
    ssize_t              n;
    ngx_int_t            rc, rv;
    ngx_str_t            host;
    ngx_connection_t    *c;
    ngx_http_request_t  *r;

    c = rev->data;
    r = c->data;

    ngx_log_debug0(NGX_LOG_DEBUG_HTTP, rev->log, 0,
                   "http process request line");

    /* a timeout event closes both the request and the connection */
    if (rev->timedout) {
        ngx_log_error(NGX_LOG_INFO, c->log, NGX_ETIMEDOUT, "client timed out");
        c->timedout = 1;
        ngx_http_close_request(r, NGX_HTTP_REQUEST_TIME_OUT);
        return;
    }

    rc = NGX_AGAIN;

    for ( ;; ) {

        if (rc == NGX_AGAIN) {
            /* normal path: first read data into r->header_in */
            n = ngx_http_read_request_header(r);

            if (n == NGX_AGAIN || n == NGX_ERROR) {
                return;
            }
        }

        /* run the request-line state machine over the data read so far */
        rc = ngx_http_parse_request_line(r, r->header_in);

        if (rc == NGX_OK) {

            /*
             * the request line has been parsed successfully:
             * remember its start and length, the method name and the
             * protocol string; ngx_http_process_request_uri() then handles
             * the URI (per the article: uri, args and exten fields).
             */

            r->request_line.len = r->request_end - r->request_start;
            r->request_line.data = r->request_start;
            r->request_length = r->header_in->pos - r->request_start;

            ngx_log_debug1(NGX_LOG_DEBUG_HTTP, c->log, 0,
                           "http request line: \"%V\"", &r->request_line);

            r->method_name.len = r->method_end - r->request_start + 1;
            r->method_name.data = r->request_line.data;

            if (r->http_protocol.data) {
                r->http_protocol.len = r->request_end - r->http_protocol.data;
            }

            if (ngx_http_process_request_uri(r) != NGX_OK) {
                return;
            }

            if (r->host_start && r->host_end) {

                host.len = r->host_end - r->host_start;
                host.data = r->host_start;

                rc = ngx_http_validate_host(&host, r->pool, 0);

                if (rc == NGX_DECLINED) {
                    ngx_log_error(NGX_LOG_INFO, c->log, 0,
                                  "client sent invalid host in request line");
                    ngx_http_finalize_request(r, NGX_HTTP_BAD_REQUEST);
                    return;
                }

                if (rc == NGX_ERROR) {
                    ngx_http_close_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR);
                    return;
                }

                if (ngx_http_set_virtual_server(r, &host) == NGX_ERROR) {
                    return;
                }

                /*
                 * the URI in the request line carried a host part:
                 * keep it in headers_in.server
                 */
                r->headers_in.server = host;
            }

            /*
             * HTTP/0.9 defines no request headers, so there is nothing
             * more to read: select the virtual server (only if the request
             * line did not already provide a host) and process the request
             * immediately.
             */
            if (r->http_version < NGX_HTTP_VERSION_10) {

                if (r->headers_in.server.len == 0
                    && ngx_http_set_virtual_server(r, &r->headers_in.server)
                       == NGX_ERROR)
                {
                    return;
                }

                ngx_http_process_request(r);
                return;
            }

            if (ngx_list_init(&r->headers_in.headers, r->pool, 20,
                              sizeof(ngx_table_elt_t))
                != NGX_OK)
            {
                ngx_http_close_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR);
                return;
            }

            c->log->action = "reading client request headers";

            /* from now on read events are handled by the header parser */
            rev->handler = ngx_http_process_request_headers;
            ngx_http_process_request_headers(rev);

            return;
        }

        if (rc != NGX_AGAIN) {

            /* there was error while a request line parsing */

            ngx_log_error(NGX_LOG_INFO, c->log, 0,
                          ngx_http_client_errors[rc - NGX_HTTP_CLIENT_ERROR]);
            ngx_http_finalize_request(r, NGX_HTTP_BAD_REQUEST);
            return;
        }

        /* NGX_AGAIN: a request line parsing is still incomplete */

        /*
         * NGX_AGAIN means either "not enough data yet" or "buffer full".
         * Only the buffer-full case is handled here: ask
         * ngx_http_alloc_large_header_buffer() for a large buffer.
         *
         * Per the article: the initial buffer has client_header_buffer_size
         * bytes (default 1k); large buffers are configured by
         * large_client_header_buffers (default 4 buffers of 8k, allocated
         * one at a time).  The request line, and each single header, must
         * fit into one large buffer, and a pipelined request may reuse a
         * large buffer allocated by a previous request.
         *
         * When no buffer can be provided (NGX_DECLINED) this function
         * finalizes the request with NGX_HTTP_REQUEST_URI_TOO_LARGE.
         */
        if (r->header_in->pos == r->header_in->end) {

            rv = ngx_http_alloc_large_header_buffer(r, 1);

            if (rv == NGX_ERROR) {
                ngx_http_close_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR);
                return;
            }

            if (rv == NGX_DECLINED) {
                r->request_line.len = r->header_in->end - r->request_start;
                r->request_line.data = r->request_start;

                ngx_log_error(NGX_LOG_INFO, c->log, 0,
                              "client sent too long URI");
                ngx_http_finalize_request(r, NGX_HTTP_REQUEST_URI_TOO_LARGE);
                return;
            }
        }
    }
}

headers_in用来保存所有请求头,它的类型为ngx_http_headers_in_t
/src/http/ngx_http_request.h

typedef struct {
ngx_list_t headers; ngx_table_elt_t *host;
ngx_table_elt_t *connection;
ngx_table_elt_t *if_modified_since;
ngx_table_elt_t *if_unmodified_since;
ngx_table_elt_t *if_match;
ngx_table_elt_t *if_none_match;
ngx_table_elt_t *user_agent;
ngx_table_elt_t *referer;
ngx_table_elt_t *content_length;
ngx_table_elt_t *content_type; ngx_table_elt_t *range;
ngx_table_elt_t *if_range; ngx_table_elt_t *transfer_encoding;
ngx_table_elt_t *expect;
ngx_table_elt_t *upgrade; #if (NGX_HTTP_GZIP)
ngx_table_elt_t *accept_encoding;
ngx_table_elt_t *via;
#endif ngx_table_elt_t *authorization; ngx_table_elt_t *keep_alive; #if (NGX_HTTP_X_FORWARDED_FOR)
ngx_array_t x_forwarded_for;
#endif #if (NGX_HTTP_REALIP)
ngx_table_elt_t *x_real_ip;
#endif #if (NGX_HTTP_HEADERS)
ngx_table_elt_t *accept;
ngx_table_elt_t *accept_language;
#endif #if (NGX_HTTP_DAV)
ngx_table_elt_t *depth;
ngx_table_elt_t *destination;
ngx_table_elt_t *overwrite;
ngx_table_elt_t *date;
#endif ngx_str_t user;
ngx_str_t passwd; ngx_array_t cookies; ngx_str_t server;
off_t content_length_n;
time_t keep_alive_n; unsigned connection_type:;
unsigned chunked:;
unsigned msie:;
unsigned msie6:;
unsigned opera:;
unsigned gecko:;
unsigned chrome:;
unsigned safari:;
unsigned konqueror:;
} ngx_http_headers_in_t;

4. HTTP请求包Header字段解析

如果读到了一些数据则调用ngx_http_parse_header_line()函数来解析,同样的该解析函数实现为一个有限状态机,逻辑很简单,只是根据http协议解析一个请求头的name/value对,每次调用该函数最多解析出一个请求头,该函数返回4种不同返回值,表示不同解析结果

. 返回NGX_OK,表示解析出了一行请求头,这时还要判断解析出的请求头名字里面是否有非法字符,名字里面合法的字符包括
) 字母
) 数字
) 连字符(-)
) 另外如果设置了underscores_in_headers指令为on,则下划线也是合法字符,但是nginx默认下划线不合法
当请求头里面包含了非法的字符,nginx默认只是忽略这一行请求头
如果一切都正常,nginx会将该请求头及请求头名字的hash值保存在请求结构体的headers_in成员的headers链表
对于一些常见的请求头,如Host,Connection,nginx采用了类似于配置指令的方式,事先给这些请求头分配了一个处理函数,当解析出一个请求头时,会检查该请求头是否有设置处理函数,有的话则调用之,nginx所有有处理函数的请求头都记录在ngx_http_headers_in全局数组中 . 返回NGX_AGAIN,表示当前接收到的数据不够,一行请求头还未结束,需要继续下一轮循环。在下一轮循环中,nginx首先检查请求头缓冲区header_in是否已满,如果满了,则调用ngx_http_alloc_large_header_buffer()函数分配更多缓冲区 . 返回NGX_HTTP_PARSE_INVALID_HEADER,表示请求头解析过程中遇到错误,一般为客户端发送了不符合协议规范的头部,此时nginx返回400错误 . 返回NGX_HTTP_PARSE_HEADER_DONE,表示所有请求头已经成功的解析,这时请求的状态被设置为NGX_HTTP_PROCESS_REQUEST_STATE,意味着结束了请求读取阶段,正式进入了请求处理阶段,但是实际上请求可能含有请求体,nginx在请求读取阶段并不会去读取请求体,这个工作交给了后续的请求处理阶段的模块,这样做的目的是nginx本身并不知道这些请求体是否有用,如果后续模块并不需要的话,一方面请求体一般较大,如果全部读取进内存,则白白耗费大量的内存空间,另一方面即使nginx将请求体写进磁盘,但是涉及到磁盘io,会耗费比较多时间。所以交由后续模块来决定读取还是丢弃请求体是最明智的办法

\nginx-1.7.4\src\http\ngx_http_parse.c

/*
 * Parse at most one "name: value" header line from buffer b.
 *
 * The parser is a resumable state machine: its state, the running hash of
 * the lower-cased header name and the index into r->lowcase_header are
 * stored in r between calls, so a header line may be split across reads.
 *
 * r                  request whose header_* pointers, header_hash,
 *                    lowcase_header and invalid_header fields are filled in
 * b                  buffer to parse; b->pos is advanced past consumed data
 * allow_underscores  non-zero to accept '_' in header names
 *
 * Returns:
 *   NGX_OK                          one header line was parsed
 *   NGX_AGAIN                       the buffer ended inside a header line
 *   NGX_HTTP_PARSE_HEADER_DONE      the empty line ending the headers
 *   NGX_HTTP_PARSE_INVALID_HEADER   malformed input (e.g. a NUL byte)
 */
ngx_int_t
ngx_http_parse_header_line(ngx_http_request_t *r, ngx_buf_t *b,
    ngx_uint_t allow_underscores)
{
    u_char      c, ch, *p;
    ngx_uint_t  hash, i;
    enum {
        sw_start = 0,
        sw_name,
        sw_space_before_value,
        sw_value,
        sw_space_after_value,
        sw_ignore_line,
        sw_almost_done,
        sw_header_almost_done
    } state;

    /* the last '\0' is not needed because string is zero terminated */

    /*
     * Maps a byte to its lower-case form when it is allowed in a header
     * name (letters, digits, '-'); every other byte maps to 0.
     */
    static u_char  lowcase[] =
        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
        "\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789\0\0\0\0\0\0"
        "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
        "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
        "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";

    state = r->state;
    hash = r->header_hash;
    i = r->lowcase_index;

    /* walk the "name: value" pair one byte at a time */
    for (p = b->pos; p < b->last; p++) {
        ch = *p;

        switch (state) {

        /* first char */
        case sw_start:
            r->header_name_start = p;
            r->invalid_header = 0;

            switch (ch) {
            /*
             * A line break at the start of a line ends the header section;
             * the article notes that, unlike Apache, nginx does not parse
             * header values that continue across lines.
             */
            case CR:
                r->header_end = p;
                state = sw_header_almost_done;
                break;
            case LF:
                r->header_end = p;
                goto header_done;
            default:
                state = sw_name;

                /* header names are hashed and stored in lower case */
                c = lowcase[ch];

                if (c) {
                    hash = ngx_hash(0, c);
                    r->lowcase_header[0] = c;
                    i = 1;
                    break;
                }

                /* '_' is accepted only when allow_underscores is set */
                if (ch == '_') {
                    if (allow_underscores) {
                        hash = ngx_hash(0, ch);
                        r->lowcase_header[0] = ch;
                        i = 1;

                    } else {
                        r->invalid_header = 1;
                    }

                    break;
                }

                /* a NUL byte is rejected outright */
                if (ch == '\0') {
                    return NGX_HTTP_PARSE_INVALID_HEADER;
                }

                r->invalid_header = 1;

                break;

            }
            break;

        /* header name */
        case sw_name:
            c = lowcase[ch];

            if (c) {
                hash = ngx_hash(hash, c);
                r->lowcase_header[i++] = c;
                /* wrap the index so it stays inside lowcase_header */
                i &= (NGX_HTTP_LC_HEADER_LEN - 1);
                break;
            }

            if (ch == '_') {
                if (allow_underscores) {
                    hash = ngx_hash(hash, ch);
                    r->lowcase_header[i++] = ch;
                    i &= (NGX_HTTP_LC_HEADER_LEN - 1);

                } else {
                    r->invalid_header = 1;
                }

                break;
            }

            /* ':' terminates the header name */
            if (ch == ':') {
                r->header_name_end = p;
                state = sw_space_before_value;
                break;
            }

            if (ch == CR) {
                r->header_name_end = p;
                r->header_start = p;
                r->header_end = p;
                state = sw_almost_done;
                break;
            }

            if (ch == LF) {
                r->header_name_end = p;
                r->header_start = p;
                r->header_end = p;
                goto done;
            }

            /* IIS may send the duplicate "HTTP/1.1 ..." lines */
            if (ch == '/'
                && r->upstream
                && p - r->header_name_start == 4
                && ngx_strncmp(r->header_name_start, "HTTP", 4) == 0)
            {
                state = sw_ignore_line;
                break;
            }

            if (ch == '\0') {
                return NGX_HTTP_PARSE_INVALID_HEADER;
            }

            r->invalid_header = 1;

            break;

        /* space* before header value */
        case sw_space_before_value:
            switch (ch) {
            /* skip spaces between ':' and the value */
            case ' ':
                break;
            /* a line break here yields an empty value */
            case CR:
                r->header_start = p;
                r->header_end = p;
                state = sw_almost_done;
                break;
            case LF:
                r->header_start = p;
                r->header_end = p;
                goto done;
            case '\0':
                return NGX_HTTP_PARSE_INVALID_HEADER;
            default:
                r->header_start = p;
                state = sw_value;
                break;
            }
            break;

        /* header value */
        case sw_value:
            switch (ch) {
            case ' ':
                r->header_end = p;
                state = sw_space_after_value;
                break;
            case CR:
                r->header_end = p;
                state = sw_almost_done;
                break;
            case LF:
                r->header_end = p;
                goto done;
            case '\0':
                return NGX_HTTP_PARSE_INVALID_HEADER;
            }
            break;

        /* space* before end of header line */
        case sw_space_after_value:
            switch (ch) {
            case ' ':
                break;
            case CR:
                state = sw_almost_done;
                break;
            case LF:
                goto done;
            case '\0':
                return NGX_HTTP_PARSE_INVALID_HEADER;
            default:
                state = sw_value;
                break;
            }
            break;

        /* ignore header line */
        case sw_ignore_line:
            switch (ch) {
            case LF:
                state = sw_start;
                break;
            default:
                break;
            }
            break;

        /* end of header line */
        case sw_almost_done:
            switch (ch) {
            case LF:
                goto done;
            case CR:
                break;
            default:
                return NGX_HTTP_PARSE_INVALID_HEADER;
            }
            break;

        /* end of header */
        case sw_header_almost_done:
            switch (ch) {
            case LF:
                goto header_done;
            default:
                return NGX_HTTP_PARSE_INVALID_HEADER;
            }
        }
    }

    /* buffer exhausted inside a line: save the parser state for next call */
    b->pos = p;
    r->state = state;
    r->header_hash = hash;
    r->lowcase_index = i;

    return NGX_AGAIN;

done:

    b->pos = p + 1;
    r->state = sw_start;
    r->header_hash = hash;
    r->lowcase_index = i;

    return NGX_OK;

header_done:

    b->pos = p + 1;
    r->state = sw_start;

    return NGX_HTTP_PARSE_HEADER_DONE;
}

返回NGX_AGAIN,表示当前接收到的数据不够,一行请求头还未结束,需要继续下一轮循环。在下一轮循环中,nginx首先检查请求头缓冲区header_in是否已满,如果满了,则调用ngx_http_alloc_large_header_buffer()函数分配更多缓冲区

/*
 * Switch r->header_in to a large header buffer when the current one is full.
 *
 * request_line is non-zero while the request line is being parsed and zero
 * while a header line is being parsed; it selects which set of parser
 * pointers has to be relocated into the new buffer.
 *
 * Returns:
 *   NGX_OK        r->header_in is usable again (reset, reused or new buffer)
 *   NGX_DECLINED  the unfinished line already fills a large buffer, or the
 *                 configured number of large buffers is exhausted
 *   NGX_ERROR     memory allocation failed
 */
static ngx_int_t
ngx_http_alloc_large_header_buffer(ngx_http_request_t *r,
    ngx_uint_t request_line)
{
    u_char                    *old, *new;
    ngx_buf_t                 *b;
    ngx_http_connection_t     *hc;
    ngx_http_core_srv_conf_t  *cscf;

    ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
                   "http alloc large header buffer");

    /*
     * Request-line phase with the parser still in its initial state: the
     * buffer holds only leading line breaks.  Simply reset the buffer and
     * drop that data; no larger buffer is needed.
     */
    if (request_line && r->state == 0) {

        /* the client fills up the buffer with "\r\n" */

        r->header_in->pos = r->header_in->start;
        r->header_in->last = r->header_in->start;

        return NGX_OK;
    }

    /* start of the unfinished request line / header in the old buffer */
    old = request_line ? r->request_start : r->header_name_start;

    cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);

    /* a single line that does not fit into one large buffer is refused */
    if (r->state != 0
        && (size_t) (r->header_in->pos - old)
                                     >= cscf->large_client_header_buffers.size)
    {
        return NGX_DECLINED;
    }

    hc = r->http_connection;

    if (hc->nfree) {
        /* reuse a buffer from the connection's free list */
        b = hc->free[--hc->nfree];

        ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
                       "http large header free: %p %uz",
                       b->pos, b->end - b->last);

    } else if (hc->nbusy < cscf->large_client_header_buffers.num) {

        /* still below the configured limit: allocate a new large buffer */

        if (hc->busy == NULL) {
            hc->busy = ngx_palloc(r->connection->pool,
                  cscf->large_client_header_buffers.num * sizeof(ngx_buf_t *));
            if (hc->busy == NULL) {
                return NGX_ERROR;
            }
        }

        b = ngx_create_temp_buf(r->connection->pool,
                                cscf->large_client_header_buffers.size);
        if (b == NULL) {
            return NGX_ERROR;
        }

        ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
                       "http large header alloc: %p %uz",
                       b->pos, b->end - b->last);

    } else {
        /* the configured number of large buffers is already in use */
        return NGX_DECLINED;
    }

    /* the reused or newly allocated buffer joins the busy list */
    hc->busy[hc->nbusy++] = b;

    /*
     * Header values are kept as pointers into the buffer, so one complete
     * line must live in contiguous memory.  If a line is only partly read,
     * the part already received is copied to the new buffer and every
     * parser pointer is relocated.
     */

    if (r->state == 0) {
        /*
         * r->state == 0 means that a header line was parsed successfully
         * and we do not need to copy incomplete header line and
         * to relocate the parser header pointers
         */

        r->header_in = b;

        return NGX_OK;
    }

    ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
                   "http large header copy: %d", r->header_in->pos - old);

    new = b->start;

    /* copy the incomplete line from the old buffer */
    ngx_memcpy(new, old, r->header_in->pos - old);

    b->pos = new + (r->header_in->pos - old);
    b->last = new + (r->header_in->pos - old);

    /* relocate the parser pointers into the new buffer */
    if (request_line) {
        r->request_start = new;

        if (r->request_end) {
            r->request_end = new + (r->request_end - old);
        }

        r->method_end = new + (r->method_end - old);

        r->uri_start = new + (r->uri_start - old);
        r->uri_end = new + (r->uri_end - old);

        if (r->schema_start) {
            r->schema_start = new + (r->schema_start - old);
            r->schema_end = new + (r->schema_end - old);
        }

        if (r->host_start) {
            r->host_start = new + (r->host_start - old);
            if (r->host_end) {
                r->host_end = new + (r->host_end - old);
            }
        }

        if (r->port_start) {
            r->port_start = new + (r->port_start - old);
            r->port_end = new + (r->port_end - old);
        }

        if (r->uri_ext) {
            r->uri_ext = new + (r->uri_ext - old);
        }

        if (r->args_start) {
            r->args_start = new + (r->args_start - old);
        }

        if (r->http_protocol.data) {
            r->http_protocol.data = new + (r->http_protocol.data - old);
        }

    } else {
        r->header_name_start = new;
        r->header_name_end = new + (r->header_name_end - old);
        r->header_start = new + (r->header_start - old);
        r->header_end = new + (r->header_end - old);
    }

    r->header_in = b;

    return NGX_OK;
}

5. HTTP Header字段解析回调处理函数

ngx_http_headers_in数组当前包含了25个常用的请求头,每个请求头都设置了一个处理函数,当前其中一部分请求头设置的是公共的处理函数,这里有2个公共的处理函数:ngx_http_process_header_line、ngx_http_process_unique_header_line
\nginx-1.7.4\src\http\ngx_http_request.c

/*
 * Table of request headers that have a dedicated handler.  Each entry
 * gives the header name, the offset of its slot in ngx_http_headers_in_t
 * and the handler to call for it.  The table ends with a null-string
 * sentinel entry.
 */
ngx_http_header_t  ngx_http_headers_in[] = {
    { ngx_string("Host"), offsetof(ngx_http_headers_in_t, host),
                 ngx_http_process_host },

    { ngx_string("Connection"), offsetof(ngx_http_headers_in_t, connection),
                 ngx_http_process_connection },

    { ngx_string("If-Modified-Since"),
                 offsetof(ngx_http_headers_in_t, if_modified_since),
                 ngx_http_process_unique_header_line },

    { ngx_string("If-Unmodified-Since"),
                 offsetof(ngx_http_headers_in_t, if_unmodified_since),
                 ngx_http_process_unique_header_line },

    { ngx_string("If-Match"),
                 offsetof(ngx_http_headers_in_t, if_match),
                 ngx_http_process_unique_header_line },

    { ngx_string("If-None-Match"),
                 offsetof(ngx_http_headers_in_t, if_none_match),
                 ngx_http_process_unique_header_line },

    { ngx_string("User-Agent"), offsetof(ngx_http_headers_in_t, user_agent),
                 ngx_http_process_user_agent },

    { ngx_string("Referer"), offsetof(ngx_http_headers_in_t, referer),
                 ngx_http_process_header_line },

    { ngx_string("Content-Length"),
                 offsetof(ngx_http_headers_in_t, content_length),
                 ngx_http_process_unique_header_line },

    { ngx_string("Content-Type"),
                 offsetof(ngx_http_headers_in_t, content_type),
                 ngx_http_process_header_line },

    { ngx_string("Range"), offsetof(ngx_http_headers_in_t, range),
                 ngx_http_process_header_line },

    { ngx_string("If-Range"),
                 offsetof(ngx_http_headers_in_t, if_range),
                 ngx_http_process_unique_header_line },

    { ngx_string("Transfer-Encoding"),
                 offsetof(ngx_http_headers_in_t, transfer_encoding),
                 ngx_http_process_header_line },

    { ngx_string("Expect"),
                 offsetof(ngx_http_headers_in_t, expect),
                 ngx_http_process_unique_header_line },

    { ngx_string("Upgrade"),
                 offsetof(ngx_http_headers_in_t, upgrade),
                 ngx_http_process_header_line },

#if (NGX_HTTP_GZIP)
    { ngx_string("Accept-Encoding"),
                 offsetof(ngx_http_headers_in_t, accept_encoding),
                 ngx_http_process_header_line },

    { ngx_string("Via"), offsetof(ngx_http_headers_in_t, via),
                 ngx_http_process_header_line },
#endif

    { ngx_string("Authorization"),
                 offsetof(ngx_http_headers_in_t, authorization),
                 ngx_http_process_unique_header_line },

    { ngx_string("Keep-Alive"), offsetof(ngx_http_headers_in_t, keep_alive),
                 ngx_http_process_header_line },

#if (NGX_HTTP_X_FORWARDED_FOR)
    { ngx_string("X-Forwarded-For"),
                 offsetof(ngx_http_headers_in_t, x_forwarded_for),
                 ngx_http_process_multi_header_lines },
#endif

#if (NGX_HTTP_REALIP)
    { ngx_string("X-Real-IP"),
                 offsetof(ngx_http_headers_in_t, x_real_ip),
                 ngx_http_process_header_line },
#endif

#if (NGX_HTTP_HEADERS)
    { ngx_string("Accept"), offsetof(ngx_http_headers_in_t, accept),
                 ngx_http_process_header_line },

    { ngx_string("Accept-Language"),
                 offsetof(ngx_http_headers_in_t, accept_language),
                 ngx_http_process_header_line },
#endif

#if (NGX_HTTP_DAV)
    { ngx_string("Depth"), offsetof(ngx_http_headers_in_t, depth),
                 ngx_http_process_header_line },

    { ngx_string("Destination"), offsetof(ngx_http_headers_in_t, destination),
                 ngx_http_process_header_line },

    { ngx_string("Overwrite"), offsetof(ngx_http_headers_in_t, overwrite),
                 ngx_http_process_header_line },

    { ngx_string("Date"), offsetof(ngx_http_headers_in_t, date),
                 ngx_http_process_header_line },
#endif

    { ngx_string("Cookie"), offsetof(ngx_http_headers_in_t, cookies),
                 ngx_http_process_multi_header_lines },

    /* sentinel: marks the end of the table */
    { ngx_null_string, 0, NULL }
};

我们拿Host头的处理函数ngx_http_process_host进行深入研究

/*
 * Handler for the "Host" request header.
 *
 * Stores the first Host header as a quick reference in r->headers_in.host,
 * validates its value and, unless a host was already taken from the
 * request line, selects the virtual server and stores the host in
 * r->headers_in.server.
 *
 * Returns NGX_OK on success.  Returns NGX_ERROR after the request has been
 * finalized (invalid host) or closed (internal error), or when
 * ngx_http_set_virtual_server() fails.
 */
static ngx_int_t
ngx_http_process_host(ngx_http_request_t *r, ngx_table_elt_t *h,
    ngx_uint_t offset)
{
    ngx_int_t  rc;
    ngx_str_t  host;

    if (r->headers_in.host == NULL) {
        r->headers_in.host = h;
    }

    host = h->value;

    rc = ngx_http_validate_host(&host, r->pool, 0);

    if (rc == NGX_DECLINED) {
        ngx_log_error(NGX_LOG_INFO, r->connection->log, 0,
                      "client sent invalid host header");
        ngx_http_finalize_request(r, NGX_HTTP_BAD_REQUEST);
        return NGX_ERROR;
    }

    if (rc == NGX_ERROR) {
        ngx_http_close_request(r, NGX_HTTP_INTERNAL_SERVER_ERROR);
        return NGX_ERROR;
    }

    /*
     * headers_in.server may already hold the host parsed from the request
     * line.  A host given in the request URI takes precedence over the
     * Host header, so in that case the header value is not applied.
     */
    if (r->headers_in.server.len) {
        return NGX_OK;
    }

    if (ngx_http_set_virtual_server(r, &host) == NGX_ERROR) {
        return NGX_ERROR;
    }

    r->headers_in.server = host;

    return NGX_OK;
}

Relevant Link:

http://m.blog.chinaunix.net/uid-27767798-id-3776815.html
http://blog.****.net/yusiguyuan/article/details/41288343
http://lxr.nginx.org/source/src/http/ngx_http_request.h#0241
http://blog.****.net/yusiguyuan/article/details/41288417
http://www.pagefault.info/?p=220

3. HTTP Request Body解析流程

0x1: HTTP请求体(body)读取

在HTTP请求处理流程中,针对有些模块需要对请求体做一些处理,那么这个模块就需要在这个阶段注册函数,其中读取请求体的函数ngx_http_read_client_request_body()是存在的,只不过不同的模块可能对请求体做不同的处理,读取请求体的函数是在某个模块的content_handler函数中包含的,比如proxy模块,fastcgi模块,uwsgi模块等这些模块对请求体感兴趣,那么读取请求体的函数在这些模块的content_handler中注册
nginx核心本身不会主动读取请求体,这个工作是交给请求处理阶段的模块来做

. nginx核心提供了ngx_http_read_client_request_body()接口来读取请求体
. 另外还提供了一个丢弃请求体的接口: ngx_http_discard_request_body()

在请求执行的各个阶段中,任何一个阶段的模块如果对请求体感兴趣或者希望丢掉客户端发过来的请求体,可以分别调用这两个接口来完成。这两个接口是nginx核心提供的处理请求体的标准接口,如果希望配置文件中一些请求体相关的指令(比如client_body_in_file_only、client_body_buffer_size等)能够预期工作,以及能够正常使用nginx内置的一些和请求体相关的变量(比如$request_body和$request_body_file),一般来说所有模块都必须调用这些接口来完成相应操作,如果需要自定义接口来处理请求体,也应尽量兼容nginx默认的行为

1. 读取请求体

请求体的读取一般发生在nginx的content handler中,一些nginx内置的模块,比如proxy模块,fastcgi模块,uwsgi模块等,这些模块的行为必须将客户端过来的请求体(如果有的话)以相应协议完整的转发到后端服务进程,所有的这些模块都是调用了ngx_http_read_client_request_body()接口来完成请求体读取。值得注意的是这些模块会把客户端的请求体完整的读取后才开始往后端转发数据 \nginx-1.7.4\src\http\ngx_http_request_body.c

/*
 * Start reading the client request body.
 *
 * r             the request
 * post_handler  callback invoked once the whole body has been read; it
 *               returns void, so (per the article) a handler that finishes
 *               the request must call ngx_http_finalize_request() itself
 *
 * The request reference count (r->main->count) is incremented on entry
 * and decremented again only when a special response code
 * (>= NGX_HTTP_SPECIAL_RESPONSE) is returned.
 *
 * Returns NGX_OK when post_handler was already called here, otherwise the
 * result of the body reader, or NGX_HTTP_INTERNAL_SERVER_ERROR.
 */
ngx_int_t
ngx_http_read_client_request_body(ngx_http_request_t *r,
    ngx_http_client_body_handler_pt post_handler)
{
    size_t                     preread;
    ssize_t                    size;
    ngx_int_t                  rc;
    ngx_buf_t                 *b;
    ngx_chain_t                out, *cl;
    ngx_http_request_body_t   *rb;
    ngx_http_core_loc_conf_t  *clcf;

    r->main->count++;

#if (NGX_HTTP_SPDY)
    if (r->spdy_stream && r == r->main) {
        rc = ngx_http_spdy_read_request_body(r, post_handler);
        goto done;
    }
#endif

    /* subrequest, body already set up, or body being discarded */
    if (r != r->main || r->request_body || r->discard_body) {
        post_handler(r);
        return NGX_OK;
    }

    /*
     * Per the article: ngx_http_test_expect() checks for an
     * "Expect: 100-continue" header and, if present, answers
     * "HTTP/1.1 100 Continue" so that the client starts sending the body.
     */
    if (ngx_http_test_expect(r) != NGX_OK) {
        rc = NGX_HTTP_INTERNAL_SERVER_ERROR;
        goto done;
    }

    rb = ngx_pcalloc(r->pool, sizeof(ngx_http_request_body_t));
    if (rb == NULL) {
        rc = NGX_HTTP_INTERNAL_SERVER_ERROR;
        goto done;
    }

    /*
     * set by ngx_pcalloc():
     *
     *     rb->bufs = NULL;
     *     rb->buf = NULL;
     *     rb->free = NULL;
     *     rb->busy = NULL;
     *     rb->chunked = NULL;
     */

    rb->rest = -1;
    rb->post_handler = post_handler;

    /*
     * r->request_body keeps the state of body reading.  Fields as listed
     * in the article:
     *
     *     typedef struct {
     *         ngx_temp_file_t  *temp_file;  -- temp file storing the body
     *         ngx_chain_t      *bufs;       -- head of the body buffer chain
     *         ngx_buf_t        *buf;        -- buffer currently being filled
     *         off_t             rest;       -- body bytes still to be read
     *         ngx_chain_t      *to_write;
     *         ngx_http_client_body_handler_pt  post_handler;
     *                                       -- callback passed to this function
     *     } ngx_http_request_body_t;
     */
    r->request_body = rb;

    /*
     * A negative content_length_n with no chunked encoding is treated as
     * "no body": call the handler right away.
     */
    if (r->headers_in.content_length_n < 0 && !r->headers_in.chunked) {
        post_handler(r);
        return NGX_OK;
    }

    /* bytes that were read together with the headers */
    preread = r->header_in->last - r->header_in->pos;

    if (preread) {

        /* there is the pre-read part of the request body */

        ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
                       "http client request body preread %uz", preread);

        out.buf = r->header_in;
        out.next = NULL;

        rc = ngx_http_request_body_filter(r, &out);

        if (rc != NGX_OK) {
            goto done;
        }

        r->request_length += preread - (r->header_in->last - r->header_in->pos);

        /*
         * If the free space left in r->header_in can hold the rest of the
         * body, keep reading into that buffer instead of allocating a new
         * one.
         */
        if (!r->headers_in.chunked
            && rb->rest > 0
            && rb->rest <= (off_t) (r->header_in->end - r->header_in->last))
        {
            /* the whole request body may be placed in r->header_in */

            b = ngx_calloc_buf(r->pool);
            if (b == NULL) {
                rc = NGX_HTTP_INTERNAL_SERVER_ERROR;
                goto done;
            }

            b->temporary = 1;
            b->start = r->header_in->pos;
            b->pos = r->header_in->pos;
            b->last = r->header_in->last;
            b->end = r->header_in->end;

            rb->buf = b;

            r->read_event_handler = ngx_http_read_client_request_body_handler;
            r->write_event_handler = ngx_http_request_empty_handler;

            rc = ngx_http_do_read_client_request_body(r);
            goto done;
        }

    } else {
        /* set rb->rest */

        if (ngx_http_request_body_filter(r, NULL) != NGX_OK) {
            rc = NGX_HTTP_INTERNAL_SERVER_ERROR;
            goto done;
        }
    }

    if (rb->rest == 0) {
        /* the whole request body was pre-read */

        if (r->request_body_in_file_only) {
            if (ngx_http_write_request_body(r) != NGX_OK) {
                rc = NGX_HTTP_INTERNAL_SERVER_ERROR;
                goto done;
            }

            if (rb->temp_file->file.offset != 0) {

                /* expose the temp file as a single file buffer */
                cl = ngx_chain_get_free_buf(r->pool, &rb->free);
                if (cl == NULL) {
                    rc = NGX_HTTP_INTERNAL_SERVER_ERROR;
                    goto done;
                }

                b = cl->buf;

                ngx_memzero(b, sizeof(ngx_buf_t));

                b->in_file = 1;
                b->file_last = rb->temp_file->file.offset;
                b->file = &rb->temp_file->file;

                rb->bufs = cl;

            } else {
                rb->bufs = NULL;
            }
        }

        post_handler(r);

        return NGX_OK;
    }

    if (rb->rest < 0) {
        ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0,
                      "negative request body rest");
        rc = NGX_HTTP_INTERNAL_SERVER_ERROR;
        goto done;
    }

    clcf = ngx_http_get_module_loc_conf(r, ngx_http_core_module);

    /*
     * Per the article: client_body_buffer_size is the in-memory body
     * buffer size (default: twice the system page size); a larger body is
     * written to a temporary file.
     *
     * A body that fits into 1.25 x client_body_buffer_size gets a buffer
     * of exactly the remaining size; otherwise the configured size is used.
     */
    size = clcf->client_body_buffer_size;
    size += size >> 2;

    /* TODO: honor r->request_body_in_single_buf */

    if (!r->headers_in.chunked && rb->rest < size) {
        size = (ssize_t) rb->rest;

        if (r->request_body_in_single_buf) {
            size += preread;
        }

    } else {
        size = clcf->client_body_buffer_size;
    }

    rb->buf = ngx_create_temp_buf(r->pool, size);
    if (rb->buf == NULL) {
        rc = NGX_HTTP_INTERNAL_SERVER_ERROR;
        goto done;
    }

    r->read_event_handler = ngx_http_read_client_request_body_handler;
    r->write_event_handler = ngx_http_request_empty_handler;

    rc = ngx_http_do_read_client_request_body(r);

done:

    if (rc >= NGX_HTTP_SPECIAL_RESPONSE) {
        r->main->count--;
    }

    return rc;
}

ngx_http_read_client_request_body是一个异步方法,调用它只是说明要求Nginx开始接收请求的包体,并不表示是否已经接收完,当接收完所有的包体内容后,post_handler指向的回调方法会被调用。因此,即使在调用了ngx_http_read_client_request_body方法后它已经返回,也无法确定这时是否已经调用过post_handler指向的方法。换句话说,ngx_http_read_client_request_body返回时既有可能已经接收完请求中所有的包体(假如包体的长度很小),也有可能还没开始接收包体
在worker进程中,调用ngx_http_read_client_request_body是不会阻塞的,要么读完socket上的buffer发现不完整立刻返回,等待下一次EPOLLIN事件,要么就是读完body了,调用用户定义的post_handler方法去处理body
ngx_http_read_client_request_body提供两种保存body的方式,一种是把body存储在内存中,另一种是把body存储到临时文件里。这个临时文件也有不同的处理方法,一种是请求结束后nginx便清理掉,另外就是永久保留这个临时文件。例如下面这两个参数就会设定为每个body都存放到临时文件里,并且这个临时文件在请求结束后不会被删除

r->request_body_in_persistent_file = ;
r->request_body_in_file_only = ;

2. 丢弃请求体

一个模块想要主动的丢弃客户端发过的请求体,可以调用nginx核心提供的ngx_http_discard_request_body()接口,主动丢弃的原因可能有很多种

. 模块的业务逻辑不需要请求体
. 客户端发送了过大的请求体
. 为了兼容http1.1协议的pipeline请求,模块有义务主动丢弃不需要的请求体。总之为了保持良好的客户端兼容性,nginx必须主动丢弃无用的请求体

\nginx-1.7.4\src\http\ngx_http_request_body.c

/*
 * Discard the client request body.
 *
 * Consumes whatever part of the body is already available and, if more is
 * still expected, installs ngx_http_discarded_request_body_handler() as
 * the read handler so that the rest is dropped as it arrives.
 *
 * Returns NGX_OK, or a special response code on failure.
 */
ngx_int_t
ngx_http_discard_request_body(ngx_http_request_t *r)
{
    ssize_t       size;
    ngx_int_t     rc;
    ngx_event_t  *rev;

#if (NGX_HTTP_SPDY)
    if (r->spdy_stream && r == r->main) {
        r->spdy_stream->skip_data = NGX_SPDY_DATA_DISCARD;
        return NGX_OK;
    }
#endif

    /*
     * Nothing to do for a subrequest, when discarding is already in
     * progress, or when the body is already being read.
     */
    if (r != r->main || r->discard_body || r->request_body) {
        return NGX_OK;
    }

    /*
     * Per the article: HTTP/1.1 expects a 417 (Expectation Failed) reply
     * when the server does not want the body of a request carrying an
     * Expect header; nginx instead lets the client send the body and then
     * throws it away.
     */
    if (ngx_http_test_expect(r) != NGX_OK) {
        return NGX_HTTP_INTERNAL_SERVER_ERROR;
    }

    rev = r->connection->read;

    ngx_log_debug0(NGX_LOG_DEBUG_HTTP, rev->log, 0, "http set discard body");

    /* the body is unwanted, so the read timer is removed */
    if (rev->timer_set) {
        ngx_del_timer(rev);
    }

    if (r->headers_in.content_length_n <= 0 && !r->headers_in.chunked) {
        return NGX_OK;
    }

    size = r->header_in->last - r->header_in->pos;

    if (size || r->headers_in.chunked) {
        /* drop the part of the body that was read with the headers */
        rc = ngx_http_discard_request_body_filter(r, r->header_in);

        if (rc != NGX_OK) {
            return rc;
        }

        /* content_length_n == 0 here: nothing remains to be discarded */
        if (r->headers_in.content_length_n == 0) {
            return NGX_OK;
        }
    }

    rc = ngx_http_read_discarded_request_body(r);

    if (rc == NGX_OK) {
        r->lingering_close = 0;
        return NGX_OK;
    }

    if (rc >= NGX_HTTP_SPECIAL_RESPONSE) {
        return rc;
    }

    /* rc == NGX_AGAIN */

    /* the rest of the body is dropped by the handler on later read events */
    r->read_event_handler = ngx_http_discarded_request_body_handler;

    if (ngx_handle_read_event(rev, 0) != NGX_OK) {
        return NGX_HTTP_INTERNAL_SERVER_ERROR;
    }

    /*
     * The client may still be sending the body after the request has been
     * processed.  The extra reference keeps the request from being freed
     * until the body is fully discarded; discard_body marks this state.
     */
    r->count++;
    r->discard_body = 1;

    return NGX_OK;
}

ngx_http_discarded_request_body_handler,这个函数每次读事件来时会被调用

/*
 * Read handler used while a request body is being discarded; it is called
 * on every read event until the body is consumed, an error occurs, or the
 * lingering time runs out.
 */
void
ngx_http_discarded_request_body_handler(ngx_http_request_t *r)
{
    ngx_int_t                  rc;
    ngx_msec_t                 timer;
    ngx_event_t               *rev;
    ngx_connection_t          *c;
    ngx_http_core_loc_conf_t  *clcf;

    c = r->connection;
    rev = c->read;

    /* a timed-out read event ends the request with an error */
    if (rev->timedout) {
        c->timedout = 1;
        c->error = 1;
        ngx_http_finalize_request(r, NGX_ERROR);
        return;
    }

    if (r->lingering_time) {
        /* time left until r->lingering_time, compared against ngx_time() */
        timer = (ngx_msec_t) r->lingering_time - (ngx_msec_t) ngx_time();

        if ((ngx_msec_int_t) timer <= 0) {
            r->discard_body = 0;
            r->lingering_close = 0;
            ngx_http_finalize_request(r, NGX_ERROR);
            return;
        }

    } else {
        /*
         * No lingering time set.  Per the article this is the case where
         * the read event arrives before request processing has finished,
         * so no timer is needed.
         */
        timer = 0;
    }

    rc = ngx_http_read_discarded_request_body(r);

    if (rc == NGX_OK) {
        /* the whole body has been discarded */
        r->discard_body = 0;
        r->lingering_close = 0;
        ngx_http_finalize_request(r, NGX_DONE);
        return;
    }

    if (rc >= NGX_HTTP_SPECIAL_RESPONSE) {
        c->error = 1;
        ngx_http_finalize_request(r, NGX_ERROR);
        return;
    }

    /* rc == NGX_AGAIN */

    if (ngx_handle_read_event(rev, 0) != NGX_OK) {
        c->error = 1;
        ngx_http_finalize_request(r, NGX_ERROR);
        return;
    }

    if (timer) {

        clcf = ngx_http_get_module_loc_conf(r, ngx_http_core_module);

        /* scale by 1000 before capping at lingering_timeout */
        timer *= 1000;

        if (timer > clcf->lingering_timeout) {
            timer = clcf->lingering_timeout;
        }

        ngx_add_timer(rev, timer);
    }
}

Relevant Link:

http://book.51cto.com/art/201303/386672.htm
http://blog.****.net/russell_tao/article/details/5637545
http://blog.****.net/yusiguyuan/article/details/41288443
http://blog.****.net/yusiguyuan/article/details/41288619
http://blog.****.net/chosen0ne/article/details/7861048

Copyright (c) 2015 LittleHann All rights reserved