High speed: pure C code, highly optimized HTTP handling, minimal CPU footprint – easily achieving 2000 requests per second with responsive targets.
一个单线程的程序如何能达到这个性能?我们就来分析一下源码吧。
先从高处俯瞰一下整个程序:
整个程序分为以下几个模块:
http模块(http_client.c),数据管理模块(database.c),爬虫兼攻击模块(crawler.c,analysis.c)和报表模块(report.c)。
其中http模块主要负责http会话的处理,包括url解析,请求的发送,响应的解析。这个模块没用第三方库来处理,所有功能都自己实现,相当牛,也相当高效。
数据管理模块主要是管理爬虫需要的站点树和检查出的错误,不多说。
爬虫兼攻击模块负责在url里插入攻击向量,以及html解析和攻击结果的检查。
报表模块是最后生成网页报表的模块,就是把数据模块里的数据输出,不解释。
好,我们来仔细分析http模块和攻击模块。
我们仔细想下整个程序的性能问题就可以发现,攻击往往是顺序进行的。而网页的下载却有快有慢(跟内容多少,即时网速都有关)。所以,如果将http的处理逻辑串到攻击逻辑里面必然会造成一会儿网卡忙cpu闲,一会儿cpu忙而网卡闲。这个问题在我前面一篇文章讨论过。解决方法毫无疑问,异步化,下面我们就来看看这两个逻辑是如何异步交互的。
从攻击的角度来看:一个url往往都要经过至少以下一些检查步骤:
1.url的类型检查
2.xss测试
3.sql注入测试
从http的角度来看,一个url意味着建立连接,发送请求,接收响应。
从http入手,如何将网卡的性能发挥到100%,并发!skipfish的并发采用了最简单的poll,你可能疑问为什么不用epoll,答案很简单,并发量不多(skipfish默认并发是40个连接),因为http的请求主要是下载,所以一个连接需要下载很多东西,几十个连接流量已经不小了。这不是关键,不多说了。整个skipfish的主循环也在http模块中:u32 next_from_queue(void)。在main函数中有如下代码:
1
2
3
4
5
|
/* Main loop excerpt: iterates while next_from_queue() returns non-zero and
   stop_soon is clear. The (!show_once++) term is true while show_once is
   still zero - presumably so the body runs at least once; confirm against
   the full source. The loop body is elided in this excerpt. */
while
((next_from_queue() && !stop_soon) || (!show_once++)) {
…
}
|
很显然,这个函数就是这个程序的引擎。函数的意思也很明了:http请求队列里是否还有请求。这里引出了很重要的东西:请求队列。代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
|
/* Request queue descriptor: */

struct queue_entry {
  struct http_request*  req;    /* Request descriptor           */
  struct http_response* res;    /* Response descriptor          */
  struct conn_entry*    c;      /* Connection currently used    */
  struct queue_entry*   prev;   /* Previous queue entry         */
  struct queue_entry*   next;   /* Next queue entry             */
  u8 retrying;                  /* Request being retried?       */
};

/* Doubly-linked request queue; this pointer refers to its first entry. */

static struct queue_entry* queue;
|
代码很简单,一个双向链表。里面存放着请求响应对,和关联的网络连接。整个http模块所干的事情,基本也就是next_from_queue函数干的事情,说得简单一点,就是从网卡上取数据往请求队列里面的响应体里面填。填完之后呢?看代码,在next_from_queue函数里:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
/* Excerpt from next_from_queue(): allocate the pollfd array on first use
   (room for max_connections entries), fill one slot per connection while
   walking the list through c->next, then call poll(). */
if
(!p)
p = __DFL_ck_alloc(
sizeof
(
struct
pollfd) * max_connections);
while
(c) {
p[i].fd = c->fd;
p[i].events = POLLIN | POLLERR | POLLHUP;
/* Also ask for writability when unsent bytes remain (write_len differs
   from write_off) or when SSL_rd_w_wr is set - presumably "SSL read wants
   write"; confirm against the full source. */
if
(c->write_len - c->write_off || c->SSL_rd_w_wr)
p[i].events |= POLLOUT;
p[i].revents = 0;
c = c->next;
i++;
}
/* Wait on conn_cur descriptors; the third poll() argument is the timeout
   in milliseconds. */
poll(p, conn_cur, 100);
|
这里填充poll相关的结构体并调用poll,这是整个程序除了gethostbyname以外唯一会阻塞的地方(没流量啥都干不了当然阻塞啦)。下面开始处理所有连接上的数据,代码被我删掉很多,仅留下了关键点。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
/* Excerpt from next_from_queue(): iterate over the conn_cur polled slots,
   starting from connection list head conn. This part handles slots that
   reported POLLERR or POLLHUP. */
c = conn;
for
(i=0;i<conn_cur;i++) {
if
(p[i].revents & (POLLERR|POLLHUP)) {
/* Only connections with a request attached (c->q) notify a callback. */
if
(c->q) {
/* Request not fully written, or nothing read yet: report STATE_CONNERR
   to the callback and count a network error. */
if
(c->write_len - c->write_off || !c->read_len) {
c->q->res->state = STATE_CONNERR;
keep = c->q->req->callback(c->q->req, c->q->res);
req_errors_net++;
req_errors_cur++;
}
else
{
/* Some data was read: parse what is buffered. Any result other than 2
   is reported to the callback as STATE_OK. */
if
(parse_response(c->q->req, c->q->res, c->read_buf,
c->read_len, 0) != 2) {
c->q->res->state = STATE_OK;
keep = c->q->req->callback(c->q->req, c->q->res);
/* Reset the current error counter unless it already exceeds max_fail. */
if
(req_errors_cur <= max_fail)
req_errors_cur = 0;
}
else
{
/* parse_response() returned 2: treated as a connection error. */
c->q->res->state = STATE_CONNERR;
keep = c->q->req->callback(c->q->req, c->q->res);
req_errors_net++;
req_errors_cur++;
}
}
}
/* keep (the callback's return value when one was invoked) is passed
   through to destroy_unlink_conn(). */
destroy_unlink_conn(c, keep);
}
else
|
以上就是当连接错误或者连接关闭的时候的处理,我们看到,这里主要做的事情就是调用parse_response来解析已获得的数据,如果解析没问题,说明网页已经传好了,就调用回调函数req->callback(c->q->req, c->q->res);,我们会经常看到这样的调用,而callback是http_request结构体里面的一个函数指针,它的值在请求发出之前就已经设置好了。我们后面再看。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
/* Read path (excerpt from next_from_queue()): taken on POLLIN unless
   SSL_wr_w_rd is set, or on POLLOUT when SSL_rd_w_wr is set.
   NOTE(review): in this excerpt read_res is only checked on the plain
   read() path and c->read_len is never advanced - presumably trimmed by
   the article's author; confirm against the full source. */
if
(((p[i].revents & POLLIN) && !c->SSL_wr_w_rd) ||
((p[i].revents & POLLOUT) && c->SSL_rd_w_wr)) {
if
(c->q) {
/* Read up to READ_CHUNK bytes at offset read_len of read_buf, through
   SSL for HTTPS connections and with read() otherwise. */
if
(c->proto == PROTO_HTTPS) {
read_res = SSL_read(c->srv_ssl, c->read_buf + c->read_len,
READ_CHUNK);
}
else
{
read_res = read(c->fd, c->read_buf + c->read_len, READ_CHUNK);
if
(read_res <= 0)
goto
network_error;
}
c->read_buf[c->read_len] = 0;
/* NUL-terminate for sanity. */
/* The last argument is 0 once read_len exceeds size_limit + READ_CHUNK,
   and 1 otherwise. */
p_ret = parse_response(c->q->req, c->q->res, c->read_buf, c->read_len,
(c->read_len > (size_limit + READ_CHUNK)) ? 0 : 1);
/* p_ret of 0 or 3: hand the response to the callback as STATE_OK. */
if
(!p_ret || p_ret == 3) {
c->q->res->state = STATE_OK;
keep = c->q->req->callback(c->q->req, c->q->res);
}
else
/* p_ret of 2: report STATE_RESPERR, tear the connection down and count
   an HTTP-level error. */
if
(p_ret == 2) {
c->q->res->state = STATE_RESPERR;
destroy_unlink_conn(c, c->q->req->callback(c->q->req, c->q->res));
req_errors_http++;
req_errors_cur++;
}
else
{
/* Any other p_ret: stay in the receiving state and refresh the
   last read/write timestamp. */
c->last_rw = cur_time;
c->q->res->state = STATE_RECEIVE;
}
}
else
destroy_unlink_conn(c, 0);
/* Unsolicited response! */
}
else
|
以上是处理输入读入。也就是读http响应了。读完之后我们会发现,到处调用callback。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
/* Write path (excerpt from next_from_queue()): taken on POLLOUT unless
   SSL_rd_w_wr is set, or on POLLIN when SSL_wr_w_rd is set. Sends the
   bytes of write_buf from write_off up to write_len.
   NOTE(review): write_res is not used in the visible excerpt; the handling
   was presumably trimmed by the article's author. */
if
(((p[i].revents & POLLOUT) && !c->SSL_rd_w_wr) ||
((p[i].revents & POLLIN) && c->SSL_wr_w_rd)) {
if
(c->proto == PROTO_HTTPS) {
write_res = SSL_write(c->srv_ssl, c->write_buf + c->write_off,
c->write_len - c->write_off);
}
else
{
write_res = write(c->fd, c->write_buf + c->write_off,
c->write_len - c->write_off);
}
}
}
else
/* No events reported for this slot: check the timeouts. */
if
(!p[i].revents) {
u8 keep = 0;
/* A connection with a request attached (c->q) times out on rw_tmout
   since last_rw or resp_tmout since req_start. One without a request
   times out on idle_tmout, or is closed when tear_down_idle is set. */
if
((c->q && (cur_time - c->last_rw > rw_tmout ||
cur_time - c->req_start > resp_tmout)) ||
(!c->q && (cur_time - c->last_rw > idle_tmout)) ||
(!c->q && tear_down_idle)) {
if
(c->q) {
/* Busy connection: report STATE_CONNERR to the callback and update the
   error and timeout counters. */
c->q->res->state = STATE_CONNERR;
keep = c->q->req->callback(c->q->req, c->q->res);
req_errors_net++;
req_errors_cur++;
conn_busy_tmout++;
}
else
{
/* Idle connection: count it and clear the tear_down_idle flag. */
conn_idle_tmout++;
tear_down_idle = 0;
}
destroy_unlink_conn(c, keep);
}
}
/* The assignment of next is not visible in this excerpt. */
c = next;
}
}
|
这部分就是发数据(http请求),和空闲连接的超时处理。很简单?是的,很简单。http模块够简单够高效。而这里的关键在callback上,callback其实就是http模块通知攻击模块的钩子。攻击模块的运转就是靠着一次次的callback来进行的。下面我们看看crawler.c里面,我们发现里面全都是形如inject_check*_callback的函数,这里选了几个,删减一下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
/* Starts injection attacks proper. */

static void inject_init2(struct pivot_desc* pv) {

  /* misc_cnt is set to the number of requests queued by the loop below. */
  pv->misc_cnt = BH_CHECKS;

  /* Queue BH_CHECKS copies of the pivot's request; each copy carries its
     index in user_val and reports to inject_check0_callback. */
  for (i = 0; i < BH_CHECKS; i++) {
    n = req_copy(pv->req, pv, 1);
    n->callback = inject_check0_callback;
    n->user_val = i;
    async_request(n);
  }

}
/* Callback installed by inject_init2() on each of its requests. Flags the
   pivot as varying when a response does not match RPRES(req). Once
   misc_cnt drops to zero it either ends the injection checks (response
   varies) or queues the next request with inject_check1_callback.
   Returns 0 on every visible path. */
static
u8 inject_check0_callback(
struct
http_request* req,
struct
http_response* res) {
if
(FETCH_FAIL(res)) {
handle_error(req, res, (u8*)
"during page variability checks"
, 0);
}
else
{
/* Compare this response's signature with the reference response. */
if
(!same_page(&RPRES(req)->sig, &res->sig)) {
req->pivot->res_varies = 1;
problem(PROB_VARIES, req, res, 0, req->pivot, 0);
}
}
/* Other responses are still outstanding: nothing more to do yet. */
if
((--req->pivot->misc_cnt))
return
0;
/* If response fluctuates, do not perform any injection checks at all. */
if
(req->pivot->res_varies) {
end_injection_checks(req->pivot);
return
0;
}
req->pivot->misc_cnt = 0;
/* Code elided by the article's author here; n, orig_state and tmp are
   presumably declared and set up in the omitted part. */
…
n = req_copy(req->pivot->req, req->pivot, 1);
if
(orig_state == PSTATE_CHILD_INJECT) {
replace_slash(n, (u8*)
"\\.sf\\"
);
}
else
{
ck_free(TPAR(n));
TPAR(n) = ck_strdup(tmp + 2);
ck_free(tmp);
}
n->callback = inject_check1_callback;
n->user_val = 3;
async_request(n);
return
0;
}
/* CALLBACK FOR CHECK 1: Sees if we managed to list a directory, or find
   a traversal vector. Called four times, parallelized.

   In this excerpt no response checks are shown: the function only queues
   the CHECK 2 request and returns 0. */

static u8 inject_check1_callback(struct http_request* req,
                                 struct http_response* res) {
  struct http_request* n;
  u32 orig_state = req->pivot->state;

  DEBUG_CALLBACK(req, res);

  /* No goto targets this label in the visible excerpt. */
schedule_next:

  /* CHECK 2: Backend XML injection - 2 requests. */

  n = req_copy(RPREQ(req), req->pivot, 1);
  SET_VECTOR(orig_state, n, "sfish>'>\"></sfish><sfish>");
  n->callback = inject_check2_callback;
  n->user_val = 1;
  async_request(n);

  return 0;

}
/* CALLBACK FOR CHECK 2: Examines the response for XML injection. Called twice,
   parallelized.

   In this excerpt no response checks are shown: the function releases the
   pivot's misc data, queues the CHECK 3 request and returns 0. */

static u8 inject_check2_callback(struct http_request* req,
                                 struct http_response* res) {
  struct http_request* n;
  u32 orig_state = req->pivot->state;

  DEBUG_CALLBACK(req, res);

  /* No goto targets this label in the visible excerpt. */
schedule_next:

  destroy_misc_data(req->pivot, req);

  /* CHECK 3: Shell command injection - 9 requests. */

  n = req_copy(RPREQ(req), req->pivot, 1);
  APPEND_VECTOR(orig_state, n, "'`uname`'");
  n->callback = inject_check3_callback;
  n->user_val = 8;
  async_request(n);

  return 0;

}
/* CALLBACK FOR CHECK 3: Looks for shell injection patterns. Called several
   times, parallelized.

   Stored responses are examined in groups of three (0-2, 3-5, 6-8): a
   problem is reported when the first two of a group match each other but
   the third differs. Afterwards the next request is queued with
   inject_check4_callback and 0 is returned. */

static u8 inject_check3_callback(struct http_request* req,
                                 struct http_response* res) {
  struct http_request* n;
  u32 orig_state = req->pivot->state;

  DEBUG_CALLBACK(req, res);

  if (same_page(&MRES(0)->sig, &MRES(1)->sig) &&
      !same_page(&MRES(0)->sig, &MRES(2)->sig)) {
    problem(PROB_SH_INJECT, MREQ(0), MRES(0),
            (u8*)"responses to `true` and `false` different than to `uname`",
            req->pivot, 0);
    RESP_CHECKS(MREQ(2), MRES(2));
  }

  if (same_page(&MRES(3)->sig, &MRES(4)->sig) &&
      !same_page(&MRES(3)->sig, &MRES(5)->sig)) {
    problem(PROB_SH_INJECT, MREQ(3), MRES(3),
            (u8*)"responses to `true` and `false` different than to `uname`",
            req->pivot, 0);
    RESP_CHECKS(MREQ(5), MRES(5));
  }

  if (same_page(&MRES(6)->sig, &MRES(7)->sig) &&
      !same_page(&MRES(6)->sig, &MRES(8)->sig)) {
    problem(PROB_SH_INJECT, MREQ(6), MRES(6),
            (u8*)"responses to `true` and `false` different than to `uname`",
            req->pivot, 0);
    RESP_CHECKS(MREQ(8), MRES(8));
  }

  /* No goto targets this label in the visible excerpt. */
schedule_next:

  n = req_copy(RPREQ(req), req->pivot, 1);
  SET_VECTOR(orig_state, n, new_xss_tag((u8*)".htaccess.aspx"));
  register_xss_tag(n);
  n->callback = inject_check4_callback;
  n->user_val = 1;
  async_request(n);

  return 0;

}
|
我们可以发现,每个callback的结构都差不多,先做些检查,这里就是对响应进行检查发现漏洞的地方了。然后schedule_next,在schedule_next后面将请求重新换个攻击向量,设置好请求的callback,又通过async_request(n);发出去了。async_request(n)的实现也是相当简单:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
/* Queues an HTTP request for asynchronous processing. A response object is
   allocated up front. If the request cannot be issued (too many failures,
   DNS error, request limit exceeded), the callback is invoked right away
   with the matching state, and the request and response are destroyed
   unless the callback returns non-zero. Otherwise a new entry is inserted
   at the front of the queue. Aborts via FATAL() when the request has no
   protocol or no callback set. */

void async_request(struct http_request* req) {
  struct queue_entry *qe;
  struct http_response *res;

  if (req->proto == PROTO_NONE || !req->callback)
    FATAL("uninitialized http_request");

  res = ck_alloc(sizeof(struct http_response));

  req->addr = maybe_lookup_host(req->host);

  /* Don't try to issue extra requests if max_fail
     consecutive failures exceeded; but still try to
     wrap up the (partial) scan. */

  if (req_errors_cur > max_fail) {

    DEBUG("!!! Too many subsequent request failures!\n");
    res->state = STATE_SUPPRESS;

    if (!req->callback(req, res)) {
      destroy_request(req);
      destroy_response(res);
    }

    req_dropped++;
    return;

  }

  /* DNS errors mean instant fail. */

  if (!req->addr) {

    DEBUG("!!! DNS error!\n");
    res->state = STATE_DNSERR;

    if (!req->callback(req, res)) {
      destroy_request(req);
      destroy_response(res);
    }

    req_errors_net++;
    conn_count++;
    conn_failed++;
    return;

  }

  /* Enforce user limits. */

  if (req_count > max_requests) {

    DEBUG("!!! Total request limit exceeded!\n");
    res->state = STATE_SUPPRESS;

    if (!req->callback(req, res)) {
      destroy_request(req);
      destroy_response(res);
    }

    req_dropped++;
    return;

  }

  /* OK, looks like we're good to go. Insert the request
     into the queue. */

  qe = queue;

  queue = ck_alloc(sizeof(struct queue_entry));
  queue->req  = req;
  queue->res  = res;
  queue->next = qe;

  /* Link the previous first entry back to the new one. */
  if (queue->next) queue->next->prev = queue;

  queue_cur++;
  req_count++;

}
|
先分配响应体,然后做些解析域名啊什么的,最后把请求体塞进请求队列,完事!注意这里并不做网络IO,也不会立刻返回响应,也不会阻塞。
由此可以看出,http模块对攻击和分析模块唯一的交互便是调用回调,而攻击分析模块跟http模块唯一的交互便是往队列里塞请求。主循环负责连接这两个东西,其实也就把cpu和网卡通过异步的方式连接起来了。
仔细想想可以发现,程序的主循环是在做网络数据IO。以IO来驱动其他模块联动。为什么?因为IO最慢,所以要将它的能力发挥到极限。所以就不能让IO等。所以,主循环在做IO。