-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspider.cpp
52 lines (43 loc) · 999 Bytes
/
spider.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#include "crawler.h"
#include "url.h"
#include "sqlspider.h"
#include "regex.h"
#define SIZE (1024*1024)
void startwork(int sockfd,char *arg,MySqlServer &mySql)
{
printf("startwork\n");
int recvsize=0;
char *recvbuff=recvpost(sockfd,&recvsize);
printf("recvsize=%d\n",recvsize);
Curl murl;
int depth=0;
char *p=recvbuff;
saveurl(p,murl,depth);
savehtml(recvbuff,recvsize,arg,mySql);
while(murl.q_url.size()>0)
{
Url tmp=murl.q_url.front();
murl.q_url.pop();
get_and_prase_html(murl,tmp,mySql);
}
}
// Program entry point.
//
// Expects the start URL in argv[1]. Extracts the domain name from it, opens
// a socket for that domain, sends the initial HTTP request, then hands the
// socket to startwork().
//
// Exits with status -1 when the URL argument is missing or when the socket
// cannot be created; returns 0 otherwise.
int main(int argc,char **argv)
{
    if (argc < 2)
    {
        printf("arguments of main are not enough\n");
        exit(-1);
    }

    char *domainname = get_domainname(argv[1]); // split the domain name out of the URL

    int sockfd = creat_sock(domainname); // create the socket
    if (sockfd == -1)
    {
        printf("creat socket failed\n");
        // Stop here: every later step needs a valid descriptor.
        exit(-1);
    }

    sendpost(domainname, argv[1], sockfd); // build the HTTP header and send it

    MySqlServer mySql;
    init_reg();
    startwork(sockfd, argv[1], mySql); // start the work

    // NOTE(review): sockfd is not closed explicitly; it is released when the
    // process exits. Confirm whether startwork() is expected to close it.
    return 0;
}