-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregex.cpp
45 lines (39 loc) · 899 Bytes
/
regex.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#include "regex.h"
// Regular expression matching an href="..." attribute, written in POSIX
// basic syntax (groups are spelled \( ... \)). Group 1 captures the
// attribute value: a run of characters that are not a space, '>' or '"'.
// Optional whitespace between the quotes and the value is skipped by \s*.
// NOTE(review): \s is not part of POSIX basic syntax; it is an extension
// some regex libraries accept. Confirm it behaves as intended on every
// target platform.
const char * HREF_PATTERN = "href=\"\\s*\\([^ >\"]*\\)\\s*\"";
// Compiled form of HREF_PATTERN shared by the functions in this file:
// set up in init_reg(), matched against in geturl(), released in free_reg().
regex_t reg;
void init_reg()
{
if(regcomp(®,HREF_PATTERN,0)!=0)
{
printf("regcomp error\n");
exit(-1);
}
}
void free_reg()
{
regfree(®);
}
// Entry point for link extraction.
//
// Prints a start banner to stdout and then passes the text in `buff`,
// together with `murl` and `depth`, to geturl().
//
// buff  - text to scan; the caller's pointer is never modified.
// murl  - forwarded to geturl() unchanged.
// depth - forwarded to geturl() unchanged.
void saveurl(char *buff,Curl &murl,int depth)
{
    fputs("save url start...\n", stdout);

    // geturl() takes its cursor by reference and advances it while
    // scanning, so give it a local pointer of its own to move.
    char *cursor = buff;
    geturl(cursor, murl, depth);
}
void geturl(char *&p,Curl &murl,int depth)
{
int nmatch=2;
regmatch_t matchptr[nmatch];
char *tmp=NULL;
int len=0;
while (regexec(®, p, nmatch, matchptr, 0) != REG_NOMATCH) {
printf("regexec is ok\n");
len = (matchptr[1].rm_eo - matchptr[1].rm_so);
p = p + matchptr[1].rm_so;
tmp = (char *)calloc(len+3, 1);
assert(tmp!=NULL);
strncpy(tmp, p, len);
tmp[len] = '\0';
p = p + len + (matchptr[0].rm_eo - matchptr[1].rm_eo);
murl.push_url(tmp,depth+1);
free(tmp);
}
}