-
Notifications
You must be signed in to change notification settings - Fork 83
/
gfwlist.py
executable file
·99 lines (80 loc) · 2.59 KB
/
gfwlist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/python3
import os
import base64
import json
import urllib.parse
import requests
# Path of a local copy of the base64-encoded list; when this file exists,
# get_gfwlist() reads it instead of downloading.
GFWLIST_FILE = "gfwlist.txt"
# URL that get_gfwlist() downloads the list from when no local copy exists.
GFWLIST_URL = 'https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt'
def get_gfwlist():
    """Return the decoded GFWList rule text.

    The base64-encoded list is read from the local ``GFWLIST_FILE`` when
    that file exists; otherwise it is downloaded from ``GFWLIST_URL``.

    Returns:
        The base64-decoded content as a UTF-8 string with trailing
        newlines removed.

    Raises:
        requests.RequestException: If the download fails, times out, or
            the server answers with an error status.
        ValueError: If the payload cannot be decoded.
    """
    if os.path.isfile(GFWLIST_FILE):
        # Pin the encoding so the read does not depend on the locale default.
        with open(GFWLIST_FILE, "r", encoding="utf-8") as f:
            text = f.read()
    else:
        # Without a timeout, requests waits forever on a stalled connection.
        r = requests.get(GFWLIST_URL, timeout=30)
        r.raise_for_status()
        text = r.text
    return base64.b64decode(text).decode("utf-8").rstrip("\n")
def update_domains(domains, host, mode=0):
    """Record *host* in the nested *domains* tree.

    The host name is stripped of leading/trailing dots and split into
    labels; the tree is descended from the last label to the first
    (``www.example.com`` -> ``com`` -> ``example`` -> ``www``), creating
    missing levels on the way. The final node gets its ``"@"`` entry set
    to *mode*, overwriting any previous value.

    Args:
        domains: Dict tree that is modified in place.
        host: Dotted host name.
        mode: Value stored under ``"@"`` for this host (defaults to 0).
    """
    labels = host.strip(".").split(".")
    node = domains
    for label in reversed(labels):
        # setdefault both creates the missing level and steps into it.
        node = node.setdefault(label, {})
    node["@"] = mode
def postproc_domains(domains):
    """Collapse leaf nodes of the domain tree in place.

    Every child dict that holds nothing but the ``"@"`` marker is replaced
    by the marker's value (``{"@": 1}`` becomes ``1``) to save some text in
    the serialized output. All other child dicts are processed
    recursively. The ``"@"`` entry of *domains* itself is left alone.

    Children that are not dicts (i.e. already collapsed) are skipped, so
    running the function again on the same tree is a harmless no-op
    instead of failing on ``len()`` of an int.

    Args:
        domains: Dict tree that is modified in place.
    """
    # Iterate over a snapshot because entries are reassigned during the walk.
    for key, child in list(domains.items()):
        if key == "@" or not isinstance(child, dict):
            continue
        if len(child) == 1 and "@" in child:
            domains[key] = child["@"]
        else:
            postproc_domains(child)
def parse_gfwlist(text):
    """Parse decoded GFWList text into a domain tree and pattern lists.

    The first line of *text* is always skipped (presumably the list
    header). Blank lines and lines starting with ``!`` are ignored. A
    leading ``@@`` marks a rule as whitelisted (mode 1) instead of
    blacklisted (mode 0). The remainder of the rule is handled as follows:

    * ``||host`` - the host is added to the domain tree.
    * ``/...``   - regular expression, not supported and dropped.
    * otherwise  - keyword pattern; a ``|`` at either end anchors that
      end and is removed, an unanchored end gets a ``*`` wildcard.

    Args:
        text: Decoded rule list, one rule per line.

    Returns:
        A ``(domains, blackpat, whitepat)`` tuple: the post-processed
        domain tree, the blacklisted patterns and the whitelisted patterns.
    """
    domains = {}
    blackpat = []  # blacklisted patterns
    whitepat = []  # whitelisted patterns

    rules = text.splitlines()
    for raw in rules[1:]:
        # Skip comments and empty lines.
        if raw.startswith("!") or not raw.strip():
            continue

        is_white = raw.startswith("@@")
        rule = raw[2:] if is_white else raw
        mode = 1 if is_white else 0

        if rule.startswith("||"):
            # Domain prefix rule.
            update_domains(domains, rule[2:], mode)
            continue
        if rule.startswith("/"):
            # Regex rule, can't handle yet.
            continue

        # Keyword pattern: a single vertical bar at either side means a
        # string boundary. The end check must see the already-trimmed start.
        pattern = rule[1:] if rule.startswith("|") else "*" + rule
        pattern = pattern[:-1] if pattern.endswith("|") else pattern + "*"

        bucket = whitepat if is_white else blackpat
        bucket.append(pattern)

    postproc_domains(domains)
    return domains, blackpat, whitepat
def generate_pac_partial():
    """Build the JavaScript variable declarations for the PAC file.

    Fetches and parses the rule list, then renders the domain tree and
    both pattern lists as indented JSON assigned to the ``DOMAINS``,
    ``BLACKPAT`` and ``WHITEPAT`` variables.

    Returns:
        The generated JavaScript source as a string.
    """
    template = "var DOMAINS = {};\n\nvar BLACKPAT = {};\n\nvar WHITEPAT = {};\n"
    parsed = parse_gfwlist(get_gfwlist())
    # parsed is (domains, blackpat, whitepat), matching the template order.
    rendered = [json.dumps(part, indent=2) for part in parsed]
    return template.format(*rendered)
if __name__ == "__main__":
    # Run as a script: write the generated PAC fragment to standard output.
    pac_partial = generate_pac_partial()
    print(pac_partial)