-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
74 lines (66 loc) · 2.27 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def get_path(target):
    """Return the strftime template under which a page's images are filed.

    ``target`` is a page path such as ``"radar/chinaall.html"``: a stem
    followed by exactly one ``"."`` and an extension.  The result has the
    form ``"<dirname>/<pattern>"``, where ``dirname`` is derived from the
    stem and ``pattern`` still contains ``%Y``/``%m``/``%d``/``%H``/``%M``
    directives meant to be expanded with ``datetime.strftime``.

    Raises:
        ValueError: if ``target`` does not contain exactly one ``"."``, or
            if it does not belong to one of the recognised page families
            (``observations/``, ``radar/``, ``tianqishikuang/``,
            ``satellite/``).
    """
    # Explicit check rather than ``assert``: asserts are stripped under
    # ``python -O``, which would let malformed targets through.
    if target.count(".") != 1:
        raise ValueError("target.count('.') != 1")
    stem, ext = target.split(".")  # ext is expected to be "htm" or "html"

    # Defaults; the satellite and hourly-observation pages use them as-is.
    dirname = stem.replace("/", "-")
    path = "%Y%m/%Y%m%d/%Y%m%d_%H%Mz.png"

    if stem.startswith("observations/"):
        # Order matters: the suffix checks take precedence over "hourly-".
        if stem.endswith(("heavyrain", "gale")):
            path = "%Y%m/%Y%m%d/%Y%m%d_%H%M.png"  # no "z"
        elif stem.endswith("24hour-precipitation"):
            path = "%Y%m/24hour_%Y%m%d_%H%Mz.png"
        elif "/dm/" in stem:
            # Second path component plus the last four characters of the stem.
            prefix = stem.split("/")[1] + "_" + stem[-4:]
            path = f"%Y%m/{prefix}_%Y%m%d_%H%Mz.png"
        elif not stem.startswith("observations/hourly-"):
            raise ValueError(f"no match: {stem}")
    elif stem.startswith("radar/"):
        if ext not in ("htm", "html"):
            raise ValueError(f"no match: {stem}")
        prefix = stem.split("/")[-1]
        path = f"%Y%m/%Y%m%d/{prefix}_%Y%m%d_%H%Mz.png"
        if ext == "htm":
            # Only the first "/" is flattened, so the remaining components
            # of the stem stay as nested sub-directories.
            dirname = stem.replace("/", "-", 1)
    elif stem.startswith("tianqishikuang/"):
        dirname = dirname.replace("-index", "")
    elif not stem.startswith("satellite/"):
        raise ValueError(f"no match: {stem}")

    return dirname + "/" + path
from datetime import datetime
def url2time(url):
    """Parse the timestamp embedded in an image URL.

    The first run of 12 consecutive digits found in ``url`` is interpreted
    as ``YYYYmmddHHMM`` and returned as a naive ``datetime``.

    Raises:
        ValueError: if ``url`` contains no 12-digit run, or if those digits
            do not form a valid date/time.
    """
    match = re.search(r"\d{12}", url)
    if match is None:
        # Without this guard the failure would surface as an opaque
        # AttributeError from calling ``.group()`` on ``None``.
        raise ValueError(f"no 12-digit timestamp in url: {url}")
    return datetime.strptime(match.group(), "%Y%m%d%H%M")
from sys import stderr
import requests
import re
def get_nmc_imgs(target, timeout=30):
    """Yield ``(url, path)`` for each image referenced by a publish page.

    The page ``http://www.nmc.cn/publish/<target>`` is downloaded and every
    ``data-img="..."`` attribute in it is treated as an image URL.  The
    query string is dropped from the URL, and ``path`` is the template from
    ``get_path(target)`` expanded with the timestamp ``url2time`` extracts
    from the URL.

    Args:
        target: page path relative to the publish root.
        timeout: seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Any exception (bad target, network failure, HTTP error status, image
    URL without a timestamp) is reported on stderr and ends the iteration
    for this target; it is not propagated to the caller.
    """
    try:
        # The template depends only on the target, so build it once; doing
        # it first also rejects an unsupported target before any request.
        path_template = get_path(target)
        url_nmc = f"http://www.nmc.cn/publish/{target}"
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url_nmc, timeout=timeout)
        # Report 4xx/5xx responses instead of scanning an error page.
        response.raise_for_status()
        for match in re.finditer(r'data-img="(.*?)"', response.text):
            url = match.group(1).split("?")[0]
            yield url, url2time(url).strftime(path_template)
    except Exception as e:
        print(f"{target}: {e.__class__.__name__}: {e}", file=stderr)
from sys import stdin
def read_targets(stream=None):
    """Yield the targets listed in a line-oriented text stream.

    On each line, everything from the first ``"#"`` onward is discarded as
    a comment and surrounding whitespace is stripped; lines that end up
    empty are skipped.

    Args:
        stream: any iterable of text lines.  Defaults to standard input.
    """
    if stream is None:
        stream = stdin
    for line in stream:
        entry = line.partition("#")[0].strip()
        if entry:
            yield entry
# Script driver: for every target read from the input, print one
# "<image url> <archive path>" line per image found on that target's page.
for page in read_targets():
    for image_url, archive_path in get_nmc_imgs(page):
        print(f"{image_url} {archive_path}")