-
Notifications
You must be signed in to change notification settings - Fork 2
/
cdaweb.py
201 lines (153 loc) · 5.71 KB
/
cdaweb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
try:
from urllib2 import urlopen, Request
except ImportError: #python 3
from urllib.request import urlopen, Request
import xml.etree.ElementTree as ET
# Root URL prepended to the endpoint paths requested by the get_* functions
cdaweb_base_url='https://cdaweb.gsfc.nasa.gov/WS/cdasr/1'
def open_url(url, proxy=None):
    """
    Open a URL with urlopen, optionally routing the request through a proxy

    url (str): Address to open
    proxy (tuple): Optional (proxy_url, proxy_type) pair, e.g.
        ('proxy.example.edu:1406', 'http'); None means no proxy is set

    Returns the response object produced by urlopen
    """
    request = Request(url)
    if proxy is None:
        return urlopen(request)
    # The tuple is unpacked directly into Request.set_proxy
    request.set_proxy(*proxy)
    return urlopen(request)
def fetch_xml(url, proxy=None):
    """
    Fetch a URL and parse the response body as XML using ElementTree

    url (str): Address of the XML document
    proxy (tuple): Optional (proxy_url, proxy_type) pair passed to open_url

    Returns the parsed xml.etree.ElementTree.ElementTree
    """
    from contextlib import closing
    # closing() only needs a close() method, so the response is released as
    # soon as parsing finishes (or fails) without relying on the response
    # object being a context manager itself
    with closing(open_url(url, proxy=proxy)) as resp:
        return ET.parse(resp)
def element_to_dict(element):
    """
    Convert an ElementTree element into a dictionary

    Each child of the element becomes a key named after its tag, with any
    '{namespace}' prefix removed. The value is the child's text when it has
    text, or a nested dictionary built from the child's own children when
    it has none (or only layout whitespace around sub-elements). Tags that
    occur more than once are collected into a list, in document order.

    element (Element): The element whose children are converted

    Returns a dict mapping child tag names to str, dict or list values
    """
    elem_dict = {}
    for child in element:
        # Drop the '{namespace}' prefix; tags without one are kept unchanged
        tagname = child.tag.rpartition('}')[2]
        text = child.text
        # Pretty-printed XML puts whitespace-only text in front of nested
        # elements; treat that as "no text" so the nested data is not lost
        if text is None or (len(child) and not text.strip()):
            child_value = element_to_dict(child)
        else:
            child_value = text
        # Add child to the dictionary
        if tagname not in elem_dict:
            elem_dict[tagname] = child_value
        elif isinstance(elem_dict[tagname], list):
            # Third or later occurrence: extend the existing list
            elem_dict[tagname].append(child_value)
        else:
            # Second occurrence: promote the scalar to a list
            elem_dict[tagname] = [elem_dict[tagname], child_value]
    return elem_dict
def xml_to_dict(tree):
    """
    Convert the top-level elements of an ElementTree tree to dictionaries

    tree (ElementTree): The parsed document

    Returns a list holding one dictionary per direct child of the root
    """
    return [element_to_dict(top_level) for top_level in tree.getroot()]
def get_dataviews():
    """
    Retrieve every dataview listed by CDAWeb

    Returns a list of dictionaries, one per element of the response
    """
    dataviews_url = cdaweb_base_url + '/dataviews'
    response_tree = fetch_xml(dataviews_url)
    return xml_to_dict(response_tree)
def get_observatories(dataview):
    """
    Retrieve the observatory groups belonging to a dataview

    dataview (str): A CDAWeb dataview

    Returns a list of dictionaries, one per element of the response
    """
    groups_url = '/'.join(
        (cdaweb_base_url, 'dataviews', dataview, 'observatoryGroups'))
    response_tree = fetch_xml(groups_url)
    return xml_to_dict(response_tree)
def get_datasets(dataview,observatoryGroup=None):
    """
    Get a list of datasets in a dataview (optionally filtered by an
    observatory group)

    dataview (str): A CDAWeb dataview
    observatoryGroup (str): Optional observatory group to filter by

    Returns a list of dictionaries, one per element of the response

    Example:
    get_datasets('sp_phys')
    """
    try:
        from urllib import quote
    except ImportError:  # Python 3
        from urllib.parse import quote
    url = cdaweb_base_url + '/dataviews/' + dataview + '/datasets'
    query = {}
    if observatoryGroup is not None:
        query['observatoryGroup'] = observatoryGroup
    if query:
        # Percent-encode keys and values (group names may contain spaces)
        # and join pairs with '&' so further filters can be added safely
        pairs = [quote(key, safe='') + '=' + quote(value, safe='')
                 for key, value in sorted(query.items())]
        url += '?' + '&'.join(pairs)
    return xml_to_dict(fetch_xml(url))
def get_dataset_variables(dataview,dataset):
    """
    Retrieve the variables that a dataset provides

    dataview (str): A CDAWeb dataview
    dataset (str): A CDAWeb dataset

    Returns a list of dictionaries, one per element of the response

    Example:
    get_dataset_variables('sp_phys','OMNI2_H0_MRG1HR')
    """
    variables_url = '/'.join(
        (cdaweb_base_url, 'dataviews', dataview, 'datasets', dataset,
         'variables'))
    response_tree = fetch_xml(variables_url)
    return xml_to_dict(response_tree)
def get_dataset_inventory(dataview,dataset):
    """
    Retrieve the inventory (available time ranges) of a dataset

    dataview (str): A CDAWeb dataview
    dataset (str): A CDAWeb dataset

    Returns a list of dictionaries, one per element of the response

    Example:
    get_dataset_inventory('sp_phys','OMNI2_H0_MRG1HR')
    """
    inventory_url = '/'.join(
        (cdaweb_base_url, 'dataviews', dataview, 'datasets', dataset,
         'inventory'))
    response_tree = fetch_xml(inventory_url)
    return xml_to_dict(response_tree)
def datetime_to_cdaweb_url_format(datetime_value):
    """
    Convert a python datetime into a string formatted the way CDAWeb expects

    The result has the form YYYYMMDDTHHMMSSZ. Because the trailing 'Z'
    marks the time as UTC, timezone-aware values are converted to UTC
    first; naive values are formatted as given.

    datetime_value (datetime): The date/time to format

    Returns the formatted string
    """
    # getattr keeps plain date objects (which have no utcoffset) working
    utcoffset = getattr(datetime_value, 'utcoffset', None)
    offset = utcoffset() if utcoffset is not None else None
    if offset is not None:
        # Shift to UTC and drop tzinfo so the 'Z' suffix is truthful
        datetime_value = (datetime_value - offset).replace(tzinfo=None)
    return datetime_value.strftime('%Y%m%dT%H%M%SZ')
def get_file(dataview,dataset,start_date,end_date,variables,format='cdf', proxy=None):
    """
    Get a data file from CDAWeb

    dataview (str): A CDAWeb dataview
    dataset (str): A CDAWeb dataset
    start_date (datetime): Start date/time for the request
    end_date (datetime): End date/time for the request
    variables (sequence of strings): What variables to include; a single
        string is treated as one variable name
    format (str): What file format to retrieve (cdf, text, or gif)
    proxy (tuple): Optional (proxy_url, proxy_type) pair used for both the
        request and the file download

    Returns the open response object for the generated data file

    Raises ValueError if the response does not describe a file; the message
    is the status or error text from the response when one is present

    Example:
    from datetime import datetime
    get_file('sp_phys','OMNI2_H0_MRG1HR',datetime(2005,1,1),datetime(2005,2,1),['KP1800'])
    """
    start_date_str = datetime_to_cdaweb_url_format(start_date)
    end_date_str = datetime_to_cdaweb_url_format(end_date)
    # Look the name up under a different local name: assigning to
    # "basestring" inside this function would make it a local and hide the
    # Python 2 builtin
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:  # Python 3
        string_types = str
    if isinstance(variables, string_types):
        # A bare string would otherwise be joined character by character
        variables = (variables,)
    url = (cdaweb_base_url + '/dataviews/' + dataview
           + '/datasets/' + dataset
           + '/data/' + start_date_str + ',' + end_date_str
           + '/' + ','.join(variables)
           + '?format=' + format)
    print(url)
    root = fetch_xml(url, proxy).getroot()
    namespaces = {'cda': 'http://cdaweb.gsfc.nasa.gov/schema'}
    file_url = root.findtext('cda:FileDescription/cda:Name',
                             namespaces=namespaces)
    if file_url is None:
        status = root.findtext('cda:Status', namespaces=namespaces)
        error = root.findtext('cda:Error', namespaces=namespaces)
        if status is not None:
            raise ValueError(status)
        if error is not None:
            raise ValueError(error)
        # Nothing usable in the response: fail here instead of passing
        # None on to open_url
        raise ValueError('CDAWeb response for ' + url
                         + ' contained no file, status or error')
    return open_url(file_url, proxy=proxy)
def get_cdf(*args,**kwargs):
    """
    Get a CDF file and read it (all arguments are passed to cdaweb.get_file)

    The download is written to a temporary file, which is then read with
    spacepy.datamodel.fromCDF.

    Returns whatever spacepy.datamodel.fromCDF returns for the file

    Example:
    from datetime import datetime
    get_cdf('sp_phys','OMNI2_H0_MRG1HR',datetime(2005,1,1),datetime(2005,2,1),['KP1800'])
    """
    from contextlib import closing
    from tempfile import NamedTemporaryFile
    from shutil import copyfileobj
    import spacepy.datamodel as dm
    with NamedTemporaryFile() as tmpfile:
        # Close the HTTP response as soon as its body has been copied
        with closing(get_file(*args, **kwargs)) as resp:
            copyfileobj(resp, tmpfile)
        # fromCDF is given only the file name, so make sure the buffered
        # bytes are on disk before it reads the file
        tmpfile.flush()
        tmpfile.seek(0)
        data = dm.fromCDF(tmpfile.name)
    return data