"""Fetch the latest published COVID-19 case figures for Sheffield.

Sends one request to the gov.uk coronavirus data API (developer guide:
https://coronavirus.data.gov.uk/developers-guide) and prints the decoded
JSON response. Raises RuntimeError when the API answers with an HTTP
status of 400 or above.
"""

# https://docs.python.org/3/library/xml.etree.elementtree.html
import xml.etree

# https://docs.python.org/3/library/argparse.html
import argparse

# https://docs.python.org/3/library/csv.html
import csv

# https://docs.python.org/3/library/json.html
import json

# https://dateutil.readthedocs.io/en/2.8.1/
import dateutil.parser

# https://pypi.org/project/html5lib/
import html5lib

# https://requests.readthedocs.io/en/master/
import requests

# https://www.tutorialspoint.com/matplotlib/matplotlib_bar_plot.htm
import numpy as np
import matplotlib.pyplot as plt

from datetime import date

from requests import get
from json import dumps


ENDPOINT = "https://api.coronavirus.data.gov.uk/v1/data"
AREA_TYPE = "utla"
AREA_NAME = "sheffield"

# Restrict the query to a single upper-tier local authority.
filters = [
    f"areaType={AREA_TYPE}",
    f"areaName={AREA_NAME}",
]

# Maps the key names wanted in the response to the API metric that fills
# them. The values must be metric names: the previous literal "2020-10-11"
# is not a metric, so "date" is requested instead.
# NOTE(review): if one specific day was intended, add a "date=YYYY-MM-DD"
# entry to `filters` rather than changing this mapping - confirm intent.
structure = {
    "date": "date",
    "cases": {
        "daily": "newCasesByPublishDate",
        "cumulative": "cumCasesByPublishDate",
    },
}

api_params = {
    "filters": ";".join(filters),
    # Compact separators keep the JSON in the query string short.
    "structure": dumps(structure, separators=(",", ":")),
    # Only return the most recent record that has a value for this metric.
    "latestBy": "newCasesByPublishDate",
}


response = get(ENDPOINT, params=api_params, timeout=10)

if response.status_code >= 400:
    raise RuntimeError(f'Request failed: { response.text }')

print(response.json())
def heat_transform(rows):
    """
    Build sorted records that carry the weekday as well as the ISO date.

    Similar to transform(), but every output record starts with the day of
    the week so that callers can group values by weekday.

    rows: iterable of sequences whose first item is a date string that
          dateutil can parse and whose remaining items are convertible
          with int().

    Returns a sorted list of lists of the form
    [weekday, iso_date, value, ...], where weekday is the string form of
    0-6 (0 = Monday) and iso_date is the string form of the parsed date.
    """
    result = []
    for row in rows:
        # Parse the date once and derive both fields from the same object
        # instead of parsing the same string twice.
        parsed = dateutil.parser.parse(row[0])
        record = [str(parsed.weekday()), str(parsed.date())]
        record.extend(int(x) for x in row[1:])
        result.append(record)

    return sorted(result)
def create_heatmap(heatdata):
    """
    Save an annotated heatmap of summed case counts per weekday.

    heatdata: rows shaped like the output of heat_transform(); index 0 is
              the weekday (0-6, 0 = Monday), index 2 is read as the staff
              count and index 3 as the student count.

    The figure has one row for students and one for staff, with one column
    per weekday, and is written to the current working directory as
    "<today>-covid-cases-by-weekday.png" at 600 dpi.
    """
    day_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    case_labels = ["Students", "Staff"]

    # Pull out the needed columns directly; this also copes with an empty
    # heatdata list, which 2-D slicing of np.array(heatdata) does not.
    weekdays = np.array([int(row[0]) for row in heatdata], dtype=int)
    student_vals = np.array([int(row[3]) for row in heatdata], dtype=int)
    staff_vals = np.array([int(row[2]) for row in heatdata], dtype=int)

    # bincount acts as a weighted histogram over the weekday index.
    # minlength guarantees one bin per weekday even when the latest
    # weekdays have no rows; without it the result can be shorter than 7
    # and the annotation step below would index past the end of the array.
    student_sums = np.bincount(weekdays, weights=student_vals, minlength=len(day_names))
    staff_sums = np.bincount(weekdays, weights=staff_vals, minlength=len(day_names))

    plotarray = np.array([student_sums, staff_sums])

    # plot implementation uses method from https://matplotlib.org/3.1.1/gallery/images_contours_and_fields/image_annotated_heatmap.html
    fig, ax = plt.subplots()
    ax.imshow(plotarray)

    ax.set_xticks(np.arange(len(day_names)))
    ax.set_yticks(np.arange(len(case_labels)))

    ax.set_xticklabels(day_names)
    ax.set_yticklabels(case_labels)

    # Slant the weekday labels so they do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Write each cell's value on top of the cell (x = column, y = row).
    for i in range(len(case_labels)):
        for j in range(len(day_names)):
            ax.text(j, i, plotarray[i, j],
                    ha="center", va="center", color="w")

    ax.set_title("Case rates per weekday")
    fig.tight_layout()
    heatmapname = str(date.today()) + "-covid-cases-by-weekday.png"
    plt.savefig(heatmapname, dpi=600)
dumps(structure, separators=(",", ":")), - "latestBy": "newCasesByPublishDate" -} - - -response = get(ENDPOINT, params=api_params, timeout=10) - -if response.status_code >= 400: - raise RuntimeError(f'Request failed: { response.text }') - -#print(response.url) -print(response.json())