[ENH] Use pregenerated figures (#96)
* [ENH] Switch to ssh submodules

So I don't have to retype my password

* [ENH] Add a script to prerender figures

* [ENH] Make the dash app load the images

This also means the app won't work if the images are not there

* Apply suggestions from code review

Co-authored-by: Alyssa Dai <[email protected]>

* Revert "[ENH] Switch to ssh submodules"

This reverts commit bdc1979.

* More suggestions from review

* Use prerendered figures everywhere

Also use NUM_DECIMALS

* Update data

* Updating data submodule to most recent commit

* Regenerate the prerendered figures

* Make pre-gen figures executable

And run that in the GitHub Actions workflow

* Update .github/workflows/update_submodule.yml

Co-authored-by: Alyssa Dai <[email protected]>

* Add Alyssa's python setup suggestion

---------

Co-authored-by: Alyssa Dai <[email protected]>
surchs and alyssadai authored Oct 10, 2024
1 parent 6771c1a commit e2ba7d0
Showing 6 changed files with 126 additions and 29 deletions.
18 changes: 17 additions & 1 deletion .github/workflows/update_submodule.yml
@@ -16,11 +16,27 @@ jobs:
           submodules: recursive
           token: ${{ secrets.CLIMATE_DB_DATA_PAT }}
 
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
       - name: Update submodule
         run: |
           cd data
-          git switch main && git pull
+          git switch main
+          output=$(git pull)
+          echo "$output"
           cd ..
+          if [[ "$output" != *"Already up to date."* ]]; then
+            ./code/create_prerendered_figures.py
+          fi
       - name: Create pull request
         uses: peter-evans/create-pull-request@v7
34 changes: 18 additions & 16 deletions climate_emotions_map/app.py
@@ -15,7 +15,11 @@
 from dash.exceptions import PreventUpdate
 
 from . import utility as utils
-from .data_loader import NATIONAL_SAMPLE_SIZE, SURVEY_DATA
+from .data_loader import (
+    NATIONAL_SAMPLE_SIZE,
+    PRERENDERED_BARPLOTS,
+    SURVEY_DATA,
+)
 from .layout import MAP_LAYOUT, SINGLE_SUBQUESTION_FIG_KW, construct_layout
 from .make_descriptive_plots import make_descriptive_plots
 from .make_map import make_map
@@ -307,25 +311,23 @@ def update_stacked_bar_plots(
     show_all_responses_checked,
 ):
     """Update the stacked bar plots for all questions based on the selected criteria."""
+    if show_all_responses_checked:
+        threshold = None
+    elif not show_all_responses_checked:
+        threshold = DEFAULT_QUESTION["outcome"]
+
+    figure_lookup_key = (
+        state,
+        is_party_stratify_checked,
+        threshold,
+        NUM_DECIMALS,
+    )
+
     figures = []
     for output in ctx.outputs_list:
         # Example: {'id': {'question': 'q2', 'type': 'stacked-bar-plot'}, 'property': 'figure'}
         question = output["id"]["question"]
 
-        if show_all_responses_checked:
-            threshold = None
-        elif not show_all_responses_checked:
-            threshold = DEFAULT_QUESTION["outcome"]
-
-        figure = make_stacked_bar(
-            question=question,
-            subquestion="all",
-            state=state,
-            stratify=is_party_stratify_checked,
-            threshold=threshold,
-            decimals=NUM_DECIMALS,
-        )
-        figures.append(figure)
+        figures.append(PRERENDERED_BARPLOTS[figure_lookup_key][question])
 
     return figures
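
A minimal sketch (not part of the diff) of the lookup this callback now performs, assuming the pickle written by code/create_prerendered_figures.py is present and using the "q2" question ID from the example comment above:

from climate_emotions_map.data_loader import PRERENDERED_BARPLOTS
from climate_emotions_map.utility import DEFAULT_QUESTION, NUM_DECIMALS

# National view: no state selected, no party stratification, default outcome threshold
lookup_key = (None, False, DEFAULT_QUESTION["outcome"], NUM_DECIMALS)

# Each value is a dict mapping question IDs to prerendered plotly figures,
# so updating the plots is a dictionary lookup instead of a make_stacked_bar() call
figure = PRERENDERED_BARPLOTS[lookup_key]["q2"]
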
25 changes: 22 additions & 3 deletions climate_emotions_map/data_loader.py
@@ -6,15 +6,18 @@
 """
 
 import json
+import pickle as pkl
 from pathlib import Path
 
 import pandas as pd
 
+BASE_PATH = Path(__file__).parents[1]
+
 
 def load_data_file(file: str) -> pd.DataFrame:
     """Load a TSV data file into a dataframe."""
     return pd.read_csv(
-        Path(__file__).parents[1] / "data" / "survey_results" / file,
+        BASE_PATH / "data" / "survey_results" / file,
         sep="\t",
         dtype={"question": str, "sub_question": str, "outcome": str},
     )
@@ -23,7 +26,7 @@ def load_data_dictionary(file: str) -> pd.DataFrame:
 def load_data_dictionary(file: str) -> pd.DataFrame:
     """Load a data dictionary TSV into a dataframe."""
     return pd.read_csv(
-        Path(__file__).parents[1] / "data" / "data_dictionaries" / file,
+        BASE_PATH / "data" / "data_dictionaries" / file,
         sep="\t",
         # Some data dictionaries have "None" as a meaningful value, so we have to prevent it
         # from being interpreted as a NaN by pandas
@@ -32,6 +35,21 @@ def load_data_dictionary(file: str) -> pd.DataFrame:
     )
 
 
+def load_prerendered_figures(file: str) -> dict:
+    """Load a pickle file containing a dictionary of prerendered plotly figures."""
+    target_file = BASE_PATH / "code/assets" / file
+    # Because this module always runs the loaders, even when imported by the create_prerendered_figures module,
+    # we need to allow for the file to not exist yet the first time the script is run
+    if not target_file.exists():
+        print(
+            "Prerendered figures not found. Run create_prerendered_figures.py to generate them."
+        )
+        return {}
+
+    print(f"Loading prerendered figures from {target_file}")
+    return pkl.load(target_file.open("rb"))
+
+
 def remove_ignored_rows(df: pd.DataFrame) -> pd.DataFrame:
     """Remove rows from a dataframe that have a value of TRUE in the "ignore" column."""
     return df[df["ignore"] == False]
@@ -40,7 +58,7 @@ def remove_ignored_rows(df: pd.DataFrame) -> pd.DataFrame:
 def load_geojson_object(file: str) -> dict:
     """Load a geojson file into a dataframe."""
     return json.loads(
-        (Path(__file__).parents[1] / "code" / "assets" / file).read_text(),
+        (BASE_PATH / "code" / "assets" / file).read_text(),
     )
 
 
@@ -155,3 +173,4 @@ def get_domain_text() -> dict[str, str]:
 
 NATIONAL_SAMPLE_SIZE = SURVEY_DATA["samplesizes_state.tsv"]["n"].sum()
 GEOJSON_OBJECTS = load_geojson_objects()
+PRERENDERED_BARPLOTS = load_prerendered_figures("prerendered_figures.pkl")
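
As the commit message notes, the app now depends on this pickle: when code/assets/prerendered_figures.pkl is absent, load_prerendered_figures() returns an empty dict and the PRERENDERED_BARPLOTS lookups in app.py and layout.py fail. A small sketch for checking the loader in isolation (assumes the package imports resolve, e.g. when run from the repository root):

from climate_emotions_map.data_loader import load_prerendered_figures

store = load_prerendered_figures("prerendered_figures.pkl")
if not store:
    # The loader prints a hint and returns {} when the pickle has not been generated yet
    print("No prerendered figures found; run code/create_prerendered_figures.py first")
else:
    print(f"Loaded {len(store)} (state, stratify, threshold, decimals) combinations")
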
13 changes: 4 additions & 9 deletions climate_emotions_map/layout.py
@@ -5,7 +5,7 @@
 from dash import dcc, html
 
 from . import utility as utils
-from .data_loader import DATA_DICTIONARIES, DOMAIN_TEXT
+from .data_loader import DATA_DICTIONARIES, DOMAIN_TEXT, PRERENDERED_BARPLOTS
 from .make_descriptive_plots import make_descriptive_plots
 from .make_map import make_map
 from .make_stacked_bar_plots import make_stacked_bar
@@ -440,14 +440,9 @@ def create_bar_plots_for_question(question_id: str, subquestion_id: str):
                 "type": "stacked-bar-plot",
                 "question": question_id,
             },
-            figure=make_stacked_bar(
-                question=question_id,
-                subquestion=subquestion_id,
-                state=None,
-                stratify=False,
-                threshold=DEFAULT_QUESTION["outcome"],
-                decimals=NUM_DECIMALS,
-            ),
+            figure=PRERENDERED_BARPLOTS[
+                None, False, DEFAULT_QUESTION["outcome"], NUM_DECIMALS
+            ][question_id],
             config=DCC_GRAPH_CONFIG,
         ),
         w=1200,
Binary file added code/assets/prerendered_figures.pkl
Binary file not shown.
65 changes: 65 additions & 0 deletions code/create_prerendered_figures.py
@@ -0,0 +1,65 @@
#!/usr/bin/env python

import pickle as pkl
import sys
from pathlib import Path

# Hacky hacky gets the job done for the next import
sys.path.append(str(Path(__file__).parent.parent))

from climate_emotions_map.make_stacked_bar_plots import (  # noqa
    DATA_DICTIONARIES,
    make_stacked_bar,
)
from climate_emotions_map.utility import DEFAULT_QUESTION, NUM_DECIMALS  # noqa

UNIQUE_QUESTIONS = (
    DATA_DICTIONARIES["question_dictionary.tsv"]["question"].unique().tolist()
)
UNIQUE_STATES = (
    DATA_DICTIONARIES["state_abbreviations.tsv"]["state"].unique().tolist()
)
OUTPUT_FILE = Path(__file__).parents[0] / "assets/prerendered_figures.pkl"


def make_full_set_of_barplots(
    state=None, stratify=None, threshold=None, decimals=NUM_DECIMALS
):
    """
    This returns a dictionary for all questions where keys are question IDs
    and values are the plotly graph object figure for each question.
    """
    return {
        question: make_stacked_bar(
            question, "all", state, stratify, threshold, decimals
        )
        for question in UNIQUE_QUESTIONS
    }


def make_all_figures():
    """
    Iterate through all combinations of state, stratification, and threshold
    to create the complete set of figures.
    Returns a dictionary keyed on the tuple (state, stratify, threshold, decimals), in that order.
    """
    figures = {}
    # A state of None means we are looking at national level questions
    for state in UNIQUE_STATES + [None]:
        for stratify in [False, True]:
            # For state level figures, we don't stratify by party
            if state is not None and stratify:
                continue
            for threshold in [None, DEFAULT_QUESTION["outcome"]]:
                key = (state, stratify, threshold, NUM_DECIMALS)
                figures[key] = make_full_set_of_barplots(*key)
    return figures


if __name__ == "__main__":
    figures = make_all_figures()
    with OUTPUT_FILE.open("wb") as f:
        pkl.dump(figures, f)

    print(f"Done prerendering figures to {OUTPUT_FILE}!")
