From e5be0f2e1a94fa781597a55ae73daaf749bd883b Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Tue, 31 Oct 2023 20:57:40 +0000 Subject: [PATCH] Slimed down new commit --- .devcontainer/devcontainer.json | 51 ++++ .devcontainer/initializeCommand | 2 + .devcontainer/initializeCommand.cmd | 1 + .devcontainer/initializeCommand.ps1 | 7 + .devcontainer/postCreateCommand | 8 + .env-example | 1 + .gitattributes | 9 + .github/workflows/build_and_publish.yml | 99 ++++++ .github/workflows/test.yml | 31 ++ .gitignore | 12 + .gitmodules | 6 + .vscode/launch.json | 15 + .vscode/settings.json | 46 +++ Dockerfile | 36 +++ Dockerfile.dev | 15 + LICENSE | 21 ++ data/.gitinclude | 0 data/interim/.gitinclude | 0 .../public_whip_data/datapackage.yaml | 277 +++++++++++++++++ .../pw_division.resource.yaml | 107 +++++++ .../pw_dyn_dreammp.resource.yaml | 51 ++++ .../pw_dyn_dreamvote.resource.yaml | 54 ++++ .../pw_dyn_wiki_motion.resource.yaml | 90 ++++++ .../public_whip_data/pw_moffice.resource.yaml | 59 ++++ .../public_whip_data/pw_mp.resource.yaml | 106 +++++++ .../public_whip_data/pw_vote.resource.yaml | 40 +++ docker-compose.yml | 9 + docs/.gitinclude | 0 docs/Gemfile | 46 +++ docs/Gemfile.lock | 274 +++++++++++++++++ docs/data.json | 3 + docs/index.md | 8 + docs/sass/_bootstrap-compat.scss | 237 ++++++++++++++ docs/sass/_header.scss | 64 ++++ docs/sass/mysoc.scss | 44 +++ notebooks/_render_config/default.yaml | 16 + notebooks/example.ipynb | 274 +++++++++++++++++ pyproject.toml | 34 +++ readme.md | 12 + script/server | 4 + script/setup | 2 + script/test | 29 ++ script/update-from-template | 17 ++ src/publicwhip_data/__init__.py | 0 src/publicwhip_data/__main__.py | 20 ++ src/publicwhip_data/mysql2sqlite | 289 ++++++++++++++++++ src/publicwhip_data/process.py | 143 +++++++++ tests/test_publicwhip_data.py | 7 + 48 files changed, 2676 insertions(+) create mode 100644 .devcontainer/devcontainer.json create mode 100644 .devcontainer/initializeCommand create mode 100644 
.devcontainer/initializeCommand.cmd create mode 100644 .devcontainer/initializeCommand.ps1 create mode 100644 .devcontainer/postCreateCommand create mode 100644 .env-example create mode 100644 .gitattributes create mode 100644 .github/workflows/build_and_publish.yml create mode 100644 .github/workflows/test.yml create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json create mode 100644 Dockerfile create mode 100644 Dockerfile.dev create mode 100644 LICENSE create mode 100644 data/.gitinclude create mode 100644 data/interim/.gitinclude create mode 100644 data/packages/public_whip_data/datapackage.yaml create mode 100644 data/packages/public_whip_data/pw_division.resource.yaml create mode 100644 data/packages/public_whip_data/pw_dyn_dreammp.resource.yaml create mode 100644 data/packages/public_whip_data/pw_dyn_dreamvote.resource.yaml create mode 100644 data/packages/public_whip_data/pw_dyn_wiki_motion.resource.yaml create mode 100644 data/packages/public_whip_data/pw_moffice.resource.yaml create mode 100644 data/packages/public_whip_data/pw_mp.resource.yaml create mode 100644 data/packages/public_whip_data/pw_vote.resource.yaml create mode 100644 docker-compose.yml create mode 100644 docs/.gitinclude create mode 100644 docs/Gemfile create mode 100644 docs/Gemfile.lock create mode 100644 docs/data.json create mode 100644 docs/index.md create mode 100644 docs/sass/_bootstrap-compat.scss create mode 100644 docs/sass/_header.scss create mode 100644 docs/sass/mysoc.scss create mode 100644 notebooks/_render_config/default.yaml create mode 100644 notebooks/example.ipynb create mode 100644 pyproject.toml create mode 100644 readme.md create mode 100644 script/server create mode 100644 script/setup create mode 100644 script/test create mode 100644 script/update-from-template create mode 100644 src/publicwhip_data/__init__.py create mode 100644 src/publicwhip_data/__main__.py create mode 100644 
src/publicwhip_data/mysql2sqlite create mode 100644 src/publicwhip_data/process.py create mode 100644 tests/test_publicwhip_data.py diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..467de222 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,51 @@ +{ + "name": "mysoc_notebook", + "dockerComposeFile": "../docker-compose.yml", + "service": "app", + "overrideCommand": true, + "initializeCommand": [ + ".devcontainer/initializeCommand" + ], + "postCreateCommand": ".devcontainer/postCreateCommand", + "workspaceFolder": "/workspaces/publicwhip_data", + "extensions": [ + "ms-vscode.test-adapter-converter", + "bungcip.better-toml", + "ms-python.python", + "ms-python.vscode-pylance", + "ms-azuretools.vscode-docker", + "valentjn.vscode-ltex" + ], + "customizations": { + "codespaces": { + "repositories": { + "mysociety/data_common": { + "permissions": { + "contents": "write", + "actions": "write", + "deployments": "write", + "issues": "write", + "packages": "read", + "pull_requests": "write", + "repository_projects": "write", + "statuses": "write", + "workflows": "write" + } + }, + "mysociety/mysociety-docs-theme": { + "permissions": { + "contents": "write", + "actions": "write", + "deployments": "write", + "issues": "write", + "packages": "read", + "pull_requests": "write", + "repository_projects": "write", + "statuses": "write", + "workflows": "write" + } + } + } + } + } +} \ No newline at end of file diff --git a/.devcontainer/initializeCommand b/.devcontainer/initializeCommand new file mode 100644 index 00000000..8e72f729 --- /dev/null +++ b/.devcontainer/initializeCommand @@ -0,0 +1,2 @@ +#!/bin/sh +[ ! 
-d "src/data_common/src" ] && git submodule update --init || echo "Already exists" diff --git a/.devcontainer/initializeCommand.cmd b/.devcontainer/initializeCommand.cmd new file mode 100644 index 00000000..784fa880 --- /dev/null +++ b/.devcontainer/initializeCommand.cmd @@ -0,0 +1 @@ +powershell .devcontainer/initializeCommand.ps1 \ No newline at end of file diff --git a/.devcontainer/initializeCommand.ps1 b/.devcontainer/initializeCommand.ps1 new file mode 100644 index 00000000..82e005a9 --- /dev/null +++ b/.devcontainer/initializeCommand.ps1 @@ -0,0 +1,7 @@ +$Folder = 'src/data_common/src' +"Test to see if folder [$Folder] exists" +if (Test-Path -Path $Folder) { + echo "Submodule already exists" +} else { + git submodule update --init +} \ No newline at end of file diff --git a/.devcontainer/postCreateCommand b/.devcontainer/postCreateCommand new file mode 100644 index 00000000..a9f6c5a0 --- /dev/null +++ b/.devcontainer/postCreateCommand @@ -0,0 +1,8 @@ +#!/bin/bash + +echo "get submodule to appear as main rather than latest comment" +cd src/data_common +git checkout main +cd ../.. 
+cd docs/theme +git checkout main \ No newline at end of file diff --git a/.env-example b/.env-example new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/.env-example @@ -0,0 +1 @@ + diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..922ce3a4 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,9 @@ +* text eol=lf +*.png binary +*.jpg binary +*.sqlite3 binary +*.sqlite binary +*.xlsx binary +*.xls binary +*.pdf binary +*.parquet binary \ No newline at end of file diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml new file mode 100644 index 00000000..e409040f --- /dev/null +++ b/.github/workflows/build_and_publish.yml @@ -0,0 +1,99 @@ +name: Build datasets and publish + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: write + pages: write + id-token: write + +on: + push: + branches: ["main"] + workflow_dispatch: + schedule: + - cron : "0 8 * * *" + + +jobs: + + build: + runs-on: ubuntu-latest + steps: + + - name: Checkout repo content + uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Run tests and try and build project + uses: mysociety/run-in-devcontainer@v1 + with: + run: | + export PATH="/root/.local/bin:$PATH" + script/test + dataset build --all + dataset version auto --auto-ban major --all --publish + dataset publish --all + + + - name: Push new data + id: auto-commit-action + uses: stefanzweifel/git-auto-commit-action@v4 + with: + commit_message: "Update repo data based on source changes" + + - name: Send GitHub Action trigger data to Slack workflow + id: slack + if: steps.auto-commit-action.outputs.changes_detected == 'true' + uses: slackapi/slack-github-action@v1.19.0 + with: + payload: | + { + "repo_url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + } + env: + SLACK_WEBHOOK_URL: ${{ secrets.HAPPY_DATABOT_SLACK_WEBHOOK }} + + - name: Setup 
Pages + uses: actions/configure-pages@v1 + + - name: Build with Jekyll + uses: actions/jekyll-build-pages@v1 + with: + source: docs + destination: docs/_site + + - name: Upload artifact + uses: actions/upload-pages-artifact@v1 + with: + path: docs/_site + + - name: Send GitHub Action trigger data to Slack workflow (if failed) + if: ${{ failure() }} + id: slack-failed + uses: slackapi/slack-github-action@v1.19.0 + with: + payload: | + { + "repo_url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" + } + env: + SLACK_WEBHOOK_URL: ${{ secrets.SAD_DATABOT_SLACK_WEBHOOK }} + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v1 + + - uses: geekyeggo/delete-artifact@v1 + with: + name: github-pages \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..aef544a0 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,31 @@ +name: Run project tests + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-tests + cancel-in-progress: true + +on: + push: + branches-ignore: ["main"] + pull_request: + workflow_dispatch: + workflow_call: + +jobs: + tests: + runs-on: ubuntu-latest + steps: + + - name: checkout repo content + uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Run project tests + uses: mysociety/run-in-devcontainer@v1 + with: + run: | + export PATH="/root/.local/bin:$PATH" + script/test + dataset build --all + dataset version auto --auto-ban major --all \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..d3fae294 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +*.pyc +secrets.yaml +.env +data/private/* +_render/_parts +_render/_papermills +docs/_site +data/raw +docs/_site +docs/data +docs/_* +*.parquet 
\ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..789eee18 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "src/data_common"] + path = src/data_common + url = https://github.com/mysociety/data_common +[submodule "docs/theme"] + path = docs/theme + url = https://github.com/mysociety/mysociety-docs-theme/ diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..58a21d01 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + }, + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..88a27e8b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,46 @@ +{ + "python.linting.pylintEnabled": true, + "python.defaultInterpreterPath": "/usr/local/bin/python", + "python.terminal.activateEnvironment": false, + "python.formatting.provider": "black", + "python.analysis.typeCheckingMode": "basic", + "python.analysis.stubPath": "src/data_common/typing", + "editor.formatOnSave": true, + "files.exclude": { + "**/.git": true, + "**/.svn": true, + "**/.hg": true, + "**/CVS": true, + "**/.DS_Store": true, + "**/*.pyc": { + "when": "$(basename).py" + }, + "**/__pycache__": true + }, + "files.associations": { + "**/*.html": "html", + "**/templates/**/*.html": "django-html", + "**/templates/**/*": "django-txt", + "**/requirements{/**,*}.{txt,in}": "pip-requirements" + }, + "python.linting.pylintArgs": [ + "--max-line-length=88", + "--disable=C0103,E1101,W5101,E1123,E501,E203", + "--load-plugins=pylint_django" + ], + 
"jupyter.jupyterServerType": "local", + "ltex.language": "en-GB", + "ltex.ltex-ls.path": "/ltex/ltex-ls-15.2.0/", + "[markdown]": { + "editor.quickSuggestions": { + "comments": "on", + "strings": "on", + "other": "on" + } + }, + "python.testing.pytestArgs": [ + "tests/" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..a22ee15c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,36 @@ +# This dockerfile is used by binder. + +FROM ghcr.io/mysociety/data_common:sha-54e0768 + +# Make an empty project directory so the 'self' setup doesn't fail and scripts can be setup +# Override the .pth created at previous stages to point to where the working directory will land +COPY pyproject.toml poetry.loc[k] /setup/ +COPY src/data_common/pyproject.toml src/data_common/poetry.loc[k] /setup/src/data_common/ +RUN mkdir /setup/src/publicwhip_data \ + && touch /setup/src/publicwhip_data/__init__.py \ + && mkdir --parents /setup/src/data_common/src/data_common \ + && touch /setup/src/data_common/src/data_common/__init__.py \ + && export PATH="/root/.local/bin:$PATH" \ + && cd /setup/ && poetry install \ + && echo "/workspaces/publicwhip_data/src/" > /usr/local/lib/python3.10/site-packages/publicwhip_data.pth \ + && echo "/workspaces/publicwhip_data/src/data_common/src" > /usr/local/lib/python3.10/site-packages/data_common.pth + +# special binder instructions + +RUN pip install --no-cache-dir notebook + +ARG NB_USER=jovyan +ARG NB_UID=1000 +ENV USER ${NB_USER} +ENV NB_UID ${NB_UID} +ENV HOME /home/${NB_USER} + +RUN adduser --disabled-password \ + --gecos "Default user" \ + --uid ${NB_UID} \ + ${NB_USER} + +COPY . 
${HOME} +USER root +RUN chown -R ${NB_UID} ${HOME} +USER ${NB_USER} \ No newline at end of file diff --git a/Dockerfile.dev b/Dockerfile.dev new file mode 100644 index 00000000..2397ab7d --- /dev/null +++ b/Dockerfile.dev @@ -0,0 +1,15 @@ +FROM ghcr.io/mysociety/data_common:sha-d6eabdb + +# Make an empty project directory so the 'self' setup doesn't fail and scripts can be setup +# Override the .pth created at previous stages to point to where the working directory will land +COPY pyproject.toml poetry.loc[k] /setup/ +COPY src/data_common/pyproject.toml src/data_common/poetry.loc[k] /setup/src/data_common/ +ENV WORKSPACE_NAME publicwhip-data +RUN mkdir /setup/src/$WORKSPACE_NAME \ + && touch /setup/src/$WORKSPACE_NAME/__init__.py \ + && mkdir --parents /setup/src/data_common/src/data_common \ + && touch /setup/src/data_common/src/data_common/__init__.py \ + && export PATH="/root/.local/bin:$PATH" \ + && cd /setup/ && poetry install \ + && echo "/workspaces/$WORKSPACE_NAME/src/" > /usr/local/lib/python3.10/site-packages/$WORKSPACE_NAME.pth \ + && echo "/workspaces/$WORKSPACE_NAME/src/data_common/src" > /usr/local/lib/python3.10/site-packages/data_common.pth \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..fc80d7fa --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 mySociety + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/data/.gitinclude b/data/.gitinclude new file mode 100644 index 00000000..e69de29b diff --git a/data/interim/.gitinclude b/data/interim/.gitinclude new file mode 100644 index 00000000..e69de29b diff --git a/data/packages/public_whip_data/datapackage.yaml b/data/packages/public_whip_data/datapackage.yaml new file mode 100644 index 00000000..6fe38e34 --- /dev/null +++ b/data/packages/public_whip_data/datapackage.yaml @@ -0,0 +1,277 @@ +name: public_whip_data +title: Public Whip data +description: "Reprocessed data dump from publicwhip.org.uk\n" +version: 0.1.0 +licenses: +- name: CC-BY-4.0 + path: https://creativecommons.org/licenses/by/4.0/ + title: Creative Commons Attribution 4.0 International License +contributors: +- title: mySociety + path: https://mysociety.org + role: author +custom: + build: publicwhip_data.process:fetch_and_move_pw + tests: + - test_public_whip_data + dataset_order: 0 + download_options: + gate: default + survey: default + header_text: default + formats: + csv: false + parquet: true + composite: + xlsx: + include: all + exclude: none + render: false + sqlite: + include: all + exclude: none + render: false + json: + include: all + exclude: none + render: false + change_log: + 0.1.0: '' + 0.2.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.3.0: 'Change in data for resource(s): pw_vote' + 0.4.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.5.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.5.1: 'Minor change in data for resource(s): 
pw_division,pw_moffice' + 0.6.0: 'Change in data for resource(s): pw_moffice,pw_mp,pw_vote' + 0.6.1: 'Minor change in data for resource(s): pw_moffice' + 0.6.2: 'Minor change in data for resource(s): pw_moffice' + 0.7.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.8.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.8.1: 'Minor change in data for resource(s): pw_moffice' + 0.9.0: 'Change in data for resource(s): pw_moffice,pw_vote' + 0.9.1: 'Minor change in data for resource(s): pw_moffice' + 0.9.2: 'Minor change in data for resource(s): pw_moffice' + 0.9.3: 'Minor change in data for resource(s): pw_moffice' + 0.9.4: 'Minor change in data for resource(s): pw_moffice' + 0.10.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.10.1: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.10.2: 'Minor change in data for resource(s): pw_moffice' + 0.10.3: 'Minor change in data for resource(s): pw_moffice' + 0.10.4: 'Minor change in data for resource(s): pw_moffice' + 0.10.5: 'Minor change in data for resource(s): pw_moffice' + 0.10.6: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.10.7: 'Minor change in data for resource(s): pw_moffice' + 0.11.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.11.1: 'Minor change in data for resource(s): pw_moffice' + 0.11.2: 'Minor change in data for resource(s): pw_moffice' + 0.11.3: 'Minor change in data for resource(s): pw_moffice' + 0.12.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.13.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.14.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.15.0: 'Change in data for resource(s): pw_vote' + 0.15.1: 'Minor change in data for resource(s): pw_moffice' + 0.15.2: 'Minor change in data for resource(s): pw_moffice' + 0.15.3: 'Minor change in data for resource(s): pw_moffice' + 0.15.4: 'Minor change in data for resource(s): pw_moffice' + 0.15.5: 'Minor change in data for resource(s): 
pw_moffice' + 0.15.6: 'Minor change in data for resource(s): pw_moffice' + 0.15.7: 'Minor change in data for resource(s): pw_moffice' + 0.15.8: 'Minor change in data for resource(s): pw_moffice' + 0.15.9: 'Minor change in data for resource(s): pw_moffice' + 0.15.10: 'Minor change in data for resource(s): pw_moffice' + 0.15.11: 'Minor change in data for resource(s): pw_moffice' + 0.15.12: 'Minor change in data for resource(s): pw_moffice' + 0.15.13: 'Minor change in data for resource(s): pw_moffice' + 0.15.14: 'Minor change in data for resource(s): pw_moffice' + 0.15.15: 'Minor change in data for resource(s): pw_moffice' + 0.15.16: 'Minor change in data for resource(s): pw_moffice' + 0.15.17: 'Minor change in data for resource(s): pw_moffice' + 0.15.18: 'Minor change in data for resource(s): pw_moffice' + 0.16.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.17.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.17.1: 'Minor change in data for resource(s): pw_moffice' + 0.17.2: 'Minor change in data for resource(s): pw_moffice' + 0.17.3: 'Minor change in data for resource(s): pw_moffice' + 0.18.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.19.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.20.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.21.0: 'Change in data for resource(s): pw_vote' + 0.21.1: 'Minor change in data for resource(s): pw_moffice' + 0.21.2: 'Minor change in data for resource(s): pw_moffice' + 0.21.3: 'Minor change in data for resource(s): pw_moffice' + 0.21.4: 'Minor change in data for resource(s): pw_moffice' + 0.22.0: 'Change in data for resource(s): pw_vote' + 0.22.1: 'Minor change in data for resource(s): pw_moffice' + 0.23.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.23.1: 'Minor change in data for resource(s): pw_moffice' + 0.23.2: 'Minor change in data for resource(s): pw_moffice' + 0.23.3: 'Minor change in data for resource(s): pw_moffice' + 0.23.4: 'Minor change in 
data for resource(s): pw_moffice' + 0.23.5: 'Minor change in data for resource(s): pw_moffice' + 0.23.6: 'Minor change in data for resource(s): pw_moffice' + 0.24.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.24.1: 'Minor change in data for resource(s): pw_moffice' + 0.24.2: 'Minor change in data for resource(s): pw_moffice' + 0.25.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.26.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.27.0: 'Change in data for resource(s): pw_vote' + 0.28.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.28.1: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.28.2: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.28.3: 'Minor change in data for resource(s): pw_moffice' + 0.28.4: 'Minor change in data for resource(s): pw_moffice' + 0.29.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.30.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.30.1: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.31.0: 'Change in data for resource(s): pw_vote' + 0.31.1: 'Minor change in data for resource(s): pw_moffice' + 0.31.2: 'Minor change in data for resource(s): pw_moffice' + 0.31.3: 'Minor change in data for resource(s): pw_moffice' + 0.31.4: 'Minor change in data for resource(s): pw_moffice' + 0.31.5: 'Minor change in data for resource(s): pw_moffice' + 0.31.6: 'Minor change in data for resource(s): pw_moffice' + 0.31.7: 'Minor change in data for resource(s): pw_moffice' + 0.31.8: 'Minor change in data for resource(s): pw_moffice' + 0.32.0: 'Change in data for resource(s): pw_vote' + 0.33.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.34.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.35.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.35.1: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.35.2: 'Minor change in data for resource(s): pw_moffice' + 0.35.3: 'Minor change in 
data for resource(s): pw_moffice' + 0.36.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.37.0: 'Change in data for resource(s): pw_vote' + 0.38.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.38.1: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.38.2: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.38.3: 'Minor change in data for resource(s): pw_moffice' + 0.38.4: 'Minor change in data for resource(s): pw_moffice' + 0.39.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.40.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.40.1: 'Minor change in data for resource(s): pw_moffice' + 0.41.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.41.1: 'Minor change in data for resource(s): pw_moffice' + 0.41.2: 'Minor change in data for resource(s): pw_moffice' + 0.41.3: 'Minor change in data for resource(s): pw_moffice' + 0.41.4: 'Minor change in data for resource(s): pw_moffice' + 0.42.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.43.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.43.1: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.43.2: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.43.3: 'Minor change in data for resource(s): pw_moffice' + 0.43.4: 'Minor change in data for resource(s): pw_moffice' + 0.44.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.45.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.46.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.46.1: 'Minor change in data for resource(s): pw_moffice' + 0.46.2: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.46.3: 'Minor change in data for resource(s): pw_moffice' + 0.46.4: 'Minor change in data for resource(s): pw_moffice' + 0.46.5: 'Minor change in data for resource(s): pw_moffice' + 0.47.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.48.0: 'Change in data for resource(s): 
pw_division,pw_vote' + 0.49.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.49.1: 'Minor change in data for resource(s): pw_moffice' + 0.49.2: 'Minor change in data for resource(s): pw_moffice' + 0.49.3: 'Minor change in data for resource(s): pw_moffice' + 0.50.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.51.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.52.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.52.1: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.52.2: 'Minor change in data for resource(s): pw_moffice' + 0.52.3: 'Minor change in data for resource(s): pw_moffice' + 0.52.4: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.52.5: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.52.6: 'Minor change in data for resource(s): pw_moffice' + 0.52.7: 'Minor change in data for resource(s): pw_moffice' + 0.52.8: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.52.9: 'Minor change in data for resource(s): pw_moffice' + 0.52.10: 'Minor change in data for resource(s): pw_moffice' + 0.52.11: 'Minor change in data for resource(s): pw_moffice' + 0.52.12: 'Minor change in data for resource(s): pw_moffice' + 0.52.13: 'Minor change in data for resource(s): pw_moffice' + 0.52.14: 'Minor change in data for resource(s): pw_moffice' + 0.52.15: 'Minor change in data for resource(s): pw_moffice' + 0.52.16: 'Minor change in data for resource(s): pw_moffice' + 0.52.17: 'Minor change in data for resource(s): pw_moffice' + 0.52.18: 'Minor change in data for resource(s): pw_moffice' + 0.52.19: 'Minor change in data for resource(s): pw_moffice' + 0.52.20: 'Minor change in data for resource(s): pw_moffice' + 0.52.21: 'Minor change in data for resource(s): pw_moffice' + 0.52.22: 'Minor change in data for resource(s): pw_moffice' + 0.52.23: 'Minor change in data for resource(s): pw_moffice' + 0.52.24: 'Minor change in data for resource(s): pw_moffice' + 0.52.25: 
'Minor change in data for resource(s): pw_moffice' + 0.52.26: 'Minor change in data for resource(s): pw_moffice' + 0.52.27: 'Minor change in data for resource(s): pw_moffice' + 0.52.28: 'Minor change in data for resource(s): pw_moffice' + 0.52.29: 'Minor change in data for resource(s): pw_moffice' + 0.52.30: 'Minor change in data for resource(s): pw_moffice' + 0.52.31: 'Minor change in data for resource(s): pw_moffice' + 0.52.32: 'Minor change in data for resource(s): pw_moffice' + 0.52.33: 'Minor change in data for resource(s): pw_moffice' + 0.52.34: 'Minor change in data for resource(s): pw_moffice' + 0.52.35: 'Minor change in data for resource(s): pw_moffice' + 0.52.36: 'Minor change in data for resource(s): pw_moffice' + 0.52.37: 'Minor change in data for resource(s): pw_moffice' + 0.52.38: 'Minor change in data for resource(s): pw_moffice' + 0.52.39: 'Minor change in data for resource(s): pw_moffice' + 0.52.40: 'Minor change in data for resource(s): pw_moffice' + 0.52.41: 'Minor change in data for resource(s): pw_moffice' + 0.52.42: 'Minor change in data for resource(s): pw_moffice' + 0.52.43: 'Minor change in data for resource(s): pw_moffice' + 0.53.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.54.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.55.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.56.0: 'Change in data for resource(s): pw_vote' + 0.56.1: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.56.2: 'Minor change in data for resource(s): pw_moffice' + 0.56.3: 'Minor change in data for resource(s): pw_moffice' + 0.57.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.58.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.59.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.60.0: 'Change in data for resource(s): pw_vote' + 0.60.1: 'Minor change in data for resource(s): pw_moffice' + 0.60.2: 'Minor change in data for resource(s): pw_moffice' + 0.60.3: 'Minor change in 
data for resource(s): pw_moffice' + 0.61.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.61.1: 'Minor change in data for resource(s): pw_moffice' + 0.61.2: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.61.3: 'Minor change in data for resource(s): pw_moffice' + 0.61.4: 'Minor change in data for resource(s): pw_moffice' + 0.61.5: 'Minor change in data for resource(s): pw_moffice' + 0.61.6: 'Minor change in data for resource(s): pw_moffice' + 0.61.7: 'Minor change in data for resource(s): pw_moffice' + 0.61.8: 'Minor change in data for resource(s): pw_moffice' + 0.61.9: 'Minor change in data for resource(s): pw_moffice' + 0.61.10: 'Minor change in data for resource(s): pw_moffice' + 0.61.11: 'Minor change in data for resource(s): pw_moffice' + 0.61.12: 'Minor change in data for resource(s): pw_moffice' + 0.61.13: 'Minor change in data for resource(s): pw_moffice' + 0.61.14: 'Minor change in data for resource(s): pw_moffice' + 0.61.15: 'Minor change in data for resource(s): pw_moffice' + 0.61.16: 'Minor change in data for resource(s): pw_moffice' + 0.61.17: 'Minor change in data for resource(s): pw_moffice' + 0.61.18: 'Minor change in data for resource(s): pw_moffice' + 0.61.19: 'Minor change in data for resource(s): pw_moffice' + 0.61.20: 'Minor change in data for resource(s): pw_moffice' + 0.61.21: 'Minor change in data for resource(s): pw_moffice' + 0.61.22: 'Minor change in data for resource(s): pw_moffice' + 0.61.23: 'Minor change in data for resource(s): pw_moffice' + 0.61.24: 'Minor change in data for resource(s): pw_moffice' + 0.61.25: 'Minor change in data for resource(s): pw_moffice' + 0.61.26: 'Minor change in data for resource(s): pw_moffice' + 0.61.27: 'Minor change in data for resource(s): pw_moffice' + 0.61.28: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.62.0: 'Change in data for resource(s): pw_division,pw_vote' + 0.63.0: 'Change in data for resource(s): pw_vote' + 0.63.1: 'Minor change in data 
for resource(s): pw_moffice' + 0.63.2: 'Minor change in data for resource(s): pw_moffice' + 0.63.3: 'Minor change in data for resource(s): pw_moffice' + 0.63.4: 'Minor change in data for resource(s): pw_moffice' + 0.63.5: 'Minor change in data for resource(s): pw_division,pw_moffice' + 0.63.6: 'Minor change in data for resource(s): pw_moffice' + 0.63.7: 'Minor change in data for resource(s): pw_moffice' + 0.63.8: 'Minor change in data for resource(s): pw_moffice' + 0.63.9: 'Minor change in data for resource(s): pw_moffice' + 0.63.10: 'Minor change in data for resource(s): pw_moffice' diff --git a/data/packages/public_whip_data/pw_division.resource.yaml b/data/packages/public_whip_data/pw_division.resource.yaml new file mode 100644 index 00000000..d1b5200b --- /dev/null +++ b/data/packages/public_whip_data/pw_division.resource.yaml @@ -0,0 +1,107 @@ +title: Divisions +description: Division data table +custom: + row_count: 12958 +path: pw_division.parquet +name: pw_division +profile: data-resource +scheme: file +format: parquet +hashing: md5 +encoding: utf-8 +schema: + fields: + - name: division_id + type: integer + description: ID of division + constraints: + unique: true + example: 10387 + - name: valid + type: integer + description: Validity of division (Always true) + constraints: + unique: false + enum: + - 1 + example: 1 + - name: division_date + type: string + description: Date of division + constraints: + unique: false + example: '1997-05-19' + - name: division_number + type: integer + description: Division number + constraints: + unique: false + example: 0 + - name: division_name + type: string + description: Division name + constraints: + unique: false + example: '"Duration of planning permission and consent' + - name: source_url + type: string + description: URL of source + constraints: + unique: false + example: '' + - name: motion + type: string + description: HTML text of motion + constraints: + unique: false + example: "

I beg to move amendment\ + \ No. 22, in page 3, line 20, at end insert—

\n\n

'(6A) In subsection (5) (motion for resolution\ + \ not to be moved unless certain conditions are satisfied), for \"Presiding\ + \ Officer in pursuance of a notice\" there is substituted \"Secretary of State\"\ + .'.

\n\n

Motion made, and Question\ + \ put, That the clause stand part of the Bill:—

\n\n

The Committee divided: Ayes 338, Noes 8.

" + - name: notes + type: string + description: Text of any associated database notes + constraints: + unique: false + enum: + - '' + example: '' + - name: debate_url + type: string + description: URL of debate + constraints: + unique: false + example: '' + - name: source_gid + type: string + description: GID of source + constraints: + unique: false + example: '' + - name: debate_gid + type: string + description: GID of debate + constraints: + unique: false + example: '' + - name: house + type: string + description: Which house the division was in + constraints: + unique: false + enum: + - commons + - lords + - scotland + example: commons + - name: clock_time + type: string + description: Time of division + constraints: + unique: false + example: '' +hash: 8b5cf7b11d26e443faf3d7369b446d61 diff --git a/data/packages/public_whip_data/pw_dyn_dreammp.resource.yaml b/data/packages/public_whip_data/pw_dyn_dreammp.resource.yaml new file mode 100644 index 00000000..09f68e53 --- /dev/null +++ b/data/packages/public_whip_data/pw_dyn_dreammp.resource.yaml @@ -0,0 +1,51 @@ +title: Dream MP data table +description: Datatable of 'dream MPs', user created to compare actual MPs to (powers + TWFY policy lines) +custom: + row_count: 6975 +path: pw_dyn_dreammp.parquet +name: pw_dyn_dreammp +profile: data-resource +scheme: file +format: parquet +hashing: md5 +encoding: utf-8 +schema: + fields: + - name: dream_id + type: integer + description: ID of dream MP + constraints: + unique: true + example: 1 + - name: name + type: string + description: Name of dream MP + constraints: + unique: false + example: ' unofficial Nationalist party' + - name: user_id + type: integer + description: ID of user who created dream MP + constraints: + unique: false + example: 1 + - name: description + type: string + description: Description of dream MP + constraints: + unique: false + example: "\r\nDefinition: This new left of centre party stands for Equality, Social\ + \ Justice, Trade Unionism, 
Solidarity, Internationalism and Progressivism. \r\ + \n" + - name: private + type: integer + description: Whether dream MP is private or visible on site + constraints: + unique: false + enum: + - 1 + - 2 + - 0 + example: 0 +hash: 22a9374b9f4b089a2ee0e606fbd0a89d diff --git a/data/packages/public_whip_data/pw_dyn_dreamvote.resource.yaml b/data/packages/public_whip_data/pw_dyn_dreamvote.resource.yaml new file mode 100644 index 00000000..f7e17b5a --- /dev/null +++ b/data/packages/public_whip_data/pw_dyn_dreamvote.resource.yaml @@ -0,0 +1,54 @@ +title: Dream vote data table +description: Connection of vote alignment to dream MP +custom: + row_count: 14829 +path: pw_dyn_dreamvote.parquet +name: pw_dyn_dreamvote +profile: data-resource +scheme: file +format: parquet +hashing: md5 +encoding: utf-8 +schema: + fields: + - name: division_date + type: string + description: Date of division + constraints: + unique: false + example: '1997-05-19' + - name: division_number + type: integer + description: Division number + constraints: + unique: false + example: 0 + - name: dream_id + type: integer + description: ID of dream MP + constraints: + unique: false + example: 1 + - name: vote + type: string + description: Vote of MP + constraints: + unique: false + enum: + - no3 + - both + - aye + - 'no' + - aye3 + example: aye + - name: house + type: string + description: House of MP + constraints: + unique: false + enum: + - commons + - lords + - scotland + example: commons +hash: 46cbcf912a0a3f3a55ec986ec00b4ad2 diff --git a/data/packages/public_whip_data/pw_dyn_wiki_motion.resource.yaml b/data/packages/public_whip_data/pw_dyn_wiki_motion.resource.yaml new file mode 100644 index 00000000..7ecb30c7 --- /dev/null +++ b/data/packages/public_whip_data/pw_dyn_wiki_motion.resource.yaml @@ -0,0 +1,90 @@ +title: Motion data table +description: Datatable of motions edited in wiki +custom: + row_count: 10603 +path: pw_dyn_wiki_motion.parquet +name: pw_dyn_wiki_motion +profile: data-resource 
+scheme: file +format: parquet +hashing: md5 +encoding: utf-8 +schema: + fields: + - name: wiki_id + type: integer + description: ID of wiki motion + constraints: + unique: true + example: 1 + - name: text_body + type: string + description: Text of motion + constraints: + unique: false + example: "\r\n\r\n--- MOTION EFFECT ---\r\n\r\nThe Aye-voters set to the timetable\ + \ for the Standing Committee to complete its scrutiny of the Road Safety Bill by Thursday 3rd February 2005.\r\n\r\nOnce it returns\ + \ to the House and its amendments have been accepted, there is a Third Reading\ + \ Debate, after which the Bill is sent to the House of Lords before it becomes\ + \ law.\r\n\r\n\ + \ Detailed documentation on this bill can be found at the Department for Transport.\ + \ \r\n\r\n\r\n--- COMMENTS AND NOTES ---\r\n\r\n

Motion\ + \ made, and Question put forthwith, pursuant to Standing Order No. 83A(6),

\r\ + \n\r\n

That the following provisions shall apply to the Road\ + \ Safety Bill:

\r\n\r\n

Committal

\r\n\r\n1. The Bill shall be committed to a Standing Committee.

\r\ + \n\r\n

Proceedings in Standing Committee

\r\n\r\n\ +

2. Proceedings in the Standing Committee shall (so far as\ + \ not previously concluded) be brought to a conclusion on Thursday 3rd February\ + \ 2005.

\r\n\r\n

3. The Standing Committee shall have\ + \ leave to sit twice on the first day on which it meets.

\r\n\r\n

Consideration and Third Reading

\r\n\r\n

4. Proceedings on consideration shall (so far as not previously concluded)\ + \ be brought to a conclusion one hour before the moment of interruption on the\ + \ day on which those proceedings are commenced.

\r\n\r\n

5. Proceedings on Third Reading shall (so far as not previously concluded)\ + \ be brought to a conclusion at the moment of interruption on that day.

\r\ + \n\r\n

Programming Committee

\r\n\r\n

6. Standing Order No. 83B (Programming committees) shall not apply\ + \ to proceedings on consideration and Third Reading.

\r\n\r\n

Other proceedings

\r\n\r\n

7. Any other\ + \ proceedings on the Bill (including any proceedings on consideration of Lords\ + \ Amendments or on any further messages from the Lords) may be programmed. —[Mr.\ + \ Watson.]

\r\n\r\n

The House divided: Ayes 330, Noes\ + \ 161.

\r\n\r\n" + - name: user_id + type: integer + description: ID of user who edited the motion + constraints: + unique: false + example: 1 + - name: edit_date + type: string + description: Date of edit + constraints: + unique: false + example: '2005-01-15 15:09:24' + - name: division_date + type: string + description: Date of division + constraints: + unique: false + example: '1997-05-19' + - name: division_number + type: integer + description: Number of division + constraints: + unique: false + example: 1 + - name: house + type: string + description: House of parliament + constraints: + unique: false + enum: + - commons + - lords + example: commons +hash: 4bb066cb217b4eca5f1b433759a724e0 diff --git a/data/packages/public_whip_data/pw_moffice.resource.yaml b/data/packages/public_whip_data/pw_moffice.resource.yaml new file mode 100644 index 00000000..e3de2f23 --- /dev/null +++ b/data/packages/public_whip_data/pw_moffice.resource.yaml @@ -0,0 +1,59 @@ +title: Ministerial office data table +description: Connection of ministerial office to person +custom: + row_count: 19738 +path: pw_moffice.parquet +name: pw_moffice +profile: data-resource +scheme: file +format: parquet +hashing: md5 +encoding: utf-8 +schema: + fields: + - name: moffice_id + type: integer + description: ID of ministerial office + constraints: + unique: true + example: 69428546 + - name: dept + type: string + description: Department of ministerial office + constraints: + unique: false + example: '' + - name: position + type: string + description: Position of ministerial office + constraints: + unique: false + example: ' Spokesperson for the Cabinet Office, Spokesperson for Constitutional + Affairs, Spokesperson for Scotland' + - name: from_date + type: string + description: Start date of ministerial office + constraints: + unique: false + example: '1947-01-01' + - name: to_date + type: string + description: End date of ministerial office + constraints: + unique: false + example: '1950-01-01' + - name: 
person + type: integer + description: ID of person + constraints: + unique: false + example: 10001 + - name: responsibility + type: string + description: Responsibility of ministerial office + constraints: + unique: false + enum: + - '' + example: '' +hash: 86a6cb5e8f5e1852eb089135d65a19e1 diff --git a/data/packages/public_whip_data/pw_mp.resource.yaml b/data/packages/public_whip_data/pw_mp.resource.yaml new file mode 100644 index 00000000..2863fdae --- /dev/null +++ b/data/packages/public_whip_data/pw_mp.resource.yaml @@ -0,0 +1,106 @@ +title: MP membership table +description: Datatable of MP information +custom: + row_count: 7230 +path: pw_mp.parquet +name: pw_mp +profile: data-resource +scheme: file +format: parquet +hashing: md5 +encoding: utf-8 +schema: + fields: + - name: mp_id + type: integer + description: ID of MP + constraints: + unique: true + example: 1 + - name: first_name + type: string + description: First name of MP + constraints: + unique: false + example: '' + - name: last_name + type: string + description: Last name of MP + constraints: + unique: false + example: '' + - name: title + type: string + description: Title of MP + constraints: + unique: false + example: '' + - name: constituency + type: string + description: Constituency of MP + constraints: + unique: false + example: '' + - name: party + type: string + description: Party of MP + constraints: + unique: false + example: Alba + - name: entered_house + type: string + description: Date of MP entering house + constraints: + unique: false + example: '1935-00-00' + - name: left_house + type: string + description: Date of MP leaving house + constraints: + unique: false + example: '1997-05-08' + - name: entered_reason + type: string + description: Reason for MP entering house + constraints: + unique: false + enum: + - general_election + - by_election + - changed_party + - reinstated + - unknown + - regional_election + - replaced_in_region + - became_presiding_officer + - appointed + example: 
appointed + - name: left_reason + type: string + description: Reason for MP leaving house + constraints: + unique: false + example: became_peer + - name: person + type: integer + description: ID of person + constraints: + unique: false + example: 10001 + - name: house + type: string + description: House of MP + constraints: + unique: false + enum: + - commons + - scotland + - lords + example: commons + - name: gid + type: string + description: ID of MP + constraints: + unique: true + example: uk.org.publicwhip/lord/100001 +hash: 80eebd3e6c0b07d8a3d68008bd5a00bf diff --git a/data/packages/public_whip_data/pw_vote.resource.yaml b/data/packages/public_whip_data/pw_vote.resource.yaml new file mode 100644 index 00000000..81c99937 --- /dev/null +++ b/data/packages/public_whip_data/pw_vote.resource.yaml @@ -0,0 +1,40 @@ +title: Vote data table +description: Datatable of votes +custom: + row_count: 4280371 +path: pw_vote.parquet +name: pw_vote +profile: data-resource +scheme: file +format: parquet +hashing: md5 +encoding: utf-8 +schema: + fields: + - name: division_id + type: integer + description: ID of division + constraints: + unique: false + example: 10387 + - name: mp_id + type: integer + description: ID of MP + constraints: + unique: false + example: 1 + - name: vote + type: string + description: Vote of MP + constraints: + unique: false + enum: + - 'no' + - aye + - tellno + - tellaye + - both + - abstention + - spoiled + example: abstention +hash: 43aeebf66bf5b310789be00ee48778cb diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..9b5b84f4 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,9 @@ +services: + app: + image: mysociety/publicwhip-data:${TAG:-latest} + build: + context: . 
+ dockerfile: Dockerfile.dev + working_dir: /workspaces/publicwhip_data + volumes: + - ./:/workspaces/publicwhip_data/ \ No newline at end of file diff --git a/docs/.gitinclude b/docs/.gitinclude new file mode 100644 index 00000000..e69de29b diff --git a/docs/Gemfile b/docs/Gemfile new file mode 100644 index 00000000..7ee04945 --- /dev/null +++ b/docs/Gemfile @@ -0,0 +1,46 @@ +source "https://rubygems.org" + +# Hello! This is where you manage which Jekyll version is used to run. +# When you want to use a different version, change it below, save the +# file and run `bundle install`. Run Jekyll with `bundle exec`, like so: +# +# bundle exec jekyll serve +# +# This will help ensure the proper Jekyll version is running. +# Happy Jekylling! + +# needed for newer versions of ruby locally +gem 'webrick' + +gem "jekyll", "~> 3.9.2" + +# This is the default theme for new Jekyll sites. You may change this to anything you like. +gem "minima", "~> 2.0" + +# If you want to use GitHub Pages, remove the "gem "jekyll"" above and +# uncomment the line below. To upgrade, run `bundle update github-pages`. +# gem "github-pages", group: :jekyll_plugins + +# If you have any plugins, put them here! +group :jekyll_plugins do + gem "jekyll-feed", "~> 0.6" +end + +# Windows does not include zoneinfo files, so bundle the tzinfo-data gem +# and associated library. +install_if -> { RUBY_PLATFORM =~ %r!mingw|mswin|java! } do + gem "tzinfo", "~> 1.2" + gem "tzinfo-data" +end + +# Performance-booster for watching directories on Windows +gem "wdm", "~> 0.1.0", :install_if => Gem.win_platform? + +# kramdown v2 ships without the gfm parser by default. If you're using +# kramdown v1, comment out this line. +gem "kramdown-parser-gfm" + +# Lock `http_parser.rb` gem to `v0.6.x` on JRuby builds since newer versions of the gem +# do not have a Java counterpart. 
+gem "http_parser.rb", "~> 0.6.0", :platforms => [:jruby] +gem "github-pages", "~> 226", group: :jekyll_plugins \ No newline at end of file diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock new file mode 100644 index 00000000..a695c60d --- /dev/null +++ b/docs/Gemfile.lock @@ -0,0 +1,274 @@ +GEM + remote: https://rubygems.org/ + specs: + activesupport (6.0.6.1) + concurrent-ruby (~> 1.0, >= 1.0.2) + i18n (>= 0.7, < 2) + minitest (~> 5.1) + tzinfo (~> 1.1) + zeitwerk (~> 2.2, >= 2.2.2) + addressable (2.8.1) + public_suffix (>= 2.0.2, < 6.0) + coffee-script (2.4.1) + coffee-script-source + execjs + coffee-script-source (1.11.1) + colorator (1.1.0) + commonmarker (0.23.8) + concurrent-ruby (1.2.0) + dnsruby (1.61.9) + simpleidn (~> 0.1) + em-websocket (0.5.3) + eventmachine (>= 0.12.9) + http_parser.rb (~> 0) + ethon (0.16.0) + ffi (>= 1.15.0) + eventmachine (1.2.7) + execjs (2.8.1) + faraday (2.7.4) + faraday-net_http (>= 2.0, < 3.1) + ruby2_keywords (>= 0.0.4) + faraday-net_http (3.0.2) + ffi (1.15.5) + forwardable-extended (2.6.0) + gemoji (3.0.1) + github-pages (226) + github-pages-health-check (= 1.17.9) + jekyll (= 3.9.2) + jekyll-avatar (= 0.7.0) + jekyll-coffeescript (= 1.1.1) + jekyll-commonmark-ghpages (= 0.2.0) + jekyll-default-layout (= 0.1.4) + jekyll-feed (= 0.15.1) + jekyll-gist (= 1.5.0) + jekyll-github-metadata (= 2.13.0) + jekyll-include-cache (= 0.2.1) + jekyll-mentions (= 1.6.0) + jekyll-optional-front-matter (= 0.3.2) + jekyll-paginate (= 1.1.0) + jekyll-readme-index (= 0.3.0) + jekyll-redirect-from (= 0.16.0) + jekyll-relative-links (= 0.6.1) + jekyll-remote-theme (= 0.4.3) + jekyll-sass-converter (= 1.5.2) + jekyll-seo-tag (= 2.8.0) + jekyll-sitemap (= 1.4.0) + jekyll-swiss (= 1.0.0) + jekyll-theme-architect (= 0.2.0) + jekyll-theme-cayman (= 0.2.0) + jekyll-theme-dinky (= 0.2.0) + jekyll-theme-hacker (= 0.2.0) + jekyll-theme-leap-day (= 0.2.0) + jekyll-theme-merlot (= 0.2.0) + jekyll-theme-midnight (= 0.2.0) + jekyll-theme-minimal (= 0.2.0) + 
jekyll-theme-modernist (= 0.2.0) + jekyll-theme-primer (= 0.6.0) + jekyll-theme-slate (= 0.2.0) + jekyll-theme-tactile (= 0.2.0) + jekyll-theme-time-machine (= 0.2.0) + jekyll-titles-from-headings (= 0.5.3) + jemoji (= 0.12.0) + kramdown (= 2.3.2) + kramdown-parser-gfm (= 1.1.0) + liquid (= 4.0.3) + mercenary (~> 0.3) + minima (= 2.5.1) + nokogiri (>= 1.13.4, < 2.0) + rouge (= 3.26.0) + terminal-table (~> 1.4) + github-pages-health-check (1.17.9) + addressable (~> 2.3) + dnsruby (~> 1.60) + octokit (~> 4.0) + public_suffix (>= 3.0, < 5.0) + typhoeus (~> 1.3) + html-pipeline (2.14.3) + activesupport (>= 2) + nokogiri (>= 1.4) + http_parser.rb (0.8.0) + i18n (0.9.5) + concurrent-ruby (~> 1.0) + jekyll (3.9.2) + addressable (~> 2.4) + colorator (~> 1.0) + em-websocket (~> 0.5) + i18n (~> 0.7) + jekyll-sass-converter (~> 1.0) + jekyll-watch (~> 2.0) + kramdown (>= 1.17, < 3) + liquid (~> 4.0) + mercenary (~> 0.3.3) + pathutil (~> 0.9) + rouge (>= 1.7, < 4) + safe_yaml (~> 1.0) + jekyll-avatar (0.7.0) + jekyll (>= 3.0, < 5.0) + jekyll-coffeescript (1.1.1) + coffee-script (~> 2.2) + coffee-script-source (~> 1.11.1) + jekyll-commonmark (1.4.0) + commonmarker (~> 0.22) + jekyll-commonmark-ghpages (0.2.0) + commonmarker (~> 0.23.4) + jekyll (~> 3.9.0) + jekyll-commonmark (~> 1.4.0) + rouge (>= 2.0, < 4.0) + jekyll-default-layout (0.1.4) + jekyll (~> 3.0) + jekyll-feed (0.15.1) + jekyll (>= 3.7, < 5.0) + jekyll-gist (1.5.0) + octokit (~> 4.2) + jekyll-github-metadata (2.13.0) + jekyll (>= 3.4, < 5.0) + octokit (~> 4.0, != 4.4.0) + jekyll-include-cache (0.2.1) + jekyll (>= 3.7, < 5.0) + jekyll-mentions (1.6.0) + html-pipeline (~> 2.3) + jekyll (>= 3.7, < 5.0) + jekyll-optional-front-matter (0.3.2) + jekyll (>= 3.0, < 5.0) + jekyll-paginate (1.1.0) + jekyll-readme-index (0.3.0) + jekyll (>= 3.0, < 5.0) + jekyll-redirect-from (0.16.0) + jekyll (>= 3.3, < 5.0) + jekyll-relative-links (0.6.1) + jekyll (>= 3.3, < 5.0) + jekyll-remote-theme (0.4.3) + addressable (~> 2.0) + jekyll 
(>= 3.5, < 5.0) + jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0) + rubyzip (>= 1.3.0, < 3.0) + jekyll-sass-converter (1.5.2) + sass (~> 3.4) + jekyll-seo-tag (2.8.0) + jekyll (>= 3.8, < 5.0) + jekyll-sitemap (1.4.0) + jekyll (>= 3.7, < 5.0) + jekyll-swiss (1.0.0) + jekyll-theme-architect (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-cayman (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-dinky (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-hacker (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-leap-day (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-merlot (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-midnight (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-minimal (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-modernist (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-primer (0.6.0) + jekyll (> 3.5, < 5.0) + jekyll-github-metadata (~> 2.9) + jekyll-seo-tag (~> 2.0) + jekyll-theme-slate (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-tactile (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-theme-time-machine (0.2.0) + jekyll (> 3.5, < 5.0) + jekyll-seo-tag (~> 2.0) + jekyll-titles-from-headings (0.5.3) + jekyll (>= 3.3, < 5.0) + jekyll-watch (2.2.1) + listen (~> 3.0) + jemoji (0.12.0) + gemoji (~> 3.0) + html-pipeline (~> 2.2) + jekyll (>= 3.0, < 5.0) + kramdown (2.3.2) + rexml + kramdown-parser-gfm (1.1.0) + kramdown (~> 2.0) + liquid (4.0.3) + listen (3.8.0) + rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 0.9, >= 0.9.10) + mercenary (0.3.6) + minima (2.5.1) + jekyll (>= 3.5, < 5.0) + jekyll-feed (~> 0.9) + jekyll-seo-tag (~> 2.1) + minitest (5.17.0) + nokogiri (1.14.2-x86_64-linux) + racc (~> 1.4) + octokit (4.25.1) + faraday (>= 1, < 3) + sawyer (~> 0.9) + pathutil (0.16.2) + 
forwardable-extended (~> 2.6) + public_suffix (4.0.7) + racc (1.6.2) + rb-fsevent (0.11.2) + rb-inotify (0.10.1) + ffi (~> 1.0) + rexml (3.2.5) + rouge (3.26.0) + ruby2_keywords (0.0.5) + rubyzip (2.3.2) + safe_yaml (1.0.5) + sass (3.7.4) + sass-listen (~> 4.0.0) + sass-listen (4.0.0) + rb-fsevent (~> 0.9, >= 0.9.4) + rb-inotify (~> 0.9, >= 0.9.7) + sawyer (0.9.2) + addressable (>= 2.3.5) + faraday (>= 0.17.3, < 3) + simpleidn (0.2.1) + unf (~> 0.1.4) + terminal-table (1.8.0) + unicode-display_width (~> 1.1, >= 1.1.1) + thread_safe (0.3.6) + typhoeus (1.4.0) + ethon (>= 0.9.0) + tzinfo (1.2.11) + thread_safe (~> 0.1) + tzinfo-data (1.2022.7) + tzinfo (>= 1.0.0) + unf (0.1.4) + unf_ext + unf_ext (0.0.8.2) + unicode-display_width (1.8.0) + wdm (0.1.1) + webrick (1.8.1) + zeitwerk (2.6.7) + +PLATFORMS + x86_64-linux + +DEPENDENCIES + github-pages (~> 226) + http_parser.rb (~> 0.6.0) + jekyll (~> 3.9.2) + jekyll-feed (~> 0.6) + kramdown-parser-gfm + minima (~> 2.0) + tzinfo (~> 1.2) + tzinfo-data + wdm (~> 0.1.0) + webrick + +BUNDLED WITH + 2.4.6 diff --git a/docs/data.json b/docs/data.json new file mode 100644 index 00000000..1df77e47 --- /dev/null +++ b/docs/data.json @@ -0,0 +1,3 @@ +--- +layout: datasets/data +--- diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..3b9eaf44 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,8 @@ +--- +title: "Download publicwhip_data" +layout: datasets/front +--- + +# Public whip data + +Public whip database dumps reformatted for ease of access through duckdb. \ No newline at end of file diff --git a/docs/sass/_bootstrap-compat.scss b/docs/sass/_bootstrap-compat.scss new file mode 100644 index 00000000..09777399 --- /dev/null +++ b/docs/sass/_bootstrap-compat.scss @@ -0,0 +1,237 @@ +// mysociety-docs-theme has never been fully combined with Bootstrap, +// and if you just include both, you end up with conflicts over selectors +// like `body`, `h1`, `.container` etc. 
+ +// One day, we’ll need to bite the bullet and properly integrate Bootstrap +// into this site. But for now, we can make do with just approximating +// the Bootstrap components we really need, like `.nav-tabs`. + +.text-center { + @extend .text--center; +} + +.pull-left { + float: left !important; +} + +.pull-right { + float: right !important; +} + +.nav-tabs { + @extend .unstyled-list; + @include clearfix(); + margin: 1em -0.2em; + + li { + float: left; + margin: 0 0.2em; + } + + a { + display: block; + padding: 0.2em 0.5em; + border-radius: 3px; + + &:hover, + &:focus { + text-decoration: none; + background-color: $colour_off_white; + } + } + + .active { + a, + a:hover, + a:focus { + background-color: $colour_links; + color: #fff; + } + } +} + +.jumbotron { + padding: 1em; + margin: 1em 0; + border-radius: 3px; + background-color: $colour_off_white; + + @media (min-width: $medium-screen) { + padding: 2em; + } +} + +.col-sm-3, .col-sm-9,.col-lg-4, .col-xs-5, .col-sm-5, .col-md-5, .col-lg-5, .col-xs-6, .col-sm-6, .col-md-6, .col-lg-6, .col-xs-7, .col-sm-7, .col-md-7, .col-lg-7, .col-xs-8, .col-sm-8, .col-md-8, .col-lg-8, .col-xs-9, .col-sm-9, .col-md-9, .col-lg-9, .col-xs-10, .col-sm-10, .col-md-10, .col-lg-10, .col-xs-11, .col-sm-11, .col-md-11, .col-lg-11, .col-xs-12, .col-sm-12, .col-md-12, .col-lg-12 { + position: relative; + min-height: 1px; + padding-left: 15px; + padding-right: 15px; +} + +@media (min-width: 768px){ + +.col-sm-1, .col-sm-2, .col-sm-3, .col-sm-4, .col-sm-5, .col-sm-6, .col-sm-7, .col-sm-8, .col-sm-9, .col-sm-10, .col-sm-11, .col-sm-12 { + float: left; +} + +.col-sm-3 { + width: 25%; +} + + +.col-sm-9 { + width: 75%; +} + + +.col-sm-4 { + width: 33.33333333%; + +} + +.col-sm-8 { + width: 66.66666667%; + } + +.col-sm-1, .col-sm-10, .col-sm-11, .col-sm-12, .col-sm-2, .col-sm-3, .col-sm-4, .col-sm-5, .col-sm-6, .col-sm-7, .col-sm-8, .col-sm-9 { + float: left; +} +} + + +.media, .media-body { + zoom: 1; + overflow: hidden; +} + +.media { + 
margin-top: 15px; +} + +.media-left, .media-right, .media-body { + display: table-cell; + vertical-align: top; +} + +.media-left, .media>.pull-left { + padding-right: 10px; +} + + +.media-object { + display: block; +} + +.media-body { + width: 10000px; +} + +.panel { + margin-bottom: 21px; + background-color: #ffffff; + border: 1px solid transparent; + border-radius: 4px; + -webkit-box-shadow: 0 1px 1px rgba(0,0,0,0.05); + box-shadow: 0 1px 1px rgba(0,0,0,0.05); +} + +.panel-default { + border-color: #ecf0f1; +} + +.panel-heading { + padding: 10px 15px; + border-bottom: 1px solid transparent; + border-top-right-radius: 3px; + border-top-left-radius: 3px; +} + +.panel-default>.panel-heading { + color: #2c3e50; + background-color: #ecf0f1; + border-color: #ecf0f1; +} + +.panel-body { + padding: 15px; +} + + +.form-control { + display: block; + width: 100%; + height: 45px; + padding: 10px 15px; + font-size: 15px; + line-height: 1.42857143; + color: #2c3e50; + background-color: #ffffff; + background-image: none; + border: 1px solid #dce4ec; + border-radius: 4px; + -webkit-box-shadow: inset 0 1px 1px rgba(0,0,0,0.075); + box-shadow: inset 0 1px 1px rgba(0,0,0,0.075); + -webkit-transition: border-color ease-in-out .15s,-webkit-box-shadow ease-in-out .15s; + -o-transition: border-color ease-in-out .15s,box-shadow ease-in-out .15s; + transition: border-color ease-in-out .15s,box-shadow ease-in-out .15s; +} + +.form-control, input { + border-width: 2px; + -webkit-box-shadow: none; + box-shadow: none; +} + + +.list-group { + margin-bottom: 20px; + padding-left: 0; + line-height: 1em; +} + +a.list-group-item.active, a.list-group-item.active:hover, a.list-group-item.active:focus { + border-color: #ecf0f1; +} + +.list-group-item { + position: relative; + display: block; + padding: 10px 15px; + margin-bottom: -1px; + background-color: #ffffff; + border: 1px solid #ecf0f1; + .badge { + display: inline-block; + min-width: 10px; + padding: 3px 7px; + font-size: 12px; + font-weight: 
700; + line-height: 1; + color: #fff; + text-align: center; + white-space: nowrap; + vertical-align: middle; + background-color: #777; + border-radius: 10px; + } + > .badge { + float: right; + } +} + +.list-group-item:first-child { + border-top-right-radius: 4px; + border-top-left-radius: 4px; +} + +.list-group-item.active, .list-group-item.active:hover, .list-group-item.active:focus { + z-index: 2; + color: #ffffff; + background-color: #2c3e50; + border-color: #2c3e50; +} + +.row { + box-sizing: border-box; + display: block; +} \ No newline at end of file diff --git a/docs/sass/_header.scss b/docs/sass/_header.scss new file mode 100644 index 00000000..f85e27f4 --- /dev/null +++ b/docs/sass/_header.scss @@ -0,0 +1,64 @@ +.site-header { + color: #fff; + + a { + color: inherit; + } + + h1 { + font-size: 2em; + margin-bottom: 0.2em; + + @media (min-width: 48em) { + max-width: 50%; + } + } +} + +a.site-header__research-home-link { + color: mix($colour_brand, #fff, 20%); + + &:hover, &:focus { + color: inherit; + } +} + +.nav-position { + @media (min-width: 48em) { + position: absolute; + top: 50%; + right: 160px; + margin-top: -1em; + } +} + +.site-nav { + ul { + margin-top: 0; + margin-bottom: 0; + @extend .unstyled-list; + } + + li { + @media (min-width: 47.5em) { + display: inline-block; + } + } + + a { + margin-right: 0.33em; + display: block; + padding: 0.33em; + border-top: 1px solid rgba(#fff, 0.2); + + @media (min-width: 47.5em) { + display: inline-block; + border-top: none; + } + + @media (min-width: $large-screen) { + font-size: 1.125em; + margin-right: 0.66em; + } + } +} diff --git a/docs/sass/mysoc.scss b/docs/sass/mysoc.scss new file mode 100644 index 00000000..ef0f98d0 --- /dev/null +++ b/docs/sass/mysoc.scss @@ -0,0 +1,44 @@ +--- +--- +$colour_brand: #4faded; // $colour_blue +$colour_links: #f3f1eb; // $colour_off_white +$colour_background: #ffffff; +$colour_yellow: #FFD836; +$colour_green: #62B356; +$colour_red: #E04B4B; +$colour_violet: #A94CA6; 
+$colour_orange: #F4A140; +$colour_green: #62B356; +$theme_dir: '../theme'; + + +@import "global"; +@import "bootstrap-compat"; +@import "header"; +@import "mysoc-download"; + +.main-content { + background: #fff; + padding: 1.6em 5%; + position: relative; + margin-bottom: $base-spacing-unit; + + h2 { + border-top: 0px; + padding-top: 1px; + } + + h3 { + padding-top: 10px; + } + + .lead+h2 { + border-top: none; + padding-top: 0; + } + + .reveal-on-click+h2 { + border-top: none; + } + +} \ No newline at end of file diff --git a/notebooks/_render_config/default.yaml b/notebooks/_render_config/default.yaml new file mode 100644 index 00000000..e96e9803 --- /dev/null +++ b/notebooks/_render_config/default.yaml @@ -0,0 +1,16 @@ +title: "{{page_title}}" +slug: "{{page_title.lower().replace(' ', '_')}}" +notebooks: + - example +parameters: + page_title: "{{settings.default_page_title}}" +context: + data_common.management.settings: + - settings +options: + rerun: true + hide_input: true +upload: + gdrive: + g_folder_id: blank + g_drive_id: blank \ No newline at end of file diff --git a/notebooks/example.ipynb b/notebooks/example.ipynb new file mode 100644 index 00000000..c1de8787 --- /dev/null +++ b/notebooks/example.ipynb @@ -0,0 +1,274 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from data_common.notebook import *\n", + "\n", + "notebook_setup()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell is an example of a parameters cells." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# default-params\n", + "page_title = \"Original title\"\n", + "# The new parameters will be injected as a new cell below this using papermill." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "## Original title" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "md(\"## \" + page_title)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example code\n", + "\n", + "Start the notebook by pulling in the general set of helpers. Start the codeblock with `#HIDE` to exclude it from being displayed in render, even when 'hide_input' is false (example at top of `readme.ipynb`)." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from data_common.notebook import *\n", + "\n", + "notebook_setup()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example table\n", + "\n", + "This shows a table being rendered as markdown" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
VerticalHorizontaldouble_h
0A2856
1B55110
2C4386
3D91182
4E81162
5F53106
6G1938
7H87174
8I52104
\n", + "
" + ], + "text/plain": [ + " Vertical Horizontal double_h\n", + "0 A 28 56\n", + "1 B 55 110\n", + "2 C 43 86\n", + "3 D 91 182\n", + "4 E 81 162\n", + "5 F 53 106\n", + "6 G 19 38\n", + "7 H 87 174\n", + "8 I 52 104" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "source = pd.DataFrame(\n", + " {\n", + " \"Vertical\": [\"A\", \"B\", \"C\", \"D\", \"E\", \"F\", \"G\", \"H\", \"I\"],\n", + " \"Horizontal\": [28, 55, 43, 91, 81, 53, 19, 87, 52],\n", + " }\n", + ")\n", + "\n", + "source[\"double_h\"] = source[\"Horizontal\"] * 2\n", + "\n", + "source" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example chart" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "alt.Chart(...)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "chart = (\n", + " Chart(source)\n", + " .mark_bar()\n", + " .encode(y=\"Vertical\", x=\"Horizontal\")\n", + " .properties(\n", + " title=\"This is an example chart, with the right fonts\",\n", + " logo=True,\n", + " caption=\"Data source goes here\",\n", + " width=800,\n", + " )\n", + ")\n", + "\n", + "chart.display()" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "4cd7ab41f5fca4b9b44701077e38c5ffd31fe66a6cab21e0214b68d958d0e462" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "orig_nbformat": 2 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..e5d3e124 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,34 @@ 
+[tool.poetry] +name = "publicwhip_data" +version = "0.1.0" +description = "More accessible version of public whip database" +authors = [] + +[tool.poetry.dependencies] +python = ">=3.10,<3.11" +data_common = { path = "src/data_common/", develop = true } + +[tool.poetry.dev-dependencies] +pytest = "^7.1.1" + +[tool.poetry.scripts] +project = 'publicwhip_data.__main__:main' +notebook = "data_common.notebookcli.__main__:run" +dataset = "data_common.dataset.__main__:run" + +[tool.pyright] +include = ["src", "notebooks"] +exclude=["src/data_common/typings"] +stubPath="src/data_common/typings" +typeCheckingMode="basic" +reportPrivateImportUsage="warning" + +[notebook.settings] +default_page_title = "Settings file defined title" + +[tool.dataset] +dataset_dir = "data/packages" +publish_dir = "docs/" +publish_url = "https://pages.mysociety.org/publicwhip_data/" +credit_text = "If you find this data useful, please let us know to help us make the case for future funding." +credit_url = "https://survey.alchemer.com/s3/6876792/Data-usage" \ No newline at end of file diff --git a/readme.md b/readme.md new file mode 100644 index 00000000..6545e9b5 --- /dev/null +++ b/readme.md @@ -0,0 +1,12 @@ + +# publicwhip-data + +[![badge](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/mysociety/publicwhip-data/HEAD) + +More accessible version of public whip database + +This repository is available online at https://github.com/mysociety/publicwhip-data + +If GitHub Pages are enabled, the URL is: https://mysociety.github.io/publicwhip-data/ + +Instructions on using the features of this notebook (data publishing, notebook rendering, GitHub Pages) are available in [Data Common readme file](https://github.com/mysociety/data_common/blob/main/data-repo-readme.md).
\ No newline at end of file
diff --git a/script/server b/script/server
new file mode 100644
index 00000000..795c430e
--- /dev/null
+++ b/script/server
@@ -0,0 +1,4 @@
+#!/bin/bash
+cd docs || exit 1
+bundle install
+bundle exec jekyll serve
\ No newline at end of file
diff --git a/script/setup b/script/setup
new file mode 100644
index 00000000..2c895f61
--- /dev/null
+++ b/script/setup
@@ -0,0 +1,2 @@
+#!/bin/bash
+bash .devcontainer/initializeCommand
\ No newline at end of file
diff --git a/script/test b/script/test
new file mode 100644
index 00000000..e753b90c
--- /dev/null
+++ b/script/test
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+echo "--------------"
+echo "Running pytest"
+echo "--------------"
+poetry run pytest
+pytestexit=$?
+echo "--------------"
+echo "Running black"
+echo "--------------"
+poetry run black . --check
+blackexit=$?
+echo "--------------"
+echo "Running pyright"
+echo "--------------"
+poetry run pyright
+pyrightexit=$?
+
+echo "Pytest status: $pytestexit"
+echo "Black status: $blackexit"
+echo "Pyright status: $pyrightexit"
+
+sum="$(($pytestexit + $blackexit + $pyrightexit))"
+
+if [ "$sum" != "0" ]; then
+    exit 1
+else
+    exit 0
+fi
\ No newline at end of file
diff --git a/script/update-from-template b/script/update-from-template
new file mode 100644
index 00000000..c1d5e38d
--- /dev/null
+++ b/script/update-from-template
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Merge the latest template_data_repo changes, then bring the
+# src/data_common and docs/theme checkouts up to date with their main branch.
+
+git remote rm template
+git remote add template https://github.com/mysociety/template_data_repo
+git fetch template
+git merge template/main
+
+# abort if a directory is missing, otherwise the git commands below would
+# act on the parent repository instead
+cd src/data_common || exit 1
+git fetch origin
+# switch to main before pulling so the pull lands on the branch left checked out
+git checkout main
+git pull origin main
+
+cd ../.. || exit 1
+cd docs/theme || exit 1
+git fetch origin
+git checkout main
+git pull origin main
diff --git a/src/publicwhip_data/__init__.py b/src/publicwhip_data/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/publicwhip_data/__main__.py b/src/publicwhip_data/__main__.py
new file mode 100644
index 00000000..b882d504
--- /dev/null
+++ b/src/publicwhip_data/__main__.py
@@ -0,0 +1,33 @@
+"""
+Command line entry point for publicwhip_data
+"""
+
+import rich_click as click
+
+from .process import fetch_and_move_pw
+
+
+@click.group()
+def cli():
+    """
+    Tools for building the public whip data package
+    """
+
+
+def main():
+    """
+    Run the command group (target of the `project` poetry script)
+    """
+    cli()
+
+
+@cli.command()
+def download():
+    """
+    Download the public whip dumps and convert them to parquet
+    """
+    fetch_and_move_pw()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/publicwhip_data/mysql2sqlite b/src/publicwhip_data/mysql2sqlite
new file mode 100644
index 00000000..8b80fbf2
--- /dev/null
+++ b/src/publicwhip_data/mysql2sqlite
@@ -0,0 +1,289 @@
+#!/usr/bin/awk -f
+
+# Authors: @esperlu, @artemyk, @gkuenning, @dumblob
+
+# FIXME detect empty input file and issue a warning
+
+function printerr( s ){ print s | "cat >&2" }
+
+BEGIN {
+  if( ARGC != 2 ){
+    printerr( \
+      "USAGE:\n"\
+      " mysql2sqlite dump_mysql.sql > dump_sqlite3.sql\n" \
+      " OR\n" \
+      " mysql2sqlite dump_mysql.sql | sqlite3 sqlite.db\n" \
+      "\n" \
+      "NOTES:\n" \
+      " Dash in filename is not supported, because dash (-) means stdin." )
+    no_END = 1
+    exit 1
+  }
+
+  # Find INT_MAX supported by both this AWK (usually an ISO C signed int)
+  # and SQlite.
+  # On non-8bit-based architectures, the additional bits are safely ignored.
+ + # 8bit (lower precision should not exist) + s="127" + # "63" + 0 avoids potential parser misbehavior + if( (s + 0) "" == s ){ INT_MAX_HALF = "63" + 0 } + # 16bit + s="32767" + if( (s + 0) "" == s ){ INT_MAX_HALF = "16383" + 0 } + # 32bit + s="2147483647" + if( (s + 0) "" == s ){ INT_MAX_HALF = "1073741823" + 0 } + # 64bit (as INTEGER in SQlite3) + s="9223372036854775807" + if( (s + 0) "" == s ){ INT_MAX_HALF = "4611686018427387904" + 0 } +# # 128bit +# s="170141183460469231731687303715884105728" +# if( (s + 0) "" == s ){ INT_MAX_HALF = "85070591730234615865843651857942052864" + 0 } +# # 256bit +# s="57896044618658097711785492504343953926634992332820282019728792003956564819968" +# if( (s + 0) "" == s ){ INT_MAX_HALF = "28948022309329048855892746252171976963317496166410141009864396001978282409984" + 0 } +# # 512bit +# s="6703903964971298549787012499102923063739682910296196688861780721860882015036773488400937149083451713845015929093243025426876941405973284973216824503042048" +# if( (s + 0) "" == s ){ INT_MAX_HALF = "3351951982485649274893506249551461531869841455148098344430890360930441007518386744200468574541725856922507964546621512713438470702986642486608412251521024" + 0 } +# # 1024bit +# s="89884656743115795386465259539451236680898848947115328636715040578866337902750481566354238661203768010560056939935696678829394884407208311246423715319737062188883946712432742638151109800623047059726541476042502884419075341171231440736956555270413618581675255342293149119973622969239858152417678164812112068608" +# if( (s + 0) "" == s ){ INT_MAX_HALF = "44942328371557897693232629769725618340449424473557664318357520289433168951375240783177119330601884005280028469967848339414697442203604155623211857659868531094441973356216371319075554900311523529863270738021251442209537670585615720368478277635206809290837627671146574559986811484619929076208839082406056034304" + 0 } +# # higher precision probably not needed + + FS=",$" + print "PRAGMA synchronous = OFF;" + print "PRAGMA 
journal_mode = MEMORY;" + print "BEGIN TRANSACTION;" +} + +# historically 3 spaces separate non-argument local variables +function bit_to_int( str_bit, powtwo, i, res, bit, overflow ){ + powtwo = 1 + overflow = 0 + # 011101 = 1*2^0 + 0*2^1 + 1*2^2 ... + for( i = length( str_bit ); i > 0; --i ){ + bit = substr( str_bit, i, 1 ) + if( overflow || ( bit == 1 && res > INT_MAX_HALF ) ){ + printerr( \ + NR ": WARN Bit field overflow, number truncated (LSBs saved, MSBs ignored)." ) + break + } + res = res + bit * powtwo + # no warning here as it might be the last iteration + if( powtwo > INT_MAX_HALF ){ overflow = 1; continue } + powtwo = powtwo * 2 + } + return res +} + +# CREATE TRIGGER statements have funny commenting. Remember we are in trigger. +/^\/\*.*(CREATE.*TRIGGER|create.*trigger)/ { + gsub( /^.*(TRIGGER|trigger)/, "CREATE TRIGGER" ) + print + inTrigger = 1 + next +} +# The end of CREATE TRIGGER has a stray comment terminator +/(END|end) \*\/;;/ { gsub( /\*\//, "" ); print; inTrigger = 0; next } +# The rest of triggers just get passed through +inTrigger != 0 { print; next } + +# CREATE VIEW looks like a TABLE in comments +/^\/\*.*(CREATE.*TABLE|create.*table)/ { + inView = 1 + next +} +# end of CREATE VIEW +/^(\).*(ENGINE|engine).*\*\/;)/ { + inView = 0 + next +} +# content of CREATE VIEW +inView != 0 { next } + +# skip comments +/^\/\*/ { next } + +# skip PARTITION statements +/^ *[(]?(PARTITION|partition) +[^ ]+/ { next } + +# print all INSERT lines +( /^ *\(/ && /\) *[,;] *$/ ) || /^(INSERT|insert|REPLACE|replace)/ { + prev = "" + + # first replace \\ by \_ that mysqldump never generates to deal with + # sequnces like \\n that should be translated into \n, not \. + # After we convert all escapes we replace \_ by backslashes. 
+ gsub( /\\\\/, "\\_" ) + + # single quotes are escaped by another single quote + gsub( /\\'/, "''" ) + gsub( /\\n/, "\n" ) + gsub( /\\r/, "\r" ) + gsub( /\\"/, "\"" ) + gsub( /\\\032/, "\032" ) # substitute char + + gsub( /\\_/, "\\" ) + + # sqlite3 is limited to 16 significant digits of precision + while( match( $0, /0x[0-9a-fA-F]{17}/ ) ){ + hexIssue = 1 + sub( /0x[0-9a-fA-F]+/, substr( $0, RSTART, RLENGTH-1 ), $0 ) + } + if( hexIssue ){ + printerr( \ + NR ": WARN Hex number trimmed (length longer than 16 chars)." ) + hexIssue = 0 + } + print + next +} + +# CREATE DATABASE is not supported +/^(CREATE DATABASE|create database)/ { next } + +# print the CREATE line as is and capture the table name +/^(CREATE|create)/ { + if( $0 ~ /IF NOT EXISTS|if not exists/ || $0 ~ /TEMPORARY|temporary/ ){ + caseIssue = 1 + printerr( \ + NR ": WARN Potential case sensitivity issues with table/column naming\n" \ + " (see INFO at the end)." ) + } + if( match( $0, /`[^`]+/ ) ){ + tableName = substr( $0, RSTART+1, RLENGTH-1 ) + } + aInc = 0 + prev = "" + firstInTable = 1 + print + next +} + +# Replace `FULLTEXT KEY` (probably other `XXXXX KEY`) +/^ (FULLTEXT KEY|fulltext key)/ { gsub( /[A-Za-z ]+(KEY|key)/, " KEY" ) } + +# Get rid of field lengths in KEY lines +/ (PRIMARY |primary )?(KEY|key)/ { gsub( /\([0-9]+\)/, "" ) } + +aInc == 1 && /PRIMARY KEY|primary key/ { next } + +# Replace COLLATE xxx_xxxx_xx statements with COLLATE BINARY +/ (COLLATE|collate) [a-z0-9_]*/ { gsub( /(COLLATE|collate) [a-z0-9_]*/, "COLLATE BINARY" ) } + +# Print all fields definition lines except the `KEY` lines. 
+/^ / && !/^( (KEY|key)|\);)/ { + if( match( $0, /[^"`]AUTO_INCREMENT|auto_increment[^"`]/) ){ + aInc = 1 + gsub( /AUTO_INCREMENT|auto_increment/, "PRIMARY KEY AUTOINCREMENT" ) + } + gsub( /(UNIQUE KEY|unique key) (`.*`|".*") /, "UNIQUE " ) + gsub( /(CHARACTER SET|character set) [^ ]+[ ,]/, "" ) + # FIXME + # CREATE TRIGGER [UpdateLastTime] + # AFTER UPDATE + # ON Package + # FOR EACH ROW + # BEGIN + # UPDATE Package SET LastUpdate = CURRENT_TIMESTAMP WHERE ActionId = old.ActionId; + # END + gsub( /(ON|on) (UPDATE|update) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "" ) + gsub( /(DEFAULT|default) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "DEFAULT current_timestamp") + gsub( /(COLLATE|collate) [^ ]+ /, "" ) + gsub( /(ENUM|enum)[^)]+\)/, "text " ) + gsub( /(SET|set)\([^)]+\)/, "text " ) + gsub( /UNSIGNED|unsigned/, "" ) + gsub( /_utf8mb3/, "" ) + gsub( /` [^ ]*(INT|int|BIT|bit)[^ ]*/, "` integer" ) + gsub( /" [^ ]*(INT|int|BIT|bit)[^ ]*/, "\" integer" ) + ere_bit_field = "[bB]'[10]+'" + if( match($0, ere_bit_field) ){ + sub( ere_bit_field, bit_to_int( substr( $0, RSTART +2, RLENGTH -2 -1 ) ) ) + } + + # remove USING BTREE and other suffixes for USING, for example: "UNIQUE KEY + # `hostname_domain` (`hostname`,`domain`) USING BTREE," + gsub( / USING [^, ]+/, "" ) + + # field comments are not supported + gsub( / (COMMENT|comment).+$/, "" ) + # Get commas off end of line + gsub( /,.?$/, "" ) + if( prev ){ + if( firstInTable ){ + print prev + firstInTable = 0 + } + else { + print "," prev + } + } + else { + # FIXME check if this is correct in all cases + if( match( $1, + /(CONSTRAINT|constraint) ["].*["] (FOREIGN KEY|foreign key)/ ) ){ + print "," + } + } + prev = $1 +} + +/ ENGINE| engine/ { + if( prev ){ + if( firstInTable ){ + print prev + firstInTable = 0 + } + else { + print "," prev + } + } + prev="" + print ");" + next +} +# `KEY` lines are extracted from the `CREATE` block and stored in array for later print +# in a separate `CREATE KEY` command. 
The index name is prefixed by the table name to
+# avoid a sqlite error for duplicate index name.
+/^( (KEY|key)|\);)/ {
+  if( prev ){
+    if( firstInTable ){
+      print prev
+      firstInTable = 0
+    }
+    else {
+      print "," prev
+    }
+  }
+  prev = ""
+  if( $0 == ");" ){
+    print
+  }
+  else {
+    if( match( $0, /`[^`]+/ ) ){
+      indexName = substr( $0, RSTART+1, RLENGTH-1 )
+    }
+    if( match( $0, /\([^()]+/ ) ){
+      indexKey = substr( $0, RSTART+1, RLENGTH-1 )
+    }
+    # idx_ prefix to avoid name clashes (they really happen!)
+    key[tableName] = key[tableName] "CREATE INDEX \"idx_" \
+      tableName "_" indexName "\" ON \"" tableName "\" (" indexKey ");\n"
+  }
+}
+
+END {
+  if( no_END ){ exit 1}
+  # print all KEY creation lines.
+  for( table in key ){ printf "%s", key[table] }
+
+  print "END TRANSACTION;"
+
+  if( caseIssue ){
+    printerr( \
+      "INFO Pure sqlite identifiers are case insensitive (even if quoted\n" \
+      " or if ASCII) and doesnt cross-check TABLE and TEMPORARY TABLE\n" \
+      " identifiers. Thus expect errors like \"table T has no column named F\".")
+  }
+}
\ No newline at end of file
diff --git a/src/publicwhip_data/process.py b/src/publicwhip_data/process.py
new file mode 100644
index 00000000..a787504d
--- /dev/null
+++ b/src/publicwhip_data/process.py
@@ -0,0 +1,183 @@
+"""
+Download public whips database dumps, convert it to parquet files
+"""
+
+
+import bz2
+import shutil
+import sqlite3
+import subprocess
+from pathlib import Path
+
+import pandas as pd
+import requests
+import rich
+from tqdm import tqdm
+
+# awk converter shipped next to this module; resolved from the module path so
+# the conversion works whatever the current working directory is
+MYSQL2SQLITE = Path(__file__).resolve().parent / "mysql2sqlite"
+
+# number of rows read from sqlite per dataframe chunk
+CHUNK_ROWS = 1000
+
+
+def download_url_to_file(url: str, filepath: Path) -> Path:
+    """
+    Stream the content at url into filepath and return filepath.
+
+    Raises requests.HTTPError if the response has an error status.
+    """
+    with requests.get(url, stream=True, timeout=60) as r:  # type: ignore
+        r.raise_for_status()
+        rich.print(f"Downloading [green]{url}[/green] to [green]{filepath}[/green]")
+        with open(filepath, "wb") as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+    return filepath
+
+
+def unzip_zip_file(zip_file: Path, output_dir: Path) -> Path:
+    """
+    Decompress a bz2 file into output_dir and return the new file's path.
+
+    The output name is the input name minus its final suffix.
+    """
+    rich.print(f"Unzipping [green]{zip_file}[/green] to [green]{output_dir}[/green]")
+    filepath = output_dir / zip_file.stem
+    with bz2.BZ2File(zip_file) as fr, open(filepath, "wb") as fw:
+        shutil.copyfileobj(fr, fw)
+    return filepath
+
+
+def convert_to_sqlite(input_file: Path, output_file: Path) -> Path:
+    """
+    Run the mysql2sqlite utility on input_file and load its output into a
+    sqlite database at output_file.
+
+    Raises subprocess.CalledProcessError if the utility exits non-zero.
+    """
+    rich.print("Converting from [green]mysql[/green] to [green]sqlite[/green]")
+    converted_pp = subprocess.run(
+        [str(MYSQL2SQLITE), str(input_file)],
+        check=True,
+        stdout=subprocess.PIPE,
+    )
+
+    rich.print(
+        f"Converting [green]{input_file}[/green] to [green]{output_file}[/green]"
+    )
+    conn = sqlite3.connect(output_file)
+    try:
+        conn.executescript(converted_pp.stdout.decode("ISO-8859-1"))
+        conn.commit()
+    finally:
+        # release the db handle even if the generated script is rejected
+        conn.close()
+
+    return output_file
+
+
+def convert_bz2_mysql_to_paraquet(url: str, output_dir: Path) -> list[Path]:
+    """
+    Fetch the bz2 file, unzip it, convert it to sqlite, convert it to parquet
+
+    Returns the parquet files written to output_dir. The downloaded archive,
+    the extracted dump and the intermediate sqlite db are removed afterwards.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+    archive_name = url.split("/")[-1]
+    output_file = output_dir / archive_name
+    download_url_to_file(url, output_file)
+    result = unzip_zip_file(output_file, output_dir)
+    db_filename = output_dir / (Path(archive_name).stem.removesuffix(".sql") + ".db")
+    # start from a clean db so reruns do not hit existing tables
+    db_filename.unlink(missing_ok=True)
+    convert_to_sqlite(result, db_filename)
+    files = convert_sqlitedb_to_paraquet(db_filename)
+    output_file.unlink()
+    result.unlink()
+    db_filename.unlink()
+    return files
+
+
+def convert_sqlitedb_to_paraquet(input_path: Path) -> list[Path]:
+    """
+    Write every user table in the sqlite db to its own parquet file.
+
+    Files are named <table>.parquet and placed next to input_path; the
+    written paths are returned. An empty table yields an empty parquet file
+    instead of raising.
+    """
+    rich.print(
+        f"Converting [green]{input_path}[/green] to [green]parquet files[/green]"
+    )
+    outputs: list[Path] = []
+    conn = sqlite3.connect(input_path)
+    try:
+        # list the tables up front so the catalogue cursor is exhausted before
+        # the per-table queries run on the same connection
+        tables = [
+            row[0]
+            for row in conn.execute(
+                "SELECT name FROM sqlite_master WHERE type='table';"
+            )
+        ]
+        for table in tables:
+            # sqlite_ prefixed tables (e.g. sqlite_sequence) are sqlite internals
+            if table.startswith("sqlite_"):
+                continue
+            # quote the identifier so unusual table names cannot break the SQL
+            quoted = '"' + table.replace('"', '""') + '"'
+            count = conn.execute(f"SELECT COUNT(*) FROM {quoted};").fetchone()[0]
+            rich.print(f"Converting {table} with {count} rows")
+            query = f"SELECT * FROM {quoted}"
+            # stream the table in a single pass rather than LIMIT/OFFSET paging,
+            # which rescans skipped rows and has no guaranteed row order
+            chunks = pd.read_sql_query(query, conn, chunksize=CHUNK_ROWS)
+            total = -(-count // CHUNK_ROWS)  # ceiling division for the progress bar
+            dfs: list[pd.DataFrame] = list(tqdm(chunks, total=total))
+            if not dfs:
+                # nothing was yielded for an empty table: read it unchunked to get
+                # an empty frame that still carries the column names
+                dfs = [pd.read_sql_query(query, conn)]
+            df = pd.concat(dfs)  # type: ignore
+            output_filename: Path = input_path.parent / f"{table}.parquet"
+            df.to_parquet(output_filename)
+            outputs.append(output_filename)
+    finally:
+        conn.close()
+    return outputs
+
+
+def fetch_public_whip():
+    """
+    Convert public whip files into parquet files
+    """
+
+    urls = [
+        "https://www.publicwhip.org.uk/data/pw_static_tables.sql.bz2",
+        "https://www.publicwhip.org.uk/data/pw_dynamic_tables.sql.bz2",
+    ]
+    for url in urls:
+        convert_bz2_mysql_to_paraquet(url, Path("data", "raw"))
+
+
+def move_files():
+    """
+    Copy the parquet files from data/raw into the data package directory
+    """
+    for file in Path("data", "raw").glob("*.parquet"):
+        shutil.copyfile(file, Path("data", "packages", "public_whip_data", file.name))
+
+
+def fetch_and_move_pw():
+    """
+    Download and convert the public whip dumps, then copy them into the package
+    """
+    fetch_public_whip()
+    move_files()
+
+
+if __name__ == "__main__":
+    fetch_and_move_pw()
diff --git a/tests/test_publicwhip_data.py b/tests/test_publicwhip_data.py
new file mode 100644
index 00000000..f68d7200
--- /dev/null
+++ b/tests/test_publicwhip_data.py
@@ -0,0 +1,7 @@
+import publicwhip_data
+
+import pytest
+
+
+def test_true_is_true():
+    assert True is True