feat: separate cells (#14)

LizardByte · Aug 21, 2024 · 0e7ec32 · 0e7ec32
1 parent 34ae300
commit 0e7ec32
Showing 1 changed file with 99 additions and 44 deletions.
diff --git a/notebook/dashboard.ipynb b/notebook/dashboard.ipynb
@@ -71,11 +71,11 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "initial_id",
+   "id": "957d644a7520fbd",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Initialize the environment\n",
+    "# Imports\n",
     "\n",
     "# standard imports\n",
     "import os\n",
@@ -88,7 +88,17 @@
     "import plotly.express as px\n",
     "import plotly.graph_objects as go\n",
     "import plotly.io as pio\n",
-    "import requests\n",
+    "import requests"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "initial_id",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set up the environment\n",
     "\n",
     "# Load environment variables from .env file\n",
     "load_dotenv()\n",
@@ -99,6 +109,7 @@
     "\n",
     "# set the default plotly template\n",
     "pio.templates.default = \"plotly_dark\"\n",
+    "pio.renderers.default = 'notebook'\n",
     "\n",
     "# Fetch repository data\n",
     "org_name = \"LizardByte\"\n",
@@ -115,9 +126,17 @@
     "readthedocs_response = requests.get(readthedocs_data_url)\n",
     "if not readthedocs_response.ok:\n",
     "    raise LookupError(\"Failed to fetch data from uno\")\n",
-    "readthedocs_data = readthedocs_response.json()\n",
-    "\n",
-    "# Process data\n",
+    "readthedocs_data = readthedocs_response.json()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3d804270c89d8280",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get Repo Data\n",
     "repo_data = []\n",
     "for repo in repos:\n",
     "    # get license\n",
@@ -171,7 +190,17 @@
     "        \"has_readthedocs\": readthedocs_project is not None,\n",
     "        \"has_readme\": readme_file is not None,\n",
     "        \"_repo\": repo,\n",
-    "    })\n",
+    "    })"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "55ae039e9f91e18",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initial data frames\n",
     "\n",
     "df = pd.DataFrame(repo_data)\n",
     "df_repos = df[\n",
@@ -182,12 +211,20 @@
     "    (~df['archived']) &\n",
     "    (~df['fork']) &\n",
     "    (~df['topics'].apply(lambda topics: 'package-manager' in topics))\n",
-    "]\n",
-    "\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "466d0d2fec2e1828",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initial Results\n",
     "print(f'Total Repositories: {len(repo_data)}')\n",
     "print(f'Archived Repositories: {df[\"archived\"].sum()}')\n",
     "print(f'Forked Repositories: {df[\"fork\"].sum()}')\n",
-    "\n",
     "print(f'Total Open Issues: {df[\"issues\"].apply(len).sum()}')\n",
     "print(f'Total Open PRs: {df[\"prs\"].apply(len).sum()}')\n",
     "print(f'Open issues in active repositories: {df_repos[\"issues\"].apply(len).sum()}')\n",
@@ -236,30 +273,33 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "a08398efe666a399",
+   "id": "6ce69f164706dacf",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Star History\n",
+    "# Star History Data\n",
     "stargazer_data = []\n",
     "for repo in df_repos.to_dict('records'):\n",
     "    stargazers = repo['_repo'].get_stargazers_with_dates()\n",
     "    for stargazer in stargazers:\n",
     "        stargazer_data.append({\n",
     "            \"repo\": repo['repo'],\n",
     "            \"date\": stargazer.starred_at,\n",
-    "        })\n",
-    "\n",
-    "# Convert to DataFrame\n",
+    "        })"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a08398efe666a399",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Star History Visuals\n",
     "df_stargazers = pd.DataFrame(stargazer_data)\n",
-    "\n",
-    "# Sort by date\n",
     "df_stargazers = df_stargazers.sort_values(by=\"date\")\n",
-    "\n",
-    "# Calculate cumulative stars\n",
     "df_stargazers[\"cumulative_stars\"] = df_stargazers.groupby(\"repo\").cumcount() + 1\n",
     "\n",
-    "# Visualize using Plotly\n",
     "fig = px.line(\n",
     "    df_stargazers,\n",
     "    x=\"date\",\n",
@@ -268,7 +308,6 @@
     "    title=\"Star History\",\n",
     "    labels={\"date\": \"Date\", \"cumulative_stars\": \"Cumulative Stars\"},\n",
     ")\n",
-    "\n",
     "fig.show()"
    ]
   },
@@ -360,11 +399,11 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "4f473fe7a839b01c",
+   "id": "3bde60fbd0b7e540",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Open PRs\n",
+    "# PR Data\n",
     "pr_data = []\n",
     "for repo in df_repos.to_dict('records'):\n",
     "    draft_prs = 0\n",
@@ -385,15 +424,21 @@
     "        \"Draft\": draft_prs,\n",
     "        \"Ready for review\": non_draft_prs,\n",
     "        \"Dependency\": dependabot_prs,\n",
-    "    })\n",
-    "\n",
+    "    })"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4f473fe7a839b01c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Open PRs\n",
     "df_prs = pd.DataFrame(pr_data)\n",
     "df_prs['total_prs'] = df_prs[['Draft', 'Ready for review', 'Dependency']].sum(axis=1)\n",
-    "\n",
-    "# Sort by total PRs in descending order\n",
     "df_prs = df_prs.sort_values(by='total_prs', ascending=False)\n",
     "\n",
-    "# Visualize data using a stacked bar chart\n",
     "fig = px.bar(\n",
     "    df_prs,\n",
     "    x='repo',\n",
@@ -427,7 +472,6 @@
     "# License distribution\n",
     "license_counts = df_repos.groupby(['license', 'repo']).size().reset_index(name='count')\n",
     "\n",
-    "# Create a nested treemap for license distribution with repo names in hover data\n",
     "fig_treemap = px.treemap(\n",
     "    license_counts,\n",
     "    path=['license', 'repo'],\n",
@@ -484,27 +528,35 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "20e09d93eda0478a",
+   "id": "2cf8ff484b1639d",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Programming Languages by Bytes of Code\n",
+    "# Programming language data\n",
     "language_data = []\n",
     "for repo in df_repos.to_dict('records'):\n",
     "    for language, bytes_of_code in repo['languages'].items():\n",
     "        language_data.append({\n",
     "            \"repo\": repo['repo'],\n",
     "            \"language\": language,\n",
     "            \"bytes_of_code\": bytes_of_code,\n",
-    "        })\n",
-    "\n",
+    "        })"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "20e09d93eda0478a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Programming Languages\n",
     "df_languages = pd.DataFrame(language_data)\n",
     "\n",
     "# Aggregate data by language and repo\n",
     "language_counts_bytes = df_languages.groupby(['language', 'repo']).agg({\n",
     "    'bytes_of_code': 'sum'\n",
     "}).reset_index()\n",
-    "\n",
     "language_counts_repos = df_languages.groupby(['language', 'repo']).size().reset_index(name='repo_count')\n",
     "\n",
     "def create_language_figures(counts: pd.DataFrame, path_key: str, value_key: str):\n",
@@ -560,7 +612,6 @@
     "            }\n",
     "        ]\n",
     "    )\n",
-    "\n",
     "    fig.show()"
    ]
   },
@@ -575,24 +626,29 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ec43a8fd7f9d49bb",
+   "id": "e7ba218e5863deb7",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Docs\n",
-    "\n",
-    "# Initialize an empty list\n",
+    "# Docs data\n",
     "docs_data = []\n",
     "for repo in df_repos.to_dict('records'):\n",
     "    docs_data.append({\n",
     "        \"repo\": repo['repo'],\n",
     "        \"has_readme\": repo['has_readme'],\n",
     "        \"has_readthedocs\": repo['has_readthedocs'],\n",
-    "    })\n",
-    "\n",
+    "    })"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ec43a8fd7f9d49bb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Docs\n",
     "df_docs = pd.DataFrame(docs_data)\n",
-    "\n",
-    "# Aggregate data by has_readme/has_readthedocs and repo\n",
     "readme_counts = df_docs.groupby(['has_readme', 'repo']).size().reset_index(name='repo_count')\n",
     "readthedocs_counts = df_docs.groupby(['has_readthedocs', 'repo']).size().reset_index(name='repo_count')\n",
     "\n",
@@ -645,7 +701,6 @@
     "            }\n",
     "        ]\n",
     "    )\n",
-    "\n",
     "    fig.show()"
    ]
   }