Skip to content

Commit

Permalink
fundamental factor model
Browse files Browse the repository at this point in the history
  • Loading branch information
thekioskman committed Apr 28, 2022
1 parent 61a9a6f commit 7b00d68
Show file tree
Hide file tree
Showing 4 changed files with 962 additions and 702 deletions.
399 changes: 399 additions & 0 deletions data_generation.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,399 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Rank</th>\n",
" <th>Name</th>\n",
" <th>Symbol</th>\n",
" <th>marketcap</th>\n",
" <th>price (USD)</th>\n",
" <th>country</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Apple</td>\n",
" <td>AAPL</td>\n",
" <td>2613550252032</td>\n",
" <td>160.15</td>\n",
" <td>United States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Microsoft</td>\n",
" <td>MSFT</td>\n",
" <td>2075883208704</td>\n",
" <td>276.90</td>\n",
" <td>United States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>Alphabet (Google)</td>\n",
" <td>GOOG</td>\n",
" <td>1608236793856</td>\n",
" <td>2432.90</td>\n",
" <td>United States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>Amazon</td>\n",
" <td>AMZN</td>\n",
" <td>1463749312512</td>\n",
" <td>2878.34</td>\n",
" <td>United States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>Tesla</td>\n",
" <td>TSLA</td>\n",
" <td>1029417271296</td>\n",
" <td>996.04</td>\n",
" <td>United States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>730</th>\n",
" <td>731</td>\n",
" <td>Jump Networks</td>\n",
" <td>JUMPNET.NS</td>\n",
" <td>11669324</td>\n",
" <td>0.12</td>\n",
" <td>India</td>\n",
" </tr>\n",
" <tr>\n",
" <th>731</th>\n",
" <td>732</td>\n",
" <td>Cemtrex</td>\n",
" <td>CETX</td>\n",
" <td>10846338</td>\n",
" <td>0.44</td>\n",
" <td>United States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>732</th>\n",
" <td>733</td>\n",
" <td>Pareteum Corporation</td>\n",
" <td>TEUM</td>\n",
" <td>1426969</td>\n",
" <td>0.01</td>\n",
" <td>United States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>733</th>\n",
" <td>734</td>\n",
" <td>Justworks</td>\n",
" <td>JW</td>\n",
" <td>0</td>\n",
" <td>0.00</td>\n",
" <td>United States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>734</th>\n",
" <td>735</td>\n",
" <td>Rhodium Enterprises</td>\n",
" <td>RHDM</td>\n",
" <td>0</td>\n",
" <td>0.00</td>\n",
" <td>United States</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>735 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Rank Name Symbol marketcap price (USD) \\\n",
"0 1 Apple AAPL 2613550252032 160.15 \n",
"1 2 Microsoft MSFT 2075883208704 276.90 \n",
"2 3 Alphabet (Google) GOOG 1608236793856 2432.90 \n",
"3 4 Amazon AMZN 1463749312512 2878.34 \n",
"4 5 Tesla TSLA 1029417271296 996.04 \n",
".. ... ... ... ... ... \n",
"730 731 Jump Networks JUMPNET.NS 11669324 0.12 \n",
"731 732 Cemtrex CETX 10846338 0.44 \n",
"732 733 Pareteum Corporation TEUM 1426969 0.01 \n",
"733 734 Justworks JW 0 0.00 \n",
"734 735 Rhodium Enterprises RHDM 0 0.00 \n",
"\n",
" country \n",
"0 United States \n",
"1 United States \n",
"2 United States \n",
"3 United States \n",
"4 United States \n",
".. ... \n",
"730 India \n",
"731 United States \n",
"732 United States \n",
"733 United States \n",
"734 United States \n",
"\n",
"[735 rows x 6 columns]"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import yfinance as yf\n",
"import os\n",
"from tqdm import tqdm\n",
"\n",
"tech_sector = pd.read_csv(\"Data/tech_sector.csv\")\n",
"tech_sector"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'c:\\\\Users\\\\brian\\\\projects\\\\factor_models\\\\Data/Raw'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"parent_dir = os.path.join(os.getcwd() , \"Data/Raw\")\n",
"parent_dir"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 36%|███▌ | 263/735 [50:34<1:29:50, 11.42s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"- TIXT: CircuitBreaker 'redis' is OPEN and does not permit further calls\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 65%|██████▌ | 479/735 [1:31:30<50:07, 11.75s/it] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"- 112040.KQ: No data found for this date range, symbol may be delisted\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 84%|████████▍ | 621/735 [1:58:49<22:47, 12.00s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"- CSLT: No data found, symbol may be delisted\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 86%|████████▌ | 631/735 [1:59:37<08:25, 4.86s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"- CSPR: No data found, symbol may be delisted\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 91%|█████████ | 668/735 [2:02:28<05:09, 4.62s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"- TRIT: No data found, symbol may be delisted\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 99%|█████████▉| 730/735 [2:07:05<00:21, 4.31s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"- JUMPNET.NS: No data found, symbol may be delisted\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|█████████▉| 733/735 [2:07:19<00:09, 4.56s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"- JW: No data found, symbol may be delisted\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|█████████▉| 734/735 [2:07:25<00:04, 4.87s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"- RHDM: No data found, symbol may be delisted\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 735/735 [2:07:31<00:00, 10.41s/it]\n"
]
}
],
"source": [
"#grab the stock off of yahoo\n",
"for index, row in tqdm(tech_sector.iterrows(), total=len(tech_sector)):\n",
" stock = yf.Ticker(row[\"Symbol\"])\n",
"\n",
" #get the stocks return history\n",
" path = os.path.join(parent_dir, row[\"Symbol\"])\n",
" if not(os.path.isdir(path)):\n",
" os.mkdir(path)\n",
"\n",
" #put the data regarding the stock into this folder\n",
" stock_prices = stock.history(period=\"max\")\n",
" stock_prices.to_csv(path + \"/stock_prices.csv\")\n",
"\n",
" #put the fundemental data into the folder\n",
" stock_balance_sheet = stock.quarterly_balance_sheet\n",
" stock_balance_sheet.to_csv(path + \"/stock_balance_sheet.csv\")\n",
"\n",
" stock_sustainability = stock.sustainability\n",
" \n",
" if stock_sustainability is not None:\n",
" stock_sustainability.to_csv(path + \"/sustainability.csv\")\n",
"\n",
" stock_finacials = stock.quarterly_financials\n",
" stock_finacials.to_csv(path + \"/financials.csv\")\n",
"\n",
" stock_cashflow = stock.quarterly_cashflow\n",
" stock_cashflow.to_csv(path + \"/cashflow.csv\")\n",
"\n",
" stock_earning = stock.quarterly_earnings\n",
" stock_earning.to_csv(path + \"/earnings.csv\")\n",
"\n",
" stock_basicinfo = stock.info.items()\n",
" stock_basicinfo = list(stock_basicinfo)\n",
" stock_basic_df = pd.DataFrame(stock_basicinfo)\n",
" stock_basic_df = stock_basic_df.rename(columns={0: \"info\", 1: \"value\"})\n",
" stock_basic_df.to_csv(path + \"/basic_info.csv\")"
]
}
],
"metadata": {
"interpreter": {
"hash": "d2feefea56b0213a6ec2bb54630eea72d1e2849cc5d96656889c1540b99c0617"
},
"kernelspec": {
"display_name": "Python 3.9.8 ('venv_for_ml': venv)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.8"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 7b00d68

Please sign in to comment.