-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
61a9a6f
commit 7b00d68
Showing
4 changed files
with
962 additions
and
702 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,399 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style scoped>\n", | ||
" .dataframe tbody tr th:only-of-type {\n", | ||
" vertical-align: middle;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>Rank</th>\n", | ||
" <th>Name</th>\n", | ||
" <th>Symbol</th>\n", | ||
" <th>marketcap</th>\n", | ||
" <th>price (USD)</th>\n", | ||
" <th>country</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>0</th>\n", | ||
" <td>1</td>\n", | ||
" <td>Apple</td>\n", | ||
" <td>AAPL</td>\n", | ||
" <td>2613550252032</td>\n", | ||
" <td>160.15</td>\n", | ||
" <td>United States</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
" <td>2</td>\n", | ||
" <td>Microsoft</td>\n", | ||
" <td>MSFT</td>\n", | ||
" <td>2075883208704</td>\n", | ||
" <td>276.90</td>\n", | ||
" <td>United States</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2</th>\n", | ||
" <td>3</td>\n", | ||
" <td>Alphabet (Google)</td>\n", | ||
" <td>GOOG</td>\n", | ||
" <td>1608236793856</td>\n", | ||
" <td>2432.90</td>\n", | ||
" <td>United States</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>3</th>\n", | ||
" <td>4</td>\n", | ||
" <td>Amazon</td>\n", | ||
" <td>AMZN</td>\n", | ||
" <td>1463749312512</td>\n", | ||
" <td>2878.34</td>\n", | ||
" <td>United States</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>4</th>\n", | ||
" <td>5</td>\n", | ||
" <td>Tesla</td>\n", | ||
" <td>TSLA</td>\n", | ||
" <td>1029417271296</td>\n", | ||
" <td>996.04</td>\n", | ||
" <td>United States</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>...</th>\n", | ||
" <td>...</td>\n", | ||
" <td>...</td>\n", | ||
" <td>...</td>\n", | ||
" <td>...</td>\n", | ||
" <td>...</td>\n", | ||
" <td>...</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>730</th>\n", | ||
" <td>731</td>\n", | ||
" <td>Jump Networks</td>\n", | ||
" <td>JUMPNET.NS</td>\n", | ||
" <td>11669324</td>\n", | ||
" <td>0.12</td>\n", | ||
" <td>India</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>731</th>\n", | ||
" <td>732</td>\n", | ||
" <td>Cemtrex</td>\n", | ||
" <td>CETX</td>\n", | ||
" <td>10846338</td>\n", | ||
" <td>0.44</td>\n", | ||
" <td>United States</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>732</th>\n", | ||
" <td>733</td>\n", | ||
" <td>Pareteum Corporation</td>\n", | ||
" <td>TEUM</td>\n", | ||
" <td>1426969</td>\n", | ||
" <td>0.01</td>\n", | ||
" <td>United States</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>733</th>\n", | ||
" <td>734</td>\n", | ||
" <td>Justworks</td>\n", | ||
" <td>JW</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0.00</td>\n", | ||
" <td>United States</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>734</th>\n", | ||
" <td>735</td>\n", | ||
" <td>Rhodium Enterprises</td>\n", | ||
" <td>RHDM</td>\n", | ||
" <td>0</td>\n", | ||
" <td>0.00</td>\n", | ||
" <td>United States</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"<p>735 rows × 6 columns</p>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" Rank Name Symbol marketcap price (USD) \\\n", | ||
"0 1 Apple AAPL 2613550252032 160.15 \n", | ||
"1 2 Microsoft MSFT 2075883208704 276.90 \n", | ||
"2 3 Alphabet (Google) GOOG 1608236793856 2432.90 \n", | ||
"3 4 Amazon AMZN 1463749312512 2878.34 \n", | ||
"4 5 Tesla TSLA 1029417271296 996.04 \n", | ||
".. ... ... ... ... ... \n", | ||
"730 731 Jump Networks JUMPNET.NS 11669324 0.12 \n", | ||
"731 732 Cemtrex CETX 10846338 0.44 \n", | ||
"732 733 Pareteum Corporation TEUM 1426969 0.01 \n", | ||
"733 734 Justworks JW 0 0.00 \n", | ||
"734 735 Rhodium Enterprises RHDM 0 0.00 \n", | ||
"\n", | ||
" country \n", | ||
"0 United States \n", | ||
"1 United States \n", | ||
"2 United States \n", | ||
"3 United States \n", | ||
"4 United States \n", | ||
".. ... \n", | ||
"730 India \n", | ||
"731 United States \n", | ||
"732 United States \n", | ||
"733 United States \n", | ||
"734 United States \n", | ||
"\n", | ||
"[735 rows x 6 columns]" | ||
] | ||
}, | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"import yfinance as yf\n", | ||
"import os\n", | ||
"from tqdm import tqdm\n", | ||
"\n", | ||
"tech_sector = pd.read_csv(\"Data/tech_sector.csv\")\n", | ||
"tech_sector" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"'c:\\\\Users\\\\brian\\\\projects\\\\factor_models\\\\Data/Raw'" | ||
] | ||
}, | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"parent_dir = os.path.join(os.getcwd() , \"Data/Raw\")\n", | ||
"parent_dir" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
" 36%|███▌ | 263/735 [50:34<1:29:50, 11.42s/it]" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"- TIXT: CircuitBreaker 'redis' is OPEN and does not permit further calls\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
" 65%|██████▌ | 479/735 [1:31:30<50:07, 11.75s/it] " | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"- 112040.KQ: No data found for this date range, symbol may be delisted\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
" 84%|████████▍ | 621/735 [1:58:49<22:47, 12.00s/it]" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"- CSLT: No data found, symbol may be delisted\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
" 86%|████████▌ | 631/735 [1:59:37<08:25, 4.86s/it]" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"- CSPR: No data found, symbol may be delisted\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
" 91%|█████████ | 668/735 [2:02:28<05:09, 4.62s/it]" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"- TRIT: No data found, symbol may be delisted\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
" 99%|█████████▉| 730/735 [2:07:05<00:21, 4.31s/it]" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"- JUMPNET.NS: No data found, symbol may be delisted\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"100%|█████████▉| 733/735 [2:07:19<00:09, 4.56s/it]" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"- JW: No data found, symbol may be delisted\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"100%|█████████▉| 734/735 [2:07:25<00:04, 4.87s/it]" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"- RHDM: No data found, symbol may be delisted\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"100%|██████████| 735/735 [2:07:31<00:00, 10.41s/it]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"#grab the stock off of yahoo\n", | ||
"for index, row in tqdm(tech_sector.iterrows(), total=len(tech_sector)):\n", | ||
" stock = yf.Ticker(row[\"Symbol\"])\n", | ||
"\n", | ||
" #get the stocks return history\n", | ||
" path = os.path.join(parent_dir, row[\"Symbol\"])\n", | ||
" if not(os.path.isdir(path)):\n", | ||
" os.mkdir(path)\n", | ||
"\n", | ||
" #put the data regarding the stock into this folder\n", | ||
" stock_prices = stock.history(period=\"max\")\n", | ||
" stock_prices.to_csv(path + \"/stock_prices.csv\")\n", | ||
"\n", | ||
" #put the fundemental data into the folder\n", | ||
" stock_balance_sheet = stock.quarterly_balance_sheet\n", | ||
" stock_balance_sheet.to_csv(path + \"/stock_balance_sheet.csv\")\n", | ||
"\n", | ||
" stock_sustainability = stock.sustainability\n", | ||
" \n", | ||
" if stock_sustainability is not None:\n", | ||
" stock_sustainability.to_csv(path + \"/sustainability.csv\")\n", | ||
"\n", | ||
" stock_finacials = stock.quarterly_financials\n", | ||
" stock_finacials.to_csv(path + \"/financials.csv\")\n", | ||
"\n", | ||
" stock_cashflow = stock.quarterly_cashflow\n", | ||
" stock_cashflow.to_csv(path + \"/cashflow.csv\")\n", | ||
"\n", | ||
" stock_earning = stock.quarterly_earnings\n", | ||
" stock_earning.to_csv(path + \"/earnings.csv\")\n", | ||
"\n", | ||
" stock_basicinfo = stock.info.items()\n", | ||
" stock_basicinfo = list(stock_basicinfo)\n", | ||
" stock_basic_df = pd.DataFrame(stock_basicinfo)\n", | ||
" stock_basic_df = stock_basic_df.rename(columns={0: \"info\", 1: \"value\"})\n", | ||
" stock_basic_df.to_csv(path + \"/basic_info.csv\")" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"interpreter": { | ||
"hash": "d2feefea56b0213a6ec2bb54630eea72d1e2849cc5d96656889c1540b99c0617" | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3.9.8 ('venv_for_ml': venv)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.8" | ||
}, | ||
"orig_nbformat": 4 | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.