Skip to content

Commit

Permalink
added PDF Generator API
Browse files Browse the repository at this point in the history
  • Loading branch information
janlukasschroeder committed Oct 14, 2024
1 parent 797017b commit 05b5846
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 3 deletions.
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ It includes:
- [SEC Filing Search and Full-Text Search API](#sec-edgar-filings-query-api)
- [Real-Time Filing Stream API](#sec-edgar-filings-real-time-stream-api)
- [Filing Download & PDF Render API](#filing-render--download-api)
- [PDF Generator API](#pdf-generator-api)

**Converter & Extractor APIs:**

Expand Down Expand Up @@ -175,6 +176,28 @@ with open("filename.pdf", "wb") as f:

> See the documentation for more details: https://sec-api.io/docs/sec-filings-render-api
## PDF Generator API

SEC filings, including Forms 10-K, 10-Q, 8-K, and others, are typically published in HTML, XML, or text formats. The PDF Generator API enables the conversion of any SEC filing or exhibit into a PDF file, preserving all original formatting, tables, images, and other elements from the filing.

```python
from sec_api import PdfGeneratorApi

pdfGeneratorApi = PdfGeneratorApi("YOUR_API_KEY")

# Form 8-K exhibit URL
edgar_file_url = "https://www.sec.gov/ix?doc=/Archives/edgar/data/1320695/000132069520000148/ths12-31x201910krecast.htm"
# Form 10-K filing URL
# edgar_file_url = "https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/aapl-20200926.htm"

# get_pdf returns the body of the API response as bytes
pdf_file = pdfGeneratorApi.get_pdf(edgar_file_url)

# open in binary mode ("wb") because pdf_file is bytes, not text
with open("filename.pdf", "wb") as f:
    f.write(pdf_file)
```

> See the documentation for more details: https://sec-api.io/docs/sec-filings-render-api
## SEC EDGAR Filings Real-Time Stream API

The Stream API provides a live stream (aka feed) of newly published filings on SEC EDGAR via WebSockets. A new filing is sent to your connected client as soon as it is published.
Expand Down
19 changes: 19 additions & 0 deletions examples.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from sec_api.index import (
RenderApi,
PdfGeneratorApi,
XbrlApi,
ExtractorApi,
MappingApi,
Expand Down Expand Up @@ -43,6 +44,24 @@
f.write(binary_data)
# """

#
# PDF Generator API
#
"""
pdfGeneratorApi = PdfGeneratorApi("YOUR_API_KEY")
# Form 8-K exhibit URL
edgar_file_url = "https://www.sec.gov/ix?doc=/Archives/edgar/data/1320695/000132069520000148/ths12-31x201910krecast.htm"
# Form 10-K filing URL
# edgar_file_url = "https://www.sec.gov/Archives/edgar/data/320193/000032019320000096/aapl-20200926.htm"
# get_pdf returns the body of the API response as bytes
pdf_file = pdfGeneratorApi.get_pdf(edgar_file_url)
# open in binary mode ("wb") because pdf_file is bytes, not text
with open("filename.pdf", "wb") as f:
    f.write(pdf_file)
# """


#
# XBRL-to-JSON API example
#
Expand Down
1 change: 1 addition & 0 deletions sec_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from sec_api.index import QueryApi
from sec_api.index import FullTextSearchApi
from sec_api.index import RenderApi
from sec_api.index import PdfGeneratorApi

# Extractor & Converter APIs
from sec_api.index import XbrlApi
Expand Down
36 changes: 34 additions & 2 deletions sec_api/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@

query_api_endpoint = "https://api.sec-api.io"
full_text_search_api_endpoint = "https://api.sec-api.io/full-text-search"
render_api_endpoint = "https://archive.sec-api.io"
filing_download_api_endpoint = "https://archive.sec-api.io"
pdf_generator_api_endpoint = "https://api.sec-api.io/filing-reader"
xbrl_api_endpoint = "https://api.sec-api.io/xbrl-to-json"
extractor_api_endpoint = "https://api.sec-api.io/extractor"
#
Expand Down Expand Up @@ -102,7 +103,7 @@ class RenderApi:

def __init__(self, api_key, proxies=None):
self.api_key = api_key
self.api_endpoint = render_api_endpoint
self.api_endpoint = filing_download_api_endpoint
self.proxies = proxies if proxies else {}

def get_filing(self, url, return_binary=False):
Expand Down Expand Up @@ -146,6 +147,37 @@ def get_file(self, url, return_binary=False):
handle_api_error(response)


class PdfGeneratorApi:
    """
    Base class for PDF Generator API

    Sends a GET request to the PDF generator endpoint for a given filing or
    exhibit URL and returns the response body (the generated PDF) as bytes.
    """

    def __init__(self, api_key, proxies=None):
        """
        Store the credentials and connection settings used by ``get_pdf``.

        :param api_key: API key, sent as the ``token`` query parameter.
        :param proxies: optional proxies mapping passed through to
            ``requests.get``; ``None`` (or any falsy value) becomes ``{}``.
        """
        self.api_key = api_key
        self.api_endpoint = pdf_generator_api_endpoint
        self.proxies = proxies if proxies else {}

    def get_pdf(self, url):
        """
        Request the PDF version of the document at ``url``.

        Up to three requests are made. A 200 response returns immediately;
        a 429 ("too many requests") response is retried after a growing
        pause; any other status is handed to ``handle_api_error``.

        :param url: URL of the filing or exhibit. An ``ix?doc=/`` segment,
            if present, is removed before the URL is sent to the API.
        :return: the response body as bytes when the API answers with 200.
        :raises: whatever ``handle_api_error`` raises for a failed response
            (it is defined elsewhere in this module and presumably raises;
            confirm there).
        """
        # ".../ix?doc=/Archives/..." -> ".../Archives/..."
        file_url = re.sub(r"ix\?doc=/", "", url)
        _url = (
            self.api_endpoint + "?type=pdf&url=" + file_url + "&token=" + self.api_key
        )

        max_attempts = 3
        response = None

        # use backoff strategy to handle "too many requests" error.
        for attempt in range(max_attempts):
            response = requests.get(_url, proxies=self.proxies)
            if response.status_code == 200:
                return response.content
            if response.status_code != 429:
                handle_api_error(response)
            elif attempt < max_attempts - 1:
                # wait 500 * (attempt + 1) milliseconds and try again;
                # skipped after the final attempt, where no retry follows
                # and the pause would only delay the error report below
                time.sleep(0.5 * (attempt + 1))

        # all attempts used up without a 200: report the last response
        handle_api_error(response)


class XbrlApi:
"""
Base class for XBRL-to-JSON API
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name="sec-api",
version="1.0.23",
version="1.0.24",
author="SEC API",
author_email="[email protected]",
description="SEC EDGAR Filings API",
Expand Down

0 comments on commit 05b5846

Please sign in to comment.