-
Notifications
You must be signed in to change notification settings - Fork 22
133 lines (124 loc) · 5.62 KB
/
ingest-test-fixtures-update-pr.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
name: Ingest Test Fixtures Update PR
on:
workflow_dispatch:
env:
PYTHON_VERSION: "3.10"
permissions:
id-token: write
contents: read
jobs:
setup:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: ./.github/actions/base-cache
with:
python-version: ${{ env.PYTHON_VERSION }}
check-only: 'true'
update-fixtures-and-pr:
runs-on: ubuntu-latest-m
needs: [setup]
steps:
# actions/checkout MUST come before auth
- uses: 'actions/checkout@v4'
- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Get full Python version
id: full-python-version
run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT
- name: Setup virtual environment
uses: ./.github/actions/base-cache
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Update test fixtures
env:
AIRTABLE_PERSONAL_ACCESS_TOKEN: ${{ secrets.AIRTABLE_PERSONAL_ACCESS_TOKEN }}
BOX_APP_CONFIG: ${{ secrets.BOX_APP_CONFIG }}
CONFLUENCE_API_TOKEN: ${{ secrets.CONFLUENCE_API_TOKEN }}
CONFLUENCE_USER_EMAIL: ${{ secrets.CONFLUENCE_USER_EMAIL }}
DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }}
DROPBOX_APP_KEY: ${{ secrets.DROPBOX_APP_KEY }}
DROPBOX_APP_SECRET: ${{ secrets.DROPBOX_APP_SECRET }}
DROPBOX_REFRESH_TOKEN: ${{ secrets.DROPBOX_REFRESH_TOKEN }}
GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }}
GH_READ_ONLY_ACCESS_TOKEN: ${{ secrets.GH_READ_ONLY_ACCESS_TOKEN }}
HUBSPOT_API_TOKEN: ${{ secrets.HUBSPOT_API_TOKEN }}
JIRA_INGEST_API_TOKEN: ${{ secrets.JIRA_INGEST_API_TOKEN }}
JIRA_INGEST_USER_EMAIL: ${{ secrets.JIRA_INGEST_USER_EMAIL }}
MONGODB_URI: ${{ secrets.MONGODB_URI }}
MONGODB_DATABASE_NAME: ${{ secrets.MONGODB_DATABASE_NAME }}
MS_CLIENT_CRED: ${{ secrets.MS_CLIENT_CRED }}
MS_CLIENT_ID: ${{ secrets.MS_CLIENT_ID }}
MS_TENANT_ID: ${{ secrets.MS_TENANT_ID }}
MS_USER_EMAIL: ${{ secrets.MS_USER_EMAIL }}
MS_USER_PNAME: ${{ secrets.MS_USER_PNAME }}
SALESFORCE_USERNAME: ${{secrets.SALESFORCE_USERNAME}}
SALESFORCE_CONSUMER_KEY: ${{secrets.SALESFORCE_CONSUMER_KEY}}
SALESFORCE_PRIVATE_KEY: ${{secrets.SALESFORCE_PRIVATE_KEY}}
SHAREPOINT_CLIENT_ID: ${{secrets.SHAREPOINT_CLIENT_ID}}
SHAREPOINT_CRED: ${{secrets.SHAREPOINT_CRED}}
SHAREPOINT_SITE: ${{secrets.SHAREPOINT_SITE}}
SHAREPOINT_PERMISSIONS_APP_ID: ${{secrets.SHAREPOINT_PERMISSIONS_APP_ID}}
SHAREPOINT_PERMISSIONS_APP_CRED: ${{secrets.SHAREPOINT_PERMISSIONS_APP_CRED}}
SHAREPOINT_PERMISSIONS_TENANT: ${{secrets.SHAREPOINT_PERMISSIONS_TENANT}}
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
UNS_API_KEY: ${{ secrets.UNS_API_KEY }}
UNS_PAID_API_KEY: ${{ secrets.UNS_PAID_API_KEY }}
NOTION_API_KEY: ${{ secrets.NOTION_API_KEY }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AZURE_SEARCH_ENDPOINT: ${{ secrets.AZURE_SEARCH_ENDPOINT }}
AZURE_SEARCH_API_KEY: ${{ secrets.AZURE_SEARCH_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OCTOAI_API_KEY: ${{ secrets.OCTOAI_API_KEY }}
MXBAI_API_KEY: ${{ secrets.MXBAI_API_KEY }}
TABLE_OCR: "tesseract"
OCR_AGENT: "unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract"
OVERWRITE_FIXTURES: "true"
CI: "true"
run: |
source .venv/bin/activate
sudo apt-get update
sudo apt-get install -y libmagic-dev poppler-utils libreoffice
make install-pandoc
sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5
sudo apt-get install -y tesseract-ocr
sudo apt-get install -y tesseract-ocr-kor
tesseract --version
sudo make install-docker-compose
docker compose version
./test_e2e/test-src.sh
- name: Save branch name to environment file
id: branch
run: |
original_branch=$(git rev-parse --abbrev-ref HEAD)
suffix="|ingest-test-fixtures-update-$(git rev-parse --short HEAD)"
branch_name="$original_branch$suffix"
echo "BRANCH_NAME=$branch_name" >> $GITHUB_ENV
- name: Save PR name to environment file
id: pr
run: |
commit_sha=$(git rev-parse HEAD)
prs=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
"https://api.github.com/repos/${{ github.repository }}/commits/${commit_sha}/pulls")
pr_name=$(echo "$prs" | jq -r '.[0].title')
echo "PR_NAME=$pr_name" >> $GITHUB_ENV
- name: Create Pull Request
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.GH_CREATE_PR_TOKEN }}
add-paths: |
test_e2e/expected-structured-output
test_e2e/metrics
commit-message: "Update ingest test fixtures"
branch: ${{ env.BRANCH_NAME }}
title: "${{ env.PR_NAME }} <- Ingest test fixtures update"
assignees: ${{ github.actor }}
reviewers: ${{ github.actor }}
delete-branch: true
body: |
This pull request includes updated ingest test fixtures.
Please review and merge if appropriate.
base: ${{ github.head_ref }}