Skip to content

Commit

Permalink
Implement file size limit + handle error + send email
Browse files Browse the repository at this point in the history
  • Loading branch information
cslzchen committed Nov 9, 2023
1 parent 4e09e0e commit 42e45b8
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 22 deletions.
1 change: 1 addition & 0 deletions addons/boa/boa_error_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ class BoaErrorCode(IntEnum):
UPLOAD_ERROR_CONFLICT = 3 # Fail to upload the output to OSF because file already exists
UPLOAD_ERROR_OTHER = 4 # Fail to upload the output to OSF due to reasons other than ``UPLOAD_ERROR_CONFLICT``
OUTPUT_ERROR = 5 # Fail to retrieve the output after Boa job has finished
SUBMISSION_ERROR_FILE_TOO_LARGE = 6 # Fail to submit to Boa because the query file is too large
6 changes: 4 additions & 2 deletions addons/boa/settings/defaults.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
DEFAULT_HOSTS = []
USE_SSL = True

# Max file size permitted by frontend in megabytes
MAX_UPLOAD_SIZE = 512
# Note: not applicable to the Boa addon
MAX_UPLOAD_SIZE = 512 # 512 MB

MAX_SUBMISSION_SIZE = 512 * 1024 # 512 KB

# Suffix to replace '.boa' for the output file
OUTPUT_FILE_SUFFIX = '_results.txt'
Expand Down
54 changes: 35 additions & 19 deletions addons/boa/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@


@celery_app.task(name='addons.boa.tasks.submit_to_boa')
def submit_to_boa(host, username, password, user_guid, project_guid, query_dataset,
query_file_name, file_full_path, query_download_url, output_upload_url):
def submit_to_boa(host, username, password, user_guid, project_guid,
query_dataset, query_file_name, file_size, file_full_path,
query_download_url, output_upload_url):
"""
Download Boa query file, submit it to Boa API, wait for Boa to finish the job
and upload result output to OSF. Send success / failure email notifications.
Expand All @@ -36,12 +37,14 @@ def submit_to_boa(host, username, password, user_guid, project_guid, query_datas
* Running asyncio in celery is tricky. Refer to the discussion below for details:
* https://stackoverflow.com/questions/39815771/how-to-combine-celery-with-asyncio
"""
return async_to_sync(submit_to_boa_async)(host, username, password, user_guid, project_guid, query_dataset,
query_file_name, file_full_path, query_download_url, output_upload_url)
return async_to_sync(submit_to_boa_async)(host, username, password, user_guid, project_guid,
query_dataset, query_file_name, file_size, file_full_path,
query_download_url, output_upload_url)


async def submit_to_boa_async(host, username, password, user_guid, project_guid, query_dataset,
query_file_name, file_full_path, query_download_url, output_upload_url):
async def submit_to_boa_async(host, username, password, user_guid, project_guid,
query_dataset, query_file_name, file_size, file_full_path,
query_download_url, output_upload_url):
"""
Download Boa query file, submit it to Boa API, wait for Boa to finish the job
and upload result output to OSF. Send success / failure email notifications.
Expand All @@ -57,6 +60,15 @@ async def submit_to_boa_async(host, username, password, user_guid, project_guid,
project_url = f'{osf_settings.DOMAIN}{project_guid}/'
output_file_name = query_file_name.replace('.boa', boa_settings.OUTPUT_FILE_SUFFIX)

if file_size > boa_settings.MAX_SUBMISSION_SIZE:
message = f'Boa query file too large to submit: user=[{user_guid}], project=[{project_guid}], ' \
f'file_name=[{query_file_name}], file_size=[{file_size}], ' \
f'full_path=[{file_full_path}], url=[{query_download_url}] ...'
await sync_to_async(handle_boa_error)(message, BoaErrorCode.SUBMISSION_ERROR_FILE_TOO_LARGE,
user.username, user.fullname, project_url, file_full_path,
query_file_name=query_file_name, file_size=file_size)
return BoaErrorCode.SUBMISSION_ERROR_FILE_TOO_LARGE

logger.debug(f'Downloading Boa query file: user=[{user_guid}], project=[{project_guid}], '
f'file_name=[{query_file_name}], full_path=[{file_full_path}], url=[{query_download_url}] ...')
download_request = request.Request(query_download_url)
Expand All @@ -66,8 +78,8 @@ async def submit_to_boa_async(host, username, password, user_guid, project_guid,
except (ValueError, HTTPError, URLError, HTTPException):
message = f'Failed to download Boa query file: user=[{user_guid}], project=[{project_guid}], ' \
f'file_name=[{query_file_name}], full_path=[{file_full_path}], url=[{query_download_url}] ...'
await sync_to_async(handle_boa_error)(message, BoaErrorCode.UNKNOWN, user.username, user.fullname, project_url,
file_full_path, query_file_name=query_file_name)
await sync_to_async(handle_boa_error)(message, BoaErrorCode.UNKNOWN, user.username, user.fullname,
project_url, file_full_path, query_file_name=query_file_name)
return BoaErrorCode.UNKNOWN
logger.info('Boa query successfully downloaded.')
logger.debug(f'Boa query:\n########\n{boa_query}\n########')
Expand All @@ -80,8 +92,8 @@ async def submit_to_boa_async(host, username, password, user_guid, project_guid,
except BoaException:
# Don't call `client.close()`, since it will fail with `BoaException` if `client.login()` fails
message = f'Boa login failed: boa_username=[{username}], boa_host=[{host}]!'
await sync_to_async(handle_boa_error)(message, BoaErrorCode.AUTHN_ERROR, user.username, user.fullname, project_url,
file_full_path, query_file_name=query_file_name)
await sync_to_async(handle_boa_error)(message, BoaErrorCode.AUTHN_ERROR, user.username, user.fullname,
project_url, file_full_path, query_file_name=query_file_name)
return BoaErrorCode.AUTHN_ERROR
logger.info('Boa login completed.')

Expand All @@ -91,8 +103,8 @@ async def submit_to_boa_async(host, username, password, user_guid, project_guid,
except BoaException:
client.close()
message = f'Failed to retrieve or verify the target Boa dataset: dataset=[{query_dataset}]!'
await sync_to_async(handle_boa_error)(message, BoaErrorCode.UNKNOWN, user.username, user.fullname, project_url,
file_full_path, query_file_name=query_file_name)
await sync_to_async(handle_boa_error)(message, BoaErrorCode.UNKNOWN, user.username, user.fullname,
project_url, file_full_path, query_file_name=query_file_name)
return BoaErrorCode.UNKNOWN
logger.info('Boa dataset retrieved.')

Expand All @@ -102,8 +114,8 @@ async def submit_to_boa_async(host, username, password, user_guid, project_guid,
except BoaException:
client.close()
message = f'Failed to submit the query to Boa API: : boa_host=[{host}], dataset=[{query_dataset}]!'
await sync_to_async(handle_boa_error)(message, BoaErrorCode.UNKNOWN, user.username, user.fullname, project_url,
file_full_path, query_file_name=query_file_name)
await sync_to_async(handle_boa_error)(message, BoaErrorCode.UNKNOWN, user.username, user.fullname,
project_url, file_full_path, query_file_name=query_file_name)
return BoaErrorCode.UNKNOWN
logger.info('Query successfully submitted.')
logger.debug(f'Waiting for job to finish: job_id=[{str(boa_job.id)}] ...')
Expand All @@ -114,14 +126,16 @@ async def submit_to_boa_async(host, username, password, user_guid, project_guid,
if boa_job.compiler_status is CompilerStatus.ERROR:
client.close()
message = f'Boa job failed with compile error: job_id=[{str(boa_job.id)}]!'
await sync_to_async(handle_boa_error)(message, BoaErrorCode.QUERY_ERROR, user.username, user.fullname, project_url,
file_full_path, query_file_name=query_file_name, job_id=boa_job.id)
await sync_to_async(handle_boa_error)(message, BoaErrorCode.QUERY_ERROR, user.username,
user.fullname, project_url, file_full_path,
query_file_name=query_file_name, job_id=boa_job.id)
return BoaErrorCode.QUERY_ERROR
elif boa_job.exec_status is ExecutionStatus.ERROR:
client.close()
message = f'Boa job failed with execution error: job_id=[{str(boa_job.id)}]!'
await sync_to_async(handle_boa_error)(message, BoaErrorCode.QUERY_ERROR, user.username, user.fullname, project_url,
file_full_path, query_file_name=query_file_name, job_id=boa_job.id)
await sync_to_async(handle_boa_error)(message, BoaErrorCode.QUERY_ERROR, user.username,
user.fullname, project_url, file_full_path,
query_file_name=query_file_name, job_id=boa_job.id)
return BoaErrorCode.QUERY_ERROR
else:
try:
Expand Down Expand Up @@ -178,7 +192,7 @@ async def submit_to_boa_async(host, username, password, user_guid, project_guid,


def handle_boa_error(message, code, username, fullname, project_url, query_file_full_path,
query_file_name=None, output_file_name=None, job_id=None):
query_file_name=None, file_size=None, output_file_name=None, job_id=None):
"""Handle Boa and WB API errors and send emails.
"""
logger.error(message)
Expand All @@ -190,6 +204,8 @@ def handle_boa_error(message, code, username, fullname, project_url, query_file_
code=code,
message=message,
query_file_name=query_file_name,
file_size=file_size,
max_file_size=boa_settings.MAX_SUBMISSION_SIZE,
query_file_full_path=query_file_full_path,
output_file_name=output_file_name,
job_id=job_id,
Expand Down
3 changes: 2 additions & 1 deletion addons/boa/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def boa_submit_job(node_addon, **kwargs):

# Query file
file_name = req_params['data']['name']
file_size = req_params['data']['sizeInt']
file_full_path = req_params['data']['materialized']
file_download_url = req_params['data']['links']['download'].replace(osf_settings.WATERBUTLER_URL,
osf_settings.WATERBUTLER_INTERNAL_URL)
Expand All @@ -107,6 +108,6 @@ def boa_submit_job(node_addon, **kwargs):

# Send to task ``submit_to_boa``
enqueue_task(submit_to_boa.s(host, username, password, user_guid, project_guid, dataset,
file_name, file_full_path, file_download_url, output_upload_url))
file_name, file_size, file_full_path, file_download_url, output_upload_url))

return {}
4 changes: 4 additions & 0 deletions website/templates/emails/addons_boa_job_failure.html.mako
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@
A common cause of this failure is that the output is empty. Visit <a href="${boa_job_list_url}">Boa's job list page</a> to check if the output is empty. <br>
<br>
If you believe this is in error, contact Boa Support at <a href="mailto:${boa_support_email}">${boa_support_email}</a>. <br>
% elif code == 6:
OSF cannot submit your query file to Boa since it is too large: [${file_size} Bytes] is over the maximum allowed threshold [${max_file_size} Bytes]. <br>
<br>
If you believe this is in error, contact Boa Support at <a href="mailto:${boa_support_email}">${boa_support_email}</a>. <br>
% else:
OSF encountered an unexpected error when connecting to Boa. Please try again later. <br>
<br>
Expand Down

0 comments on commit 42e45b8

Please sign in to comment.