From d59af7a31df7b22478d98bfd6c47256a4a19b91d Mon Sep 17 00:00:00 2001 From: NickSwainston Date: Mon, 1 Apr 2019 23:09:46 +1100 Subject: [PATCH 1/7] Removing urllib 2 so that it can be used by python 3. Still getting errors expecting bytes-like not string --- scripts/voltdownload.py | 78 ++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/scripts/voltdownload.py b/scripts/voltdownload.py index 0eb0d8d..a0561bc 100755 --- a/scripts/voltdownload.py +++ b/scripts/voltdownload.py @@ -25,15 +25,21 @@ import time import json import threading -import urllib2 -import urllib +import urllib.request import base64 import time import datetime import calendar from optparse import OptionParser -from multiprocessing import Queue -from Queue import Empty +from queue import Empty, Queue +import logging + +# set up the logger for stand-alone execution +logger = logging.getLogger(__name__) +ch = logging.StreamHandler() +formatter = logging.Formatter('%(asctime)s %(name)s %(lineno)-4d %(levelname)-7s :: %(message)s') +ch.setFormatter(formatter) +logger.addHandler(ch) username = 'ngas' password = 'ngas' @@ -56,18 +62,18 @@ def file_error(err): global ERRORS with LOCK: ERRORS.append(err) - print(err) + logging.error(err) def file_starting(filename): with LOCK: - print('%s [INFO] Downloading %s' % (time.strftime('%c'), filename)) + logging.info('Downloading %s' % (filename)) def file_complete(filename): global COMPLETE global TOTAL_FILES with LOCK: COMPLETE = COMPLETE + 1 - print('%s [INFO] %s complete [%d of %d]' % (time.strftime('%c'), filename, + logging.info('%s complete [%d of %d]' % (filename, COMPLETE, TOTAL_FILES)) def split_raw_recombined(filename): @@ -156,11 +162,8 @@ def query_observation(obs, host, filetype, timefrom, duration): processRange = True response = None - try: - url = 'http://%s/metadata/obs/?obs_id=%s&nocache' % (host, str(obs)) - request = urllib2.Request(url) - response = urllib2.urlopen(request) - + url = 
'http://%s/metadata/obs/?obs_id=%s&nocache' % (host, str(obs)) + with urllib.request.urlopen(url) as response: resultbuffer = [] while True: result = response.read(32768) @@ -169,10 +172,10 @@ def query_observation(obs, host, filetype, timefrom, duration): resultbuffer.append(result) keymap = {} - files = json.loads(''.join(resultbuffer))['files'] + files = json.loads(b''.join(resultbuffer))['files'] if processRange: second = None - for f, v in files.iteritems(): + for f, v in files.items(): ft = v['filetype'] size = v['size'] add = False @@ -197,7 +200,7 @@ def query_observation(obs, host, filetype, timefrom, duration): keymap[f] = size else: - for f, v in files.iteritems(): + for f, v in files.items(): ft = v['filetype'] size = v['size'] if filetype == 11 and ft == 11: @@ -211,9 +214,6 @@ def query_observation(obs, host, filetype, timefrom, duration): return keymap - finally: - if response: - response.close() def check_complete(filename, size, dir): @@ -243,18 +243,18 @@ def download_worker(url, filename, size, out, bufsize, prestage): try: file_starting(filename) - request = urllib2.Request(url) + request = urllib.request.Request(url) base64string = base64.encodestring('%s:%s' % (username, password)).replace('\n', '') request.add_header('Authorization', 'Basic %s' % base64string) request.add_header('prestagefilelist', prestage) - u = urllib2.urlopen(request) + u = urllib.request.urlopen(request) u.fp.bufsize = bufsize file_size = int(u.headers['Content-Length']) file_size_dl = 0 - with open(out + filename, 'wb') as f: + with open(out + filename, 'wb').write(bytes_) as f: while True: buff = u.read(bufsize) if not buff: @@ -268,17 +268,17 @@ def download_worker(url, filename, size, out, bufsize, prestage): file_complete(filename) - except urllib2.HTTPError as e: - file_error('%s [ERROR] %s %s' % (time.strftime('%c'), filename, str(e.read()) )) + except urllib.error.HTTPError as e: + file_error('%s %s' % (filename, str(e.read()) )) - except urllib2.URLError as 
urlerror: + except urllib.error.URLError as urlerror: if hasattr(urlerror, 'reason'): - file_error('%s [ERROR] %s %s' % (time.strftime('%c'), filename, str(urlerror.reason) )) + file_error('%s %s' % (filename, str(urlerror.reason) )) else: - file_error('%s [ERROR] %s %s' % (time.strftime('%c'), filename, str(urlerror) )) + file_error('%s %s' % (filename, str(urlerror) )) except Exception as exp: - file_error('%s [ERROR] %s %s' % (time.strftime('%c'), filename, str(exp) )) + file_error('%s %s' % (filename, str(exp) )) finally: if u: @@ -335,25 +335,24 @@ def main(): print('Number of simultaneous downloads must be > 0 and <= 12') sys.exit(-1) - print('%s [INFO] Finding observation %s' % (time.strftime('%c'), options.obs)) + logger.info('Finding observation %s' % options.obs) fileresult = query_observation(options.obs, 'mwa-metadata01.pawsey.org.au', options.filetype, options.timefrom, options.duration) if len(fileresult) <= 0: - print('%s [INFO] No files found for observation %s and file type %s' % (time.strftime('%c'), - options.obs, - int(options.filetype))) + logger.info('No files found for observation %s and file type %s' % options.obs, + int(options.filetype)) sys.exit(1) print('%s [INFO] Found %s files' % (time.strftime('%c'), str(len(fileresult)))) if len(fileresult) > 12000: - print('%s [INFO] File limit exceeded 12000, please stagger your download' % (time.strftime('%c'))) + logger.error('File limit exceeded 12000, please stagger your download') sys.exit(1) # advise that we want to prestage all the files filelist = [] - for key, value in fileresult.iteritems(): + for key, value in fileresult.items(): filelist.append(key) prestage_files = json.dumps(filelist) @@ -375,12 +374,13 @@ def main(): for filename, filesize in sorted(fileresult.items()): url = 'http://%s/RETRIEVE?file_id=%s' % (options.ngashost, filename) if not check_complete(filename, int(filesize), dir): - download_queue.put((url, filename, filesize, dir, bufsize, prestage_files)) + 
download_queue.put((url, filename, filesize, dir, + bufsize, prestage_files)) continue file_complete(filename) threads = [] - for t in xrange(numdownload): + for t in range(numdownload): t = threading.Thread(target = download_queue_thread, args = (download_queue,)) t.setDaemon(True) threads.append(t) @@ -390,15 +390,15 @@ def main(): while t.isAlive(): t.join(timeout = 0.25) - print('%s [INFO] File Transfer Complete.' % (time.strftime('%c'))) + logger.info('File Transfer Complete.') if ERRORS: - print('%s [INFO] File Transfer Error Summary:' % (time.strftime('%c'))) + logger.error('File Transfer Error Summary:') for i in ERRORS: - print(i) + logger.error(i) raise Exception() else: - print('%s [INFO] File Transfer Success.' % (time.strftime('%c'))) + logger.info('File Transfer Success.') if __name__ == '__main__': From 6cf4952e3a073bebeceeae163356bda369a7cf29 Mon Sep 17 00:00:00 2001 From: NickSwainston Date: Wed, 3 Apr 2019 13:26:48 +1100 Subject: [PATCH 2/7] Made the logging formatting more readable and consistent --- scripts/voltdownload.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/voltdownload.py b/scripts/voltdownload.py index a0561bc..23e16af 100755 --- a/scripts/voltdownload.py +++ b/scripts/voltdownload.py @@ -35,11 +35,13 @@ import logging # set up the logger for stand-alone execution +logging.basicConfig(format='%(asctime)s l %(lineno)-4d [%(levelname)s] :: %(message)s', + datefmt='%a %b %d %H:%M:%S', level=logging.INFO) logger = logging.getLogger(__name__) -ch = logging.StreamHandler() -formatter = logging.Formatter('%(asctime)s %(name)s %(lineno)-4d %(levelname)-7s :: %(message)s') -ch.setFormatter(formatter) -logger.addHandler(ch) +#ch = logging.StreamHandler() +#formatter = logging.Formatter('%(asctime)s %(name)s %(lineno)-4d %(levelname)-7s :: %(message)s') +#ch.setFormatter(formatter) +#logger.addHandler(ch) username = 'ngas' password = 'ngas' @@ -244,7 +246,7 @@ def download_worker(url, filename, size, 
out, bufsize, prestage): file_starting(filename) request = urllib.request.Request(url) - base64string = base64.encodestring('%s:%s' % (username, password)).replace('\n', '') + base64string = base64.encodestring(('%s:%s' % (username,password)).encode()).decode().replace('\n', '') request.add_header('Authorization', 'Basic %s' % base64string) request.add_header('prestagefilelist', prestage) @@ -344,7 +346,7 @@ def main(): int(options.filetype)) sys.exit(1) - print('%s [INFO] Found %s files' % (time.strftime('%c'), str(len(fileresult)))) + logger.info('Found %s files' % (str(len(fileresult)))) if len(fileresult) > 12000: logger.error('File limit exceeded 12000, please stagger your download') From effc2f00ec025f1b5faf16e29a61cb4bbeefb936 Mon Sep 17 00:00:00 2001 From: NickSwainston Date: Wed, 3 Apr 2019 11:04:15 +0800 Subject: [PATCH 3/7] Fixed bugs in the python 3 conversion. Now works on Galaxy --- scripts/voltdownload.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/voltdownload.py b/scripts/voltdownload.py index 23e16af..160eaf3 100755 --- a/scripts/voltdownload.py +++ b/scripts/voltdownload.py @@ -174,7 +174,7 @@ def query_observation(obs, host, filetype, timefrom, duration): resultbuffer.append(result) keymap = {} - files = json.loads(b''.join(resultbuffer))['files'] + files = json.loads(b''.join(resultbuffer).decode('utf-8'))['files'] if processRange: second = None for f, v in files.items(): @@ -256,7 +256,7 @@ def download_worker(url, filename, size, out, bufsize, prestage): file_size = int(u.headers['Content-Length']) file_size_dl = 0 - with open(out + filename, 'wb').write(bytes_) as f: + with open(out + filename, 'wb') as f: while True: buff = u.read(bufsize) if not buff: From 830d006455daa059403cbe604018234443e3373c Mon Sep 17 00:00:00 2001 From: NickSwainston Date: Tue, 11 Jun 2019 14:13:41 +0800 Subject: [PATCH 4/7] Removed parse from the params passing in client.py. 
UNTESTED --- mwa_pulsar_client/mwa_pulsar_client/client.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mwa_pulsar_client/mwa_pulsar_client/client.py b/mwa_pulsar_client/mwa_pulsar_client/client.py index 5875566..abffe2e 100644 --- a/mwa_pulsar_client/mwa_pulsar_client/client.py +++ b/mwa_pulsar_client/mwa_pulsar_client/client.py @@ -1,6 +1,6 @@ # -*- coding: utf8 -*- import os -import urllib +import urllib.parse import requests @@ -16,7 +16,7 @@ def detection_find_calibrator(addr, auth, **kwargs): path = '{0}/{1}/'.format(addr, 'detection_find_calibrator') r = requests.get(url=path, auth=auth, - params=urllib.urlencode(kwargs)) + params=urllib.parse.urlencode(kwargs)) r.raise_for_status() return r.json() @@ -32,7 +32,7 @@ def calibration_file_by_observation_id(addr, auth, **kwargs): path = '{0}/{1}/'.format(addr, 'calibration_file_by_observation_id') r = requests.get(url=path, auth=auth, - params=urllib.urlencode(kwargs)) + params=urllib.parse.urlencode(kwargs)) r.raise_for_status() return r.json() @@ -62,7 +62,7 @@ def calibrator_get(addr, auth, **kwargs): path = '{0}/{1}/'.format(addr, 'calibrator_get') r = requests.get(url=path, auth=auth, - params=urllib.urlencode(kwargs)) + params=urllib.parse.urlencode(kwargs)) r.raise_for_status() return r.json() @@ -107,7 +107,7 @@ def pulsar_get(addr, auth, **kwargs): path = '{0}/{1}/'.format(addr, 'pulsar_get') r = requests.get(url=path, auth=auth, - params=urllib.urlencode(kwargs)) + params=urllib.parse.urlencode(kwargs)) r.raise_for_status() return r.json() @@ -154,7 +154,7 @@ def detection_get(addr, auth, **kwargs): path = '{0}/{1}/'.format(addr, 'detection_get') r = requests.get(url=path, auth=auth, - params=urllib.urlencode(kwargs)) + params=urllib.parse.urlencode(kwargs)) r.raise_for_status() return r.json() @@ -270,7 +270,7 @@ def detection_file_download(addr, auth, filename, outputpath): params = {'filename': filename} r = requests.get(url=path, auth=auth, - 
params=urllib.urlencode(params), + params=urllib.parse.urlencode(params), stream=True) r.raise_for_status() @@ -330,7 +330,7 @@ def calibrator_file_download(addr, auth, filename, outputpath): params = {'filename': filename} r = requests.get(url=path, auth=auth, - params=urllib.urlencode(params), + params=urllib.parse.urlencode(params), stream=True) r.raise_for_status() @@ -362,6 +362,6 @@ def psrcat(addr, auth, pulsar): payload = {'name': pulsar, 'format': 'json'} r = requests.get(url=path, auth=auth, - params=urllib.urlencode(payload)) + params=urllib.parse.urlencode(payload)) r.raise_for_status() return r.json() From 4d1459442110bcbbeaa9cd9104a92cbd28bb4311 Mon Sep 17 00:00:00 2001 From: NickSwainston Date: Tue, 11 Jun 2019 16:16:22 +0800 Subject: [PATCH 5/7] Changed iteritems to items as is the new python3 function in client.py. Now works --- mwa_pulsar_client/mwa_pulsar_client/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mwa_pulsar_client/mwa_pulsar_client/client.py b/mwa_pulsar_client/mwa_pulsar_client/client.py index abffe2e..3c8ced9 100644 --- a/mwa_pulsar_client/mwa_pulsar_client/client.py +++ b/mwa_pulsar_client/mwa_pulsar_client/client.py @@ -246,7 +246,7 @@ def detection_file_upload(addr, auth, **kwargs): raise Exception('filepath not found') files = {'path': open(filepath, 'rb')} new_kwargs = {} - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): new_kwargs[k] = str(v) r = requests.post(url=path, auth=auth, files=files, headers=new_kwargs) r.raise_for_status() @@ -305,7 +305,7 @@ def calibrator_file_upload(addr, auth, **kwargs): raise Exception('filepath not found') files = {'path': open(filepath, 'rb')} new_kwargs = {} - for k, v in kwargs.iteritems(): + for k, v in kwargs.items(): new_kwargs[k] = str(v) r = requests.post(url=path, auth=auth, files=files, headers=new_kwargs) r.raise_for_status() From 9372f9ee509a071dfcb6aa8f8cd81f12b716a495 Mon Sep 17 00:00:00 2001 From: NickSwainston Date: Thu, 4 Jul 2019 
12:46:11 +0800 Subject: [PATCH 6/7] Updated the shebang to use python 3 (necessary on Galaxy) --- scripts/voltdownload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/voltdownload.py b/scripts/voltdownload.py index 160eaf3..4baa912 100755 --- a/scripts/voltdownload.py +++ b/scripts/voltdownload.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # ICRAR - International Centre for Radio Astronomy Research # (c) UWA - The University of Western Australia, 2014 From 363e56993b8ad1a22855735392631c920bbe3731 Mon Sep 17 00:00:00 2001 From: NickSwainston Date: Thu, 9 Jan 2020 15:49:47 +0800 Subject: [PATCH 7/7] Updated the webservice url --- scripts/voltdownload.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/voltdownload.py b/scripts/voltdownload.py index 4baa912..2ffd4d4 100755 --- a/scripts/voltdownload.py +++ b/scripts/voltdownload.py @@ -266,7 +266,7 @@ def download_worker(url, filename, size, out, bufsize, prestage): file_size_dl += len(buff) if file_size_dl != file_size: - raise Exception('size mismatch %s %s' % str(file_size), str(file_size_dl)) + raise Exception('size mismatch %s %s' % (str(file_size), str(file_size_dl))) file_complete(filename) @@ -339,7 +339,7 @@ def main(): logger.info('Finding observation %s' % options.obs) - fileresult = query_observation(options.obs, 'mwa-metadata01.pawsey.org.au', + fileresult = query_observation(options.obs, 'ws.mwatelescope.org', options.filetype, options.timefrom, options.duration) if len(fileresult) <= 0: logger.info('No files found for observation %s and file type %s' % options.obs,