Skip to content

Commit

Permalink
Merge pull request #7 from refinery-platform/scottx611x/read_input_fr…
Browse files Browse the repository at this point in the history
…om_url

Scottx611x/read input from url
  • Loading branch information
scottx611x authored Apr 6, 2018
2 parents f1ab71b + 1af36bb commit 8979a69
Show file tree
Hide file tree
Showing 9 changed files with 406 additions and 211 deletions.
37 changes: 20 additions & 17 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
FROM gehlenborglab/higlass
FROM gehlenborglab/higlass:v0.2.61

COPY on_startup.py /home/higlass/projects/higlass-server
COPY input.json $HIGLASS_SERVER_BASE_DIR
# Swap the "app" html with the main html to always provide the /app/ view
RUN cp /home/higlass/projects/higlass-website/app/index.html /home/higlass/projects/higlass-website/index.html

# Display our waiting page until higlass ingests all tilesets
COPY index.html /home/higlass/projects/higlass-website/index.html

# Append to the supervisord.conf and set the priority of `on_startup.py` to
# be greater than the default of `999` so that it starts up last
RUN ( echo ""; \
echo "[program:on_startup]"; \
echo "command = python /home/higlass/projects/higlass-server/on_startup.py"; \
echo "priority = 1000"; ) \
>> supervisord.conf
# Don't start nginx automatically. We'll start it manually after tilesets have been ingested
RUN sed -i '/\[program\:nginx\]/a autostart \= false' supervisord.conf

# We want higlass launcher to access the default viewconf relative to our current location
RUN sed -i 's@"#higlass","\/api@"#higlass","\.\/api@g' \
Expand All @@ -26,12 +18,23 @@ RUN sed -i 's@"\/api\/v1",@"\.\/api\/v1"@g' \
RUN sed -i 's@"http://higlass.io/api/v1"@@g' \
/home/higlass/projects/higlass-server/default-viewconf-fixture.xml


# # Replace `../` with `./` for script/img/css fetching
# RUN sed -i 's@"\.\.\/@"\.\/@g' \
# /home/higlass/projects/higlass-website/index.html
# Replace `../` with `./` for script/img/css fetching
RUN sed -i 's@"\.\.\/@"\.\/@g' \
/home/higlass/projects/higlass-website/index.html

# Higlass currently has no favicon.png causing a 500 Error
RUN touch higlass-website/assets/images/favicon.png

ENV DJANGO_SETTINGS_MODULE="higlass_server.settings"

COPY on_startup.py /home/higlass/projects/higlass-server

# Append to the supervisord.conf and set the priority of `on_startup.py` to
# be greater than the default of `999` so that it starts up after uwsgi processes.
# Running the `ingest_tileset` Django management command requires things like db migrations to have been applied,
# which the uwsgi stuff handles.
RUN ( echo; \
echo "[program:on_startup]"; \
echo "command = python /home/higlass/projects/higlass-server/on_startup.py"; \
echo "priority = 1000"; ) \
>> supervisord.conf
23 changes: 22 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,27 @@
# [refinery-higlass-docker](https://hub.docker.com/r/scottx611x/refinery-higlass-docker/) [![Build Status](https://travis-ci.org/scottx611x/refinery-higlass-docker.svg?branch=master)](https://travis-ci.org/scottx611x/refinery-higlass-docker)
# [refinery-higlass-docker](https://hub.docker.com/r/scottx611x/refinery-higlass-docker/) [![Build Status](https://travis-ci.org/refinery-platform/refinery-higlass-docker.svg?branch=master)](https://travis-ci.org/refinery-platform/refinery-higlass-docker)
"Refinery-ified" flavor of the higlass-docker (https://github.com/hms-dbmi/higlass-docker/) project

🐳
```docker pull scottx611x/refinery-higlass-docker```

### Pre-Reqs:
- docker
- git
- python

### Running the Container:
- `pip install -r requirements.txt`
- `./test_runner.sh`

If the tests pass you'll be provided with some info about the currently running container:
```
Ran 3 tests in 37.126s
OK
browse: http://localhost:32921/
shell: docker exec --interactive --tty container-2018-04-03_10-11-28-standalone bash
logs: docker exec container-2018-04-03_10-11-28-standalone ./logs.sh
PASS!
```


92 changes: 0 additions & 92 deletions index.html

This file was deleted.

3 changes: 0 additions & 3 deletions input.json

This file was deleted.

144 changes: 66 additions & 78 deletions on_startup.py
Original file line number Diff line number Diff line change
@@ -1,110 +1,98 @@
import glob
import json
import logging
import os
import requests
import subprocess
from requests.exceptions import RequestException

import django

from requests.exceptions import RequestException

from django.core.management import call_command

logger = logging.getLogger(__name__)
DATA_DIRECTORY = "/refinery-data/"
FILE_URL = "file_url"
FILE_NAME = "file_name"
FILE_PATH = "file_path"
NODE_INFO = "node_info"
NODE_SOLR_INFO = "node_solr_info"


def populate_higlass_data_directory(data_dir):
"""
Download remote files specified by urls in the input.json file
:param data_dir: <String> Path to directory to populate with data
"""
with open("/data/input.json") as f:
config_data = json.loads(f.read())
class Tileset(object):

def __init__(self, refinery_node):
self.file_url = refinery_node[FILE_URL]
self.file_name = refinery_node[FILE_URL].split("/")[-1]
self.file_path = '{}{}'.format(DATA_DIRECTORY, self.file_name)
self.file_type = "cooler"
self.data_type = "matrix"

for url in config_data["file_relationships"]:
def download(self):
"""
Download a tileset from a `file_url` to disk at a `file_path`
"""
try:
# Streaming GET for potentially large files
response = requests.get(url, stream=True)
response = requests.get(self.file_url, stream=True)
except RequestException as e:
raise RuntimeError(
"Something went wrong while fetching file from {} : {}".format(
url,
self.file_url,
e
)
)
else:
with open('{}{}'.format(data_dir, url.split("/")[-1]), 'wb') as f:
for chunk in response.iter_content(chunk_size=1024):
# filter out KEEP-ALIVE new chunks
if chunk:
f.write(chunk)
finally:
response.close()

with open(self.file_path, 'wb') as f:
for chunk in response.iter_content(chunk_size=1024):
# filter out KEEP-ALIVE new chunks
if chunk:
f.write(chunk)

def ingest_tilesets(data_dir):
"""
Ingest previously downloaded files into higlass-server w/ django
management command
:param data_dir: <String> Path to directory populated with data to ingest
"""
files_to_ingest = glob.glob(
'{}*multires.*'.format(data_dir)
)

for filename in files_to_ingest:
response.close()

def ingest(self):
"""
Ingest previously downloaded files into higlass-server w/ django
management command
Uses this Tileset's `file_path`, `file_type` and `data_type` attributes
"""
call_command(
"ingest_tileset",
filename="{}".format(filename),
filetype=get_filetype(filename),
datatype=get_datatype(filename)
filename=self.file_path,
filetype=self.file_type,
datatype=self.data_type
)


def get_datatype(filename):
    """
    Map a file's extension to the higlass `datatype` passed to
    `ingest_tileset`
    :param filename: <String> Name or path of the file to be ingested
    :return: <String> "matrix" for `cool` files, "vector" for `hitile` files
    :raises RuntimeError: if the extension is neither `cool` nor `hitile`
    """
    datatype_mapping = {
        "cool": "matrix",
        "hitile": "vector"
    }
    # Only the text after the final "." is considered, so a name like
    # `sample.multires.cool` resolves to `cool`
    extension = filename.split(".")[-1]
    try:
        return datatype_mapping[extension]
    except KeyError:
        raise RuntimeError(
            "Could not determine datatype from filename: {}".format(
                filename
            )
        )


def get_filetype(filename):
    """
    Map a file's extension to the higlass `filetype` passed to
    `ingest_tileset`
    :param filename: <String> Name or path of the file to be ingested
    :return: <String> "cooler" for `cool` files, "hitile" for `hitile` files
    :raises RuntimeError: if the extension is neither `cool` nor `hitile`
    """
    filetype_mapping = {
        "cool": "cooler",
        "hitile": "hitile"
    }
    # Only the text after the final "." is considered, so a name like
    # `sample.multires.cool` resolves to `cool`
    extension = filename.split(".")[-1]
    try:
        return filetype_mapping[extension]
    except KeyError:
        raise RuntimeError(
            "Could not determine filetype from filename: {}".format(
                filename
            )
        )
def get_refinery_input():
    """
    Make a GET request to acquire the input data for the container
    :return: the decoded JSON body of the response from `INPUT_JSON_URL`
    :raises KeyError: if the `INPUT_JSON_URL` environment variable is unset
    :raises requests.exceptions.HTTPError: if the response status is 4xx/5xx
    """
    response = requests.get(os.environ["INPUT_JSON_URL"])
    # Fail on an error status rather than trying to parse an error response
    # as though it were the container's input
    response.raise_for_status()
    return response.json()


def swap_waiting_page():
    """
    Move the higlass-website `app/index.html` over the top-level
    `index.html`, logging an error if the `mv` command fails
    """
    return_code = subprocess.call([
        "mv",
        "/home/higlass/projects/higlass-website/app/index.html",
        "/home/higlass/projects/higlass-website/index.html"
    ])
    if return_code != 0:
        # `subprocess.call` does not raise on a non-zero exit status, so
        # surface the failure here instead of discarding it
        logger.error(
            "Could not swap the waiting page: `mv` exited with status %s",
            return_code
        )
def main():
"""
Download remote files specified by urls in the data provided by a GET to
the provided INPUT_JSON_URL then ingest the downloaded files into Higlass
Tileset objects
"""
config_data = get_refinery_input()

if __name__ == '__main__':
data_dir = "/refinery-data/"
for refinery_node_uuid in config_data[NODE_INFO]:
refinery_node = config_data[NODE_INFO][refinery_node_uuid]
tileset = Tileset(refinery_node)
tileset.download()
tileset.ingest()


if __name__ == '__main__':
# Allows for django commands to run in a standalone script
django.setup()

populate_higlass_data_directory(data_dir)
ingest_tilesets(data_dir)
main()

# Don't switch page until data ingested
swap_waiting_page()
# Start Nginx only after all tilesets have been ingested.
# NOTE: The parent process will hang around, but it doesn't hurt anything
# at this point, and it's probably more hassle than it's worth to run
# NGINX from this script and kill `on_startup.py` without then killing the
# NGINX process we just started.
# It's also pretty clear that our intent here is just to `run()`
# NGINX, whereas anything more could be confusing.
subprocess.run(["/usr/sbin/nginx"])
5 changes: 5 additions & 0 deletions test-data/higlass-sample-metadata.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Sample Name,Data File,Organism
dixon2012-h1hesc-hindiii-allreps-filtered.1000kb.multires.cool,https://s3.amazonaws.com/pkerp/public/dixon2012-h1hesc-hindiii-allreps-filtered.1000kb.multires.cool,Human
G15509.K-562.2_sampleDown.multires.cool,https://s3.amazonaws.com/pkerp/public/G15509.K-562.2_sampleDown.multires.cool,Human
Dixon2012-J1-NcoI-R1-filtered.100kb.multires.cool,https://s3.amazonaws.com/pkerp/public/Dixon2012-J1-NcoI-R1-filtered.100kb.multires.cool,Human
hic-resolutions.cool,https://s3.amazonaws.com/pkerp/public/hic-resolutions.cool,Human
Loading

0 comments on commit 8979a69

Please sign in to comment.