Skip to content

Commit

Permalink
C4-24 Give ff_utils a lucene API (#58)
Browse files Browse the repository at this point in the history
* C4-24 first pass at API

* C4-24 second pass at API

* C4-24 fix flake

* C4-24 add test

* C4-24 address Kent's points

* flake

* C4-24 add ability to use generator

* QOL change

* up test_get_metadata flakiness to 3
  • Loading branch information
willronchetti authored Feb 27, 2020
1 parent 0f16b6e commit 4781ad5
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 6 deletions.
38 changes: 34 additions & 4 deletions dcicutils/es_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import logging
from elasticsearch import Elasticsearch, RequestsHttpConnection
from aws_requests_auth.boto_utils import BotoAWSRequestsAuth

'''
info about snapshots on AWS
https://docs.aws.amazon.com/elasticsearch-service/latest/developerguide/es-managedomains-snapshots.html
'''

# Set up default logging configuration and a module-level logger named 'logger'
# that emits records at INFO level and above.
logging.basicConfig()
logger = logging.getLogger('logger')
logger.setLevel(logging.INFO)


def create_es_client(es_url, use_aws_auth=True, **options):
Expand Down Expand Up @@ -48,6 +49,12 @@ def get_index_list(client, name, days_old=0, timestring='%Y.%m.%d', ilo=None):


def create_snapshot_repo(client, repo_name, s3_bucket):
"""
Creates a repo to store ES snapshots on
info about snapshots on AWS
https://docs.aws.amazon.com/elasticsearch-service/latest/developerguide/es-managedomains-snapshots.html
"""
snapshot_body = {'type': 's3',
'settings': {
'bucket': s3_bucket,
Expand All @@ -56,3 +63,26 @@ def create_snapshot_repo(client, repo_name, s3_bucket):
}
}
return client.snapshot.create_repository(repository=repo_name, body=snapshot_body)


def execute_lucene_query_on_es(client, index, query):
    """
    Executes the given lucene query (in dictionary form) and returns its hits.

    Failures are logged instead of raised, so callers only need to check
    the return value for None.

    :arg client: elasticsearch client
    :arg index: index to search under
    :arg query: dictionary of query
    :returns: the value found under ['hits']['hits'] of the search response, or
              None if the search raised or the response lacked that structure
    """
    try:
        raw_result = client.search(body=query, index=index)
    except Exception as e:  # best-effort boundary: log the failure and signal it with None
        # pass arguments to the logger so formatting only happens if the record is emitted
        logger.error('Failed to execute search on index %s with query %s.\n Exception: %s', index, query, e)
        return None
    try:
        return raw_result['hits']['hits']
    except (KeyError, TypeError):
        # KeyError: a 'hits' key is missing; TypeError: the response (or its 'hits'
        # entry) is not subscriptable, e.g. None. Either way honor the None contract.
        logger.error('Searching index %s with query %s gave no results', index, query)
        return None
52 changes: 52 additions & 0 deletions dcicutils/ff_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -879,6 +879,58 @@ def get_health_page(key=None, ff_env=None):
return ret


class SearchESMetadataHandler(object):
    """
    Runs lucene queries directly against the ES instance of an environment.

    On construction the environment's health page is fetched and its
    'elasticsearch' entry is used as the URL for the ES client. That lookup
    is expensive, so build one handler and reuse it when issuing several
    searches against the same environment; for a single search the
    search_es_metadata function wraps this class.
    Requires AWS permissions to use.
    """

    def __init__(self, key=None, ff_env=None):
        health_page = get_health_page(key, ff_env)  # expensive - do not call repeatedly!
        self.health = health_page
        self.es_url = health_page['elasticsearch']
        self.client = es_utils.create_es_client(self.es_url)

    def execute_search(self, index, query, is_generator=False, page_size=200):
        """
        Runs a lucene query through this handler's ES client.

        :arg index: index to search under
        :arg query: query to run
        :arg is_generator: if truthy, results come back through search_result_generator
                           instead of a single direct request
        :arg page_size: only used when is_generator is set; forwarded as the page size
                        to get_es_search_generator
        :returns: result of es_utils.execute_lucene_query_on_es (may be None), or the
                  object produced by search_result_generator when is_generator is set
        """
        if is_generator:
            paged_search = get_es_search_generator(self.client, index, query, page_size=page_size)
            return search_result_generator(paged_search)
        return es_utils.execute_lucene_query_on_es(self.client, index=index, query=query)


def search_es_metadata(index, query, key=None, ff_env=None, is_generator=False, page_size=200):
    """
    Executes a lucene search query on the ES instance for this environment.

    NOTE: It is okay to use this function directly, but every call builds a new
    SearchESMetadataHandler, which repeats an expensive health page request.
    For repeat usage create one SearchESMetadataHandler and reuse it.

    :arg index: index to search under
    :arg query: dictionary of query
    :arg key: optional, 2-tuple authentication key (access_key_id, secret)
    :arg ff_env: ff_env to use
    :arg is_generator: boolean on whether or not to use a generator
    :arg page_size: if using a generator, how many results to give per request
                    (defaults to 200, matching SearchESMetadataHandler.execute_search)
    :returns: whatever SearchESMetadataHandler.execute_search returns: the results
              of the query or None, or a generator over results if is_generator is set
    """
    search_handler = SearchESMetadataHandler(key, ff_env)
    return search_handler.execute_search(index, query, is_generator=is_generator, page_size=page_size)


#####################
# Utility functions #
#####################
Expand Down
4 changes: 3 additions & 1 deletion test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
from dcicutils.s3_utils import s3Utils
from dcicutils.ff_utils import authorized_request

# this is the ff_env we use for integrated tests
# XXX: Refactor to config
INTEGRATED_ENV = 'fourfront-mastertest'
# ES endpoint URL exposed to tests as integrated['es_url']
# NOTE(review): presumably the ES instance backing INTEGRATED_ENV - confirm
INTEGRATED_ES = 'https://search-fourfront-mastertest-wusehbixktyxtbagz5wzefffp4.us-east-1.es.amazonaws.com'


@pytest.fixture(scope='session')
Expand All @@ -27,6 +28,7 @@ def integrated_ff():
integrated['ff_key'] = s3.get_access_keys()
integrated['higlass_key'] = s3.get_higlass_key()
integrated['ff_env'] = INTEGRATED_ENV
integrated['es_url'] = INTEGRATED_ES
# do this to make sure env is up (will error if not)
res = authorized_request(integrated['ff_key']['server'], auth=integrated['ff_key'])
if res.status_code != 200:
Expand Down
29 changes: 29 additions & 0 deletions test/test_es_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import pytest

from dcicutils.es_utils import create_es_client, execute_lucene_query_on_es


@pytest.fixture
def es_client_fixture(integrated_ff):
    """ Fixture that builds an es client from the 'es_url' of the integrated (mastertest) setup """
    es_url = integrated_ff['es_url']
    return create_es_client(es_url)


@pytest.mark.integrated
def test_lucene_query_basic(es_client_fixture):
    """ Runs an empty query and a single-id query directly against the mastertest user index """
    user_index = 'fourfront-mastertestuser'

    # an empty query body is expected to come back with 10 hits
    unfiltered_hits = execute_lucene_query_on_es(client=es_client_fixture, index=user_index, query={})
    assert len(unfiltered_hits) == 10

    # search for will's user insert
    id_terms = {'terms': {'_id': ['1a12362f-4eb6-4a9c-8173-776667226988']}}
    single_user_query = {
        'query': {
            'bool': {
                'must': [id_terms],
                'must_not': []
            }
        },
        'sort': [{'_uid': {'order': 'desc'}}]
    }
    single_user_hits = execute_lucene_query_on_es(client=es_client_fixture, index=user_index,
                                                  query=single_user_query)
    assert len(single_user_hits) == 1
37 changes: 36 additions & 1 deletion test/test_ff_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def test_authorized_request_integrated(integrated_ff):


@pytest.mark.integrated
@pytest.mark.flaky
@pytest.mark.flaky(max_runs=3) # very flaky for some reason
def test_get_metadata(integrated_ff, basestring):
# use this test biosource
test_item = '331111bc-8535-4448-903e-854af460b254'
Expand Down Expand Up @@ -931,6 +931,41 @@ def clear_folder(folder):
clear_folder(test_folder)


@pytest.mark.integrated
def test_search_es_metadata(integrated_ff):
    """ Checks search_es_metadata against the mastertest user index: all users, then one by id """
    user_index = 'fourfront-mastertestuser'
    ff_key = integrated_ff['ff_key']
    ff_env = integrated_ff['ff_env']

    # large 'size' so every user in the index is returned in one response
    all_users = ff_utils.search_es_metadata(user_index, {'size': '1000'}, key=ff_key, ff_env=ff_env)
    assert len(all_users) == 27

    # search for will's user insert
    id_terms = {'terms': {'_id': ['1a12362f-4eb6-4a9c-8173-776667226988']}}
    single_user_query = {
        'query': {
            'bool': {
                'must': [id_terms],
                'must_not': []
            }
        },
        'sort': [{'_uid': {'order': 'desc'}}]
    }
    one_user = ff_utils.search_es_metadata(user_index, single_user_query, key=ff_key, ff_env=ff_env)
    assert len(one_user) == 1


@pytest.mark.integrated
def test_search_es_metadata_generator(integrated_ff):
    """ Compares the number of results from a plain search with the number yielded in generator mode """
    user_index = 'fourfront-mastertestuser'
    handler = ff_utils.SearchESMetadataHandler(key=integrated_ff['ff_key'], ff_env=integrated_ff['ff_env'])

    # baseline: one-shot search through the module-level helper
    no_gen_res = ff_utils.search_es_metadata(user_index, {'size': '1000'},
                                             key=integrated_ff['ff_key'], ff_env=integrated_ff['ff_env'])

    # same search through the handler in generator mode; a fresh query dict is
    # built for each call so the two searches never share a query object
    paged_results = handler.execute_search(user_index, {'size': '1000'}, is_generator=True, page_size=5)
    yielded = sum(1 for _ in paged_results)
    assert yielded == len(no_gen_res)


def test_convert_param():
""" Very basic test that illustrates what convert_param should do """
params = {'param1': 5}
Expand Down

0 comments on commit 4781ad5

Please sign in to comment.