From 105dd8cbcf688345c173b6c258c542bb2f35b055 Mon Sep 17 00:00:00 2001 From: Sean Kelly Date: Tue, 6 Dec 2022 10:37:46 -0600 Subject: [PATCH] Plone upgrade, conversion from alpine, and resolve #18 --- Dockerfile | 9 +- README.md | 4 +- etc/versions/dists.cfg | 2 +- etc/versions/known-good-versions.cfg | 189 ++++++++++++++++++ etc/zope.cfg | 1 - src/edrn.rdf/edrn/rdf/README.rst | 12 +- .../edrn/rdf/labcascollectionrdfgenerator.py | 6 +- src/edrn.rdf/edrn/rdf/rdfupdater.py | 10 +- src/edrn.rdf/setup.py | 2 +- .../biomarkersummarizergenerator.py | 8 +- .../collaborationsummarizergenerator.py | 10 +- .../summarizer/datasetsummarizergenerator.py | 6 +- .../extresourcesummarizergenerator.py | 6 +- .../edrn/summarizer/nulljsongenerator.py | 4 +- .../publicationsummarizergenerator.py | 4 +- .../summarizer/specimensummarizergenerator.py | 4 +- .../edrn/summarizer/summarizerupdater.py | 8 +- src/edrn.summarizer/setup.py | 3 +- 18 files changed, 237 insertions(+), 51 deletions(-) diff --git a/Dockerfile b/Dockerfile index 16a3931..d104e37 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,21 +3,16 @@ # # Defines the image for the CancerDataExpo -FROM plone:5.2.2-alpine +FROM plone:5.2.9 -ENV numpy=1.19.3 +ENV numpy=1.23.4 COPY site.cfg /plone/instance/ COPY src/ /plone/instance/src/ COPY etc/ /plone/instance/etc/ RUN : &&\ - build_deps="gcc musl-dev" &&\ - apk update --quiet &&\ - apk add --quiet --virtual /build $build_deps &&\ pip --quiet install numpy==${numpy} &&\ buildout -c site.cfg &&\ find /plone -not -user plone -exec chown plone:plone {} \+ &&\ - apk del --quiet /build &&\ - rm -rf /var/cache/apk/* &&\ : diff --git a/README.md b/README.md index 5968a13..2a9e07a 100644 --- a/README.md +++ b/README.md @@ -54,12 +54,12 @@ The answer will then be in the result element of the response element of the env ## 🔧 Developing -**👉 Note:** Plone 5.2.2 (used here) is not compatible with any Python newer than 3.8. Stick with 3.8. +**👉 Note:** Plone 5.2.9 (used here) is not compatible with any Python newer than 3.8. Stick with 3.8. Do the following: ```console python3.8 -m venv venv -venv/bin/pip install --upgrade pip build wheel zc.buildout setuptools==42.0.2 numpy==1.19.3 +venv/bin/pip install --upgrade pip build wheel zc.buildout setuptools==42.0.2 numpy==1.23.4 venv/bin/buildout -c dev.cfg ``` diff --git a/etc/versions/dists.cfg b/etc/versions/dists.cfg index ae3cae7..1fcd4f6 100644 --- a/etc/versions/dists.cfg +++ b/etc/versions/dists.cfg @@ -14,7 +14,7 @@ # # That's it (for now). [buildout] -extends = https://dist.plone.org/release/5.2.2/versions.cfg +extends = https://dist.plone.org/release/5.2.9/versions.cfg # Software Distributions to Compile/Make/Install diff --git a/etc/versions/known-good-versions.cfg b/etc/versions/known-good-versions.cfg index ef5377f..9cc602e 100644 --- a/etc/versions/known-good-versions.cfg +++ b/etc/versions/known-good-versions.cfg @@ -223,3 +223,192 @@ suds2 = 0.7.1 # Required by: # unittest2==1.1.0 traceback2 = 1.4.0 + +# Added by buildout at 2022-12-05 16:58:47.176461 +biothings-client = 0.2.6 +collective.recipe.backup = 4.2.0 +isort = 5.10.1 +modernize = 0.8.0 +mr.scripty = 1.0 + +# Required by: +# edrn.summarizer==0.0.8 +biopython = 1.80 + +# Required by: +# edrn.rdf==1.3.8 +# edrn.summarizer==0.0.8 +collective.autopermission = 1.0b2 + +# Required by: +# modernize==0.8.0 +fissix = 21.11.13 + +# Required by: +# rdflib==4.2.2 +isodate = 0.6.1 + +# Required by: +# edrn.summarizer==0.0.8 +mygene = 3.2.2 + +# Required by: +# edrn.rdf==1.3.8 +pysolr = 3.9.0 + +# Required by: +# edrn.rdf==1.3.8 +# edrn.summarizer==0.0.8 +suds2 = 0.7.1 + +# Required by: +# python-dotenv==0.15.0 +typing = 3.10.0.0 + +# Added by buildout at 2022-12-05 17:12:03.918426 +biothings-client = 0.2.6 +collective.recipe.backup = 4.2.0 +isort = 5.10.1 +modernize = 0.8.0 +mr.scripty = 1.0 + +# Required by: +# edrn.summarizer==0.0.8 +biopython = 1.80 + +# Required by: +# edrn.rdf==1.3.8 +# edrn.summarizer==0.0.8 +collective.autopermission = 1.0b2 + +# Required by: +# modernize==0.8.0 +fissix = 21.11.13 + +# Required by: +# rdflib==4.2.2 +isodate = 0.6.1 + +# Required by: +# edrn.summarizer==0.0.8 +mygene = 3.2.2 + +# Required by: +# edrn.rdf==1.3.8 +pysolr = 3.9.0 + +# Required by: +# edrn.rdf==1.3.8 +# edrn.summarizer==0.0.8 +suds2 = 0.7.1 + +# Added by buildout at 2022-12-06 09:35:44.943510 +biothings-client = 0.2.6 +collective.recipe.backup = 4.2.0 +isort = 5.10.1 +modernize = 0.8.0 +mr.scripty = 1.0 + +# Required by: +# edrn.summarizer==0.0.8 +biopython = 1.80 + +# Required by: +# edrn.rdf==1.3.8 +# edrn.summarizer==0.0.8 +collective.autopermission = 1.0b2 + +# Required by: +# modernize==0.8.0 +fissix = 21.11.13 + +# Required by: +# rdflib==6.2.0 +isodate = 0.6.1 + +# Required by: +# edrn.summarizer==0.0.8 +mygene = 3.2.2 + +# Required by: +# edrn.rdf==1.3.8 +pysolr = 3.9.0 + +# Required by: +# edrn.rdf==1.3.8 +# edrn.summarizer==0.0.8 +suds2 = 0.7.1 + +# Added by buildout at 2022-12-06 09:44:08.666120 +biothings-client = 0.2.6 +collective.recipe.backup = 4.2.0 +isort = 5.10.1 +modernize = 0.8.0 +mr.scripty = 1.0 + +# Required by: +# edrn.summarizer==0.0.8 +biopython = 1.80 + +# Required by: +# edrn.rdf==1.3.8 +# edrn.summarizer==0.0.8 +collective.autopermission = 1.0b2 + +# Required by: +# modernize==0.8.0 +fissix = 21.11.13 + +# Required by: +# rdflib==5.0.0 +isodate = 0.6.1 + +# Required by: +# edrn.summarizer==0.0.8 +mygene = 3.2.2 + +# Required by: +# edrn.rdf==1.3.8 +pysolr = 3.9.0 + +# Required by: +# edrn.rdf==1.3.8 +# edrn.summarizer==0.0.8 +suds2 = 0.7.1 + +# Added by buildout at 2022-12-06 10:22:10.706859 +biothings-client = 0.2.6 +collective.recipe.backup = 4.2.0 +isort = 5.10.1 +modernize = 0.8.0 +mr.scripty = 1.0 + +# Required by: +# edrn.summarizer==0.0.8 +biopython = 1.80 + +# Required by: +# edrn.rdf==1.3.8 +# edrn.summarizer==0.0.8 +collective.autopermission = 1.0b2 + +# Required by: +# modernize==0.8.0 +fissix = 21.11.13 + +# Required by: +# rdflib==6.2.0 +isodate = 0.6.1 + +# Required by: +# edrn.summarizer==0.0.8 +mygene = 3.2.2 + +# Required by: +# edrn.rdf==1.3.8 +pysolr = 3.9.0 + +# Required by: +# edrn.rdf==1.3.8 +# edrn.summarizer==0.0.8 +suds2 = 0.7.1 diff --git a/etc/zope.cfg b/etc/zope.cfg index e7d44e8..82bcc2d 100644 --- a/etc/zope.cfg +++ b/etc/zope.cfg @@ -93,7 +93,6 @@ eggs = plone.testing plone.app.testing plone.app.robotframework - unittest2 plone.app.upgrade defaults = ['--auto-color', '--auto-progress', '--verbose'] diff --git a/src/edrn.rdf/edrn/rdf/README.rst b/src/edrn.rdf/edrn/rdf/README.rst index 990e7d8..b6967d4 100644 --- a/src/edrn.rdf/edrn/rdf/README.rst +++ b/src/edrn.rdf/edrn/rdf/README.rst @@ -311,7 +311,7 @@ And now:: 'application/rdf+xml' >>> import rdflib >>> graph = rdflib.Graph() - >>> graph.parse(data=browser.contents) + >>> graph.parse(data=browser.contents, format='xml') )> >>> len(graph) 68 @@ -431,7 +431,7 @@ And now:: >>> browser.open(portalURL + '/a-simple-source/@@rdf') >>> graph = rdflib.Graph() - >>> graph.parse(data=browser.contents) + >>> graph.parse(data=browser.contents, format='xml') )> >>> len(graph) 124 @@ -541,7 +541,7 @@ And now for the RDF:: >>> browser.open(portalURL + '/a-simple-source/@@rdf') >>> graph = rdflib.Graph() - >>> graph.parse(data=browser.contents) + >>> graph.parse(data=browser.contents, format='xml') )> >>> len(graph) 31809 @@ -672,7 +672,7 @@ And now for the RDF:: >>> browser.open(portalURL + '/a-committee-source/@@rdf') >>> graph = rdflib.Graph() - >>> graph.parse(data=browser.contents) + >>> graph.parse(data=browser.contents, format='xml') )> >>> len(graph) 298 @@ -804,7 +804,7 @@ And now for the RDF:: >>> browser.open(portalURL + '/a-protocol-source/@@rdf') >>> graph = rdflib.Graph() - >>> graph.parse(data=browser.contents) + >>> graph.parse(data=browser.contents, format='xml') )> >>> len(graph) > 9422 True @@ -969,7 +969,7 @@ And now for the RDF:: >>> browser.open(portalURL + '/a-biomuta-source/@@rdf') >>> graph = rdflib.Graph() - >>> graph.parse(data=browser.contents) + >>> graph.parse(data=browser.contents, format='xml') )> >>> len(graph) 131187 diff --git a/src/edrn.rdf/edrn/rdf/labcascollectionrdfgenerator.py b/src/edrn.rdf/edrn/rdf/labcascollectionrdfgenerator.py index 1061b3f..81d9cb9 100644 --- a/src/edrn.rdf/edrn/rdf/labcascollectionrdfgenerator.py +++ b/src/edrn.rdf/edrn/rdf/labcascollectionrdfgenerator.py @@ -86,11 +86,11 @@ def generateGraph(self): context = aq_inner(self.context) graph = rdflib.Graph() solr = Solr(context.labcasSolrURL + '/datasets', auth=(context.username, context.password)) - numDatasets = solr.search(q='*:*', rows=0).hits + numDatasets = solr.search(q='Consortium:EDRN', rows=0).hits solr = Solr(context.labcasSolrURL + '/files', auth=(context.username, context.password)) - numFiles = solr.search(q='*:*', rows=0).hits + numFiles = solr.search(q='Consortium:EDRN', rows=0).hits solr = Solr(context.labcasSolrURL + '/collections', auth=(context.username, context.password)) - results = solr.search(q='*:*', rows=999999) # 😮 TODO This'll fail once we get to a million collections + results = solr.search(q='Consortium:EDRN', rows=999999) # 😮 TODO This'll fail once we get to a million collections numCollections = results.hits for i in results: collectionID, name, consortia = i.get('id'), i.get('CollectionName', '«unknown»'), i.get('Consortium', []) diff --git a/src/edrn.rdf/edrn/rdf/rdfupdater.py b/src/edrn.rdf/edrn/rdf/rdfupdater.py index da96cce..7622017 100644 --- a/src/edrn.rdf/edrn/rdf/rdfupdater.py +++ b/src/edrn.rdf/edrn/rdf/rdfupdater.py @@ -41,9 +41,13 @@ def updateRDF(self): # Is there an active file? if context.approvedFile: # Is it identical to what we just generated? - current = Graph().parse(data=context.approvedFile.to_object.file.data) - if isomorphic(graph, current): - raise NoUpdateRequired(context) + try: + current = Graph().parse(data=context.approvedFile.to_object.file.data, format='xml') + if isomorphic(graph, current): + raise NoUpdateRequired(context) + except AttributeError: + # File not found + pass # Create a new file and set it active # TODO: Add validation steps here diff --git a/src/edrn.rdf/setup.py b/src/edrn.rdf/setup.py index 583503f..0e726a6 100644 --- a/src/edrn.rdf/setup.py +++ b/src/edrn.rdf/setup.py @@ -31,7 +31,7 @@ 'plone.app.relationfield', 'plone.behavior', 'Products.CMFPlone', - 'rdflib==4.2.2', + 'rdflib==6.2.0', 'setuptools', 'z3c.relationfield', 'suds2', diff --git a/src/edrn.summarizer/edrn/summarizer/biomarkersummarizergenerator.py b/src/edrn.summarizer/edrn/summarizer/biomarkersummarizergenerator.py index 5839ef5..e1a553e 100644 --- a/src/edrn.summarizer/edrn/summarizer/biomarkersummarizergenerator.py +++ b/src/edrn.summarizer/edrn/summarizer/biomarkersummarizergenerator.py @@ -16,7 +16,7 @@ from zope import schema from zope.component import queryUtility from plone.i18n.normalizer.interfaces import IIDNormalizer -import jsonlib +import json _typeURI = URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') _bmOrganDataTypeURI = URIRef('http://edrn.nci.nih.gov/rdf/rdfs/bmdb-1.0.0#BiomarkerOrganData') @@ -108,7 +108,7 @@ def generateJson(self): raise RDFIngestException(_('This generator folder lacks one or both of its RDF source URLs.')) normalizerFunction = queryUtility(IIDNormalizer).normalize graph = ConjunctiveGraph() - graph.parse(URLInputSource(rdfDataSource)) + graph.parse(URLInputSource(rdfDataSource), format='xml') statements = self._parseRDF(graph) biomarkerTypeFreq = {} @@ -141,11 +141,11 @@ def generateJson(self): # Add organ-specific information graph = ConjunctiveGraph() - graph.parse(URLInputSource(bmoDataSource)) + graph.parse(URLInputSource(bmoDataSource), format='xml') organStatements = self._parseRDF(graph) self.addOrganSpecificInformation(allBiomarkers, organStatements, biomarkerOrganFreq) jsondata = self.generateOrganTypeStats(biomarkerTypeFreq,biomarkerOrganFreq) # C'est tout. - return jsonlib.write(jsondata) + return json.dumps(jsondata) diff --git a/src/edrn.summarizer/edrn/summarizer/collaborationsummarizergenerator.py b/src/edrn.summarizer/edrn/summarizer/collaborationsummarizergenerator.py index 340060f..7a76773 100644 --- a/src/edrn.summarizer/edrn/summarizer/collaborationsummarizergenerator.py +++ b/src/edrn.summarizer/edrn/summarizer/collaborationsummarizergenerator.py @@ -17,7 +17,7 @@ from zope.component import queryUtility from plone.i18n.normalizer.interfaces import IIDNormalizer from .exceptions import MissingParameterError -import jsonlib +import json COLLABORATIVE_GROUP_BMDB_IDS_TO_NAMES = { Literal('Breast and Gynecologic'): Literal('Breast and Gynecologic Cancers Research Group'), @@ -148,7 +148,7 @@ def generateJson(self): raise MissingParameterError(_('This generator folder lacks one or both of its RDF source URLs.')) normalizerFunction = queryUtility(IIDNormalizer).normalize graph = ConjunctiveGraph() - graph.parse(URLInputSource(bmDataSource)) + graph.parse(URLInputSource(bmDataSource), format='xml') statements = self._parseRDF(graph) allBiomarkers = {} @@ -187,14 +187,14 @@ def generateJson(self): collabDataFreq = self.updateCollaborativeGroup(predicates[_datasetIdURI], predicates[_collaborativeGroupDataURI], allDatasets, collabDataFreq) graph = ConjunctiveGraph() - graph.parse(URLInputSource(protocolDataSource)) + graph.parse(URLInputSource(protocolDataSource), format='xml') statements = self._parseRDF(graph) for uri, predicates in statements.items(): if _collaborativeGroupProURI in predicates: collabProtoFreq = self.updateCollaborativeGroup(predicates[_protocolNameURI], predicates[_collaborativeGroupProURI], allProtocols, collabProtoFreq) graph = ConjunctiveGraph() - graph.parse(URLInputSource(memberDataSource)) + graph.parse(URLInputSource(memberDataSource), format='xml') statements = self._parseRDF(graph) for uri, predicates in statements.items(): if _memberPredicateURI in predicates: @@ -205,4 +205,4 @@ def generateJson(self): jsondata = {"biomarker" : collabBmFreq, "panel" : collabPnFreq, "data" : collabDataFreq, "protocol" : collabProtoFreq, "member" : collabMemFreq} # C'est tout. - return jsonlib.write(jsondata) + return json.dumps(jsondata) diff --git a/src/edrn.summarizer/edrn/summarizer/datasetsummarizergenerator.py b/src/edrn.summarizer/edrn/summarizer/datasetsummarizergenerator.py index e6bc5e2..99707bc 100644 --- a/src/edrn.summarizer/edrn/summarizer/datasetsummarizergenerator.py +++ b/src/edrn.summarizer/edrn/summarizer/datasetsummarizergenerator.py @@ -14,7 +14,7 @@ from rdflib import ConjunctiveGraph from .utils import validateAccessibleURL from zope import schema -import jsonlib +import json _organPredicateURI = URIRef('http://edrn.nci.nih.gov/rdf/schema.rdf#organ') @@ -50,7 +50,7 @@ def getRDFStatements(self): context = aq_inner(self.context) statements = {} graph = ConjunctiveGraph() - graph.parse(URLInputSource(context.rdfDataSource)) + graph.parse(URLInputSource(context.rdfDataSource), format='xml') self.addGraphToStatements(graph, statements) return statements @@ -67,4 +67,4 @@ def generateJson(self): organDatasetCount[organ] = 1 # C'est tout. - return jsonlib.write(organDatasetCount) + return json.dumps(organDatasetCount) diff --git a/src/edrn.summarizer/edrn/summarizer/extresourcesummarizergenerator.py b/src/edrn.summarizer/edrn/summarizer/extresourcesummarizergenerator.py index 02a8826..bc07e41 100644 --- a/src/edrn.summarizer/edrn/summarizer/extresourcesummarizergenerator.py +++ b/src/edrn.summarizer/edrn/summarizer/extresourcesummarizergenerator.py @@ -16,7 +16,7 @@ from zope import schema from zope.component import queryUtility from plone.i18n.normalizer.interfaces import IIDNormalizer -import jsonlib, re +import json, re _typeURI = URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type') _bmRefResourceURI = URIRef('http://edrn.nci.nih.gov/rdf/rdfs/bmdb-1.0.0#referencesResource') @@ -112,7 +112,7 @@ def generateJson(self): raise RDFIngestException(_('This generator folder lacks one or both of its RDF source URLs.')) normalizerFunction = queryUtility(IIDNormalizer).normalize graph = ConjunctiveGraph() - graph.parse(URLInputSource(rdfDataSource)) + graph.parse(URLInputSource(rdfDataSource), format='xml') statements = self._parseRDF(graph) OtherRef = {} @@ -145,4 +145,4 @@ def generateJson(self): jsondata = {"prot":ProtRef, "gene": GeneRef, "other":OtherRef} # C'est tout. - return jsonlib.write(jsondata) + return json.dumps(jsondata) diff --git a/src/edrn.summarizer/edrn/summarizer/nulljsongenerator.py b/src/edrn.summarizer/edrn/summarizer/nulljsongenerator.py index e505e8d..16e19eb 100644 --- a/src/edrn.summarizer/edrn/summarizer/nulljsongenerator.py +++ b/src/edrn.summarizer/edrn/summarizer/nulljsongenerator.py @@ -10,7 +10,7 @@ from zope import schema from edrn.summarizer import _ from .summarizergenerator import ISummarizerGenerator -import jsonlib +import json class INullJsonGenerator(ISummarizerGenerator): @@ -30,4 +30,4 @@ def __init__(self, context): def generateJson(self): '''Generate an empty graph.''' - return jsonlib.write({}) + return json.dumps({}) diff --git a/src/edrn.summarizer/edrn/summarizer/publicationsummarizergenerator.py b/src/edrn.summarizer/edrn/summarizer/publicationsummarizergenerator.py index f0cbde0..246dbc2 100644 --- a/src/edrn.summarizer/edrn/summarizer/publicationsummarizergenerator.py +++ b/src/edrn.summarizer/edrn/summarizer/publicationsummarizergenerator.py @@ -17,7 +17,7 @@ from zope.component import getUtility from plone.i18n.normalizer.interfaces import IIDNormalizer import contextlib -import jsonlib +import json from Bio import Entrez # Constants @@ -165,4 +165,4 @@ def generateJson(self): pubMedYears = self.queryPubmedYear(allPubmedIds, allPublications, pubMedYears) # C'est tout. - return jsonlib.write(pubMedYears) + return json.dumps(pubMedYears) diff --git a/src/edrn.summarizer/edrn/summarizer/specimensummarizergenerator.py b/src/edrn.summarizer/edrn/summarizer/specimensummarizergenerator.py index 5c7c94f..2aa2dc7 100644 --- a/src/edrn.summarizer/edrn/summarizer/specimensummarizergenerator.py +++ b/src/edrn.summarizer/edrn/summarizer/specimensummarizergenerator.py @@ -15,7 +15,7 @@ from .utils import validateAccessibleURL from zope import schema import urllib -import jsonlib +import json # Site identifier to ERNE identifier SITES = { @@ -118,4 +118,4 @@ def generateJson(self): specimenCount = self.getSpecimens(erneID, erneWS) # C'est tout. - return jsonlib.write(specimenCount) + return json.dumps(specimenCount) diff --git a/src/edrn.summarizer/edrn/summarizer/summarizerupdater.py b/src/edrn.summarizer/edrn/summarizer/summarizerupdater.py index a94ebfd..1f8ff2e 100644 --- a/src/edrn.summarizer/edrn/summarizer/summarizerupdater.py +++ b/src/edrn.summarizer/edrn/summarizer/summarizerupdater.py @@ -14,7 +14,7 @@ from zope.component import getUtility from zope.event import notify from zope.lifecycleevent import ObjectModifiedEvent -import datetime, jsonlib, uuid +import datetime, json, uuid SUMMARIZER_XML_MIMETYPE = 'application/rdf+xml' SUMMARIZER_JSON_MIMETYPE = 'application/json' @@ -39,14 +39,14 @@ def updateSummary(self): if generator.datatype == 'json': adapter = IJsonGenerator(generator) serialized = adapter.generateJson() - json = jsonlib.read(serialized) + json_result = json.loads(serialized) mimetype = SUMMARIZER_JSON_MIMETYPE # Is there an active file? if context.approvedFile: # Is it identical to what we just generated? - current = jsonlib.read(context.approvedFile.to_object.file.data) - if sorted(json.items()) == sorted(current.items()): + current = json.loads(context.approvedFile.to_object.file.data) + if sorted(json_result.items()) == sorted(current.items()): raise NoUpdateRequired(context) elif generator.datatype == 'rdf': diff --git a/src/edrn.summarizer/setup.py b/src/edrn.summarizer/setup.py index dc7bf64..9b1125c 100644 --- a/src/edrn.summarizer/setup.py +++ b/src/edrn.summarizer/setup.py @@ -32,8 +32,7 @@ 'plone.rest', 'plone.behavior', 'Products.CMFPlone', - 'rdflib==4.2.2', - 'jsonlib-python3', + 'rdflib==6.2.0', 'mygene', 'requests', 'biopython',