diff --git a/app/controllers/api/v1/bulk_download_controller.rb b/app/controllers/api/v1/bulk_download_controller.rb index cbbcc2ef1..80da4ee9a 100644 --- a/app/controllers/api/v1/bulk_download_controller.rb +++ b/app/controllers/api/v1/bulk_download_controller.rb @@ -461,7 +461,7 @@ def self.find_matching_directories(directory_name, file_type, accession) when 'nodirs' [] when 'all' - study.directory_listings.all + study&.directory_listings&.are_synced || [] else DirectoryListing.where(name: sanitized_dirname, study_id: study.id, sync_status: true, file_type: file_type) end diff --git a/app/javascript/components/explore/ExploreDisplayPanelManager.jsx b/app/javascript/components/explore/ExploreDisplayPanelManager.jsx index 9f5b8a0a6..d4d80aed6 100644 --- a/app/javascript/components/explore/ExploreDisplayPanelManager.jsx +++ b/app/javascript/components/explore/ExploreDisplayPanelManager.jsx @@ -200,7 +200,7 @@ export default function ExploreDisplayPanelManager({ showDifferentialExpressionPanel, setIsCellSelecting, currentPointsSelected, isCellSelecting, deGenes, setDeGenes, setShowDeGroupPicker, cellFaceting, cellFilteringSelection, cellFilterCounts, clusterCanFilter, filterErrorText, - updateFilteredCells, panelToShow, toggleViewOptions + updateFilteredCells, panelToShow, toggleViewOptions, allowCellFiltering }) { const [, setRenderForcer] = useState({}) const [dataCache] = useState(createCache()) @@ -244,14 +244,7 @@ export default function ExploreDisplayPanelManager({ const shownAnnotation = getShownAnnotation(exploreParamsWithDefaults.annotation, annotationList) - const isSubsampled = exploreParamsWithDefaults.subsample !== 'all' let cellFilteringTooltipAttrs = {} - if (isSubsampled) { - cellFilteringTooltipAttrs = { - 'data-toggle': 'tooltip', - 'data-original-title': 'Clicking will remove subsampling; plots might be noticeably slower.' - } - } /** Toggle cell filtering panel */ function toggleCellFilterPanel() { @@ -359,7 +352,7 @@ export default function ExploreDisplayPanelManager({ } - {showCellFiltering && panelToShow === 'cell-filtering' && + {showCellFiltering && panelToShow === 'cell-filtering' && allowCellFiltering && + deGenes={deGenes}/> } {panelToShow === 'differential-expression' && } - { showCellFiltering && clusterCanFilter && + { showCellFiltering && clusterCanFilter && allowCellFiltering && <>
@@ -462,7 +454,7 @@ export default function ExploreDisplayPanelManager({
} - { showCellFiltering && !clusterCanFilter && + { showCellFiltering && !clusterCanFilter && allowCellFiltering && <>
@@ -477,6 +469,21 @@ export default function ExploreDisplayPanelManager({
} + { showCellFiltering && !allowCellFiltering && + <> +
+
+ +
+
+ + } { @@ -696,6 +708,7 @@ export default function ExploreDisplayTabs({ updateFilteredCells={updateFilteredCells} panelToShow={panelToShow} toggleViewOptions={toggleViewOptions} + allowCellFiltering={allowCellFiltering} />
diff --git a/app/javascript/components/explore/PlotTabs.jsx b/app/javascript/components/explore/PlotTabs.jsx index 7fd458d1b..5d4d0c280 100644 --- a/app/javascript/components/explore/PlotTabs.jsx +++ b/app/javascript/components/explore/PlotTabs.jsx @@ -55,7 +55,10 @@ export default function PlotTabs({ const tooltip = disabledTooltips[tabKey] const numGenes = tooltip.numToSearch const geneText = `gene${tooltip.isMulti ? 's' : ''}` - const text = `To show this plot, search ${numGenes} ${geneText} using the box at left` + let text = `To show this plot, search ${numGenes} ${geneText} using the box at left` + if (['scatter', 'distribution'].includes(tabKey)) { + text += " or use the 'Collapse genes by' menu for multiple genes" + } return (
  • { + const hasProhibited = (header.search(prohibitedChars) !== -1) + if (hasProhibited) { + problemHeaders.push(header) + } + }) + + if (problemHeaders.length > 0) { + const problems = `"${problemHeaders.join('", "')}"` + const msg = `Update these headers to use only letters, numbers, and underscores: ${problems}` + issues.push(['error', 'format:cap:only-alphanumeric-underscore', msg]) + } + + return issues +} + /** Verifies metadata file has all required columns */ export function validateRequiredMetadataColumns(parsedHeaders, isAnnData=false) { const issues = [] diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js index 7e5f4fc49..08bc1da94 100644 --- a/app/javascript/lib/validation/validate-anndata.js +++ b/app/javascript/lib/validation/validate-anndata.js @@ -1,8 +1,9 @@ -import {openH5File} from 'hdf5-indexed-reader' +import { openH5File } from 'hdf5-indexed-reader' import { getOAuthToken } from '~/lib/scp-api' import { validateUnique, validateRequiredMetadataColumns, + validateAlphanumericAndUnderscores, metadataSchema, REQUIRED_CONVENTION_COLUMNS } from './shared-validation' import { getAcceptedOntologies, fetchOntologies } from './ontology-validation' @@ -285,6 +286,7 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) { issues = issues.concat( validateUnique(headers), + validateAlphanumericAndUnderscores(headers), requiredMetadataIssues, ontologyIdFormatIssues, ontologyLabelAndIdIssues diff --git a/app/javascript/lib/validation/validate-file-content.js b/app/javascript/lib/validation/validate-file-content.js index f09ce7d0c..bc63527fb 100644 --- a/app/javascript/lib/validation/validate-file-content.js +++ b/app/javascript/lib/validation/validate-file-content.js @@ -18,7 +18,7 @@ import { getParsedHeaderLines, parseLine, ParseException, validateUniqueCellNamesWithinFile, validateMetadataLabelMatches, validateGroupColumnCounts, timeOutCSFV, validateUnique, - validateRequiredMetadataColumns + validateRequiredMetadataColumns, validateAlphanumericAndUnderscores } from './shared-validation' import { parseDifferentialExpressionFile } from './validate-differential-expression' import { parseAnnDataFile } from './validate-anndata' @@ -154,15 +154,16 @@ function validateEqualCount(headers, annotTypes) { * Cap lines are like meta-information lines in other file formats * (e.g. VCF), but do not begin with pound signs (#). */ -function validateCapFormat([headers, annotTypes]) { +function validateCapFormat([headers, annotTypes], isMetadataFile=true) { let issues = [] if (!headers || !annotTypes) { return [['error', 'format:cap:no-cap-rows', 'File does not have 2 non-empty header rows']] } - // Check format rules that apply to both metadata and cluster files + // Check format rules that apply to both metadata and (except one rule) cluster files issues = issues.concat( validateUnique(headers), + validateAlphanumericAndUnderscores(headers, isMetadataFile), validateNameKeyword(headers), validateTypeKeyword(annotTypes), validateGroupOrNumeric(annotTypes), @@ -240,7 +241,7 @@ export async function parseMetadataFile(chunker, mimeType, fileOptions) { /** parse a cluster file, and return an array of issues, along with file parsing info */ export async function parseClusterFile(chunker, mimeType) { const { headers, delimiter } = await getParsedHeaderLines(chunker, mimeType) - let issues = validateCapFormat(headers) + let issues = validateCapFormat(headers, false) issues = issues.concat(validateClusterCoordinates(headers)) // add other header validations here @@ -264,7 +265,7 @@ export async function parseClusterFile(chunker, mimeType) { * Example: prettyAndOr(['A', 'B', 'C'], 'or') > '"A", "B", or "C"' */ function prettyAndOr(stringArray, operator) { let phrase - const quoted = stringArray.map(ext => `"${ext}"`) + const quoted = stringArray.map(ext => `".${ext}"`) if (quoted.length === 1) { phrase = quoted[0] @@ -274,7 +275,7 @@ function prettyAndOr(stringArray, operator) { } else if (quoted.length > 2) { // e.g. "A", "B", or "C" const last = quoted.slice(-1)[0] - phrase = `${quoted.slice(-1).join(', ') } ${operator} ${last}` + phrase = `${quoted.slice(0, -1).join(', ')}, ${operator} ${last}` } return phrase @@ -315,10 +316,10 @@ export async function validateGzipEncoding(file, fileType) { } } else { if (firstByte === GZIP_MAGIC_NUMBER) { - const prettyExts = prettyAndOr(EXTENSIONS_MUST_GZIP) - const problem = `Only files with extensions ${prettyExts}) may be gzipped` - const solution = 'please decompress file and retry' - throw new ParseException('encoding:missing-gz-extension', `${problem}; ${solution}`) + const prettyExts = prettyAndOr(EXTENSIONS_MUST_GZIP, 'or') + const problem = `Only files with extensions ${prettyExts} may be gzipped` + const solution = 'Please add a ".gz" extension to the file name, or decompress the file, and retry.' + throw new ParseException('encoding:missing-gz-extension', `${problem}. ${solution}`) } else { isGzipped = false } diff --git a/app/models/ann_data_file_info.rb b/app/models/ann_data_file_info.rb index d0d9184ef..51a935ed9 100644 --- a/app/models/ann_data_file_info.rb +++ b/app/models/ann_data_file_info.rb @@ -193,7 +193,7 @@ def sanitize_fragments! # create the default cluster data_fragment entries def set_default_cluster_fragments! - return false if fragments_by_type(:cluster).any? + return false if fragments_by_type(:cluster).any? || reference_file default_obsm_keys = AnnDataIngestParameters::PARAM_DEFAULTS[:obsm_keys] default_obsm_keys.each do |obsm_key_name| diff --git a/app/models/differential_expression_result.rb b/app/models/differential_expression_result.rb index 04a02a7e0..8810f6374 100644 --- a/app/models/differential_expression_result.rb +++ b/app/models/differential_expression_result.rb @@ -228,7 +228,7 @@ def remove_output_files # in production, DeleteQueueJob will handle all necessary cleanup return true if study.nil? || study.detached || study.queued_for_deletion - identifier = "#{study.accession}:#{annotation_identifier}" + identifier = "#{study.accession}:#{annotation_name}--group--#{annotation_scope}" bucket_files.each do |filepath| remote = ApplicationController.firecloud_client.get_workspace_file(study.bucket_id, filepath) if remote.present? diff --git a/app/models/ingest_job.rb b/app/models/ingest_job.rb index 16f6d1592..2dbb8f738 100644 --- a/app/models/ingest_job.rb +++ b/app/models/ingest_job.rb @@ -32,6 +32,9 @@ class IngestJob # steps that need to be accounted for SPECIAL_ACTIONS = %i[differential_expression render_expression_arrays image_pipeline].freeze + # main processes that extract or ingest data for core visualizations (scatter, violin, dot, etc) + CORE_ACTIONS = %w[ingest_anndata ingest_expression ingest_cell_metadata ingest_cluster] + # jobs that need parameters objects in order to launch correctly PARAMS_OBJ_REQUIRED = %i[ differential_expression render_expression_arrays image_pipeline ingest_anndata @@ -347,7 +350,7 @@ def poll_for_completion(run_at: 1.minute.from_now) if done? && !failed? Rails.logger.info "IngestJob poller: #{pipeline_name} is done!" Rails.logger.info "IngestJob poller: #{pipeline_name} status: #{current_status}" - unless special_action? || action == :ingest_anndata + unless special_action? || (action == :ingest_anndata && study_file.is_viz_anndata?) study_file.update(parse_status: 'parsed') study_file.bundled_files.each { |sf| sf.update(parse_status: 'parsed') } end @@ -981,7 +984,7 @@ def log_to_mixpanel mixpanel_log_props = get_job_analytics # log job properties to Mixpanel MetricsService.log(mixpanel_event_name, mixpanel_log_props, user) - report_anndata_summary if study_file.is_viz_anndata? && action != :differential_expression + report_anndata_summary if study_file.is_viz_anndata? && action != :ingest_anndata end # set a mixpanel event name based on action @@ -994,7 +997,9 @@ def anndata_summary_props pipelines = ApplicationController.life_sciences_api_client.list_pipelines previous_jobs = pipelines.operations.select do |op| pipeline_args = op.metadata.dig('pipeline', 'actions').first['commands'] - pipeline_args.detect { |c| c == study_file.id.to_s } && !pipeline_args.include?('--differential-expression') + op.done? && + pipeline_args.detect { |c| c == study_file.id.to_s } && + (pipeline_args & CORE_ACTIONS).any? end # get total runtime from initial extract to final parse initial_extract = previous_jobs.last @@ -1023,6 +1028,7 @@ def anndata_summary_props op.metadata.dig('pipeline', 'actions').first['commands'].detect { |c| c == '--extract' } || op.error.present? end.count + num_files_extracted += 1 if extracted_raw_counts?(initial_extract.metadata) # event properties for Mixpanel summary event { perfTime: job_perftime, @@ -1039,20 +1045,43 @@ def anndata_summary_props } end + # determine if an ingest_anndata job extracted raw counts data + # reads from the --extract parameter to avoid counting filenames that include 'raw_counts' + def extracted_raw_counts?(pipeline_metadata) + commands = pipeline_metadata.dig('pipeline', 'actions').first['commands'] + extract_idx = commands.index('--extract') + return false if extract_idx.nil? + + extract_params = commands[extract_idx + 1] + extract_params.include?('raw_counts') + end + # report a summary of all AnnData extraction for this file to Mixpanel, if this is the last job def report_anndata_summary + study_file.reload + return false if study_file.has_anndata_summary? # don't bother checking if summary is already sent + + file_identifier = "#{study_file.upload_file_name} (#{study_file.id})" + Rails.logger.info "Checking AnnData summary for #{file_identifier} after #{action}" remaining_jobs = DelayedJobAccessor.find_jobs_by_handler_type(IngestJob, study_file) - fragment = params_object.associated_file # fragment file from this job - # discard the job that corresponds with this ingest process - still_processing = remaining_jobs.reject do |job| + # find running jobs associated with this file that are part of primary extraction (expression, metadata, clustering) + still_processing = remaining_jobs.select do |job| ingest_job = DelayedJobAccessor.dump_job_handler(job).object - ingest_job.params_object.is_a?(AnnDataIngestParameters) && ingest_job.params_object.associated_file == fragment - end.any? + ingest_job.params_object.is_a?(AnnDataIngestParameters) && + !ingest_job.done? && + %i[ingest_cluster ingest_cell_metadata ingest_expression].include?(ingest_job.action) + end - # only continue if there are no more jobs and a summary has not been sent yet - study_file.reload - return false if still_processing || study_file.has_anndata_summary? + if still_processing.any? + files = still_processing.map do |job| + ingest_job = DelayedJobAccessor.dump_job_handler(job).object + "#{ingest_job.action} - #{ingest_job.params_object.associated_file}" + end + Rails.logger.info "Found #{still_processing.count} jobs still processing for #{file_identifier} #{files.join(', ')}" + return false + end + Rails.logger.info "Sending AnnData summary for #{file_identifier} after #{action}" study_file.set_anndata_summary! # prevent race condition leading to duplicate summaries MetricsService.log('ingestSummary', anndata_summary_props, user) end diff --git a/test/api/bulk_download_controller_test.rb b/test/api/bulk_download_controller_test.rb index 2d8b7bca7..689f5eaa7 100644 --- a/test/api/bulk_download_controller_test.rb +++ b/test/api/bulk_download_controller_test.rb @@ -484,4 +484,18 @@ class BulkDownloadControllerTest < ActionDispatch::IntegrationTest assert_equal %w[FakeHCAProject AnotherFakeHCAProject].sort, hca_accessions.sort end end + + test 'should ignore unsynced directories' do + study = FactoryBot.create(:detached_study, name: 'Unsynced Dir Test', user: @user, test_array: @@studies_to_clean) + files = 1.upto(30).map do |i| + { + name: "_scp_internal/subdir/output_#{i}.tsv", + size: i * 100, + generation: SecureRandom.random_number(10000..99999) + } + end + directory = DirectoryListing.create(study:, file_type: 'tsv', name: '_scp_internal', files:, sync_status: false) + assert directory.persisted? + assert_empty Api::V1::BulkDownloadController.find_matching_directories('all', '', study.accession) + end end diff --git a/test/js/explore/explore-display-tabs.test.js b/test/js/explore/explore-display-tabs.test.js index 316b1414d..bf9f5cd9e 100644 --- a/test/js/explore/explore-display-tabs.test.js +++ b/test/js/explore/explore-display-tabs.test.js @@ -26,6 +26,13 @@ jest.mock('lib/cell-faceting', () => { } }) +// Prevent "ReferenceError: $ is not defined" errors when trying to call $(window) functions +jest.mock('lib/plot', () => { + return { + getPlotDimensions: jest.fn(() => { return { width: 100, height: 100 } }) + } +}) + import React from 'react' import { render, screen, waitFor } from '@testing-library/react' import * as UserProvider from '~/providers/UserProvider' @@ -424,6 +431,64 @@ describe('explore tabs are activated based on study info and parameters', () => expect(screen.getByTestId('cell-filtering-button')).toHaveTextContent('Filter plotted cells') }) + it('hides cell filtering when not applicable', async () => { + jest + .spyOn(UserProvider, 'getFeatureFlagsWithDefaults') + .mockReturnValue({ + show_cell_facet_filtering: true + }) + + const newExplore = { + 'cluster': 'All Cells UMAP', + 'annotation': { + 'name': 'biosample_id', + 'type': 'group', + 'scope': 'study' + }, + 'consensus': null, + 'genes': ['A1BG', 'A1BG-AS1'] + } + render(( + + )) + expect(screen.getByTestId('cell-filtering-button-disabled')).toHaveTextContent('Filtering unavailable') + }) + + it('shows cell filtering when using consensus metric', async () => { + jest + .spyOn(UserProvider, 'getFeatureFlagsWithDefaults') + .mockReturnValue({ + show_cell_facet_filtering: true + }) + + const newExplore = { + 'cluster': 'All Cells UMAP', + 'annotation': { + 'name': 'biosample_id', + 'type': 'group', + 'scope': 'study' + }, + 'consensus': 'median', + 'genes': ['A1BG', 'A1BG-AS1'], + 'tab': 'scatter', + 'facets': '' + } + render(( + + )) + expect(screen.getByTestId('cell-filtering-button')).toHaveTextContent('Filter plotted cells') + }) + it('disables cell filtering button', async () => { jest .spyOn(UserProvider, 'getFeatureFlagsWithDefaults') @@ -440,6 +505,7 @@ describe('explore tabs are activated based on study info and parameters', () => clusterCanFilter={false} filterErrorText={'Cluster is not indexed'} panelToShow={'options'} + allowCellFiltering={true} /> ) diff --git a/test/js/lib/validate-anndata.test.js b/test/js/lib/validate-anndata.test.js index b89d27c98..d5b9890ae 100644 --- a/test/js/lib/validate-anndata.test.js +++ b/test/js/lib/validate-anndata.test.js @@ -2,15 +2,11 @@ import { getHdf5File, parseAnnDataFile, getAnnDataHeaders, checkOntologyIdFormat } from 'lib/validation/validate-anndata' -const BASE_URL = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data' - -// TODO: Uncomment this after PR merge -// const BASE_URL = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata' +const BASE_URL = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata' describe('Client-side file validation for AnnData', () => { it('Parses AnnData headers', async () => { - const url = `${BASE_URL}/anndata_test.h5ad` - // const url = `${BASE_URL}/valid.h5ad` // Uncomment after PR merge + const url = `${BASE_URL}/valid.h5ad` const expectedHeaders = [ '_index', 'biosample_id', @@ -32,15 +28,13 @@ describe('Client-side file validation for AnnData', () => { }) it('Reports AnnData with valid headers as valid', async () => { - const url = `${BASE_URL}/anndata_test.h5ad` - // const url = `${BASE_URL}/valid.h5ad` // Uncomment after PR merge + const url = `${BASE_URL}/valid.h5ad` const parseResults = await parseAnnDataFile(url) expect(parseResults.issues).toHaveLength(0) }) it('Reports AnnData with invalid headers as invalid', async () => { - const url = `${BASE_URL}/anndata_test_bad_header_no_species.h5ad` - // const url = `${BASE_URL}/invalid_header_no_species.h5ad` // Uncomment after PR merge + const url = `${BASE_URL}/invalid_header_no_species.h5ad` const parseResults = await parseAnnDataFile(url) expect(parseResults.issues).toHaveLength(1) @@ -63,8 +57,7 @@ describe('Client-side file validation for AnnData', () => { }) it('Parses AnnData rows and reports invalid ontology IDs', async () => { - const url = `${BASE_URL}/anndata_test_invalid_disease.h5ad` - // const url = `${BASE_URL}/invalid_disease_id.h5ad` // Uncomment after PR merge + const url = `${BASE_URL}/invalid_disease_id.h5ad` const parseResults = await parseAnnDataFile(url) expect(parseResults.issues).toHaveLength(1) @@ -77,7 +70,8 @@ describe('Client-side file validation for AnnData', () => { expect(parseResults.issues[0]).toEqual(expectedIssue) }) - // // TODO: Uncomment after PR merge + // TODO (SCP-5813): Uncomment this test upon completing "Enable ontology validation for remote AnnData" + // // it('Parses AnnData rows and reports invalid ontology labels', async () => { // const url = `${BASE_URL}/invalid_disease_label.h5ad` // const parseResults = await parseAnnDataFile(url) diff --git a/test/js/lib/validate-file-content.test.js b/test/js/lib/validate-file-content.test.js index e07c83048..1c6f6570d 100644 --- a/test/js/lib/validate-file-content.test.js +++ b/test/js/lib/validate-file-content.test.js @@ -319,6 +319,17 @@ describe('Client-side file validation', () => { expect(errors[0][1]).toEqual('encoding:missing-gz-extension') }) + it('catches real gzipped file with txt extension', async () => { + const file = createMockFile({ fileName: 'missing_gz_extension.txt'}) + const [{ errors }] = await validateLocalFile(file, { file_type: 'Cluster' }) + expect(errors).toHaveLength(1) + expect(errors[0][1]).toEqual('encoding:missing-gz-extension') + const expectedMessage = + // eslint-disable-next-line max-len + 'Only files with extensions ".gz", ".bam", or ".tbi" may be gzipped. Please add a ".gz" extension to the file name, or decompress the file, and retry.' + expect(errors[0][2]).toEqual(expectedMessage) + }) + it('does not catch gzipped RDS file without .gz extension', async () => { const file = createMockFile({ fileName: 'foo.rds', content: '\x1F\x2E3lkjf3' }) const [{ errors }] = await validateLocalFile(file, { file_type: 'Cluster' }) @@ -430,6 +441,25 @@ describe('Client-side file validation', () => { const displayedWarning = screen.getByTestId('validation-warning') expect(displayedWarning).toHaveTextContent(issues.warnings[0][2]) }) + + it('Does not throw disallowed characters in cluster header', async () => { + const file = createMockFile({ + fileName: 'foo.txt', + content: 'NAME,X,Y,invalid.header\nTYPE,numeric,numeric,numeric,numeric\nCELL_0001,34.472,32.211\nCELL_0002,15.975,10.043,5' + }) + const [{ errors }] = await validateLocalFile(file, { file_type: 'Cluster' }) + expect(errors).toHaveLength(0) + }) +}) + +it('Catches disallowed characters in metadata header', async () => { + const file = createMockFile({ fileName: 'metadata_invalid_annotation_name_period.tsv' }) + const [{ errors }] = await validateLocalFile(file, { file_type: 'Metadata' }) + expect(errors).toHaveLength(1) + + const expectedErrorType = 'format:cap:only-alphanumeric-underscore' + const errorType = errors[0][1] + expect(errorType).toBe(expectedErrorType) }) // With the client side file validation feature flag set to false expect invalid files to pass diff --git a/test/models/ann_data_file_info_test.rb b/test/models/ann_data_file_info_test.rb index fd7a80167..dfbc17228 100644 --- a/test/models/ann_data_file_info_test.rb +++ b/test/models/ann_data_file_info_test.rb @@ -127,7 +127,7 @@ def generate_id end test 'should set default cluster fragments' do - ann_data_info = AnnDataFileInfo.new + ann_data_info = AnnDataFileInfo.new(reference_file: false) assert ann_data_info.valid? default_keys = AnnDataIngestParameters::PARAM_DEFAULTS[:obsm_keys] default_keys.each do |obsm_key_name| @@ -135,6 +135,11 @@ def generate_id matcher = { data_type: :cluster, name:, obsm_key_name: }.with_indifferent_access assert ann_data_info.find_fragment(**matcher).present? end + # ensure non-parseable AnnData files don't create fragment + reference_anndata = AnnDataFileInfo.new + assert reference_anndata.valid? + assert_empty reference_anndata.data_fragments + assert_empty reference_anndata.obsm_key_names end test 'should validate data fragments' do diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb index 656339135..93e00d3ae 100644 --- a/test/models/ingest_job_test.rb +++ b/test/models/ingest_job_test.rb @@ -333,7 +333,7 @@ class IngestJobTest < ActiveSupport::TestCase end end - test 'should get ingest summary for AnnData parsing' do + test 'should get ingestSummary for AnnData parsing' do ann_data_file = FactoryBot.create(:ann_data_file, name: 'data.h5ad', study: @basic_study) ann_data_file.ann_data_file_info.reference_file = false ann_data_file.ann_data_file_info.data_fragments = [ @@ -369,8 +369,9 @@ class IngestJobTest < ActiveSupport::TestCase }.with_indifferent_access pipeline_mock = MiniTest::Mock.new - 4.times {pipeline_mock.expect :metadata, metadata_mock } + 5.times {pipeline_mock.expect :metadata, metadata_mock } 2.times {pipeline_mock.expect :error, nil } + pipeline_mock.expect :done?, true operations_mock = Minitest::Mock.new operations_mock.expect :operations, [pipeline_mock] @@ -418,20 +419,27 @@ class IngestJobTest < ActiveSupport::TestCase ingest_cluster: true, name: 'UMAP', cluster_file:, domain_ranges: {}, ingest_anndata: false, extract: nil, obsm_keys: nil ) + pipeline_name = SecureRandom.uuid cluster_job = IngestJob.new( - study: @basic_study, study_file: ann_data_file, user: @user, action: :ingest_cluster, + pipeline_name:, study: @basic_study, study_file: ann_data_file, user: @user, action: :ingest_cluster, params_object: cluster_params_object ) job_mock = Minitest::Mock.new - job_mock.expect :object, cluster_job + 2.times { job_mock.expect :object, cluster_job } + + pipeline_mock = Minitest::Mock.new + pipeline_mock.expect :done?, false # negative test DelayedJobAccessor.stub :find_jobs_by_handler_type, [Delayed::Job.new] do DelayedJobAccessor.stub :dump_job_handler, job_mock do - metadata_job.report_anndata_summary - job_mock.verify - ann_data_file.reload - assert_not ann_data_file.has_anndata_summary? + ApplicationController.life_sciences_api_client.stub :get_pipeline, pipeline_mock do + metadata_job.report_anndata_summary + job_mock.verify + pipeline_mock.verify + ann_data_file.reload + assert_not ann_data_file.has_anndata_summary? + end end end @@ -458,6 +466,7 @@ class IngestJobTest < ActiveSupport::TestCase cluster_job.report_anndata_summary ann_data_file.reload assert ann_data_file.has_anndata_summary? + metrics_mock.verify end end end @@ -473,8 +482,9 @@ class IngestJobTest < ActiveSupport::TestCase pipeline = { actions: } failed_metadata = { pipeline:, events:, startTime: (now - 1.hour).to_s, endTime: now.to_s }.with_indifferent_access failed_pipeline = Minitest::Mock.new - 6.times { failed_pipeline.expect(:metadata, failed_metadata) } + 7.times { failed_pipeline.expect(:metadata, failed_metadata) } 3.times { failed_pipeline.expect(:error, true) } + failed_pipeline.expect :done?, true operations_mock = Minitest::Mock.new operations_mock.expect :operations, [failed_pipeline] client_mock = Minitest::Mock.new diff --git a/test/test_data/anndata/invalid_annotation_name_period.h5ad b/test/test_data/anndata/invalid_annotation_name_period.h5ad new file mode 100644 index 000000000..a2f02e0a1 Binary files /dev/null and b/test/test_data/anndata/invalid_annotation_name_period.h5ad differ diff --git a/test/test_data/metadata_example.txt b/test/test_data/metadata_example.txt index 07270de1b..0a9e67673 100644 --- a/test/test_data/metadata_example.txt +++ b/test/test_data/metadata_example.txt @@ -1,4 +1,4 @@ -NAME Cluster Sub-Cluster Average Intensity +NAME Cluster Subcluster Average_Intensity TYPE group group numeric CELL_0001 CLST_A CLST_A_1 7.742 CELL_0002 CLST_A CLST_A_1 -13.745 @@ -14,4 +14,4 @@ CELL_00011 CLST_C CLST_C_1 0.638 CELL_00012 CLST_C CLST_C_1 8.888 CELL_00013 CLST_C CLST_C_1 -2.27 CELL_00014 CLST_C CLST_C_2 -2.606 -CELL_00015 CLST_C CLST_C_2 -9.089 \ No newline at end of file +CELL_00015 CLST_C CLST_C_2 -9.089 diff --git a/test/test_data/validation/cluster_valid_annotation_name_period.txt b/test/test_data/validation/cluster_valid_annotation_name_period.txt new file mode 100644 index 000000000..e2dd55efd --- /dev/null +++ b/test/test_data/validation/cluster_valid_annotation_name_period.txt @@ -0,0 +1,17 @@ +NAME X Y Z Category Intensity invalid.header +TYPE numeric numeric numeric group numeric numeric +CELL_0001 34.472 32.211 60.035 C 0.719 1 +CELL_0002 15.975 10.043 21.424 B 0.904 1 +CELL_0003 -11.688 -53.645 -58.374 A 2.195 1 +CELL_0004 30.04 31.138 33.597 B -1.084 1 +CELL_0005 23.862 33.092 26.904 B 4.256 2 +CELL_0006 -39.07 -14.64 -44.643 A 1.317 2 +CELL_0007 40.039 27.206 55.211 C -4.917 2 +CELL_0008 28.755 27.187 34.686 B -3.777 2 +CELL_0009 -48.601 -13.512 -51.659 A 0.778 2 +CELL_00010 14.653 27.832 28.586 B 4.62 2 +CELL_00011 20.603 32.071 45.484 C -3.019 2 +CELL_00012 -10.333 -51.733 -26.631 A -4.989 2 +CELL_00013 -52.966 -12.484 -60.369 A 3.137 2 +CELL_00014 38.513 26.969 63.654 C -1.74 2 +CELL_00015 12.838 13.047 17.685 B -2.443 2 diff --git a/test/test_data/validation/header_count_mismatch.tsv b/test/test_data/validation/header_count_mismatch.tsv index fc495e025..a2bafbf27 100644 --- a/test/test_data/validation/header_count_mismatch.tsv +++ b/test/test_data/validation/header_count_mismatch.tsv @@ -1,4 +1,4 @@ -NAME Cluster Sub-Cluster Average Intensity +NAME Cluster Subcluster Average_Intensity TYPE group group CELL_0001 CLST_A CLST_A_1 7.742 CELL_0002 CLST_A CLST_A_1 -13.745 diff --git a/test/test_data/validation/metadata_bad_has_coordinates.txt b/test/test_data/validation/metadata_bad_has_coordinates.txt index dd0269fdd..3ee795dc2 100644 --- a/test/test_data/validation/metadata_bad_has_coordinates.txt +++ b/test/test_data/validation/metadata_bad_has_coordinates.txt @@ -1,4 +1,4 @@ -NAME CLUSTER SUB-CLUSTER x +NAME CLUSTER SUBCLUSTER x TYPE group group numeric CELL_0001 CLST_A CLST_A_1 1 CELL_0002 CLST_A CLST_A_1 2 diff --git a/test/test_data/validation/metadata_bad_type_header.txt b/test/test_data/validation/metadata_bad_type_header.txt index 3602d225c..2e1f675ad 100644 --- a/test/test_data/validation/metadata_bad_type_header.txt +++ b/test/test_data/validation/metadata_bad_type_header.txt @@ -1,4 +1,4 @@ -NAME Cluster Sub-Cluster Average Intensity +NAME Cluster Subcluster Average_Intensity notTYPE group group numeric CELL_0001 CLST_A CLST_A_1 7.742 CELL_0002 CLST_A CLST_A_1 -13.745 @@ -14,4 +14,4 @@ CELL_00011 CLST_C CLST_C_1 0.638 CELL_00012 CLST_C CLST_C_1 8.888 CELL_00013 CLST_C CLST_C_1 -2.27 CELL_00014 CLST_C CLST_C_2 -2.606 -CELL_00015 CLST_C CLST_C_2 -9.089 \ No newline at end of file +CELL_00015 CLST_C CLST_C_2 -9.089 diff --git a/test/test_data/validation/metadata_invalid_annotation_name_period.tsv b/test/test_data/validation/metadata_invalid_annotation_name_period.tsv new file mode 100644 index 000000000..b62faed5c --- /dev/null +++ b/test/test_data/validation/metadata_invalid_annotation_name_period.tsv @@ -0,0 +1,12 @@ +NAME donor_id biosample_id sex species species__ontology_label library_preparation_protocol library_preparation_protocol__ontology_label organ organ__ontology_label disease disease__ontology_label invalid.header +TYPE group group group group group group group group group group group group +A donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown +B donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown +C donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown +D donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown +E donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown +F donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown +G donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown +H donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown +I donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown +J donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown diff --git a/test/test_data/validation/missing_gz_extension.txt b/test/test_data/validation/missing_gz_extension.txt new file mode 100644 index 000000000..c98cae8e0 Binary files /dev/null and b/test/test_data/validation/missing_gz_extension.txt differ