diff --git a/app/javascript/components/explore/PlotTabs.jsx b/app/javascript/components/explore/PlotTabs.jsx
index 7fd458d1b..5d4d0c280 100644
--- a/app/javascript/components/explore/PlotTabs.jsx
+++ b/app/javascript/components/explore/PlotTabs.jsx
@@ -55,7 +55,10 @@ export default function PlotTabs({
const tooltip = disabledTooltips[tabKey]
const numGenes = tooltip.numToSearch
const geneText = `gene${tooltip.isMulti ? 's' : ''}`
- const text = `To show this plot, search ${numGenes} ${geneText} using the box at left`
+ let text = `To show this plot, search ${numGenes} ${geneText} using the box at left`
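+  // scatter and distribution tabs can also plot multiple genes once collapsed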
+ if (['scatter', 'distribution'].includes(tabKey)) {
+ text += " or use the 'Collapse genes by' menu for multiple genes"
+ }
return (
diff --git a/app/javascript/lib/validation/shared-validation.js b/app/javascript/lib/validation/shared-validation.js
--- a/app/javascript/lib/validation/shared-validation.js
+++ b/app/javascript/lib/validation/shared-validation.js
@@ -53,3 +53,30 @@
+/** Verifies headers contain only letters, numbers, and underscores */
+export function validateAlphanumericAndUnderscores(headers, isMetadataFile=true) {
+  const issues = []
+
+  // this rule applies only to metadata annotation headers
+  if (!isMetadataFile) {
+    return issues
+  }
+
+  const prohibitedChars = /[^A-Za-z0-9_]/
+  const problemHeaders = []
+  headers.forEach(header => {
+ const hasProhibited = (header.search(prohibitedChars) !== -1)
+ if (hasProhibited) {
+ problemHeaders.push(header)
+ }
+ })
+
+ if (problemHeaders.length > 0) {
+ const problems = `"${problemHeaders.join('", "')}"`
+ const msg = `Update these headers to use only letters, numbers, and underscores: ${problems}`
+ issues.push(['error', 'format:cap:only-alphanumeric-underscore', msg])
+ }
+
+ return issues
+}
+
/** Verifies metadata file has all required columns */
export function validateRequiredMetadataColumns(parsedHeaders, isAnnData=false) {
const issues = []
diff --git a/app/javascript/lib/validation/validate-anndata.js b/app/javascript/lib/validation/validate-anndata.js
index 7e5f4fc49..08bc1da94 100644
--- a/app/javascript/lib/validation/validate-anndata.js
+++ b/app/javascript/lib/validation/validate-anndata.js
@@ -1,8 +1,9 @@
-import {openH5File} from 'hdf5-indexed-reader'
+import { openH5File } from 'hdf5-indexed-reader'
import { getOAuthToken } from '~/lib/scp-api'
import {
validateUnique, validateRequiredMetadataColumns,
+ validateAlphanumericAndUnderscores,
metadataSchema, REQUIRED_CONVENTION_COLUMNS
} from './shared-validation'
import { getAcceptedOntologies, fetchOntologies } from './ontology-validation'
@@ -285,6 +286,7 @@ export async function parseAnnDataFile(fileOrUrl, remoteProps) {
issues = issues.concat(
validateUnique(headers),
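+      // AnnData obs column names follow the same charset rule as metadata headers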
+ validateAlphanumericAndUnderscores(headers),
requiredMetadataIssues,
ontologyIdFormatIssues,
ontologyLabelAndIdIssues
diff --git a/app/javascript/lib/validation/validate-file-content.js b/app/javascript/lib/validation/validate-file-content.js
index f09ce7d0c..bc63527fb 100644
--- a/app/javascript/lib/validation/validate-file-content.js
+++ b/app/javascript/lib/validation/validate-file-content.js
@@ -18,7 +18,7 @@ import {
getParsedHeaderLines, parseLine, ParseException,
validateUniqueCellNamesWithinFile, validateMetadataLabelMatches,
validateGroupColumnCounts, timeOutCSFV, validateUnique,
- validateRequiredMetadataColumns
+ validateRequiredMetadataColumns, validateAlphanumericAndUnderscores
} from './shared-validation'
import { parseDifferentialExpressionFile } from './validate-differential-expression'
import { parseAnnDataFile } from './validate-anndata'
@@ -154,15 +154,16 @@ function validateEqualCount(headers, annotTypes) {
* Cap lines are like meta-information lines in other file formats
* (e.g. VCF), but do not begin with pound signs (#).
*/
-function validateCapFormat([headers, annotTypes]) {
+function validateCapFormat([headers, annotTypes], isMetadataFile=true) {
let issues = []
if (!headers || !annotTypes) {
return [['error', 'format:cap:no-cap-rows', 'File does not have 2 non-empty header rows']]
}
- // Check format rules that apply to both metadata and cluster files
+  // Check format rules that apply to both metadata and cluster files (one rule below is metadata-only)
issues = issues.concat(
validateUnique(headers),
+ validateAlphanumericAndUnderscores(headers, isMetadataFile),
validateNameKeyword(headers),
validateTypeKeyword(annotTypes),
validateGroupOrNumeric(annotTypes),
@@ -240,7 +241,7 @@ export async function parseMetadataFile(chunker, mimeType, fileOptions) {
/** parse a cluster file, and return an array of issues, along with file parsing info */
export async function parseClusterFile(chunker, mimeType) {
const { headers, delimiter } = await getParsedHeaderLines(chunker, mimeType)
- let issues = validateCapFormat(headers)
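+  // pass false to skip the metadata-only header charset rule for cluster files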
+ let issues = validateCapFormat(headers, false)
issues = issues.concat(validateClusterCoordinates(headers))
// add other header validations here
@@ -264,7 +265,7 @@ export async function parseClusterFile(chunker, mimeType) {
- * Example: prettyAndOr(['A', 'B', 'C'], 'or') > '"A", "B", or "C"' */
+ * Example: prettyAndOr(['gz', 'bam', 'tbi'], 'or') > '".gz", ".bam", or ".tbi"' */
function prettyAndOr(stringArray, operator) {
let phrase
- const quoted = stringArray.map(ext => `"${ext}"`)
+ const quoted = stringArray.map(ext => `".${ext}"`)
if (quoted.length === 1) {
phrase = quoted[0]
@@ -274,7 +275,7 @@ function prettyAndOr(stringArray, operator) {
} else if (quoted.length > 2) {
// e.g. "A", "B", or "C"
const last = quoted.slice(-1)[0]
- phrase = `${quoted.slice(-1).join(', ') } ${operator} ${last}`
+ phrase = `${quoted.slice(0, -1).join(', ')}, ${operator} ${last}`
}
return phrase
@@ -315,10 +316,10 @@ export async function validateGzipEncoding(file, fileType) {
}
} else {
if (firstByte === GZIP_MAGIC_NUMBER) {
- const prettyExts = prettyAndOr(EXTENSIONS_MUST_GZIP)
- const problem = `Only files with extensions ${prettyExts}) may be gzipped`
- const solution = 'please decompress file and retry'
- throw new ParseException('encoding:missing-gz-extension', `${problem}; ${solution}`)
+ const prettyExts = prettyAndOr(EXTENSIONS_MUST_GZIP, 'or')
+ const problem = `Only files with extensions ${prettyExts} may be gzipped`
+ const solution = 'Please add a ".gz" extension to the file name, or decompress the file, and retry.'
+ throw new ParseException('encoding:missing-gz-extension', `${problem}. ${solution}`)
} else {
isGzipped = false
}
diff --git a/app/models/ann_data_file_info.rb b/app/models/ann_data_file_info.rb
index d0d9184ef..51a935ed9 100644
--- a/app/models/ann_data_file_info.rb
+++ b/app/models/ann_data_file_info.rb
@@ -193,7 +193,7 @@ def sanitize_fragments!
# create the default cluster data_fragment entries
def set_default_cluster_fragments!
- return false if fragments_by_type(:cluster).any?
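+    # reference AnnData files are never parsed, so they get no default fragments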
+ return false if fragments_by_type(:cluster).any? || reference_file
default_obsm_keys = AnnDataIngestParameters::PARAM_DEFAULTS[:obsm_keys]
default_obsm_keys.each do |obsm_key_name|
diff --git a/app/models/differential_expression_result.rb b/app/models/differential_expression_result.rb
index 04a02a7e0..8810f6374 100644
--- a/app/models/differential_expression_result.rb
+++ b/app/models/differential_expression_result.rb
@@ -228,7 +228,7 @@ def remove_output_files
# in production, DeleteQueueJob will handle all necessary cleanup
return true if study.nil? || study.detached || study.queued_for_deletion
- identifier = "#{study.accession}:#{annotation_identifier}"
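+    # e.g. "SCP123:cell_type--group--study"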
+ identifier = "#{study.accession}:#{annotation_name}--group--#{annotation_scope}"
bucket_files.each do |filepath|
remote = ApplicationController.firecloud_client.get_workspace_file(study.bucket_id, filepath)
if remote.present?
diff --git a/app/models/ingest_job.rb b/app/models/ingest_job.rb
index 16f6d1592..2dbb8f738 100644
--- a/app/models/ingest_job.rb
+++ b/app/models/ingest_job.rb
@@ -32,6 +32,9 @@ class IngestJob
# steps that need to be accounted for
SPECIAL_ACTIONS = %i[differential_expression render_expression_arrays image_pipeline].freeze
+  # main processes that extract or ingest data for core visualizations (scatter, violin, dot, etc.)
+  CORE_ACTIONS = %w[ingest_anndata ingest_expression ingest_cell_metadata ingest_cluster].freeze
+
# jobs that need parameters objects in order to launch correctly
PARAMS_OBJ_REQUIRED = %i[
differential_expression render_expression_arrays image_pipeline ingest_anndata
@@ -347,7 +350,7 @@ def poll_for_completion(run_at: 1.minute.from_now)
if done? && !failed?
Rails.logger.info "IngestJob poller: #{pipeline_name} is done!"
Rails.logger.info "IngestJob poller: #{pipeline_name} status: #{current_status}"
- unless special_action? || action == :ingest_anndata
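+      # reference AnnData files are marked parsed here; viz AnnData waits for its fragment parses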
+ unless special_action? || (action == :ingest_anndata && study_file.is_viz_anndata?)
study_file.update(parse_status: 'parsed')
study_file.bundled_files.each { |sf| sf.update(parse_status: 'parsed') }
end
@@ -981,7 +984,7 @@ def log_to_mixpanel
mixpanel_log_props = get_job_analytics
# log job properties to Mixpanel
MetricsService.log(mixpanel_event_name, mixpanel_log_props, user)
- report_anndata_summary if study_file.is_viz_anndata? && action != :differential_expression
+ report_anndata_summary if study_file.is_viz_anndata? && action != :ingest_anndata
end
# set a mixpanel event name based on action
@@ -994,7 +997,9 @@ def anndata_summary_props
pipelines = ApplicationController.life_sciences_api_client.list_pipelines
previous_jobs = pipelines.operations.select do |op|
pipeline_args = op.metadata.dig('pipeline', 'actions').first['commands']
- pipeline_args.detect { |c| c == study_file.id.to_s } && !pipeline_args.include?('--differential-expression')
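+        # only count completed pipelines for this file that ran a core ingest action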
+ op.done? &&
+ pipeline_args.detect { |c| c == study_file.id.to_s } &&
+ (pipeline_args & CORE_ACTIONS).any?
end
# get total runtime from initial extract to final parse
initial_extract = previous_jobs.last
@@ -1023,6 +1028,7 @@ def anndata_summary_props
op.metadata.dig('pipeline', 'actions').first['commands'].detect { |c| c == '--extract' } ||
op.error.present?
end.count
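+      # raw counts extraction runs inside the initial extract job, so count it as an extra file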
+ num_files_extracted += 1 if extracted_raw_counts?(initial_extract.metadata)
# event properties for Mixpanel summary event
{
perfTime: job_perftime,
@@ -1039,20 +1045,43 @@ def anndata_summary_props
}
end
+ # determine if an ingest_anndata job extracted raw counts data
+ # reads from the --extract parameter to avoid counting filenames that include 'raw_counts'
+ def extracted_raw_counts?(pipeline_metadata)
+ commands = pipeline_metadata.dig('pipeline', 'actions').first['commands']
+ extract_idx = commands.index('--extract')
+ return false if extract_idx.nil?
+
+ extract_params = commands[extract_idx + 1]
+ extract_params.include?('raw_counts')
+ end
+
# report a summary of all AnnData extraction for this file to Mixpanel, if this is the last job
def report_anndata_summary
+ study_file.reload
+ return false if study_file.has_anndata_summary? # don't bother checking if summary is already sent
+
+ file_identifier = "#{study_file.upload_file_name} (#{study_file.id})"
+ Rails.logger.info "Checking AnnData summary for #{file_identifier} after #{action}"
remaining_jobs = DelayedJobAccessor.find_jobs_by_handler_type(IngestJob, study_file)
- fragment = params_object.associated_file # fragment file from this job
- # discard the job that corresponds with this ingest process
- still_processing = remaining_jobs.reject do |job|
+ # find running jobs associated with this file that are part of primary extraction (expression, metadata, clustering)
+ still_processing = remaining_jobs.select do |job|
ingest_job = DelayedJobAccessor.dump_job_handler(job).object
- ingest_job.params_object.is_a?(AnnDataIngestParameters) && ingest_job.params_object.associated_file == fragment
- end.any?
+ ingest_job.params_object.is_a?(AnnDataIngestParameters) &&
+ !ingest_job.done? &&
+ %i[ingest_cluster ingest_cell_metadata ingest_expression].include?(ingest_job.action)
+ end
- # only continue if there are no more jobs and a summary has not been sent yet
- study_file.reload
- return false if still_processing || study_file.has_anndata_summary?
+ if still_processing.any?
+ files = still_processing.map do |job|
+ ingest_job = DelayedJobAccessor.dump_job_handler(job).object
+ "#{ingest_job.action} - #{ingest_job.params_object.associated_file}"
+ end
+      Rails.logger.info "Found #{still_processing.count} jobs still processing for #{file_identifier}: #{files.join(', ')}"
+ return false
+ end
+ Rails.logger.info "Sending AnnData summary for #{file_identifier} after #{action}"
study_file.set_anndata_summary! # prevent race condition leading to duplicate summaries
MetricsService.log('ingestSummary', anndata_summary_props, user)
end
diff --git a/test/api/bulk_download_controller_test.rb b/test/api/bulk_download_controller_test.rb
index 2d8b7bca7..689f5eaa7 100644
--- a/test/api/bulk_download_controller_test.rb
+++ b/test/api/bulk_download_controller_test.rb
@@ -484,4 +484,18 @@ class BulkDownloadControllerTest < ActionDispatch::IntegrationTest
assert_equal %w[FakeHCAProject AnotherFakeHCAProject].sort, hca_accessions.sort
end
end
+
+ test 'should ignore unsynced directories' do
+ study = FactoryBot.create(:detached_study, name: 'Unsynced Dir Test', user: @user, test_array: @@studies_to_clean)
+ files = 1.upto(30).map do |i|
+ {
+ name: "_scp_internal/subdir/output_#{i}.tsv",
+ size: i * 100,
+ generation: SecureRandom.random_number(10000..99999)
+ }
+ end
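+    # sync_status: false marks the directory as unsynced, so it must be ignored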
+ directory = DirectoryListing.create(study:, file_type: 'tsv', name: '_scp_internal', files:, sync_status: false)
+ assert directory.persisted?
+ assert_empty Api::V1::BulkDownloadController.find_matching_directories('all', '', study.accession)
+ end
end
diff --git a/test/js/explore/explore-display-tabs.test.js b/test/js/explore/explore-display-tabs.test.js
index 316b1414d..bf9f5cd9e 100644
--- a/test/js/explore/explore-display-tabs.test.js
+++ b/test/js/explore/explore-display-tabs.test.js
@@ -26,6 +26,13 @@ jest.mock('lib/cell-faceting', () => {
}
})
+// Prevent "ReferenceError: $ is not defined" errors when trying to call $(window) functions
+jest.mock('lib/plot', () => {
+ return {
+ getPlotDimensions: jest.fn(() => { return { width: 100, height: 100 } })
+ }
+})
+
import React from 'react'
import { render, screen, waitFor } from '@testing-library/react'
import * as UserProvider from '~/providers/UserProvider'
@@ -424,6 +431,64 @@ describe('explore tabs are activated based on study info and parameters', () =>
expect(screen.getByTestId('cell-filtering-button')).toHaveTextContent('Filter plotted cells')
})
+ it('hides cell filtering when not applicable', async () => {
+ jest
+ .spyOn(UserProvider, 'getFeatureFlagsWithDefaults')
+ .mockReturnValue({
+ show_cell_facet_filtering: true
+ })
+
+ const newExplore = {
+ 'cluster': 'All Cells UMAP',
+ 'annotation': {
+ 'name': 'biosample_id',
+ 'type': 'group',
+ 'scope': 'study'
+ },
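+      // two genes with no consensus metric, so cell filtering should be unavailable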
+ 'consensus': null,
+ 'genes': ['A1BG', 'A1BG-AS1']
+ }
+    render((
+      <ExploreDisplayTabs exploreParams={newExplore} />
+    ))
+ expect(screen.getByTestId('cell-filtering-button-disabled')).toHaveTextContent('Filtering unavailable')
+ })
+
+ it('shows cell filtering when using consensus metric', async () => {
+ jest
+ .spyOn(UserProvider, 'getFeatureFlagsWithDefaults')
+ .mockReturnValue({
+ show_cell_facet_filtering: true
+ })
+
+ const newExplore = {
+ 'cluster': 'All Cells UMAP',
+ 'annotation': {
+ 'name': 'biosample_id',
+ 'type': 'group',
+ 'scope': 'study'
+ },
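+      // collapsing the two genes by median yields a single filterable plot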
+ 'consensus': 'median',
+ 'genes': ['A1BG', 'A1BG-AS1'],
+ 'tab': 'scatter',
+ 'facets': ''
+ }
+    render((
+      <ExploreDisplayTabs exploreParams={newExplore} />
+    ))
+ expect(screen.getByTestId('cell-filtering-button')).toHaveTextContent('Filter plotted cells')
+ })
+
it('disables cell filtering button', async () => {
jest
.spyOn(UserProvider, 'getFeatureFlagsWithDefaults')
@@ -440,6 +505,7 @@ describe('explore tabs are activated based on study info and parameters', () =>
clusterCanFilter={false}
filterErrorText={'Cluster is not indexed'}
panelToShow={'options'}
+ allowCellFiltering={true}
/>
)
diff --git a/test/js/lib/validate-anndata.test.js b/test/js/lib/validate-anndata.test.js
index b89d27c98..d5b9890ae 100644
--- a/test/js/lib/validate-anndata.test.js
+++ b/test/js/lib/validate-anndata.test.js
@@ -2,15 +2,11 @@ import {
getHdf5File, parseAnnDataFile, getAnnDataHeaders, checkOntologyIdFormat
} from 'lib/validation/validate-anndata'
-const BASE_URL = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data'
-
-// TODO: Uncomment this after PR merge
-// const BASE_URL = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata'
+const BASE_URL = 'https://github.com/broadinstitute/single_cell_portal_core/raw/development/test/test_data/anndata'
describe('Client-side file validation for AnnData', () => {
it('Parses AnnData headers', async () => {
- const url = `${BASE_URL}/anndata_test.h5ad`
- // const url = `${BASE_URL}/valid.h5ad` // Uncomment after PR merge
+ const url = `${BASE_URL}/valid.h5ad`
const expectedHeaders = [
'_index',
'biosample_id',
@@ -32,15 +28,13 @@ describe('Client-side file validation for AnnData', () => {
})
it('Reports AnnData with valid headers as valid', async () => {
- const url = `${BASE_URL}/anndata_test.h5ad`
- // const url = `${BASE_URL}/valid.h5ad` // Uncomment after PR merge
+ const url = `${BASE_URL}/valid.h5ad`
const parseResults = await parseAnnDataFile(url)
expect(parseResults.issues).toHaveLength(0)
})
it('Reports AnnData with invalid headers as invalid', async () => {
- const url = `${BASE_URL}/anndata_test_bad_header_no_species.h5ad`
- // const url = `${BASE_URL}/invalid_header_no_species.h5ad` // Uncomment after PR merge
+ const url = `${BASE_URL}/invalid_header_no_species.h5ad`
const parseResults = await parseAnnDataFile(url)
expect(parseResults.issues).toHaveLength(1)
@@ -63,8 +57,7 @@ describe('Client-side file validation for AnnData', () => {
})
it('Parses AnnData rows and reports invalid ontology IDs', async () => {
- const url = `${BASE_URL}/anndata_test_invalid_disease.h5ad`
- // const url = `${BASE_URL}/invalid_disease_id.h5ad` // Uncomment after PR merge
+ const url = `${BASE_URL}/invalid_disease_id.h5ad`
const parseResults = await parseAnnDataFile(url)
expect(parseResults.issues).toHaveLength(1)
@@ -77,7 +70,8 @@ describe('Client-side file validation for AnnData', () => {
expect(parseResults.issues[0]).toEqual(expectedIssue)
})
- // // TODO: Uncomment after PR merge
+ // TODO (SCP-5813): Uncomment this test upon completing "Enable ontology validation for remote AnnData"
+ //
// it('Parses AnnData rows and reports invalid ontology labels', async () => {
// const url = `${BASE_URL}/invalid_disease_label.h5ad`
// const parseResults = await parseAnnDataFile(url)
diff --git a/test/js/lib/validate-file-content.test.js b/test/js/lib/validate-file-content.test.js
index e07c83048..1c6f6570d 100644
--- a/test/js/lib/validate-file-content.test.js
+++ b/test/js/lib/validate-file-content.test.js
@@ -319,6 +319,17 @@ describe('Client-side file validation', () => {
expect(errors[0][1]).toEqual('encoding:missing-gz-extension')
})
+ it('catches real gzipped file with txt extension', async () => {
+    const file = createMockFile({ fileName: 'missing_gz_extension.txt' })
+ const [{ errors }] = await validateLocalFile(file, { file_type: 'Cluster' })
+ expect(errors).toHaveLength(1)
+ expect(errors[0][1]).toEqual('encoding:missing-gz-extension')
+ const expectedMessage =
+ // eslint-disable-next-line max-len
+ 'Only files with extensions ".gz", ".bam", or ".tbi" may be gzipped. Please add a ".gz" extension to the file name, or decompress the file, and retry.'
+ expect(errors[0][2]).toEqual(expectedMessage)
+ })
+
it('does not catch gzipped RDS file without .gz extension', async () => {
const file = createMockFile({ fileName: 'foo.rds', content: '\x1F\x2E3lkjf3' })
const [{ errors }] = await validateLocalFile(file, { file_type: 'Cluster' })
@@ -430,6 +441,25 @@ describe('Client-side file validation', () => {
const displayedWarning = screen.getByTestId('validation-warning')
expect(displayedWarning).toHaveTextContent(issues.warnings[0][2])
})
+
+  it('Does not catch disallowed characters in cluster header', async () => {
+    const file = createMockFile({
+      fileName: 'foo.txt',
+      content: 'NAME,X,Y,invalid.header\nTYPE,numeric,numeric,numeric\nCELL_0001,34.472,32.211,1\nCELL_0002,15.975,10.043,5'
+    })
+    const [{ errors }] = await validateLocalFile(file, { file_type: 'Cluster' })
+    expect(errors).toHaveLength(0)
+  })
+
+  it('Catches disallowed characters in metadata header', async () => {
+    const file = createMockFile({ fileName: 'metadata_invalid_annotation_name_period.tsv' })
+    const [{ errors }] = await validateLocalFile(file, { file_type: 'Metadata' })
+    expect(errors).toHaveLength(1)
+
+    const expectedErrorType = 'format:cap:only-alphanumeric-underscore'
+    const errorType = errors[0][1]
+    expect(errorType).toBe(expectedErrorType)
+  })
})
// With the client side file validation feature flag set to false expect invalid files to pass
diff --git a/test/models/ann_data_file_info_test.rb b/test/models/ann_data_file_info_test.rb
index fd7a80167..dfbc17228 100644
--- a/test/models/ann_data_file_info_test.rb
+++ b/test/models/ann_data_file_info_test.rb
@@ -127,7 +127,7 @@ def generate_id
end
test 'should set default cluster fragments' do
- ann_data_info = AnnDataFileInfo.new
+ ann_data_info = AnnDataFileInfo.new(reference_file: false)
assert ann_data_info.valid?
default_keys = AnnDataIngestParameters::PARAM_DEFAULTS[:obsm_keys]
default_keys.each do |obsm_key_name|
@@ -135,6 +135,11 @@ def generate_id
matcher = { data_type: :cluster, name:, obsm_key_name: }.with_indifferent_access
assert ann_data_info.find_fragment(**matcher).present?
end
+    # ensure reference (non-parseable) AnnData files don't create default fragments
+ reference_anndata = AnnDataFileInfo.new
+ assert reference_anndata.valid?
+ assert_empty reference_anndata.data_fragments
+ assert_empty reference_anndata.obsm_key_names
end
test 'should validate data fragments' do
diff --git a/test/models/ingest_job_test.rb b/test/models/ingest_job_test.rb
index 656339135..93e00d3ae 100644
--- a/test/models/ingest_job_test.rb
+++ b/test/models/ingest_job_test.rb
@@ -333,7 +333,7 @@ class IngestJobTest < ActiveSupport::TestCase
end
end
- test 'should get ingest summary for AnnData parsing' do
+ test 'should get ingestSummary for AnnData parsing' do
ann_data_file = FactoryBot.create(:ann_data_file, name: 'data.h5ad', study: @basic_study)
ann_data_file.ann_data_file_info.reference_file = false
ann_data_file.ann_data_file_info.data_fragments = [
@@ -369,8 +369,9 @@ class IngestJobTest < ActiveSupport::TestCase
}.with_indifferent_access
pipeline_mock = MiniTest::Mock.new
- 4.times {pipeline_mock.expect :metadata, metadata_mock }
+ 5.times {pipeline_mock.expect :metadata, metadata_mock }
2.times {pipeline_mock.expect :error, nil }
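+    # anndata_summary_props now filters on op.done?, so expect one done? call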
+ pipeline_mock.expect :done?, true
operations_mock = Minitest::Mock.new
operations_mock.expect :operations, [pipeline_mock]
@@ -418,20 +419,27 @@ class IngestJobTest < ActiveSupport::TestCase
ingest_cluster: true, name: 'UMAP', cluster_file:, domain_ranges: {}, ingest_anndata: false,
extract: nil, obsm_keys: nil
)
+ pipeline_name = SecureRandom.uuid
cluster_job = IngestJob.new(
- study: @basic_study, study_file: ann_data_file, user: @user, action: :ingest_cluster,
+ pipeline_name:, study: @basic_study, study_file: ann_data_file, user: @user, action: :ingest_cluster,
params_object: cluster_params_object
)
job_mock = Minitest::Mock.new
- job_mock.expect :object, cluster_job
+ 2.times { job_mock.expect :object, cluster_job }
+
+ pipeline_mock = Minitest::Mock.new
+ pipeline_mock.expect :done?, false
# negative test
DelayedJobAccessor.stub :find_jobs_by_handler_type, [Delayed::Job.new] do
DelayedJobAccessor.stub :dump_job_handler, job_mock do
- metadata_job.report_anndata_summary
- job_mock.verify
- ann_data_file.reload
- assert_not ann_data_file.has_anndata_summary?
+ ApplicationController.life_sciences_api_client.stub :get_pipeline, pipeline_mock do
+ metadata_job.report_anndata_summary
+ job_mock.verify
+ pipeline_mock.verify
+ ann_data_file.reload
+ assert_not ann_data_file.has_anndata_summary?
+ end
end
end
@@ -458,6 +466,7 @@ class IngestJobTest < ActiveSupport::TestCase
cluster_job.report_anndata_summary
ann_data_file.reload
assert ann_data_file.has_anndata_summary?
+ metrics_mock.verify
end
end
end
@@ -473,8 +482,9 @@ class IngestJobTest < ActiveSupport::TestCase
pipeline = { actions: }
failed_metadata = { pipeline:, events:, startTime: (now - 1.hour).to_s, endTime: now.to_s }.with_indifferent_access
failed_pipeline = Minitest::Mock.new
- 6.times { failed_pipeline.expect(:metadata, failed_metadata) }
+ 7.times { failed_pipeline.expect(:metadata, failed_metadata) }
3.times { failed_pipeline.expect(:error, true) }
+ failed_pipeline.expect :done?, true
operations_mock = Minitest::Mock.new
operations_mock.expect :operations, [failed_pipeline]
client_mock = Minitest::Mock.new
diff --git a/test/test_data/anndata/invalid_annotation_name_period.h5ad b/test/test_data/anndata/invalid_annotation_name_period.h5ad
new file mode 100644
index 000000000..a2f02e0a1
Binary files /dev/null and b/test/test_data/anndata/invalid_annotation_name_period.h5ad differ
diff --git a/test/test_data/metadata_example.txt b/test/test_data/metadata_example.txt
index 07270de1b..0a9e67673 100644
--- a/test/test_data/metadata_example.txt
+++ b/test/test_data/metadata_example.txt
@@ -1,4 +1,4 @@
-NAME Cluster Sub-Cluster Average Intensity
+NAME Cluster Subcluster Average_Intensity
TYPE group group numeric
CELL_0001 CLST_A CLST_A_1 7.742
CELL_0002 CLST_A CLST_A_1 -13.745
@@ -14,4 +14,4 @@ CELL_00011 CLST_C CLST_C_1 0.638
CELL_00012 CLST_C CLST_C_1 8.888
CELL_00013 CLST_C CLST_C_1 -2.27
CELL_00014 CLST_C CLST_C_2 -2.606
-CELL_00015 CLST_C CLST_C_2 -9.089
\ No newline at end of file
+CELL_00015 CLST_C CLST_C_2 -9.089
diff --git a/test/test_data/validation/cluster_valid_annotation_name_period.txt b/test/test_data/validation/cluster_valid_annotation_name_period.txt
new file mode 100644
index 000000000..e2dd55efd
--- /dev/null
+++ b/test/test_data/validation/cluster_valid_annotation_name_period.txt
@@ -0,0 +1,17 @@
+NAME X Y Z Category Intensity invalid.header
+TYPE numeric numeric numeric group numeric numeric
+CELL_0001 34.472 32.211 60.035 C 0.719 1
+CELL_0002 15.975 10.043 21.424 B 0.904 1
+CELL_0003 -11.688 -53.645 -58.374 A 2.195 1
+CELL_0004 30.04 31.138 33.597 B -1.084 1
+CELL_0005 23.862 33.092 26.904 B 4.256 2
+CELL_0006 -39.07 -14.64 -44.643 A 1.317 2
+CELL_0007 40.039 27.206 55.211 C -4.917 2
+CELL_0008 28.755 27.187 34.686 B -3.777 2
+CELL_0009 -48.601 -13.512 -51.659 A 0.778 2
+CELL_00010 14.653 27.832 28.586 B 4.62 2
+CELL_00011 20.603 32.071 45.484 C -3.019 2
+CELL_00012 -10.333 -51.733 -26.631 A -4.989 2
+CELL_00013 -52.966 -12.484 -60.369 A 3.137 2
+CELL_00014 38.513 26.969 63.654 C -1.74 2
+CELL_00015 12.838 13.047 17.685 B -2.443 2
diff --git a/test/test_data/validation/header_count_mismatch.tsv b/test/test_data/validation/header_count_mismatch.tsv
index fc495e025..a2bafbf27 100644
--- a/test/test_data/validation/header_count_mismatch.tsv
+++ b/test/test_data/validation/header_count_mismatch.tsv
@@ -1,4 +1,4 @@
-NAME Cluster Sub-Cluster Average Intensity
+NAME Cluster Subcluster Average_Intensity
TYPE group group
CELL_0001 CLST_A CLST_A_1 7.742
CELL_0002 CLST_A CLST_A_1 -13.745
diff --git a/test/test_data/validation/metadata_bad_has_coordinates.txt b/test/test_data/validation/metadata_bad_has_coordinates.txt
index dd0269fdd..3ee795dc2 100644
--- a/test/test_data/validation/metadata_bad_has_coordinates.txt
+++ b/test/test_data/validation/metadata_bad_has_coordinates.txt
@@ -1,4 +1,4 @@
-NAME CLUSTER SUB-CLUSTER x
+NAME CLUSTER SUBCLUSTER x
TYPE group group numeric
CELL_0001 CLST_A CLST_A_1 1
CELL_0002 CLST_A CLST_A_1 2
diff --git a/test/test_data/validation/metadata_bad_type_header.txt b/test/test_data/validation/metadata_bad_type_header.txt
index 3602d225c..2e1f675ad 100644
--- a/test/test_data/validation/metadata_bad_type_header.txt
+++ b/test/test_data/validation/metadata_bad_type_header.txt
@@ -1,4 +1,4 @@
-NAME Cluster Sub-Cluster Average Intensity
+NAME Cluster Subcluster Average_Intensity
notTYPE group group numeric
CELL_0001 CLST_A CLST_A_1 7.742
CELL_0002 CLST_A CLST_A_1 -13.745
@@ -14,4 +14,4 @@ CELL_00011 CLST_C CLST_C_1 0.638
CELL_00012 CLST_C CLST_C_1 8.888
CELL_00013 CLST_C CLST_C_1 -2.27
CELL_00014 CLST_C CLST_C_2 -2.606
-CELL_00015 CLST_C CLST_C_2 -9.089
\ No newline at end of file
+CELL_00015 CLST_C CLST_C_2 -9.089
diff --git a/test/test_data/validation/metadata_invalid_annotation_name_period.tsv b/test/test_data/validation/metadata_invalid_annotation_name_period.tsv
new file mode 100644
index 000000000..b62faed5c
--- /dev/null
+++ b/test/test_data/validation/metadata_invalid_annotation_name_period.tsv
@@ -0,0 +1,12 @@
+NAME donor_id biosample_id sex species species__ontology_label library_preparation_protocol library_preparation_protocol__ontology_label organ organ__ontology_label disease disease__ontology_label invalid.header
+TYPE group group group group group group group group group group group group
+A donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown
+B donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown
+C donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown
+D donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown
+E donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown
+F donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown
+G donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown
+H donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown
+I donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown
+J donor_1 biosample_1 unknown NCBITaxon_9606 Homo sapiens EFO_0009901 10x 3' v1 UBERON_0000178 blood PATO_0000461 normal unknown
diff --git a/test/test_data/validation/missing_gz_extension.txt b/test/test_data/validation/missing_gz_extension.txt
new file mode 100644
index 000000000..c98cae8e0
Binary files /dev/null and b/test/test_data/validation/missing_gz_extension.txt differ