From 6d4d2137a783a1b4f1f2444fd42790073f67e1c7 Mon Sep 17 00:00:00 2001 From: Thomas Krause Date: Wed, 14 Aug 2024 12:44:33 +0200 Subject: [PATCH] Use buffered writer to write XML files --- src/error.rs | 4 ++-- src/exporter/saltxml.rs | 3 +++ src/exporter/saltxml/corpus_structure.rs | 5 +++-- src/exporter/saltxml/document.rs | 14 +++++++++----- ...r__conllu__tests__conll_fail_invalid_heads.snap | 4 ++-- ...rter__exmaralda__tests__bad_timevalue_fail.snap | 4 ++-- ..._importer__exmaralda__tests__category_fail.snap | 4 ++-- ...__importer__exmaralda__tests__invalid_fail.snap | 4 ++-- ...__importer__exmaralda__tests__speaker_fail.snap | 4 ++-- ..._importer__exmaralda__tests__timeline_fail.snap | 4 ++-- ...__exmaralda__tests__undefined_speaker_fail.snap | 4 ++-- ...er__exmaralda__tests__underspec_event_fail.snap | 8 ++++---- ...porter__exmaralda__tests__unknown_tli_fail.snap | 4 ++-- src/importer/relannis/tests.rs | 2 +- tests/snapshots/cli__run_failing_conversion.snap | 3 +-- 15 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/error.rs b/src/error.rs index 1780defd..54dfb4ba 100644 --- a/src/error.rs +++ b/src/error.rs @@ -15,13 +15,13 @@ pub type StandardErrorResult = std::result::Result }, - #[error("Error during exporting corpus from {path} with {exporter:?}: {reason:?}")] + #[error("Error during exporting corpus to {path} with {exporter:?}: {reason:?}")] Export { reason: String, exporter: String, path: PathBuf, }, - #[error("Error during importing corpus to {path} with {importer:?}: {reason:?}")] + #[error("Error during importing corpus from {path} with {importer:?}: {reason:?}")] Import { reason: String, importer: String, diff --git a/src/exporter/saltxml.rs b/src/exporter/saltxml.rs index af7b9201..f65dd380 100644 --- a/src/exporter/saltxml.rs +++ b/src/exporter/saltxml.rs @@ -45,6 +45,9 @@ impl Exporter for ExportSaltXml { ) -> Result<(), Box> { let progress = ProgressReporter::new_unknown_total_work(tx.clone(), step_id.clone())?; let corpus_mapper = SaltCorpusStructureMapper::new(); + + std::fs::create_dir_all(output_path)?; + progress.info("Mapping SaltXML corpus structure")?; let document_node_ids = corpus_mapper.map_corpus_structure(graph, output_path)?; let progress = ProgressReporter::new(tx, step_id, document_node_ids.len())?; diff --git a/src/exporter/saltxml/corpus_structure.rs b/src/exporter/saltxml/corpus_structure.rs index 9a24a370..fc488c7f 100644 --- a/src/exporter/saltxml/corpus_structure.rs +++ b/src/exporter/saltxml/corpus_structure.rs @@ -1,4 +1,4 @@ -use std::ffi::OsStr; +use std::{ffi::OsStr, io::BufWriter}; use anyhow::Context; use graphannis::{ @@ -36,9 +36,10 @@ impl SaltCorpusStructureMapper { let project_file_path = output_path.join("saltProject.salt"); let output_file = std::fs::File::create(project_file_path)?; + let buffered_output_file = BufWriter::new(output_file); let mut writer = EmitterConfig::new() .perform_indent(true) - .create_writer(output_file); + .create_writer(buffered_output_file); writer.write(XmlEvent::StartDocument { version: xml::common::XmlVersion::Version11, diff --git a/src/exporter/saltxml/document.rs b/src/exporter/saltxml/document.rs index e3c6896b..f52eb174 100644 --- a/src/exporter/saltxml/document.rs +++ b/src/exporter/saltxml/document.rs @@ -1,4 +1,4 @@ -use std::{collections::BTreeMap, fs::File, sync::Arc}; +use std::{collections::BTreeMap, fs::File, io::BufWriter, sync::Arc}; use anyhow::Context; use graphannis::{ @@ -61,9 +61,10 @@ impl SaltDocumentGraphMapper { output_path: &std::path::Path, ) -> anyhow::Result<()> { let output_file = self.create_saltfile(graph, document_node_id, output_path)?; + let buffered_output_file = BufWriter::new(output_file); let mut writer = EmitterConfig::new() .perform_indent(true) - .create_writer(output_file); + .create_writer(buffered_output_file); writer.write(XmlEvent::StartDocument { version: xml::common::XmlVersion::Version11, @@ -225,12 +226,15 @@ impl SaltDocumentGraphMapper { Ok(output_file) } - fn map_textual_ds( + fn map_textual_ds( &self, graph: &AnnotationGraph, document_node_id: NodeID, - salt_writer: &mut SaltWriter, - ) -> anyhow::Result<()> { + salt_writer: &mut SaltWriter, + ) -> anyhow::Result<()> + where + W: std::io::Write, + { let ordering_components = graph.get_all_components(Some(AnnotationComponentType::Ordering), None); diff --git a/src/importer/conllu/snapshots/annatto__importer__conllu__tests__conll_fail_invalid_heads.snap b/src/importer/conllu/snapshots/annatto__importer__conllu__tests__conll_fail_invalid_heads.snap index 7b525947..3a2765ce 100644 --- a/src/importer/conllu/snapshots/annatto__importer__conllu__tests__conll_fail_invalid_heads.snap +++ b/src/importer/conllu/snapshots/annatto__importer__conllu__tests__conll_fail_invalid_heads.snap @@ -1,5 +1,5 @@ --- source: src/importer/conllu/tests.rs -expression: job.err().unwrap() +expression: job.err().unwrap().to_string() --- -Error during importing corpus to invalid-heads/test_file with "import_conllu": "Failed to build dependency tree: Unknown head id `9` (1, 1)" +Error during importing corpus from invalid-heads/test_file with "import_conllu": "Failed to build dependency tree: Unknown head id `9` (1, 1)" diff --git a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__bad_timevalue_fail.snap b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__bad_timevalue_fail.snap index 964dc23d..ae74f08f 100644 --- a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__bad_timevalue_fail.snap +++ b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__bad_timevalue_fail.snap @@ -1,5 +1,5 @@ --- source: src/importer/exmaralda/tests.rs -expression: r.err().unwrap() +expression: r.err().unwrap().to_string() --- -Error during importing corpus to ./tests/data/import/exmaralda/fail-bad_timevalue/test_doc.exb with "import_exmaralda": "Failed to parse tli time value." +Error during importing corpus from ./tests/data/import/exmaralda/fail-bad_timevalue/test_doc.exb with "import_exmaralda": "Failed to parse tli time value." diff --git a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__category_fail.snap b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__category_fail.snap index d68c4513..87ed70dc 100644 --- a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__category_fail.snap +++ b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__category_fail.snap @@ -1,5 +1,5 @@ --- source: src/importer/exmaralda/tests.rs -expression: r.err().unwrap() +expression: r.err().unwrap().to_string() --- -Error during importing corpus to ./tests/data/import/exmaralda/fail-no_category/test_doc.exb with "import_exmaralda": "Tier encountered with undefined category attribute." +Error during importing corpus from ./tests/data/import/exmaralda/fail-no_category/test_doc.exb with "import_exmaralda": "Tier encountered with undefined category attribute." diff --git a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__invalid_fail.snap b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__invalid_fail.snap index ef98e8c9..4643f0ad 100644 --- a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__invalid_fail.snap +++ b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__invalid_fail.snap @@ -1,5 +1,5 @@ --- source: src/importer/exmaralda/tests.rs -expression: r.err().unwrap() +expression: r.err().unwrap().to_string() --- -Error during importing corpus to ./tests/data/import/exmaralda/fail-invalid/import/exmaralda/test_doc_invalid.exb with "import_exmaralda": "Start time is bigger than end time for ids: T1--T2 " +Error during importing corpus from ./tests/data/import/exmaralda/fail-invalid/import/exmaralda/test_doc_invalid.exb with "import_exmaralda": "Start time is bigger than end time for ids: T1--T2 " diff --git a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__speaker_fail.snap b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__speaker_fail.snap index a357fc72..89ef2861 100644 --- a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__speaker_fail.snap +++ b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__speaker_fail.snap @@ -1,5 +1,5 @@ --- source: src/importer/exmaralda/tests.rs -expression: r.err().unwrap() +expression: r.err().unwrap().to_string() --- -Error during importing corpus to ./tests/data/import/exmaralda/fail-no_speaker/test_doc.exb with "import_exmaralda": "Undefined speaker (not defined in tier attributes)." +Error during importing corpus from ./tests/data/import/exmaralda/fail-no_speaker/test_doc.exb with "import_exmaralda": "Undefined speaker (not defined in tier attributes)." diff --git a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__timeline_fail.snap b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__timeline_fail.snap index d58b33eb..213daf87 100644 --- a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__timeline_fail.snap +++ b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__timeline_fail.snap @@ -1,5 +1,5 @@ --- source: src/importer/exmaralda/tests.rs -expression: r.err().unwrap() +expression: r.err().unwrap().to_string() --- -Error during importing corpus to ./tests/data/import/exmaralda/fail-corrupt_timeline/import/exmaralda/test_doc.exb with "import_exmaralda": "Start time is bigger than end time for ids: T1--T2 " +Error during importing corpus from ./tests/data/import/exmaralda/fail-corrupt_timeline/import/exmaralda/test_doc.exb with "import_exmaralda": "Start time is bigger than end time for ids: T1--T2 " diff --git a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__undefined_speaker_fail.snap b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__undefined_speaker_fail.snap index 551d1749..5a5b685e 100644 --- a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__undefined_speaker_fail.snap +++ b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__undefined_speaker_fail.snap @@ -1,5 +1,5 @@ --- source: src/importer/exmaralda/tests.rs -expression: r.err().unwrap() +expression: r.err().unwrap().to_string() --- -Error during importing corpus to ./tests/data/import/exmaralda/fail-undefined_speaker/test_doc.exb with "import_exmaralda": "Speaker `dipl` has not been defined in speaker-table." +Error during importing corpus from ./tests/data/import/exmaralda/fail-undefined_speaker/test_doc.exb with "import_exmaralda": "Speaker `dipl` has not been defined in speaker-table." diff --git a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__underspec_event_fail.snap b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__underspec_event_fail.snap index d758adba..3798e0d1 100644 --- a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__underspec_event_fail.snap +++ b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__underspec_event_fail.snap @@ -1,7 +1,7 @@ --- source: src/importer/exmaralda/tests.rs -expression: r.err().unwrap() +expression: r.err().unwrap().to_string() --- -Conversion failed with errors: Error during importing corpus to ./tests/data/import/exmaralda/fail-no_start_no_end/test_doc.exb with "import_exmaralda": "Could not determine end id of currently processed event `in`. Event will be skipped. Import will fail." -Error during importing corpus to ./tests/data/import/exmaralda/fail-no_start_no_end/test_doc.exb with "import_exmaralda": "Could not determine start id of currently processed event `York`. Event will be skipped. Import will fail." -Error during importing corpus to ./tests/data/import/exmaralda/fail-no_start_no_end/test_doc.exb with "import_exmaralda": "Could not determine start id of currently processed event `in`. Event will be skipped. Import will fail." +Conversion failed with errors: Error during importing corpus from ./tests/data/import/exmaralda/fail-no_start_no_end/test_doc.exb with "import_exmaralda": "Could not determine end id of currently processed event `in`. Event will be skipped. Import will fail." +Error during importing corpus from ./tests/data/import/exmaralda/fail-no_start_no_end/test_doc.exb with "import_exmaralda": "Could not determine start id of currently processed event `York`. Event will be skipped. Import will fail." +Error during importing corpus from ./tests/data/import/exmaralda/fail-no_start_no_end/test_doc.exb with "import_exmaralda": "Could not determine start id of currently processed event `in`. Event will be skipped. Import will fail." diff --git a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__unknown_tli_fail.snap b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__unknown_tli_fail.snap index b55e8dc3..df69eed7 100644 --- a/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__unknown_tli_fail.snap +++ b/src/importer/exmaralda/snapshots/annatto__importer__exmaralda__tests__unknown_tli_fail.snap @@ -1,5 +1,5 @@ --- source: src/importer/exmaralda/tests.rs -expression: r.err().unwrap() +expression: r.err().unwrap().to_string() --- -Error during importing corpus to ./tests/data/import/exmaralda/fail-unknown_tli/test_doc.exb with "import_exmaralda": "Unknown time line item: T1" +Error during importing corpus from ./tests/data/import/exmaralda/fail-unknown_tli/test_doc.exb with "import_exmaralda": "Unknown time line item: T1" diff --git a/src/importer/relannis/tests.rs b/src/importer/relannis/tests.rs index 427b2d01..f4498ff9 100644 --- a/src/importer/relannis/tests.rs +++ b/src/importer/relannis/tests.rs @@ -338,7 +338,7 @@ fn parse_relannis_workflow() { // This should fail, because the input directory does not exist assert_eq!(true, r.is_err()); assert_eq!( - r#"Error during importing corpus to ../data/import/relannis/does-not-exist with "import_relannis": "directory ./tests/workflows/../data/import/relannis/does-not-exist not found""#, + r#"Error during importing corpus from ../data/import/relannis/does-not-exist with "import_relannis": "directory ./tests/workflows/../data/import/relannis/does-not-exist not found""#, r.err().unwrap().to_string() ) } diff --git a/tests/snapshots/cli__run_failing_conversion.snap b/tests/snapshots/cli__run_failing_conversion.snap index fee47627..7abf03dc 100644 --- a/tests/snapshots/cli__run_failing_conversion.snap +++ b/tests/snapshots/cli__run_failing_conversion.snap @@ -2,5 +2,4 @@ source: tests/cli.rs expression: output_err --- -Error: Error during importing corpus to doesnotexist.graphml with "import_graphml": "No such file or directory (os error 2)" - +Error: Error during importing corpus from doesnotexist.graphml with "import_graphml": "No such file or directory (os error 2)"