Skip to content

Commit

Permalink
Use buffered writer to write XML files
Browse files: browse the repository at this point in the history
  • Loading branch information
thomaskrause committed Aug 14, 2024
1 parent eb75a81 commit 6d4d213
Show file tree
Hide file tree
Showing 15 changed files with 39 additions and 32 deletions.
4 changes: 2 additions & 2 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ pub type StandardErrorResult<T> = std::result::Result<T, Box<dyn std::error::Err
pub enum AnnattoError {
#[error("Conversion failed with errors: {}", errors.iter().map(|e| e.to_string()).join("\n"))]
ConversionFailed { errors: Vec<AnnattoError> },
#[error("Error during exporting corpus from {path} with {exporter:?}: {reason:?}")]
#[error("Error during exporting corpus to {path} with {exporter:?}: {reason:?}")]
Export {
reason: String,
exporter: String,
path: PathBuf,
},
#[error("Error during importing corpus to {path} with {importer:?}: {reason:?}")]
#[error("Error during importing corpus from {path} with {importer:?}: {reason:?}")]
Import {
reason: String,
importer: String,
Expand Down
3 changes: 3 additions & 0 deletions src/exporter/saltxml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ impl Exporter for ExportSaltXml {
) -> Result<(), Box<dyn std::error::Error>> {
let progress = ProgressReporter::new_unknown_total_work(tx.clone(), step_id.clone())?;
let corpus_mapper = SaltCorpusStructureMapper::new();

std::fs::create_dir_all(output_path)?;

progress.info("Mapping SaltXML corpus structure")?;
let document_node_ids = corpus_mapper.map_corpus_structure(graph, output_path)?;
let progress = ProgressReporter::new(tx, step_id, document_node_ids.len())?;
Expand Down
5 changes: 3 additions & 2 deletions src/exporter/saltxml/corpus_structure.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::ffi::OsStr;
use std::{ffi::OsStr, io::BufWriter};

use anyhow::Context;
use graphannis::{
Expand Down Expand Up @@ -36,9 +36,10 @@ impl SaltCorpusStructureMapper {

let project_file_path = output_path.join("saltProject.salt");
let output_file = std::fs::File::create(project_file_path)?;
let buffered_output_file = BufWriter::new(output_file);
let mut writer = EmitterConfig::new()
.perform_indent(true)
.create_writer(output_file);
.create_writer(buffered_output_file);

writer.write(XmlEvent::StartDocument {
version: xml::common::XmlVersion::Version11,
Expand Down
14 changes: 9 additions & 5 deletions src/exporter/saltxml/document.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{collections::BTreeMap, fs::File, sync::Arc};
use std::{collections::BTreeMap, fs::File, io::BufWriter, sync::Arc};

use anyhow::Context;
use graphannis::{
Expand Down Expand Up @@ -61,9 +61,10 @@ impl SaltDocumentGraphMapper {
output_path: &std::path::Path,
) -> anyhow::Result<()> {
let output_file = self.create_saltfile(graph, document_node_id, output_path)?;
let buffered_output_file = BufWriter::new(output_file);
let mut writer = EmitterConfig::new()
.perform_indent(true)
.create_writer(output_file);
.create_writer(buffered_output_file);

writer.write(XmlEvent::StartDocument {
version: xml::common::XmlVersion::Version11,
Expand Down Expand Up @@ -225,12 +226,15 @@ impl SaltDocumentGraphMapper {
Ok(output_file)
}

fn map_textual_ds(
fn map_textual_ds<W>(
&self,
graph: &AnnotationGraph,
document_node_id: NodeID,
salt_writer: &mut SaltWriter<File>,
) -> anyhow::Result<()> {
salt_writer: &mut SaltWriter<W>,
) -> anyhow::Result<()>
where
W: std::io::Write,
{
let ordering_components =
graph.get_all_components(Some(AnnotationComponentType::Ordering), None);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
source: src/importer/conllu/tests.rs
expression: job.err().unwrap()
expression: job.err().unwrap().to_string()
---
Error during importing corpus to invalid-heads/test_file with "import_conllu": "Failed to build dependency tree: Unknown head id `9` (1, 1)"
Error during importing corpus from invalid-heads/test_file with "import_conllu": "Failed to build dependency tree: Unknown head id `9` (1, 1)"
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
source: src/importer/exmaralda/tests.rs
expression: r.err().unwrap()
expression: r.err().unwrap().to_string()
---
Error during importing corpus to ./tests/data/import/exmaralda/fail-bad_timevalue/test_doc.exb with "import_exmaralda": "Failed to parse tli time value."
Error during importing corpus from ./tests/data/import/exmaralda/fail-bad_timevalue/test_doc.exb with "import_exmaralda": "Failed to parse tli time value."
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
source: src/importer/exmaralda/tests.rs
expression: r.err().unwrap()
expression: r.err().unwrap().to_string()
---
Error during importing corpus to ./tests/data/import/exmaralda/fail-no_category/test_doc.exb with "import_exmaralda": "Tier encountered with undefined category attribute."
Error during importing corpus from ./tests/data/import/exmaralda/fail-no_category/test_doc.exb with "import_exmaralda": "Tier encountered with undefined category attribute."
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
source: src/importer/exmaralda/tests.rs
expression: r.err().unwrap()
expression: r.err().unwrap().to_string()
---
Error during importing corpus to ./tests/data/import/exmaralda/fail-invalid/import/exmaralda/test_doc_invalid.exb with "import_exmaralda": "Start time is bigger than end time for ids: T1--T2 "
Error during importing corpus from ./tests/data/import/exmaralda/fail-invalid/import/exmaralda/test_doc_invalid.exb with "import_exmaralda": "Start time is bigger than end time for ids: T1--T2 "
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
source: src/importer/exmaralda/tests.rs
expression: r.err().unwrap()
expression: r.err().unwrap().to_string()
---
Error during importing corpus to ./tests/data/import/exmaralda/fail-no_speaker/test_doc.exb with "import_exmaralda": "Undefined speaker (not defined in tier attributes)."
Error during importing corpus from ./tests/data/import/exmaralda/fail-no_speaker/test_doc.exb with "import_exmaralda": "Undefined speaker (not defined in tier attributes)."
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
source: src/importer/exmaralda/tests.rs
expression: r.err().unwrap()
expression: r.err().unwrap().to_string()
---
Error during importing corpus to ./tests/data/import/exmaralda/fail-corrupt_timeline/import/exmaralda/test_doc.exb with "import_exmaralda": "Start time is bigger than end time for ids: T1--T2 "
Error during importing corpus from ./tests/data/import/exmaralda/fail-corrupt_timeline/import/exmaralda/test_doc.exb with "import_exmaralda": "Start time is bigger than end time for ids: T1--T2 "
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
source: src/importer/exmaralda/tests.rs
expression: r.err().unwrap()
expression: r.err().unwrap().to_string()
---
Error during importing corpus to ./tests/data/import/exmaralda/fail-undefined_speaker/test_doc.exb with "import_exmaralda": "Speaker `dipl` has not been defined in speaker-table."
Error during importing corpus from ./tests/data/import/exmaralda/fail-undefined_speaker/test_doc.exb with "import_exmaralda": "Speaker `dipl` has not been defined in speaker-table."
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
source: src/importer/exmaralda/tests.rs
expression: r.err().unwrap()
expression: r.err().unwrap().to_string()
---
Conversion failed with errors: Error during importing corpus to ./tests/data/import/exmaralda/fail-no_start_no_end/test_doc.exb with "import_exmaralda": "Could not determine end id of currently processed event `in`. Event will be skipped. Import will fail."
Error during importing corpus to ./tests/data/import/exmaralda/fail-no_start_no_end/test_doc.exb with "import_exmaralda": "Could not determine start id of currently processed event `York`. Event will be skipped. Import will fail."
Error during importing corpus to ./tests/data/import/exmaralda/fail-no_start_no_end/test_doc.exb with "import_exmaralda": "Could not determine start id of currently processed event `in`. Event will be skipped. Import will fail."
Conversion failed with errors: Error during importing corpus from ./tests/data/import/exmaralda/fail-no_start_no_end/test_doc.exb with "import_exmaralda": "Could not determine end id of currently processed event `in`. Event will be skipped. Import will fail."
Error during importing corpus from ./tests/data/import/exmaralda/fail-no_start_no_end/test_doc.exb with "import_exmaralda": "Could not determine start id of currently processed event `York`. Event will be skipped. Import will fail."
Error during importing corpus from ./tests/data/import/exmaralda/fail-no_start_no_end/test_doc.exb with "import_exmaralda": "Could not determine start id of currently processed event `in`. Event will be skipped. Import will fail."
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
source: src/importer/exmaralda/tests.rs
expression: r.err().unwrap()
expression: r.err().unwrap().to_string()
---
Error during importing corpus to ./tests/data/import/exmaralda/fail-unknown_tli/test_doc.exb with "import_exmaralda": "Unknown time line item: T1"
Error during importing corpus from ./tests/data/import/exmaralda/fail-unknown_tli/test_doc.exb with "import_exmaralda": "Unknown time line item: T1"
2 changes: 1 addition & 1 deletion src/importer/relannis/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ fn parse_relannis_workflow() {
// This should fail, because the input directory does not exist
assert_eq!(true, r.is_err());
assert_eq!(
r#"Error during importing corpus to ../data/import/relannis/does-not-exist with "import_relannis": "directory ./tests/workflows/../data/import/relannis/does-not-exist not found""#,
r#"Error during importing corpus from ../data/import/relannis/does-not-exist with "import_relannis": "directory ./tests/workflows/../data/import/relannis/does-not-exist not found""#,
r.err().unwrap().to_string()
)
}
3 changes: 1 addition & 2 deletions tests/snapshots/cli__run_failing_conversion.snap
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@
source: tests/cli.rs
expression: output_err
---
Error: Error during importing corpus to doesnotexist.graphml with "import_graphml": "No such file or directory (os error 2)"

Error: Error during importing corpus from doesnotexist.graphml with "import_graphml": "No such file or directory (os error 2)"

0 comments on commit 6d4d213

Please sign in to comment.