diff --git a/CHANGELOG.md b/CHANGELOG.md index 444889ef..d6710437 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- `table` export has feature to customize n/a-value, which by default is the empty string + ## [0.15.0] - 2024-08-14 ## [0.15.0] - 2024-08-14 diff --git a/src/exporter/snapshots/annatto__exporter__table__tests__custom.snap b/src/exporter/snapshots/annatto__exporter__table__tests__custom.snap new file mode 100644 index 00000000..7220d766 --- /dev/null +++ b/src/exporter/snapshots/annatto__exporter__table__tests__custom.snap @@ -0,0 +1,17 @@ +--- +source: src/exporter/table.rs +expression: export.unwrap() +--- +'lemma';'id_lemma';'upos';'id_upos';'xpos';'id_xpos';'Case';'id_Case';'Number';'id_Number';'in_Pointing__dep_deprel';'in_Pointing__dep';'sent_id';'id_sent_id';'text';'id_text';'Person';'id_Person';'Tense';'id_Tense';'out_Pointing__dep_deprel';'out_Pointing__dep';'SpaceAfter';'id_SpaceAfter';'PronType';'id_PronType' +'they';'valid/website_example#t5_1';'PRON';'valid/website_example#t5_1';'PRP';'valid/website_example#t5_1';'Nom';'valid/website_example#t5_1';'Plur';'valid/website_example#t5_1';'nsubj';'valid/website_example#t5_68';'1';'valid/website_example#s1_1';'They buy and sell books.';'valid/website_example#s1_1';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a' +'buy';'valid/website_example#t5_68';'VERB';'valid/website_example#t5_68';'VBP';'valid/website_example#t5_68';'n/a';'n/a';'Plur';'valid/website_example#t5_68';'n/a';'n/a';'1';'valid/website_example#s1_1';'They buy and sell books.';'valid/website_example#s1_1';'3';'valid/website_example#t5_68';'Pres';'valid/website_example#t5_68';'punct;obj;conj;nsubj';'valid/website_example#t9_66;valid/website_example#t8_76;valid/website_example#t7_32;valid/website_example#t5_1';'n/a';'n/a';'n/a';'n/a' 
+'and';'valid/website_example#t6_67';'CONJ';'valid/website_example#t6_67';'CC';'valid/website_example#t6_67';'n/a';'n/a';'n/a';'n/a';'cc';'valid/website_example#t7_32';'1';'valid/website_example#s1_1';'They buy and sell books.';'valid/website_example#s1_1';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a' +'sell';'valid/website_example#t7_32';'VERB';'valid/website_example#t7_32';'VBP';'valid/website_example#t7_32';'n/a';'n/a';'Plur';'valid/website_example#t7_32';'conj';'valid/website_example#t5_68';'1';'valid/website_example#s1_1';'They buy and sell books.';'valid/website_example#s1_1';'3';'valid/website_example#t7_32';'Pres';'valid/website_example#t7_32';'cc';'valid/website_example#t6_67';'n/a';'n/a';'n/a';'n/a' +'book';'valid/website_example#t8_76';'NOUN';'valid/website_example#t8_76';'NNS';'valid/website_example#t8_76';'n/a';'n/a';'Plur';'valid/website_example#t8_76';'obj';'valid/website_example#t5_68';'1';'valid/website_example#s1_1';'They buy and sell books.';'valid/website_example#s1_1';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'No';'valid/website_example#t8_76';'n/a';'n/a' +'.';'valid/website_example#t9_66';'PUNCT';'valid/website_example#t9_66';'.';'valid/website_example#t9_66';'n/a';'n/a';'n/a';'n/a';'punct';'valid/website_example#t5_68';'1';'valid/website_example#s1_1';'They buy and sell books.';'valid/website_example#s1_1';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a' +'I';'valid/website_example#t14_1';'PRON';'valid/website_example#t14_1';'PRP';'valid/website_example#t14_1';'Nom';'valid/website_example#t14_1';'Sing';'valid/website_example#t14_1';'nsubj';'valid/website_example#t14_57';'2';'valid/website_example#s10_34';'I have no clue.';'valid/website_example#s10_34';'1';'valid/website_example#t14_1';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a' 
+'have';'valid/website_example#t14_57';'VERB';'valid/website_example#t14_57';'VBP';'valid/website_example#t14_57';'n/a';'n/a';'Sing';'valid/website_example#t14_57';'n/a';'n/a';'2';'valid/website_example#s10_34';'I have no clue.';'valid/website_example#s10_34';'1';'valid/website_example#t14_57';'Pres';'valid/website_example#t14_57';'punct;obj;nsubj';'valid/website_example#t17_54;valid/website_example#t16_38;valid/website_example#t14_1';'n/a';'n/a';'n/a';'n/a' +'no';'valid/website_example#t15_64';'DET';'valid/website_example#t15_64';'DT';'valid/website_example#t15_64';'n/a';'n/a';'n/a';'n/a';'det';'valid/website_example#t16_38';'2';'valid/website_example#s10_34';'I have no clue.';'valid/website_example#s10_34';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'Neg';'valid/website_example#t15_64' +'clue';'valid/website_example#t16_38';'NOUN';'valid/website_example#t16_38';'NN';'valid/website_example#t16_38';'n/a';'n/a';'Sing';'valid/website_example#t16_38';'obj';'valid/website_example#t14_57';'2';'valid/website_example#s10_34';'I have no clue.';'valid/website_example#s10_34';'n/a';'n/a';'n/a';'n/a';'det';'valid/website_example#t15_64';'No';'valid/website_example#t16_38';'n/a';'n/a' +'.';'valid/website_example#t17_54';'PUNCT';'valid/website_example#t17_54';'.';'valid/website_example#t17_54';'n/a';'n/a';'n/a';'n/a';'punct';'valid/website_example#t14_57';'2';'valid/website_example#s10_34';'I have no clue.';'valid/website_example#s10_34';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a';'n/a' + diff --git a/src/exporter/table.rs b/src/exporter/table.rs index d4726931..5344e240 100644 --- a/src/exporter/table.rs +++ b/src/exporter/table.rs @@ -1,5 +1,4 @@ use std::{ - borrow::Cow, collections::{btree_map::Entry, BTreeMap}, ops::Bound, path::Path, @@ -69,6 +68,15 @@ pub struct ExportTable { /// ``` #[serde(default)] quote_char: Option, + /// Provides the string sequence used for n/a. Default is the empty string. 
+ /// + /// Example: + /// ```toml + /// [export.config] + /// no_value = "n/a" + /// ``` + #[serde(default)] + no_value: String, /// By listing annotation components, the ingoing edges of that component and their annotations /// will be exported as well. Multiple ingoing edges will be separated by a ";". Each exported /// node will be checked for ingoing edges in the respective components. @@ -99,6 +107,7 @@ impl Default for ExportTable { doc_anno: default_doc_anno(), delimiter: default_delimiter(), quote_char: None, + no_value: String::default(), ingoing: vec![], outgoing: vec![], } @@ -200,8 +209,8 @@ impl Exporter for ExportTable { } } -type Data<'a> = BTreeMap>; -type EdgeData<'a> = BTreeMap>>; // insertion order is critical +type Data = BTreeMap; +type EdgeData = BTreeMap>; // insertion order is critical type SingleEdgeData<'a> = (String, &'a AnnotationComponent, Vec<(String, String)>); impl ExportTable { @@ -264,8 +273,8 @@ impl ExportTable { let value = node_annos .get_value_for_item(&rn, &anno_key)? 
.ok_or(anyhow!("Annotation has no value"))?; - data.insert(index, value); - data.insert(index + 1, node_name.clone()); + data.insert(index, value.to_string()); + data.insert(index + 1, node_name.to_string()); } } if follow_edges { @@ -298,11 +307,11 @@ impl ExportTable { match edge_column_data.entry(index) { Entry::Vacant(e) => { let mut new_value = LinkedHashSet::default(); - new_value.insert(Cow::Owned(value)); + new_value.insert(value); e.insert(new_value); } Entry::Occupied(mut e) => { - e.get_mut().insert(Cow::Owned(value)); + e.get_mut().insert(value); } }; } @@ -310,9 +319,9 @@ impl ExportTable { } } data.extend( - edge_column_data.into_iter().map(|(ix, value_set)| { - (ix, Cow::Owned(value_set.iter().join(";").to_string())) - }), + edge_column_data + .into_iter() + .map(|(ix, value_set)| (ix, value_set.iter().join(";"))), ); table_data.push(data); } @@ -335,7 +344,17 @@ impl ExportTable { for mut entry in table_data { let mut row = Vec::with_capacity(index_bound); + // Rows are only written if at least one cell holds a real, non-empty value; + // cells filled with the configured n/a marker must not count as content. + let mut has_content = false; for col_index in 0..index_bound { - row.push(entry.remove(&col_index).unwrap_or_default().to_string()); + let cell = match entry.remove(&col_index) { + Some(value) => { + has_content |= !value.is_empty(); + value + } + None => self.no_value.clone(), + }; + row.push(cell); } - if !row.iter().all(String::is_empty) { + if has_content { writer.write_record(&row)?; @@ -533,4 +552,44 @@ mod tests { assert!(export.is_ok(), "error: {:?}", export.err()); assert_snapshot!(export.unwrap()); } + + #[test] + fn custom() { + let to_conll = ImportCoNLLU::default(); + let mprt = to_conll.import_corpus( + Path::new("tests/data/import/conll/valid/"), + StepID { + module_name: "test_import_conll".to_string(), + path: None, + }, + None, + ); + assert!(mprt.is_ok()); + let mut update_import = mprt.unwrap(); + let g = AnnotationGraph::with_default_graphstorages(true); + assert!(g.is_ok()); + let mut graph = g.unwrap(); + assert!(graph.apply_update(&mut update_import, |_| {}).is_ok()); + let export = export_to_string( + &graph, + ExportTable { + delimiter: ';', + no_value: "n/a".to_string(), 
quote_char: Some('\''), + ingoing: vec![AnnotationComponent::new( + AnnotationComponentType::Pointing, + "".into(), + "dep".into(), + )], + outgoing: vec![AnnotationComponent::new( + AnnotationComponentType::Pointing, + "".into(), + "dep".into(), + )], + ..Default::default() + }, + ); + assert!(export.is_ok(), "error: {:?}", export.err()); + assert_snapshot!(export.unwrap()); + } }