From 69421fee852248446dc4c3103bfb416753896d04 Mon Sep 17 00:00:00 2001
From: Thomas Krause
Date: Mon, 7 Oct 2024 15:25:39 +0200
Subject: [PATCH] Add empty table importer

---
 src/importer/mod.rs   |  1 +
 src/importer/table.rs | 60 +++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs            | 12 ++++++---
 3 files changed, 70 insertions(+), 3 deletions(-)
 create mode 100644 src/importer/table.rs

diff --git a/src/importer/mod.rs b/src/importer/mod.rs
index 2b70060b..35efb61e 100644
--- a/src/importer/mod.rs
+++ b/src/importer/mod.rs
@@ -9,6 +9,7 @@ pub mod opus;
 pub mod ptb;
 pub mod relannis;
 pub mod saltxml;
+pub mod table;
 pub mod textgrid;
 pub mod toolbox;
 pub mod treetagger;
diff --git a/src/importer/table.rs b/src/importer/table.rs
new file mode 100644
index 00000000..284ac2c9
--- /dev/null
+++ b/src/importer/table.rs
@@ -0,0 +1,60 @@
+use documented::{Documented, DocumentedFields};
+use graphannis::update::GraphUpdate;
+use serde::Deserialize;
+use struct_field_names_as_array::FieldNamesAsSlice;
+
+use super::Importer;
+const FILE_EXTENSIONS: [&str; 3] = ["csv", "tsv", "conll"];
+
+/// Imports table-like CSV files.
+#[derive(Deserialize, Documented, DocumentedFields, FieldNamesAsSlice)]
+#[serde(default, deny_unknown_fields)]
+pub struct ImportTable {
+    /// The provided character defines the column delimiter. The default value is tab.
+    ///
+    /// Example:
+    /// ```toml
+    /// [import.config]
+    /// delimiter = ";"
+    /// ```
+    #[serde(default = "default_delimiter")]
+    delimiter: char,
+    /// The provided character will be used for quoting values. If nothing is provided, all columns will contain bare values. If a character is provided,
+    /// all values will be quoted.
+    ///
+    /// Example:
+    /// ```toml
+    /// [import.config]
+    /// quote_char = "\""
+    /// ```
+    #[serde(default)]
+    quote_char: Option<char>,
+}
+
+impl Default for ImportTable {
+    fn default() -> Self {
+        Self {
+            delimiter: default_delimiter(),
+            quote_char: None,
+        }
+    }
+}
+
+fn default_delimiter() -> char {
+    '\t'
+}
+
+impl Importer for ImportTable {
+    fn import_corpus(
+        &self,
+        input_path: &std::path::Path,
+        step_id: crate::StepID,
+        tx: Option<crate::workflow::StatusSender>,
+    ) -> Result<GraphUpdate, Box<dyn std::error::Error>> {
+        todo!()
+    }
+
+    fn file_extensions(&self) -> &[&str] {
+        &FILE_EXTENSIONS
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index bb1a1e47..2ec27830 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -28,9 +28,9 @@ use graphannis::AnnotationGraph;
 use importer::{
     conllu::ImportCoNLLU, exmaralda::ImportEXMARaLDA, file_nodes::CreateFileNodes,
     graphml::GraphMLImporter, meta::AnnotateCorpus, none::CreateEmptyCorpus, opus::ImportOpusLinks,
-    ptb::ImportPTB, relannis::ImportRelAnnis, saltxml::ImportSaltXml, textgrid::ImportTextgrid,
-    toolbox::ImportToolBox, treetagger::ImportTreeTagger, xlsx::ImportSpreadsheet, xml::ImportXML,
-    Importer,
+    ptb::ImportPTB, relannis::ImportRelAnnis, saltxml::ImportSaltXml, table::ImportTable,
+    textgrid::ImportTextgrid, toolbox::ImportToolBox, treetagger::ImportTreeTagger,
+    xlsx::ImportSpreadsheet, xml::ImportXML, Importer,
 };
 use manipulator::{
     check::Check, chunker::Chunk, collapse::Collapse, enumerate::EnumerateMatches,
@@ -171,6 +171,7 @@ pub enum ReadFrom {
     PTB(#[serde(default)] ImportPTB),
     RelAnnis(#[serde(default)] ImportRelAnnis),
     SaltXml(#[serde(default)] ImportSaltXml),
+    Table(#[serde(default)] ImportTable),
     TextGrid(#[serde(default)] ImportTextgrid),
     Toolbox(#[serde(default)] ImportToolBox),
     TreeTagger(#[serde(default)] ImportTreeTagger),
@@ -198,6 +199,7 @@ impl ReadFrom {
             ReadFrom::PTB(m) => m,
             ReadFrom::RelAnnis(m) => m,
             ReadFrom::SaltXml(m) => m,
+            ReadFrom::Table(m) => m,
             ReadFrom::TextGrid(m) => m,
             ReadFrom::Toolbox(m) => m,
             ReadFrom::TreeTagger(m) => m,
@@ -220,6 +222,7 @@ impl ReadFromDiscriminants {
             ReadFromDiscriminants::PTB => ImportPTB::DOCS,
             ReadFromDiscriminants::RelAnnis => ImportRelAnnis::DOCS,
             ReadFromDiscriminants::SaltXml => ImportSaltXml::DOCS,
+            ReadFromDiscriminants::Table => ImportTable::DOCS,
             ReadFromDiscriminants::TextGrid => ImportTextgrid::DOCS,
             ReadFromDiscriminants::Toolbox => ImportToolBox::DOCS,
             ReadFromDiscriminants::TreeTagger => ImportTreeTagger::DOCS,
@@ -263,6 +266,9 @@ impl ReadFromDiscriminants {
                 ImportTextgrid::FIELD_NAMES_AS_SLICE,
                 ImportTextgrid::FIELD_DOCS,
             ),
+            ReadFromDiscriminants::Table => {
+                (ImportTable::FIELD_NAMES_AS_SLICE, ImportTable::FIELD_DOCS)
+            }
             ReadFromDiscriminants::TreeTagger => (
                 ImportTreeTagger::FIELD_NAMES_AS_SLICE,
                 ImportTreeTagger::FIELD_DOCS,