Skip to content

Commit

Permalink
Add empty table importer
Browse files Browse the repository at this point in the history
  • Loading branch information
thomaskrause committed Oct 7, 2024
1 parent a96c8ec commit 69421fe
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/importer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ pub mod opus;
pub mod ptb;
pub mod relannis;
pub mod saltxml;
pub mod table;
pub mod textgrid;
pub mod toolbox;
pub mod treetagger;
Expand Down
60 changes: 60 additions & 0 deletions src/importer/table.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
use documented::{Documented, DocumentedFields};
use graphannis::update::GraphUpdate;
use serde::Deserialize;
use struct_field_names_as_array::FieldNamesAsSlice;

use super::Importer;
/// File extensions listed for the table importer.
const FILE_EXTENSIONS: [&str; 3] = ["csv", "tsv", "conll"];

Check failure on line 7 in src/importer/table.rs

View workflow job for this annotation

GitHub Actions / Static code analysis

constant `FILE_EXTENSIONS` is never used

Check failure on line 7 in src/importer/table.rs

View workflow job for this annotation

GitHub Actions / Unit tests

constant `FILE_EXTENSIONS` is never used

/// Imports table-like CSV files.
#[derive(Deserialize, Documented, DocumentedFields, FieldNamesAsSlice)]
#[serde(default, deny_unknown_fields)]
pub struct ImportTable {
/// The provided character defines the column delimiter. The default value is tab.
///
/// Example:
/// ```toml
/// [import.config]
/// delimiter = ";"
/// ```
#[serde(default = "default_delimiter")]
delimiter: char,

Check failure on line 21 in src/importer/table.rs

View workflow job for this annotation

GitHub Actions / Static code analysis

fields `delimiter` and `quote_char` are never read

Check failure on line 21 in src/importer/table.rs

View workflow job for this annotation

GitHub Actions / Unit tests

fields `delimiter` and `quote_char` are never read
/// The provided character will be used for quoting values. If nothing is provided, all columns will contain bare values. If a character is provided,
/// all values will be quoted.
///
/// Example:
/// ```toml
/// [import.config]
/// quote_char = "\""
/// ```
#[serde(default)]
quote_char: Option<char>,
}

/// Default configuration: the delimiter comes from `default_delimiter()` and
/// no quote character is set.
impl Default for ImportTable {
    fn default() -> Self {
        // Bind each field first so the struct literal can use shorthand.
        let delimiter = default_delimiter();
        let quote_char = None;
        Self {
            delimiter,
            quote_char,
        }
    }
}

/// Returns the column delimiter used when the configuration does not
/// specify one: the tab character.
fn default_delimiter() -> char {
    const TAB: char = '\t';
    TAB
}

impl Importer for ImportTable {
fn import_corpus(
&self,
input_path: &std::path::Path,

Check failure on line 50 in src/importer/table.rs

View workflow job for this annotation

GitHub Actions / Static code analysis

unused variable: `input_path`

Check failure on line 50 in src/importer/table.rs

View workflow job for this annotation

GitHub Actions / Unit tests

unused variable: `input_path`
step_id: crate::StepID,

Check failure on line 51 in src/importer/table.rs

View workflow job for this annotation

GitHub Actions / Static code analysis

unused variable: `step_id`

Check failure on line 51 in src/importer/table.rs

View workflow job for this annotation

GitHub Actions / Unit tests

unused variable: `step_id`
tx: Option<crate::workflow::StatusSender>,

Check failure on line 52 in src/importer/table.rs

View workflow job for this annotation

GitHub Actions / Static code analysis

unused variable: `tx`

Check failure on line 52 in src/importer/table.rs

View workflow job for this annotation

GitHub Actions / Unit tests

unused variable: `tx`
) -> Result<GraphUpdate, Box<dyn std::error::Error>> {
todo!()
}

fn file_extensions(&self) -> &[&str] {
todo!()
}
}
12 changes: 9 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ use graphannis::AnnotationGraph;
use importer::{
conllu::ImportCoNLLU, exmaralda::ImportEXMARaLDA, file_nodes::CreateFileNodes,
graphml::GraphMLImporter, meta::AnnotateCorpus, none::CreateEmptyCorpus, opus::ImportOpusLinks,
ptb::ImportPTB, relannis::ImportRelAnnis, saltxml::ImportSaltXml, textgrid::ImportTextgrid,
toolbox::ImportToolBox, treetagger::ImportTreeTagger, xlsx::ImportSpreadsheet, xml::ImportXML,
Importer,
ptb::ImportPTB, relannis::ImportRelAnnis, saltxml::ImportSaltXml, table::ImportTable,
textgrid::ImportTextgrid, toolbox::ImportToolBox, treetagger::ImportTreeTagger,
xlsx::ImportSpreadsheet, xml::ImportXML, Importer,
};
use manipulator::{
check::Check, chunker::Chunk, collapse::Collapse, enumerate::EnumerateMatches,
Expand Down Expand Up @@ -171,6 +171,7 @@ pub enum ReadFrom {
PTB(#[serde(default)] ImportPTB),
RelAnnis(#[serde(default)] ImportRelAnnis),
SaltXml(#[serde(default)] ImportSaltXml),
Table(#[serde(default)] ImportTable),
TextGrid(#[serde(default)] ImportTextgrid),
Toolbox(#[serde(default)] ImportToolBox),
TreeTagger(#[serde(default)] ImportTreeTagger),
Expand Down Expand Up @@ -198,6 +199,7 @@ impl ReadFrom {
ReadFrom::PTB(m) => m,
ReadFrom::RelAnnis(m) => m,
ReadFrom::SaltXml(m) => m,
ReadFrom::Table(m) => m,
ReadFrom::TextGrid(m) => m,
ReadFrom::Toolbox(m) => m,
ReadFrom::TreeTagger(m) => m,
Expand All @@ -220,6 +222,7 @@ impl ReadFromDiscriminants {
ReadFromDiscriminants::PTB => ImportPTB::DOCS,
ReadFromDiscriminants::RelAnnis => ImportRelAnnis::DOCS,
ReadFromDiscriminants::SaltXml => ImportSaltXml::DOCS,
ReadFromDiscriminants::Table => ImportTable::DOCS,
ReadFromDiscriminants::TextGrid => ImportTextgrid::DOCS,
ReadFromDiscriminants::Toolbox => ImportToolBox::DOCS,
ReadFromDiscriminants::TreeTagger => ImportTreeTagger::DOCS,
Expand Down Expand Up @@ -263,6 +266,9 @@ impl ReadFromDiscriminants {
ImportTextgrid::FIELD_NAMES_AS_SLICE,
ImportTextgrid::FIELD_DOCS,
),
ReadFromDiscriminants::Table => {
(ImportTable::FIELD_NAMES_AS_SLICE, ImportTable::FIELD_DOCS)
}
ReadFromDiscriminants::TreeTagger => (
ImportTreeTagger::FIELD_NAMES_AS_SLICE,
ImportTreeTagger::FIELD_DOCS,
Expand Down

0 comments on commit 69421fe

Please sign in to comment.