diff --git a/Cargo.lock b/Cargo.lock index 17078e9..dbe1554 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -294,7 +294,7 @@ checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "wikitext_table_parser" -version = "0.3.0" +version = "0.3.1" dependencies = [ "pyo3", "regex", diff --git a/Cargo.toml b/Cargo.toml index c3e5b54..aa93b78 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "wikitext_table_parser" -version = "0.3.0" +version = "0.3.1" edition = "2021" license = "Apache-2.0" keywords = ["wiki", "wikitext","table","parser"] diff --git a/README.md b/README.md index 6eb5caa..50a7978 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ A table in wikitext should like: #### Installation ```toml [dependencies] -wikitext_table_parser = "0.3.0" +wikitext_table_parser = "0.3.1" ``` #### Usage Example ```rust @@ -64,25 +64,28 @@ fn main() { WikitextTableParser::new(table_tokenizer, cell_tokenizer, &content, true); for event in wikitext_table_parser { match event { - Event::TableStart => { + Event::TableStart {} => { println!("Table START!"); } - Event::TableStyle(table_style) => { + Event::TableStyle { text: table_style } => { println!("table style{:?}#", table_style); } - Event::TableCaption(text) => { + Event::TableCaption { text } => { println!("table name{:?}#", text); } - Event::RowStyle(row_style) => { + Event::RowStyle { text: row_style } => { println!("----- {:?} -----", row_style); } - Event::ColStyle(col_style) => { - print!("col style: {:?} -- ", col_style); + Event::ColStart { cell_type } =>{ + print!("{:?} ",cell_type); } - Event::ColEnd(text) => { - println!("col data: {:?}", text); + Event::ColStyle { text: col_style } => { + print!("style: {:?} -> ", col_style); } - Event::TableEnd => { + Event::ColEnd { text } => { + println!("data: {:?}", text); + } + Event::TableEnd {} => { println!("Table END!"); } _ => {} @@ -139,6 +142,7 @@ for event in parser.event_log_queue: print("col style:", 
event.text) elif isinstance(event, Event.ColEnd): print("col data:", event.text) + print("-"*20) elif isinstance(event, Event.TableCaptionStart): pass elif isinstance(event, Event.TableCaption): @@ -148,7 +152,7 @@ for event in parser.event_log_queue: elif isinstance(event, Event.RowStyle): print("row style:", event.text) elif isinstance(event, Event.RowEnd): - print("-"*20) + print("="*30) else: raise NotImplementedError(event) ``` \ No newline at end of file diff --git a/examples/use_in_python.py b/examples/use_in_python.py index 40bc7e2..917af30 100644 --- a/examples/use_in_python.py +++ b/examples/use_in_python.py @@ -34,6 +34,7 @@ print("col style:", event.text) elif isinstance(event, Event.ColEnd): print("col data:", event.text) + print("-"*20) elif isinstance(event, Event.TableCaptionStart): pass elif isinstance(event, Event.TableCaption): @@ -43,6 +44,6 @@ elif isinstance(event, Event.RowStyle): print("row style:", event.text) elif isinstance(event, Event.RowEnd): - print("-"*20) + print("="*30) else: raise NotImplementedError(event) diff --git a/src/main.rs b/src/main.rs index a4578c4..7868361 100644 --- a/src/main.rs +++ b/src/main.rs @@ -43,11 +43,14 @@ fn main() { Event::RowStyle { text: row_style } => { println!("----- {:?} -----", row_style); } + Event::ColStart { cell_type } =>{ + print!("{:?} ",cell_type); + } Event::ColStyle { text: col_style } => { - print!("col style: {:?} -- ", col_style); + print!("style: {:?} -> ", col_style); } Event::ColEnd { text } => { - println!("col data: {:?}", text); + println!("data: {:?}", text); } Event::TableEnd {} => { println!("Table END!"); diff --git a/src/parser.rs b/src/parser.rs index 591bf62..1d8232f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -42,9 +42,9 @@ pub enum CellType { #[derive(Debug)] pub struct WikitextTableParser { state: State, - #[pyo3(get,set)] + #[pyo3(get, set)] event_log_queue: Vec, - #[pyo3(get,set)] + #[pyo3(get, set)] tokens: Vec, text_buffer: String, table_tokenizer: Tokenizer, 
@@ -165,6 +165,7 @@ impl WikitextTableParser { fn step(&mut self) { let token = self.tokens.remove(0); + // println!("{:?}", token); match self.state { State::Idle => { if &token == TableSpecialTokens::TableStart.as_ref() { @@ -185,6 +186,12 @@ impl WikitextTableParser { }); self.clear_text_buffer(); self.transition(Event::RowStart {}); + } else if &token == TableSpecialTokens::TableHeaderCell.as_ref() { + self.transition(Event::TableStyle { + text: self.get_text_buffer_data(), + }); + self.clear_text_buffer(); + self.transition(Event::RowStart {}); } // end of table else if &token == TableSpecialTokens::TableEnd.as_ref() { @@ -204,6 +211,7 @@ impl WikitextTableParser { // match ! after the caption, this type will not have a row style // and should turn in to read col state else if &token == TableSpecialTokens::TableHeaderCell.as_ref() { + // catch the table caption and transition the state to "State::ReadTable" self.transition(Event::TableCaption { text: self.get_text_buffer_data(), }); @@ -264,6 +272,9 @@ impl WikitextTableParser { text: self.get_text_buffer_data(), }); self.clear_text_buffer(); + self.transition(Event::ColStart { + cell_type: CellType::DataCell, + }); } // match \n! or \n!! else if &token == TableSpecialTokens::TableHeaderCell.as_ref() @@ -276,6 +287,9 @@ impl WikitextTableParser { text: self.get_text_buffer_data(), }); self.clear_text_buffer(); + self.transition(Event::ColStart { + cell_type: CellType::HeaderCell, + }); } else if &token == TableSpecialTokens::TableRow.as_ref() { self.transition(Event::ColStyle { text: self.get_style_text_buffer_data(), @@ -325,9 +339,7 @@ impl WikitextTableParser { // State::ReadCol (State::ReadCol, Event::ColStyle { text }) => {} - (State::ReadCol, Event::ColEnd { text }) => { - self.state = State::ReadCol - }, + (State::ReadCol, Event::ColEnd { text }) => self.state = State::ReadCol, (State::ReadCol, Event::RowStart {}) => self.state = State::ReadRow, // Else