From f405f924eb54e79b5b244681f3687b70dafc8b03 Mon Sep 17 00:00:00 2001 From: Mr-Leshiy Date: Sun, 3 Nov 2024 13:04:19 +0200 Subject: [PATCH 01/10] move parsers into the separate module --- rust/cbork-cddl-parser/src/lib.rs | 61 ++++++---------------------- rust/cbork-cddl-parser/src/parser.rs | 42 +++++++++++++++++++ rust/cbork-cddl-parser/tests/cddl.rs | 4 +- 3 files changed, 57 insertions(+), 50 deletions(-) create mode 100644 rust/cbork-cddl-parser/src/parser.rs diff --git a/rust/cbork-cddl-parser/src/lib.rs b/rust/cbork-cddl-parser/src/lib.rs index af8b4ab29..f7d3933a8 100644 --- a/rust/cbork-cddl-parser/src/lib.rs +++ b/rust/cbork-cddl-parser/src/lib.rs @@ -2,57 +2,22 @@ #![allow(missing_docs)] // TODO(apskhem): Temporary, to bo removed in a subsequent PR +mod parser; + use derive_more::{Display, From}; pub use pest::Parser; use pest::{error::Error, iterators::Pairs}; -pub mod rfc_8610 { - pub use pest::Parser; - - #[derive(pest_derive::Parser)] - #[grammar = "grammar/rfc_8610.pest"] - pub struct RFC8610Parser; -} - -pub mod rfc_9165 { - pub use pest::Parser; - - #[derive(pest_derive::Parser)] - #[grammar = "grammar/rfc_8610.pest"] - #[grammar = "grammar/rfc_9165.pest"] - pub struct RFC8610Parser; -} - -pub mod cddl { - pub use pest::Parser; - - #[derive(pest_derive::Parser)] - #[grammar = "grammar/rfc_8610.pest"] - #[grammar = "grammar/rfc_9165.pest"] - #[grammar = "grammar/cddl_modules.pest"] - pub struct RFC8610Parser; -} - -pub mod cddl_test { - pub use pest::Parser; - - // Parser with DEBUG rules. These rules are only used in tests. - #[derive(pest_derive::Parser)] - #[grammar = "grammar/rfc_8610.pest"] - #[grammar = "grammar/rfc_9165.pest"] - #[grammar = "grammar/cddl_modules.pest"] - #[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests. - pub struct CDDLTestParser; -} +pub use crate::parser::{cddl, cddl_test, rfc_8610, rfc_9165}; /// Represents different parser extensions for handling CDDL specifications. pub enum Extension { /// RFC8610 ONLY limited parser. - RFC8610Parser, + RFC8610, /// RFC8610 and RFC9165 limited parser. - RFC9165Parser, + RFC9165, /// RFC8610, RFC9165, and CDDL modules. - CDDLParser, + CDDL, } // CDDL Standard Postlude - read from an external file @@ -120,18 +85,18 @@ pub fn parse_cddl<'a>( input.push_str(POSTLUDE); let result = match extension { - Extension::RFC8610Parser => { - rfc_8610::RFC8610Parser::parse(rfc_8610::Rule::cddl, input) + Extension::RFC8610 => { + rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input) .map(AST::RFC8610) .map_err(CDDLErrorType::RFC8610) }, - Extension::RFC9165Parser => { - rfc_9165::RFC8610Parser::parse(rfc_9165::Rule::cddl, input) + Extension::RFC9165 => { + rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input) .map(AST::RFC9165) .map_err(CDDLErrorType::RFC9165) }, - Extension::CDDLParser => { - cddl::RFC8610Parser::parse(cddl::Rule::cddl, input) + Extension::CDDL => { + cddl::Parser::parse(cddl::Rule::cddl, input) .map(AST::CDDL) .map_err(CDDLErrorType::CDDL) }, @@ -147,7 +112,7 @@ mod tests { #[test] fn it_works() { let mut input = String::new(); - let result = parse_cddl(&mut input, &Extension::CDDLParser); + let result = parse_cddl(&mut input, &Extension::CDDL); match result { Ok(c) => println!("{c:?}"), diff --git a/rust/cbork-cddl-parser/src/parser.rs b/rust/cbork-cddl-parser/src/parser.rs new file mode 100644 index 000000000..7f6fcda00 --- /dev/null +++ b/rust/cbork-cddl-parser/src/parser.rs @@ -0,0 +1,42 @@ +//! A parser for CDDL using the [pest](https://github.com/pest-parser/pest). +//! Utilized for parsing in accordance with RFC-8610, RFC-9165. + +/// RFC-8610 parser. +pub mod rfc_8610 { + /// A Pest parser for RFC-8610. + #[derive(pest_derive::Parser)] + #[grammar = "grammar/rfc_8610.pest"] + pub struct Parser; +} + +/// RFC-9165 parser. +pub mod rfc_9165 { + /// A Pest parser for RFC-9165. + #[derive(pest_derive::Parser)] + #[grammar = "grammar/rfc_8610.pest"] + #[grammar = "grammar/rfc_9165.pest"] + pub struct Parser; +} + +/// Full CDDL syntax parser. +pub mod cddl { + /// A Pest parser for a full CDDL syntax. + #[derive(pest_derive::Parser)] + #[grammar = "grammar/rfc_8610.pest"] + #[grammar = "grammar/rfc_9165.pest"] + #[grammar = "grammar/cddl_modules.pest"] + pub struct Parser; +} + +/// Full CDDL syntax test parser. +/// Parser with DEBUG rules. These rules are only used in tests. +pub mod cddl_test { + #[allow(dead_code)] + /// A Pest test parser for a full CDDL syntax. + #[derive(pest_derive::Parser)] + #[grammar = "grammar/rfc_8610.pest"] + #[grammar = "grammar/rfc_9165.pest"] + #[grammar = "grammar/cddl_modules.pest"] + #[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests. + pub struct CDDLTestParser; +} diff --git a/rust/cbork-cddl-parser/tests/cddl.rs b/rust/cbork-cddl-parser/tests/cddl.rs index 9adc9384d..5ab2bbf32 100644 --- a/rust/cbork-cddl-parser/tests/cddl.rs +++ b/rust/cbork-cddl-parser/tests/cddl.rs @@ -32,7 +32,7 @@ fn parse_cddl_files() { for file_path in valid_file_paths { let mut content = fs::read_to_string(file_path).unwrap(); - if let Err(e) = parse_cddl(&mut content, &Extension::CDDLParser) { + if let Err(e) = parse_cddl(&mut content, &Extension::CDDL) { err_messages.push(format!("{}) {file_path:?} {e}", err_messages.len() + 1)); } } @@ -41,7 +41,7 @@ fn parse_cddl_files() { for file_path in invalid_file_paths { let mut content = fs::read_to_string(file_path).unwrap(); - let result = parse_cddl(&mut content, &Extension::CDDLParser); + let result = parse_cddl(&mut content, &Extension::CDDL); assert!(result.is_err(), "{:?} is expected to fail", &file_path); } From e4acb4a6cfdaf9d4c1e99622753241b4988eadf4 Mon Sep 17 00:00:00 2001 From: Mr-Leshiy Date: Sun, 3 Nov 2024 15:50:19 +0200 Subject: [PATCH 02/10] add validate_cddl pub function --- rust/cbork-cddl-parser/Cargo.toml | 1 + rust/cbork-cddl-parser/src/lib.rs | 116 +++------------------------ rust/cbork-cddl-parser/src/parser.rs | 64 ++++++++++++++- rust/cbork-cddl-parser/tests/cddl.rs | 6 +- 4 files changed, 73 insertions(+), 114 deletions(-) diff --git a/rust/cbork-cddl-parser/Cargo.toml b/rust/cbork-cddl-parser/Cargo.toml index b2c7e307a..7abfef170 100644 --- a/rust/cbork-cddl-parser/Cargo.toml +++ b/rust/cbork-cddl-parser/Cargo.toml @@ -19,3 +19,4 @@ derive_more = {version = "1.0.0", features = ["from","display"] } pest = { version = "2.7.13", features = ["std", "pretty-print", "memchr", "const_prec_climber"] } pest_derive = { version = "2.7.13", features = ["grammar-extras"] } thiserror = "1.0.64" +anyhow = "1.0.89" diff --git a/rust/cbork-cddl-parser/src/lib.rs b/rust/cbork-cddl-parser/src/lib.rs index f7d3933a8..802ea699a 100644 --- a/rust/cbork-cddl-parser/src/lib.rs +++ b/rust/cbork-cddl-parser/src/lib.rs @@ -1,125 +1,27 @@ //! A parser for CDDL, utilized for parsing in accordance with RFC 8610. -#![allow(missing_docs)] // TODO(apskhem): Temporary, to bo removed in a subsequent PR - mod parser; -use derive_more::{Display, From}; -pub use pest::Parser; -use pest::{error::Error, iterators::Pairs}; - -pub use crate::parser::{cddl, cddl_test, rfc_8610, rfc_9165}; +pub use crate::parser::cddl_test; -/// Represents different parser extensions for handling CDDL specifications. +/// Represents different grammar extensions for handling CDDL specifications. pub enum Extension { - /// RFC8610 ONLY limited parser. + /// RFC8610 ONLY limited grammar. RFC8610, - /// RFC8610 and RFC9165 limited parser. + /// RFC8610 and RFC9165 limited grammar. RFC9165, - /// RFC8610, RFC9165, and CDDL modules. + /// RFC8610, RFC9165, and CDDL grammar. CDDL, } -// CDDL Standard Postlude - read from an external file -pub const POSTLUDE: &str = include_str!("grammar/postlude.cddl"); - -/// Abstract Syntax Tree (AST) representing parsed CDDL syntax. -// TODO: this is temporary. need to add more pragmatic nodes -#[derive(Debug)] -pub enum AST<'a> { - /// Represents the AST for RFC 8610 CDDL rules. - RFC8610(Pairs<'a, rfc_8610::Rule>), - /// Represents the AST for RFC 9165 CDDL rules. - RFC9165(Pairs<'a, rfc_9165::Rule>), - /// Represents the AST for CDDL Modules rules. - CDDL(Pairs<'a, cddl::Rule>), -} - -/// Represents different types of errors related to different types of extension. -#[derive(Display, Debug)] -pub enum CDDLErrorType { - /// An error related to RFC 8610 extension. - RFC8610(Error), - /// An error related to RFC 9165 extension. - RFC9165(Error), - /// An error related to CDDL modules extension. - CDDL(Error), -} - -/// Represents an error that may occur during CDDL parsing. -#[derive(thiserror::Error, Debug, From)] -#[error("{0}")] -pub struct CDDLError(CDDLErrorType); - -/// Parses and checks semantically a CDDL input string. -/// -/// # Arguments -/// -/// * `input` - A string containing the CDDL input to be parsed. -/// -/// # Returns -/// -/// Returns `Ok(())` if parsing is successful, otherwise returns an `Err` containing -/// a boxed `CDDLError` indicating the parsing error. +/// Verifies semantically a CDDL input string. /// /// # Errors /// /// This function may return an error in the following cases: /// /// - If there is an issue with parsing the CDDL input. -/// -/// # Examples -/// -/// ```rs -/// use cbork_cddl_parser::{parse_cddl, Extension}; -/// use std:fs; -/// -/// let mut input = fs::read_to_string("path/to/your/file.cddl").unwrap(); -/// let result = parse_cddl(&mut input, &Extension::CDDLParser); -/// assert!(result.is_ok()); -/// ``` -pub fn parse_cddl<'a>( - input: &'a mut String, extension: &Extension, -) -> Result, Box> { - input.push_str("\n\n"); - input.push_str(POSTLUDE); - - let result = match extension { - Extension::RFC8610 => { - rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input) - .map(AST::RFC8610) - .map_err(CDDLErrorType::RFC8610) - }, - Extension::RFC9165 => { - rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input) - .map(AST::RFC9165) - .map_err(CDDLErrorType::RFC9165) - }, - Extension::CDDL => { - cddl::Parser::parse(cddl::Rule::cddl, input) - .map(AST::CDDL) - .map_err(CDDLErrorType::CDDL) - }, - }; - - result.map_err(|e| Box::new(CDDLError::from(e))) -} - -#[cfg(test)] -mod tests { - use crate::*; - - #[test] - fn it_works() { - let mut input = String::new(); - let result = parse_cddl(&mut input, &Extension::CDDL); - - match result { - Ok(c) => println!("{c:?}"), - Err(e) => { - println!("{e:?}"); - println!("{e}"); - }, - } - } +pub fn validate_cddl(input: &mut String, extension: &Extension) -> anyhow::Result<()> { + parser::parse_cddl(input, extension)?; + Ok(()) } diff --git a/rust/cbork-cddl-parser/src/parser.rs b/rust/cbork-cddl-parser/src/parser.rs index 7f6fcda00..b7efafc63 100644 --- a/rust/cbork-cddl-parser/src/parser.rs +++ b/rust/cbork-cddl-parser/src/parser.rs @@ -1,8 +1,13 @@ //! A parser for CDDL using the [pest](https://github.com/pest-parser/pest). //! Utilized for parsing in accordance with RFC-8610, RFC-9165. +use pest::{iterators::Pairs, Parser}; + +use crate::Extension; + /// RFC-8610 parser. -pub mod rfc_8610 { +#[allow(missing_docs)] +mod rfc_8610 { /// A Pest parser for RFC-8610. #[derive(pest_derive::Parser)] #[grammar = "grammar/rfc_8610.pest"] @@ -10,7 +15,8 @@ pub mod rfc_8610 { } /// RFC-9165 parser. -pub mod rfc_9165 { +#[allow(missing_docs)] +mod rfc_9165 { /// A Pest parser for RFC-9165. #[derive(pest_derive::Parser)] #[grammar = "grammar/rfc_8610.pest"] @@ -19,7 +25,8 @@ pub mod rfc_9165 { } /// Full CDDL syntax parser. -pub mod cddl { +#[allow(missing_docs)] +mod cddl { /// A Pest parser for a full CDDL syntax. #[derive(pest_derive::Parser)] #[grammar = "grammar/rfc_8610.pest"] @@ -30,8 +37,9 @@ pub mod cddl { /// Full CDDL syntax test parser. /// Parser with DEBUG rules. These rules are only used in tests. +#[allow(missing_docs)] pub mod cddl_test { - #[allow(dead_code)] + pub use pest::Parser; /// A Pest test parser for a full CDDL syntax. #[derive(pest_derive::Parser)] #[grammar = "grammar/rfc_8610.pest"] @@ -40,3 +48,51 @@ pub mod cddl_test { #[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests. pub struct CDDLTestParser; } + +/// CDDL Standard Postlude - read from an external file +const POSTLUDE: &str = include_str!("grammar/postlude.cddl"); + +/// Abstract Syntax Tree (AST) representing parsed CDDL syntax. +#[derive(Debug)] +#[allow(dead_code)] +pub enum Ast<'a> { + /// Represents the AST for RFC-8610 CDDL rules. + Rfc8610(Pairs<'a, rfc_8610::Rule>), + /// Represents the AST for RFC-9165 CDDL rules. + Rfc9165(Pairs<'a, rfc_9165::Rule>), + /// Represents the AST for CDDL Modules rules. + Cddl(Pairs<'a, cddl::Rule>), +} + +/// Parses and checks semantically a CDDL input string. +/// +/// # Arguments +/// +/// * `input` - A string containing the CDDL input to be parsed. +/// +/// # Returns +/// +/// Returns `Ok(())` if parsing is successful, otherwise returns an `Err` containing +/// a boxed `CDDLError` indicating the parsing error. +/// +/// # Errors +/// +/// This function may return an error in the following cases: +/// +/// - If there is an issue with parsing the CDDL input. +pub(crate) fn parse_cddl<'a>( + input: &'a mut String, extension: &Extension, +) -> anyhow::Result> { + input.push_str("\n\n"); + input.push_str(POSTLUDE); + + match extension { + Extension::RFC8610 => { + Ok(rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input).map(Ast::Rfc8610)?) + }, + Extension::RFC9165 => { + Ok(rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input).map(Ast::Rfc9165)?) + }, + Extension::CDDL => Ok(cddl::Parser::parse(cddl::Rule::cddl, input).map(Ast::Cddl)?), + } +} diff --git a/rust/cbork-cddl-parser/tests/cddl.rs b/rust/cbork-cddl-parser/tests/cddl.rs index 5ab2bbf32..0c7f63ca0 100644 --- a/rust/cbork-cddl-parser/tests/cddl.rs +++ b/rust/cbork-cddl-parser/tests/cddl.rs @@ -1,6 +1,6 @@ use std::{ffi::OsStr, fs, io::Result}; -use cbork_cddl_parser::{parse_cddl, Extension}; +use cbork_cddl_parser::{validate_cddl, Extension}; #[test] /// # Panics @@ -32,7 +32,7 @@ fn parse_cddl_files() { for file_path in valid_file_paths { let mut content = fs::read_to_string(file_path).unwrap(); - if let Err(e) = parse_cddl(&mut content, &Extension::CDDL) { + if let Err(e) = validate_cddl(&mut content, &Extension::CDDL) { err_messages.push(format!("{}) {file_path:?} {e}", err_messages.len() + 1)); } } @@ -41,7 +41,7 @@ fn parse_cddl_files() { for file_path in invalid_file_paths { let mut content = fs::read_to_string(file_path).unwrap(); - let result = parse_cddl(&mut content, &Extension::CDDL); + let result = validate_cddl(&mut content, &Extension::CDDL); assert!(result.is_err(), "{:?} is expected to fail", &file_path); } From fe1d5e106f4f2683caf91de8588382baf15efcbd Mon Sep 17 00:00:00 2001 From: Mr-Leshiy Date: Mon, 4 Nov 2024 08:57:16 +0200 Subject: [PATCH 03/10] add processor module --- rust/cbork-cddl-parser/src/lib.rs | 1 + rust/cbork-cddl-parser/src/parser.rs | 25 ++++----- rust/cbork-cddl-parser/src/processor/expr.rs | 19 +++++++ rust/cbork-cddl-parser/src/processor/mod.rs | 57 ++++++++++++++++++++ 4 files changed, 90 insertions(+), 12 deletions(-) create mode 100644 rust/cbork-cddl-parser/src/processor/expr.rs create mode 100644 rust/cbork-cddl-parser/src/processor/mod.rs diff --git a/rust/cbork-cddl-parser/src/lib.rs b/rust/cbork-cddl-parser/src/lib.rs index 802ea699a..cf7c3c4b9 100644 --- a/rust/cbork-cddl-parser/src/lib.rs +++ b/rust/cbork-cddl-parser/src/lib.rs @@ -1,6 +1,7 @@ //! A parser for CDDL, utilized for parsing in accordance with RFC 8610. mod parser; +mod processor; pub use crate::parser::cddl_test; diff --git a/rust/cbork-cddl-parser/src/parser.rs b/rust/cbork-cddl-parser/src/parser.rs index b7efafc63..36572a44b 100644 --- a/rust/cbork-cddl-parser/src/parser.rs +++ b/rust/cbork-cddl-parser/src/parser.rs @@ -7,32 +7,32 @@ use crate::Extension; /// RFC-8610 parser. #[allow(missing_docs)] -mod rfc_8610 { +pub(crate) mod rfc_8610 { /// A Pest parser for RFC-8610. #[derive(pest_derive::Parser)] #[grammar = "grammar/rfc_8610.pest"] - pub struct Parser; + pub(crate) struct Parser; } /// RFC-9165 parser. #[allow(missing_docs)] -mod rfc_9165 { +pub(crate) mod rfc_9165 { /// A Pest parser for RFC-9165. #[derive(pest_derive::Parser)] #[grammar = "grammar/rfc_8610.pest"] #[grammar = "grammar/rfc_9165.pest"] - pub struct Parser; + pub(crate) struct Parser; } /// Full CDDL syntax parser. #[allow(missing_docs)] -mod cddl { +pub(crate) mod cddl { /// A Pest parser for a full CDDL syntax. #[derive(pest_derive::Parser)] #[grammar = "grammar/rfc_8610.pest"] #[grammar = "grammar/rfc_9165.pest"] #[grammar = "grammar/cddl_modules.pest"] - pub struct Parser; + pub(crate) struct Parser; } /// Full CDDL syntax test parser. @@ -55,7 +55,7 @@ const POSTLUDE: &str = include_str!("grammar/postlude.cddl"); /// Abstract Syntax Tree (AST) representing parsed CDDL syntax. #[derive(Debug)] #[allow(dead_code)] -pub enum Ast<'a> { +pub(crate) enum Ast<'a> { /// Represents the AST for RFC-8610 CDDL rules. Rfc8610(Pairs<'a, rfc_8610::Rule>), /// Represents the AST for RFC-9165 CDDL rules. @@ -86,13 +86,14 @@ pub(crate) fn parse_cddl<'a>( input.push_str("\n\n"); input.push_str(POSTLUDE); - match extension { + let ast = match extension { Extension::RFC8610 => { - Ok(rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input).map(Ast::Rfc8610)?) + rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input).map(Ast::Rfc8610)? }, Extension::RFC9165 => { - Ok(rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input).map(Ast::Rfc9165)?) + rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input).map(Ast::Rfc9165)? }, - Extension::CDDL => Ok(cddl::Parser::parse(cddl::Rule::cddl, input).map(Ast::Cddl)?), - } + Extension::CDDL => cddl::Parser::parse(cddl::Rule::cddl, input).map(Ast::Cddl)?, + }; + Ok(ast) } diff --git a/rust/cbork-cddl-parser/src/processor/expr.rs b/rust/cbork-cddl-parser/src/processor/expr.rs new file mode 100644 index 000000000..9f60b47fe --- /dev/null +++ b/rust/cbork-cddl-parser/src/processor/expr.rs @@ -0,0 +1,19 @@ +//! A `CddlExpr` trait implementations + +use super::CddlExpr; +use crate::parser::{cddl, rfc_8610, rfc_9165}; + +impl CddlExpr for rfc_8610::Rule { + const CDDL: Self = rfc_8610::Rule::cddl; + const RULE: Self = rfc_8610::Rule::rule; +} + +impl CddlExpr for rfc_9165::Rule { + const CDDL: Self = rfc_9165::Rule::cddl; + const RULE: Self = rfc_9165::Rule::rule; +} + +impl CddlExpr for cddl::Rule { + const CDDL: Self = cddl::Rule::cddl; + const RULE: Self = cddl::Rule::rule; +} diff --git a/rust/cbork-cddl-parser/src/processor/mod.rs b/rust/cbork-cddl-parser/src/processor/mod.rs new file mode 100644 index 000000000..2ccfee352 --- /dev/null +++ b/rust/cbork-cddl-parser/src/processor/mod.rs @@ -0,0 +1,57 @@ +//! A CDDL AST processor + +mod expr; + +use anyhow::{anyhow, ensure}; +use pest::{iterators::Pairs, RuleType}; + +use crate::parser::Ast; + +/// A helper generic trait for representing a whole CDDL grammar with all extensions. +trait CddlExpr: RuleType { + /// `cddl` rule + const CDDL: Self; + /// `rule` rule + const RULE: Self; +} + +/// Processes the AST. +#[allow(dead_code)] +pub(crate) fn process_ast(ast: Ast) -> anyhow::Result<()> { + match ast { + Ast::Rfc8610(pairs) => process_ast_impl(pairs), + Ast::Rfc9165(pairs) => process_ast_impl(pairs), + Ast::Cddl(pairs) => process_ast_impl(pairs), + } +} + +/// Process AST implementation +fn process_ast_impl(mut ast: Pairs<'_, E>) -> anyhow::Result<()> { + let ast_root = ast.next().ok_or(anyhow!("Empty AST"))?; + ensure!( + ast_root.as_rule() == E::CDDL && ast.next().is_none(), + "AST must have only one root rule, which must be a `cddl` rule." + ); + + let pairs = ast_root.into_inner(); + + for pair in pairs { + if pair.as_rule() == E::RULE {} + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{parser::parse_cddl, Extension}; + + #[test] + fn test() { + let mut file = include_str!("../../tests/cddl/valid_rfc8610_simple_1.cddl").to_string(); + + let ast = parse_cddl(&mut file, &Extension::CDDL).unwrap(); + process_ast(ast).unwrap(); + } +} From 5932610df895fc18c94165f4ae34421f1019fcfe Mon Sep 17 00:00:00 2001 From: Mr-Leshiy Date: Tue, 5 Nov 2024 20:54:36 +0200 Subject: [PATCH 04/10] rename `rule` to `expr` and some simple expression processing functions --- .../src/grammar/cddl_test.pest | 2 +- .../src/grammar/rfc_8610.pest | 4 +-- rust/cbork-cddl-parser/src/processor/expr.rs | 15 ++++++-- rust/cbork-cddl-parser/src/processor/mod.rs | 16 ++++++--- rust/cbork-cddl-parser/src/processor/rules.rs | 34 +++++++++++++++++++ 5 files changed, 61 insertions(+), 10 deletions(-) create mode 100644 rust/cbork-cddl-parser/src/processor/rules.rs diff --git a/rust/cbork-cddl-parser/src/grammar/cddl_test.pest b/rust/cbork-cddl-parser/src/grammar/cddl_test.pest index 4eb04bda5..726e522c8 100644 --- a/rust/cbork-cddl-parser/src/grammar/cddl_test.pest +++ b/rust/cbork-cddl-parser/src/grammar/cddl_test.pest @@ -8,7 +8,7 @@ // cspell: words grpchoice grpent memberkey bareword optcom /// Test Expression for the `rule` Rule. -rule_TEST = ${ SOI ~ rule ~ EOI } +rule_TEST = ${ SOI ~ expr ~ EOI } /// Test Expression for the `typename` Rule. typename_TEST = ${ SOI ~ typename ~ EOI } diff --git a/rust/cbork-cddl-parser/src/grammar/rfc_8610.pest b/rust/cbork-cddl-parser/src/grammar/rfc_8610.pest index 4a609ca51..f33f4a9c4 100644 --- a/rust/cbork-cddl-parser/src/grammar/rfc_8610.pest +++ b/rust/cbork-cddl-parser/src/grammar/rfc_8610.pest @@ -7,13 +7,13 @@ cddl = ${ SOI - ~ S ~ (rule ~ S)+ + ~ S ~ (expr ~ S)+ ~ EOI } // ----------------------------------------------------------------------------- // Rules -rule = ${ +expr = ${ (typename ~ genericparm? ~ S ~ assignt ~ S ~ type) | (groupname ~ genericparm? ~ S ~ assigng ~ S ~ grpent) } diff --git a/rust/cbork-cddl-parser/src/processor/expr.rs b/rust/cbork-cddl-parser/src/processor/expr.rs index 9f60b47fe..99a584828 100644 --- a/rust/cbork-cddl-parser/src/processor/expr.rs +++ b/rust/cbork-cddl-parser/src/processor/expr.rs @@ -5,15 +5,24 @@ use crate::parser::{cddl, rfc_8610, rfc_9165}; impl CddlExpr for rfc_8610::Rule { const CDDL: Self = rfc_8610::Rule::cddl; - const RULE: Self = rfc_8610::Rule::rule; + const EXPR: Self = rfc_8610::Rule::expr; + const GENERIC_PARAM: Self = rfc_8610::Rule::genericparm; + const GROUPNAME: Self = rfc_8610::Rule::groupname; + const TYPENAME: Self = rfc_8610::Rule::typename; } impl CddlExpr for rfc_9165::Rule { const CDDL: Self = rfc_9165::Rule::cddl; - const RULE: Self = rfc_9165::Rule::rule; + const EXPR: Self = rfc_9165::Rule::expr; + const GENERIC_PARAM: Self = rfc_9165::Rule::genericparm; + const GROUPNAME: Self = rfc_9165::Rule::groupname; + const TYPENAME: Self = rfc_9165::Rule::typename; } impl CddlExpr for cddl::Rule { const CDDL: Self = cddl::Rule::cddl; - const RULE: Self = cddl::Rule::rule; + const EXPR: Self = cddl::Rule::expr; + const GENERIC_PARAM: Self = cddl::Rule::genericparm; + const GROUPNAME: Self = cddl::Rule::groupname; + const TYPENAME: Self = cddl::Rule::typename; } diff --git a/rust/cbork-cddl-parser/src/processor/mod.rs b/rust/cbork-cddl-parser/src/processor/mod.rs index 2ccfee352..13d877cd7 100644 --- a/rust/cbork-cddl-parser/src/processor/mod.rs +++ b/rust/cbork-cddl-parser/src/processor/mod.rs @@ -1,9 +1,11 @@ //! A CDDL AST processor mod expr; +mod rules; use anyhow::{anyhow, ensure}; use pest::{iterators::Pairs, RuleType}; +use rules::try_expr; use crate::parser::Ast; @@ -11,8 +13,14 @@ use crate::parser::Ast; trait CddlExpr: RuleType { /// `cddl` rule const CDDL: Self; - /// `rule` rule - const RULE: Self; + /// `expr` rule + const EXPR: Self; + /// `typename` rule + const TYPENAME: Self; + /// `groupname` rule + const GROUPNAME: Self; + /// `genericparm` rule + const GENERIC_PARAM: Self; } /// Processes the AST. @@ -27,7 +35,7 @@ pub(crate) fn process_ast(ast: Ast) -> anyhow::Result<()> { /// Process AST implementation fn process_ast_impl(mut ast: Pairs<'_, E>) -> anyhow::Result<()> { - let ast_root = ast.next().ok_or(anyhow!("Empty AST"))?; + let ast_root = ast.next().ok_or(anyhow!("Missing `cddl` rule."))?; ensure!( ast_root.as_rule() == E::CDDL && ast.next().is_none(), "AST must have only one root rule, which must be a `cddl` rule." @@ -36,7 +44,7 @@ fn process_ast_impl(mut ast: Pairs<'_, E>) -> anyhow::Result<()> { let pairs = ast_root.into_inner(); for pair in pairs { - if pair.as_rule() == E::RULE {} + try_expr(pair)?; } Ok(()) diff --git a/rust/cbork-cddl-parser/src/processor/rules.rs b/rust/cbork-cddl-parser/src/processor/rules.rs new file mode 100644 index 000000000..67972beb9 --- /dev/null +++ b/rust/cbork-cddl-parser/src/processor/rules.rs @@ -0,0 +1,34 @@ +//! Processing pest rules in the CDDL grammar. + +#![allow(missing_docs, clippy::missing_docs_in_private_items)] + +use anyhow::{anyhow, bail}; +use pest::iterators::Pair; + +use super::CddlExpr; + +pub(crate) fn try_expr(pair: Pair<'_, E>) -> anyhow::Result<()> { + let rule = pair.as_rule(); + if rule == E::EXPR { + let mut inner = pair.into_inner(); + + let name = inner + .next() + .ok_or(anyhow!("Missing `typename` or `groupname`."))?; + try_name(name)?; + Ok(()) + } else { + bail!("Not a `expr` rule, got {rule:?}."); + } +} + +pub(crate) fn try_name(pair: Pair<'_, E>) -> anyhow::Result<()> { + let rule = pair.as_rule(); + if rule == E::TYPENAME || rule == E::GROUPNAME { + let mut inner = pair.into_inner(); + let _name = inner.next().ok_or(anyhow!("Missing `id` rule."))?; + Ok(()) + } else { + bail!("Not a `typename` or `groupname` rule, got {rule:?}."); + } +} From 6830ae313c23cc9caf71d80b4cdd056c6e3a73bf Mon Sep 17 00:00:00 2001 From: Mr-Leshiy Date: Thu, 7 Nov 2024 16:42:37 +0200 Subject: [PATCH 05/10] wip --- rust/cbork-cddl-parser/src/lib.rs | 5 +- .../cbork-cddl-parser/src/preprocessor/mod.rs | 66 +++++++++++++++++++ rust/cbork-cddl-parser/src/processor/expr.rs | 28 -------- rust/cbork-cddl-parser/src/processor/mod.rs | 65 ------------------ rust/cbork-cddl-parser/src/processor/rules.rs | 34 ---------- 5 files changed, 69 insertions(+), 129 deletions(-) create mode 100644 rust/cbork-cddl-parser/src/preprocessor/mod.rs delete mode 100644 rust/cbork-cddl-parser/src/processor/expr.rs delete mode 100644 rust/cbork-cddl-parser/src/processor/mod.rs delete mode 100644 rust/cbork-cddl-parser/src/processor/rules.rs diff --git a/rust/cbork-cddl-parser/src/lib.rs b/rust/cbork-cddl-parser/src/lib.rs index cf7c3c4b9..5ac2f7754 100644 --- a/rust/cbork-cddl-parser/src/lib.rs +++ b/rust/cbork-cddl-parser/src/lib.rs @@ -1,7 +1,7 @@ //! A parser for CDDL, utilized for parsing in accordance with RFC 8610. mod parser; -mod processor; +mod preprocessor; pub use crate::parser::cddl_test; @@ -23,6 +23,7 @@ pub enum Extension { /// /// - If there is an issue with parsing the CDDL input. pub fn validate_cddl(input: &mut String, extension: &Extension) -> anyhow::Result<()> { - parser::parse_cddl(input, extension)?; + let ast = parser::parse_cddl(input, extension)?; + preprocessor::process_ast(ast)?; Ok(()) } diff --git a/rust/cbork-cddl-parser/src/preprocessor/mod.rs b/rust/cbork-cddl-parser/src/preprocessor/mod.rs new file mode 100644 index 000000000..6b4e743a5 --- /dev/null +++ b/rust/cbork-cddl-parser/src/preprocessor/mod.rs @@ -0,0 +1,66 @@ +//! A CDDL AST preprocessor. +//! First processing step, which takes a CDDL `AST` and returning a list of CDDL +//! `Expression`. +//! +//! Preprocessor steps: +//! - Resolve #include and #import directives, by just adding the imported rules into the +//! final expression list +//! - Resolves all generics by taking the generic arguments and substituting it. + +use anyhow::{anyhow, ensure}; +use pest::{ + iterators::{Pair, Pairs}, + RuleType, +}; + +use crate::parser::{cddl, rfc_8610, rfc_9165, Ast}; + +/// Processes the AST. +pub(crate) fn process_ast(ast: Ast) -> anyhow::Result<()> { + match ast { + Ast::Rfc8610(ast) => { + let _exprs = process_root(ast, rfc_8610::Rule::cddl, rfc_8610::Rule::expr)?; + }, + Ast::Rfc9165(ast) => { + let _exprs = process_root(ast, rfc_9165::Rule::cddl, rfc_9165::Rule::expr)?; + }, + Ast::Cddl(ast) => { + let exprs = process_root(ast, cddl::Rule::cddl, cddl::Rule::expr)?; + + for expr in exprs { + println!("{:?}", expr.as_rule()); + } + }, + } + Ok(()) +} + +/// Process the root rule of the AST. +/// Returns a vector of expressions of the underlying AST. +fn process_root( + mut ast: Pairs<'_, R>, root_rule: R, expr_rule: R, +) -> anyhow::Result>> { + let ast_root = ast.next().ok_or(anyhow!("Empty AST."))?; + ensure!( + ast_root.as_rule() == root_rule && ast.next().is_none(), + "AST must have only one root rule, which must be a `{root_rule:?}` rule." + ); + Ok(ast_root + .into_inner() + .filter(|pair| pair.as_rule() == expr_rule) + .collect()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::parse_cddl; + + #[test] + fn it_works() { + let mut cddl = include_str!("../../tests/cddl/valid_rfc8610_simple_1.cddl").to_string(); + + let ast = parse_cddl(&mut cddl, &crate::Extension::CDDL).unwrap(); + process_ast(ast).unwrap(); + } +} diff --git a/rust/cbork-cddl-parser/src/processor/expr.rs b/rust/cbork-cddl-parser/src/processor/expr.rs deleted file mode 100644 index 99a584828..000000000 --- a/rust/cbork-cddl-parser/src/processor/expr.rs +++ /dev/null @@ -1,28 +0,0 @@ -//! A `CddlExpr` trait implementations - -use super::CddlExpr; -use crate::parser::{cddl, rfc_8610, rfc_9165}; - -impl CddlExpr for rfc_8610::Rule { - const CDDL: Self = rfc_8610::Rule::cddl; - const EXPR: Self = rfc_8610::Rule::expr; - const GENERIC_PARAM: Self = rfc_8610::Rule::genericparm; - const GROUPNAME: Self = rfc_8610::Rule::groupname; - const TYPENAME: Self = rfc_8610::Rule::typename; -} - -impl CddlExpr for rfc_9165::Rule { - const CDDL: Self = rfc_9165::Rule::cddl; - const EXPR: Self = rfc_9165::Rule::expr; - const GENERIC_PARAM: Self = rfc_9165::Rule::genericparm; - const GROUPNAME: Self = rfc_9165::Rule::groupname; - const TYPENAME: Self = rfc_9165::Rule::typename; -} - -impl CddlExpr for cddl::Rule { - const CDDL: Self = cddl::Rule::cddl; - const EXPR: Self = cddl::Rule::expr; - const GENERIC_PARAM: Self = cddl::Rule::genericparm; - const GROUPNAME: Self = cddl::Rule::groupname; - const TYPENAME: Self = cddl::Rule::typename; -} diff --git a/rust/cbork-cddl-parser/src/processor/mod.rs b/rust/cbork-cddl-parser/src/processor/mod.rs deleted file mode 100644 index 13d877cd7..000000000 --- a/rust/cbork-cddl-parser/src/processor/mod.rs +++ /dev/null @@ -1,65 +0,0 @@ -//! A CDDL AST processor - -mod expr; -mod rules; - -use anyhow::{anyhow, ensure}; -use pest::{iterators::Pairs, RuleType}; -use rules::try_expr; - -use crate::parser::Ast; - -/// A helper generic trait for representing a whole CDDL grammar with all extensions. -trait CddlExpr: RuleType { - /// `cddl` rule - const CDDL: Self; - /// `expr` rule - const EXPR: Self; - /// `typename` rule - const TYPENAME: Self; - /// `groupname` rule - const GROUPNAME: Self; - /// `genericparm` rule - const GENERIC_PARAM: Self; -} - -/// Processes the AST. -#[allow(dead_code)] -pub(crate) fn process_ast(ast: Ast) -> anyhow::Result<()> { - match ast { - Ast::Rfc8610(pairs) => process_ast_impl(pairs), - Ast::Rfc9165(pairs) => process_ast_impl(pairs), - Ast::Cddl(pairs) => process_ast_impl(pairs), - } -} - -/// Process AST implementation -fn process_ast_impl(mut ast: Pairs<'_, E>) -> anyhow::Result<()> { - let ast_root = ast.next().ok_or(anyhow!("Missing `cddl` rule."))?; - ensure!( - ast_root.as_rule() == E::CDDL && ast.next().is_none(), - "AST must have only one root rule, which must be a `cddl` rule." - ); - - let pairs = ast_root.into_inner(); - - for pair in pairs { - try_expr(pair)?; - } - - Ok(()) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{parser::parse_cddl, Extension}; - - #[test] - fn test() { - let mut file = include_str!("../../tests/cddl/valid_rfc8610_simple_1.cddl").to_string(); - - let ast = parse_cddl(&mut file, &Extension::CDDL).unwrap(); - process_ast(ast).unwrap(); - } -} diff --git a/rust/cbork-cddl-parser/src/processor/rules.rs b/rust/cbork-cddl-parser/src/processor/rules.rs deleted file mode 100644 index 67972beb9..000000000 --- a/rust/cbork-cddl-parser/src/processor/rules.rs +++ /dev/null @@ -1,34 +0,0 @@ -//! Processing pest rules in the CDDL grammar. - -#![allow(missing_docs, clippy::missing_docs_in_private_items)] - -use anyhow::{anyhow, bail}; -use pest::iterators::Pair; - -use super::CddlExpr; - -pub(crate) fn try_expr(pair: Pair<'_, E>) -> anyhow::Result<()> { - let rule = pair.as_rule(); - if rule == E::EXPR { - let mut inner = pair.into_inner(); - - let name = inner - .next() - .ok_or(anyhow!("Missing `typename` or `groupname`."))?; - try_name(name)?; - Ok(()) - } else { - bail!("Not a `expr` rule, got {rule:?}."); - } -} - -pub(crate) fn try_name(pair: Pair<'_, E>) -> anyhow::Result<()> { - let rule = pair.as_rule(); - if rule == E::TYPENAME || rule == E::GROUPNAME { - let mut inner = pair.into_inner(); - let _name = inner.next().ok_or(anyhow!("Missing `id` rule."))?; - Ok(()) - } else { - bail!("Not a `typename` or `groupname` rule, got {rule:?}."); - } -} From c74d50f585b417f684eff32a22cb03397edd7096 Mon Sep 17 00:00:00 2001 From: Mr-Leshiy Date: Thu, 7 Nov 2024 16:44:23 +0200 Subject: [PATCH 06/10] wip --- rust/cbork-cddl-parser/src/parser.rs | 12 ++++++------ rust/cbork-cddl-parser/src/preprocessor/mod.rs | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/rust/cbork-cddl-parser/src/parser.rs b/rust/cbork-cddl-parser/src/parser.rs index 36572a44b..3aabb940d 100644 --- a/rust/cbork-cddl-parser/src/parser.rs +++ b/rust/cbork-cddl-parser/src/parser.rs @@ -52,10 +52,10 @@ pub mod cddl_test { /// CDDL Standard Postlude - read from an external file const POSTLUDE: &str = include_str!("grammar/postlude.cddl"); -/// Abstract Syntax Tree (AST) representing parsed CDDL syntax. +/// PEST Abstract Syntax Tree (AST) representing parsed CDDL syntax. #[derive(Debug)] #[allow(dead_code)] -pub(crate) enum Ast<'a> { +pub(crate) enum PestAst<'a> { /// Represents the AST for RFC-8610 CDDL rules. Rfc8610(Pairs<'a, rfc_8610::Rule>), /// Represents the AST for RFC-9165 CDDL rules. @@ -82,18 +82,18 @@ pub(crate) enum Ast<'a> { /// - If there is an issue with parsing the CDDL input. pub(crate) fn parse_cddl<'a>( input: &'a mut String, extension: &Extension, -) -> anyhow::Result> { +) -> anyhow::Result> { input.push_str("\n\n"); input.push_str(POSTLUDE); let ast = match extension { Extension::RFC8610 => { - rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input).map(Ast::Rfc8610)? + rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input).map(PestAst::Rfc8610)? }, Extension::RFC9165 => { - rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input).map(Ast::Rfc9165)? + rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input).map(PestAst::Rfc9165)? }, - Extension::CDDL => cddl::Parser::parse(cddl::Rule::cddl, input).map(Ast::Cddl)?, + Extension::CDDL => cddl::Parser::parse(cddl::Rule::cddl, input).map(PestAst::Cddl)?, }; Ok(ast) } diff --git a/rust/cbork-cddl-parser/src/preprocessor/mod.rs b/rust/cbork-cddl-parser/src/preprocessor/mod.rs index 6b4e743a5..d2192ff76 100644 --- a/rust/cbork-cddl-parser/src/preprocessor/mod.rs +++ b/rust/cbork-cddl-parser/src/preprocessor/mod.rs @@ -13,18 +13,18 @@ use pest::{ RuleType, }; -use crate::parser::{cddl, rfc_8610, rfc_9165, Ast}; +use crate::parser::{cddl, rfc_8610, rfc_9165, PestAst}; /// Processes the AST. -pub(crate) fn process_ast(ast: Ast) -> anyhow::Result<()> { +pub(crate) fn process_ast(ast: PestAst) -> anyhow::Result<()> { match ast { - Ast::Rfc8610(ast) => { + PestAst::Rfc8610(ast) => { let _exprs = process_root(ast, rfc_8610::Rule::cddl, rfc_8610::Rule::expr)?; }, - Ast::Rfc9165(ast) => { + PestAst::Rfc9165(ast) => { let _exprs = process_root(ast, rfc_9165::Rule::cddl, rfc_9165::Rule::expr)?; }, - Ast::Cddl(ast) => { + PestAst::Cddl(ast) => { let exprs = process_root(ast, cddl::Rule::cddl, cddl::Rule::expr)?; for expr in exprs { From 01c3e1d15468e856b335ca566ac8c0796bbcf645 Mon Sep 17 00:00:00 2001 From: Mr-Leshiy Date: Sat, 9 Nov 2024 17:41:48 +0200 Subject: [PATCH 07/10] refactor Ast --- rust/cbork-cddl-parser/src/lib.rs | 2 +- rust/cbork-cddl-parser/src/parser.rs | 23 +++++----- .../cbork-cddl-parser/src/preprocessor/mod.rs | 43 +++++++------------ 3 files changed, 30 insertions(+), 38 deletions(-) diff --git a/rust/cbork-cddl-parser/src/lib.rs b/rust/cbork-cddl-parser/src/lib.rs index 5ac2f7754..d7632ed46 100644 --- a/rust/cbork-cddl-parser/src/lib.rs +++ b/rust/cbork-cddl-parser/src/lib.rs @@ -24,6 +24,6 @@ pub enum Extension { /// - If there is an issue with parsing the CDDL input. pub fn validate_cddl(input: &mut String, extension: &Extension) -> anyhow::Result<()> { let ast = parser::parse_cddl(input, extension)?; - preprocessor::process_ast(ast)?; + let _ast = preprocessor::process_ast(ast)?; Ok(()) } diff --git a/rust/cbork-cddl-parser/src/parser.rs b/rust/cbork-cddl-parser/src/parser.rs index 3aabb940d..77a77c1c3 100644 --- a/rust/cbork-cddl-parser/src/parser.rs +++ b/rust/cbork-cddl-parser/src/parser.rs @@ -1,7 +1,7 @@ //! A parser for CDDL using the [pest](https://github.com/pest-parser/pest). //! Utilized for parsing in accordance with RFC-8610, RFC-9165. -use pest::{iterators::Pairs, Parser}; +use pest::{iterators::Pair, Parser}; use crate::Extension; @@ -54,14 +54,13 @@ const POSTLUDE: &str = include_str!("grammar/postlude.cddl"); /// PEST Abstract Syntax Tree (AST) representing parsed CDDL syntax. #[derive(Debug)] -#[allow(dead_code)] -pub(crate) enum PestAst<'a> { +pub(crate) enum Ast<'a> { /// Represents the AST for RFC-8610 CDDL rules. - Rfc8610(Pairs<'a, rfc_8610::Rule>), + Rfc8610(Vec>), /// Represents the AST for RFC-9165 CDDL rules. - Rfc9165(Pairs<'a, rfc_9165::Rule>), + Rfc9165(Vec>), /// Represents the AST for CDDL Modules rules. - Cddl(Pairs<'a, cddl::Rule>), + Cddl(Vec>), } /// Parses and checks semantically a CDDL input string. @@ -82,18 +81,22 @@ pub(crate) enum PestAst<'a> { /// - If there is an issue with parsing the CDDL input. pub(crate) fn parse_cddl<'a>( input: &'a mut String, extension: &Extension, -) -> anyhow::Result> { +) -> anyhow::Result> { input.push_str("\n\n"); input.push_str(POSTLUDE); let ast = match extension { Extension::RFC8610 => { - rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input).map(PestAst::Rfc8610)? + rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input) + .map(|p| Ast::Rfc8610(p.collect()))? }, Extension::RFC9165 => { - rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input).map(PestAst::Rfc9165)? + rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input) + .map(|p| Ast::Rfc9165(p.collect()))? + }, + Extension::CDDL => { + cddl::Parser::parse(cddl::Rule::cddl, input).map(|p| Ast::Cddl(p.collect()))? }, - Extension::CDDL => cddl::Parser::parse(cddl::Rule::cddl, input).map(PestAst::Cddl)?, }; Ok(ast) } diff --git a/rust/cbork-cddl-parser/src/preprocessor/mod.rs b/rust/cbork-cddl-parser/src/preprocessor/mod.rs index d2192ff76..4458f29bf 100644 --- a/rust/cbork-cddl-parser/src/preprocessor/mod.rs +++ b/rust/cbork-cddl-parser/src/preprocessor/mod.rs @@ -1,48 +1,37 @@ //! A CDDL AST preprocessor. -//! First processing step, which takes a CDDL `AST` and returning a list of CDDL -//! `Expression`. //! -//! Preprocessor steps: -//! - Resolve #include and #import directives, by just adding the imported rules into the -//! final expression list -//! - Resolves all generics by taking the generic arguments and substituting it. +//! - Validates the root rule of the AST to be a `cddl` rule. +//! - Filters out all rules that are not `expr` rules. +//! - (TODO) Resolve #include and #import directives, by just adding the imported rules +//! into the final expression list use anyhow::{anyhow, ensure}; -use pest::{ - iterators::{Pair, Pairs}, - RuleType, -}; +use pest::{iterators::Pair, RuleType}; -use crate::parser::{cddl, rfc_8610, rfc_9165, PestAst}; +use crate::parser::{cddl, rfc_8610, rfc_9165, Ast}; /// Processes the AST. -pub(crate) fn process_ast(ast: PestAst) -> anyhow::Result<()> { +pub(crate) fn process_ast(ast: Ast) -> anyhow::Result { match ast { - PestAst::Rfc8610(ast) => { - let _exprs = process_root(ast, rfc_8610::Rule::cddl, rfc_8610::Rule::expr)?; + Ast::Rfc8610(ast) => { + process_root(ast, rfc_8610::Rule::cddl, rfc_8610::Rule::expr).map(Ast::Rfc8610) }, - PestAst::Rfc9165(ast) => { - let _exprs = process_root(ast, rfc_9165::Rule::cddl, rfc_9165::Rule::expr)?; - }, - PestAst::Cddl(ast) => { - let exprs = process_root(ast, cddl::Rule::cddl, cddl::Rule::expr)?; - - for expr in exprs { - println!("{:?}", expr.as_rule()); - } + Ast::Rfc9165(ast) => { + process_root(ast, rfc_9165::Rule::cddl, rfc_9165::Rule::expr).map(Ast::Rfc9165) }, + Ast::Cddl(ast) => process_root(ast, cddl::Rule::cddl, cddl::Rule::expr).map(Ast::Cddl), } - Ok(()) } /// Process the root rule of the AST. /// Returns a vector of expressions of the underlying AST. fn process_root( - mut ast: Pairs<'_, R>, root_rule: R, expr_rule: R, + ast: Vec>, root_rule: R, expr_rule: R, ) -> anyhow::Result>> { - let ast_root = ast.next().ok_or(anyhow!("Empty AST."))?; + let mut ast_iter = ast.into_iter(); + let ast_root = ast_iter.next().ok_or(anyhow!("Empty AST."))?; ensure!( - ast_root.as_rule() == root_rule && ast.next().is_none(), + ast_root.as_rule() == root_rule && ast_iter.next().is_none(), "AST must have only one root rule, which must be a `{root_rule:?}` rule." ); Ok(ast_root From e342c95b224e185973d7e686b8a5cf98535de807 Mon Sep 17 00:00:00 2001 From: Mr-Leshiy Date: Sat, 9 Nov 2024 17:59:50 +0200 Subject: [PATCH 08/10] wip --- .../{preprocessor/mod.rs => preprocessor.rs} | 33 +++++++------------ 1 file changed, 11 insertions(+), 22 deletions(-) rename rust/cbork-cddl-parser/src/{preprocessor/mod.rs => preprocessor.rs} (53%) diff --git a/rust/cbork-cddl-parser/src/preprocessor/mod.rs b/rust/cbork-cddl-parser/src/preprocessor.rs similarity index 53% rename from rust/cbork-cddl-parser/src/preprocessor/mod.rs rename to rust/cbork-cddl-parser/src/preprocessor.rs index 4458f29bf..b20cc849d 100644 --- a/rust/cbork-cddl-parser/src/preprocessor/mod.rs +++ b/rust/cbork-cddl-parser/src/preprocessor.rs @@ -14,19 +14,22 @@ use crate::parser::{cddl, rfc_8610, rfc_9165, Ast}; pub(crate) fn process_ast(ast: Ast) -> anyhow::Result { match ast { Ast::Rfc8610(ast) => { - process_root(ast, rfc_8610::Rule::cddl, rfc_8610::Rule::expr).map(Ast::Rfc8610) + process_root_and_filter(ast, rfc_8610::Rule::cddl, rfc_8610::Rule::expr) + .map(Ast::Rfc8610) }, Ast::Rfc9165(ast) => { - process_root(ast, rfc_9165::Rule::cddl, rfc_9165::Rule::expr).map(Ast::Rfc9165) + process_root_and_filter(ast, rfc_9165::Rule::cddl, rfc_9165::Rule::expr) + .map(Ast::Rfc9165) + }, + Ast::Cddl(ast) => { + process_root_and_filter(ast, cddl::Rule::cddl, cddl::Rule::expr).map(Ast::Cddl) }, - Ast::Cddl(ast) => process_root(ast, cddl::Rule::cddl, cddl::Rule::expr).map(Ast::Cddl), } } -/// Process the root rule of the AST. -/// Returns a vector of expressions of the underlying AST. -fn process_root( - ast: Vec>, root_rule: R, expr_rule: R, +/// Process the root rule of the AST and filter out all non `expected_rule` rules. +fn process_root_and_filter( + ast: Vec>, root_rule: R, expected_rule: R, ) -> anyhow::Result>> { let mut ast_iter = ast.into_iter(); let ast_root = ast_iter.next().ok_or(anyhow!("Empty AST."))?; @@ -36,20 +39,6 @@ fn process_root( ); Ok(ast_root .into_inner() - .filter(|pair| pair.as_rule() == expr_rule) + .filter(|pair| pair.as_rule() == expected_rule) .collect()) } - -#[cfg(test)] -mod tests { - use super::*; - use crate::parser::parse_cddl; - - #[test] - fn it_works() { - let mut cddl = include_str!("../../tests/cddl/valid_rfc8610_simple_1.cddl").to_string(); - - let ast = parse_cddl(&mut cddl, &crate::Extension::CDDL).unwrap(); - process_ast(ast).unwrap(); - } -} From f7dd70c8d947a391a28abff0fb4bad769e64eb63 Mon Sep 17 00:00:00 2001 From: Mr-Leshiy Date: Sun, 10 Nov 2024 15:36:55 +0200 Subject: [PATCH 09/10] remove unused deps --- rust/cbork-cddl-parser/Cargo.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/rust/cbork-cddl-parser/Cargo.toml b/rust/cbork-cddl-parser/Cargo.toml index 7abfef170..312bab140 100644 --- a/rust/cbork-cddl-parser/Cargo.toml +++ b/rust/cbork-cddl-parser/Cargo.toml @@ -15,8 +15,6 @@ repository.workspace = true workspace = true [dependencies] -derive_more = {version = "1.0.0", features = ["from","display"] } pest = { version = "2.7.13", features = ["std", "pretty-print", "memchr", "const_prec_climber"] } pest_derive = { version = "2.7.13", features = ["grammar-extras"] } -thiserror = "1.0.64" anyhow = "1.0.89" From 4e4deecb64799e1e170f0ca94025b35c24751dc3 Mon Sep 17 00:00:00 2001 From: Mr-Leshiy Date: Sun, 17 Nov 2024 18:21:36 +0200 Subject: [PATCH 10/10] move CDDLTestParser to the tests::common mod --- rust/cbork-cddl-parser/src/lib.rs | 2 -- rust/cbork-cddl-parser/src/parser.rs | 14 -------------- rust/cbork-cddl-parser/tests/byte_sequences.rs | 4 +--- rust/cbork-cddl-parser/tests/character_sets.rs | 7 +++---- rust/cbork-cddl-parser/tests/comments.rs | 4 +--- rust/cbork-cddl-parser/tests/common/mod.rs | 14 ++++++++++---- rust/cbork-cddl-parser/tests/group_elements.rs | 4 +--- rust/cbork-cddl-parser/tests/identifiers.rs | 8 ++------ rust/cbork-cddl-parser/tests/literal_values.rs | 4 +--- rust/cbork-cddl-parser/tests/rules.rs | 8 ++------ rust/cbork-cddl-parser/tests/text_sequences.rs | 4 +--- rust/cbork-cddl-parser/tests/type_declarations.rs | 8 ++------ 12 files changed, 24 insertions(+), 57 deletions(-) diff --git a/rust/cbork-cddl-parser/src/lib.rs b/rust/cbork-cddl-parser/src/lib.rs index d7632ed46..8d8d163c2 100644 --- a/rust/cbork-cddl-parser/src/lib.rs +++ b/rust/cbork-cddl-parser/src/lib.rs @@ -3,8 +3,6 @@ mod parser; mod preprocessor; -pub use crate::parser::cddl_test; - /// Represents different grammar extensions for handling CDDL specifications. pub enum Extension { /// RFC8610 ONLY limited grammar. diff --git a/rust/cbork-cddl-parser/src/parser.rs b/rust/cbork-cddl-parser/src/parser.rs index 77a77c1c3..ae4b16ed2 100644 --- a/rust/cbork-cddl-parser/src/parser.rs +++ b/rust/cbork-cddl-parser/src/parser.rs @@ -35,20 +35,6 @@ pub(crate) mod cddl { pub(crate) struct Parser; } -/// Full CDDL syntax test parser. -/// Parser with DEBUG rules. These rules are only used in tests. -#[allow(missing_docs)] -pub mod cddl_test { - pub use pest::Parser; - /// A Pest test parser for a full CDDL syntax. - #[derive(pest_derive::Parser)] - #[grammar = "grammar/rfc_8610.pest"] - #[grammar = "grammar/rfc_9165.pest"] - #[grammar = "grammar/cddl_modules.pest"] - #[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests. - pub struct CDDLTestParser; -} - /// CDDL Standard Postlude - read from an external file const POSTLUDE: &str = include_str!("grammar/postlude.cddl"); diff --git a/rust/cbork-cddl-parser/tests/byte_sequences.rs b/rust/cbork-cddl-parser/tests/byte_sequences.rs index 6f5ea0215..9686e1f24 100644 --- a/rust/cbork-cddl-parser/tests/byte_sequences.rs +++ b/rust/cbork-cddl-parser/tests/byte_sequences.rs @@ -1,9 +1,7 @@ // cspell: words hexpair rstuvw abcdefghijklmnopqrstuvwyz rstuvw Xhhb Bhcm -use cbork_cddl_parser::cddl_test::Rule; - mod common; -use common::byte_sequences::*; +use common::{byte_sequences::*, Rule}; #[test] /// Test if the `HEX_PAIR` rule passes properly. diff --git a/rust/cbork-cddl-parser/tests/character_sets.rs b/rust/cbork-cddl-parser/tests/character_sets.rs index 46a3e2424..80d5d6663 100644 --- a/rust/cbork-cddl-parser/tests/character_sets.rs +++ b/rust/cbork-cddl-parser/tests/character_sets.rs @@ -1,9 +1,8 @@ // cspell: words PCHAR pchar BCHAR bchar SESC sesc SCHAR schar fffd fffe -use cbork_cddl_parser::{ - self, - cddl_test::{CDDLTestParser, Parser, Rule}, -}; +mod common; +use common::{CDDLTestParser, Rule}; +use pest::Parser; #[test] /// Test if the `WHITESPACE` rule passes properly. diff --git a/rust/cbork-cddl-parser/tests/comments.rs b/rust/cbork-cddl-parser/tests/comments.rs index 435ab3633..99403aa20 100644 --- a/rust/cbork-cddl-parser/tests/comments.rs +++ b/rust/cbork-cddl-parser/tests/comments.rs @@ -1,7 +1,5 @@ -use cbork_cddl_parser::{self, cddl_test::Rule}; - mod common; -use common::comments::*; +use common::{comments::*, Rule}; #[test] /// Test if the `COMMENT` rule passes properly. diff --git a/rust/cbork-cddl-parser/tests/common/mod.rs b/rust/cbork-cddl-parser/tests/common/mod.rs index 107a95527..04719901c 100644 --- a/rust/cbork-cddl-parser/tests/common/mod.rs +++ b/rust/cbork-cddl-parser/tests/common/mod.rs @@ -1,7 +1,4 @@ -use cbork_cddl_parser::{ - self, - cddl_test::{CDDLTestParser, Parser, Rule}, -}; +use pest::Parser; pub(crate) mod byte_sequences; pub(crate) mod comments; @@ -12,7 +9,16 @@ pub(crate) mod rules; pub(crate) mod text_sequences; pub(crate) mod type_declarations; +/// A Pest test parser for a full CDDL syntax. +#[derive(pest_derive::Parser)] +#[grammar = "grammar/rfc_8610.pest"] +#[grammar = "grammar/rfc_9165.pest"] +#[grammar = "grammar/cddl_modules.pest"] +#[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests. +pub struct CDDLTestParser; + /// # Panics +#[allow(dead_code)] pub(crate) fn check_tests_rule(rule_type: Rule, passes: &[&str], fails: &[&str]) { for test in passes { let parse = CDDLTestParser::parse(rule_type, test); diff --git a/rust/cbork-cddl-parser/tests/group_elements.rs b/rust/cbork-cddl-parser/tests/group_elements.rs index e735ad1a2..1e52424ca 100644 --- a/rust/cbork-cddl-parser/tests/group_elements.rs +++ b/rust/cbork-cddl-parser/tests/group_elements.rs @@ -1,10 +1,8 @@ // cspell: words OPTCOM MEMBERKEY bareword tstr GRPENT GRPCHOICE // cspell: words optcom memberkey grpent grpchoice -use cbork_cddl_parser::{self, cddl_test::Rule}; - mod common; -use common::{group_elements::*, identifiers::*}; +use common::{group_elements::*, identifiers::*, Rule}; #[test] /// Test if the `occur` rule passes properly. diff --git a/rust/cbork-cddl-parser/tests/identifiers.rs b/rust/cbork-cddl-parser/tests/identifiers.rs index 63a0c80f0..04623e772 100644 --- a/rust/cbork-cddl-parser/tests/identifiers.rs +++ b/rust/cbork-cddl-parser/tests/identifiers.rs @@ -1,12 +1,8 @@ // cspell: words aname groupsocket typesocket groupsocket -use cbork_cddl_parser::{ - self, - cddl_test::{CDDLTestParser, Parser, Rule}, -}; - mod common; -use common::identifiers::*; +use common::{identifiers::*, CDDLTestParser, Rule}; +use pest::Parser; #[test] /// Check if the name components pass properly. diff --git a/rust/cbork-cddl-parser/tests/literal_values.rs b/rust/cbork-cddl-parser/tests/literal_values.rs index 727dc7fd3..21ad645ed 100644 --- a/rust/cbork-cddl-parser/tests/literal_values.rs +++ b/rust/cbork-cddl-parser/tests/literal_values.rs @@ -2,10 +2,8 @@ use std::ops::Deref; -use cbork_cddl_parser::{self, cddl_test::Rule}; - mod common; -use common::{byte_sequences::*, literal_values::*, text_sequences::*}; +use common::{byte_sequences::*, literal_values::*, text_sequences::*, Rule}; #[test] /// Test if the `uint` rule passes properly. diff --git a/rust/cbork-cddl-parser/tests/rules.rs b/rust/cbork-cddl-parser/tests/rules.rs index b33b9239e..1340e97bb 100644 --- a/rust/cbork-cddl-parser/tests/rules.rs +++ b/rust/cbork-cddl-parser/tests/rules.rs @@ -1,13 +1,9 @@ // cspell: words GENERICARG bigfloat ASSIGNG GROUPNAME tstr genericarg GENERICARG // cspell: words assigng assignt ASSIGNT GENERICPARM genericparm -use cbork_cddl_parser::{ - self, - cddl_test::{CDDLTestParser, Parser, Rule}, -}; - mod common; -use common::{rules::*, type_declarations::*}; +use common::{rules::*, type_declarations::*, CDDLTestParser, Rule}; +use pest::Parser; #[test] /// Test if the `genericarg` rule passes properly. diff --git a/rust/cbork-cddl-parser/tests/text_sequences.rs b/rust/cbork-cddl-parser/tests/text_sequences.rs index 4c5270489..ac1d61178 100644 --- a/rust/cbork-cddl-parser/tests/text_sequences.rs +++ b/rust/cbork-cddl-parser/tests/text_sequences.rs @@ -1,7 +1,5 @@ -use cbork_cddl_parser::{self, cddl_test::Rule}; - mod common; -use common::text_sequences::*; +use common::{text_sequences::*, Rule}; #[test] /// Test if the `S` rule passes properly. diff --git a/rust/cbork-cddl-parser/tests/type_declarations.rs b/rust/cbork-cddl-parser/tests/type_declarations.rs index 14f024e14..274920417 100644 --- a/rust/cbork-cddl-parser/tests/type_declarations.rs +++ b/rust/cbork-cddl-parser/tests/type_declarations.rs @@ -1,13 +1,9 @@ // cspell: words CTLOP aname groupsocket typesocket RANGEOP tstr ctlop // cspell: words rangeop RANGEOP -use cbork_cddl_parser::{ - self, - cddl_test::{CDDLTestParser, Parser, Rule}, -}; - mod common; -use common::type_declarations::*; +use common::{type_declarations::*, CDDLTestParser, Rule}; +use pest::Parser; #[test] /// Test if the `ctlop` rule passes properly.