Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(rust/cbork): Add a CDDL preprocessing step #80

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions rust/cbork-cddl-parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ repository.workspace = true
workspace = true

[dependencies]
derive_more = {version = "1.0.0", features = ["from","display"] }
pest = { version = "2.7.13", features = ["std", "pretty-print", "memchr", "const_prec_climber"] }
pest_derive = { version = "2.7.13", features = ["grammar-extras"] }
thiserror = "1.0.64"
anyhow = "1.0.89"
2 changes: 1 addition & 1 deletion rust/cbork-cddl-parser/src/grammar/cddl_test.pest
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// cspell: words grpchoice grpent memberkey bareword optcom

/// Test Expression for the `rule` Rule.
rule_TEST = ${ SOI ~ rule ~ EOI }
rule_TEST = ${ SOI ~ expr ~ EOI }

/// Test Expression for the `typename` Rule.
typename_TEST = ${ SOI ~ typename ~ EOI }
Expand Down
4 changes: 2 additions & 2 deletions rust/cbork-cddl-parser/src/grammar/rfc_8610.pest
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@

cddl = ${
SOI
~ S ~ (rule ~ S)+
~ S ~ (expr ~ S)+
~ EOI
}

// -----------------------------------------------------------------------------
// Rules
rule = ${
expr = ${
(typename ~ genericparm? ~ S ~ assignt ~ S ~ type)
| (groupname ~ genericparm? ~ S ~ assigng ~ S ~ grpent)
}
Expand Down
161 changes: 14 additions & 147 deletions rust/cbork-cddl-parser/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,160 +1,27 @@
//! A parser for CDDL, utilized for parsing in accordance with RFC 8610.

#![allow(missing_docs)] // TODO(apskhem): Temporary, to be removed in a subsequent PR
mod parser;
mod preprocessor;

use derive_more::{Display, From};
pub use pest::Parser;
use pest::{error::Error, iterators::Pairs};

pub mod rfc_8610 {
pub use pest::Parser;

#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
pub struct RFC8610Parser;
}

pub mod rfc_9165 {
pub use pest::Parser;

#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
#[grammar = "grammar/rfc_9165.pest"]
pub struct RFC8610Parser;
}

pub mod cddl {
pub use pest::Parser;

#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
#[grammar = "grammar/rfc_9165.pest"]
#[grammar = "grammar/cddl_modules.pest"]
pub struct RFC8610Parser;
}

pub mod cddl_test {
pub use pest::Parser;

// Parser with DEBUG rules. These rules are only used in tests.
#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
#[grammar = "grammar/rfc_9165.pest"]
#[grammar = "grammar/cddl_modules.pest"]
#[grammar = "grammar/cddl_test.pest"] // Ideally this would only be used in tests.
pub struct CDDLTestParser;
}

/// Represents different parser extensions for handling CDDL specifications.
/// Represents different grammar extensions for handling CDDL specifications.
pub enum Extension {
/// RFC8610 ONLY limited parser.
RFC8610Parser,
/// RFC8610 and RFC9165 limited parser.
RFC9165Parser,
/// RFC8610, RFC9165, and CDDL modules.
CDDLParser,
}

// CDDL Standard Postlude - read from an external file
pub const POSTLUDE: &str = include_str!("grammar/postlude.cddl");

/// Abstract Syntax Tree (AST) representing parsed CDDL syntax.
// TODO: this is temporary. need to add more pragmatic nodes
#[derive(Debug)]
pub enum AST<'a> {
/// Represents the AST for RFC 8610 CDDL rules.
RFC8610(Pairs<'a, rfc_8610::Rule>),
/// Represents the AST for RFC 9165 CDDL rules.
RFC9165(Pairs<'a, rfc_9165::Rule>),
/// Represents the AST for CDDL Modules rules.
CDDL(Pairs<'a, cddl::Rule>),
/// RFC8610 ONLY limited grammar.
RFC8610,
/// RFC8610 and RFC9165 limited grammar.
RFC9165,
/// RFC8610, RFC9165, and CDDL grammar.
CDDL,
}

/// Represents different types of errors related to different types of extension.
#[derive(Display, Debug)]
pub enum CDDLErrorType {
/// An error related to RFC 8610 extension.
RFC8610(Error<rfc_8610::Rule>),
/// An error related to RFC 9165 extension.
RFC9165(Error<rfc_9165::Rule>),
/// An error related to CDDL modules extension.
CDDL(Error<cddl::Rule>),
}

/// Represents an error that may occur during CDDL parsing.
#[derive(thiserror::Error, Debug, From)]
#[error("{0}")]
pub struct CDDLError(CDDLErrorType);

/// Parses and checks semantically a CDDL input string.
///
/// # Arguments
///
/// * `input` - A string containing the CDDL input to be parsed.
///
/// # Returns
///
/// Returns `Ok(())` if parsing is successful, otherwise returns an `Err` containing
/// a boxed `CDDLError` indicating the parsing error.
/// Verifies semantically a CDDL input string.
///
/// # Errors
///
/// This function may return an error in the following cases:
///
/// - If there is an issue with parsing the CDDL input.
///
/// # Examples
///
/// ```rs
/// use cbork_cddl_parser::{parse_cddl, Extension};
/// use std::fs;
///
/// let mut input = fs::read_to_string("path/to/your/file.cddl").unwrap();
/// let result = parse_cddl(&mut input, &Extension::CDDLParser);
/// assert!(result.is_ok());
/// ```
pub fn parse_cddl<'a>(
input: &'a mut String, extension: &Extension,
) -> Result<AST<'a>, Box<CDDLError>> {
input.push_str("\n\n");
input.push_str(POSTLUDE);

let result = match extension {
Extension::RFC8610Parser => {
rfc_8610::RFC8610Parser::parse(rfc_8610::Rule::cddl, input)
.map(AST::RFC8610)
.map_err(CDDLErrorType::RFC8610)
},
Extension::RFC9165Parser => {
rfc_9165::RFC8610Parser::parse(rfc_9165::Rule::cddl, input)
.map(AST::RFC9165)
.map_err(CDDLErrorType::RFC9165)
},
Extension::CDDLParser => {
cddl::RFC8610Parser::parse(cddl::Rule::cddl, input)
.map(AST::CDDL)
.map_err(CDDLErrorType::CDDL)
},
};

result.map_err(|e| Box::new(CDDLError::from(e)))
}

#[cfg(test)]
mod tests {
use crate::*;

#[test]
fn it_works() {
let mut input = String::new();
let result = parse_cddl(&mut input, &Extension::CDDLParser);

match result {
Ok(c) => println!("{c:?}"),
Err(e) => {
println!("{e:?}");
println!("{e}");
},
}
}
/// Validates a CDDL input string against the grammar selected by `extension`.
///
/// The input is parsed and the resulting AST is run through the preprocessing step;
/// the processed AST is then discarded, so only success or failure is reported.
///
/// `input` is handed mutably to the parser, which appends the standard postlude to it.
///
/// # Errors
///
/// Returns an error if parsing the input or preprocessing the parsed AST fails.
pub fn validate_cddl(input: &mut String, extension: &Extension) -> anyhow::Result<()> {
    let parsed = parser::parse_cddl(input, extension)?;
    preprocessor::process_ast(parsed).map(drop)
}
88 changes: 88 additions & 0 deletions rust/cbork-cddl-parser/src/parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
//! A parser for CDDL built with [pest](https://github.com/pest-parser/pest).
//! Used for parsing in accordance with RFC-8610 and RFC-9165.

use pest::{iterators::Pair, Parser};

use crate::Extension;

/// RFC-8610 parser.
///
/// Kept in its own module so the parser and its `Rule` type are addressed as
/// `rfc_8610::Parser` and `rfc_8610::Rule`.
// NOTE(review): `missing_docs` is presumably silenced for items emitted by the
// `pest_derive::Parser` derive — confirm, and narrow the allow if possible.
#[allow(missing_docs)]
pub(crate) mod rfc_8610 {
/// A Pest parser for RFC-8610.
///
/// Derived from the `grammar/rfc_8610.pest` grammar file only.
#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
pub(crate) struct Parser;
}

/// RFC-9165 parser.
///
/// Kept in its own module so the parser and its `Rule` type are addressed as
/// `rfc_9165::Parser` and `rfc_9165::Rule`.
// NOTE(review): `missing_docs` is presumably silenced for items emitted by the
// `pest_derive::Parser` derive — confirm, and narrow the allow if possible.
#[allow(missing_docs)]
pub(crate) mod rfc_9165 {
/// A Pest parser for RFC-9165.
///
/// Derived from two grammar files: the RFC-8610 grammar plus the RFC-9165 grammar.
#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
#[grammar = "grammar/rfc_9165.pest"]
pub(crate) struct Parser;
}

/// Full CDDL syntax parser.
///
/// Kept in its own module so the parser and its `Rule` type are addressed as
/// `cddl::Parser` and `cddl::Rule`.
// NOTE(review): `missing_docs` is presumably silenced for items emitted by the
// `pest_derive::Parser` derive — confirm, and narrow the allow if possible.
#[allow(missing_docs)]
pub(crate) mod cddl {
/// A Pest parser for a full CDDL syntax.
///
/// Derived from three grammar files: RFC-8610, RFC-9165 and the CDDL modules grammar.
#[derive(pest_derive::Parser)]
#[grammar = "grammar/rfc_8610.pest"]
#[grammar = "grammar/rfc_9165.pest"]
#[grammar = "grammar/cddl_modules.pest"]
pub(crate) struct Parser;
}

/// CDDL standard postlude, embedded at compile time from `grammar/postlude.cddl`.
///
/// `parse_cddl` appends this text to every input before parsing it.
const POSTLUDE: &str = include_str!("grammar/postlude.cddl");

/// PEST Abstract Syntax Tree (AST) representing parsed CDDL syntax.
///
/// There is one variant per supported grammar. Each variant holds the pest pairs
/// collected from the corresponding parser, typed by that grammar's own `Rule`.
/// The `'a` lifetime ties the pairs to the input string they were parsed from.
#[derive(Debug)]
pub(crate) enum Ast<'a> {
/// Represents the AST for RFC-8610 CDDL rules.
Rfc8610(Vec<Pair<'a, rfc_8610::Rule>>),
/// Represents the AST for RFC-9165 CDDL rules.
Rfc9165(Vec<Pair<'a, rfc_9165::Rule>>),
/// Represents the AST for CDDL Modules rules.
Cddl(Vec<Pair<'a, cddl::Rule>>),
}

/// Parses a CDDL input string with the grammar selected by `extension`.
///
/// Before parsing, a blank line and the standard [`POSTLUDE`] are appended to `input`,
/// so the caller's string is modified in place.
///
/// # Arguments
///
/// * `input` - A string containing the CDDL input to be parsed. It is extended with
///   the postlude, and the returned AST borrows from it.
/// * `extension` - Selects which grammar (and therefore which [`Ast`] variant) is used.
///
/// # Returns
///
/// Returns the [`Ast`] variant matching `extension`, holding the pairs produced by
/// parsing from the grammar's `cddl` rule.
///
/// # Errors
///
/// This function may return an error in the following cases:
///
/// - If there is an issue with parsing the CDDL input.
pub(crate) fn parse_cddl<'a>(
    input: &'a mut String, extension: &Extension,
) -> anyhow::Result<Ast<'a>> {
    input.push_str("\n\n");
    input.push_str(POSTLUDE);

    match extension {
        Extension::RFC8610 => {
            let pairs = rfc_8610::Parser::parse(rfc_8610::Rule::cddl, input)?;
            Ok(Ast::Rfc8610(pairs.collect()))
        },
        Extension::RFC9165 => {
            let pairs = rfc_9165::Parser::parse(rfc_9165::Rule::cddl, input)?;
            Ok(Ast::Rfc9165(pairs.collect()))
        },
        Extension::CDDL => {
            let pairs = cddl::Parser::parse(cddl::Rule::cddl, input)?;
            Ok(Ast::Cddl(pairs.collect()))
        },
    }
}
44 changes: 44 additions & 0 deletions rust/cbork-cddl-parser/src/preprocessor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//! A CDDL AST preprocessor.
//!
//! - Validates the root rule of the AST to be a `cddl` rule.
//! - Filters out all rules that are not `expr` rules.
//! - (TODO) Resolve #include and #import directives, by just adding the imported rules
//! into the final expression list

use anyhow::{anyhow, ensure};
use pest::{iterators::Pair, RuleType};

use crate::parser::{cddl, rfc_8610, rfc_9165, Ast};

/// Runs the preprocessing step over a parsed AST.
///
/// Whichever grammar variant is passed in, its root is validated against that grammar's
/// `cddl` rule and only the `expr` pairs beneath it are kept. The result is wrapped
/// back into the same variant.
///
/// # Errors
///
/// Returns an error if the root validation performed by `process_root_and_filter` fails.
pub(crate) fn process_ast(ast: Ast) -> anyhow::Result<Ast> {
    let processed = match ast {
        Ast::Rfc8610(pairs) => {
            let exprs =
                process_root_and_filter(pairs, rfc_8610::Rule::cddl, rfc_8610::Rule::expr)?;
            Ast::Rfc8610(exprs)
        },
        Ast::Rfc9165(pairs) => {
            let exprs =
                process_root_and_filter(pairs, rfc_9165::Rule::cddl, rfc_9165::Rule::expr)?;
            Ast::Rfc9165(exprs)
        },
        Ast::Cddl(pairs) => {
            let exprs = process_root_and_filter(pairs, cddl::Rule::cddl, cddl::Rule::expr)?;
            Ast::Cddl(exprs)
        },
    };
    Ok(processed)
}

/// Process the root rule of the AST and filter out all non `expected_rule` rules.
///
/// The AST must consist of exactly one top-level pair whose rule is `root_rule`.
/// Only the direct children of that root are inspected; those whose rule equals
/// `expected_rule` are returned, in their original order.
///
/// # Errors
///
/// - If `ast` is empty.
/// - If `ast` has more than one top-level pair, or the single top-level pair is not a
///   `root_rule` pair.
fn process_root_and_filter<R: RuleType>(
    ast: Vec<Pair<'_, R>>, root_rule: R, expected_rule: R,
) -> anyhow::Result<Vec<Pair<'_, R>>> {
    let mut ast_iter = ast.into_iter();
    // Build the error lazily: `ok_or(anyhow!(..))` would construct the error value
    // on every call, including the successful ones.
    let ast_root = ast_iter.next().ok_or_else(|| anyhow!("Empty AST."))?;
    ensure!(
        ast_root.as_rule() == root_rule && ast_iter.next().is_none(),
        "AST must have only one root rule, which must be a `{root_rule:?}` rule."
    );
    Ok(ast_root
        .into_inner()
        .filter(|pair| pair.as_rule() == expected_rule)
        .collect())
}
4 changes: 1 addition & 3 deletions rust/cbork-cddl-parser/tests/byte_sequences.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
// cspell: words hexpair rstuvw abcdefghijklmnopqrstuvwyz rstuvw Xhhb Bhcm

use cbork_cddl_parser::cddl_test::Rule;

mod common;
use common::byte_sequences::*;
use common::{byte_sequences::*, Rule};

#[test]
/// Test if the `HEX_PAIR` rule passes properly.
Expand Down
6 changes: 3 additions & 3 deletions rust/cbork-cddl-parser/tests/cddl.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{ffi::OsStr, fs, io::Result};

use cbork_cddl_parser::{parse_cddl, Extension};
use cbork_cddl_parser::{validate_cddl, Extension};

#[test]
/// # Panics
Expand Down Expand Up @@ -32,7 +32,7 @@ fn parse_cddl_files() {
for file_path in valid_file_paths {
let mut content = fs::read_to_string(file_path).unwrap();

if let Err(e) = parse_cddl(&mut content, &Extension::CDDLParser) {
if let Err(e) = validate_cddl(&mut content, &Extension::CDDL) {
err_messages.push(format!("{}) {file_path:?} {e}", err_messages.len() + 1));
}
}
Expand All @@ -41,7 +41,7 @@ fn parse_cddl_files() {
for file_path in invalid_file_paths {
let mut content = fs::read_to_string(file_path).unwrap();

let result = parse_cddl(&mut content, &Extension::CDDLParser);
let result = validate_cddl(&mut content, &Extension::CDDL);

assert!(result.is_err(), "{:?} is expected to fail", &file_path);
}
Expand Down
7 changes: 3 additions & 4 deletions rust/cbork-cddl-parser/tests/character_sets.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
// cspell: words PCHAR pchar BCHAR bchar SESC sesc SCHAR schar fffd fffe

use cbork_cddl_parser::{
self,
cddl_test::{CDDLTestParser, Parser, Rule},
};
mod common;
use common::{CDDLTestParser, Rule};
use pest::Parser;

#[test]
/// Test if the `WHITESPACE` rule passes properly.
Expand Down
Loading