Skip to content

Commit

Permalink
feat!: better type parsing (#47)
Browse files Browse the repository at this point in the history
### Supported Types and other cases

- [x] Primitive Types: `nil`, `any`, `unknown`, `boolean`, `string`,
`number`, `integer`, `function`, `thread`, `userdata` and
`lightuserdata`
- [x] Array: `TYPE[]`
- [x] Union Type: `TYPE | TYPE | TYPE`
- [x] Union of literals: `'"g@"'|'"g@$"'`, `'1'|'2'|'3'` etc. _Variants
should be wrapped inside `'`_
- [x] Table: `table<KEY_TYPE, VALUE_TYPE>`
- [x] Table Literal: `{ key1: VALUE_TYPE, key2: VALUE_TYPE }`
- [x] Reference Type i.e., alias, class etc.
- [x] Function: `fun(PARAM: TYPE): RETURN_TYPE`
  - [x] Optional param: `fun(PARAM?: TYPE): RETURN_TYPE`
  - [x] Missing Type: `fun(a)` (falls back to the `any` type)
  - [x] Multiple returns `fun(): string, number`
- [x] Grouping: `(string|number)[]`
- [x] Optional `---@param a? string`

#### Not Supported

- Dict `{ [string]: VALUE_TYPE }`; use a table or a table literal instead.
- Function with multiple returns as `VALUE_TYPE` in a table literal, i.e.
`{ key1: fun(): string, string, key2: string }`

---

Resolves #27
Resolves #46 
Resolves #48
  • Loading branch information
numToStr authored Oct 12, 2022
1 parent a8840cf commit 7e46731
Show file tree
Hide file tree
Showing 11 changed files with 648 additions and 221 deletions.
304 changes: 132 additions & 172 deletions src/lexer.rs
Original file line number Diff line number Diff line change
@@ -1,180 +1,32 @@
mod token;
pub use token::*;

use std::ops::Range;

use chumsky::{
prelude::{any, choice, end, filter, just, take_until, Simple},
text::{ident, keyword, newline, TextParser},
recursive::recursive,
text::{ident, keyword, newline, whitespace, TextParser},
Parser,
};

/// Kind attached to a `Func` / `Expr` tag through its `kind` field.
///
/// Each kind corresponds to exactly one character; see [`Kind::as_char`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Kind {
    /// Rendered as `.`
    Dot,
    /// Rendered as `:`
    Colon,
    /// Rendered as `#`
    Local,
}

impl Kind {
    /// Returns the character that stands for this kind:
    /// `.` for `Dot`, `:` for `Colon` and `#` for `Local`.
    pub fn as_char(&self) -> char {
        match *self {
            Kind::Dot => '.',
            Kind::Colon => ':',
            Kind::Local => '#',
        }
    }
}

/// Visibility keyword accepted by the `---@field` annotation
/// (`[public|private|protected]`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Scope {
/// `public`; the lexer also falls back to this when no scope is written.
Public,
/// `private`
Private,
/// `protected`
Protected,
}

/// A single token produced by the lexer.
///
/// Each variant documents the Lua / EmmyLua source syntax it is created from.
/// In those snippets `<...>` marks a required part and `[...]` an optional one.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TagType {
/// ```lua
/// ---@toc <name>
/// ```
Toc(String),
/// ```lua
/// ---@mod <name> [desc]
/// ```
/// Holds `<name>` and the optional `[desc]`, in that order.
Module(String, Option<String>),
/// ```lua
/// ---@divider <char>
/// ```
Divider(char),
/// ```lua
/// function one.two() end
/// one.two = function() end
/// ```
Func {
// NOTE(review): presumably the part before the separator (`one` in `one.two`),
// `None` when the name has no prefix — confirm against the parser.
prefix: Option<String>,
name: String,
kind: Kind,
},
/// ```lua
/// one = 1
/// one.two = 12
/// ```
Expr {
// NOTE(review): presumably `None` for a bare name such as `one` and
// `Some("one")` for `one.two` — confirm against the parser.
prefix: Option<String>,
name: String,
kind: Kind,
},
/// ```lua
/// ---@export <module>
/// or
/// return <module>\eof
/// ```
Export(String),
/// ```lua
/// ---@brief [[
/// ```
BriefStart,
/// ```lua
/// ---@brief ]]
/// ```
BriefEnd,
/// ```lua
/// ---@param <name[?]> <type[|type...]> [description]
/// ```
Param {
// Name as written in the annotation; it may carry a trailing `?`.
name: String,
// Raw type text.
ty: String,
// Optional trailing description.
desc: Option<String>,
},
/// ```lua
/// ---@return <type> [<name> [comment] | [name] #<comment>]
/// ```
Return {
// Raw type text.
ty: String,
// Optional name of the returned value.
name: Option<String>,
// Optional comment.
desc: Option<String>,
},
/// ```lua
/// ---@class <name>
/// ```
Class(String),
/// ```lua
/// ---@field [public|private|protected] <name> <type> [description]
/// ```
Field {
// The lexer stores `Scope::Public` when no scope keyword is written.
scope: Scope,
name: String,
// Raw type text.
ty: String,
// Optional trailing description.
desc: Option<String>,
},
/// ```lua
/// -- Simple Alias
/// ---@alias <name> <type>
///
/// -- Enum alias
/// ---@alias <name>
/// ```
/// Holds `<name>` and the optional `<type>`; the type is `None` for an enum alias.
Alias(String, Option<String>),
/// ```lua
/// ---| '<value>' [# description]
/// ```
/// Holds the text between the single quotes and the optional description.
Variant(String, Option<String>),
/// ```lua
/// ---@type <type> [desc]
/// ```
/// Holds `<type>` and the optional `[desc]`, in that order.
Type(String, Option<String>),
/// ```lua
/// ---@tag <name>
/// ```
Tag(String),
/// ```lua
/// ---@see <name>
/// ```
See(String),
/// ```lua
/// ---@usage `<code>`
/// ```
Usage(String),
/// ```lua
/// ---@usage [[
/// ```
UsageStart,
/// ```lua
/// ---@usage ]]
/// ```
UsageEnd,
/// ```lua
/// ---TEXT
/// ```
Comment(String),
/// Text nodes which are not needed
Skip,
}

/// A lexed tag together with the span (`Range<usize>`) the parser reports for it.
type Spanned = (TagType, Range<usize>);

/// Extra characters, besides alphanumerics, that the `name` parser accepts.
const C: [char; 3] = ['.', '_', '-'];

/// Unit struct that namespaces the lexer's associated functions; it holds no data.
#[derive(Debug)]
pub struct Lexer;

impl Lexer {
/// Parse emmylua/lua files into rust token
pub fn parse(src: &str) -> Result<Vec<Spanned>, Vec<Simple<char>>> {
pub fn init() -> impl Parser<char, Vec<Spanned>, Error = Simple<char>> {
let triple = just("---");
let space = just(' ').repeated().at_least(1);
let till_eol = take_until(newline());

let comment = till_eol.map(|(x, _)| x.iter().collect());
let desc = space.ignore_then(comment).or_not();

// Source: https://github.com/sumneko/lua-language-server/wiki/Annotations#documenting-types
// A TYPE could be
// - primary = string|number|boolean
// - fn = func(...):string
// - enum = "one"|"two"|"three"
// - or: primary (| primary)+
// - optional = primary?
// - table = table<string, string>
// - array = primary[]
let ty = filter(|x: &char| !x.is_whitespace()).repeated().collect();

let scope = choice((
keyword("public").to(Scope::Public),
keyword("protected").to(Scope::Protected),
Expand All @@ -196,21 +48,130 @@ impl Lexer {
)))
.ignored();

let union_literal = just('\'')
.ignore_then(filter(|c| c != &'\'').repeated())
.then_ignore(just('\''))
.collect();

let variant = just('|')
.then_ignore(space)
.ignore_then(
just('\'')
.ignore_then(filter(|c| c != &'\'').repeated())
.then_ignore(just('\''))
.collect(),
)
.ignore_then(union_literal)
.then(
space
.ignore_then(just('#').ignore_then(space).ignore_then(comment))
.or_not(),
)
.map(|(t, d)| TagType::Variant(t, d));

let optional = just('?').or_not().map(|c| match c {
Some(_) => TypeVal::Opt as fn(_, _) -> _,
None => TypeVal::Req as fn(_, _) -> _,
});

let name = filter(|x: &char| x.is_alphanumeric() || C.contains(x))
.repeated()
.collect();

let ty = recursive(|inner| {
let comma = just(',').padded();
let colon = just(':').padded();

let any = just("any").to(Ty::Any);
let unknown = just("unknown").to(Ty::Unknown);
let nil = just("nil").to(Ty::Nil);
let boolean = just("boolean").to(Ty::Boolean);
let string = just("string").to(Ty::String);
let num = just("number").to(Ty::Number);
let int = just("integer").to(Ty::Integer);
let function = just("function").to(Ty::Function);
let thread = just("thread").to(Ty::Thread);
let userdata = just("userdata").to(Ty::Userdata);
let lightuserdata = just("lightuserdata").to(Ty::Lightuserdata);

/// Extends the base type parser `p` with array suffixes and union tails.
///
/// 1. `p` followed by zero or more `[]`; every `[]` wraps the type parsed
///    so far in another `Ty::Array`.
/// 2. Zero or more `| <inner>` alternatives (whitespace around `|` is
///    allowed), folded left-to-right into nested `Ty::Union` pairs.
#[inline]
fn array_union(
    p: impl Parser<char, Ty, Error = Simple<char>>,
    inner: impl Parser<char, Ty, Error = Simple<char>>,
) -> impl Parser<char, Ty, Error = Simple<char>> {
    let with_array_suffix = p
        .then(just("[]").repeated())
        .foldl(|elem, _| Ty::Array(Box::new(elem)));

    let union_tail = just('|').padded().ignore_then(inner).repeated();

    // NOTE: ideally a union would be a flat `Ty::Union(Vec<Ty>)`; nested binary
    // unions are used instead, which is better than nothing.
    with_array_suffix
        .then(union_tail)
        .foldl(|lhs, rhs| Ty::Union(Box::new(lhs), Box::new(rhs)))
}

let list_like = ident()
.padded()
.then(optional)
.then(
colon
.ignore_then(inner.clone())
.or_not()
// NOTE: if param type is missing then LLS treats it as `any`
.map(|x| x.unwrap_or(Ty::Any)),
)
.map(|((n, attr), t)| attr(n, t))
.separated_by(comma)
.allow_trailing();

let fun = just("fun")
.ignore_then(
list_like
.clone()
.delimited_by(just('(').then(whitespace()), whitespace().then(just(')'))),
)
.then(
colon
.ignore_then(inner.clone().separated_by(comma))
.or_not(),
)
.map(|(param, ret)| Ty::Fun(param, ret));

let table = just("table")
.ignore_then(
just('<')
.ignore_then(inner.clone().map(Box::new))
.then_ignore(comma)
.then(inner.clone().map(Box::new))
.then_ignore(just('>'))
.or_not(),
)
.map(Ty::Table);

let dict = list_like
.delimited_by(just('{').then(whitespace()), whitespace().then(just('}')))
.map(Ty::Dict);

let ty_name = name.map(Ty::Ref);

let parens = inner
.clone()
.delimited_by(just('(').padded(), just(')').padded());

// Union of string literals: '"g@"'|'"g@$"'
let string_literal = union_literal.map(Ty::Ref);

choice((
array_union(any, inner.clone()),
array_union(unknown, inner.clone()),
array_union(nil, inner.clone()),
array_union(boolean, inner.clone()),
array_union(string, inner.clone()),
array_union(num, inner.clone()),
array_union(int, inner.clone()),
array_union(function, inner.clone()),
array_union(thread, inner.clone()),
array_union(userdata, inner.clone()),
array_union(lightuserdata, inner.clone()),
array_union(fun, inner.clone()),
array_union(table, inner.clone()),
array_union(dict, inner.clone()),
array_union(parens, inner.clone()),
array_union(string_literal, inner.clone()),
array_union(ty_name, inner),
))
});

let tag = just('@').ignore_then(choice((
private.to(TagType::Skip),
just("toc")
Expand All @@ -219,7 +180,7 @@ impl Lexer {
.map(TagType::Toc),
just("mod")
.then_ignore(space)
.ignore_then(ty)
.ignore_then(name)
.then(desc)
.map(|(name, desc)| TagType::Module(name, desc)),
just("divider")
Expand All @@ -232,14 +193,14 @@ impl Lexer {
))),
just("param")
.ignore_then(space)
.ignore_then(ty) // I am using `ty` here because param can have `?`
.ignore_then(ident().then(optional))
.then_ignore(space)
.then(ty)
.then(ty.clone())
.then(desc)
.map(|((name, ty), desc)| TagType::Param { name, ty, desc }),
.map(|(((name, opt), ty), desc)| TagType::Param(opt(name, ty), desc)),
just("return")
.ignore_then(space)
.ignore_then(ty)
.ignore_then(ty.clone())
.then(choice((
newline().to((None, None)),
space.ignore_then(choice((
Expand All @@ -250,14 +211,14 @@ impl Lexer {
.map(|(ty, (name, desc))| TagType::Return { ty, name, desc }),
just("class")
.ignore_then(space)
.ignore_then(ident())
.ignore_then(name)
.map(TagType::Class),
just("field")
.ignore_then(space.ignore_then(scope).or_not())
.then_ignore(space)
.then(ident())
.then_ignore(space)
.then(ty)
.then(ty.clone())
.then(desc)
.map(|(((scope, name), ty), desc)| TagType::Field {
scope: scope.unwrap_or(Scope::Public),
Expand All @@ -267,8 +228,8 @@ impl Lexer {
}),
just("alias")
.ignore_then(space)
.ignore_then(ident())
.then(space.ignore_then(ty).or_not())
.ignore_then(name)
.then(space.ignore_then(ty.clone()).or_not())
.map(|(name, ty)| TagType::Alias(name, ty)),
just("type")
.ignore_then(space)
Expand Down Expand Up @@ -350,6 +311,5 @@ impl Lexer {
.padded()
.map_with_span(|t, r| (t, r))
.repeated()
.parse(src)
}
}
Loading

0 comments on commit 7e46731

Please sign in to comment.