From 229b026573eec22ec2407ab3baeefd0b190b70ee Mon Sep 17 00:00:00 2001 From: shadow cat Date: Sat, 11 Apr 2026 03:50:43 -0400 Subject: [PATCH] work --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/parser/cursor/mod.rs | 52 ++++++++++------- src/parser/cursor/span.rs | 0 src/parser/cursor/token.rs | 106 ++++++++++++++++++++++------------ src/parser/node/mod.rs | 31 +--------- src/parser/node/parse.rs | 20 ++++++- src/parser/nodes/block.rs | 34 ----------- src/parser/nodes/expr.rs | 2 +- src/parser/nodes/ident.rs | 2 +- src/parser/nodes/item.rs | 24 ++++++++ src/parser/nodes/mod.rs | 39 ++++++++++++- src/parser/nodes/module.rs | 35 +++++++++++ src/parser/nodes/statement.rs | 29 ++++++++-- src/parser/nodes/ty.rs | 22 +++++++ test/main.lang | 2 +- 16 files changed, 266 insertions(+), 136 deletions(-) delete mode 100644 src/parser/cursor/span.rs delete mode 100644 src/parser/nodes/block.rs create mode 100644 src/parser/nodes/item.rs create mode 100644 src/parser/nodes/module.rs create mode 100644 src/parser/nodes/ty.rs diff --git a/Cargo.lock b/Cargo.lock index e9631c3..9065268 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,5 +3,5 @@ version = 4 [[package]] -name = "lang2" +name = "lang" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index d3a195c..3760968 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "lang2" +name = "lang" version = "0.1.0" edition = "2024" diff --git a/src/parser/cursor/mod.rs b/src/parser/cursor/mod.rs index f2c815c..0161661 100644 --- a/src/parser/cursor/mod.rs +++ b/src/parser/cursor/mod.rs @@ -1,5 +1,4 @@ use crate::io::{CompilerMsg, Span, Spanned}; -use std::iter::Peekable; mod lit; mod token; @@ -8,37 +7,52 @@ pub use token::*; pub struct Cursor<'a> { pub span: Span, - tokens: Peekable>, + next: Option, + tokens: Tokens<'a>, } impl<'a> Cursor<'a> { pub fn new(text: &'a str, file: usize) -> Self { - Self { + let mut s = Self { span: Span { start: 0, end: 0, file, }, - tokens: Tokens::new(text, file).peekable(), - } + next: None, + tokens: Tokens::new(text, file), + }; + s.next(); + s } pub fn next(&mut self) -> Option { - self.tokens.next().map(|inst| { + let mut next = self.tokens.next(); + std::mem::swap(&mut self.next, &mut next); + next.map(|inst| { self.span = inst.span; inst.inner }) } - pub fn peek(&mut self) -> Option<&Token> { - self.tokens.peek().map(|inst| &inst.inner) + pub fn next_if(&mut self, token: Token) -> bool { + if self.peek().is_some_and(|t| *t == token) { + self.next(); + true + } else { + false + } + } + + pub fn peek(&self) -> Option<&Token> { + self.next.as_ref().map(|i| &i.inner) } pub fn expect_next(&mut self) -> Result { self.next().ok_or_else(CompilerMsg::unexpected_eof) } - pub fn expect_peek(&mut self) -> Result<&Token, CompilerMsg> { + pub fn expect_peek(&self) -> Result<&Token, CompilerMsg> { self.peek().ok_or_else(CompilerMsg::unexpected_eof) } @@ -47,22 +61,16 @@ impl<'a> Cursor<'a> { if next == token { Ok(next) } else { - self.unexpected(next, &format!("'{token}'")) + self.unexpected(&next, &format!("'{token}'")) } } - pub fn unexpected(&self, token: Token, expected: &str) -> Result { - Err(CompilerMsg::unexpected_token( - Spanned { - inner: token, - span: self.span, - }, - expected, - )) + pub fn unexpected(&self, token: &Token, expected: &str) -> Result { + Err(CompilerMsg::unexpected_token(token, self.span, expected)) } pub fn peek_start(&mut self) -> usize { - self.tokens.peek().map(|i| i.span.start).unwrap_or(0) + self.next.as_ref().map(|i| i.span.start).unwrap_or(0) } pub fn cur_end(&mut self) -> usize { @@ -75,10 +83,10 @@ impl<'a> Cursor<'a> { } impl CompilerMsg { - pub fn unexpected_token(inst: TokenInst, expected: &str) -> Self { + pub fn unexpected_token(token: &Token, span: Span, expected: &str) -> Self { Self { - spans: vec![inst.span], - msg: format!("Unexpected token '{}'; expected {expected}", inst.inner), + spans: vec![span], + msg: format!("Unexpected token '{}'; expected {expected}", token), } } diff --git a/src/parser/cursor/span.rs b/src/parser/cursor/span.rs deleted file mode 100644 index e69de29..0000000 diff --git a/src/parser/cursor/token.rs b/src/parser/cursor/token.rs index 7e1ae6a..29b126e 100644 --- a/src/parser/cursor/token.rs +++ b/src/parser/cursor/token.rs @@ -3,17 +3,26 @@ use std::{iter::Peekable, str::CharIndices}; def_tokens! { symbol { - '=' => Equal, - ':' => Colon, - ';' => Semicolon, - '{' => OpenCurly, - '}' => CloseCurly, - '(' => OpenParen, - ')' => CloseParen, - '-' => Dash, - '+' => Plus, - '*' => Asterisk, - '/' => Slash, + Equal: "=", + Colon: ":", + Semicolon: ";", + Plus: "+", + Dash: "-", + Asterisk: "*", + Slash: "/", + OpenParen: "(", + CloseParen: ")", + OpenSquare: "[", + CloseSquare: "]", + OpenCurly: "{", + CloseCurly: "}", + Arrow: "->", + DoubleArrow: "=>", + PlusEqual: "+=", + DashEqual: "-=", + AsteriskEqual: "*=", + SlashEqual: "/=", + DoubleColon: "::", } keyword { Let: "let", @@ -22,6 +31,7 @@ def_tokens! { Loop: "loop", While: "while", For: "for", + Match: "match", } other { Ident(String), @@ -55,13 +65,53 @@ impl Iterator for Tokens<'_> { end: i, file: self.file, }; - if let Some(inner) = from_char(c) { - return Some(Spanned { inner, span }); - } if c.is_whitespace() { return self.next(); } + macro_rules! then { + (_ => $def:expr, $($char:expr => $to:expr,)*) => { + match self.chars.peek() { + $(Some((_, $char)) => { + self.chars.next(); + $to + },)* + _ => $def, + } + }; + } let inner = match c { + '(' => Token::OpenParen, + ')' => Token::CloseParen, + '[' => Token::OpenSquare, + ']' => Token::CloseSquare, + '{' => Token::OpenCurly, + '}' => Token::CloseCurly, + '+' => then! { + _ => Token::Plus, + '=' => Token::PlusEqual, + }, + '-' => then! { + _ => Token::Dash, + '=' => Token::DashEqual, + '>' => Token::Arrow, + }, + '*' => then! { + _ => Token::Asterisk, + '=' => Token::AsteriskEqual, + }, + '/' => then! { + _ => Token::Slash, + '=' => Token::SlashEqual, + }, + ':' => then! { + _ => Token::Colon, + ':' => Token::DoubleColon, + }, + ';' => Token::Semicolon, + '=' => then! { + _ => Token::Equal, + '>' => Token::DoubleArrow, + }, '0'..='9' => { let mut s = c.to_string(); while let Some((i, c)) = self.chars.peek() @@ -105,23 +155,11 @@ impl Iterator for Tokens<'_> { } } -macro_rules! expand_sym_names { - ({ - $($sym_char:expr => $sym_name:ident,)* - }) => { - $($sym_name,)* - }; - ({ - $($sym_char:expr => $sym_res:tt,)* - }) => { - expand_sym_names!($sym_res) - }; -} -use expand_sym_names; - macro_rules! def_tokens { { - symbol $syms:tt + symbol { + $($sym_name:ident: $sym_str:expr,)* + } keyword { $($kw_name:ident: $kw_str:expr,)* } @@ -131,16 +169,10 @@ macro_rules! def_tokens { } => { #[derive(PartialEq)] pub enum Token { - $($syms,)* + $($sym_name,)* $($kw_name,)* $($other_name($data),)* } - fn from_char(c: char) -> Option { - match c { - $($sym_char => Some(Token::$sym_res),)* - _ => None, - } - } fn from_str(s: String) -> Token { match s.as_str() { $($kw_str => Token::$kw_name,)* @@ -150,7 +182,7 @@ macro_rules! def_tokens { impl std::fmt::Display for Token { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - $(Token::$sym_res => write!(f, "{}", $sym_char),)* + $(Token::$sym_name => write!(f, "{}", $sym_str),)* $(Token::$kw_name => write!(f, $kw_str),)* $(Token::$other_name(v) => write!(f, "{v}"),)* } diff --git a/src/parser/node/mod.rs b/src/parser/node/mod.rs index 002ece4..729f631 100644 --- a/src/parser/node/mod.rs +++ b/src/parser/node/mod.rs @@ -1,6 +1,6 @@ use crate::{ io::{CompilerOutput, Span}, - parser::{Cursor, Lit, nodes::*}, + parser::{Cursor, nodes::*}, }; mod id; @@ -8,16 +8,8 @@ mod parse; pub use id::*; pub use parse::*; -def_nodes! { - exprs: Expr, - idents: Ident, - statements: Statement, - blocks: Block, - lits: Lit, -} - impl Nodes { - pub fn parse_root(path: &str, output: &mut CompilerOutput) -> Option<(Self, Id)> { + pub fn parse_root(path: &str, output: &mut CompilerOutput) -> Option<(Self, Id)> { let root_code = match std::fs::read_to_string(path) { Ok(code) => code, Err(err) => { @@ -66,22 +58,3 @@ pub trait Node: Sized { fn vec(nodes: &Nodes) -> &NodeVec; fn vec_mut(nodes: &mut Nodes) -> &mut NodeVec; } - -macro_rules! def_nodes { - {$($field:ident: $ty:ident,)*} => { - #[derive(Default)] - pub struct Nodes { - $($field: NodeVec<$ty>,)* - } - - $(impl Node for $ty { - fn vec(nodes: &Nodes) -> &NodeVec { - &nodes.$field - } - fn vec_mut(nodes: &mut Nodes) -> &mut NodeVec { - &mut nodes.$field - } - })* - }; -} -use def_nodes; diff --git a/src/parser/node/parse.rs b/src/parser/node/parse.rs index 7a9ef38..8e86db8 100644 --- a/src/parser/node/parse.rs +++ b/src/parser/node/parse.rs @@ -10,6 +10,16 @@ pub trait Parsable: Sized + Node { fn parse(ctx: &mut ParseCtx) -> Result; } +pub trait ParsableWith: Sized + Node { + fn parse_with(ctx: &mut ParseCtx, input: Input) -> Result; +} + +impl ParsableWith<()> for P { + fn parse_with(ctx: &mut ParseCtx, _: ()) -> Result { + P::parse(ctx) + } +} + pub struct ParseCtx<'a> { start: usize, cursor: Cursor<'a>, @@ -24,10 +34,18 @@ impl<'a> ParseCtx<'a> { cursor, } } + pub fn parse(&mut self) -> Result, CompilerMsg> { + self.parse_with(()) + } + + pub fn parse_with, Input>( + &mut self, + input: Input, + ) -> Result, CompilerMsg> { let old_start = self.start; self.start = self.cursor.peek_start(); - let res = P::parse(self).map(|r| self.push(r)); + let res = P::parse_with(self, input).map(|r| self.push(r)); self.start = old_start; res } diff --git a/src/parser/nodes/block.rs b/src/parser/nodes/block.rs deleted file mode 100644 index 90297f8..0000000 --- a/src/parser/nodes/block.rs +++ /dev/null @@ -1,34 +0,0 @@ -use super::*; - -pub struct Block { - statements: Vec>, -} - -impl Parsable for Block { - fn parse(ctx: &mut ParseCtx) -> Result { - let mut statements = Vec::new(); - if *ctx.expect_peek()? != Token::CloseCurly { - statements.push(ctx.parse()?); - while *ctx.expect_peek()? == Token::Semicolon { - ctx.next(); - statements.push(ctx.parse()?); - } - } - Ok(Self { statements }) - } -} - -impl FmtNode for Block { - fn fmt(&self, f: &mut std::fmt::Formatter, mut ctx: DisplayCtx) -> std::fmt::Result { - ctx.indent += 3; - write!(f, "{{")?; - if !self.statements.is_empty() { - writeln!(f)?; - } - for &s in &self.statements { - writeln!(f, "{}{};", " ".repeat(ctx.indent), s.dsp(ctx))?; - } - write!(f, "}}")?; - Ok(()) - } -} diff --git a/src/parser/nodes/expr.rs b/src/parser/nodes/expr.rs index 8bea327..e778d81 100644 --- a/src/parser/nodes/expr.rs +++ b/src/parser/nodes/expr.rs @@ -13,7 +13,7 @@ impl Parsable for Expr { Token::Dash => Self::Negate(ctx.parse()?), Token::Ident(s) => Self::Ident(ctx.ident(s)), Token::Lit(l) => Self::Lit(ctx.lit(l)), - other => return ctx.unexpected(other, "an expression"), + other => return ctx.unexpected(&other, "an expression"), }; let Some(next) = ctx.peek() else { return Ok(e1); diff --git a/src/parser/nodes/ident.rs b/src/parser/nodes/ident.rs index 37dc936..8c48674 100644 --- a/src/parser/nodes/ident.rs +++ b/src/parser/nodes/ident.rs @@ -14,7 +14,7 @@ impl Parsable for Ident { fn parse(ctx: &mut super::ParseCtx) -> Result { match ctx.expect_next()? { Token::Ident(ident) => Ok(Self { inner: ident }), - t => ctx.unexpected(t, "an identifier"), + t => ctx.unexpected(&t, "an identifier"), } } } diff --git a/src/parser/nodes/item.rs b/src/parser/nodes/item.rs new file mode 100644 index 0000000..5c75366 --- /dev/null +++ b/src/parser/nodes/item.rs @@ -0,0 +1,24 @@ +use super::*; + +pub enum Item { + Module(Id), + Statement(Id), +} + +impl Parsable for Item { + fn parse(ctx: &mut ParseCtx) -> Result { + Ok(match ctx.expect_peek()? { + Token::Fn => Self::Module(ctx.parse()?), + _ => Self::Statement(ctx.parse()?), + }) + } +} + +impl FmtNode for Item { + fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result { + match self { + Item::Module(id) => write!(f, "{}", id.dsp(ctx)), + Item::Statement(id) => write!(f, "{}", id.dsp(ctx)), + } + } +} diff --git a/src/parser/nodes/mod.rs b/src/parser/nodes/mod.rs index 55d8f88..a19e1e9 100644 --- a/src/parser/nodes/mod.rs +++ b/src/parser/nodes/mod.rs @@ -1,11 +1,44 @@ -mod block; mod expr; mod ident; +mod item; +mod module; mod statement; -pub use block::*; +mod ty; pub use expr::*; pub use ident::*; +pub use item::*; +pub use module::*; pub use statement::*; +pub use ty::*; -use super::{DisplayCtx, FmtNode, Id, Lit, Parsable, ParseCtx, Token}; +use super::{DisplayCtx, FmtNode, Id, Lit, Node, NodeVec, Parsable, ParseCtx, Token}; use crate::io::CompilerMsg; + +def_nodes! { + exprs: Expr, + idents: Ident, + statements: Statement, + blocks: Module, + lits: Lit, + types: Type, + items: Item, +} + +macro_rules! def_nodes { + {$($field:ident: $ty:ident,)*} => { + #[derive(Default)] + pub struct Nodes { + $(pub $field: NodeVec<$ty>,)* + } + + $(impl Node for $ty { + fn vec(nodes: &Nodes) -> &NodeVec { + &nodes.$field + } + fn vec_mut(nodes: &mut Nodes) -> &mut NodeVec { + &mut nodes.$field + } + })* + }; +} +use def_nodes; diff --git a/src/parser/nodes/module.rs b/src/parser/nodes/module.rs new file mode 100644 index 0000000..f704a64 --- /dev/null +++ b/src/parser/nodes/module.rs @@ -0,0 +1,35 @@ +use super::*; + +pub struct Module { + items: Vec>, +} + +impl Parsable for Module { + fn parse(ctx: &mut ParseCtx) -> Result { + let mut items = Vec::new(); + if ctx.peek().is_none() { + return Ok(Self { items }); + } + items.push(ctx.parse()?); + while *ctx.expect_peek()? == Token::Semicolon { + ctx.next(); + items.push(ctx.parse()?); + } + Ok(Self { items }) + } +} + +impl FmtNode for Module { + fn fmt(&self, f: &mut std::fmt::Formatter, mut ctx: DisplayCtx) -> std::fmt::Result { + ctx.indent += 3; + write!(f, "{{")?; + if !self.items.is_empty() { + writeln!(f)?; + } + for &i in &self.items { + writeln!(f, "{}{};", " ".repeat(ctx.indent), i.dsp(ctx))?; + } + write!(f, "}}")?; + Ok(()) + } +} diff --git a/src/parser/nodes/statement.rs b/src/parser/nodes/statement.rs index 4fa4e54..5bca4e6 100644 --- a/src/parser/nodes/statement.rs +++ b/src/parser/nodes/statement.rs @@ -1,8 +1,15 @@ pub use super::*; pub enum Statement { - Let(Id, Id), - If { cond: Id, body: Id }, + Let { + name: Id, + ty: Option>, + val: Id, + }, + If { + cond: Id, + body: Id, + }, Expr(Id), } @@ -12,8 +19,16 @@ impl Parsable for Statement { Token::Let => { ctx.next(); let name = ctx.parse()?; + let mut ty = None; + if ctx.next_if(Token::Colon) { + ty = Some(ctx.parse()?); + } ctx.expect(Token::Equal)?; - Self::Let(name, ctx.parse()?) + Self::Let { + name, + ty, + val: ctx.parse()?, + } } Token::If => { ctx.next(); @@ -32,8 +47,12 @@ impl FmtNode for Statement { Self::If { cond, body } => { write!(f, "if {} {}", cond.dsp(ctx), body.dsp(ctx)) } - Self::Let(name, expr) => { - write!(f, "let {} = {}", name.dsp(ctx), expr.dsp(ctx)) + Self::Let { name, ty, val } => { + write!(f, "let {}", name.dsp(ctx))?; + if let Some(ty) = ty { + write!(f, ": {}", ty.dsp(ctx))?; + } + write!(f, " = {}", val.dsp(ctx)) } Self::Expr(expr) => expr.fmt(f, ctx), } diff --git a/src/parser/nodes/ty.rs b/src/parser/nodes/ty.rs new file mode 100644 index 0000000..f6c8c08 --- /dev/null +++ b/src/parser/nodes/ty.rs @@ -0,0 +1,22 @@ +use super::*; + +pub enum Type { + Ident(Id), +} + +impl Parsable for Type { + fn parse(ctx: &mut ParseCtx) -> Result { + Ok(match ctx.expect_next()? { + Token::Ident(s) => Self::Ident(ctx.ident(s)), + t => ctx.unexpected(&t, "a type")?, + }) + } +} + +impl FmtNode for Type { + fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result { + match self { + Type::Ident(id) => id.fmt(f, ctx), + } + } +} diff --git a/test/main.lang b/test/main.lang index 29fabab..6ef13e4 100644 --- a/test/main.lang +++ b/test/main.lang @@ -1 +1 @@ -let x = arst -3 +let x: i32 = arst -3