From e5aea8b24e8f21525498f61c1c9d449448746e63 Mon Sep 17 00:00:00 2001 From: Shadow Cat Date: Sun, 6 Oct 2024 12:42:46 -0400 Subject: [PATCH] stuff --- src/main.rs | 40 +---- src/parser/cursor.rs | 55 ------ src/parser/expr.rs | 228 ------------------------- src/token/cursor.rs | 78 --------- src/token/mod.rs | 113 ------------ src/token/string.rs | 50 ------ src/v1/mod.rs | 24 +++ src/{ => v1}/parser/body.rs | 8 +- src/v1/parser/cursor.rs | 61 +++++++ src/v1/parser/error.rs | 50 ++++++ src/v1/parser/expr.rs | 168 ++++++++++++++++++ src/{ => v1}/parser/mod.rs | 11 +- src/v1/parser/token/cursor.rs | 79 +++++++++ src/v1/parser/token/file.rs | 65 +++++++ src/{ => v1/parser}/token/keyword.rs | 8 + src/v1/parser/token/mod.rs | 83 +++++++++ src/{ => v1/parser}/token/symbol.rs | 74 +++++--- src/v1/parser/val.rs | 110 ++++++++++++ src/v2/mod.rs | 23 +++ src/v2/parser/body.rs | 120 +++++++++++++ src/v2/parser/cursor.rs | 135 +++++++++++++++ src/{ => v2}/parser/error.rs | 35 ++-- src/v2/parser/expr.rs | 247 +++++++++++++++++++++++++++ src/v2/parser/mod.rs | 70 ++++++++ src/v2/parser/util.rs | 10 ++ test.lang | 6 +- 26 files changed, 1338 insertions(+), 613 deletions(-) delete mode 100644 src/parser/cursor.rs delete mode 100644 src/parser/expr.rs delete mode 100644 src/token/cursor.rs delete mode 100644 src/token/mod.rs delete mode 100644 src/token/string.rs create mode 100644 src/v1/mod.rs rename src/{ => v1}/parser/body.rs (93%) create mode 100644 src/v1/parser/cursor.rs create mode 100644 src/v1/parser/error.rs create mode 100644 src/v1/parser/expr.rs rename src/{ => v1}/parser/mod.rs (91%) create mode 100644 src/v1/parser/token/cursor.rs create mode 100644 src/v1/parser/token/file.rs rename src/{ => v1/parser}/token/keyword.rs (63%) create mode 100644 src/v1/parser/token/mod.rs rename src/{ => v1/parser}/token/symbol.rs (57%) create mode 100644 src/v1/parser/val.rs create mode 100644 src/v2/mod.rs create mode 100644 src/v2/parser/body.rs create mode 100644 src/v2/parser/cursor.rs rename src/{ => v2}/parser/error.rs (65%) create mode 100644 src/v2/parser/expr.rs create mode 100644 src/v2/parser/mod.rs create mode 100644 src/v2/parser/util.rs diff --git a/src/main.rs b/src/main.rs index f49a6ea..cfa8770 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,39 +1,15 @@ -use std::{ - ffi::OsStr, - io::{BufRead, BufReader}, -}; - -mod parser; -mod token; mod util; - -use parser::{print_error, Expr, Module, TokenCursor}; +mod v1; +mod v2; fn main() { let arg = std::env::args_os().nth(1); - if let Some(file) = arg { - run_file(&file); + if let Some(path) = arg { + let file = std::fs::read_to_string(path).expect("failed to read file"); + println!("{file}"); + v1::parse_file(&file); + // v2::parse_file(&file); } else { - run_stdin(); - } -} - -fn run_file(path: &OsStr) { - let file = std::fs::read_to_string(path).expect("failed to read file"); - let tokens = token::parse(&file).unwrap(); - match Module::parse(&mut TokenCursor::from(tokens.as_slice())) { - Err(err) => print_error(err, &file), - Ok(module) => println!("{module:#?}"), - } -} - -fn run_stdin() { - for line in BufReader::new(std::io::stdin()).lines() { - let str = &line.expect("failed to read line"); - let tokens = token::parse(str).unwrap(); - println!( - "{:?}", - Expr::parse(&mut TokenCursor::from(tokens.as_slice())) - ); + v1::run_stdin(); } } diff --git a/src/parser/cursor.rs b/src/parser/cursor.rs deleted file mode 100644 index 3bbda8c..0000000 --- a/src/parser/cursor.rs +++ /dev/null @@ -1,55 +0,0 @@ -use crate::token::{Keyword, Symbol, Token, TokenInstance}; - -use super::error::{unexpected_end, unexpected_token, ParserError}; - -pub struct TokenCursor<'a> { - tokens: &'a [TokenInstance], - pos: usize, -} - -impl TokenCursor<'_> { - pub fn next(&mut self) -> Option<&TokenInstance> { - let res = self.tokens.get(self.pos); - self.pos += 1; - res - } - pub fn expect_next(&mut self) -> Result<&TokenInstance, ParserError> { - self.next().ok_or(unexpected_end()) - } - pub fn expect_token(&mut self, t: Token) -> Result<(), ParserError> { - let next = self.expect_next()?; - if t == next.token { - Ok(()) - } else { - unexpected_token(next, &format!("{t:?}")) - } - } - pub fn expect_sym(&mut self, symbol: Symbol) -> Result<(), ParserError> { - self.expect_token(Token::Symbol(symbol)) - } - pub fn expect_kw(&mut self, kw: Keyword) -> Result<(), ParserError> { - self.expect_token(Token::Keyword(kw)) - } - pub fn peek(&self) -> Option<&TokenInstance> { - self.tokens.get(self.pos) - } - pub fn expect_peek(&mut self) -> Result<&TokenInstance, ParserError> { - self.peek().ok_or(unexpected_end()) - } - pub fn expect_ident(&mut self) -> Result { - let i = self.expect_next()?; - let Token::Ident(n) = &i.token else { - return unexpected_token(i, "an identifier"); - }; - Ok(n.to_string()) - } -} - -impl<'a> From<&'a [TokenInstance]> for TokenCursor<'a> { - fn from(tokens: &'a [TokenInstance]) -> Self { - Self { - tokens, - pos: 0, - } - } -} diff --git a/src/parser/expr.rs b/src/parser/expr.rs deleted file mode 100644 index 029645d..0000000 --- a/src/parser/expr.rs +++ /dev/null @@ -1,228 +0,0 @@ -use std::fmt::{Debug, Write}; - -use super::{ - cursor::TokenCursor, - error::{unexpected_token, ParserError}, - Body, -}; -use crate::token::{StringType, Symbol, Token, TokenInstance}; - -pub enum Expr { - Const(ConstVal), - Ident(String), - Op(Operator, Vec), - Block(Body), - Call(Box, Vec), -} - -#[derive(Debug, PartialEq, Eq)] -pub enum Operator { - Add, - Sub, - Mul, - Div, - LessThan, - GreaterThan, - Offset, -} - -#[derive(PartialEq, Eq)] -pub enum ConstVal { - String(String), - Char(char), - Number(String), - Unit, -} - -impl Expr { - pub fn parse(cursor: &mut TokenCursor) -> Result { - let Some(next) = cursor.peek() else { - return Ok(Expr::Const(ConstVal::Unit)); - }; - let mut cur = if next.is_symbol(Symbol::OpenParen) { - cursor.next(); - let expr = Self::parse(cursor)?; - cursor.expect_sym(Symbol::CloseParen)?; - expr - } else if next.is_symbol(Symbol::OpenCurly) { - let expr = Body::parse(cursor)?; - Expr::Block(expr) - } else { - let unit = Self::parse_unit(next)?; - cursor.next(); - unit - }; - let Some(mut next) = cursor.peek() else { - return Ok(cur); - }; - while next.is_symbol(Symbol::OpenParen) { - cursor.next(); - let inner = Self::parse(cursor)?; - cursor.expect_sym(Symbol::CloseParen)?; - cur = Self::Call(Box::new(cur), vec![inner]); - let Some(next2) = cursor.peek() else { - return Ok(cur); - }; - next = next2 - } - if let Some(op) = Operator::from_token(&next.token) { - cursor.next(); - let next = Self::parse(cursor)?; - let mut vals = vec![cur]; - if let Self::Op(op_next, mut vs) = next { - if op == op_next { - vals.extend(vs); - } else if op.presedence() > op_next.presedence() { - vals.push(vs.remove(0)); - if vs.len() == 1 { - return Ok(Self::Op( - op_next, - vec![Self::Op(op, vals), vs.pop().unwrap()], - )); - } else { - vals.push(Self::Op(op_next, vs)); - } - } else { - vals.push(Self::Op(op_next, vs)); - } - } else { - vals.push(next); - } - return Ok(Self::Op(op, vals)); - }; - match next.token { - Token::Symbol(Symbol::Semicolon | Symbol::CloseParen | Symbol::CloseCurly) => Ok(cur), - _ => unexpected_token(next, "an operator or ending"), - } - } - fn parse_unit(inst: &TokenInstance) -> Result { - match &inst.token { - Token::String(ty, s) => { - Self::parse_str(*ty, s).map_err(|e| ParserError::from_instances(&[inst], e)) - } - Token::Ident(name) => Ok(Self::parse_ident(name.to_string())), - _ => unexpected_token(inst, "a string or a name"), - } - } - fn parse_str(ty: StringType, s: &str) -> Result { - match ty { - StringType::DoubleQuote => Ok(Self::Const(ConstVal::String(s.to_string()))), - StringType::SingleQuote => { - if s.len() == 1 { - Ok(Self::Const(ConstVal::Char(s.chars().next().unwrap()))) - } else { - Err("Characters must only have one char".to_string()) - } - } - } - } - fn parse_ident(str: String) -> Self { - match str.chars().next().unwrap() { - '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | '.' => { - Self::Const(ConstVal::Number(str)) - } - _ => Self::Ident(str), - } - } -} - -impl Operator { - pub fn presedence(&self) -> u32 { - match self { - Operator::LessThan => 0, - Operator::GreaterThan => 0, - Operator::Add => 1, - Operator::Sub => 2, - Operator::Mul => 3, - Operator::Div => 4, - Operator::Offset => 5, - } - } - pub fn str(&self) -> &str { - match self { - Self::Add => "+", - Self::Sub => "-", - Self::Mul => "*", - Self::Div => "/", - Self::LessThan => "<", - Self::GreaterThan => ">", - Self::Offset => ".", - } - } - pub fn from_token(token: &Token) -> Option { - let Token::Symbol(symbol) = token else { - return None; - }; - Some(match symbol { - Symbol::OpenAngle => Operator::LessThan, - Symbol::CloseAngle => Operator::GreaterThan, - Symbol::Plus => Operator::Add, - Symbol::Minus => Operator::Sub, - Symbol::Asterisk => Operator::Mul, - Symbol::Slash => Operator::Div, - Symbol::Dot => Operator::Offset, - _ => { - return None; - } - }) - } - pub fn pad(&self) -> bool { - match self { - Operator::Add => true, - Operator::Sub => true, - Operator::Mul => true, - Operator::Div => true, - Operator::LessThan => true, - Operator::GreaterThan => true, - Operator::Offset => false, - } - } -} - -impl Debug for Expr { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Expr::Const(c) => c.fmt(f), - Expr::Ident(n) => f.write_str(n), - Expr::Block(b) => b.fmt(f), - Expr::Op(op, exprs) => { - f.write_char('(')?; - exprs[0].fmt(f)?; - for expr in exprs.iter().skip(1) { - if op.pad() { - write!(f, " {} ", op.str())?; - } else { - f.write_str(op.str())?; - } - expr.fmt(f)?; - } - f.write_char(')')?; - Ok(()) - } - Expr::Call(n, args) => { - n.fmt(f)?; - f.write_char('(')?; - if let Some(a) = args.first() { - a.fmt(f)?; - } - for arg in args.iter().skip(1) { - f.write_str(", ")?; - arg.fmt(f)?; - } - f.write_char(')')?; - Ok(()) - } - } - } -} - -impl Debug for ConstVal { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::String(str) => str.fmt(f), - Self::Char(c) => c.fmt(f), - Self::Number(str) => f.write_str(str), - Self::Unit => f.write_str("()"), - } - } -} diff --git a/src/token/cursor.rs b/src/token/cursor.rs deleted file mode 100644 index 04fa1d9..0000000 --- a/src/token/cursor.rs +++ /dev/null @@ -1,78 +0,0 @@ -#[derive(Debug, Clone, Copy)] -pub struct FilePos { - pub line: usize, - pub col: usize, -} - -pub struct CharCursor<'a> { - chars: &'a [u8], - i: usize, - pos: FilePos, - prev_pos: FilePos, -} - -// TODO: support unicode -impl CharCursor<'_> { - pub fn next(&mut self) -> Option { - let res = self.get(self.i)?; - self.mov(); - Some(res) - } - pub fn next_with_pos(&mut self) -> Option<(FilePos, char)> { - let res = self.get(self.i)?; - let pos = self.pos; - self.mov(); - Some((pos, res)) - } - pub fn peek(&mut self) -> Option { - self.get(self.i) - } - fn mov(&mut self) { - self.prev_pos = self.pos; - if self.chars[self.i] == b'\n' { - self.pos.col = 0; - self.pos.line += 1; - } else { - self.pos.col += 1; - } - self.i += 1; - } - pub fn advance_if(&mut self, c: char) -> bool { - if let Some(c2) = self.get(self.i) { - if c2 == c { - self.mov(); - return true; - } - } - false - } - pub fn expect_next(&mut self) -> Result { - self.next().ok_or("Unexpected end of input".to_string()) - } - pub fn get(&self, i: usize) -> Option { - self.chars.get(i).map(|b| *b as char) - } - pub fn pos(&self) -> FilePos { - self.pos - } - pub fn prev_pos(&self) -> FilePos { - self.prev_pos - } -} - -impl<'a> From<&'a str> for CharCursor<'a> { - fn from(value: &'a str) -> Self { - Self { - chars: value.as_bytes(), - i: 0, - pos: FilePos::start(), - prev_pos: FilePos::start(), - } - } -} - -impl FilePos { - pub fn start() -> Self { - Self { line: 0, col: 0 } - } -} diff --git a/src/token/mod.rs b/src/token/mod.rs deleted file mode 100644 index 80c9f86..0000000 --- a/src/token/mod.rs +++ /dev/null @@ -1,113 +0,0 @@ -mod cursor; -mod keyword; -mod string; -mod symbol; - -use cursor::*; -pub use keyword::*; -pub use string::*; -pub use symbol::*; - -#[derive(Debug, PartialEq, Eq, Clone)] -pub enum Token { - String(StringType, String), - Symbol(Symbol), - Ident(String), - Keyword(Keyword), -} - -#[derive(Debug, Clone, Copy)] -pub struct FileRegion { - pub start: FilePos, - pub end: FilePos, -} - -#[derive(Debug)] -pub struct TokenInstance { - pub token: Token, - pub loc: FileRegion, -} - -pub fn parse(str: &str) -> Result, String> { - let mut tokens = Vec::new(); - let mut word = String::new(); - let mut word_start = FilePos::start(); - let mut word_end = FilePos::start(); - let mut cursor = CharCursor::from(str); - while let Some((start, c)) = cursor.next_with_pos() { - if c == '/' && cursor.advance_if('/') { - while cursor.peek() != Some('\n') { - cursor.next(); - } - continue; - } - let add = if c.is_whitespace() { - None - } else if let Some(lit) = StringType::from_start(c) { - let str = lit.parse(&mut cursor)?; - let end = cursor.prev_pos(); - Some(TokenInstance { - token: Token::String(lit, str), - loc: FileRegion { start, end }, - }) - } else if let Some(symbol) = Symbol::from_start(c, &mut cursor) { - let end = cursor.prev_pos(); - Some(TokenInstance { - token: Token::Symbol(symbol?), - loc: FileRegion { start, end }, - }) - } else { - word.push(c); - word_end = start; - continue; - }; - if !word.is_empty() { - tokens.push(TokenInstance { - token: Token::from_string(&word), - loc: FileRegion { start: word_start, end: word_end }, - }); - word.clear(); - } - word_start = cursor.pos(); - if let Some(token) = add { - tokens.push(token); - } - } - if !word.is_empty() { - tokens.push(TokenInstance { - token: Token::from_string(&word), - loc: FileRegion { start: word_start, end: word_end }, - }); - } - Ok(tokens) -} - -impl Token { - fn from_string(str: &str) -> Self { - match Keyword::from_string(str) { - Some(k) => Self::Keyword(k), - None => Self::Ident(str.to_string()), - } - } - pub fn is_symbol(&self, symbol: Symbol) -> bool { - match self { - Token::Symbol(s) => *s == symbol, - _ => false, - } - } - pub fn is_keyword(&self, kw: Keyword) -> bool { - match self { - Token::Keyword(k) => *k == kw, - _ => false, - } - } -} - -impl TokenInstance { - pub fn is_keyword(&self, kw: Keyword) -> bool { - self.token.is_keyword(kw) - } - pub fn is_symbol(&self, symbol: Symbol) -> bool { - self.token.is_symbol(symbol) - } -} diff --git a/src/token/string.rs b/src/token/string.rs deleted file mode 100644 index 70c370c..0000000 --- a/src/token/string.rs +++ /dev/null @@ -1,50 +0,0 @@ -use super::CharCursor; - -#[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub enum StringType { - DoubleQuote, - SingleQuote, -} - -impl StringType { - pub fn from_start(c: char) -> Option { - Some(match c { - '"' => Self::DoubleQuote, - '\'' => Self::SingleQuote, - _ => return None, - }) - } - pub fn end(&self) -> char { - match self { - StringType::DoubleQuote => '"', - StringType::SingleQuote => '\'', - } - } - pub fn parse( - &self, - stream: &mut CharCursor, - ) -> Result { - let end = self.end(); - let mut str = String::new(); - loop { - let c = stream.expect_next()?; - if c == end { - return Ok(str); - } - str.push(match c { - '\\' => { - let next = stream.expect_next()?; - match next { - '"' => '"', - '\'' => '\'', - 't' => '\t', - 'n' => '\n', - '0' => '\0', - c => return Err(format!("Unknown escape character {c}")), - } - } - _ => c, - }) - } - } -} diff --git a/src/v1/mod.rs b/src/v1/mod.rs new file mode 100644 index 0000000..47057e3 --- /dev/null +++ b/src/v1/mod.rs @@ -0,0 +1,24 @@ +use std::io::{stdout, BufRead, BufReader}; + +mod parser; + +use parser::{Module, Statement, TokenCursor}; + +pub fn parse_file(file: &str) { + match Module::parse(&mut TokenCursor::from(file)) { + Err(err) => err.write_for(&mut stdout(), file).unwrap(), + Ok(module) => println!("{module:#?}"), + } +} + +pub fn run_stdin() { + for line in BufReader::new(std::io::stdin()).lines() { + let str = &line.expect("failed to read line"); + let mut cursor = TokenCursor::from(&str[..]); + let out = &mut stdout(); + match Statement::parse(&mut cursor) { + Ok(expr) => println!("{:?}", expr), + Err(err) => err.write_for(out, str).unwrap(), + } + } +} diff --git a/src/parser/body.rs b/src/v1/parser/body.rs similarity index 93% rename from src/parser/body.rs rename to src/v1/parser/body.rs index 5ca6878..06d9265 100644 --- a/src/parser/body.rs +++ b/src/v1/parser/body.rs @@ -1,11 +1,9 @@ use std::fmt::{Debug, Write}; -use crate::token::{Keyword, Symbol, Token}; +use super::token::{Keyword, Symbol, Token}; use crate::util::Padder; -use super::cursor::TokenCursor; -use super::error::{unexpected_token, ParserError}; -use super::Expr; +use super::{Expr, ParserError, TokenCursor}; pub struct Body { statements: Vec, @@ -59,7 +57,7 @@ impl Statement { } else if next.is_symbol(Symbol::CloseCurly) { Self::Return(expr) } else { - return unexpected_token(next, "a ';' or '}'"); + return Err(ParserError::unexpected_token(next, "a ';' or '}'")); } } }) diff --git a/src/v1/parser/cursor.rs b/src/v1/parser/cursor.rs new file mode 100644 index 0000000..2fe1ebc --- /dev/null +++ b/src/v1/parser/cursor.rs @@ -0,0 +1,61 @@ +use std::ops::{Deref, DerefMut}; + +use super::error::ParserError; +use super::token::{CharCursor, Keyword, Symbol, Token, TokenInstance}; + +pub struct TokenCursor<'a> { + cursor: CharCursor<'a>, + next: Option, +} + +impl<'a> TokenCursor<'a> { + pub fn next(&mut self) -> Option { + std::mem::replace(&mut self.next, TokenInstance::parse(&mut self.cursor)) + } + pub fn expect_next(&mut self) -> Result { + self.next().ok_or(ParserError::unexpected_end()) + } + pub fn expect_token(&mut self, t: Token) -> Result<(), ParserError> { + let next = self.expect_next()?; + if t == next.token { + Ok(()) + } else { + Err(ParserError::unexpected_token(&next, &format!("{t:?}"))) + } + } + pub fn expect_sym(&mut self, symbol: Symbol) -> Result<(), ParserError> { + self.expect_token(Token::Symbol(symbol)) + } + pub fn expect_kw(&mut self, kw: Keyword) -> Result<(), ParserError> { + self.expect_token(Token::Keyword(kw)) + } + pub fn peek(&self) -> Option<&TokenInstance> { + self.next.as_ref() + } + pub fn expect_peek(&mut self) -> Result<&TokenInstance, ParserError> { + self.peek().ok_or(ParserError::unexpected_end()) + } + pub fn expect_ident(&mut self) -> Result { + let i = self.expect_next()?; + let Token::Ident(n) = &i.token else { + return Err(ParserError::unexpected_token(&i, "an identifier")); + }; + Ok(n.to_string()) + } + pub fn chars(&mut self) -> &mut CharCursor<'a> { + &mut self.cursor + } +} + +impl<'a> From<&'a str> for TokenCursor<'a> { + fn from(string: &'a str) -> Self { + Self::from(CharCursor::from(string)) + } +} + +impl<'a> From> for TokenCursor<'a> { + fn from(mut cursor: CharCursor<'a>) -> Self { + let cur = TokenInstance::parse(&mut cursor); + Self { cursor, next: cur } + } +} diff --git a/src/v1/parser/error.rs b/src/v1/parser/error.rs new file mode 100644 index 0000000..a5709ac --- /dev/null +++ b/src/v1/parser/error.rs @@ -0,0 +1,50 @@ +use super::{token::{FileRegion, TokenInstance}, FilePos}; + +#[derive(Debug)] +pub struct ParserError { + pub msg: String, + pub regions: Vec, +} + +impl ParserError { + pub fn from_instances(instances: &[&TokenInstance], msg: String) -> Self { + ParserError { + msg, + regions: instances.iter().map(|i| i.region).collect(), + } + } + pub fn from_msg(msg: String) -> Self { + Self { + msg, + regions: Vec::new(), + } + } + pub fn at(pos: FilePos, msg: String) -> Self { + Self { + msg, + regions: vec![FileRegion { + start: pos, + end: pos, + }], + } + } + pub fn unexpected_end() -> Self { + Self::from_msg("unexpected end of input".to_string()) + } + pub fn unexpected_token(inst: &TokenInstance, expected: &str) -> Self { + let t = &inst.token; + ParserError::from_instances( + &[inst], + format!("Unexpected token {t:?}; expected {expected}"), + ) + } + pub fn write_for(&self, writer: &mut impl std::io::Write, file: &str) -> std::io::Result<()> { + let after = if self.regions.is_empty() { "" } else { ":" }; + writeln!(writer, "error: {}{}", self.msg, after)?; + for reg in &self.regions { + reg.write_for(writer, file)?; + } + Ok(()) + } +} + diff --git a/src/v1/parser/expr.rs b/src/v1/parser/expr.rs new file mode 100644 index 0000000..65384fa --- /dev/null +++ b/src/v1/parser/expr.rs @@ -0,0 +1,168 @@ +use std::fmt::{Debug, Write}; + +use super::token::{Symbol, Token}; +use super::{Body, Number, ParserError, TokenCursor, Val}; + +pub enum Expr { + Val(Val), + Ident(String), + BinaryOp(Operator, Box, Box), + Block(Body), + Call(Box, Vec), +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum Operator { + Add, + Sub, + Mul, + Div, + LessThan, + GreaterThan, + Access, +} + +impl Expr { + pub fn parse(cursor: &mut TokenCursor) -> Result { + let Some(next) = cursor.peek() else { + return Ok(Expr::Val(Val::Unit)); + }; + let mut e1 = if next.is_symbol(Symbol::OpenParen) { + cursor.next(); + let expr = Self::parse(cursor)?; + cursor.expect_sym(Symbol::CloseParen)?; + expr + } else if next.is_symbol(Symbol::OpenCurly) { + let expr = Body::parse(cursor)?; + Expr::Block(expr) + } else { + Self::parse_unit(cursor)? + }; + let Some(mut next) = cursor.peek() else { + return Ok(e1); + }; + while next.is_symbol(Symbol::OpenParen) { + cursor.next(); + let inner = Self::parse(cursor)?; + cursor.expect_sym(Symbol::CloseParen)?; + e1 = Self::Call(Box::new(e1), vec![inner]); + let Some(next2) = cursor.peek() else { + return Ok(e1); + }; + next = next2 + } + Ok(if let Some(op) = Operator::from_token(&next.token) { + cursor.next(); + let e2 = Self::parse(cursor)?; + if let Self::BinaryOp(op_next, e3, e4) = e2 { + if op.presedence() > op_next.presedence() { + Self::BinaryOp(op_next, Box::new(Self::BinaryOp(op, Box::new(e1), e3)), e4) + } else { + Self::BinaryOp(op, Box::new(e1), Box::new(Self::BinaryOp(op_next, e3, e4))) + } + } else { + Self::BinaryOp(op, Box::new(e1), Box::new(e2)) + } + } else { + e1 + }) + } + fn parse_unit(cursor: &mut TokenCursor) -> Result { + if let Some(val) = Val::parse(cursor)? { + return Ok(Self::Val(val)); + } + let inst = cursor.expect_next()?; + match &inst.token { + Token::Ident(name) => Ok(Self::Ident(name.to_string())), + _ => Err(ParserError::unexpected_token( + &inst, + "an identifier or value", + )), + } + } +} + +impl Operator { + pub fn presedence(&self) -> u32 { + match self { + Operator::LessThan => 0, + Operator::GreaterThan => 0, + Operator::Add => 1, + Operator::Sub => 2, + Operator::Mul => 3, + Operator::Div => 4, + Operator::Access => 5, + } + } + pub fn str(&self) -> &str { + match self { + Self::Add => "+", + Self::Sub => "-", + Self::Mul => "*", + Self::Div => "/", + Self::LessThan => "<", + Self::GreaterThan => ">", + Self::Access => ".", + } + } + pub fn from_token(token: &Token) -> Option { + let Token::Symbol(symbol) = token else { + return None; + }; + Some(match symbol { + Symbol::OpenAngle => Operator::LessThan, + Symbol::CloseAngle => Operator::GreaterThan, + Symbol::Plus => Operator::Add, + Symbol::Minus => Operator::Sub, + Symbol::Asterisk => Operator::Mul, + Symbol::Slash => Operator::Div, + Symbol::Dot => Operator::Access, + _ => { + return None; + } + }) + } + pub fn pad(&self) -> bool { + match self { + Operator::Add => true, + Operator::Sub => true, + Operator::Mul => true, + Operator::Div => true, + Operator::LessThan => true, + Operator::GreaterThan => true, + Operator::Access => false, + } + } +} + +impl Debug for Expr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Expr::Val(c) => c.fmt(f)?, + Expr::Ident(n) => f.write_str(n)?, + Expr::Block(b) => b.fmt(f)?, + Expr::BinaryOp(op, e1, e2) => { + write!(f, "({:?}", *e1)?; + if op.pad() { + write!(f, " {} ", op.str())?; + } else { + write!(f, "{}", op.str())?; + } + write!(f, "{:?})", *e2)?; + } + Expr::Call(n, args) => { + n.fmt(f)?; + f.write_char('(')?; + if let Some(a) = args.first() { + a.fmt(f)?; + } + for arg in args.iter().skip(1) { + f.write_str(", ")?; + arg.fmt(f)?; + } + f.write_char(')')?; + } + } + Ok(()) + } +} diff --git a/src/parser/mod.rs b/src/v1/parser/mod.rs similarity index 91% rename from src/parser/mod.rs rename to src/v1/parser/mod.rs index 1befb39..76eda86 100644 --- a/src/parser/mod.rs +++ b/src/v1/parser/mod.rs @@ -1,14 +1,19 @@ -use crate::token::{Keyword, Symbol}; use std::fmt::Debug; mod body; mod cursor; mod error; mod expr; +mod token; +mod val; + pub use body::*; pub use cursor::*; -pub use expr::*; pub use error::*; +pub use expr::*; +pub use val::*; + +use token::*; #[derive(Debug)] pub struct Module { @@ -30,7 +35,7 @@ impl Module { if next.is_keyword(Keyword::Fn) { functions.push(Function::parse(cursor)?); } else { - return unexpected_token(cursor.next().unwrap(), "fn"); + return Err(ParserError::unexpected_token(next, "fn")); } } } diff --git a/src/v1/parser/token/cursor.rs b/src/v1/parser/token/cursor.rs new file mode 100644 index 0000000..0ff09cb --- /dev/null +++ b/src/v1/parser/token/cursor.rs @@ -0,0 +1,79 @@ +use std::{iter::Peekable, str::Chars}; + +use crate::v1::parser::ParserError; + +use super::FilePos; + +pub struct CharCursor<'a> { + chars: Peekable>, + pos: FilePos, + prev_pos: FilePos, +} + +impl CharCursor<'_> { + pub fn next(&mut self) -> Option { + let res = self.peek()?; + self.advance(); + Some(res) + } + pub fn expect(&mut self, c: char) -> Result<(), ParserError> { + let next = self.expect_next()?; + if next == c { + Ok(()) + } else { + Err(ParserError::at( + self.prev_pos, + format!("unexpected char '{next}'; expected '{c}'"), + )) + } + } + pub fn skip_whitespace(&mut self) { + while self.peek().is_some_and(|c| c.is_whitespace()) { + self.advance(); + } + } + pub fn peek(&mut self) -> Option { + self.chars.peek().copied() + } + pub fn advance(&mut self) { + let Some(next) = self.chars.next() else { + return; + }; + self.prev_pos = self.pos; + if next == '\n' { + self.pos.col = 0; + self.pos.line += 1; + } else { + self.pos.col += 1; + } + } + pub fn advance_if(&mut self, c: char) -> bool { + if let Some(c2) = self.peek() { + if c2 == c { + self.advance(); + return true; + } + } + false + } + pub fn expect_next(&mut self) -> Result { + self.next() + .ok_or(ParserError::from_msg("Unexpected end of input".to_string())) + } + pub fn pos(&self) -> FilePos { + self.pos + } + pub fn prev_pos(&self) -> FilePos { + self.prev_pos + } +} + +impl<'a> From<&'a str> for CharCursor<'a> { + fn from(value: &'a str) -> Self { + Self { + chars: value.chars().peekable(), + pos: FilePos::start(), + prev_pos: FilePos::start(), + } + } +} diff --git a/src/v1/parser/token/file.rs b/src/v1/parser/token/file.rs new file mode 100644 index 0000000..762af9f --- /dev/null +++ b/src/v1/parser/token/file.rs @@ -0,0 +1,65 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct FilePos { + pub line: usize, + pub col: usize, +} + +#[derive(Debug, Clone, Copy)] +pub struct FileRegion { + pub start: FilePos, + pub end: FilePos, +} + +impl FilePos { + pub fn start() -> Self { + Self { line: 0, col: 0 } + } +} + +const BEFORE: usize = 1; +const AFTER: usize = 1; + +impl FileRegion { + pub fn write_for(&self, writer: &mut impl std::io::Write, file: &str) -> std::io::Result<()> { + let start = self.start.line.saturating_sub(BEFORE); + let num_before = self.start.line - start; + let mut lines = file.lines().skip(start); + let width = format!("{}", self.end.line + AFTER).len(); + let same_line = self.start.line == self.end.line; + for i in 0..num_before { + writeln!(writer, "{:>width$} | {}", start + i, lines.next().unwrap())?; + } + let line = lines.next().unwrap(); + writeln!(writer, "{:>width$} | {}", self.start.line, line)?; + let len = if same_line { + self.end.col - self.start.col + 1 + } else { + line.len() - self.start.col + }; + writeln!( + writer, + "{} | {}", + " ".repeat(width), + " ".repeat(self.start.col) + &"^".repeat(len) + )?; + if !same_line { + for _ in 0..self.end.line - self.start.line - 1 { + lines.next(); + } + let line = lines.next().unwrap(); + writeln!(writer, "{:>width$} | {}", self.end.line, line)?; + writeln!( + writer, + "{} | {}", + " ".repeat(width), + "^".repeat(self.end.col + 1) + )?; + } + for i in 0..AFTER { + if let Some(next) = lines.next() { + writeln!(writer, "{:>width$} | {}", self.end.line + i + 1, next)?; + } + } + Ok(()) + } +} diff --git a/src/token/keyword.rs b/src/v1/parser/token/keyword.rs similarity index 63% rename from src/token/keyword.rs rename to src/v1/parser/token/keyword.rs index f22782c..ef49f21 100644 --- a/src/token/keyword.rs +++ b/src/v1/parser/token/keyword.rs @@ -16,4 +16,12 @@ impl Keyword { _ => return None, }) } + pub const fn str(&self) -> &str { + match self { + Keyword::Fn => "fn", + Keyword::Let => "let", + Keyword::If => "if", + Keyword::Return => "return", + } + } } diff --git a/src/v1/parser/token/mod.rs b/src/v1/parser/token/mod.rs new file mode 100644 index 0000000..8f03556 --- /dev/null +++ b/src/v1/parser/token/mod.rs @@ -0,0 +1,83 @@ +mod cursor; +mod file; +mod keyword; +mod symbol; + +pub use cursor::*; +pub use file::*; +pub use keyword::*; +pub use symbol::*; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum Token { + Symbol(Symbol), + Ident(String), + Keyword(Keyword), +} + +#[derive(Debug)] +pub struct TokenInstance { + pub token: Token, + pub region: FileRegion, +} + +impl TokenInstance { + pub fn parse(cursor: &mut CharCursor) -> Option { + cursor.skip_whitespace(); + cursor.peek()?; + let start = cursor.pos(); + if let Some(s) = Symbol::parse(cursor) { + if s == Symbol::DoubleSlash { + while cursor.next() != Some('\n') {} + return Self::parse(cursor); + } + let end = cursor.prev_pos(); + return Some(Self { + token: Token::Symbol(s), + region: FileRegion { start, end }, + }); + } + let mut word = String::new(); + while let Some(c) = cursor.peek() { + if c.is_whitespace() || Symbol::from_char(c).is_some() { + break; + } + word.push(c); + cursor.advance(); + } + let end = cursor.prev_pos(); + let token = if let Some(keyword) = Keyword::from_string(&word) { + Token::Keyword(keyword) + } else { + Token::Ident(word) + }; + Some(Self { + token, + region: FileRegion { start, end }, + }) + } +} + +impl Token { + pub fn is_symbol(&self, symbol: Symbol) -> bool { + match self { + Token::Symbol(s) => *s == symbol, + _ => false, + } + } + pub fn is_keyword(&self, kw: Keyword) -> bool { + match self { + Token::Keyword(k) => *k == kw, + _ => false, + } + } +} + +impl TokenInstance { + pub fn is_keyword(&self, kw: Keyword) -> bool { + self.token.is_keyword(kw) + } + pub fn is_symbol(&self, symbol: Symbol) -> bool { + self.token.is_symbol(symbol) + } +} diff --git a/src/token/symbol.rs b/src/v1/parser/token/symbol.rs similarity index 57% rename from src/token/symbol.rs rename to src/v1/parser/token/symbol.rs index 0ac0e19..6565a47 100644 --- a/src/token/symbol.rs +++ b/src/v1/parser/token/symbol.rs @@ -15,6 +15,7 @@ pub enum Symbol { Minus, Asterisk, Slash, + DoubleSlash, Dot, OpenParen, CloseParen, @@ -24,11 +25,20 @@ pub enum Symbol { CloseSquare, OpenAngle, CloseAngle, + SingleQuote, + DoubleQuote, } impl Symbol { - pub fn from_start(c: char, stream: &mut CharCursor) -> Option> { - Some(Ok(match c { + pub fn parse(cursor: &mut CharCursor) -> Option { + Self::from_char(cursor.peek()?).map(|mut s| { + cursor.advance(); + s.finish(cursor); + s + }) + } + pub fn from_char(c: char) -> Option { + Some(match c { '(' => Self::OpenParen, ')' => Self::CloseParen, '[' => Self::OpenSquare, @@ -38,35 +48,43 @@ impl Symbol { '<' => Self::OpenAngle, '>' => Self::CloseAngle, ';' => Self::Semicolon, - ':' => { - if stream.advance_if(':') { - Self::DoubleColon - } else { - Self::Colon - } - } + ':' => Self::Colon, '+' => Self::Plus, - '-' => { - if stream.advance_if('>') { - Self::Arrow - } else { - Self::Minus - } - } + '-' => Self::Minus, '*' => Self::Asterisk, '/' => Self::Slash, - '=' => { - if stream.advance_if('=') { - Self::DoubleEquals - } else if stream.advance_if('>') { - Self::DoubleArrow - } else { - Self::Equals - } - } + '=' => Self::Equals, '.' => Self::Dot, + '\'' => Self::SingleQuote, + '"' => Self::DoubleQuote, _ => return None, - })) + }) + } + pub fn finish(&mut self, cursor: &mut CharCursor) { + let Some(next) = cursor.peek() else { + return; + }; + *self = match self { + Self::Colon => match next { + ':' => Self::DoubleColon, + _ => return, + }, + Self::Minus => match next { + '>' => Self::Arrow, + _ => return, + }, + Self::Equals => match next { + '=' => Self::DoubleEquals, + '>' => Self::DoubleArrow, + _ => return, + } + Self::Slash => match next { + '/' => Self::DoubleSlash, + _ => return, + } + _ => return, + }; + cursor.advance(); } pub fn str(&self) -> &str { match self { @@ -81,6 +99,7 @@ impl Symbol { Symbol::Minus => "-", Symbol::Asterisk => "*", Symbol::Slash => "/", + Symbol::DoubleSlash => "//", Symbol::Dot => ".", Symbol::OpenParen => "(", Symbol::CloseParen => ")", @@ -90,6 +109,9 @@ impl Symbol { Symbol::CloseSquare => "]", Symbol::OpenAngle => "<", Symbol::CloseAngle => ">", + Symbol::SingleQuote => "'", + Symbol::DoubleQuote => "\"", + } } } diff --git a/src/v1/parser/val.rs b/src/v1/parser/val.rs new file mode 100644 index 0000000..3a4a2f6 --- /dev/null +++ b/src/v1/parser/val.rs @@ -0,0 +1,110 @@ +use super::{CharCursor, ParserError, Symbol, Token, TokenCursor}; +use std::fmt::Debug; + +#[derive(PartialEq, Eq)] +pub enum Val { + String(String), + Char(char), + Number(Number), + Unit, +} + +#[derive(PartialEq, Eq)] +pub struct Number { + pub whole: String, + pub decimal: Option, + pub ty: Option, +} + +impl Val { + pub fn parse(cursor: &mut TokenCursor) -> Result, ParserError> { + let inst = cursor.expect_peek()?; + let mut res = match &inst.token { + Token::Symbol(Symbol::SingleQuote) => { + let chars = cursor.chars(); + let c = chars.expect_next()?; + chars.expect('\'')?; + Self::Char(c) + } + Token::Symbol(Symbol::DoubleQuote) => Self::String(string_from(cursor.chars())?), + Token::Ident(text) => { + let first = text.chars().next().unwrap(); + if first.is_ascii_digit() { + Self::Number(Number { + whole: text.to_string(), + decimal: None, + ty: None, + }) + } else { + return Ok(None); + } + } + _ => return Ok(None), + }; + cursor.next(); + if let Some(next) = cursor.peek() { + if let Self::Number(num) = &mut res { + if let Token::Symbol(Symbol::Dot) = next.token { + let chars = cursor.chars(); + if let Some(c) = chars.peek() { + if c.is_ascii_digit() { + cursor.next(); + let decimal = cursor.expect_ident()?; + num.decimal = Some(decimal); + } + } + } + } + } + Ok(Some(res)) + } +} +pub fn string_from(cursor: &mut CharCursor) -> Result { + let mut str = String::new(); + loop { + let c = cursor.expect_next()?; + if c == '"' { + return Ok(str); + } + str.push(match c { + '\\' => { + let next = cursor.expect_next()?; + match next { + '"' => '"', + '\'' => '\'', + 't' => '\t', + 'n' => '\n', + '0' => '\0', + _ => { + todo!(); + } + } + } + _ => c, + }) + } +} + +impl Debug for Val { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::String(str) => str.fmt(f), + Self::Char(c) => c.fmt(f), + Self::Number(n) => n.fmt(f), + Self::Unit => f.write_str("()"), + } + } +} + +impl Debug for Number { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.whole)?; + if let Some(d) = &self.decimal { + write!(f, ".{}", d)?; + } + if let Some(ty) = &self.ty { + write!(f, "T{}", ty)?; + } + Ok(()) + } +} diff --git a/src/v2/mod.rs b/src/v2/mod.rs new file mode 100644 index 0000000..790b956 --- /dev/null +++ b/src/v2/mod.rs @@ -0,0 +1,23 @@ +use std::{ffi::OsStr, io::{BufRead, BufReader}}; + +use parser::{print_error, CharCursor, Module, Statement}; + +mod parser; + +pub fn parse_file(file: &str) { + match Module::parse(&mut CharCursor::from(file)) { + Err(err) => print_error(err, file), + Ok(module) => println!("{module:#?}"), + } +} + +pub fn run_stdin() { + for line in BufReader::new(std::io::stdin()).lines() { + let str = &line.expect("failed to read line"); + let mut cursor = CharCursor::from(&str[..]); + match Statement::parse(&mut cursor) { + Ok(expr) => println!("{:?}", expr), + Err(err) => print_error(err, str), + } + } +} diff --git a/src/v2/parser/body.rs b/src/v2/parser/body.rs new file mode 100644 index 0000000..c01d05f --- /dev/null +++ b/src/v2/parser/body.rs @@ -0,0 +1,120 @@ +use std::collections::HashSet; +use std::fmt::{Debug, Write}; +use std::sync::LazyLock; + +use crate::util::Padder; + +use super::util::WHITESPACE_SET; +use super::CharCursor; +use super::Expr; +use super::ParserError; + +static NAME_END: LazyLock> = LazyLock::new(|| { + let mut set = WHITESPACE_SET.clone(); + set.extend(&['(']); + set +}); + +pub struct Body { + statements: Vec, +} + +pub enum Statement { + Let(String, Expr), + Return(Expr), + Expr(Expr), +} + +impl Body { + pub fn parse(cursor: &mut CharCursor) -> Result { + cursor.skip_whitespace(); + let mut statements = Vec::new(); + cursor.expect_char('{')?; + loop { + cursor.skip_whitespace(); + let next = cursor.expect_peek()?; + if next == '}' { + cursor.next(); + return Ok(Self { statements }); + } + statements.push(Statement::parse(cursor)?); + } + } +} + +impl Statement { + pub fn parse(cursor: &mut CharCursor) -> Result { + cursor.skip_whitespace(); + Ok(if cursor.advance_if_str("let", &WHITESPACE_SET) { + cursor.skip_whitespace(); + let name = cursor.until(&NAME_END); + if name.is_empty() { + return Err(ParserError::at( + cursor.pos(), + "Expected variable name".to_string(), + )); + } + cursor.skip_whitespace(); + cursor.expect_char('=')?; + let expr = Expr::parse(cursor)?; + cursor.skip_whitespace(); + cursor.expect_char(';')?; + Self::Let(name, expr) + } else if cursor.advance_if_str("return", &WHITESPACE_SET) { + let expr = Expr::parse(cursor)?; + cursor.skip_whitespace(); + cursor.expect_char(';')?; + Self::Return(expr) + } else { + let expr = Expr::parse(cursor)?; + match cursor.expect_peek()? { + ';' => { + cursor.next(); + Self::Expr(expr) + } + '}' => Self::Return(expr), + _ => { + cursor.next(); + return Err(ParserError::at( + cursor.prev_pos(), + "unexpected end of statement; expected a ';' or '}'".to_string(), + )); + } + } + }) + } +} + +impl Debug for Statement { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Statement::Let(n, e) => { + write!(f, "let {n} = {e:?};")?; + } + Statement::Return(e) => { + write!(f, "return {e:?};")?; + } + Statement::Expr(e) => { + write!(f, "{e:?};")?; + } + } + Ok(()) + } +} + +impl Debug for Body { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.statements.first().is_some() { + write!(f, "{{\n ")?; + let mut padder = Padder::new(f); + for s in &self.statements { + // they don't expose wrap_buf :grief: + write!(padder, "{s:?}\n")?; + } + write!(f, "}}")?; + } else { + write!(f, "{{}}")?; + } + Ok(()) + } +} diff --git a/src/v2/parser/cursor.rs b/src/v2/parser/cursor.rs new file mode 100644 index 0000000..2b636e1 --- /dev/null +++ b/src/v2/parser/cursor.rs @@ -0,0 +1,135 @@ +use std::{collections::HashSet, iter::Peekable, str::Chars}; + +use super::{error::ParserError, util::WHITESPACE_SET}; + +#[derive(Debug, Clone, Copy)] +pub struct FilePos { + pub line: usize, + pub col: usize, +} + +#[derive(Debug, Clone, Copy)] +pub struct FileRegion { + pub start: FilePos, + pub end: FilePos, +} + +pub struct CharCursor<'a> { + chars: Peekable>, + pos: FilePos, + prev_pos: FilePos, +} + +impl CharCursor<'_> { + pub fn until(&mut self, set: &HashSet) -> String { + let mut str = String::new(); + loop { + let Some(next) = self.peek() else { + return str; + }; + if set.contains(&next) { + return str; + } + str.push(next); + self.advance(); + } + } + pub fn skip_whitespace(&mut self) { + while self.peek().is_some_and(|c| c.is_whitespace()) { + self.advance(); + } + let mut copy = self.chars.clone(); + if let Some('/') = copy.next() { + if let Some('/') = copy.next() { + self.advance(); + self.advance(); + while self.next() != Some('\n') {} + self.skip_whitespace(); + } + } + } + pub fn next(&mut self) -> Option { + let res = self.peek()?; + self.advance(); + Some(res) + } + pub fn peek(&mut self) -> Option { + self.chars.peek().copied() + } + pub fn advance(&mut self) { + self.prev_pos = self.pos; + if self.peek().is_some_and(|c| c == '\n') { + self.pos.col = 0; + self.pos.line += 1; + } else { + self.pos.col += 1; + } + self.chars.next(); + } + pub fn advance_if(&mut self, c: char) -> bool { + if let Some(c2) = self.peek() { + if c2 == c { + self.advance(); + return true; + } + } + false + } + pub fn advance_if_str(&mut self, exp: &str, end: &HashSet) -> bool { + let mut new = self.chars.clone(); + for e in exp.chars() { + let Some(c) = new.next() else { + return false; + }; + if e != c { + return false; + } + } + if new.peek().is_some_and(|c| !end.contains(c)) { + return false; + } + for _ in 0..exp.len() { + self.advance(); + } + true + } + pub fn expect_char(&mut self, c: char) -> Result<(), ParserError> { + let next = self.expect_next()?; + if next == c { + Ok(()) + } else { + Err(ParserError::at( + self.prev_pos, + format!("unexpected char '{next}'; expected '{c}'"), + )) + } + } + pub fn expect_next(&mut self) -> Result { + self.next().ok_or(ParserError::unexpected_end()) + } + pub fn expect_peek(&mut self) -> Result { + self.peek().ok_or(ParserError::unexpected_end()) + } + pub fn pos(&self) -> FilePos { + self.pos + } + pub fn prev_pos(&self) -> FilePos { + self.prev_pos + } +} + +impl<'a> From<&'a str> for CharCursor<'a> { + fn from(value: &'a str) -> Self { + Self { + chars: value.chars().peekable(), + pos: FilePos::start(), + prev_pos: FilePos::start(), + } + } +} + +impl FilePos { + pub fn start() -> Self { + Self { line: 0, col: 0 } + } +} diff --git a/src/parser/error.rs b/src/v2/parser/error.rs similarity index 65% rename from src/parser/error.rs rename to src/v2/parser/error.rs index 89b1a8e..0dbd4b3 100644 --- a/src/parser/error.rs +++ b/src/v2/parser/error.rs @@ -1,4 +1,4 @@ -use crate::token::{FileRegion, TokenInstance}; +use super::{FilePos, FileRegion}; #[derive(Debug)] pub struct ParserError { @@ -7,37 +7,32 @@ pub struct ParserError { } impl ParserError { - pub fn from_instances(instances: &[&TokenInstance], msg: String) -> Self { - ParserError { - msg, - regions: instances.iter().map(|i| i.loc).collect(), - } - } pub fn from_msg(msg: String) -> Self { Self { msg, regions: Vec::new(), } } -} - -pub fn unexpected_token(inst: &TokenInstance, expected: &str) -> Result { - let t = &inst.token; - Err(ParserError::from_instances( - &[inst], - format!("Unexpected token {t:?}; expected {expected}"), - )) -} - -pub fn unexpected_end() -> ParserError { - ParserError::from_msg("Unexpected end of input".to_string()) + pub fn at(pos: FilePos, msg: String) -> Self { + Self { + msg, + regions: vec![FileRegion { + start: pos, + end: pos, + }], + } + } + pub fn unexpected_end() -> Self { + Self::from_msg("Unexpected end of input".to_string()) + } } const BEFORE: usize = 1; const AFTER: usize = 1; pub fn print_error(err: ParserError, file: &str) { - println!("error: {}:", err.msg); + let after = if err.regions.is_empty() {""} else {":"}; + println!("error: {}{}", err.msg, after); for reg in err.regions { print_region(file, reg); } diff --git a/src/v2/parser/expr.rs b/src/v2/parser/expr.rs new file mode 100644 index 0000000..8f6cb3c --- /dev/null +++ b/src/v2/parser/expr.rs @@ -0,0 +1,247 @@ +use super::{util::WHITESPACE_SET, Body, CharCursor, ParserError}; +use std::{collections::HashSet, fmt::Debug, sync::LazyLock}; + +static SYMBOLS: LazyLock> = LazyLock::new(|| { + let mut set = HashSet::new(); + for o in Operator::ALL { + for c in o.str().chars() { + set.insert(c); + } + } + set +}); + +static IDENT_END: LazyLock> = LazyLock::new(|| { + let mut set = WHITESPACE_SET.clone(); + let symbols = &SYMBOLS; + set.extend(symbols.iter().chain(&[';', '(', ')'])); + set +}); + +#[derive(Debug)] +pub enum Val { + String(String), + Number(String), + Unit, +} + +pub enum Expr { + Block(Body), + Val(Val), + Ident(String), + BinaryOp(Operator, Box, Box), + Call(Box, Vec), +} + +#[derive(Debug, PartialEq, Eq)] +pub enum Operator { + Add, + Sub, + Mul, + Div, + LessThan, + GreaterThan, + Offset, +} + +impl Expr { + pub fn parse(cursor: &mut CharCursor) -> Result { + cursor.skip_whitespace(); + let Some(next) = cursor.peek() else { + return Ok(Self::Val(Val::Unit)); + }; + let mut e1 = match next { + '(' => { + cursor.advance(); + let expr = Self::parse(cursor)?; + cursor.skip_whitespace(); + cursor.expect_char(')')?; + expr + } + '{' => { + Self::Block(Body::parse(cursor)?) + } + _ => { + if let Some(val) = Val::parse_nonunit(cursor)? { + Self::Val(val) + } else { + let name = cursor.until(&IDENT_END); + Self::Ident(name) + } + } + }; + cursor.skip_whitespace(); + let Some(mut next) = cursor.peek() else { + return Ok(e1); + }; + while next == '(' { + cursor.advance(); + let inner = Self::parse(cursor)?; + cursor.skip_whitespace(); + cursor.expect_char(')')?; + e1 = Self::Call(Box::new(e1), vec![inner]); + let Some(next2) = cursor.peek() else { + return Ok(e1); + }; + next = next2 + } + if let Some(op) = Operator::parse(cursor) { + let e2 = Self::parse(cursor)?; + return Ok(if let Self::BinaryOp(op_next, e2, e3) = e2 { + if op.presedence() > op_next.presedence() { + Self::BinaryOp(op_next, Box::new(Self::BinaryOp(op, Box::new(e1), e2)), e3) + } else { + Self::BinaryOp(op, Box::new(e1), Box::new(Self::BinaryOp(op_next, e2, e3))) + } + } else { + Self::BinaryOp(op, Box::new(e1), Box::new(e2)) + }); + }; + Ok(e1) + } +} + +impl Val { + pub fn parse_nonunit(cursor: &mut CharCursor) -> Result, ParserError> { + let Some(next) = cursor.peek() else { + return Ok(None); + }; + Ok(Some(match next { + '"' => { + cursor.advance(); + let mut str = String::new(); + loop { + let mut next = cursor.expect_next()?; + if next == '"' { + break; + } + if next == '\\' { + next = match cursor.expect_next()? { + '"' => '"', + c => { + return Err(ParserError::at( + cursor.pos(), + format!("unexpected escape char '{c}'"), + )) + } + } + } + str.push(next); + } + Self::String(str) + } + '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => { + let mut str = String::new(); + loop { + let Some(next) = cursor.peek() else { + break; + }; + match next { + '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => { + str.push(next); + } + _ => break, + } + cursor.advance(); + } + Self::Number(str) + } + _ => { + return Ok(None); + } + })) + } +} + +impl Operator { + const ALL: [Self; 7] = [ + Self::Add, + Self::Sub, + Self::Mul, + Self::Div, + Self::Offset, + Self::GreaterThan, + Self::LessThan, + ]; + pub fn presedence(&self) -> u32 { + match self { + Operator::LessThan => 0, + Operator::GreaterThan => 0, + Operator::Add => 1, + Operator::Sub => 2, + Operator::Mul => 3, + Operator::Div => 4, + Operator::Offset => 5, + } + } + pub fn str(&self) -> &str { + match self { + Self::Add => "+", + Self::Sub => "-", + Self::Mul => "*", + Self::Div => "/", + Self::LessThan => "<", + Self::GreaterThan => ">", + Self::Offset => ".", + } + } + pub fn parse(cursor: &mut CharCursor) -> Option { + let res = match cursor.peek()? { + '+' => Operator::Add, + '-' => Operator::Sub, + '*' => Operator::Mul, + '/' => Operator::Div, + '.' => Operator::Offset, + _ => return None, + }; + for _ in 0..res.str().len() { + cursor.advance(); + } + Some(res) + } + pub fn pad(&self) -> bool { + match self { + Operator::Add => true, + Operator::Sub => true, + Operator::Mul => true, + Operator::Div => true, + Operator::LessThan => true, + Operator::GreaterThan => true, + Operator::Offset => false, + } + } +} + +impl Debug for Expr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Expr::Block(b) => write!(f, "{:?}", b)?, + Expr::Ident(n) => f.write_str(n)?, + Expr::BinaryOp(op, e1, e2) => { + write!(f, "({:?}", *e1)?; + if op.pad() { + write!(f, " {} ", op.str())?; + } else { + write!(f, "{}", op.str())?; + } + write!(f, "{:?})", *e2)?; + } + Expr::Call(n, args) => { + n.fmt(f)?; + write!(f, "(")?; + if let Some(a) = args.first() { + a.fmt(f)?; + } + for arg in args.iter().skip(1) { + write!(f, ", ")?; + arg.fmt(f)?; + } + write!(f, ")")?; + } + Expr::Val(v) => { + write!(f, "{:?}", v)?; + } + } + Ok(()) + } +} diff --git a/src/v2/parser/mod.rs b/src/v2/parser/mod.rs new file mode 100644 index 0000000..9dafc2c --- /dev/null +++ b/src/v2/parser/mod.rs @@ -0,0 +1,70 @@ +use std::{collections::HashSet, fmt::Debug, sync::LazyLock}; + +mod body; +mod cursor; +mod error; +mod expr; +mod util; + +pub use body::*; +pub use cursor::*; +pub use error::*; +pub use expr::*; +use util::WHITESPACE_SET; + +#[derive(Debug)] +pub struct Module { + functions: Vec, +} + +pub struct Function { + pub name: String, + pub body: Body, +} + +static NAME_END: LazyLock> = LazyLock::new(|| { + let mut set = WHITESPACE_SET.clone(); + set.extend(&['(']); + set +}); + +impl Module { + pub fn parse(cursor: &mut CharCursor) -> Result { + let mut functions = Vec::new(); + loop { + let next = cursor.until(&WHITESPACE_SET); + if next.is_empty() { + return Ok(Self { functions }); + } + if next == "fn" { + functions.push(Function::parse(cursor)?); + } else { + return Err(ParserError::at(cursor.pos(), "expected fn".to_string())); + } + } + } +} + +impl Function { + pub fn parse(cursor: &mut CharCursor) -> Result { + cursor.skip_whitespace(); + let name = cursor.until(&NAME_END); + if name.is_empty() { + return Err(ParserError::at(cursor.pos(), "expected function name".to_string())); + } + cursor.expect_char('(')?; + cursor.expect_char(')')?; + let body = Body::parse(cursor)?; + Ok(Self { name, body }) + } +} + +impl Debug for Function { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("fn ")?; + f.write_str(&self.name)?; + f.write_str("() ")?; + self.body.fmt(f)?; + Ok(()) + } +} diff --git a/src/v2/parser/util.rs b/src/v2/parser/util.rs new file mode 100644 index 0000000..200a568 --- /dev/null +++ b/src/v2/parser/util.rs @@ -0,0 +1,10 @@ +use std::{collections::HashSet, sync::LazyLock}; + +pub const WHITESPACE: [char; 25] = [ + '\u{0009}', '\u{000A}', '\u{000B}', '\u{000C}', '\u{000D}', '\u{0020}', '\u{0085}', '\u{00A0}', + '\u{1680}', '\u{2000}', '\u{2001}', '\u{2002}', '\u{2003}', '\u{2004}', '\u{2005}', '\u{2006}', + '\u{2007}', '\u{2008}', '\u{2009}', '\u{200A}', '\u{2028}', '\u{2029}', '\u{202F}', '\u{205F}', + '\u{3000}', +]; + +pub static WHITESPACE_SET: LazyLock> = LazyLock::new(|| HashSet::from_iter(WHITESPACE)); diff --git a/test.lang b/test.lang index 02a084d..43c4940 100644 --- a/test.lang +++ b/test.lang @@ -2,10 +2,10 @@ fn main() { // let x = 3; let y = 4 + 4 + 5; let z = 1 * 2 - 3 / test * 4; - let r = 1 - 2 + 3; + let r = 1-2.5 + 3; let w = 1 * (2 - 3) / "test" - 7; - asntei - let a = test(3); + let a = test('3'); + let c = '3' ; test(5); return 5 +