From 148ad00c83084193fb07f0621a3332b3edd965b3 Mon Sep 17 00:00:00 2001 From: shadow cat Date: Sat, 5 Oct 2024 11:09:10 -0400 Subject: [PATCH] initial commit --- .gitignore | 1 + Cargo.lock | 7 ++ Cargo.toml | 8 ++ src/main.rs | 39 ++++++++ src/parser/body.rs | 108 ++++++++++++++++++++ src/parser/cursor.rs | 55 +++++++++++ src/parser/error.rs | 65 ++++++++++++ src/parser/expr.rs | 228 +++++++++++++++++++++++++++++++++++++++++++ src/parser/mod.rs | 58 +++++++++++ src/token/cursor.rs | 78 +++++++++++++++ src/token/keyword.rs | 19 ++++ src/token/mod.rs | 113 +++++++++++++++++++++ src/token/string.rs | 50 ++++++++++ src/token/symbol.rs | 101 +++++++++++++++++++ src/util/mod.rs | 38 ++++++++ test.lang | 19 ++++ 16 files changed, 987 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/main.rs create mode 100644 src/parser/body.rs create mode 100644 src/parser/cursor.rs create mode 100644 src/parser/error.rs create mode 100644 src/parser/expr.rs create mode 100644 src/parser/mod.rs create mode 100644 src/token/cursor.rs create mode 100644 src/token/keyword.rs create mode 100644 src/token/mod.rs create mode 100644 src/token/string.rs create mode 100644 src/token/symbol.rs create mode 100644 src/util/mod.rs create mode 100644 test.lang diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..afaae83 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "lang" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..7541543 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "lang" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..f49a6ea --- /dev/null +++ b/src/main.rs @@ -0,0 +1,39 @@ +use std::{ + ffi::OsStr, + io::{BufRead, BufReader}, +}; + +mod parser; +mod token; +mod util; + +use parser::{print_error, Expr, Module, TokenCursor}; + +fn main() { + let arg = std::env::args_os().nth(1); + if let Some(file) = arg { + run_file(&file); + } else { + run_stdin(); + } +} + +fn run_file(path: &OsStr) { + let file = std::fs::read_to_string(path).expect("failed to read file"); + let tokens = token::parse(&file).unwrap(); + match Module::parse(&mut TokenCursor::from(tokens.as_slice())) { + Err(err) => print_error(err, &file), + Ok(module) => println!("{module:#?}"), + } +} + +fn run_stdin() { + for line in BufReader::new(std::io::stdin()).lines() { + let str = &line.expect("failed to read line"); + let tokens = token::parse(str).unwrap(); + println!( + "{:?}", + Expr::parse(&mut TokenCursor::from(tokens.as_slice())) + ); + } +} diff --git a/src/parser/body.rs b/src/parser/body.rs new file mode 100644 index 0000000..5ca6878 --- /dev/null +++ b/src/parser/body.rs @@ -0,0 +1,108 @@ +use std::fmt::{Debug, Write}; + +use crate::token::{Keyword, Symbol, Token}; +use crate::util::Padder; + +use super::cursor::TokenCursor; +use super::error::{unexpected_token, ParserError}; +use super::Expr; + +pub struct Body { + statements: Vec, +} + +pub enum Statement { + Let(String, Expr), + Return(Expr), + Expr(Expr), +} + +impl Body { + pub fn parse(cursor: &mut TokenCursor) -> Result { + let mut statements = Vec::new(); + cursor.expect_sym(Symbol::OpenCurly)?; + loop { + let next = cursor.expect_peek()?; + if next.is_symbol(Symbol::CloseCurly) { + cursor.next(); + return Ok(Self { statements }); + } + statements.push(Statement::parse(cursor)?); + } + } +} + +impl Statement { + pub fn parse(cursor: &mut TokenCursor) -> Result { + let next = cursor.expect_peek()?; + Ok(match next.token { + Token::Keyword(Keyword::Let) => { + cursor.next(); + let name = cursor.expect_ident()?; + cursor.expect_sym(Symbol::Equals)?; + let expr = Expr::parse(cursor)?; + cursor.expect_sym(Symbol::Semicolon)?; + Self::Let(name, expr) + } + Token::Keyword(Keyword::Return) => { + cursor.next(); + let expr = Expr::parse(cursor)?; + cursor.expect_sym(Symbol::Semicolon)?; + Self::Return(expr) + } + _ => { + let expr = Expr::parse(cursor)?; + let next = cursor.expect_peek()?; + if next.is_symbol(Symbol::Semicolon) { + cursor.next(); + Self::Expr(expr) + } else if next.is_symbol(Symbol::CloseCurly) { + Self::Return(expr) + } else { + return unexpected_token(next, "a ';' or '}'"); + } + } + }) + } +} + +impl Debug for Statement { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Statement::Let(n, e) => { + f.write_str("let ")?; + f.write_str(n)?; + f.write_str(" = ")?; + e.fmt(f)?; + f.write_char(';')?; + } + Statement::Return(e) => { + f.write_str("return ")?; + e.fmt(f)?; + f.write_char(';')?; + } + Statement::Expr(e) => { + e.fmt(f)?; + f.write_char(';')?; + } + } + Ok(()) + } +} + +impl Debug for Body { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.statements.first().is_some() { + f.write_str("{\n ")?; + let mut padder = Padder::new(f); + for s in &self.statements { + // they don't expose wrap_buf :grief: + padder.write_str(&format!("{s:?}\n"))?; + } + f.write_char('}')?; + } else { + f.write_str("{}")?; + } + Ok(()) + } +} diff --git a/src/parser/cursor.rs b/src/parser/cursor.rs new file mode 100644 index 0000000..3bbda8c --- /dev/null +++ b/src/parser/cursor.rs @@ -0,0 +1,55 @@ +use crate::token::{Keyword, Symbol, Token, TokenInstance}; + +use super::error::{unexpected_end, unexpected_token, ParserError}; + +pub struct TokenCursor<'a> { + tokens: &'a [TokenInstance], + pos: usize, +} + +impl TokenCursor<'_> { + pub fn next(&mut self) -> Option<&TokenInstance> { + let res = self.tokens.get(self.pos); + self.pos += 1; + res + } + pub fn expect_next(&mut self) -> Result<&TokenInstance, ParserError> { + self.next().ok_or(unexpected_end()) + } + pub fn expect_token(&mut self, t: Token) -> Result<(), ParserError> { + let next = self.expect_next()?; + if t == next.token { + Ok(()) + } else { + unexpected_token(next, &format!("{t:?}")) + } + } + pub fn expect_sym(&mut self, symbol: Symbol) -> Result<(), ParserError> { + self.expect_token(Token::Symbol(symbol)) + } + pub fn expect_kw(&mut self, kw: Keyword) -> Result<(), ParserError> { + self.expect_token(Token::Keyword(kw)) + } + pub fn peek(&self) -> Option<&TokenInstance> { + self.tokens.get(self.pos) + } + pub fn expect_peek(&mut self) -> Result<&TokenInstance, ParserError> { + self.peek().ok_or(unexpected_end()) + } + pub fn expect_ident(&mut self) -> Result { + let i = self.expect_next()?; + let Token::Ident(n) = &i.token else { + return unexpected_token(i, "an identifier"); + }; + Ok(n.to_string()) + } +} + +impl<'a> From<&'a [TokenInstance]> for TokenCursor<'a> { + fn from(tokens: &'a [TokenInstance]) -> Self { + Self { + tokens, + pos: 0, + } + } +} diff --git a/src/parser/error.rs b/src/parser/error.rs new file mode 100644 index 0000000..89b1a8e --- /dev/null +++ b/src/parser/error.rs @@ -0,0 +1,65 @@ +use crate::token::{FileRegion, TokenInstance}; + +#[derive(Debug)] +pub struct ParserError { + pub msg: String, + pub regions: Vec, +} + +impl ParserError { + pub fn from_instances(instances: &[&TokenInstance], msg: String) -> Self { + ParserError { + msg, + regions: instances.iter().map(|i| i.loc).collect(), + } + } + pub fn from_msg(msg: String) -> Self { + Self { + msg, + regions: Vec::new(), + } + } +} + +pub fn unexpected_token(inst: &TokenInstance, expected: &str) -> Result { + let t = &inst.token; + Err(ParserError::from_instances( + &[inst], + format!("Unexpected token {t:?}; expected {expected}"), + )) +} + +pub fn unexpected_end() -> ParserError { + ParserError::from_msg("Unexpected end of input".to_string()) +} + +const BEFORE: usize = 1; +const AFTER: usize = 1; + +pub fn print_error(err: ParserError, file: &str) { + println!("error: {}:", err.msg); + for reg in err.regions { + print_region(file, reg); + } +} + +pub fn print_region(file: &str, reg: FileRegion) { + let start = reg.start.line.saturating_sub(BEFORE); + let num_before = reg.start.line - start; + let mut lines = file.lines().skip(start); + let len = reg.end.col - reg.start.col + 1; + let width = format!("{}", reg.end.line + AFTER).len(); + for i in 0..num_before + 1 { + println!("{:>width$} | {}", start + i, lines.next().unwrap()); + } + println!( + "{} | {}", + " ".repeat(width), + " ".repeat(reg.start.col) + &"^".repeat(len) + ); + for i in 0..AFTER { + if let Some(next) = lines.next() { + println!("{:>width$} | {}", reg.end.line + i + 1, next); + } + } +} diff --git a/src/parser/expr.rs b/src/parser/expr.rs new file mode 100644 index 0000000..029645d --- /dev/null +++ b/src/parser/expr.rs @@ -0,0 +1,228 @@ +use std::fmt::{Debug, Write}; + +use super::{ + cursor::TokenCursor, + error::{unexpected_token, ParserError}, + Body, +}; +use crate::token::{StringType, Symbol, Token, TokenInstance}; + +pub enum Expr { + Const(ConstVal), + Ident(String), + Op(Operator, Vec), + Block(Body), + Call(Box, Vec), +} + +#[derive(Debug, PartialEq, Eq)] +pub enum Operator { + Add, + Sub, + Mul, + Div, + LessThan, + GreaterThan, + Offset, +} + +#[derive(PartialEq, Eq)] +pub enum ConstVal { + String(String), + Char(char), + Number(String), + Unit, +} + +impl Expr { + pub fn parse(cursor: &mut TokenCursor) -> Result { + let Some(next) = cursor.peek() else { + return Ok(Expr::Const(ConstVal::Unit)); + }; + let mut cur = if next.is_symbol(Symbol::OpenParen) { + cursor.next(); + let expr = Self::parse(cursor)?; + cursor.expect_sym(Symbol::CloseParen)?; + expr + } else if next.is_symbol(Symbol::OpenCurly) { + let expr = Body::parse(cursor)?; + Expr::Block(expr) + } else { + let unit = Self::parse_unit(next)?; + cursor.next(); + unit + }; + let Some(mut next) = cursor.peek() else { + return Ok(cur); + }; + while next.is_symbol(Symbol::OpenParen) { + cursor.next(); + let inner = Self::parse(cursor)?; + cursor.expect_sym(Symbol::CloseParen)?; + cur = Self::Call(Box::new(cur), vec![inner]); + let Some(next2) = cursor.peek() else { + return Ok(cur); + }; + next = next2 + } + if let Some(op) = Operator::from_token(&next.token) { + cursor.next(); + let next = Self::parse(cursor)?; + let mut vals = vec![cur]; + if let Self::Op(op_next, mut vs) = next { + if op == op_next { + vals.extend(vs); + } else if op.presedence() > op_next.presedence() { + vals.push(vs.remove(0)); + if vs.len() == 1 { + return Ok(Self::Op( + op_next, + vec![Self::Op(op, vals), vs.pop().unwrap()], + )); + } else { + vals.push(Self::Op(op_next, vs)); + } + } else { + vals.push(Self::Op(op_next, vs)); + } + } else { + vals.push(next); + } + return Ok(Self::Op(op, vals)); + }; + match next.token { + Token::Symbol(Symbol::Semicolon | Symbol::CloseParen | Symbol::CloseCurly) => Ok(cur), + _ => unexpected_token(next, "an operator or ending"), + } + } + fn parse_unit(inst: &TokenInstance) -> Result { + match &inst.token { + Token::String(ty, s) => { + Self::parse_str(*ty, s).map_err(|e| ParserError::from_instances(&[inst], e)) + } + Token::Ident(name) => Ok(Self::parse_ident(name.to_string())), + _ => unexpected_token(inst, "a string or a name"), + } + } + fn parse_str(ty: StringType, s: &str) -> Result { + match ty { + StringType::DoubleQuote => Ok(Self::Const(ConstVal::String(s.to_string()))), + StringType::SingleQuote => { + if s.len() == 1 { + Ok(Self::Const(ConstVal::Char(s.chars().next().unwrap()))) + } else { + Err("Characters must only have one char".to_string()) + } + } + } + } + fn parse_ident(str: String) -> Self { + match str.chars().next().unwrap() { + '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | '.' => { + Self::Const(ConstVal::Number(str)) + } + _ => Self::Ident(str), + } + } +} + +impl Operator { + pub fn presedence(&self) -> u32 { + match self { + Operator::LessThan => 0, + Operator::GreaterThan => 0, + Operator::Add => 1, + Operator::Sub => 2, + Operator::Mul => 3, + Operator::Div => 4, + Operator::Offset => 5, + } + } + pub fn str(&self) -> &str { + match self { + Self::Add => "+", + Self::Sub => "-", + Self::Mul => "*", + Self::Div => "/", + Self::LessThan => "<", + Self::GreaterThan => ">", + Self::Offset => ".", + } + } + pub fn from_token(token: &Token) -> Option { + let Token::Symbol(symbol) = token else { + return None; + }; + Some(match symbol { + Symbol::OpenAngle => Operator::LessThan, + Symbol::CloseAngle => Operator::GreaterThan, + Symbol::Plus => Operator::Add, + Symbol::Minus => Operator::Sub, + Symbol::Asterisk => Operator::Mul, + Symbol::Slash => Operator::Div, + Symbol::Dot => Operator::Offset, + _ => { + return None; + } + }) + } + pub fn pad(&self) -> bool { + match self { + Operator::Add => true, + Operator::Sub => true, + Operator::Mul => true, + Operator::Div => true, + Operator::LessThan => true, + Operator::GreaterThan => true, + Operator::Offset => false, + } + } +} + +impl Debug for Expr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Expr::Const(c) => c.fmt(f), + Expr::Ident(n) => f.write_str(n), + Expr::Block(b) => b.fmt(f), + Expr::Op(op, exprs) => { + f.write_char('(')?; + exprs[0].fmt(f)?; + for expr in exprs.iter().skip(1) { + if op.pad() { + write!(f, " {} ", op.str())?; + } else { + f.write_str(op.str())?; + } + expr.fmt(f)?; + } + f.write_char(')')?; + Ok(()) + } + Expr::Call(n, args) => { + n.fmt(f)?; + f.write_char('(')?; + if let Some(a) = args.first() { + a.fmt(f)?; + } + for arg in args.iter().skip(1) { + f.write_str(", ")?; + arg.fmt(f)?; + } + f.write_char(')')?; + Ok(()) + } + } + } +} + +impl Debug for ConstVal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::String(str) => str.fmt(f), + Self::Char(c) => c.fmt(f), + Self::Number(str) => f.write_str(str), + Self::Unit => f.write_str("()"), + } + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..1befb39 --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,58 @@ +use crate::token::{Keyword, Symbol}; +use std::fmt::Debug; + +mod body; +mod cursor; +mod error; +mod expr; +pub use body::*; +pub use cursor::*; +pub use expr::*; +pub use error::*; + +#[derive(Debug)] +pub struct Module { + functions: Vec, +} + +pub struct Function { + pub name: String, + pub body: Body, +} + +impl Module { + pub fn parse(cursor: &mut TokenCursor) -> Result { + let mut functions = Vec::new(); + loop { + let Some(next) = cursor.peek() else { + return Ok(Self { functions }); + }; + if next.is_keyword(Keyword::Fn) { + functions.push(Function::parse(cursor)?); + } else { + return unexpected_token(cursor.next().unwrap(), "fn"); + } + } + } +} + +impl Function { + pub fn parse(cursor: &mut TokenCursor) -> Result { + cursor.expect_kw(Keyword::Fn)?; + let name = cursor.expect_ident()?; + cursor.expect_sym(Symbol::OpenParen)?; + cursor.expect_sym(Symbol::CloseParen)?; + let body = Body::parse(cursor)?; + Ok(Self { name, body }) + } +} + +impl Debug for Function { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("fn ")?; + f.write_str(&self.name)?; + f.write_str("() ")?; + self.body.fmt(f)?; + Ok(()) + } +} diff --git a/src/token/cursor.rs b/src/token/cursor.rs new file mode 100644 index 0000000..04fa1d9 --- /dev/null +++ b/src/token/cursor.rs @@ -0,0 +1,78 @@ +#[derive(Debug, Clone, Copy)] +pub struct FilePos { + pub line: usize, + pub col: usize, +} + +pub struct CharCursor<'a> { + chars: &'a [u8], + i: usize, + pos: FilePos, + prev_pos: FilePos, +} + +// TODO: support unicode +impl CharCursor<'_> { + pub fn next(&mut self) -> Option { + let res = self.get(self.i)?; + self.mov(); + Some(res) + } + pub fn next_with_pos(&mut self) -> Option<(FilePos, char)> { + let res = self.get(self.i)?; + let pos = self.pos; + self.mov(); + Some((pos, res)) + } + pub fn peek(&mut self) -> Option { + self.get(self.i) + } + fn mov(&mut self) { + self.prev_pos = self.pos; + if self.chars[self.i] == b'\n' { + self.pos.col = 0; + self.pos.line += 1; + } else { + self.pos.col += 1; + } + self.i += 1; + } + pub fn advance_if(&mut self, c: char) -> bool { + if let Some(c2) = self.get(self.i) { + if c2 == c { + self.mov(); + return true; + } + } + false + } + pub fn expect_next(&mut self) -> Result { + self.next().ok_or("Unexpected end of input".to_string()) + } + pub fn get(&self, i: usize) -> Option { + self.chars.get(i).map(|b| *b as char) + } + pub fn pos(&self) -> FilePos { + self.pos + } + pub fn prev_pos(&self) -> FilePos { + self.prev_pos + } +} + +impl<'a> From<&'a str> for CharCursor<'a> { + fn from(value: &'a str) -> Self { + Self { + chars: value.as_bytes(), + i: 0, + pos: FilePos::start(), + prev_pos: FilePos::start(), + } + } +} + +impl FilePos { + pub fn start() -> Self { + Self { line: 0, col: 0 } + } +} diff --git a/src/token/keyword.rs b/src/token/keyword.rs new file mode 100644 index 0000000..f22782c --- /dev/null +++ b/src/token/keyword.rs @@ -0,0 +1,19 @@ +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum Keyword { + Fn, + Let, + If, + Return, +} + +impl Keyword { + pub fn from_string(str: &str) -> Option { + Some(match str { + "fn" => Self::Fn, + "let" => Self::Let, + "if" => Self::If, + "return" => Self::Return, + _ => return None, + }) + } +} diff --git a/src/token/mod.rs b/src/token/mod.rs new file mode 100644 index 0000000..80c9f86 --- /dev/null +++ b/src/token/mod.rs @@ -0,0 +1,113 @@ +mod cursor; +mod keyword; +mod string; +mod symbol; + +use cursor::*; +pub use keyword::*; +pub use string::*; +pub use symbol::*; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum Token { + String(StringType, String), + Symbol(Symbol), + Ident(String), + Keyword(Keyword), +} + +#[derive(Debug, Clone, Copy)] +pub struct FileRegion { + pub start: FilePos, + pub end: FilePos, +} + +#[derive(Debug)] +pub struct TokenInstance { + pub token: Token, + pub loc: FileRegion, +} + +pub fn parse(str: &str) -> Result, String> { + let mut tokens = Vec::new(); + let mut word = String::new(); + let mut word_start = FilePos::start(); + let mut word_end = FilePos::start(); + let mut cursor = CharCursor::from(str); + while let Some((start, c)) = cursor.next_with_pos() { + if c == '/' && cursor.advance_if('/') { + while cursor.peek() != Some('\n') { + cursor.next(); + } + continue; + } + let add = if c.is_whitespace() { + None + } else if let Some(lit) = StringType::from_start(c) { + let str = lit.parse(&mut cursor)?; + let end = cursor.prev_pos(); + Some(TokenInstance { + token: Token::String(lit, str), + loc: FileRegion { start, end }, + }) + } else if let Some(symbol) = Symbol::from_start(c, &mut cursor) { + let end = cursor.prev_pos(); + Some(TokenInstance { + token: Token::Symbol(symbol?), + loc: FileRegion { start, end }, + }) + } else { + word.push(c); + word_end = start; + continue; + }; + if !word.is_empty() { + tokens.push(TokenInstance { + token: Token::from_string(&word), + loc: FileRegion { start: word_start, end: word_end }, + }); + word.clear(); + } + word_start = cursor.pos(); + if let Some(token) = add { + tokens.push(token); + } + } + if !word.is_empty() { + tokens.push(TokenInstance { + token: Token::from_string(&word), + loc: FileRegion { start: word_start, end: word_end }, + }); + } + Ok(tokens) +} + +impl Token { + fn from_string(str: &str) -> Self { + match Keyword::from_string(str) { + Some(k) => Self::Keyword(k), + None => Self::Ident(str.to_string()), + } + } + pub fn is_symbol(&self, symbol: Symbol) -> bool { + match self { + Token::Symbol(s) => *s == symbol, + _ => false, + } + } + pub fn is_keyword(&self, kw: Keyword) -> bool { + match self { + Token::Keyword(k) => *k == kw, + _ => false, + } + } +} + +impl TokenInstance { + pub fn is_keyword(&self, kw: Keyword) -> bool { + self.token.is_keyword(kw) + } + pub fn is_symbol(&self, symbol: Symbol) -> bool { + self.token.is_symbol(symbol) + } +} diff --git a/src/token/string.rs b/src/token/string.rs new file mode 100644 index 0000000..70c370c --- /dev/null +++ b/src/token/string.rs @@ -0,0 +1,50 @@ +use super::CharCursor; + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum StringType { + DoubleQuote, + SingleQuote, +} + +impl StringType { + pub fn from_start(c: char) -> Option { + Some(match c { + '"' => Self::DoubleQuote, + '\'' => Self::SingleQuote, + _ => return None, + }) + } + pub fn end(&self) -> char { + match self { + StringType::DoubleQuote => '"', + StringType::SingleQuote => '\'', + } + } + pub fn parse( + &self, + stream: &mut CharCursor, + ) -> Result { + let end = self.end(); + let mut str = String::new(); + loop { + let c = stream.expect_next()?; + if c == end { + return Ok(str); + } + str.push(match c { + '\\' => { + let next = stream.expect_next()?; + match next { + '"' => '"', + '\'' => '\'', + 't' => '\t', + 'n' => '\n', + '0' => '\0', + c => return Err(format!("Unknown escape character {c}")), + } + } + _ => c, + }) + } + } +} diff --git a/src/token/symbol.rs b/src/token/symbol.rs new file mode 100644 index 0000000..0ac0e19 --- /dev/null +++ b/src/token/symbol.rs @@ -0,0 +1,101 @@ +use std::fmt::Debug; + +use super::CharCursor; + +#[derive(PartialEq, Eq, Clone, Copy)] +pub enum Symbol { + Semicolon, + Colon, + DoubleColon, + Equals, + DoubleEquals, + Arrow, + DoubleArrow, + Plus, + Minus, + Asterisk, + Slash, + Dot, + OpenParen, + CloseParen, + OpenCurly, + CloseCurly, + OpenSquare, + CloseSquare, + OpenAngle, + CloseAngle, +} + +impl Symbol { + pub fn from_start(c: char, stream: &mut CharCursor) -> Option> { + Some(Ok(match c { + '(' => Self::OpenParen, + ')' => Self::CloseParen, + '[' => Self::OpenSquare, + ']' => Self::CloseSquare, + '{' => Self::OpenCurly, + '}' => Self::CloseCurly, + '<' => Self::OpenAngle, + '>' => Self::CloseAngle, + ';' => Self::Semicolon, + ':' => { + if stream.advance_if(':') { + Self::DoubleColon + } else { + Self::Colon + } + } + '+' => Self::Plus, + '-' => { + if stream.advance_if('>') { + Self::Arrow + } else { + Self::Minus + } + } + '*' => Self::Asterisk, + '/' => Self::Slash, + '=' => { + if stream.advance_if('=') { + Self::DoubleEquals + } else if stream.advance_if('>') { + Self::DoubleArrow + } else { + Self::Equals + } + } + '.' => Self::Dot, + _ => return None, + })) + } + pub fn str(&self) -> &str { + match self { + Symbol::Semicolon => ";", + Symbol::Colon => ":", + Symbol::DoubleColon => "::", + Symbol::Equals => "=", + Symbol::DoubleEquals => "==", + Symbol::Arrow => "->", + Symbol::DoubleArrow => "=>", + Symbol::Plus => "+", + Symbol::Minus => "-", + Symbol::Asterisk => "*", + Symbol::Slash => "/", + Symbol::Dot => ".", + Symbol::OpenParen => "(", + Symbol::CloseParen => ")", + Symbol::OpenCurly => "{", + Symbol::CloseCurly => "}", + Symbol::OpenSquare => "[", + Symbol::CloseSquare => "]", + Symbol::OpenAngle => "<", + Symbol::CloseAngle => ">", + } + } +} + +impl Debug for Symbol { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "'{}'", self.str()) + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs new file mode 100644 index 0000000..9fb957c --- /dev/null +++ b/src/util/mod.rs @@ -0,0 +1,38 @@ +use core::fmt; + +pub struct Padder<'buf> { + buf: &'buf mut (dyn fmt::Write + 'buf), + on_newline: bool, +} + +impl fmt::Write for Padder<'_> { + fn write_str(&mut self, s: &str) -> fmt::Result { + for s in s.split_inclusive('\n') { + if self.on_newline { + self.buf.write_str(" ")?; + } + + self.on_newline = s.ends_with('\n'); + self.buf.write_str(s)?; + } + + Ok(()) + } + + fn write_char(&mut self, c: char) -> fmt::Result { + if self.on_newline { + self.buf.write_str(" ")?; + } + self.on_newline = c == '\n'; + self.buf.write_char(c) + } +} + +impl<'buf> Padder<'buf> { + pub fn new(buf: &'buf mut (dyn fmt::Write + 'buf)) -> Self { + Self { + buf, + on_newline: false, + } + } +} diff --git a/test.lang b/test.lang new file mode 100644 index 0000000..02a084d --- /dev/null +++ b/test.lang @@ -0,0 +1,19 @@ +fn main() { + // let x = 3; + let y = 4 + 4 + 5; + let z = 1 * 2 - 3 / test * 4; + let r = 1 - 2 + 3; + let w = 1 * (2 - 3) / "test" - 7; + asntei + let a = test(3); + test(5); + return 5 + + + a; + let b = (test2.func)(3 + 4)(8)("a"); + let x = { + return 5; + let a = 3; + b + }; +}