initial commit

This commit is contained in:
2024-10-05 11:09:10 -04:00
commit 148ad00c83
16 changed files with 987 additions and 0 deletions

108
src/parser/body.rs Normal file
View File

@@ -0,0 +1,108 @@
use std::fmt::{Debug, Write};
use crate::token::{Keyword, Symbol, Token};
use crate::util::Padder;
use super::cursor::TokenCursor;
use super::error::{unexpected_token, ParserError};
use super::Expr;
/// A brace-delimited list of statements, as parsed by `Body::parse`.
pub struct Body {
/// The statements in the order they were parsed.
statements: Vec<Statement>,
}
/// A single statement inside a `Body`.
pub enum Statement {
/// `let <name> = <expr>;` — the bound name and its initializer expression.
Let(String, Expr),
/// `return <expr>;`, also produced for an expression directly followed by `}`.
Return(Expr),
/// `<expr>;` — an expression terminated by a semicolon.
Expr(Expr),
}
impl Body {
    /// Parses `{ statement* }` starting at the cursor's current token.
    ///
    /// Consumes the opening `{`, every statement up to the matching `}`, and
    /// the closing `}` itself.
    ///
    /// # Errors
    /// Fails if the first token is not `{`, if the input ends before the
    /// closing `}`, or if any contained statement fails to parse.
    pub fn parse(cursor: &mut TokenCursor) -> Result<Self, ParserError> {
        cursor.expect_sym(Symbol::OpenCurly)?;
        let mut statements = Vec::new();
        // Keep reading statements until the closing brace is the next token.
        while !cursor.expect_peek()?.is_symbol(Symbol::CloseCurly) {
            statements.push(Statement::parse(cursor)?);
        }
        // Step over the closing brace that ended the loop.
        cursor.next();
        Ok(Self { statements })
    }
}
impl Statement {
    /// Parses one statement starting at the cursor's current token.
    ///
    /// * `let <ident> = <expr>;` becomes [`Statement::Let`].
    /// * `return <expr>;` becomes [`Statement::Return`].
    /// * Anything else is parsed as an expression; followed by `;` it becomes
    ///   [`Statement::Expr`], followed by `}` it becomes [`Statement::Return`]
    ///   and the `}` is left unconsumed for the caller.
    ///
    /// # Errors
    /// Fails on end of input, on a missing identifier, `=` or `;`, or when a
    /// bare expression is followed by something other than `;` or `}`.
    pub fn parse(cursor: &mut TokenCursor) -> Result<Self, ParserError> {
        let first = cursor.expect_peek()?;
        match first.token {
            Token::Keyword(Keyword::Let) => {
                // Skip the `let` keyword that was only peeked so far.
                cursor.next();
                let name = cursor.expect_ident()?;
                cursor.expect_sym(Symbol::Equals)?;
                let value = Expr::parse(cursor)?;
                cursor.expect_sym(Symbol::Semicolon)?;
                Ok(Self::Let(name, value))
            }
            Token::Keyword(Keyword::Return) => {
                // Skip the `return` keyword that was only peeked so far.
                cursor.next();
                let value = Expr::parse(cursor)?;
                cursor.expect_sym(Symbol::Semicolon)?;
                Ok(Self::Return(value))
            }
            _ => {
                let value = Expr::parse(cursor)?;
                let after = cursor.expect_peek()?;
                if after.is_symbol(Symbol::Semicolon) {
                    cursor.next();
                    Ok(Self::Expr(value))
                } else if after.is_symbol(Symbol::CloseCurly) {
                    // Trailing expression: acts as the block's return value.
                    Ok(Self::Return(value))
                } else {
                    unexpected_token(after, "a ';' or '}'")
                }
            }
        }
    }
}
impl Debug for Statement {
    /// Renders the statement in source-like form: an optional `let <name> = `
    /// or `return ` prefix, the expression, then a `;`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Write the variant-specific prefix and pick out the expression;
        // every variant ends with the expression followed by `;`.
        let expr = match self {
            Statement::Let(name, value) => {
                f.write_str("let ")?;
                f.write_str(name)?;
                f.write_str(" = ")?;
                value
            }
            Statement::Return(value) => {
                f.write_str("return ")?;
                value
            }
            Statement::Expr(value) => value,
        };
        expr.fmt(f)?;
        f.write_char(';')
    }
}
impl Debug for Body {
    /// Renders the body as `{}` when empty, otherwise as an opening brace,
    /// one statement per line written through a `Padder`, and a closing brace.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if self.statements.is_empty() {
            return f.write_str("{}");
        }
        f.write_str("{\n ")?;
        // NOTE(review): `Padder` is defined elsewhere; presumably it indents
        // the text written through it — confirm against `crate::util`.
        let mut padder = Padder::new(f);
        for statement in self.statements.iter() {
            // The standard formatter does not expose its `wrap_buf` helper, so
            // each statement is rendered to a `String` first and then pushed
            // through the padder in a single write.
            let line = format!("{statement:?}\n");
            padder.write_str(&line)?;
        }
        f.write_char('}')
    }
}

55
src/parser/cursor.rs Normal file
View File

@@ -0,0 +1,55 @@
use crate::token::{Keyword, Symbol, Token, TokenInstance};
use super::error::{unexpected_end, unexpected_token, ParserError};
/// A forward-only read position over a borrowed slice of tokens.
pub struct TokenCursor<'a> {
/// The tokens being read.
tokens: &'a [TokenInstance],
/// Index into `tokens` of the token that `peek`/`next` will return.
pos: usize,
}
impl TokenCursor<'_> {
pub fn next(&mut self) -> Option<&TokenInstance> {
let res = self.tokens.get(self.pos);
self.pos += 1;
res
}
pub fn expect_next(&mut self) -> Result<&TokenInstance, ParserError> {
self.next().ok_or(unexpected_end())
}
pub fn expect_token(&mut self, t: Token) -> Result<(), ParserError> {
let next = self.expect_next()?;
if t == next.token {
Ok(())
} else {
unexpected_token(next, &format!("{t:?}"))
}
}
pub fn expect_sym(&mut self, symbol: Symbol) -> Result<(), ParserError> {
self.expect_token(Token::Symbol(symbol))
}
pub fn expect_kw(&mut self, kw: Keyword) -> Result<(), ParserError> {
self.expect_token(Token::Keyword(kw))
}
pub fn peek(&self) -> Option<&TokenInstance> {
self.tokens.get(self.pos)
}
pub fn expect_peek(&mut self) -> Result<&TokenInstance, ParserError> {
self.peek().ok_or(unexpected_end())
}
pub fn expect_ident(&mut self) -> Result<String, ParserError> {
let i = self.expect_next()?;
let Token::Ident(n) = &i.token else {
return unexpected_token(i, "an identifier");
};
Ok(n.to_string())
}
}
impl<'a> From<&'a [TokenInstance]> for TokenCursor<'a> {
fn from(tokens: &'a [TokenInstance]) -> Self {
Self {
tokens,
pos: 0,
}
}
}

65
src/parser/error.rs Normal file
View File

@@ -0,0 +1,65 @@
use crate::token::{FileRegion, TokenInstance};
/// An error produced while parsing, with the source regions it refers to.
#[derive(Debug)]
pub struct ParserError {
/// Human-readable description of what went wrong.
pub msg: String,
/// Source regions to show alongside the message; may be empty.
pub regions: Vec<FileRegion>,
}
impl ParserError {
    /// Builds an error whose regions are the locations of the given tokens,
    /// in the same order.
    pub fn from_instances(instances: &[&TokenInstance], msg: String) -> Self {
        let mut regions = Vec::with_capacity(instances.len());
        for inst in instances {
            regions.push(inst.loc);
        }
        Self { msg, regions }
    }

    /// Builds an error that carries only a message and no source regions.
    pub fn from_msg(msg: String) -> Self {
        let regions = Vec::new();
        ParserError { msg, regions }
    }
}
/// Builds the "unexpected token" error for `inst` and returns it as `Err`.
///
/// `expected` is a description of what the parser wanted instead and is
/// inserted verbatim into the message. The return type is generic so the call
/// can be used directly as the result of any parsing function.
pub fn unexpected_token<T>(inst: &TokenInstance, expected: &str) -> Result<T, ParserError> {
    let msg = format!("Unexpected token {:?}; expected {}", inst.token, expected);
    Err(ParserError::from_instances(&[inst], msg))
}
/// Builds the error reported when the token stream ends too early.
/// It carries no source regions.
pub fn unexpected_end() -> ParserError {
    let msg = String::from("Unexpected end of input");
    ParserError::from_msg(msg)
}
/// Number of context lines `print_region` shows before the line on which the region starts.
const BEFORE: usize = 1;
/// Maximum number of context lines `print_region` shows after the marker line.
const AFTER: usize = 1;
pub fn print_error(err: ParserError, file: &str) {
println!("error: {}:", err.msg);
for reg in err.regions {
print_region(file, reg);
}
}
/// Prints an excerpt of `file` around `reg` to stdout.
///
/// The output consists of up to `BEFORE` context lines, the line on which the
/// region starts, a marker line of `^` characters under the region's columns,
/// and up to `AFTER` following lines. Each source line is prefixed with its
/// right-aligned line number.
///
/// Line numbers are used directly as indices into `file.lines()`.
/// NOTE(review): this implies they are zero-based — confirm against the lexer.
///
/// The function never panics on a region that does not fit the text: lines
/// missing from `file` are printed as empty, and a region whose end column is
/// left of its start column (for example one spanning several lines) gets a
/// single-character marker.
pub fn print_region(file: &str, reg: FileRegion) {
    let start = reg.start.line.saturating_sub(BEFORE);
    let num_before = reg.start.line - start;
    let mut lines = file.lines().skip(start);
    // Saturate so that `end.col < start.col` cannot underflow.
    let len = reg.end.col.saturating_sub(reg.start.col) + 1;
    // Width of the widest line number that can be printed below.
    let width = (reg.end.line + AFTER).to_string().len();
    for i in 0..=num_before {
        // The region may point past the last line `str::lines` yields.
        let text = lines.next().unwrap_or("");
        println!("{:>width$} | {}", start + i, text);
    }
    println!(
        "{} | {}{}",
        " ".repeat(width),
        " ".repeat(reg.start.col),
        "^".repeat(len)
    );
    for i in 0..AFTER {
        if let Some(next) = lines.next() {
            println!("{:>width$} | {}", reg.end.line + i + 1, next);
        }
    }
}

228
src/parser/expr.rs Normal file
View File

@@ -0,0 +1,228 @@
use std::fmt::{Debug, Write};
use super::{
cursor::TokenCursor,
error::{unexpected_token, ParserError},
Body,
};
use crate::token::{StringType, Symbol, Token, TokenInstance};
/// An expression node produced by `Expr::parse`.
pub enum Expr {
/// A literal constant.
Const(ConstVal),
/// A name that is not a numeric literal.
Ident(String),
/// An operator applied to a list of operands; the parser builds these with at least two.
Op(Operator, Vec<Expr>),
/// A brace-delimited block used as an expression.
Block(Body),
/// A call: the callee expression followed by its argument list.
Call(Box<Expr>, Vec<Expr>),
}
/// The binary operators recognised by the expression parser.
#[derive(Debug, PartialEq, Eq)]
pub enum Operator {
/// `+`
Add,
/// `-`
Sub,
/// `*`
Mul,
/// `/`
Div,
/// `<`
LessThan,
/// `>`
GreaterThan,
/// `.`
Offset,
}
/// A literal value appearing in source.
#[derive(PartialEq, Eq)]
pub enum ConstVal {
/// Contents of a double-quoted string token.
String(String),
/// The single character of a single-quoted string token.
Char(char),
/// A numeric literal, kept as its unparsed source text.
Number(String),
/// The unit value; the parser yields it when asked for an expression at end of input.
Unit,
}
impl Expr {
    /// Parses one expression starting at the cursor's current token.
    ///
    /// As implemented, an expression is a primary (a parenthesised expression,
    /// a `{ ... }` block, a string/char literal, a number or an identifier),
    /// followed by any number of single-argument call suffixes `(expr)`,
    /// optionally followed by a binary operator and a recursively parsed
    /// right-hand side. Repeated uses of the same operator are flattened into
    /// one `Expr::Op`, and a higher-precedence operator on the left takes the
    /// first operand of the right-hand operator expression.
    ///
    /// Returns `ConstVal::Unit` when the input is already exhausted. The token
    /// that follows the expression (`;`, `)` or `}`) is left unconsumed.
    ///
    /// # Errors
    /// Fails when the primary is not a recognised token, when a closing `)` is
    /// missing, when a nested block or expression fails to parse, or when the
    /// expression is followed by anything other than an operator, `;`, `)`,
    /// `}` or end of input.
    // NOTE(review): grouping is not recorded in the tree, so a parenthesised
    // right operand looks the same as an unparenthesised one to the precedence
    // step below; `a * (b + c)` appears to come out as `(a * b) + c` — confirm.
    pub fn parse(cursor: &mut TokenCursor) -> Result<Self, ParserError> {
        let Some(next) = cursor.peek() else {
            return Ok(Expr::Const(ConstVal::Unit));
        };
        // Primary expression.
        let mut cur = if next.is_symbol(Symbol::OpenParen) {
            cursor.next();
            let expr = Self::parse(cursor)?;
            cursor.expect_sym(Symbol::CloseParen)?;
            expr
        } else if next.is_symbol(Symbol::OpenCurly) {
            // `Body::parse` consumes the `{` itself.
            let expr = Body::parse(cursor)?;
            Expr::Block(expr)
        } else {
            let unit = Self::parse_unit(next)?;
            cursor.next();
            unit
        };
        let Some(mut next) = cursor.peek() else {
            return Ok(cur);
        };
        // Call suffixes: each `(expr)` wraps what was parsed so far.
        while next.is_symbol(Symbol::OpenParen) {
            cursor.next();
            let inner = Self::parse(cursor)?;
            cursor.expect_sym(Symbol::CloseParen)?;
            cur = Self::Call(Box::new(cur), vec![inner]);
            let Some(next2) = cursor.peek() else {
                return Ok(cur);
            };
            next = next2
        }
        if let Some(op) = Operator::from_token(&next.token) {
            cursor.next();
            // Everything after the operator is parsed first, then regrouped.
            let next = Self::parse(cursor)?;
            let mut vals = vec![cur];
            if let Self::Op(op_next, mut vs) = next {
                if op == op_next {
                    // Same operator: flatten into a single operand list.
                    vals.extend(vs);
                } else if op.presedence() > op_next.presedence() {
                    // This operator binds tighter: take the first operand of
                    // the right-hand operator expression.
                    vals.push(vs.remove(0));
                    if vs.len() == 1 {
                        return Ok(Self::Op(
                            op_next,
                            vec![Self::Op(op, vals), vs.pop().unwrap()],
                        ));
                    } else {
                        vals.push(Self::Op(op_next, vs));
                    }
                } else {
                    vals.push(Self::Op(op_next, vs));
                }
            } else {
                vals.push(next);
            }
            return Ok(Self::Op(op, vals));
        };
        match next.token {
            Token::Symbol(Symbol::Semicolon | Symbol::CloseParen | Symbol::CloseCurly) => Ok(cur),
            _ => unexpected_token(next, "an operator or ending"),
        }
    }

    /// Converts a single token into a literal or identifier expression.
    ///
    /// # Errors
    /// Fails when the token is neither a string nor an identifier, or when a
    /// single-quoted literal does not hold exactly one character.
    fn parse_unit(inst: &TokenInstance) -> Result<Self, ParserError> {
        match &inst.token {
            Token::String(ty, s) => {
                Self::parse_str(*ty, s).map_err(|e| ParserError::from_instances(&[inst], e))
            }
            Token::Ident(name) => Ok(Self::parse_ident(name.to_string())),
            _ => unexpected_token(inst, "a string or a name"),
        }
    }

    /// Builds a string or char constant from the contents of a quoted token.
    ///
    /// Double-quoted text becomes `ConstVal::String`. Single-quoted text must
    /// hold exactly one `char` (counted as Unicode scalar values, not bytes,
    /// so non-ASCII characters are accepted) and becomes `ConstVal::Char`.
    ///
    /// # Errors
    /// Returns the error message when a single-quoted literal is empty or
    /// holds more than one character.
    fn parse_str(ty: StringType, s: &str) -> Result<Self, String> {
        match ty {
            StringType::DoubleQuote => Ok(Self::Const(ConstVal::String(s.to_string()))),
            StringType::SingleQuote => {
                let mut chars = s.chars();
                // Exactly one char: the first exists and nothing follows it.
                match (chars.next(), chars.next()) {
                    (Some(c), None) => Ok(Self::Const(ConstVal::Char(c))),
                    _ => Err("Characters must only have one char".to_string()),
                }
            }
        }
    }

    /// Classifies identifier text: text starting with an ASCII digit or `.`
    /// is a number literal, anything else (including empty text) is a name.
    fn parse_ident(name: String) -> Self {
        match name.chars().next() {
            Some('0'..='9' | '.') => Self::Const(ConstVal::Number(name)),
            _ => Self::Ident(name),
        }
    }
}
impl Operator {
pub fn presedence(&self) -> u32 {
match self {
Operator::LessThan => 0,
Operator::GreaterThan => 0,
Operator::Add => 1,
Operator::Sub => 2,
Operator::Mul => 3,
Operator::Div => 4,
Operator::Offset => 5,
}
}
pub fn str(&self) -> &str {
match self {
Self::Add => "+",
Self::Sub => "-",
Self::Mul => "*",
Self::Div => "/",
Self::LessThan => "<",
Self::GreaterThan => ">",
Self::Offset => ".",
}
}
pub fn from_token(token: &Token) -> Option<Self> {
let Token::Symbol(symbol) = token else {
return None;
};
Some(match symbol {
Symbol::OpenAngle => Operator::LessThan,
Symbol::CloseAngle => Operator::GreaterThan,
Symbol::Plus => Operator::Add,
Symbol::Minus => Operator::Sub,
Symbol::Asterisk => Operator::Mul,
Symbol::Slash => Operator::Div,
Symbol::Dot => Operator::Offset,
_ => {
return None;
}
})
}
pub fn pad(&self) -> bool {
match self {
Operator::Add => true,
Operator::Sub => true,
Operator::Mul => true,
Operator::Div => true,
Operator::LessThan => true,
Operator::GreaterThan => true,
Operator::Offset => false,
}
}
}
impl Debug for Expr {
    /// Renders the expression in source-like form. Operator expressions are
    /// always wrapped in parentheses; call arguments are separated by `, `.
    ///
    /// # Panics
    /// Panics on an `Expr::Op` with an empty operand list.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Expr::Const(value) => value.fmt(f),
            Expr::Ident(name) => f.write_str(name),
            Expr::Block(body) => body.fmt(f),
            Expr::Op(op, operands) => {
                f.write_char('(')?;
                operands[0].fmt(f)?;
                for operand in &operands[1..] {
                    if op.pad() {
                        f.write_char(' ')?;
                        f.write_str(op.str())?;
                        f.write_char(' ')?;
                    } else {
                        f.write_str(op.str())?;
                    }
                    operand.fmt(f)?;
                }
                f.write_char(')')
            }
            Expr::Call(callee, args) => {
                callee.fmt(f)?;
                f.write_char('(')?;
                for (index, arg) in args.iter().enumerate() {
                    // Separator goes between arguments, not before the first.
                    if index > 0 {
                        f.write_str(", ")?;
                    }
                    arg.fmt(f)?;
                }
                f.write_char(')')
            }
        }
    }
}
impl Debug for ConstVal {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::String(str) => str.fmt(f),
Self::Char(c) => c.fmt(f),
Self::Number(str) => f.write_str(str),
Self::Unit => f.write_str("()"),
}
}
}

58
src/parser/mod.rs Normal file
View File

@@ -0,0 +1,58 @@
use crate::token::{Keyword, Symbol};
use std::fmt::Debug;
mod body;
mod cursor;
mod error;
mod expr;
pub use body::*;
pub use cursor::*;
pub use expr::*;
pub use error::*;
/// A parsed source file: the list of functions it defines.
#[derive(Debug)]
pub struct Module {
/// The functions in the order they were parsed.
functions: Vec<Function>,
}
/// A parsed function definition of the form `fn <name>() { ... }`.
pub struct Function {
/// The identifier following the `fn` keyword.
pub name: String,
/// The brace-delimited body of the function.
pub body: Body,
}
impl Module {
    /// Parses functions until the token stream is exhausted.
    ///
    /// # Errors
    /// Fails when a top-level token is not the `fn` keyword (that token is
    /// consumed and reported), or when a function fails to parse.
    pub fn parse(cursor: &mut TokenCursor) -> Result<Self, ParserError> {
        let mut functions = Vec::new();
        while let Some(next) = cursor.peek() {
            if !next.is_keyword(Keyword::Fn) {
                // The peek above succeeded, so a token is available here.
                return unexpected_token(cursor.next().unwrap(), "fn");
            }
            functions.push(Function::parse(cursor)?);
        }
        Ok(Self { functions })
    }
}
impl Function {
pub fn parse(cursor: &mut TokenCursor) -> Result<Self, ParserError> {
cursor.expect_kw(Keyword::Fn)?;
let name = cursor.expect_ident()?;
cursor.expect_sym(Symbol::OpenParen)?;
cursor.expect_sym(Symbol::CloseParen)?;
let body = Body::parse(cursor)?;
Ok(Self { name, body })
}
}
impl Debug for Function {
    /// Renders the function as `fn <name>() ` followed by its body.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for part in ["fn ", self.name.as_str(), "() "] {
            f.write_str(part)?;
        }
        self.body.fmt(f)
    }
}