START OF COMPILER

This commit is contained in:
2024-10-11 17:31:03 -04:00
parent bb3a0ad113
commit de79445ede
39 changed files with 710 additions and 94 deletions

14
src/parser/mod.rs Normal file
View File

@@ -0,0 +1,14 @@
mod v1;
mod v2;
/// Entry point of the parser driver.
///
/// With a path as the first command-line argument, the file is read, echoed to stdout and
/// handed to the v1 parser. Without an argument the v1 parser reads statements from stdin.
///
/// # Panics
/// Panics if the given file cannot be read.
pub fn main() {
    match std::env::args_os().nth(1) {
        Some(path) => {
            let source = std::fs::read_to_string(path).expect("failed to read file");
            println!("{source}");
            v1::parse_file(&source);
            // v2::parse_file(&source);
        }
        None => v1::run_stdin(),
    }
}

131
src/parser/v1/body.rs Normal file
View File

@@ -0,0 +1,131 @@
use std::fmt::{Debug, Write};
use super::{
token::{Keyword, Symbol, Token},
Node, Parsable, ParserErrors,
};
use crate::util::Padder;
use super::{Expr, ParserError, TokenCursor};
/// A braced block of statements (`{ ... }`).
#[derive(Clone)]
pub struct Body {
// Statements in source order; each node holds either the parsed statement or an error marker.
statements: Vec<Node<Statement>>,
}
/// A single statement inside a [`Body`].
#[derive(Clone)]
pub enum Statement {
/// `let <name> = <expr>`: the bound name and its initializer expression.
Let(String, Node<Expr>),
/// `return <expr>`.
Return(Node<Expr>),
/// A bare expression used as a statement.
Expr(Node<Expr>),
}
impl Statement {
    /// Reports whether the statement's trailing expression failed to parse.
    ///
    /// Returns `true` when the expression node is an error, or when the expression itself
    /// reports that it ended with an error.
    pub fn ended_with_error(&self) -> bool {
        // Every variant carries exactly one expression node; bind it regardless of the variant.
        let (Statement::Let(_, node) | Statement::Return(node) | Statement::Expr(node)) = self;
        match node.as_ref() {
            Ok(expr) => expr.ended_with_error(),
            Err(_) => true,
        }
    }
}
impl Parsable for Body {
    /// Parses a `{ ... }` block of statements separated by `;`.
    ///
    /// Statements that fail to parse are kept as error nodes; the cursor is then moved forward
    /// to the next `;` or `}` so that parsing can continue with the following statement.
    /// A missing `;` between two statements is recorded in `errors` without aborting.
    ///
    /// # Errors
    /// Returns an error when the opening `{` is missing or the input ends before the block is
    /// closed.
    fn parse(cursor: &mut TokenCursor, errors: &mut ParserErrors) -> Result<Self, ParserError> {
        let mut statements = Vec::new();
        // Tokens at which error recovery stops skipping input.
        let statement_end = &[Symbol::Semicolon, Symbol::CloseCurly];
        cursor.expect_sym(Symbol::OpenCurly)?;
        // Set after every statement; cleared again once its `;` has been consumed.
        let mut expect_semi = false;
        loop {
            let next = cursor.expect_peek()?;
            if next.is_symbol(Symbol::CloseCurly) {
                // Also covers the empty block `{}` on the first iteration.
                cursor.next();
                return Ok(Self { statements });
            }
            if next.is_symbol(Symbol::Semicolon) {
                cursor.next();
                expect_semi = false;
                continue;
            } else if expect_semi {
                errors.add(ParserError {
                    msg: "expected ';'".to_string(),
                    spans: vec![cursor.next_pos().char_span()],
                });
            }
            let statement: Node<Statement> = Node::parse(cursor, errors);
            expect_semi = true;
            if statement.is_err() || statement.as_ref().is_ok_and(|s| s.ended_with_error()) {
                // Skip to (but do not consume) the end of the broken statement.
                cursor
                    .seek(|t| t.is_symbol_and(|s| statement_end.contains(&s)))
                    .ok_or_else(ParserError::unexpected_end)?;
            }
            statements.push(statement);
        }
    }
}
impl Parsable for Statement {
    /// Parses one statement, dispatching on the leading keyword (if any).
    ///
    /// `let` and `return` introduce their respective statements; anything else is parsed as a
    /// bare expression statement.
    fn parse(cursor: &mut TokenCursor, errors: &mut ParserErrors) -> Result<Self, ParserError> {
        let head = cursor.expect_peek()?;
        let leading_keyword = match head.token {
            Token::Keyword(kw) => Some(kw),
            _ => None,
        };
        match leading_keyword {
            Some(Keyword::Let) => {
                cursor.next();
                let name = cursor.expect_ident()?;
                cursor.expect_sym(Symbol::Equals)?;
                Ok(Self::Let(name, Node::parse(cursor, errors)))
            }
            Some(Keyword::Return) => {
                cursor.next();
                Ok(Self::Return(Node::parse(cursor, errors)))
            }
            // Other keywords and all non-keyword tokens are left for the expression parser.
            _ => Ok(Self::Expr(Node::parse(cursor, errors))),
        }
    }
}
impl Debug for Statement {
    /// Renders the statement in source-like form, always terminated by `;`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Write the variant-specific prefix, then fall through to the shared "expr;" tail.
        let expr = match self {
            Statement::Let(name, expr) => {
                write!(f, "let {name} = ")?;
                expr
            }
            Statement::Return(expr) => {
                f.write_str("return ")?;
                expr
            }
            Statement::Expr(expr) => expr,
        };
        expr.fmt(f)?;
        f.write_char(';')
    }
}
impl Debug for Body {
    /// Renders the block as `{}` when empty, otherwise one statement per line between braces.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if self.statements.is_empty() {
            return f.write_str("{}");
        }
        f.write_str("{\n ")?;
        let mut padder = Padder::new(f);
        for statement in &self.statements {
            // they don't expose wrap_buf :grief:
            let line = format!("{statement:?}\n");
            padder.write_str(&line)?;
        }
        f.write_char('}')
    }
}

90
src/parser/v1/cursor.rs Normal file
View File

@@ -0,0 +1,90 @@
use super::error::ParserError;
use super::token::{CharCursor, Keyword, Symbol, Token, TokenInstance};
use super::FilePos;
/// Token-level cursor with one token of lookahead, layered over a [`CharCursor`].
pub struct TokenCursor<'a> {
// Character source the tokens are lexed from.
cursor: CharCursor<'a>,
// Buffered lookahead token; `None` once the lexer produced no further token.
next: Option<TokenInstance>,
// Character-cursor `next_pos` captured just before the buffered token was lexed.
next_pos: FilePos,
// Character-cursor `prev_pos` captured at the same moment as `next_pos`.
prev_end: FilePos,
}
impl<'a> TokenCursor<'a> {
    /// Returns the buffered token and lexes the following one into the buffer.
    ///
    /// `prev_end` and `next_pos` are refreshed from the character cursor before the new
    /// lookahead token is lexed.
    pub fn next(&mut self) -> Option<TokenInstance> {
        self.prev_end = self.cursor.prev_pos();
        self.next_pos = self.cursor.next_pos();
        std::mem::replace(&mut self.next, TokenInstance::parse(&mut self.cursor))
    }

    /// Like [`Self::next`], but fails with "unexpected end of input" when no token is buffered.
    pub fn expect_next(&mut self) -> Result<TokenInstance, ParserError> {
        // Check first so that the recorded positions are left untouched at end of input.
        self.peek().ok_or_else(ParserError::unexpected_end)?;
        Ok(self.next().unwrap())
    }

    /// Consumes the next token and checks that it equals `t`.
    ///
    /// The token is consumed even when it does not match.
    pub fn expect_token(&mut self, t: Token) -> Result<(), ParserError> {
        let next = self.expect_next()?;
        if t == next.token {
            Ok(())
        } else {
            Err(ParserError::unexpected_token(&next, &format!("{t:?}")))
        }
    }

    /// Consumes the next token and checks that it is the symbol `symbol`.
    pub fn expect_sym(&mut self, symbol: Symbol) -> Result<(), ParserError> {
        self.expect_token(Token::Symbol(symbol))
    }

    /// Consumes tokens up to and including the first occurrence of `symbol`, or until the
    /// input is exhausted.
    pub fn seek_sym(&mut self, symbol: Symbol) {
        while self
            .next()
            .is_some_and(|n| n.token != Token::Symbol(symbol))
        {}
    }

    /// Skips tokens until the lookahead token satisfies `f` and returns it without consuming it.
    ///
    /// Returns `None` when the input ends before a matching token is found.
    pub fn seek(&mut self, f: impl Fn(&TokenInstance) -> bool) -> Option<&TokenInstance> {
        loop {
            if f(self.peek()?) {
                return self.peek();
            }
            self.next();
        }
    }

    /// Consumes the next token and checks that it is the keyword `kw`.
    pub fn expect_kw(&mut self, kw: Keyword) -> Result<(), ParserError> {
        self.expect_token(Token::Keyword(kw))
    }

    /// Returns the lookahead token without consuming it.
    pub fn peek(&self) -> Option<&TokenInstance> {
        self.next.as_ref()
    }

    /// Like [`Self::peek`], but fails with "unexpected end of input" when no token is buffered.
    pub fn expect_peek(&mut self) -> Result<&TokenInstance, ParserError> {
        // Build the error lazily: this is called for nearly every token and usually succeeds.
        self.peek().ok_or_else(ParserError::unexpected_end)
    }

    /// Consumes the next token and returns its name if it is an identifier.
    pub fn expect_ident(&mut self) -> Result<String, ParserError> {
        let i = self.expect_next()?;
        let Token::Ident(n) = &i.token else {
            return Err(ParserError::unexpected_token(&i, "an identifier"));
        };
        Ok(n.to_string())
    }

    /// Gives direct access to the underlying character cursor.
    ///
    /// The character cursor is positioned after the buffered lookahead token, so characters
    /// read through it follow that token in the input.
    pub fn chars(&mut self) -> &mut CharCursor<'a> {
        &mut self.cursor
    }

    /// Position recorded as the end of the most recently consumed token.
    pub fn prev_end(&self) -> FilePos {
        self.prev_end
    }

    /// Character-cursor position recorded just before the lookahead token was lexed.
    pub fn next_pos(&self) -> FilePos {
        self.next_pos
    }
}
impl<'a> From<&'a str> for TokenCursor<'a> {
    /// Builds a token cursor over `string` by way of a [`CharCursor`].
    fn from(string: &'a str) -> Self {
        let chars = CharCursor::from(string);
        chars.into()
    }
}
impl<'a> From<CharCursor<'a>> for TokenCursor<'a> {
    /// Wraps a character cursor, lexing the first token immediately as lookahead.
    ///
    /// Both recorded positions start out at the beginning of the file.
    fn from(mut cursor: CharCursor<'a>) -> Self {
        let first_token = TokenInstance::parse(&mut cursor);
        let origin = FilePos::start();
        Self {
            cursor,
            next: first_token,
            next_pos: origin,
            prev_end: origin,
        }
    }
}

62
src/parser/v1/error.rs Normal file
View File

@@ -0,0 +1,62 @@
use super::{
token::{FileSpan, TokenInstance},
FilePos,
};
/// A parse error: a message plus the source spans it refers to.
#[derive(Debug, Clone)]
pub struct ParserError {
/// Human-readable description of the problem.
pub msg: String,
/// Source locations shown with the message; may be empty.
pub spans: Vec<FileSpan>,
}
/// Collector for errors that were recorded while parsing continued.
pub struct ParserErrors {
/// Recorded errors, in the order they were added.
pub errs: Vec<ParserError>,
}
impl ParserError {
    /// Creates an error whose spans are the spans of the given token instances.
    pub fn from_instances(instances: &[&TokenInstance], msg: String) -> Self {
        let spans = instances.iter().map(|inst| inst.span).collect();
        Self { msg, spans }
    }

    /// Creates an error that carries only a message and no source location.
    pub fn from_msg(msg: String) -> Self {
        Self { msg, spans: vec![] }
    }

    /// Creates an error pointing at the single position `pos`.
    pub fn at(pos: FilePos, msg: String) -> Self {
        let span = FileSpan::at(pos);
        Self {
            msg,
            spans: vec![span],
        }
    }

    /// The error reported when the input ends although more tokens were required.
    pub fn unexpected_end() -> Self {
        Self::from_msg("unexpected end of input".to_string())
    }

    /// The error reported when `inst` was found where `expected` should have been.
    pub fn unexpected_token(inst: &TokenInstance, expected: &str) -> Self {
        let msg = format!("unexpected token {:?}; expected {expected}", inst.token);
        Self::from_instances(&[inst], msg)
    }

    /// Writes the error message followed by a rendering of every span within `file`.
    ///
    /// The header line ends with `:` only when there is at least one span to show.
    pub fn write_for(&self, writer: &mut impl std::io::Write, file: &str) -> std::io::Result<()> {
        let suffix = match self.spans.is_empty() {
            true => "",
            false => ":",
        };
        writeln!(writer, "error: {}{}", self.msg, suffix)?;
        for span in self.spans.iter() {
            span.write_for(writer, file)?;
        }
        Ok(())
    }
}
impl ParserErrors {
    /// Creates an empty error collection.
    pub fn new() -> Self {
        let errs = vec![];
        Self { errs }
    }

    /// Records `err` after all previously recorded errors.
    pub fn add(&mut self, err: ParserError) {
        self.errs.push(err)
    }
}

203
src/parser/v1/expr.rs Normal file
View File

@@ -0,0 +1,203 @@
use std::fmt::{Debug, Write};
use super::token::{Symbol, Token};
use super::{Body, Node, Parsable, ParserError, ParserErrors, TokenCursor, Literal};
/// A boxed expression node, used wherever an expression contains another expression.
pub type ExprNode = Node<Box<Expr>>;
/// An expression of the v1 syntax tree.
#[derive(Clone)]
pub enum Expr {
/// A literal value.
Lit(Node<Literal>),
/// A bare identifier.
Ident(String),
/// A binary operation: operator, left operand, right operand.
BinaryOp(Operator, ExprNode, ExprNode),
/// A `{ ... }` block used as an expression.
Block(Node<Body>),
/// A call: the callee expression and its arguments.
Call(ExprNode, Vec<Node<Expr>>),
/// A parenthesised expression.
Group(ExprNode),
}
/// Binary operators recognised by the expression parser.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Operator {
/// `+`
Add,
/// `-`
Sub,
/// `*`
Mul,
/// `/`
Div,
/// `<`
LessThan,
/// `>`
GreaterThan,
/// `.`
Access,
/// `=`
Assign,
}
impl Expr {
    /// Reports whether the expression's last component failed to parse.
    ///
    /// Only binary operations (via their right operand) and blocks can report an error here;
    /// every other kind of expression returns `false`.
    pub fn ended_with_error(&self) -> bool {
        match self {
            Expr::BinaryOp(_, _, rhs) => match rhs.as_ref() {
                Ok(inner) => inner.ended_with_error(),
                Err(_) => true,
            },
            Expr::Block(body) => body.is_err(),
            Expr::Lit(_) | Expr::Ident(_) | Expr::Call(_, _) | Expr::Group(_) => false,
        }
    }
}
impl Parsable for Expr {
    /// Parses an expression: a primary (group, block, literal or identifier), followed by any
    /// number of single-argument call suffixes, optionally followed by a binary operator and
    /// its right-hand side.
    ///
    /// # Errors
    /// Returns an error on unexpected end of input, on a token that cannot start an
    /// expression, or on a missing `)`.
    fn parse(cursor: &mut TokenCursor, errors: &mut ParserErrors) -> Result<Self, ParserError> {
        let start = cursor.next_pos();
        let next = cursor.expect_peek()?;
        let mut e1 = if next.is_symbol(Symbol::OpenParen) {
            cursor.next();
            // `()` is the unit literal.
            if cursor.expect_peek()?.is_symbol(Symbol::CloseParen) {
                cursor.next();
                return Ok(Expr::Lit(Node::new(
                    Literal::Unit,
                    cursor.next_pos().char_span(),
                )));
            }
            let expr = Node::parse(cursor, errors).bx();
            if expr.is_err() {
                // Recover by skipping ahead to the closing parenthesis WITHOUT consuming it;
                // `expect_sym` below consumes it. (`seek_sym` would swallow the `)` itself and
                // make the following check fail on whatever token comes after it.)
                cursor.seek(|t| t.is_symbol(Symbol::CloseParen));
            }
            cursor.expect_sym(Symbol::CloseParen)?;
            Self::Group(expr)
        } else if next.is_symbol(Symbol::OpenCurly) {
            Self::Block(Node::parse(cursor, errors))
        } else if let Some(val) = Node::maybe_parse(cursor, errors) {
            Self::Lit(val)
        } else {
            let next = cursor.peek().unwrap();
            match &next.token {
                Token::Ident(name) => {
                    let name = name.to_string();
                    cursor.next();
                    Self::Ident(name)
                }
                _ => {
                    return Err(ParserError::unexpected_token(next, "an expression"));
                }
            }
        };
        let Some(mut next) = cursor.peek() else {
            return Ok(e1);
        };
        // Call suffixes: each `( <expr> )` wraps the expression parsed so far.
        while next.is_symbol(Symbol::OpenParen) {
            cursor.next();
            let inner = Node::parse(cursor, errors);
            cursor.expect_sym(Symbol::CloseParen)?;
            let end = cursor.prev_end();
            e1 = Self::Call(Node::new(Box::new(e1), start.to(end)), vec![inner]);
            let Some(next2) = cursor.peek() else {
                return Ok(e1);
            };
            next = next2
        }
        let end = cursor.prev_end();
        Ok(if let Some(mut op) = Operator::from_token(&next.token) {
            cursor.next();
            let mut n1 = Node::new(Box::new(e1), start.to(end));
            let mut n2 = Node::parse(cursor, errors).bx();
            // The right-hand side was parsed recursively, so it may itself be a binary
            // operation. If this operator binds tighter, rotate the tree so that it is applied
            // to the inner left operand first.
            // NOTE(review): only one level is rotated and the comparison is strict, so chains
            // such as `a - b - c` appear to group to the right — confirm intended behavior.
            if let Ok(box Self::BinaryOp(op2, n21, n22)) = n2.as_ref() {
                if op.presedence() > op2.presedence() {
                    n1 = Node::new(
                        Box::new(Self::BinaryOp(op, n1, n21.clone())),
                        start.to(n21.span.end),
                    );
                    op = *op2;
                    n2 = n22.clone();
                }
            }
            Self::BinaryOp(op, n1, n2)
        } else {
            e1
        })
    }
}
impl Operator {
    /// Binding strength of the operator; larger values bind tighter.
    pub fn presedence(&self) -> u32 {
        match self {
            Self::Assign => 0,
            Self::LessThan | Self::GreaterThan => 1,
            Self::Add => 2,
            Self::Sub => 3,
            Self::Mul => 4,
            Self::Div => 5,
            Self::Access => 6,
        }
    }

    /// Source text of the operator.
    pub fn str(&self) -> &str {
        match self {
            Self::Assign => "=",
            Self::Access => ".",
            Self::LessThan => "<",
            Self::GreaterThan => ">",
            Self::Add => "+",
            Self::Sub => "-",
            Self::Mul => "*",
            Self::Div => "/",
        }
    }

    /// Maps a token to the operator it denotes, or `None` if it is not an operator symbol.
    pub fn from_token(token: &Token) -> Option<Self> {
        match token {
            Token::Symbol(Symbol::OpenAngle) => Some(Self::LessThan),
            Token::Symbol(Symbol::CloseAngle) => Some(Self::GreaterThan),
            Token::Symbol(Symbol::Plus) => Some(Self::Add),
            Token::Symbol(Symbol::Minus) => Some(Self::Sub),
            Token::Symbol(Symbol::Asterisk) => Some(Self::Mul),
            Token::Symbol(Symbol::Slash) => Some(Self::Div),
            Token::Symbol(Symbol::Dot) => Some(Self::Access),
            Token::Symbol(Symbol::Equals) => Some(Self::Assign),
            _ => None,
        }
    }

    /// Whether the operator is printed with a space on either side.
    pub fn pad(&self) -> bool {
        match self {
            Self::Access => false,
            Self::Add
            | Self::Sub
            | Self::Mul
            | Self::Div
            | Self::LessThan
            | Self::GreaterThan
            | Self::Assign => true,
        }
    }
}
impl Debug for Expr {
    /// Renders the expression in source-like form; binary operations are fully parenthesised.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Expr::Lit(lit) => lit.fmt(f),
            Expr::Ident(name) => f.write_str(name),
            Expr::Block(body) => body.fmt(f),
            Expr::Group(inner) => inner.fmt(f),
            Expr::BinaryOp(op, lhs, rhs) => {
                let symbol = op.str();
                if op.pad() {
                    write!(f, "({lhs:?} {symbol} {rhs:?})")
                } else {
                    write!(f, "({lhs:?}{symbol}{rhs:?})")
                }
            }
            Expr::Call(callee, args) => {
                callee.fmt(f)?;
                f.write_char('(')?;
                for (index, arg) in args.iter().enumerate() {
                    if index > 0 {
                        f.write_str(", ")?;
                    }
                    arg.fmt(f)?;
                }
                f.write_char(')')
            }
        }
    }
}

46
src/parser/v1/mod.rs Normal file
View File

@@ -0,0 +1,46 @@
use std::io::{stdout, BufRead, BufReader};
mod body;
mod cursor;
mod error;
mod expr;
mod module;
mod node;
mod token;
mod val;
pub use body::*;
pub use cursor::*;
pub use error::*;
pub use expr::*;
pub use module::*;
pub use node::*;
pub use val::*;
use token::*;
/// Parses `file` as a module, pretty-prints the result if the module node parsed, and then
/// writes every recorded error to stdout.
///
/// # Panics
/// Panics if writing an error to stdout fails.
pub fn parse_file(file: &str) {
    let mut errors = ParserErrors::new();
    let mut cursor = TokenCursor::from(file);
    let node = Node::<Module>::parse(&mut cursor, &mut errors);
    if let Ok(module) = node.as_ref() {
        println!("{module:#?}");
    }
    let mut out = stdout();
    for err in errors.errs {
        err.write_for(&mut out, file).unwrap();
    }
}
/// Reads stdin line by line, parsing each line as a single statement.
///
/// A successfully parsed statement is printed; any errors recorded for the line follow.
///
/// # Panics
/// Panics if a line cannot be read or an error cannot be written to stdout.
pub fn run_stdin() {
    let input = BufReader::new(std::io::stdin());
    for line in input.lines() {
        let mut errors = ParserErrors::new();
        let text = line.expect("failed to read line");
        let mut cursor = TokenCursor::from(text.as_str());
        let statement = Node::<Statement>::parse(&mut cursor, &mut errors);
        if let Ok(parsed) = statement.as_ref() {
            println!("{:?}", parsed);
        }
        let mut out = stdout();
        for err in errors.errs {
            err.write_for(&mut out, &text).unwrap();
        }
    }
}

52
src/parser/v1/module.rs Normal file
View File

@@ -0,0 +1,52 @@
use std::fmt::Debug;
use super::{token::*, Body, Node, Parsable, ParserError, ParserErrors, TokenCursor};
/// A parsed source file: a sequence of function definitions.
#[derive(Debug)]
pub struct Module {
// Functions in source order; each node holds either the function or an error marker.
functions: Vec<Node<Function>>,
}
/// A function definition of the form `fn <name>() { ... }`.
#[derive(Clone)]
pub struct Function {
/// The function's name.
pub name: String,
/// The function's body block.
pub body: Node<Body>,
}
impl Parsable for Module {
    /// Parses function definitions until the input is exhausted.
    ///
    /// # Errors
    /// Returns an error as soon as a top-level token other than `fn` is encountered.
    fn parse(cursor: &mut TokenCursor, errors: &mut ParserErrors) -> Result<Self, ParserError> {
        let mut functions = Vec::new();
        while let Some(token) = cursor.peek() {
            if !token.is_keyword(Keyword::Fn) {
                return Err(ParserError::unexpected_token(token, "fn"));
            }
            functions.push(Node::parse(cursor, errors));
        }
        Ok(Self { functions })
    }
}
impl Parsable for Function {
    /// Parses `fn <name>() <body>`; the parameter list must be empty.
    fn parse(cursor: &mut TokenCursor, errors: &mut ParserErrors) -> Result<Self, ParserError> {
        cursor.expect_kw(Keyword::Fn)?;
        let name = cursor.expect_ident()?;
        // The empty parameter list: `(` immediately followed by `)`.
        for paren in [Symbol::OpenParen, Symbol::CloseParen] {
            cursor.expect_sym(paren)?;
        }
        Ok(Self {
            name,
            body: Node::parse(cursor, errors),
        })
    }
}
impl Debug for Function {
    /// Renders the function as `fn <name>() <body>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "fn {}() ", self.name)?;
        self.body.fmt(f)
    }
}

98
src/parser/v1/node.rs Normal file
View File

@@ -0,0 +1,98 @@
use std::{
fmt::Debug,
ops::{Deref, DerefMut},
};
use super::{FileSpan, ParserError, ParserErrors, TokenCursor};
/// A syntax-tree node: a parse result together with the source span it was parsed from.
#[derive(Clone)]
pub struct Node<T> {
/// The parsed value, or `Err(())` when parsing failed (the error itself is recorded elsewhere).
pub inner: Result<T, ()>,
/// Source span associated with this node.
pub span: FileSpan,
}
/// A syntax element that can be parsed from a token cursor.
pub trait Parsable: Sized {
/// Parses `Self` from `cursor`; recoverable problems may additionally be recorded in `errors`.
fn parse(cursor: &mut TokenCursor, errors: &mut ParserErrors) -> Result<Self, ParserError>;
}
/// A syntax element that may or may not be present at the cursor.
pub trait MaybeParsable: Sized {
/// Returns `Ok(None)` when the element is absent, `Ok(Some(_))` when it was parsed, and an
/// error when parsing it failed.
fn maybe_parse(
cursor: &mut TokenCursor,
errors: &mut ParserErrors,
) -> Result<Option<Self>, ParserError>;
}
impl<T: Parsable> Node<T> {
    /// Parses a `T` and wraps the outcome in a node.
    ///
    /// A parse failure is recorded in `errors` and turns into an error node instead of being
    /// returned to the caller.
    pub fn parse(cursor: &mut TokenCursor, errors: &mut ParserErrors) -> Self {
        let start = cursor.next_pos();
        let inner = match T::parse(cursor, errors) {
            Ok(value) => Ok(value),
            Err(err) => {
                errors.add(err);
                Err(())
            }
        };
        let span = start.to(cursor.prev_end());
        Self { inner, span }
    }
}
impl<T: MaybeParsable> Node<T> {
    /// Tries to parse an optional `T`.
    ///
    /// Returns `None` when the element is absent. A parse failure is recorded in `errors` and
    /// yields an error node.
    pub fn maybe_parse(cursor: &mut TokenCursor, errors: &mut ParserErrors) -> Option<Self> {
        let start = cursor.next_pos();
        let inner = match T::maybe_parse(cursor, errors) {
            Ok(Some(value)) => Ok(value),
            Ok(None) => return None,
            Err(err) => {
                errors.add(err);
                Err(())
            }
        };
        let span = start.to(cursor.prev_end());
        Some(Self { inner, span })
    }
}
impl<T> Node<T> {
    /// Creates a successful node from a value and its span.
    pub fn new(inner: T, span: FileSpan) -> Self {
        let inner = Ok(inner);
        Self { inner, span }
    }

    /// Moves the node's value (if any) into a `Box`, keeping the span.
    pub fn bx(self) -> Node<Box<T>> {
        let boxed = self.inner.map(Box::new);
        Node {
            inner: boxed,
            span: self.span,
        }
    }
}
impl<T> Node<Box<T>> {
    /// Moves the node's value (if any) out of its `Box`, keeping the span.
    pub fn unbx(self) -> Node<T> {
        let unboxed = match self.inner {
            Ok(boxed) => Ok(*boxed),
            Err(()) => Err(()),
        };
        Node {
            inner: unboxed,
            span: self.span,
        }
    }
}
// Lets a node be used like its inner `Result`, e.g. `node.is_err()` or `node.as_ref()`.
impl<T> Deref for Node<T> {
type Target = Result<T, ()>;
fn deref(&self) -> &Self::Target {
&self.inner
}
}
// Mutable access to the inner `Result` through the node.
impl<T> DerefMut for Node<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.inner
}
}
impl<T: Debug> Debug for Node<T> {
    /// Prints the inner value, or the placeholder `{error}` for an error node.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Ok(value) = &self.inner {
            return value.fmt(f);
        }
        f.write_str("{error}")
    }
}

View File

@@ -0,0 +1,77 @@
use std::{iter::Peekable, str::Chars};
use super::super::ParserError;
use super::FilePos;
/// Character-level cursor over source text that tracks line/column positions.
pub struct CharCursor<'a> {
// Remaining characters, with one character of lookahead.
chars: Peekable<Chars<'a>>,
// Position of the next character to be consumed.
next_pos: FilePos,
// Position of the most recently consumed character.
prev_pos: FilePos,
}
impl CharCursor<'_> {
    /// Consumes and returns the next character, or `None` at end of input.
    pub fn next(&mut self) -> Option<char> {
        let current = self.peek();
        if current.is_some() {
            self.advance();
        }
        current
    }

    /// Consumes the next character and checks that it equals `c`.
    ///
    /// The character is consumed even when it does not match.
    pub fn expect(&mut self, c: char) -> Result<(), ParserError> {
        match self.expect_next()? {
            found if found == c => Ok(()),
            found => Err(ParserError::at(
                self.prev_pos,
                format!("unexpected char '{found}'; expected '{c}'"),
            )),
        }
    }

    /// Consumes characters for as long as they are whitespace.
    pub fn skip_whitespace(&mut self) {
        while let Some(c) = self.peek() {
            if !c.is_whitespace() {
                break;
            }
            self.advance();
        }
    }

    /// Returns the next character without consuming it.
    pub fn peek(&mut self) -> Option<char> {
        self.chars.peek().copied()
    }

    /// Consumes one character and updates the tracked positions; does nothing at end of input.
    pub fn advance(&mut self) {
        if let Some(consumed) = self.chars.next() {
            self.prev_pos = self.next_pos;
            if consumed == '\n' {
                // A newline moves to the start of the following line.
                self.next_pos.line += 1;
                self.next_pos.col = 0;
            } else {
                self.next_pos.col += 1;
            }
        }
    }

    /// Consumes the next character only if it equals `c`; returns whether it did.
    pub fn advance_if(&mut self, c: char) -> bool {
        let matched = self.peek() == Some(c);
        if matched {
            self.advance();
        }
        matched
    }

    /// Like [`Self::next`], but fails with "unexpected end of input" at end of input.
    pub fn expect_next(&mut self) -> Result<char, ParserError> {
        match self.next() {
            Some(c) => Ok(c),
            None => Err(ParserError::unexpected_end()),
        }
    }

    /// Position of the next character to be consumed.
    pub fn next_pos(&self) -> FilePos {
        self.next_pos
    }

    /// Position of the most recently consumed character.
    pub fn prev_pos(&self) -> FilePos {
        self.prev_pos
    }
}
impl<'a> From<&'a str> for CharCursor<'a> {
    /// Creates a cursor positioned at the first character of `value`.
    fn from(value: &'a str) -> Self {
        let origin = FilePos::start();
        Self {
            chars: value.chars().peekable(),
            next_pos: origin,
            prev_pos: origin,
        }
    }
}

View File

@@ -0,0 +1,80 @@
/// A zero-based line/column position inside a source file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FilePos {
    pub line: usize,
    pub col: usize,
}

/// A range of source positions; `end` is inclusive.
#[derive(Debug, Clone, Copy)]
pub struct FileSpan {
    pub start: FilePos,
    pub end: FilePos,
}

impl FilePos {
    /// The position of the first character of a file (line 0, column 0).
    pub fn start() -> Self {
        Self { line: 0, col: 0 }
    }

    /// Builds the span reaching from `self` to `end`.
    pub fn to(self, end: FilePos) -> FileSpan {
        FileSpan { start: self, end }
    }

    /// Builds the span covering only this position.
    pub fn char_span(self) -> FileSpan {
        FileSpan::at(self)
    }
}

/// Number of context lines printed before the first line of a span.
const BEFORE: usize = 1;
/// Number of context lines accounted for after a span (none are printed at the moment).
const AFTER: usize = 0;

impl FileSpan {
    /// The span covering only `pos`.
    pub fn at(pos: FilePos) -> Self {
        Self {
            start: pos,
            end: pos,
        }
    }

    /// Writes the source lines touched by this span, with `^` markers underneath.
    ///
    /// Up to [`BEFORE`] lines of leading context are printed first. For a span within one line
    /// the markers cover `start.col..=end.col`. For a span over several lines the first line
    /// is marked from `start.col` to its end and the last line from its beginning to `end.col`.
    /// Lines in between are not printed.
    ///
    /// Degenerate spans never panic: a span whose end lies before its start is rendered as a
    /// single marker at the start, and lines that do not exist in `file` are printed as empty.
    ///
    /// # Errors
    /// Propagates any error of the underlying writer.
    pub fn write_for(&self, writer: &mut impl std::io::Write, file: &str) -> std::io::Result<()> {
        let first = self.start.line.saturating_sub(BEFORE);
        let mut lines = file.lines().skip(first);
        let multi_line = self.end.line > self.start.line;
        let last_line = if multi_line {
            self.end.line
        } else {
            self.start.line
        };
        // Width of the line-number gutter.
        let width = (last_line + AFTER).to_string().len();

        // Leading context.
        for number in first..self.start.line {
            writeln!(
                writer,
                "{:>width$} | {}",
                number,
                lines.next().unwrap_or("")
            )?;
        }

        let line = lines.next().unwrap_or("");
        writeln!(writer, "{:>width$} | {}", self.start.line, line)?;
        let markers = if multi_line {
            // Columns count characters, so measure the line in characters as well.
            line.chars().count().saturating_sub(self.start.col)
        } else {
            self.end.col.saturating_sub(self.start.col) + 1
        };
        writeln!(
            writer,
            "{} | {}{}",
            " ".repeat(width),
            " ".repeat(self.start.col),
            "^".repeat(markers)
        )?;

        if multi_line {
            // Skip the lines strictly between the first and the last line of the span.
            let skipped = self.end.line - self.start.line - 1;
            let line = lines.nth(skipped).unwrap_or("");
            writeln!(writer, "{:>width$} | {}", self.end.line, line)?;
            writeln!(
                writer,
                "{} | {}",
                " ".repeat(width),
                "^".repeat(self.end.col + 1)
            )?;
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,27 @@
/// Reserved words of the language.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Keyword {
    Fn,
    Let,
    If,
    Return,
}

impl Keyword {
    /// Looks up the keyword spelled exactly `str`; returns `None` for any other text.
    pub fn from_string(str: &str) -> Option<Self> {
        [Self::Fn, Self::Let, Self::If, Self::Return]
            .into_iter()
            .find(|keyword| keyword.str() == str)
    }

    /// Source spelling of the keyword.
    pub const fn str(&self) -> &str {
        match self {
            Self::Fn => "fn",
            Self::Let => "let",
            Self::If => "if",
            Self::Return => "return",
        }
    }
}

View File

@@ -0,0 +1,90 @@
mod cursor;
mod file;
mod keyword;
mod symbol;
use std::ops::Deref;
pub use cursor::*;
pub use file::*;
pub use keyword::*;
pub use symbol::*;
/// A lexical token.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Token {
/// A punctuation symbol.
Symbol(Symbol),
/// Any word that is not a keyword (this includes words starting with a digit).
Ident(String),
/// A reserved word.
Keyword(Keyword),
}
/// A token together with the source span it was read from.
#[derive(Debug, Clone)]
pub struct TokenInstance {
/// The token itself.
pub token: Token,
/// Span from the token's first to its last character.
pub span: FileSpan,
}
impl TokenInstance {
    /// Lexes the next token from `cursor`, skipping whitespace and `//` line comments.
    ///
    /// Returns `None` when only whitespace and comments remain. A comment that is not
    /// terminated by a newline simply ends with the input.
    pub fn parse(cursor: &mut CharCursor) -> Option<TokenInstance> {
        // Loop instead of recursing so that long runs of comment lines use constant stack.
        let start = loop {
            cursor.skip_whitespace();
            cursor.peek()?;
            let start = cursor.next_pos();
            match Symbol::parse(cursor) {
                Some(Symbol::DoubleSlash) => {
                    // Discard the rest of the line; stop at end of input as well, otherwise a
                    // comment on the last line would spin forever.
                    while !matches!(cursor.next(), Some('\n') | None) {}
                }
                Some(s) => {
                    let end = cursor.prev_pos();
                    return Some(Self {
                        token: Token::Symbol(s),
                        span: FileSpan { start, end },
                    });
                }
                None => break start,
            }
        };
        // Not a symbol: read a word up to the next whitespace or symbol character.
        let mut word = String::new();
        while let Some(c) = cursor.peek() {
            if c.is_whitespace() || Symbol::from_char(c).is_some() {
                break;
            }
            word.push(c);
            cursor.advance();
        }
        let end = cursor.prev_pos();
        let token = if let Some(keyword) = Keyword::from_string(&word) {
            Token::Keyword(keyword)
        } else {
            Token::Ident(word)
        };
        Some(Self {
            token,
            span: FileSpan { start, end },
        })
    }
}
impl Token {
    /// Whether the token is exactly the symbol `symbol`.
    pub fn is_symbol(&self, symbol: Symbol) -> bool {
        matches!(self, Token::Symbol(s) if *s == symbol)
    }

    /// Whether the token is a symbol for which `f` returns `true`.
    pub fn is_symbol_and(&self, f: impl Fn(Symbol) -> bool) -> bool {
        if let Token::Symbol(s) = self {
            f(*s)
        } else {
            false
        }
    }

    /// Whether the token is exactly the keyword `kw`.
    pub fn is_keyword(&self, kw: Keyword) -> bool {
        matches!(self, Token::Keyword(k) if *k == kw)
    }
}
// Lets `Token` methods such as `is_symbol` be called directly on a `TokenInstance`.
impl Deref for TokenInstance {
type Target = Token;
fn deref(&self) -> &Self::Target {
&self.token
}
}

View File

@@ -0,0 +1,125 @@
use std::fmt::Debug;
use super::CharCursor;
/// Punctuation symbols known to the lexer; the spelling of each is given by `Symbol::str`.
#[derive(PartialEq, Eq, Clone, Copy)]
pub enum Symbol {
Semicolon,
Colon,
DoubleColon,
Equals,
DoubleEquals,
Arrow,
DoubleArrow,
Plus,
Minus,
Asterisk,
Slash,
// `//`; the lexer treats it as the start of a line comment.
DoubleSlash,
Dot,
OpenParen,
CloseParen,
OpenCurly,
CloseCurly,
OpenSquare,
CloseSquare,
OpenAngle,
CloseAngle,
SingleQuote,
DoubleQuote,
Bang,
}
impl Symbol {
    /// Lexes a symbol at the cursor, consuming one or two characters.
    ///
    /// Returns `None`, consuming nothing, when the next character does not start a symbol.
    pub fn parse(cursor: &mut CharCursor) -> Option<Self> {
        let mut symbol = Self::from_char(cursor.peek()?)?;
        cursor.advance();
        symbol.finish(cursor);
        Some(symbol)
    }

    /// Maps a single character to its one-character symbol, if it is one.
    pub fn from_char(c: char) -> Option<Self> {
        let symbol = match c {
            '(' => Self::OpenParen,
            ')' => Self::CloseParen,
            '[' => Self::OpenSquare,
            ']' => Self::CloseSquare,
            '{' => Self::OpenCurly,
            '}' => Self::CloseCurly,
            '<' => Self::OpenAngle,
            '>' => Self::CloseAngle,
            ';' => Self::Semicolon,
            ':' => Self::Colon,
            '+' => Self::Plus,
            '-' => Self::Minus,
            '*' => Self::Asterisk,
            '/' => Self::Slash,
            '=' => Self::Equals,
            '.' => Self::Dot,
            '\'' => Self::SingleQuote,
            '"' => Self::DoubleQuote,
            '!' => Self::Bang,
            _ => return None,
        };
        Some(symbol)
    }

    /// Upgrades a one-character symbol to its two-character form when the next character
    /// completes one, consuming that character. Otherwise leaves symbol and cursor untouched.
    pub fn finish(&mut self, cursor: &mut CharCursor) {
        let Some(next) = cursor.peek() else {
            return;
        };
        let combined = match (*self, next) {
            (Self::Colon, ':') => Self::DoubleColon,
            (Self::Minus, '>') => Self::Arrow,
            (Self::Equals, '=') => Self::DoubleEquals,
            (Self::Equals, '>') => Self::DoubleArrow,
            (Self::Slash, '/') => Self::DoubleSlash,
            _ => return,
        };
        *self = combined;
        cursor.advance();
    }

    /// Source spelling of the symbol.
    pub fn str(&self) -> &str {
        match self {
            Self::Semicolon => ";",
            Self::Colon => ":",
            Self::DoubleColon => "::",
            Self::Equals => "=",
            Self::DoubleEquals => "==",
            Self::Arrow => "->",
            Self::DoubleArrow => "=>",
            Self::Plus => "+",
            Self::Minus => "-",
            Self::Asterisk => "*",
            Self::Slash => "/",
            Self::DoubleSlash => "//",
            Self::Dot => ".",
            Self::OpenParen => "(",
            Self::CloseParen => ")",
            Self::OpenCurly => "{",
            Self::CloseCurly => "}",
            Self::OpenSquare => "[",
            Self::CloseSquare => "]",
            Self::OpenAngle => "<",
            Self::CloseAngle => ">",
            Self::SingleQuote => "'",
            Self::DoubleQuote => "\"",
            Self::Bang => "!",
        }
    }
}
impl Debug for Symbol {
    /// Prints the symbol's spelling wrapped in single quotes, e.g. `';'`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("'")?;
        f.write_str(self.str())?;
        f.write_str("'")
    }
}

111
src/parser/v1/val.rs Normal file
View File

@@ -0,0 +1,111 @@
use super::{CharCursor, MaybeParsable, ParserError, ParserErrors, Symbol, Token, TokenCursor};
use std::fmt::Debug;
/// A literal value appearing in source code.
#[derive(Clone, PartialEq, Eq)]
pub enum Literal {
/// A double-quoted string, with escape sequences already resolved.
String(String),
/// A single-quoted character.
Char(char),
/// A numeric literal, kept as text.
Number(Number),
/// The unit value `()`.
Unit,
}
/// A numeric literal, stored as the text of its parts.
#[derive(Clone, PartialEq, Eq)]
pub struct Number {
/// Text before the decimal point.
pub whole: String,
/// Text after the decimal point, if there is one.
pub decimal: Option<String>,
/// Optional type annotation text; the literal parser in this file always leaves it `None`.
pub ty: Option<String>,
}
impl MaybeParsable for Literal {
// Parses a character, string or number literal if the lookahead token starts one.
// Returns `Ok(None)`, consuming nothing, when it does not.
fn maybe_parse(cursor: &mut TokenCursor, _: &mut ParserErrors) -> Result<Option<Self>, ParserError> {
let inst = cursor.expect_peek()?;
let mut res = match &inst.token {
Token::Symbol(Symbol::SingleQuote) => {
// The quote token is only peeked, so the raw character cursor is read directly
// for the literal's content and its closing quote.
let chars = cursor.chars();
let c = chars.expect_next()?;
chars.expect('\'')?;
Self::Char(c)
}
// Same approach for strings: the body is read character by character.
Token::Symbol(Symbol::DoubleQuote) => Self::String(string_from(cursor.chars())?),
Token::Ident(text) => {
// A word counts as a number when its first character is an ASCII digit.
let first = text.chars().next().unwrap();
if first.is_ascii_digit() {
Self::Number(Number {
whole: text.to_string(),
decimal: None,
ty: None,
})
} else {
return Ok(None);
}
}
_ => return Ok(None),
};
// Consume the token that started the literal; this lexes the next lookahead token from
// wherever the character cursor now stands.
cursor.next();
if let Some(next) = cursor.peek() {
if let Self::Number(num) = &mut res {
if let Token::Symbol(Symbol::Dot) = next.token {
// Treat `.` as a decimal point only when the character after it is a digit.
let chars = cursor.chars();
if let Some(c) = chars.peek() {
if c.is_ascii_digit() {
cursor.next();
let decimal = cursor.expect_ident()?;
num.decimal = Some(decimal);
}
}
}
}
}
Ok(Some(res))
}
}
pub fn string_from(cursor: &mut CharCursor) -> Result<String, ParserError> {
let mut str = String::new();
loop {
let c = cursor.expect_next()?;
if c == '"' {
return Ok(str);
}
str.push(match c {
'\\' => {
let next = cursor.expect_next()?;
match next {
'"' => '"',
'\'' => '\'',
't' => '\t',
'n' => '\n',
'0' => '\0',
_ => {
todo!();
}
}
}
_ => c,
})
}
}
impl Debug for Literal {
    /// Delegates to the `Debug` output of the contained value; unit prints as `()`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Self::Unit = self {
            return f.write_str("()");
        }
        match self {
            Self::String(text) => text.fmt(f),
            Self::Char(ch) => ch.fmt(f),
            Self::Number(number) => number.fmt(f),
            Self::Unit => unreachable!("handled above"),
        }
    }
}
impl Debug for Number {
    /// Prints the whole part, then `.<decimal>` and `T<ty>` for the parts that are present.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.whole)?;
        if let Some(decimal) = self.decimal.as_deref() {
            f.write_str(".")?;
            f.write_str(decimal)?;
        }
        if let Some(ty) = self.ty.as_deref() {
            f.write_str("T")?;
            f.write_str(ty)?;
        }
        Ok(())
    }
}

120
src/parser/v2/body.rs Normal file
View File

@@ -0,0 +1,120 @@
use std::collections::HashSet;
use std::fmt::{Debug, Write};
use std::sync::LazyLock;
use crate::util::Padder;
use super::util::WHITESPACE_SET;
use super::CharCursor;
use super::Expr;
use super::ParserError;
// Characters that terminate a variable name in a `let` statement: everything in
// `WHITESPACE_SET` plus `(`. Built lazily on first use.
static NAME_END: LazyLock<HashSet<char>> = LazyLock::new(|| {
let mut set = WHITESPACE_SET.clone();
set.extend(&['(']);
set
});
/// A braced block of statements (`{ ... }`) in the v2 parser.
pub struct Body {
// Statements in source order.
statements: Vec<Statement>,
}
/// A single statement inside a v2 [`Body`].
pub enum Statement {
/// `let <name> = <expr>;`
Let(String, Expr),
/// `return <expr>;`, or a final expression directly followed by `}`.
Return(Expr),
/// `<expr>;`
Expr(Expr),
}
impl Body {
    /// Parses a `{ ... }` block, reading statements until the closing `}`.
    ///
    /// # Errors
    /// Fails when the opening `{` is missing, when a statement fails to parse, or when the
    /// input ends before the block is closed.
    pub fn parse(cursor: &mut CharCursor) -> Result<Self, ParserError> {
        cursor.skip_whitespace();
        cursor.expect_char('{')?;
        let mut statements = Vec::new();
        loop {
            cursor.skip_whitespace();
            if cursor.expect_peek()? == '}' {
                cursor.next();
                break;
            }
            statements.push(Statement::parse(cursor)?);
        }
        Ok(Self { statements })
    }
}
impl Statement {
    /// Parses one statement: `let <name> = <expr>;`, `return <expr>;`, or an expression.
    ///
    /// An expression followed by `;` is an expression statement. An expression directly
    /// followed by `}` becomes a `Return`, and the `}` is left for the caller.
    ///
    /// # Errors
    /// Fails on a missing variable name, a missing `=` or `;`, an invalid expression, or any
    /// other character after an expression statement.
    pub fn parse(cursor: &mut CharCursor) -> Result<Self, ParserError> {
        cursor.skip_whitespace();

        if cursor.advance_if_str("let", &WHITESPACE_SET) {
            cursor.skip_whitespace();
            let name = cursor.until(&NAME_END);
            if name.is_empty() {
                return Err(ParserError::at(
                    cursor.pos(),
                    "Expected variable name".to_string(),
                ));
            }
            cursor.skip_whitespace();
            cursor.expect_char('=')?;
            let value = Expr::parse(cursor)?;
            cursor.skip_whitespace();
            cursor.expect_char(';')?;
            return Ok(Self::Let(name, value));
        }

        if cursor.advance_if_str("return", &WHITESPACE_SET) {
            let value = Expr::parse(cursor)?;
            cursor.skip_whitespace();
            cursor.expect_char(';')?;
            return Ok(Self::Return(value));
        }

        let expr = Expr::parse(cursor)?;
        match cursor.expect_peek()? {
            ';' => {
                cursor.next();
                Ok(Self::Expr(expr))
            }
            '}' => Ok(Self::Return(expr)),
            _ => {
                // Consume the offending character so that the error points at it.
                cursor.next();
                Err(ParserError::at(
                    cursor.prev_pos(),
                    "unexpected end of statement; expected a ';' or '}'".to_string(),
                ))
            }
        }
    }
}
impl Debug for Statement {
    /// Renders the statement in source-like form, always terminated by `;`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Statement::Let(name, value) => write!(f, "let {} = {:?};", name, value),
            Statement::Return(value) => write!(f, "return {:?};", value),
            Statement::Expr(value) => write!(f, "{:?};", value),
        }
    }
}
impl Debug for Body {
    /// Renders the block as `{}` when empty, otherwise one statement per line between braces.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if self.statements.is_empty() {
            return f.write_str("{}");
        }
        f.write_str("{\n ")?;
        let mut padder = Padder::new(f);
        for statement in self.statements.iter() {
            // they don't expose wrap_buf :grief:
            writeln!(padder, "{statement:?}")?;
        }
        f.write_str("}")
    }
}

135
src/parser/v2/cursor.rs Normal file
View File

@@ -0,0 +1,135 @@
use std::{collections::HashSet, iter::Peekable, str::Chars};
use super::{error::ParserError, util::WHITESPACE_SET};
/// A zero-based line/column position inside a source file.
#[derive(Debug, Clone, Copy)]
pub struct FilePos {
pub line: usize,
pub col: usize,
}
/// A region of a source file, given by its first and last position.
#[derive(Debug, Clone, Copy)]
pub struct FileRegion {
pub start: FilePos,
pub end: FilePos,
}
/// Character-level cursor over source text that tracks line/column positions.
pub struct CharCursor<'a> {
// Remaining characters, with one character of lookahead.
chars: Peekable<Chars<'a>>,
// Position of the next character to be consumed.
pos: FilePos,
// Value `pos` had before the most recent advance.
prev_pos: FilePos,
}
impl CharCursor<'_> {
    /// Consumes and returns all characters up to (not including) the first one contained in
    /// `set`, or up to the end of input.
    pub fn until(&mut self, set: &HashSet<char>) -> String {
        let mut str = String::new();
        while let Some(next) = self.peek() {
            if set.contains(&next) {
                break;
            }
            str.push(next);
            self.advance();
        }
        str
    }

    /// Skips whitespace and `//` line comments.
    ///
    /// A comment runs to the end of its line, or to the end of input when the last line has
    /// no trailing newline.
    pub fn skip_whitespace(&mut self) {
        loop {
            while self.peek().is_some_and(|c| c.is_whitespace()) {
                self.advance();
            }
            // Look two characters ahead without consuming anything.
            let mut lookahead = self.chars.clone();
            if lookahead.next() != Some('/') || lookahead.next() != Some('/') {
                return;
            }
            // Discard the comment; also stop at end of input, otherwise an unterminated
            // comment would spin forever.
            while !matches!(self.next(), Some('\n') | None) {}
        }
    }

    /// Consumes and returns the next character, or `None` at end of input.
    pub fn next(&mut self) -> Option<char> {
        let res = self.peek()?;
        self.advance();
        Some(res)
    }

    /// Returns the next character without consuming it.
    pub fn peek(&mut self) -> Option<char> {
        self.chars.peek().copied()
    }

    /// Consumes one character and updates the tracked positions; does nothing at end of input.
    pub fn advance(&mut self) {
        let Some(consumed) = self.chars.next() else {
            // Nothing was consumed, so the positions must not move either.
            return;
        };
        self.prev_pos = self.pos;
        if consumed == '\n' {
            self.pos.col = 0;
            self.pos.line += 1;
        } else {
            self.pos.col += 1;
        }
    }

    /// Consumes the next character only if it equals `c`; returns whether it did.
    pub fn advance_if(&mut self, c: char) -> bool {
        if self.peek() == Some(c) {
            self.advance();
            return true;
        }
        false
    }

    /// Consumes `exp` if the input continues with it and the character after it is either
    /// contained in `end` or the end of input. Returns whether `exp` was consumed.
    pub fn advance_if_str(&mut self, exp: &str, end: &HashSet<char>) -> bool {
        let mut new = self.chars.clone();
        for e in exp.chars() {
            if new.next() != Some(e) {
                return false;
            }
        }
        if new.peek().is_some_and(|c| !end.contains(c)) {
            return false;
        }
        // Advance once per character (not per byte) so multi-byte text is skipped correctly.
        for _ in 0..exp.chars().count() {
            self.advance();
        }
        true
    }

    /// Consumes the next character and checks that it equals `c`.
    ///
    /// The character is consumed even when it does not match.
    pub fn expect_char(&mut self, c: char) -> Result<(), ParserError> {
        let next = self.expect_next()?;
        if next == c {
            Ok(())
        } else {
            Err(ParserError::at(
                self.prev_pos,
                format!("unexpected char '{next}'; expected '{c}'"),
            ))
        }
    }

    /// Like [`Self::next`], but fails with an "unexpected end" error at end of input.
    pub fn expect_next(&mut self) -> Result<char, ParserError> {
        self.next().ok_or_else(ParserError::unexpected_end)
    }

    /// Like [`Self::peek`], but fails with an "unexpected end" error at end of input.
    pub fn expect_peek(&mut self) -> Result<char, ParserError> {
        self.peek().ok_or_else(ParserError::unexpected_end)
    }

    /// Position of the next character to be consumed.
    pub fn pos(&self) -> FilePos {
        self.pos
    }

    /// Position of the most recently consumed character.
    pub fn prev_pos(&self) -> FilePos {
        self.prev_pos
    }
}
impl<'a> From<&'a str> for CharCursor<'a> {
    /// Creates a cursor positioned at the first character of `value`.
    fn from(value: &'a str) -> Self {
        let origin = FilePos::start();
        Self {
            chars: value.chars().peekable(),
            pos: origin,
            prev_pos: origin,
        }
    }
}
impl FilePos {
/// The position of the first character of a file (line 0, column 0).
pub fn start() -> Self {
Self { line: 0, col: 0 }
}
}

60
src/parser/v2/error.rs Normal file
View File

@@ -0,0 +1,60 @@
use super::{FilePos, FileRegion};
/// A v2 parse error: a message plus the source regions it refers to.
#[derive(Debug)]
pub struct ParserError {
/// Human-readable description of the problem.
pub msg: String,
/// Source regions shown with the message; may be empty.
pub regions: Vec<FileRegion>,
}
impl ParserError {
    /// Creates an error that carries only a message and no source location.
    pub fn from_msg(msg: String) -> Self {
        Self {
            msg,
            regions: vec![],
        }
    }

    /// Creates an error pointing at the single position `pos`.
    pub fn at(pos: FilePos, msg: String) -> Self {
        let region = FileRegion {
            start: pos,
            end: pos,
        };
        Self {
            msg,
            regions: vec![region],
        }
    }

    /// The error reported when the input ends although more characters were required.
    pub fn unexpected_end() -> Self {
        Self::from_msg(String::from("Unexpected end of input"))
    }
}
// Number of context lines printed before the line a region starts on.
const BEFORE: usize = 1;
// Maximum number of context lines printed after the marker line.
const AFTER: usize = 1;
pub fn print_error(err: ParserError, file: &str) {
let after = if err.regions.is_empty() {""} else {":"};
println!("error: {}{}", err.msg, after);
for reg in err.regions {
print_region(file, reg);
}
}
/// Prints the source line `reg` starts on, with `^` markers under columns
/// `start.col..=end.col`, surrounded by up to `BEFORE`/`AFTER` lines of context.
///
/// # Panics
/// Panics if `reg.end.col` is smaller than `reg.start.col`, or if the region's start line
/// does not exist in `file`.
pub fn print_region(file: &str, reg: FileRegion) {
    let first = reg.start.line.saturating_sub(BEFORE);
    let mut lines = file.lines().skip(first);
    let marker_len = reg.end.col - reg.start.col + 1;
    let width = (reg.end.line + AFTER).to_string().len();
    // Leading context plus the line the region starts on.
    for number in first..=reg.start.line {
        println!("{:>width$} | {}", number, lines.next().unwrap());
    }
    let markers = format!("{}{}", " ".repeat(reg.start.col), "^".repeat(marker_len));
    println!("{} | {}", " ".repeat(width), markers);
    // Trailing context, as far as the file has lines left.
    for (offset, line) in lines.take(AFTER).enumerate() {
        println!("{:>width$} | {}", reg.end.line + offset + 1, line);
    }
}

247
src/parser/v2/expr.rs Normal file
View File

@@ -0,0 +1,247 @@
use super::{util::WHITESPACE_SET, Body, CharCursor, ParserError};
use std::{collections::HashSet, fmt::Debug, sync::LazyLock};
// Every character that occurs in the spelling of any operator in `Operator::ALL`.
// Built lazily on first use.
static SYMBOLS: LazyLock<HashSet<char>> = LazyLock::new(|| {
let mut set = HashSet::new();
for o in Operator::ALL {
for c in o.str().chars() {
set.insert(c);
}
}
set
});
/// Characters that terminate an identifier: all whitespace, every operator
/// character from `SYMBOLS`, and `;`, `(`, `)`.
static IDENT_END: LazyLock<HashSet<char>> = LazyLock::new(|| {
    let punctuation = [';', '(', ')'];
    WHITESPACE_SET
        .iter()
        .chain(SYMBOLS.iter())
        .chain(punctuation.iter())
        .copied()
        .collect()
});
/// A literal value appearing in an expression.
#[derive(Debug)]
pub enum Val {
/// Contents of a double-quoted literal, without the surrounding quotes and with `\"` unescaped.
String(String),
/// A run of ASCII digits, kept as written rather than converted to a numeric type.
Number(String),
/// The empty value; `Expr::parse` produces it when no input is left.
Unit,
}
/// A parsed expression tree.
pub enum Expr {
/// A `{ ... }` body used in expression position.
Block(Body),
/// A literal value.
Val(Val),
/// A bare name; `Expr::parse` stores whatever text precedes the next `IDENT_END` character.
Ident(String),
/// An operator applied to a left and a right operand, in that order.
BinaryOp(Operator, Box<Expr>, Box<Expr>),
/// A call: the callee expression followed by its arguments.
// NOTE(review): the parser in this file always produces exactly one argument per call.
Call(Box<Expr>, Vec<Expr>),
}
/// The binary operators understood by the expression parser.
#[derive(Debug, PartialEq, Eq)]
pub enum Operator {
/// `+`
Add,
/// `-`
Sub,
/// `*`
Mul,
/// `/`
Div,
/// `<`
LessThan,
/// `>`
GreaterThan,
/// `.`; the only operator printed without surrounding spaces.
Offset,
}
impl Expr {
pub fn parse(cursor: &mut CharCursor) -> Result<Self, ParserError> {
cursor.skip_whitespace();
let Some(next) = cursor.peek() else {
return Ok(Self::Val(Val::Unit));
};
let mut e1 = match next {
'(' => {
cursor.advance();
let expr = Self::parse(cursor)?;
cursor.skip_whitespace();
cursor.expect_char(')')?;
expr
}
'{' => {
Self::Block(Body::parse(cursor)?)
}
_ => {
if let Some(val) = Val::parse_nonunit(cursor)? {
Self::Val(val)
} else {
let name = cursor.until(&IDENT_END);
Self::Ident(name)
}
}
};
cursor.skip_whitespace();
let Some(mut next) = cursor.peek() else {
return Ok(e1);
};
while next == '(' {
cursor.advance();
let inner = Self::parse(cursor)?;
cursor.skip_whitespace();
cursor.expect_char(')')?;
e1 = Self::Call(Box::new(e1), vec![inner]);
let Some(next2) = cursor.peek() else {
return Ok(e1);
};
next = next2
}
if let Some(op) = Operator::parse(cursor) {
let e2 = Self::parse(cursor)?;
return Ok(if let Self::BinaryOp(op_next, e2, e3) = e2 {
if op.presedence() > op_next.presedence() {
Self::BinaryOp(op_next, Box::new(Self::BinaryOp(op, Box::new(e1), e2)), e3)
} else {
Self::BinaryOp(op, Box::new(e1), Box::new(Self::BinaryOp(op_next, e2, e3)))
}
} else {
Self::BinaryOp(op, Box::new(e1), Box::new(e2))
});
};
Ok(e1)
}
}
impl Val {
    /// Tries to parse a string or number literal at the cursor.
    ///
    /// Returns `Ok(None)`, consuming nothing, when the input is exhausted or the
    /// next character starts neither a `"`-quoted string nor a digit run.
    ///
    /// # Errors
    /// Fails when a string literal is not closed before the input ends, or when a
    /// backslash is followed by anything other than `"`.
    pub fn parse_nonunit(cursor: &mut CharCursor) -> Result<Option<Self>, ParserError> {
        let first = match cursor.peek() {
            Some(c) => c,
            None => return Ok(None),
        };

        if first == '"' {
            // Consume the opening quote, then read up to the closing one.
            cursor.advance();
            let mut text = String::new();
            loop {
                match cursor.expect_next()? {
                    '"' => return Ok(Some(Self::String(text))),
                    // `\"` is the only escape sequence understood.
                    '\\' => match cursor.expect_next()? {
                        '"' => text.push('"'),
                        c => {
                            return Err(ParserError::at(
                                cursor.pos(),
                                format!("unexpected escape char '{c}'"),
                            ))
                        }
                    },
                    other => text.push(other),
                }
            }
        }

        if first.is_ascii_digit() {
            // Collect the maximal run of ASCII digits, leaving the text unconverted.
            let mut digits = String::new();
            while let Some(digit) = cursor.peek().filter(char::is_ascii_digit) {
                digits.push(digit);
                cursor.advance();
            }
            return Ok(Some(Self::Number(digits)));
        }

        Ok(None)
    }
}
impl Operator {
    /// Every operator variant; `SYMBOLS` iterates this to collect operator characters.
    const ALL: [Self; 7] = [
        Self::Add,
        Self::Sub,
        Self::Mul,
        Self::Div,
        Self::Offset,
        Self::GreaterThan,
        Self::LessThan,
    ];

    /// Binding strength of the operator; a larger value binds tighter.
    // NOTE(review): `Sub` and `Div` rank above `Add` and `Mul` instead of sharing a
    // level with them. `Expr::parse` relies on these distinct levels, so they are kept.
    pub fn presedence(&self) -> u32 {
        match self {
            Operator::LessThan => 0,
            Operator::GreaterThan => 0,
            Operator::Add => 1,
            Operator::Sub => 2,
            Operator::Mul => 3,
            Operator::Div => 4,
            Operator::Offset => 5,
        }
    }

    /// The operator's source text.
    pub fn str(&self) -> &'static str {
        match self {
            Self::Add => "+",
            Self::Sub => "-",
            Self::Mul => "*",
            Self::Div => "/",
            Self::LessThan => "<",
            Self::GreaterThan => ">",
            Self::Offset => ".",
        }
    }

    /// Consumes and returns the operator at the cursor, if there is one.
    ///
    /// Returns `None` without consuming anything when the input is exhausted or the
    /// next character does not start an operator. Every variant listed in `ALL` is
    /// recognised, including `<` and `>`.
    pub fn parse(cursor: &mut CharCursor) -> Option<Self> {
        let res = match cursor.peek()? {
            '+' => Operator::Add,
            '-' => Operator::Sub,
            '*' => Operator::Mul,
            '/' => Operator::Div,
            '<' => Operator::LessThan,
            '>' => Operator::GreaterThan,
            '.' => Operator::Offset,
            _ => return None,
        };
        // The cursor advances one character at a time, so step once per character.
        for _ in res.str().chars() {
            cursor.advance();
        }
        Some(res)
    }

    /// Whether the operator is printed with a space on either side.
    pub fn pad(&self) -> bool {
        match self {
            Operator::Add => true,
            Operator::Sub => true,
            Operator::Mul => true,
            Operator::Div => true,
            Operator::LessThan => true,
            Operator::GreaterThan => true,
            Operator::Offset => false,
        }
    }
}
impl Debug for Expr {
    /// Renders the expression in a compact, source-like form: identifiers verbatim,
    /// binary operations fully parenthesised, calls as `callee(arg, ...)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Expr::Ident(name) => f.write_str(name),
            Expr::Val(value) => write!(f, "{:?}", value),
            Expr::Block(body) => write!(f, "{:?}", body),
            Expr::BinaryOp(op, lhs, rhs) => {
                // Padded operators get one space on each side; the rest get none.
                let gap = if op.pad() { " " } else { "" };
                write!(f, "({:?}{gap}{}{gap}{:?})", lhs, op.str(), rhs)
            }
            Expr::Call(callee, args) => {
                callee.fmt(f)?;
                f.write_str("(")?;
                for (index, arg) in args.iter().enumerate() {
                    if index > 0 {
                        f.write_str(", ")?;
                    }
                    arg.fmt(f)?;
                }
                f.write_str(")")
            }
        }
    }
}

32
src/parser/v2/mod.rs Normal file
View File

@@ -0,0 +1,32 @@
use std::io::{BufRead, BufReader};
mod body;
mod cursor;
mod error;
mod expr;
mod module;
mod util;
pub use body::*;
pub use cursor::*;
pub use error::*;
pub use expr::*;
pub use module::*;
/// Parses `file` as a module and prints the outcome to stdout: the pretty-printed
/// module on success, otherwise the error rendered against the source text.
pub fn parse_file(file: &str) {
    let mut cursor = CharCursor::from(file);
    let outcome = Module::parse(&mut cursor);
    match outcome {
        Ok(module) => println!("{module:#?}"),
        Err(err) => print_error(err, file),
    }
}
/// Reads standard input line by line, parsing each line as a single statement and
/// printing either its debug form or the parse error.
///
/// # Panics
/// Panics with "failed to read line" if a line cannot be read from stdin.
pub fn run_stdin() {
    let reader = BufReader::new(std::io::stdin());
    for line in reader.lines() {
        let text = line.expect("failed to read line");
        let mut cursor = CharCursor::from(text.as_str());
        match Statement::parse(&mut cursor) {
            Err(err) => print_error(err, &text),
            Ok(statement) => println!("{:?}", statement),
        }
    }
}

59
src/parser/v2/module.rs Normal file
View File

@@ -0,0 +1,59 @@
use std::{collections::HashSet, fmt::Debug, sync::LazyLock};
use super::{util::WHITESPACE_SET, Body, CharCursor, ParserError};
/// A parsed source file: the functions it defines.
#[derive(Debug)]
pub struct Module {
// Functions in the order `Module::parse` encountered them.
functions: Vec<Function>,
}
/// A function definition: `fn <name>() <body>`.
pub struct Function {
/// The text between `fn` and the opening parenthesis.
pub name: String,
/// The function's body.
pub body: Body,
}
/// Characters that terminate a function name: all whitespace plus `(`.
static NAME_END: LazyLock<HashSet<char>> = LazyLock::new(|| {
    WHITESPACE_SET
        .iter()
        .copied()
        .chain(std::iter::once('('))
        .collect()
});
impl Module {
    /// Parses a whole file as a sequence of `fn` definitions.
    ///
    /// Whitespace between definitions, and at the start and end of the input, is
    /// ignored. Parsing stops successfully once no further word can be read.
    ///
    /// # Errors
    /// Fails with "expected fn" when a top-level word other than `fn` is found, and
    /// propagates any error from parsing a function.
    pub fn parse(cursor: &mut CharCursor) -> Result<Self, ParserError> {
        let mut functions = Vec::new();
        loop {
            // Without this, the whitespace left after a function body (or at the start
            // of the file) would make the read below come back empty, which is
            // indistinguishable from end of input and would silently drop the rest of
            // the file.
            // NOTE(review): assumes `until` does not skip leading terminators itself;
            // every other caller skips whitespace first. The call is harmless either way.
            cursor.skip_whitespace();
            let next = cursor.until(&WHITESPACE_SET);
            if next.is_empty() {
                return Ok(Self { functions });
            }
            if next == "fn" {
                functions.push(Function::parse(cursor)?);
            } else {
                return Err(ParserError::at(cursor.pos(), "expected fn".to_string()));
            }
        }
    }
}
impl Function {
    /// Parses the remainder of a function definition, starting just after the `fn`
    /// keyword: a name, the literal `()`, and a body.
    ///
    /// # Errors
    /// Fails with "expected function name" when no name is present, when `(` and `)`
    /// do not immediately follow the name, or when the body fails to parse.
    pub fn parse(cursor: &mut CharCursor) -> Result<Self, ParserError> {
        cursor.skip_whitespace();
        let name = cursor.until(&NAME_END);
        if name.is_empty() {
            let here = cursor.pos();
            return Err(ParserError::at(here, "expected function name".to_string()));
        }
        // No parameters are supported yet: the parentheses must be empty and adjacent.
        cursor.expect_char('(')?;
        cursor.expect_char(')')?;
        Body::parse(cursor).map(|body| Self { name, body })
    }
}
impl Debug for Function {
    /// Renders the function as `fn <name>() ` followed by the body's debug output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "fn {}() ", self.name)?;
        self.body.fmt(f)
    }
}

10
src/parser/v2/util.rs Normal file
View File

@@ -0,0 +1,10 @@
use std::{collections::HashSet, sync::LazyLock};
/// The characters the parser treats as whitespace; each one satisfies
/// [`char::is_whitespace`].
pub const WHITESPACE: [char; 25] = [
    '\t',       // U+0009 character tabulation
    '\n',       // U+000A line feed
    '\u{000B}', // line tabulation
    '\u{000C}', // form feed
    '\r',       // U+000D carriage return
    ' ',        // U+0020 space
    '\u{0085}', // next line
    '\u{00A0}', // no-break space
    '\u{1680}', // ogham space mark
    '\u{2000}', // en quad
    '\u{2001}', // em quad
    '\u{2002}', // en space
    '\u{2003}', // em space
    '\u{2004}', // three-per-em space
    '\u{2005}', // four-per-em space
    '\u{2006}', // six-per-em space
    '\u{2007}', // figure space
    '\u{2008}', // punctuation space
    '\u{2009}', // thin space
    '\u{200A}', // hair space
    '\u{2028}', // line separator
    '\u{2029}', // paragraph separator
    '\u{202F}', // narrow no-break space
    '\u{205F}', // medium mathematical space
    '\u{3000}', // ideographic space
];

/// [`WHITESPACE`] as a set, built on first use.
pub static WHITESPACE_SET: LazyLock<HashSet<char>> =
    LazyLock::new(|| HashSet::from(WHITESPACE));