From bdf08ce52cf857a8d1b6a278de9f2bca20d957f7 Mon Sep 17 00:00:00 2001 From: shadow cat Date: Wed, 8 Apr 2026 23:28:50 -0400 Subject: [PATCH] stuff --- src/io/mod.rs | 106 ++++++++++++++++++++++ src/main.rs | 18 ++-- src/parser/cursor/mod.rs | 47 ++++++---- src/parser/cursor/span.rs | 24 ----- src/parser/cursor/token.rs | 16 ++-- src/parser/error.rs | 15 ---- src/parser/mod.rs | 3 +- src/parser/node.rs | 87 ------------------ src/parser/node/expr.rs | 78 ++++++++++++++++ src/parser/node/mod.rs | 179 +++++++++++++++++++++++++++++++++++++ test/main.lang | 2 +- 11 files changed, 417 insertions(+), 158 deletions(-) create mode 100644 src/io/mod.rs delete mode 100644 src/parser/error.rs delete mode 100644 src/parser/node.rs create mode 100644 src/parser/node/expr.rs create mode 100644 src/parser/node/mod.rs diff --git a/src/io/mod.rs b/src/io/mod.rs new file mode 100644 index 0000000..35ae6d3 --- /dev/null +++ b/src/io/mod.rs @@ -0,0 +1,106 @@ +#[derive(Debug, Clone, Copy)] +pub struct Span { + pub file: usize, + pub start: usize, + pub end: usize, +} + +pub struct Spanned { + pub inner: T, + pub span: Span, +} + +impl std::ops::Deref for Spanned { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl std::ops::DerefMut for Spanned { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +pub struct CompilerMsg { + pub spans: Vec, + pub msg: String, +} + +#[derive(Default)] +pub struct CompilerOutput { + pub errors: Vec, + pub files: Vec, +} + +impl CompilerOutput { + pub fn new() -> Self { + Self::default() + } + pub fn error(&mut self, msg: impl Into) { + self.errors.push(msg.into()); + } + pub fn write(&self, w: &mut impl std::io::Write) { + let files: Vec<_> = self + .files + .iter() + .map(|path| std::fs::read_to_string(path).unwrap()) + .collect(); + for error in &self.errors { + writeln!(w, "Error: {}", error.msg).unwrap(); + for span in &error.spans { + span.write(w, &files[span.file]).unwrap(); + } + } + } +} + +impl Span { + pub fn write(&self, w: &mut impl std::io::Write, text: &str) -> std::io::Result<()> { + let mut line_start = 0; + let mut found = false; + let mut line = 1; + let mut spans = Vec::new(); + for (i, c) in text.char_indices() { + if i == self.start { + found = true; + } + if i == self.end { + found = true; + } + if c == '\n' { + if found { + spans.push((line, line_start..i)); + } + line_start = i + 1; + line += 1; + found = false; + } + } + let start_chars = text[self.start..].lines().next().unwrap().len(); + let underline = "\x1b[4:3m"; + let underline_color = "\x1b[58;5;1m"; + let end = "\x1b[0m"; + if let [(line, range)] = &spans[..] { + writeln!( + w, + " {line:3} | {}{underline}{underline_color}{}{end}{}", + &text[range.start..self.start], + &text[self.start..=self.end], + &text[(self.end + 1)..range.end] + )?; + } + Ok(()) + } +} + +impl From for CompilerMsg { + fn from(value: String) -> Self { + Self { + spans: Vec::new(), + msg: value.to_string(), + } + } +} diff --git a/src/main.rs b/src/main.rs index 5b9d09c..94bea59 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,6 @@ +use crate::{io::CompilerOutput, parser::Nodes}; + +mod io; mod parser; fn main() { @@ -6,12 +9,11 @@ fn main() { println!("file expected"); return; }; - let code = match std::fs::read_to_string(path) { - Ok(code) => code, - Err(err) => { - println!("Failed to read input file: {err}"); - return; - } - }; - parser::parse(&code); + let mut output = CompilerOutput::new(); + let nodes = Nodes::parse_root(&path, &mut output); + if let Some((nodes, root)) = nodes { + nodes.format(&mut std::io::stdout(), root).unwrap(); + println!(); + } + output.write(&mut std::io::stdout()); } diff --git a/src/parser/cursor/mod.rs b/src/parser/cursor/mod.rs index 28525b2..e7d03df 100644 --- a/src/parser/cursor/mod.rs +++ b/src/parser/cursor/mod.rs @@ -1,9 +1,7 @@ -use crate::parser::error::ParseError; -pub use span::*; +use crate::io::{CompilerMsg, Span, Spanned}; use std::iter::Peekable; pub use token::*; -mod span; mod token; pub struct Cursor<'a> { @@ -12,10 +10,14 @@ pub struct Cursor<'a> { } impl<'a> Cursor<'a> { - pub fn new(text: &'a str) -> Self { + pub fn new(text: &'a str, file: usize) -> Self { Self { - span: Span { first: 0, last: 0 }, - tokens: Tokens::new(text).peekable(), + span: Span { + start: 0, + end: 0, + file, + }, + tokens: Tokens::new(text, file).peekable(), } } @@ -30,14 +32,14 @@ impl<'a> Cursor<'a> { self.tokens.peek().map(|inst| &inst.inner) } - pub fn expect_next(&mut self) -> Result { - self.next().ok_or_else(|| ParseError { + pub fn expect_next(&mut self) -> Result { + self.next().ok_or_else(|| CompilerMsg { spans: Vec::new(), msg: "unexpected end of file".to_string(), }) } - pub fn expect(&mut self, token: Token) -> Result { + pub fn expect(&mut self, token: Token) -> Result { let next = self.expect_next()?; if next == token { Ok(next) @@ -46,7 +48,7 @@ impl<'a> Cursor<'a> { } } - pub fn expect_ident(&mut self) -> Result { + pub fn expect_ident(&mut self) -> Result { let next = self.expect_next()?; if let Token::Ident(s) = next { Ok(s) @@ -55,8 +57,8 @@ impl<'a> Cursor<'a> { } } - pub fn unexpected(&self, token: Token, expected: &str) -> Result { - Err(ParseError::unexpected_token( + pub fn unexpected(&self, token: Token, expected: &str) -> Result { + Err(CompilerMsg::unexpected_token( Spanned { inner: token, span: self.span, @@ -65,11 +67,24 @@ impl<'a> Cursor<'a> { )) } - pub fn peek_first(&mut self) -> usize { - self.tokens.peek().map(|i| i.span.first).unwrap_or(0) + pub fn peek_start(&mut self) -> usize { + self.tokens.peek().map(|i| i.span.start).unwrap_or(0) } - pub fn cur_last(&mut self) -> usize { - self.span.last + pub fn cur_end(&mut self) -> usize { + self.span.end + } + + pub fn file(&mut self) -> usize { + self.span.file + } +} + +impl CompilerMsg { + pub fn unexpected_token(inst: TokenInst, expected: &str) -> Self { + Self { + spans: vec![inst.span], + msg: format!("Unexpected token '{}'; expected {expected}", inst.inner), + } } } diff --git a/src/parser/cursor/span.rs b/src/parser/cursor/span.rs index 6a5159a..e69de29 100644 --- a/src/parser/cursor/span.rs +++ b/src/parser/cursor/span.rs @@ -1,24 +0,0 @@ -#[derive(Clone, Copy)] -pub struct Span { - pub first: usize, - pub last: usize, -} - -pub struct Spanned { - pub inner: T, - pub span: Span, -} - -impl std::ops::Deref for Spanned { - type Target = T; - - fn deref(&self) -> &Self::Target { - &self.inner - } -} - -impl std::ops::DerefMut for Spanned { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.inner - } -} diff --git a/src/parser/cursor/token.rs b/src/parser/cursor/token.rs index 4a7b6df..c747836 100644 --- a/src/parser/cursor/token.rs +++ b/src/parser/cursor/token.rs @@ -39,12 +39,14 @@ impl From for Token { pub type TokenInst = Spanned; pub struct Tokens<'a> { + file: usize, text: Peekable>, } impl<'a> Tokens<'a> { - pub fn new(code: &'a str) -> Self { + pub fn new(code: &'a str, file: usize) -> Self { Self { + file, text: code.char_indices().peekable(), } } @@ -55,7 +57,11 @@ impl Iterator for Tokens<'_> { fn next(&mut self) -> Option { let (i, c) = self.text.next()?; - let mut span = Span { first: i, last: i }; + let mut span = Span { + start: i, + end: i, + file: self.file, + }; Some(Spanned { inner: match c { '=' => Token::Equal, @@ -78,7 +84,7 @@ impl Iterator for Tokens<'_> { && c.is_alphanumeric() { s.push(*c); - span.last = *i; + span.end = *i; self.text.next(); } Lit::Number(s).into() @@ -89,7 +95,7 @@ impl Iterator for Tokens<'_> { && !matches!(c, '"') { s.push(*c); - span.last = *i; + span.end = *i; self.text.next(); } self.text.next(); @@ -104,7 +110,7 @@ impl Iterator for Tokens<'_> { ) { s.push(*c); - span.last = *i; + span.end = *i; self.text.next(); } match s.as_str() { diff --git a/src/parser/error.rs b/src/parser/error.rs deleted file mode 100644 index 1f4c190..0000000 --- a/src/parser/error.rs +++ /dev/null @@ -1,15 +0,0 @@ -use crate::parser::cursor::{Span, TokenInst}; - -pub struct ParseError { - pub spans: Vec, - pub msg: String, -} - -impl ParseError { - pub fn unexpected_token(inst: TokenInst, expected: &str) -> Self { - Self { - spans: vec![inst.span], - msg: format!("Unexpected token {}; expected {expected}", inst.inner), - } - } -} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7bd5375..2f154a1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,5 +1,4 @@ mod cursor; -mod error; mod node; -pub fn parse(code: &str) {} +pub use node::*; diff --git a/src/parser/node.rs b/src/parser/node.rs deleted file mode 100644 index 7a78b38..0000000 --- a/src/parser/node.rs +++ /dev/null @@ -1,87 +0,0 @@ -use std::marker::PhantomData; - -use crate::parser::{ - cursor::{Cursor, Span, Token}, - error::ParseError, -}; - -pub trait Parsable: Sized { - fn parse(ctx: &mut ParseCtx) -> Result; - fn vec(nodes: &mut Nodes) -> &mut NodeVec; -} - -pub struct ParseCtx<'a> { - cursor: Cursor<'a>, - nodes: Nodes, -} - -pub struct Nodes { - statements: NodeVec, - exprs: NodeVec, - idents: NodeVec, -} - -pub struct NodeVec { - vec: Vec, - spans: Vec, -} - -impl NodeVec { - pub fn add(&mut self, v: N, span: Span) -> Id { - let id = self.vec.len(); - self.vec.push(v); - self.spans.push(span); - Id { - id, - _pd: PhantomData, - } - } -} - -pub enum Statement { - Let(Id, Id), -} - -pub enum Expr { - Ident(Id), - Negate(Id), - Assign(Id, Id), -} - -pub struct Id { - id: usize, - _pd: PhantomData, -} - -pub struct Ident { - inner: String, -} - -impl Parsable for Expr { - fn parse(ctx: &mut ParseCtx) -> Result { - Ok(match ctx.cursor.expect_next()? { - Token::Dash => Self::Negate(ctx.parse()?), - Token::Ident(s) => Self::Ident(ctx.ident(s)), - other => return ctx.cursor.unexpected(other, "an expression"), - }) - } - - fn vec(nodes: &mut Nodes) -> &mut NodeVec { - &mut nodes.exprs - } -} - -impl ParseCtx<'_> { - pub fn parse(&mut self) -> Result, ParseError> { - let first = self.cursor.peek_first(); - P::parse(self).map(|r| { - let last = self.cursor.cur_last(); - P::vec(&mut self.nodes).add(r, Span { first, last }) - }) - } - - pub fn ident(&mut self, s: String) -> Id { - let span = self.cursor.span; - self.nodes.idents.add(Ident { inner: s }, span) - } -} diff --git a/src/parser/node/expr.rs b/src/parser/node/expr.rs new file mode 100644 index 0000000..6468ca4 --- /dev/null +++ b/src/parser/node/expr.rs @@ -0,0 +1,78 @@ +use crate::{ + io::CompilerMsg, + parser::{ + cursor::{Lit, Token}, + *, + }, +}; + +pub enum Statement { + Let(Id, Id), +} + +pub enum Expr { + Ident(Id), + Lit(Id), + Negate(Id), + Assign(Id, Id), +} + +pub struct Ident { + pub inner: String, +} + +impl FmtNode for Ident { + fn format(&self, w: &mut impl std::io::Write, _: &Nodes) -> std::io::Result<()> { + write!(w, "{}", self.inner) + } +} + +impl Parsable for Expr { + fn parse(ctx: &mut ParseCtx) -> Result { + let e1 = match ctx.cursor.expect_next()? { + Token::Dash => Self::Negate(ctx.parse()?), + Token::Ident(s) => Self::Ident(ctx.ident(s)), + Token::Lit(l) => Self::Lit(ctx.lit(l)), + other => return ctx.cursor.unexpected(other, "an expression"), + }; + let Some(next) = ctx.cursor.peek() else { + return Ok(e1); + }; + Ok(match next { + Token::Equal => { + let e1 = ctx.push_adv(e1); + let e2: Id = ctx.parse()?; + Expr::Assign(e1, e2) + } + _ => e1, + }) + } +} + +impl FmtNode for Expr { + fn format(&self, w: &mut impl std::io::Write, nodes: &Nodes) -> std::io::Result<()> { + match *self { + Expr::Ident(id) => nodes.format(w, id), + Expr::Lit(id) => nodes.format(w, id), + Expr::Negate(id) => { + write!(w, "-")?; + nodes.format(w, id) + } + Expr::Assign(id1, id2) => { + nodes.format(w, id1)?; + write!(w, " = ")?; + nodes.format(w, id2) + } + } + } +} + +impl FmtNode for Lit { + fn format(&self, w: &mut impl std::io::Write, _: &Nodes) -> std::io::Result<()> { + match self { + Lit::Number(v) => write!(w, "{v}"), + Lit::Bool(v) => write!(w, "{v}"), + Lit::String(v) => write!(w, "{v}"), + } + } +} diff --git a/src/parser/node/mod.rs b/src/parser/node/mod.rs new file mode 100644 index 0000000..66e24eb --- /dev/null +++ b/src/parser/node/mod.rs @@ -0,0 +1,179 @@ +use crate::{ + io::{CompilerMsg, CompilerOutput, Span}, + parser::cursor::{Cursor, Lit}, +}; +use std::{marker::PhantomData, ops::Index}; + +mod expr; +pub use expr::*; + +pub trait Parsable: Sized + Node { + fn parse(ctx: &mut ParseCtx) -> Result; +} + +pub struct ParseCtx<'a> { + start: usize, + cursor: Cursor<'a>, + nodes: Nodes, +} + +def_nodes!( + exprs: Expr, + idents: Ident, + statements: Statement, + lits: Lit, +); + +impl Nodes { + pub fn parse_root(path: &str, output: &mut CompilerOutput) -> Option<(Self, Id)> { + let root_code = match std::fs::read_to_string(path) { + Ok(code) => code, + Err(err) => { + output.error(format!("Failed to read input file: {err}")); + return None; + } + }; + output.files.push(path.to_string()); + let nodes = Self::default(); + let mut ctx = ParseCtx { + start: 0, + nodes, + cursor: Cursor::new(&root_code, 0), + }; + let root = match ctx.parse::() { + Ok(expr) => expr, + Err(msg) => { + output.error(msg); + return None; + } + }; + Some((ctx.nodes, root)) + } + + pub fn format( + &self, + w: &mut impl std::io::Write, + id: Id, + ) -> std::io::Result<()> { + self[id].format(w, self) + } +} + +impl Index> for Nodes { + type Output = N; + + fn index(&self, index: Id) -> &Self::Output { + &N::vec(self).vec[index.id] + } +} + +impl Index<&Id> for Nodes { + type Output = N; + + fn index(&self, index: &Id) -> &Self::Output { + &N::vec(self).vec[index.id] + } +} + +#[derive(Debug)] +pub struct NodeVec { + vec: Vec, + spans: Vec, +} + +impl NodeVec { + pub fn add(&mut self, v: N, span: Span) -> Id { + let id = self.vec.len(); + self.vec.push(v); + self.spans.push(span); + Id { + id, + _pd: PhantomData, + } + } +} + +impl Default for NodeVec { + fn default() -> Self { + Self { + vec: Default::default(), + spans: Default::default(), + } + } +} + +pub struct Id { + id: usize, + _pd: PhantomData, +} + +impl Clone for Id { + fn clone(&self) -> Self { + *self + } +} + +impl Copy for Id {} + +pub trait Node: Sized { + fn vec(nodes: &Nodes) -> &NodeVec; + fn vec_mut(nodes: &mut Nodes) -> &mut NodeVec; +} + +impl ParseCtx<'_> { + pub fn parse(&mut self) -> Result, CompilerMsg> { + let old_start = self.start; + self.start = self.cursor.peek_start(); + let res = P::parse(self).map(|r| self.push(r)); + self.start = old_start; + res + } + + pub fn ident(&mut self, s: String) -> Id { + let span = self.cursor.span; + self.nodes.idents.add(Ident { inner: s }, span) + } + pub fn lit(&mut self, lit: Lit) -> Id { + let span = self.cursor.span; + self.nodes.lits.add(lit, span) + } + pub fn push_adv(&mut self, node: N) -> Id { + let res = self.push(node); + self.cursor.next(); + res + } + pub fn push(&mut self, node: N) -> Id { + let end = self.cursor.cur_end(); + N::vec_mut(&mut self.nodes).add( + node, + Span { + file: self.cursor.file(), + start: self.start, + end, + }, + ) + } +} + +macro_rules! def_nodes { + ($($field:ident: $ty:ident,)*) => { + #[derive(Default)] + pub struct Nodes { + $($field: NodeVec<$ty>,)* + } + + $(impl Node for $ty { + fn vec(nodes: &Nodes) -> &NodeVec { + &nodes.$field + } + fn vec_mut(nodes: &mut Nodes) -> &mut NodeVec { + &mut nodes.$field + } + })* + }; +} +use def_nodes; + +pub trait FmtNode: Node { + fn format(&self, w: &mut impl std::io::Write, nodes: &Nodes) -> std::io::Result<()>; +} diff --git a/test/main.lang b/test/main.lang index e19844e..f0974a1 100644 --- a/test/main.lang +++ b/test/main.lang @@ -1 +1 @@ -let x = 3; +x =/ arst -3