diff --git a/src/backend/arch/mod.rs b/src/backend/arch/mod.rs new file mode 100644 index 0000000..2a99bf5 --- /dev/null +++ b/src/backend/arch/mod.rs @@ -0,0 +1 @@ +pub mod x86_64; diff --git a/src/backend/arch/x86_64.rs b/src/backend/arch/x86_64.rs new file mode 100644 index 0000000..0680178 --- /dev/null +++ b/src/backend/arch/x86_64.rs @@ -0,0 +1,63 @@ +pub struct Reg(u8); + +pub struct RegMode { + reg: Reg, + mode: BitMode, +} + +macro_rules! def_regs { + ($($val:literal $reg:ident: $B64:literal $B32:literal $B16:literal $B8:literal $($B16H:literal)?,)*) => { + impl Reg { + $( + pub const $reg: u8 = $val; + )* + } + impl RegMode { + pub fn parse(s: &str) -> Option<Self> { + let (reg, mode) = match s.to_lowercase().as_str() { + $( + $B64 => ($val, BitMode::B64), + $B32 => ($val, BitMode::B32), + $B16 => ($val, BitMode::B16), + $B8 => ($val, BitMode::B8), + $($B16H => ($val, BitMode::B16H),)? + )* + _ => return None, + }; + Some(RegMode { + reg: Reg(reg), + mode, + }) + } + } + }; +} + +def_regs! 
{ + 0b0000 A : "rax" "eax" "ax" "al" "ah", + 0b0001 C : "rcx" "ecx" "cx" "cl" "ch", + 0b0010 D : "rdx" "edx" "dx" "dl" "dh", + 0b0011 B : "rbx" "ebx" "bx" "bl" "bh", + + 0b0100 SP: "rsp" "esp" "sp" "spl", + 0b0101 BP: "rbp" "ebp" "bp" "bpl", + 0b0110 SI: "rsi" "esi" "si" "sil", + 0b0111 DI: "rdi" "edi" "di" "dil", + + 0b1000 R8 : "r8" "r8d" "r8w" "r8b", + 0b1001 R9 : "r9" "r9d" "r9w" "r9b", + 0b1010 R10: "r10" "r10d" "r10w" "r10b", + 0b1011 R11: "r11" "r11d" "r11w" "r11b", + 0b1100 R12: "r12" "r12d" "r12w" "r12b", + 0b1101 R13: "r13" "r13d" "r13w" "r13b", + 0b1110 R14: "r14" "r14d" "r14w" "r14b", + 0b1111 R15: "r15" "r15d" "r15w" "r15b", +} + +pub enum BitMode { + B64, + B32, + B16, + B16H, + B8, +} diff --git a/src/backend/mod.rs b/src/backend/mod.rs new file mode 100644 index 0000000..0374a92 --- /dev/null +++ b/src/backend/mod.rs @@ -0,0 +1 @@ +pub mod arch; diff --git a/src/io/mod.rs b/src/io/mod.rs index beab14f..3f657e1 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -122,6 +122,15 @@ impl From for CompilerMsg { } } +impl From<&str> for CompilerMsg { + fn from(msg: &str) -> Self { + Self { + spans: Vec::new(), + msg: msg.to_string(), + } + } +} + impl> From<(S, Span)> for CompilerMsg { fn from((msg, span): (S, Span)) -> Self { Self { diff --git a/src/ir/structs/mod.rs b/src/ir/structs/mod.rs index 22a9d66..e5bb04e 100644 --- a/src/ir/structs/mod.rs +++ b/src/ir/structs/mod.rs @@ -16,16 +16,9 @@ pub struct Statement { } pub enum StatementTy { - Define, - Assign { - target: VarId, - ty: TypeId, - val: VarId, - }, - Call { - target: VarId, - args: Vec, - }, + Define { target: VarId, val: VarId }, + Assign { target: VarId, val: VarId }, + Call { target: VarId, args: Vec }, } pub struct Var { @@ -36,6 +29,7 @@ pub struct Var { pub enum Type { Unsigned(u8), Signed(u8), + Infer, } pub type VarId = u32; diff --git a/src/main.rs b/src/main.rs index 1a1a346..f5fa2d2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ use crate::{io::CompilerOutput, 
parser_ir::parse_program}; +mod backend; mod io; mod ir; mod parser; diff --git a/src/parser/cursor/token.rs b/src/parser/cursor/token.rs index 5d3b4df..3e02570 100644 --- a/src/parser/cursor/token.rs +++ b/src/parser/cursor/token.rs @@ -38,6 +38,7 @@ def_tokens! { For: "for", Match: "match", Break: "break", + Asm: "asm", } other { Ident(String), diff --git a/src/parser/nodes/asm/mod.rs b/src/parser/nodes/asm/mod.rs new file mode 100644 index 0000000..4202cef --- /dev/null +++ b/src/parser/nodes/asm/mod.rs @@ -0,0 +1,20 @@ +use crate::parser::{Node, cursor::Token}; + +pub mod x86_64; + +pub enum AsmBlock { + X86_64(x86_64::Asm), +} + +impl Node for AsmBlock { + fn parse(ctx: &mut crate::parser::ParseCtx) -> Result<Self, crate::io::CompilerMsg> { + ctx.expect(Token::OpenCurly)?; + let asm = ctx.parse()?; + ctx.expect(Token::CloseCurly)?; + Ok(Self::X86_64(asm)) + } + + fn fmt(&self, f: &mut std::fmt::Formatter, ctx: crate::parser::DisplayCtx) -> std::fmt::Result { + write!(f, "asm {{ ... }}") + } +} diff --git a/src/parser/nodes/asm/x86_64.rs b/src/parser/nodes/asm/x86_64.rs new file mode 100644 index 0000000..f2b8d49 --- /dev/null +++ b/src/parser/nodes/asm/x86_64.rs @@ -0,0 +1,89 @@ +use crate::{ + backend::arch::x86_64::RegMode, + io::{CompilerMsg, Span}, + parser::{ + Node, + cursor::{LitTy, Token}, + }, +}; + +pub struct Asm { + instrs: Vec<Instr>, +} + +pub enum Instr { + Mov { dst: RegMode, src: RegImm }, + Int { code: u64 }, +} + +pub enum RegImm { + Reg(RegMode), + Imm(u64), +} + +impl Node for Asm { + fn parse(ctx: &mut crate::parser::ParseCtx) -> Result<Self, CompilerMsg> { + let mut instrs = Vec::new(); + while let Some(Token::Ident(next)) = ctx.peek() { + match next.as_str() { + "mov" => { + ctx.next(); + let dst = parse_reg(ctx)?; + ctx.expect(Token::Comma)?; + let src = parse_rmi(ctx)?; + instrs.push(Instr::Mov { dst, src }); + } + "int" => { + ctx.next(); + let Token::Lit(LitTy::Number(num)) = ctx.expect_next()? 
else { + return Err("Expected an immediate".into()); + }; + let code = parse_imm(&num, ctx.span)?; + instrs.push(Instr::Int { code }); + } + _ => { + let msg = format!("Unknown instruction {next}"); + ctx.next(); + return Err(CompilerMsg { + msg, + spans: vec![ctx.span], + }); + } + } + } + Ok(Self { instrs }) + } + + fn fmt(&self, f: &mut std::fmt::Formatter, ctx: crate::parser::DisplayCtx) -> std::fmt::Result { + todo!() + } +} + +pub fn parse_imm(mut s: &str, span: Span) -> Result<u64, CompilerMsg> { + let mut radix = 10; + if s.starts_with("0x") { + radix = 16; + s = &s[2..]; + } + u64::from_str_radix(s, radix) + .map_err(|_| CompilerMsg::from(("invalid immediate", span))) +} + +pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result<RegImm, CompilerMsg> { + let next = ctx.expect_next()?; + let err = || CompilerMsg::unexpected_token(&next, ctx.span, "a register or immediate"); + Ok(match &next { + Token::Ident(ident) => RegImm::Reg(RegMode::parse(ident).ok_or_else(err)?), + Token::Lit(LitTy::Number(num)) => RegImm::Imm(parse_imm(num, ctx.span)?), + _ => return Err(err()), + }) +} + +pub fn parse_reg(ctx: &mut crate::parser::ParseCtx) -> Result<RegMode, CompilerMsg> { + let next = ctx.expect_next()?; + let err = || CompilerMsg::unexpected_token(&next, ctx.span, "a register"); + let Token::Ident(next) = &next else { + return Err(err()); + }; + RegMode::parse(next).ok_or_else(err) +} diff --git a/src/parser/nodes/expr.rs b/src/parser/nodes/expr.rs index 90c6b28..6ca2913 100644 --- a/src/parser/nodes/expr.rs +++ b/src/parser/nodes/expr.rs @@ -45,6 +45,7 @@ pub enum ExprTy { Import(Ident), Fn(Box), Break, + Asm(AsmBlock), } impl Node for Expr { @@ -162,6 +163,7 @@ impl ExprTy { Self::Break => { write!(f, "break") } + Self::Asm(asm) => asm.fmt(f, ctx), } } } @@ -216,6 +218,7 @@ impl Expr { let ident = ctx.parse()?; ExprTy::Import(ident) } + Token::Asm => ExprTy::Asm(ctx.parse()?), other => return ctx.unexpected(other, "an expression"), }; Ok(Self { @@ -258,7 +261,7 @@ impl Expr { | ExprTy::If { body, .. 
} | ExprTy::Negate(body) | ExprTy::Assign { val: body, .. } => body.ends_with_block(), - | ExprTy::Define { val: body, .. } => body.ends_with_block(), + ExprTy::Define { val: body, .. } => body.ends_with_block(), ExprTy::Fn(f) => f.ends_with_block(), _ => false, } diff --git a/src/parser/nodes/mod.rs b/src/parser/nodes/mod.rs index 6c3f9cd..240ec2b 100644 --- a/src/parser/nodes/mod.rs +++ b/src/parser/nodes/mod.rs @@ -1,3 +1,4 @@ +mod asm; mod body; mod expr; mod func; @@ -5,6 +6,7 @@ mod ident; mod param; mod struct_; mod ty; +pub use asm::*; pub use body::*; pub use expr::*; pub use func::*; diff --git a/test/hello.lang b/test/hello.lang new file mode 100644 index 0000000..11ae5e5 --- /dev/null +++ b/test/hello.lang @@ -0,0 +1,5 @@ +asm { + mov eax, 1 + mov ebx, 39 + int 0x80 +}