give immediates a sign & fix stuff

This commit is contained in:
2026-06-12 17:08:42 -04:00
parent e199620856
commit 7280f7b071
7 changed files with 282 additions and 150 deletions
+37 -88
View File
@@ -1,7 +1,5 @@
use crate::backend::Symbol;
use super::*; use super::*;
use util::*; use crate::backend::Symbol;
type ERes = Result<(), CompilerMsg>; type ERes = Result<(), CompilerMsg>;
@@ -12,30 +10,6 @@ pub struct Code {
pub(super) missing: Vec<(usize, Symbol)>, pub(super) missing: Vec<(usize, Symbol)>,
} }
#[derive(Clone, Copy)]
pub struct Mem {
pub reg: Reg,
pub disp: i32,
pub width: Width,
}
#[derive(Clone, Copy)]
pub enum RegImmMem {
Reg(Reg),
Imm(u64),
Mem(Mem),
}
#[derive(Clone, Copy)]
pub enum RegMem {
Reg(Reg),
Mem(Mem),
}
pub fn mem(reg: Reg, disp: i32, width: Width) -> Mem {
Mem { reg, disp, width }
}
impl Code { impl Code {
pub fn mov(&mut self, dst: impl Into<RegMem>, src: impl Into<RegImmMem>) -> ERes { pub fn mov(&mut self, dst: impl Into<RegMem>, src: impl Into<RegImmMem>) -> ERes {
let dst = dst.into(); let dst = dst.into();
@@ -58,42 +32,56 @@ impl Code {
self.bytes.push(modrm_regs(src, dst)); self.bytes.push(modrm_regs(src, dst));
} }
RegImmMem::Imm(src) => { RegImmMem::Imm(src) => {
let src_width = Width::fit(src); let src_width = src.width_unsigned()?;
if src_width > dst.width() { if src_width > dst.width() {
return Err("immediate cannot fit in register".into()); return Err("immediate cannot fit in register".into());
} }
self.prefix16(dst); self.prefix16(dst);
if src_width <= Width::B32 { if dst.width() == Width::B64 && src_width <= Width::B32 && src.0 < 0 {
dst.lower64(); self.bytes
.extend([rex(dst.width(), 0, 0, dst), 0xc7, 0xc0 | dst.base()]);
self.imm(src, Width::B32);
} else {
if src_width <= Width::B32 {
dst = dst.lower64();
}
if dst.requires_rex() {
self.bytes.push(rex(dst.width(), 0, 0, dst));
}
let opcode = 0xb0 | ((dst.width().gt8() as u8) << 3);
self.bytes.push(opcode | dst.base());
self.imm(src, dst.width());
} }
if dst.requires_rex() {
self.bytes.push(rex(dst.width(), 0, 0, dst));
}
let opcode = 0xb0 | ((dst.width().gt8() as u8) << 3);
self.bytes.push(opcode | dst.base());
self.bytes.extend(&src.to_le_bytes()[..dst.width().bytes()]);
} }
RegImmMem::Mem(src) => todo!(), RegImmMem::Mem(src) => todo!(),
}, },
RegMem::Mem(dst) => match src { RegMem::Mem(dst) => match src {
RegImmMem::Reg(src) => todo!(), RegImmMem::Reg(src) => todo!(),
RegImmMem::Imm(src) => { RegImmMem::Imm(src) => {
let src_width = Width::fit(src); let encode_width = dst.width.min(Width::B32);
let src_width = if dst.width == Width::B64 {
src.width_signed()
} else {
src.width_unsigned()
}?;
if src_width == Width::B64 { if src_width == Width::B64 {
return Err("cannot move 64 bit immediate into memory".into()); return Err("cannot move 64 bit immediate into memory".into());
} }
if src_width > dst.width {
return Err("source cannot fit in destination".into());
}
match dst.reg.width() { match dst.reg.width() {
Width::B8 | Width::B16 => return Err("invalid register width".into()), Width::B8 | Width::B16 => return Err("invalid register width".into()),
Width::B32 => self.bytes.push(0x67), Width::B32 => self.bytes.push(0x67),
Width::B64 => (), Width::B64 => (),
} }
self.prefix16(src_width); self.prefix16(encode_width);
if dst.reg.requires_mem_rex() { if dst.reg.requires_mem_rex() || dst.width == Width::B64 {
self.bytes.push(rex(src_width, 0, 0, dst.reg)); self.bytes.push(rex(dst.width, 0, 0, dst.reg));
} }
self.bytes.push(0xc6 | (src_width != Width::B8) as u8); self.bytes.push(0xc6 | (encode_width != Width::B8) as u8);
self.modrm_regdisp(dst.reg, dst.disp); self.modrm_regdisp(dst.reg, dst.disp);
self.bytes.extend(&src.to_le_bytes()[..src_width.bytes()]); self.imm(src, encode_width);
} }
RegImmMem::Mem(_) => return Err("cannot move memory to memory".into()), RegImmMem::Mem(_) => return Err("cannot move memory to memory".into()),
}, },
@@ -113,14 +101,14 @@ impl Code {
Width::B16 => {} Width::B16 => {}
_ => return Err("register must be 64 or 16 bit".into()), _ => return Err("register must be 64 or 16 bit".into()),
}, },
RegImmMem::Imm(imm) => match Width::fit(imm) { RegImmMem::Imm(imm) => match imm.width_unsigned()? {
Width::B8 => { Width::B8 => {
self.bytes.push(0x6a); self.bytes.push(0x6a);
self.bytes.push(imm as u8); self.bytes.push(imm.0 as u8);
} }
Width::B16 | Width::B32 => { Width::B16 | Width::B32 => {
self.bytes.push(0x68); self.bytes.push(0x68);
self.bytes.extend((imm as u32).to_le_bytes()); self.bytes.extend((imm.0 as u32).to_le_bytes());
} }
Width::B64 => return Err("immediate must be 32 bit or less".into()), Width::B64 => return Err("immediate must be 32 bit or less".into()),
}, },
@@ -215,6 +203,10 @@ impl Code {
self.missing self.missing
.extend(other.missing.iter().map(|&(p, s)| (pos + p, s))); .extend(other.missing.iter().map(|&(p, s)| (pos + p, s)));
} }
/// Appends the low `width.bytes()` bytes of `imm`, in little-endian order,
/// to the encoded byte stream.
fn imm(&mut self, imm: Imm, width: Width) {
    let le_bytes = imm.0.to_le_bytes();
    let count = width.bytes();
    self.bytes.extend(&le_bytes[..count]);
}
} }
pub fn encode(f: impl FnOnce(&mut Code) -> Result<(), CompilerMsg>) -> Result<Code, CompilerMsg> { pub fn encode(f: impl FnOnce(&mut Code) -> Result<(), CompilerMsg>) -> Result<Code, CompilerMsg> {
@@ -222,46 +214,3 @@ pub fn encode(f: impl FnOnce(&mut Code) -> Result<(), CompilerMsg>) -> Result<Co
f(&mut code)?; f(&mut code)?;
Ok(code) Ok(code)
} }
// fromrot
impl From<Reg> for RegImmMem {
fn from(value: Reg) -> Self {
Self::Reg(value)
}
}
impl From<Reg> for RegMem {
fn from(value: Reg) -> Self {
Self::Reg(value)
}
}
impl From<Mem> for RegImmMem {
fn from(value: Mem) -> Self {
Self::Mem(value)
}
}
impl From<Mem> for RegMem {
fn from(value: Mem) -> Self {
Self::Mem(value)
}
}
impl From<u64> for RegImmMem {
fn from(value: u64) -> Self {
Self::Imm(value)
}
}
impl From<i64> for RegImmMem {
fn from(value: i64) -> Self {
Self::Imm(value as u64)
}
}
impl From<i32> for RegImmMem {
fn from(value: i32) -> Self {
Self::Imm(value as u64)
}
}
+3
View File
@@ -2,6 +2,7 @@ mod compile;
mod encode; mod encode;
mod reg; mod reg;
mod test; mod test;
mod types;
mod util; mod util;
use crate::{ use crate::{
@@ -14,6 +15,8 @@ pub use compile::*;
pub use encode::*; pub use encode::*;
pub use reg::*; pub use reg::*;
pub use test::bin::run as bin_test; pub use test::bin::run as bin_test;
pub use types::*;
use util::*;
pub struct X86_64; pub struct X86_64;
+9 -28
View File
@@ -52,8 +52,10 @@ impl Reg {
} }
/// if self has 64 bit width, changes width to 32 bit /// if self has 64 bit width, changes width to 32 bit
pub fn lower64(&mut self) { pub fn lower64(&self) -> Self {
self.width.lower64() let mut new = *self;
new.width = new.width.min(Width::B32);
new
} }
pub fn requires_rex(&self) -> bool { pub fn requires_rex(&self) -> bool {
@@ -76,7 +78,7 @@ impl Reg {
} }
impl Width { impl Width {
pub const fn max(&self) -> u64 { pub const fn max_val(&self) -> u64 {
match self { match self {
Self::B64 => u64::MAX, Self::B64 => u64::MAX,
Self::B32 => u32::MAX as u64, Self::B32 => u32::MAX as u64,
@@ -85,10 +87,8 @@ impl Width {
} }
} }
pub fn lower64(&mut self) { pub fn min(self, other: Self) -> Self {
if matches!(self, Width::B64) { if self <= other { self } else { other }
*self = Width::B32;
}
} }
pub const fn bytes(&self) -> usize { pub const fn bytes(&self) -> usize {
@@ -100,27 +100,6 @@ impl Width {
} }
} }
pub const fn fit(val: u64) -> Self {
const B8: u64 = 1 << 8;
const B16: u64 = 1 << 16;
const B32: u64 = 1 << 32;
match val {
..B8 => Self::B8,
B8..B16 => Self::B16,
B16..B32 => Self::B32,
B32.. => Self::B64,
}
}
pub const fn fiti(val: u64) -> Self {
match val {
..0x80 => Self::B8,
0x80..0x8000 => Self::B16,
0x8000..0x8000_0000 => Self::B32,
0x8000_0000.. => Self::B64,
}
}
/// greater than 8 bits /// greater than 8 bits
pub const fn gt8(&self) -> bool { pub const fn gt8(&self) -> bool {
!matches!(self, Self::B8) !matches!(self, Self::B8)
@@ -205,6 +184,8 @@ macro_rules! def_regs {
use def_regs; use def_regs;
use crate::arch::x86_64::Imm;
impl From<Reg> for Width { impl From<Reg> for Width {
fn from(value: Reg) -> Self { fn from(value: Reg) -> Self {
value.width value.width
+41 -32
View File
@@ -11,40 +11,29 @@ const DISPS: &[i32] = &[
i32::MAX, i32::MAX,
]; ];
const IMMS: &[u64] = &[ const IMMS: &[i128] = &[
0x0, 0x0,
u8::MAX as u64, i8::MIN as i128,
u8::MAX as u64 + 1, i8::MAX as i128,
u16::MAX as u64, i16::MIN as i128,
u16::MAX as u64 + 1, i16::MAX as i128,
u32::MAX as u64, i32::MIN as i128,
u32::MAX as u64 + 1, i32::MAX as i128,
// nasm likes to think u64::MAX is -1i32 for some reason i64::MIN as i128,
i64::MAX as u64, i64::MAX as i128,
u8::MAX as i128,
u8::MAX as i128 + 1,
u16::MAX as i128,
u16::MAX as i128 + 1,
u32::MAX as i128,
u32::MAX as i128 + 1,
i64::MAX as i128,
]; ];
const WIDTHS: &[Width] = &[Width::B8, Width::B16, Width::B32, Width::B64];
#[test] #[test]
fn mov() { fn mov() {
for &reg in Reg::IMPORTANT {
for &disp in DISPS {
for &imm in IMMS {
let width = Width::fit(imm);
let size = match width {
Width::B8 => "BYTE",
Width::B16 => "WORD",
Width::B32 => "DWORD",
Width::B64 => "QWORD",
};
let ddisp = (disp as i64).abs();
let sign = if disp < 0 { '-' } else { '+' };
eq!(
format!("mov {size} [{reg}{sign}0x{ddisp:x}], 0x{imm:x}"),
mov(mem(reg, disp, width), imm)
);
}
}
}
for &r1 in Reg::IMPORTANT { for &r1 in Reg::IMPORTANT {
for &r2 in Reg::IMPORTANT { for &r2 in Reg::IMPORTANT {
eq!(format!("mov {r1}, {r2}"), mov(r1, r2)); eq!(format!("mov {r1}, {r2}"), mov(r1, r2));
@@ -53,9 +42,29 @@ fn mov() {
for &r1 in Reg::IMPORTANT { for &r1 in Reg::IMPORTANT {
for &imm in IMMS { for &imm in IMMS {
eq!(format!("mov {r1}, 0x{imm:x}"), mov(r1, imm)); eq!(format!("mov {r1}, {imm}"), mov(r1, imm));
} }
} }
for &reg in Reg::IMPORTANT {
for &disp in DISPS {
for &imm in IMMS {
for &width in WIDTHS {
let mem = mem(reg, disp, width);
eq!(format!("mov {mem}, {imm}"), mov(mem, imm));
}
}
}
}
}
/// Reinterprets `u` as a signed value of the narrowest unsigned width
/// (8, 16, 32 or 64 bits) that holds it, then sign-extends to `i64`.
///
/// For example `0xff` becomes `-1` and `0x100` stays `256`.
fn u_to_i(u: u64) -> i64 {
    if u <= u64::from(u8::MAX) {
        i64::from(u as i8)
    } else if u <= u64::from(u16::MAX) {
        i64::from(u as i16)
    } else if u <= u64::from(u32::MAX) {
        i64::from(u as i32)
    } else {
        u as i64
    }
}
macro_rules! eq { macro_rules! eq {
@@ -65,8 +74,8 @@ macro_rules! eq {
let mut code = Code::default(); let mut code = Code::default();
let res = code.$instr $args; let res = code.$instr $args;
match (expected, res) { match (expected, res) {
(Ok(_), Err(e)) => { (Ok(expected), Err(e)) => {
panic!("{asm}: failed to compile: {}", e.msg); panic!("{asm}: failed to compile: {}\nexpected: {expected:x?}", e.msg);
} }
(Err(e), Ok(_)) => { (Err(e), Ok(_)) => {
let res = &code.bytes[..]; let res = &code.bytes[..];
+162
View File
@@ -0,0 +1,162 @@
use std::num::TryFromIntError;
use super::*;
/// Memory operand made of a base register, a displacement and an access width.
///
/// Its `Display` form is `<SIZE> [<reg> <+/-> 0x<disp>]`.
#[derive(Clone, Copy)]
pub struct Mem {
/// Base register of the address.
pub reg: Reg,
/// Signed displacement applied to `reg`.
pub disp: i32,
/// Width of the memory access (rendered as BYTE/WORD/DWORD/QWORD).
pub width: Width,
}
/// Operand that is a register, an immediate value, or a memory reference.
#[derive(Clone, Copy)]
pub enum RegImmMem {
/// Register operand.
Reg(Reg),
/// Immediate (constant) operand.
Imm(Imm),
/// Memory operand.
Mem(Mem),
}
/// Operand that is either a register or a memory reference (no immediates).
#[derive(Clone, Copy)]
pub enum RegMem {
/// Register operand.
Reg(Reg),
/// Memory operand.
Mem(Mem),
}
/// Immediate operand value.
///
/// The value is kept as an `i128`, which is wide enough to hold every `u64`
/// and every `i64` losslessly (see the `From` conversions in this file).
#[derive(Clone, Copy, PartialEq, PartialOrd)]
pub struct Imm(pub i128);
/// Shorthand constructor for a [`Mem`] operand from its base register,
/// displacement and access width.
pub fn mem(reg: Reg, disp: i32, width: Width) -> Mem {
    Mem { width, disp, reg }
}
impl Imm {
    /// Error reported when an immediate cannot be represented in 64 bits.
    pub fn overflow_msg() -> CompilerMsg {
        "immediate overflow".into()
    }

    /// Returns the narrowest width whose *signed* (two's-complement) range
    /// contains the value.
    ///
    /// # Errors
    /// Returns [`Imm::overflow_msg`] when the value lies outside the `i64` range.
    pub fn width_signed(&self) -> Result<Width, CompilerMsg> {
        Ok(match self.0 {
            -0x80..=0x7f => Width::B8,
            -0x8000..=0x7fff => Width::B16,
            -0x8000_0000..=0x7fff_ffff => Width::B32,
            -0x8000_0000_0000_0000..=0x7fff_ffff_ffff_ffff => Width::B64,
            _ => return Err(Self::overflow_msg()),
        })
    }

    /// Returns the narrowest width whose bit pattern can hold the value,
    /// accepting either the unsigned range (`0..=MAX`) or the two's-complement
    /// negative range (`MIN..0`) of that width.
    ///
    /// The lower bound of each arm is the signed minimum of the width: a more
    /// negative value (e.g. `-0xff` for 8 bits) would lose information when
    /// only the low bytes of its two's-complement form are emitted.
    ///
    /// # Errors
    /// Returns [`Imm::overflow_msg`] when the value fits neither the `u64` nor
    /// the `i64` range.
    pub fn width_unsigned(&self) -> Result<Width, CompilerMsg> {
        Ok(match self.0 {
            -0x80..=0xff => Width::B8,
            -0x8000..=0xffff => Width::B16,
            -0x8000_0000..=0xffff_ffff => Width::B32,
            -0x8000_0000_0000_0000..=0xffff_ffff_ffff_ffff => Width::B64,
            _ => return Err(Self::overflow_msg()),
        })
    }
}
impl TryFrom<Imm> for u8 {
type Error = TryFromIntError;
fn try_from(value: Imm) -> Result<Self, Self::Error> {
value.0.try_into()
}
}
/// Renders the operand as `<SIZE> [<reg> <sign> 0x<disp>]`, where `<SIZE>` is
/// the size keyword matching the access width.
impl std::fmt::Display for Mem {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let keyword = match self.width {
            Width::B64 => "QWORD",
            Width::B32 => "DWORD",
            Width::B16 => "WORD",
            Width::B8 => "BYTE",
        };
        // The displacement is always printed with an explicit, spaced sign.
        let displacement = signed_hex(i128::from(self.disp), true);
        write!(f, "{keyword} [{} {displacement}]", self.reg)
    }
}
// fromrot
impl From<Reg> for RegImmMem {
fn from(value: Reg) -> Self {
Self::Reg(value)
}
}
impl From<Reg> for RegMem {
fn from(value: Reg) -> Self {
Self::Reg(value)
}
}
impl From<Mem> for RegImmMem {
fn from(value: Mem) -> Self {
Self::Mem(value)
}
}
impl From<Mem> for RegMem {
fn from(value: Mem) -> Self {
Self::Mem(value)
}
}
impl From<u64> for RegImmMem {
fn from(value: u64) -> Self {
Self::Imm(value.into())
}
}
impl From<i64> for RegImmMem {
fn from(value: i64) -> Self {
Self::Imm(value.into())
}
}
impl From<i32> for RegImmMem {
fn from(value: i32) -> Self {
Self::Imm(value.into())
}
}
impl From<i128> for RegImmMem {
fn from(value: i128) -> Self {
Self::Imm(value.into())
}
}
impl From<u64> for Imm {
fn from(value: u64) -> Self {
Self(value as i128)
}
}
impl From<i64> for Imm {
fn from(value: i64) -> Self {
Self(value as i128)
}
}
impl From<i32> for Imm {
fn from(value: i32) -> Self {
Self(value as i128)
}
}
impl From<i128> for Imm {
fn from(value: i128) -> Self {
Self(value)
}
}
/// Formats the immediate by delegating to the wrapped `i128`'s `fmt`.
// NOTE(review): `fmt` is called with method syntax and this file imports no
// formatting trait by name, so it resolves through whatever `super::*` brings
// into scope — confirm that this picks the intended trait.
impl std::fmt::Display for Imm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
/// Debug output delegates to the wrapped `i128`'s `fmt`, so no `Imm(..)`
/// wrapper is printed around the value.
// NOTE(review): same trait-resolution caveat as the `Display` impl — verify
// whether this forwards to `Debug` or `Display` of `i128`.
impl std::fmt::Debug for Imm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
+20
View File
@@ -59,3 +59,23 @@ impl RexBit for Width {
self == Width::B64 self == Width::B64
} }
} }
/// Display adapter that prints an integer as a sign followed by its
/// hexadecimal magnitude (e.g. `-0x10`) instead of a two's-complement pattern.
pub struct SignedHex {
    /// Value to render.
    pub val: i128,
    /// Operator mode: when `true` the sign is always written and followed by a
    /// space (`+ 0x10` / `- 0x10`); when `false` only a leading `-` is written
    /// for negative values.
    pub op: bool,
}

/// Builds a [`SignedHex`] for `val`; see [`SignedHex::op`] for the meaning of `op`.
pub fn signed_hex(val: i128, op: bool) -> SignedHex {
    SignedHex { val, op }
}

impl std::fmt::Display for SignedHex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `unsigned_abs` cannot overflow, unlike `abs`, which fails on `i128::MIN`.
        let magnitude = self.val.unsigned_abs();
        let sign = match (self.op, self.val < 0) {
            (true, true) => "- ",
            (true, false) => "+ ",
            (false, true) => "-",
            (false, false) => "",
        };
        write!(f, "{sign}0x{magnitude:x}")
    }
}
+10 -2
View File
@@ -47,13 +47,21 @@ impl Node for Code {
} }
} }
pub fn parse_imm(mut s: &str, span: Span) -> Result<u64, CompilerMsg> { pub fn parse_imm(mut s: &str, span: Span) -> Result<Imm, CompilerMsg> {
let mut radix = 10; let mut radix = 10;
let mut mult = 1;
if s.starts_with('-') {
mult = -1;
s = &s[1..];
}
if s.starts_with("0x") { if s.starts_with("0x") {
radix = 16; radix = 16;
s = &s[2..]; s = &s[2..];
} }
u64::from_str_radix(s, radix).map_err(|_| CompilerMsg::from(("invalid immediate", span))) let abs = u64::from_str_radix(s, radix)
.map_err(|_| CompilerMsg::from(("invalid immediate", span)))?;
let val = (abs as i128) * mult;
Ok(Imm(val))
} }
pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result<RegImmMem, CompilerMsg> { pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result<RegImmMem, CompilerMsg> {