From ea88ffc451fbee92614d3da9c669d1076ccefa61 Mon Sep 17 00:00:00 2001 From: Jeff Date: Wed, 6 Nov 2024 18:39:09 -0500 Subject: [PATCH] Move the disassembler to its own module --- dust-lang/src/chunk.rs | 369 +--------------------------------- dust-lang/src/disassembler.rs | 366 +++++++++++++++++++++++++++++++++ dust-lang/src/lib.rs | 4 +- 3 files changed, 373 insertions(+), 366 deletions(-) create mode 100644 dust-lang/src/disassembler.rs diff --git a/dust-lang/src/chunk.rs b/dust-lang/src/chunk.rs index 1fda552..ef19e28 100644 --- a/dust-lang/src/chunk.rs +++ b/dust-lang/src/chunk.rs @@ -3,47 +3,15 @@ //! A chunk consists of a sequence of instructions and their positions, a list of constants, and a //! list of locals that can be executed by the Dust virtual machine. Chunks have a name when they //! belong to a named function. -//! -//! # Disassembly -//! -//! Chunks can be disassembled into a human-readable format using the `disassemble` method. The -//! output is designed to be displayed in a terminal and can be styled for better readability. -//! -//! ```text -//! ┌──────────────────────────────────────────────────────────────────────────────┐ -//! │ /var/home/jeff/Repositories/dust/target/debug/dust-shell │ -//! │ 3 instructions, 1 constants, 0 locals, returns none │ -//! │ Instructions │ -//! │ ------------ │ -//! │ INDEX BYTECODE OPERATION INFO TYPE POSITION │ -//! │ ----- -------- ------------- ------------------------- --------- ----------- │ -//! │ 0 00000003 LOAD_CONSTANT R0 = C0 str (11, 26) │ -//! │ 1 01390117 CALL_NATIVE write_line(R0) (0, 27) │ -//! │ 2 00000018 RETURN (27, 27) │ -//! │┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈│ -//! │ Locals │ -//! │ ------ │ -//! │ INDEX IDENTIFIER TYPE MUTABLE SCOPE REGISTER │ -//! │ ----- ---------- -------- ------- ------- -------- │ -//! │┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈│ -//! │ Constants │ -//! │ --------- │ -//! 
│ INDEX VALUE │ -//! │ ----- --------------- │ -//! │ 0 Hello, world! │ -//! └──────────────────────────────────────────────────────────────────────────────┘ -//! ``` use std::{ cmp::Ordering, - env::current_exe, fmt::{self, Debug, Display, Formatter}, }; -use colored::Colorize; use serde::{Deserialize, Serialize}; -use crate::{ConcreteValue, Instruction, Operation, Span, Type, Value}; +use crate::{Disassembler, Instruction, Operation, Span, Type, Value}; /// In-memory representation of a Dust program or function. /// @@ -299,8 +267,8 @@ impl Chunk { } } - pub fn disassembler(&self) -> ChunkDisassembler { - ChunkDisassembler::new(self) + pub fn disassembler(&self) -> Disassembler { + Disassembler::new(self) } } @@ -407,336 +375,7 @@ impl Display for Scope { } } -/// Builder that constructs a human-readable representation of a chunk. -pub struct ChunkDisassembler<'a> { - output: String, - chunk: &'a Chunk, - source: Option<&'a str>, - - // Options - width: usize, - styled: bool, - indent: usize, -} - -impl<'a> ChunkDisassembler<'a> { - const INSTRUCTION_HEADER: [&'static str; 4] = [ - "Instructions", - "------------", - " i BYTECODE OPERATION INFO TYPE POSITION ", - "--- -------- ------------- -------------------- --------------- ------------", - ]; - - const CONSTANT_HEADER: [&'static str; 4] = [ - "Constants", - "---------", - "INDEX VALUE ", - "----- ---------------", - ]; - - const LOCAL_HEADER: [&'static str; 4] = [ - "Locals", - "------", - "INDEX IDENTIFIER TYPE MUTABLE SCOPE REGISTER", - "----- ---------- ---------- ------- ------- --------", - ]; - - /// The default width of the disassembly output. To correctly align the output, this should - /// return the width of the longest line that the disassembler is guaranteed to produce. 
- pub fn default_width() -> usize { - let longest_line = Self::INSTRUCTION_HEADER[3]; - - longest_line.chars().count().max(80) - } - - pub fn new(chunk: &'a Chunk) -> Self { - Self { - output: String::new(), - chunk, - source: None, - width: Self::default_width(), - styled: false, - indent: 0, - } - } - - pub fn source(mut self, source: &'a str) -> Self { - self.source = Some(source); - - self - } - - pub fn width(mut self, width: usize) -> Self { - self.width = width; - - self - } - - pub fn styled(mut self, styled: bool) -> Self { - self.styled = styled; - - self - } - - pub fn indent(mut self, indent: usize) -> Self { - self.indent = indent; - - self - } - - fn push( - &mut self, - text: &str, - center: bool, - style_bold: bool, - style_dim: bool, - add_border: bool, - ) { - let characters = text.chars().collect::<Vec<char>>(); - let content_width = if add_border { - self.width - 2 - } else { - self.width - }; - let (line_characters, remainder) = characters - .split_at_checked(content_width) - .unwrap_or((characters.as_slice(), &[])); - let (left_pad_length, right_pad_length) = { - let extra_space = content_width.saturating_sub(characters.len()); - - if center { - (extra_space / 2, extra_space / 2 + extra_space % 2) - } else { - (0, extra_space) - } - }; - let content = if style_bold { - line_characters - .iter() - .collect::<String>() - .bold() - .to_string() - } else if style_dim { - line_characters - .iter() - .collect::<String>() - .dimmed() - .to_string() - } else { - line_characters.iter().collect::<String>() - }; - let length_before_content = self.output.chars().count(); - - for _ in 0..self.indent { - self.output.push_str("│ "); - } - - if add_border { - self.output.push('│'); - } - - self.output.push_str(&" ".repeat(left_pad_length)); - self.output.push_str(&content); - self.output.push_str(&" ".repeat(right_pad_length)); - - let length_after_content = self.output.chars().count(); - let line_length = length_after_content - length_before_content; - - if line_length < content_width - 1 { 
- self.output - .push_str(&" ".repeat(content_width - line_length)); - } - - if add_border { - self.output.push('│'); - } - - self.output.push('\n'); - - if !remainder.is_empty() { - self.push( - remainder.iter().collect::<String>().as_str(), - center, - style_bold, - style_dim, - add_border, - ); - } - } - - fn push_header(&mut self, header: &str) { - self.push(header, true, self.styled, false, true); - } - - fn push_details(&mut self, details: &str) { - self.push(details, true, false, false, true); - } - - fn push_border(&mut self, border: &str) { - self.push(border, false, false, false, false); - } - - fn push_empty(&mut self) { - self.push("", false, false, false, true); - } - - pub fn disassemble(mut self) -> String { - let top_border = "┌".to_string() + &"─".repeat(self.width - 2) + "┐"; - let section_border = "│".to_string() + &"┈".repeat(self.width - 2) + "│"; - let bottom_border = "└".to_string() + &"─".repeat(self.width - 2) + "┘"; - let name_display = self - .chunk - .name - .as_ref() - .map(|identifier| identifier.to_string()) - .unwrap_or_else(|| { - current_exe() - .map(|path| path.to_string_lossy().to_string()) - .unwrap_or("Chunk Disassembly".to_string()) - }); - - self.push_border(&top_border); - self.push_header(&name_display); - - if let Some(source) = self.source { - self.push_empty(); - self.push_details( - &source - .replace(" ", "") - .replace("\n\n", " ") - .replace('\n', " "), - ); - self.push_empty(); - } - - let info_line = format!( - "{} instructions, {} constants, {} locals, returns {}", - self.chunk.instructions.len(), - self.chunk.constants.len(), - self.chunk.locals.len(), - self.chunk - .return_type() - .map(|r#type| r#type.to_string()) - .unwrap_or("none".to_string()) - ); - - self.push(&info_line, true, false, true, true); - self.push_empty(); - - for line in &Self::INSTRUCTION_HEADER { - self.push_header(line); - } - - for (index, (instruction, position)) in self.chunk.instructions.iter().enumerate() { - let bytecode = format!("{:02X}", 
u32::from(instruction)); - let operation = instruction.operation().to_string(); - let info = instruction.disassembly_info(self.chunk); - let type_display = instruction - .yielded_type(self.chunk) - .map(|r#type| { - let type_string = r#type.to_string(); - - if type_string.len() > 15 { - format!("{type_string:.12}...") - } else { - type_string - } - }) - .unwrap_or(String::with_capacity(0)); - let position = position.to_string(); - - let instruction_display = format!( - "{index:^3} {bytecode:>8} {operation:13} {info:20} {type_display:^15} {position:12}" - ); - - self.push_details(&instruction_display); - } - - self.push_border(&section_border); - - for line in &Self::LOCAL_HEADER { - self.push_header(line); - } - - for ( - index, - Local { - identifier_index, - r#type, - scope, - register_index, - is_mutable: mutable, - }, - ) in self.chunk.locals.iter().enumerate() - { - let identifier_display = self - .chunk - .constants - .get(*identifier_index as usize) - .map(|value| value.to_string()) - .unwrap_or_else(|| "unknown".to_string()); - let type_display = r#type - .as_ref() - .map(|r#type| { - let type_string = r#type.to_string(); - - if type_string.len() > 10 { - format!("{type_string:.7}...") - } else { - type_string - } - }) - .unwrap_or("unknown".to_string()); - let local_display = format!( - "{index:<5} {identifier_display:10} {type_display:10} {mutable:7} {scope:7} {register_index:8}" - ); - - self.push_details(&local_display); - } - - self.push_border(&section_border); - - for line in &Self::CONSTANT_HEADER { - self.push_header(line); - } - - for (index, value) in self.chunk.constants.iter().enumerate() { - let value_display = { - let value_string = value.to_string(); - - if value_string.len() > 15 { - format!("{value_string:.12}...") - } else { - value_string - } - }; - let constant_display = format!("{index:<5} {value_display:^15}"); - - self.push_details(&constant_display); - - if let Some(function_disassembly) = match value { - 
Value::Concrete(ConcreteValue::Function(function)) => Some({ - function - .chunk() - .disassembler() - .styled(self.styled) - .indent(self.indent + 1) - .disassemble() - }), - _ => None, - } { - self.output.push_str(&function_disassembly); - } - } - - self.push_border(&bottom_border); - - let _ = self.output.trim_end_matches('\n'); - - self.output - } -} - +/// Errors that can occur when using a [`Chunk`]. #[derive(Clone, Debug, PartialEq)] pub enum ChunkError { ConstantIndexOutOfBounds { index: usize }, diff --git a/dust-lang/src/disassembler.rs b/dust-lang/src/disassembler.rs new file mode 100644 index 0000000..0a7be92 --- /dev/null +++ b/dust-lang/src/disassembler.rs @@ -0,0 +1,366 @@ +//! Tool for disassembling chunks into a human-readable format. +//! +//! A disassembler can be created by calling [Chunk::disassembler][] or by instantiating one with +//! [Disassembler::new][]. +//! +//! # Options +//! +//! The disassembler can be customized with the 'styled' option, which will apply ANSI color codes +//! to the output. +//! +//! # Output +//! +//! The output of [Disassembler::disassemble] is a string that can be printed to the console or +//! written to a file. Below is an example of the disassembly for a simple "Hello, world!" program. +//! +//! ```text +//! ┌──────────────────────────────────────────────────────────────────────────────┐ +//! │ │ +//! │ │ +//! │ write_line("Hello, world!") │ +//! │ │ +//! │ 3 instructions, 1 constants, 0 locals, returns none │ +//! │ │ +//! │ Instructions │ +//! │ ------------ │ +//! │ i BYTECODE OPERATION INFO TYPE POSITION │ +//! │--- -------- ------------- -------------------- ---------------- ------------ │ +//! │ 0 03 LOAD_CONSTANT R0 = C0 str (11, 26) │ +//! │ 1 1390117 CALL_NATIVE write_line(R0) (0, 27) │ +//! │ 2 18 RETURN (27, 27) │ +//! │┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈│ +//! │ Locals │ +//! │ ------ │ +//! │ i IDENTIFIER TYPE MUTABLE SCOPE REGISTER │ +//! 
│ --- ---------- ---------------- ------- ------- -------- │ +//! │┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈│ +//! │ Constants │ +//! │ --------- │ +//! │ i VALUE │ +//! │ --- --------------- │ +//! │ 0 Hello, world! │ +//! └──────────────────────────────────────────────────────────────────────────────┘ +//! ``` +use std::env::current_exe; + +use colored::Colorize; + +use crate::{Chunk, ConcreteValue, Local, Value}; + +const INSTRUCTION_HEADER: [&str; 4] = [ + "Instructions", + "------------", + " i BYTECODE OPERATION INFO TYPE POSITION ", + "--- -------- ------------- -------------------- ---------------- ------------", +]; + +const CONSTANT_HEADER: [&str; 4] = [ + "Constants", + "---------", + " i VALUE ", + "--- ---------------", +]; + +const LOCAL_HEADER: [&str; 4] = [ + "Locals", + "------", + " i IDENTIFIER TYPE MUTABLE SCOPE REGISTER", + "--- ---------- ---------------- ------- ------- --------", +]; + +/// Builder that constructs a human-readable representation of a chunk. +/// +/// See the [module-level documentation](index.html) for more information. +pub struct Disassembler<'a> { + output: String, + chunk: &'a Chunk, + source: Option<&'a str>, + + // Options + styled: bool, + indent: usize, +} + +impl<'a> Disassembler<'a> { + pub fn new(chunk: &'a Chunk) -> Self { + Self { + output: String::new(), + chunk, + source: None, + styled: false, + indent: 0, + } + } + + /// The default width of the disassembly output. To correctly align the output, this should + /// return the width of the longest line that the disassembler is guaranteed to produce. 
+ pub fn default_width() -> usize { + let longest_line = INSTRUCTION_HEADER[3]; + + longest_line.chars().count().max(80) + } + + pub fn source(mut self, source: &'a str) -> Self { + self.source = Some(source); + + self + } + + pub fn styled(mut self, styled: bool) -> Self { + self.styled = styled; + + self + } + + pub fn indent(mut self, indent: usize) -> Self { + self.indent = indent; + + self + } + + fn push( + &mut self, + text: &str, + center: bool, + style_bold: bool, + style_dim: bool, + add_border: bool, + ) { + let width = Disassembler::default_width(); + let characters = text.chars().collect::<Vec<char>>(); + let content_width = if add_border { width - 2 } else { width }; + let (line_characters, remainder) = characters + .split_at_checked(content_width) + .unwrap_or((characters.as_slice(), &[])); + let (left_pad_length, right_pad_length) = { + let extra_space = content_width.saturating_sub(characters.len()); + + if center { + (extra_space / 2, extra_space / 2 + extra_space % 2) + } else { + (0, extra_space) + } + }; + let content = if style_bold { + line_characters + .iter() + .collect::<String>() + .bold() + .to_string() + } else if style_dim { + line_characters + .iter() + .collect::<String>() + .dimmed() + .to_string() + } else { + line_characters.iter().collect::<String>() + }; + let length_before_content = self.output.chars().count(); + + for _ in 0..self.indent { + self.output.push_str("│ "); + } + + if add_border { + self.output.push('│'); + } + + self.output.push_str(&" ".repeat(left_pad_length)); + self.output.push_str(&content); + self.output.push_str(&" ".repeat(right_pad_length)); + + let length_after_content = self.output.chars().count(); + let line_length = length_after_content - length_before_content; + + if line_length < content_width - 1 { + self.output + .push_str(&" ".repeat(content_width - line_length)); + } + + if add_border { + self.output.push('│'); + } + + self.output.push('\n'); + + if !remainder.is_empty() { + self.push( + remainder.iter().collect::<String>().as_str(), + 
center, + style_bold, + style_dim, + add_border, + ); + } + } + + fn push_header(&mut self, header: &str) { + self.push(header, true, self.styled, false, true); + } + + fn push_details(&mut self, details: &str) { + self.push(details, true, false, false, true); + } + + fn push_border(&mut self, border: &str) { + self.push(border, false, false, false, false); + } + + fn push_empty(&mut self) { + self.push("", false, false, false, true); + } + + pub fn disassemble(mut self) -> String { + let width = Disassembler::default_width(); + let top_border = "┌".to_string() + &"─".repeat(width - 2) + "┐"; + let section_border = "│".to_string() + &"┈".repeat(width - 2) + "│"; + let bottom_border = "└".to_string() + &"─".repeat(width - 2) + "┘"; + let name_display = self + .chunk + .name() + .map(|identifier| identifier.to_string()) + .unwrap_or_else(|| { + current_exe() + .map(|path| path.to_string_lossy().to_string()) + .unwrap_or("Chunk Disassembly".to_string()) + }); + + self.push_border(&top_border); + self.push_header(&name_display); + + if let Some(source) = self.source { + self.push_empty(); + self.push_details( + &source + .replace(" ", "") + .replace("\n\n", " ") + .replace('\n', " "), + ); + self.push_empty(); + } + + let info_line = format!( + "{} instructions, {} constants, {} locals, returns {}", + self.chunk.len(), + self.chunk.constants().len(), + self.chunk.locals().len(), + self.chunk + .return_type() + .map(|r#type| r#type.to_string()) + .unwrap_or("none".to_string()) + ); + + self.push(&info_line, true, false, true, true); + self.push_empty(); + + for line in INSTRUCTION_HEADER { + self.push_header(line); + } + + for (index, (instruction, position)) in self.chunk.instructions().iter().enumerate() { + let bytecode = format!("{:02X}", u32::from(instruction)); + let operation = instruction.operation().to_string(); + let info = instruction.disassembly_info(self.chunk); + let type_display = instruction + .yielded_type(self.chunk) + .map(|r#type| { + let type_string 
= r#type.to_string(); + + if type_string.len() > 16 { + format!("{type_string:.13}...") + } else { + type_string + } + }) + .unwrap_or(String::with_capacity(0)); + let position = position.to_string(); + + let instruction_display = format!( + "{index:^3} {bytecode:>8} {operation:13} {info:20} {type_display:^16} {position:12}" + ); + + self.push_details(&instruction_display); + } + + self.push_border(&section_border); + + for line in LOCAL_HEADER { + self.push_header(line); + } + + for ( + index, + Local { + identifier_index, + r#type, + scope, + register_index, + is_mutable: mutable, + }, + ) in self.chunk.locals().iter().enumerate() + { + let identifier_display = self + .chunk + .constants() + .get(*identifier_index as usize) + .map(|value| value.to_string()) + .unwrap_or_else(|| "unknown".to_string()); + let type_display = r#type + .as_ref() + .map(|r#type| { + let type_string = r#type.to_string(); + + if type_string.len() > 16 { + format!("{type_string:.13}...") + } else { + type_string + } + }) + .unwrap_or("unknown".to_string()); + let local_display = format!( + "{index:^3} {identifier_display:10} {type_display:16} {mutable:7} {scope:7} {register_index:8}" + ); + + self.push_details(&local_display); + } + + self.push_border(&section_border); + + for line in CONSTANT_HEADER { + self.push_header(line); + } + + for (index, value) in self.chunk.constants().iter().enumerate() { + let value_display = { + let value_string = value.to_string(); + + if value_string.len() > 15 { + format!("{value_string:.12}...") + } else { + value_string + } + }; + let constant_display = format!("{index:^3} {value_display:^15}"); + + self.push_details(&constant_display); + + if let Value::Concrete(ConcreteValue::Function(function)) = value { + let function_disassembly = function + .chunk() + .disassembler() + .styled(self.styled) + .indent(self.indent + 1) + .disassemble(); + + self.push_details(&function_disassembly); + } + } + + self.push_border(&bottom_border); + + let _ = 
self.output.trim_end_matches('\n'); + + self.output + } +} diff --git a/dust-lang/src/lib.rs b/dust-lang/src/lib.rs index 6cf67e6..c25a776 100644 --- a/dust-lang/src/lib.rs +++ b/dust-lang/src/lib.rs @@ -2,6 +2,7 @@ pub mod chunk; pub mod compiler; +pub mod disassembler; pub mod dust_error; pub mod formatter; pub mod instruction; @@ -14,8 +15,9 @@ pub mod r#type; pub mod value; pub mod vm; -pub use crate::chunk::{Chunk, ChunkDisassembler, ChunkError, Local, Scope}; +pub use crate::chunk::{Chunk, ChunkError, Local, Scope}; pub use crate::compiler::{compile, CompileError, Compiler}; +pub use crate::disassembler::Disassembler; pub use crate::dust_error::{AnnotatedError, DustError}; pub use crate::formatter::{format, Formatter}; pub use crate::instruction::Instruction;