1
0

Move the disassembler to its own module

This commit is contained in:
Jeff 2024-11-06 18:39:09 -05:00
parent a997665d1a
commit ea88ffc451
3 changed files with 373 additions and 366 deletions

View File

@ -3,47 +3,15 @@
//! A chunk consists of a sequence of instructions and their positions, a list of constants, and a
//! list of locals that can be executed by the Dust virtual machine. Chunks have a name when they
//! belong to a named function.
//!
//! # Disassembly
//!
//! Chunks can be disassembled into a human-readable format using the `disassemble` method. The
//! output is designed to be displayed in a terminal and can be styled for better readability.
//!
//! ```text
//! ┌──────────────────────────────────────────────────────────────────────────────┐
//! │ /var/home/jeff/Repositories/dust/target/debug/dust-shell │
//! │ 3 instructions, 1 constants, 0 locals, returns none │
//! │ Instructions │
//! │ ------------ │
//! │ INDEX BYTECODE OPERATION INFO TYPE POSITION │
//! │ ----- -------- ------------- ------------------------- --------- ----------- │
//! │ 0 00000003 LOAD_CONSTANT R0 = C0 str (11, 26) │
//! │ 1 01390117 CALL_NATIVE write_line(R0) (0, 27) │
//! │ 2 00000018 RETURN (27, 27) │
//! │┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈│
//! │ Locals │
//! │ ------ │
//! │ INDEX IDENTIFIER TYPE MUTABLE SCOPE REGISTER │
//! │ ----- ---------- -------- ------- ------- -------- │
//! │┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈│
//! │ Constants │
//! │ --------- │
//! │ INDEX VALUE │
//! │ ----- --------------- │
//! │ 0 Hello, world! │
//! └──────────────────────────────────────────────────────────────────────────────┘
//! ```
use std::{
cmp::Ordering,
env::current_exe,
fmt::{self, Debug, Display, Formatter},
};
use colored::Colorize;
use serde::{Deserialize, Serialize};
use crate::{ConcreteValue, Instruction, Operation, Span, Type, Value};
use crate::{Disassembler, Instruction, Operation, Span, Type, Value};
/// In-memory representation of a Dust program or function.
///
@ -299,8 +267,8 @@ impl Chunk {
}
}
pub fn disassembler(&self) -> ChunkDisassembler {
ChunkDisassembler::new(self)
pub fn disassembler(&self) -> Disassembler {
Disassembler::new(self)
}
}
@ -407,336 +375,7 @@ impl Display for Scope {
}
}
/// Builder that constructs a human-readable representation of a chunk.
pub struct ChunkDisassembler<'a> {
output: String,
chunk: &'a Chunk,
source: Option<&'a str>,
// Options
width: usize,
styled: bool,
indent: usize,
}
impl<'a> ChunkDisassembler<'a> {
const INSTRUCTION_HEADER: [&'static str; 4] = [
"Instructions",
"------------",
" i BYTECODE OPERATION INFO TYPE POSITION ",
"--- -------- ------------- -------------------- --------------- ------------",
];
const CONSTANT_HEADER: [&'static str; 4] = [
"Constants",
"---------",
"INDEX VALUE ",
"----- ---------------",
];
const LOCAL_HEADER: [&'static str; 4] = [
"Locals",
"------",
"INDEX IDENTIFIER TYPE MUTABLE SCOPE REGISTER",
"----- ---------- ---------- ------- ------- --------",
];
/// The default width of the disassembly output. To correctly align the output, this should
/// return the width of the longest line that the disassembler is guaranteed to produce.
pub fn default_width() -> usize {
let longest_line = Self::INSTRUCTION_HEADER[3];
longest_line.chars().count().max(80)
}
pub fn new(chunk: &'a Chunk) -> Self {
Self {
output: String::new(),
chunk,
source: None,
width: Self::default_width(),
styled: false,
indent: 0,
}
}
pub fn source(mut self, source: &'a str) -> Self {
self.source = Some(source);
self
}
pub fn width(mut self, width: usize) -> Self {
self.width = width;
self
}
pub fn styled(mut self, styled: bool) -> Self {
self.styled = styled;
self
}
pub fn indent(mut self, indent: usize) -> Self {
self.indent = indent;
self
}
fn push(
&mut self,
text: &str,
center: bool,
style_bold: bool,
style_dim: bool,
add_border: bool,
) {
let characters = text.chars().collect::<Vec<char>>();
let content_width = if add_border {
self.width - 2
} else {
self.width
};
let (line_characters, remainder) = characters
.split_at_checked(content_width)
.unwrap_or((characters.as_slice(), &[]));
let (left_pad_length, right_pad_length) = {
let extra_space = content_width.saturating_sub(characters.len());
if center {
(extra_space / 2, extra_space / 2 + extra_space % 2)
} else {
(0, extra_space)
}
};
let content = if style_bold {
line_characters
.iter()
.collect::<String>()
.bold()
.to_string()
} else if style_dim {
line_characters
.iter()
.collect::<String>()
.dimmed()
.to_string()
} else {
line_characters.iter().collect::<String>()
};
let length_before_content = self.output.chars().count();
for _ in 0..self.indent {
self.output.push_str("");
}
if add_border {
self.output.push('│');
}
self.output.push_str(&" ".repeat(left_pad_length));
self.output.push_str(&content);
self.output.push_str(&" ".repeat(right_pad_length));
let length_after_content = self.output.chars().count();
let line_length = length_after_content - length_before_content;
if line_length < content_width - 1 {
self.output
.push_str(&" ".repeat(content_width - line_length));
}
if add_border {
self.output.push('│');
}
self.output.push('\n');
if !remainder.is_empty() {
self.push(
remainder.iter().collect::<String>().as_str(),
center,
style_bold,
style_dim,
add_border,
);
}
}
fn push_header(&mut self, header: &str) {
self.push(header, true, self.styled, false, true);
}
fn push_details(&mut self, details: &str) {
self.push(details, true, false, false, true);
}
fn push_border(&mut self, border: &str) {
self.push(border, false, false, false, false);
}
fn push_empty(&mut self) {
self.push("", false, false, false, true);
}
pub fn disassemble(mut self) -> String {
let top_border = "".to_string() + &"".repeat(self.width - 2) + "";
let section_border = "".to_string() + &"".repeat(self.width - 2) + "";
let bottom_border = "".to_string() + &"".repeat(self.width - 2) + "";
let name_display = self
.chunk
.name
.as_ref()
.map(|identifier| identifier.to_string())
.unwrap_or_else(|| {
current_exe()
.map(|path| path.to_string_lossy().to_string())
.unwrap_or("Chunk Disassembly".to_string())
});
self.push_border(&top_border);
self.push_header(&name_display);
if let Some(source) = self.source {
self.push_empty();
self.push_details(
&source
.replace(" ", "")
.replace("\n\n", " ")
.replace('\n', " "),
);
self.push_empty();
}
let info_line = format!(
"{} instructions, {} constants, {} locals, returns {}",
self.chunk.instructions.len(),
self.chunk.constants.len(),
self.chunk.locals.len(),
self.chunk
.return_type()
.map(|r#type| r#type.to_string())
.unwrap_or("none".to_string())
);
self.push(&info_line, true, false, true, true);
self.push_empty();
for line in &Self::INSTRUCTION_HEADER {
self.push_header(line);
}
for (index, (instruction, position)) in self.chunk.instructions.iter().enumerate() {
let bytecode = format!("{:02X}", u32::from(instruction));
let operation = instruction.operation().to_string();
let info = instruction.disassembly_info(self.chunk);
let type_display = instruction
.yielded_type(self.chunk)
.map(|r#type| {
let type_string = r#type.to_string();
if type_string.len() > 15 {
format!("{type_string:.12}...")
} else {
type_string
}
})
.unwrap_or(String::with_capacity(0));
let position = position.to_string();
let instruction_display = format!(
"{index:^3} {bytecode:>8} {operation:13} {info:20} {type_display:^15} {position:12}"
);
self.push_details(&instruction_display);
}
self.push_border(&section_border);
for line in &Self::LOCAL_HEADER {
self.push_header(line);
}
for (
index,
Local {
identifier_index,
r#type,
scope,
register_index,
is_mutable: mutable,
},
) in self.chunk.locals.iter().enumerate()
{
let identifier_display = self
.chunk
.constants
.get(*identifier_index as usize)
.map(|value| value.to_string())
.unwrap_or_else(|| "unknown".to_string());
let type_display = r#type
.as_ref()
.map(|r#type| {
let type_string = r#type.to_string();
if type_string.len() > 10 {
format!("{type_string:.7}...")
} else {
type_string
}
})
.unwrap_or("unknown".to_string());
let local_display = format!(
"{index:<5} {identifier_display:10} {type_display:10} {mutable:7} {scope:7} {register_index:8}"
);
self.push_details(&local_display);
}
self.push_border(&section_border);
for line in &Self::CONSTANT_HEADER {
self.push_header(line);
}
for (index, value) in self.chunk.constants.iter().enumerate() {
let value_display = {
let value_string = value.to_string();
if value_string.len() > 15 {
format!("{value_string:.12}...")
} else {
value_string
}
};
let constant_display = format!("{index:<5} {value_display:^15}");
self.push_details(&constant_display);
if let Some(function_disassembly) = match value {
Value::Concrete(ConcreteValue::Function(function)) => Some({
function
.chunk()
.disassembler()
.styled(self.styled)
.indent(self.indent + 1)
.disassemble()
}),
_ => None,
} {
self.output.push_str(&function_disassembly);
}
}
self.push_border(&bottom_border);
let _ = self.output.trim_end_matches('\n');
self.output
}
}
/// Errors that can occur when using a [`Chunk`].
#[derive(Clone, Debug, PartialEq)]
pub enum ChunkError {
ConstantIndexOutOfBounds { index: usize },

View File

@ -0,0 +1,366 @@
//! Tool for disassembling chunks into a human-readable format.
//!
//! A disassembler can be created by calling [Chunk::disassembler][] or by instantiating one with
//! [Disassembler::new][].
//!
//! # Options
//!
//! The disassembler can be customized with the 'styled' option, which will apply ANSI color codes
//! to the output.
//!
//! # Output
//!
//! The output of [Disassembler::disassemble] is a string that can be printed to the console or
//! written to a file. Below is an example of the disassembly for a simple "Hello, world!" program.
//!
//! ```text
//! ┌──────────────────────────────────────────────────────────────────────────────┐
//! │ <file name omitted> │
//! │ │
//! │ write_line("Hello, world!") │
//! │ │
//! │ 3 instructions, 1 constants, 0 locals, returns none │
//! │ │
//! │ Instructions │
//! │ ------------ │
//! │ i BYTECODE OPERATION INFO TYPE POSITION │
//! │--- -------- ------------- -------------------- ---------------- ------------ │
//! │ 0 03 LOAD_CONSTANT R0 = C0 str (11, 26) │
//! │ 1 1390117 CALL_NATIVE write_line(R0) (0, 27) │
//! │ 2 18 RETURN (27, 27) │
//! │┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈│
//! │ Locals │
//! │ ------ │
//! │ i IDENTIFIER TYPE MUTABLE SCOPE REGISTER │
//! │ --- ---------- ---------------- ------- ------- -------- │
//! │┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈┈│
//! │ Constants │
//! │ --------- │
//! │ i VALUE │
//! │ --- --------------- │
//! │ 0 Hello, world! │
//! └──────────────────────────────────────────────────────────────────────────────┘
//! ```
use std::env::current_exe;
use colored::Colorize;
use crate::{Chunk, ConcreteValue, Local, Value};
const INSTRUCTION_HEADER: [&str; 4] = [
"Instructions",
"------------",
" i BYTECODE OPERATION INFO TYPE POSITION ",
"--- -------- ------------- -------------------- ---------------- ------------",
];
const CONSTANT_HEADER: [&str; 4] = [
"Constants",
"---------",
" i VALUE ",
"--- ---------------",
];
const LOCAL_HEADER: [&str; 4] = [
"Locals",
"------",
" i IDENTIFIER TYPE MUTABLE SCOPE REGISTER",
"--- ---------- ---------------- ------- ------- --------",
];
/// Builder that constructs a human-readable representation of a chunk.
///
/// See the [module-level documentation](index.html) for more information.
pub struct Disassembler<'a> {
output: String,
chunk: &'a Chunk,
source: Option<&'a str>,
// Options
styled: bool,
indent: usize,
}
impl<'a> Disassembler<'a> {
pub fn new(chunk: &'a Chunk) -> Self {
Self {
output: String::new(),
chunk,
source: None,
styled: false,
indent: 0,
}
}
/// The default width of the disassembly output. To correctly align the output, this should
/// return the width of the longest line that the disassembler is guaranteed to produce.
pub fn default_width() -> usize {
let longest_line = INSTRUCTION_HEADER[3];
longest_line.chars().count().max(80)
}
pub fn source(mut self, source: &'a str) -> Self {
self.source = Some(source);
self
}
pub fn styled(mut self, styled: bool) -> Self {
self.styled = styled;
self
}
pub fn indent(mut self, indent: usize) -> Self {
self.indent = indent;
self
}
fn push(
&mut self,
text: &str,
center: bool,
style_bold: bool,
style_dim: bool,
add_border: bool,
) {
let width = Disassembler::default_width();
let characters = text.chars().collect::<Vec<char>>();
let content_width = if add_border { width - 2 } else { width };
let (line_characters, remainder) = characters
.split_at_checked(content_width)
.unwrap_or((characters.as_slice(), &[]));
let (left_pad_length, right_pad_length) = {
let extra_space = content_width.saturating_sub(characters.len());
if center {
(extra_space / 2, extra_space / 2 + extra_space % 2)
} else {
(0, extra_space)
}
};
let content = if style_bold {
line_characters
.iter()
.collect::<String>()
.bold()
.to_string()
} else if style_dim {
line_characters
.iter()
.collect::<String>()
.dimmed()
.to_string()
} else {
line_characters.iter().collect::<String>()
};
let length_before_content = self.output.chars().count();
for _ in 0..self.indent {
self.output.push_str("");
}
if add_border {
self.output.push('│');
}
self.output.push_str(&" ".repeat(left_pad_length));
self.output.push_str(&content);
self.output.push_str(&" ".repeat(right_pad_length));
let length_after_content = self.output.chars().count();
let line_length = length_after_content - length_before_content;
if line_length < content_width - 1 {
self.output
.push_str(&" ".repeat(content_width - line_length));
}
if add_border {
self.output.push('│');
}
self.output.push('\n');
if !remainder.is_empty() {
self.push(
remainder.iter().collect::<String>().as_str(),
center,
style_bold,
style_dim,
add_border,
);
}
}
fn push_header(&mut self, header: &str) {
self.push(header, true, self.styled, false, true);
}
fn push_details(&mut self, details: &str) {
self.push(details, true, false, false, true);
}
fn push_border(&mut self, border: &str) {
self.push(border, false, false, false, false);
}
fn push_empty(&mut self) {
self.push("", false, false, false, true);
}
pub fn disassemble(mut self) -> String {
let width = Disassembler::default_width();
let top_border = "".to_string() + &"".repeat(width - 2) + "";
let section_border = "".to_string() + &"".repeat(width - 2) + "";
let bottom_border = "".to_string() + &"".repeat(width - 2) + "";
let name_display = self
.chunk
.name()
.map(|identifier| identifier.to_string())
.unwrap_or_else(|| {
current_exe()
.map(|path| path.to_string_lossy().to_string())
.unwrap_or("Chunk Disassembly".to_string())
});
self.push_border(&top_border);
self.push_header(&name_display);
if let Some(source) = self.source {
self.push_empty();
self.push_details(
&source
.replace(" ", "")
.replace("\n\n", " ")
.replace('\n', " "),
);
self.push_empty();
}
let info_line = format!(
"{} instructions, {} constants, {} locals, returns {}",
self.chunk.len(),
self.chunk.constants().len(),
self.chunk.locals().len(),
self.chunk
.return_type()
.map(|r#type| r#type.to_string())
.unwrap_or("none".to_string())
);
self.push(&info_line, true, false, true, true);
self.push_empty();
for line in INSTRUCTION_HEADER {
self.push_header(line);
}
for (index, (instruction, position)) in self.chunk.instructions().iter().enumerate() {
let bytecode = format!("{:02X}", u32::from(instruction));
let operation = instruction.operation().to_string();
let info = instruction.disassembly_info(self.chunk);
let type_display = instruction
.yielded_type(self.chunk)
.map(|r#type| {
let type_string = r#type.to_string();
if type_string.len() > 16 {
format!("{type_string:.13}...")
} else {
type_string
}
})
.unwrap_or(String::with_capacity(0));
let position = position.to_string();
let instruction_display = format!(
"{index:^3} {bytecode:>8} {operation:13} {info:20} {type_display:^16} {position:12}"
);
self.push_details(&instruction_display);
}
self.push_border(&section_border);
for line in LOCAL_HEADER {
self.push_header(line);
}
for (
index,
Local {
identifier_index,
r#type,
scope,
register_index,
is_mutable: mutable,
},
) in self.chunk.locals().iter().enumerate()
{
let identifier_display = self
.chunk
.constants()
.get(*identifier_index as usize)
.map(|value| value.to_string())
.unwrap_or_else(|| "unknown".to_string());
let type_display = r#type
.as_ref()
.map(|r#type| {
let type_string = r#type.to_string();
if type_string.len() > 16 {
format!("{type_string:.13}...")
} else {
type_string
}
})
.unwrap_or("unknown".to_string());
let local_display = format!(
"{index:^3} {identifier_display:10} {type_display:16} {mutable:7} {scope:7} {register_index:8}"
);
self.push_details(&local_display);
}
self.push_border(&section_border);
for line in CONSTANT_HEADER {
self.push_header(line);
}
for (index, value) in self.chunk.constants().iter().enumerate() {
let value_display = {
let value_string = value.to_string();
if value_string.len() > 15 {
format!("{value_string:.12}...")
} else {
value_string
}
};
let constant_display = format!("{index:^3} {value_display:^15}");
self.push_details(&constant_display);
if let Value::Concrete(ConcreteValue::Function(function)) = value {
let function_disassembly = function
.chunk()
.disassembler()
.styled(self.styled)
.indent(self.indent + 1)
.disassemble();
self.push_details(&function_disassembly);
}
}
self.push_border(&bottom_border);
let _ = self.output.trim_end_matches('\n');
self.output
}
}

View File

@ -2,6 +2,7 @@
pub mod chunk;
pub mod compiler;
pub mod disassembler;
pub mod dust_error;
pub mod formatter;
pub mod instruction;
@ -14,8 +15,9 @@ pub mod r#type;
pub mod value;
pub mod vm;
pub use crate::chunk::{Chunk, ChunkDisassembler, ChunkError, Local, Scope};
pub use crate::chunk::{Chunk, ChunkError, Local, Scope};
pub use crate::compiler::{compile, CompileError, Compiler};
pub use crate::disassembler::Disassembler;
pub use crate::dust_error::{AnnotatedError, DustError};
pub use crate::formatter::{format, Formatter};
pub use crate::instruction::Instruction;