gbf_core/module.rs
1#![deny(missing_docs)]
2
3use serde::Serialize;
4use std::{
5 collections::HashMap,
6 fmt::{self, Display, Formatter},
7};
8use thiserror::Error;
9
10use crate::{
11 basic_block::{BasicBlockId, BasicBlockType},
12 bytecode_loader::{self, BytecodeLoaderError},
13 decompiler::{
14 ast::visitors::emit_context::EmitContext,
15 function_decompiler::{FunctionDecompilerBuilder, FunctionDecompilerError},
16 },
17 function::{Function, FunctionId},
18 instruction::Instruction,
19 utils::Gs2BytecodeAddress,
20};
21
22/// Error type for module operations.
23#[derive(Error, Debug, Clone, Serialize)]
24pub enum ModuleError {
25 /// Error for when a function is not found in the module.
26 #[error("Function not found: {0}")]
27 FunctionNotFoundById(FunctionId),
28
29 /// Error for when a function is not found in the module.
30 #[error("Function not found: {0}")]
31 FunctionNotFoundByName(String),
32
33 /// When a function is created with a name that already exists.
34 #[error("Function with name {0} already exists.")]
35 DuplicateFunctionName(String),
36
37 /// When a function is created with an address that already exists.
38 #[error("Function with address {0} already exists with the name {1}.")]
39 DuplicateFunctionAddress(Gs2BytecodeAddress, String),
40
41 /// Error for when the bytecode loader fails to load bytecode.
42 #[error("BytecodeLoaderError: {0}")]
43 BytecodeLoaderError(#[from] BytecodeLoaderError),
44}
45
/// Builder that collects the optional settings used to construct a `Module`.
pub struct ModuleBuilder {
    /// Name given to the built module, if any.
    name: Option<String>,
    /// Source of bytecode to load into the module when it is built, if any.
    reader: Option<Box<dyn std::io::Read>>,
}
51
52/// Public API for `ModuleBuilder`.
53impl ModuleBuilder {
54 /// Create a new `ModuleBuilder`.
55 ///
56 /// # Arguments
57 /// - `name`: The name of the module.
58 ///
59 /// # Returns
60 /// - A new `ModuleBuilder` instance.
61 ///
62 /// # Example
63 /// ```
64 /// use gbf_core::module::ModuleBuilder;
65 ///
66 /// let builder = ModuleBuilder::new();
67 /// ```
68 pub fn new() -> Self {
69 Self {
70 name: None,
71 reader: None,
72 }
73 }
74 /// Set the name of the module.
75 ///
76 /// # Arguments
77 /// - `name`: The name of the module.
78 ///
79 /// # Returns
80 /// - A reference to the builder.
81 ///
82 /// # Example
83 /// ```
84 /// use gbf_core::module::ModuleBuilder;
85 ///
86 /// let builder = ModuleBuilder::new().name("test");
87 /// ```
88 pub fn name<N: Into<String>>(mut self, name: N) -> Self {
89 self.name = Some(name.into());
90 self
91 }
92
93 /// Set the reader for the module.
94 ///
95 /// # Arguments
96 /// - `reader`: The reader to use for the module.
97 ///
98 /// # Returns
99 /// - A reference to the builder.
100 ///
101 /// # Example
102 /// ```
103 /// use gbf_core::module::ModuleBuilder;
104 ///
105 /// let builder = ModuleBuilder::new().reader(Box::new(std::io::Cursor::new(vec![0x00, 0x01])));
106 /// ```
107 pub fn reader(mut self, reader: Box<dyn std::io::Read>) -> Self {
108 self.reader = Some(reader);
109 self
110 }
111
112 /// Build the `Module` from the builder.
113 ///
114 /// # Returns
115 /// - A new `Module` instance.
116 ///
117 /// # Example
118 /// ```
119 /// use gbf_core::module::ModuleBuilder;
120 ///
121 /// let module = ModuleBuilder::new().name("test").build().unwrap();
122 /// ```
123 pub fn build(self) -> Result<Module, ModuleError> {
124 let mut module = Module {
125 name: self.name,
126 functions: Vec::new(),
127 id_to_index: HashMap::new(),
128 name_to_id: HashMap::new(),
129 address_to_id: HashMap::new(),
130 };
131
132 // Create entry function
133 let fun_id = FunctionId::new_without_name(module.functions.len(), 0);
134
135 // Create new function struct
136 module.functions.push(Function::new(fun_id.clone()));
137 module.id_to_index.insert(fun_id.clone(), 0);
138 module.name_to_id.insert(None, fun_id.clone());
139 module.address_to_id.insert(0, fun_id.clone());
140
141 if let Some(reader) = self.reader {
142 module.load(reader)?;
143 }
144
145 Ok(module)
146 }
147}
148
149/// Represents a GS2 module in a bytecode system. A module contains
150/// functions, strings, and other data.
151pub struct Module {
152 /// The name of the module.
153 pub name: Option<String>,
154 /// A list of functions in the module, which provides fast sequential access.
155 functions: Vec<Function>,
156 /// A map of function IDs to their index in the functions vector.
157 id_to_index: HashMap<FunctionId, usize>,
158 /// A map of function names to their IDs.
159 name_to_id: HashMap<Option<String>, FunctionId>,
160 /// A map of function addresses to their IDs.
161 address_to_id: HashMap<Gs2BytecodeAddress, FunctionId>,
162}
163
164/// Public API for `Module`.
165impl Module {
166 /// Create a new function in the module.
167 ///
168 /// # Returns
169 /// - The `FunctionId` of the new function.
170 ///
171 /// # Errors
172 /// - `ModuleError::EntryModuleDefinedMoreThanOnce` if the entry function is already set.
173 ///
174 /// # Example
175 /// ```
176 /// use gbf_core::module::ModuleBuilder;
177 ///
178 /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
179 /// let function_id = module.create_function("test_function", 123).unwrap();
180 /// ```
181 pub fn create_function<N: Into<String>>(
182 &mut self,
183 name: N,
184 address: Gs2BytecodeAddress,
185 ) -> Result<FunctionId, ModuleError> {
186 let name = name.into();
187 let function_id = FunctionId::new(self.functions.len(), Some(name.clone()), address);
188
189 // Check for duplicate function name
190 if self.name_to_id.contains_key(&Some(name.clone())) {
191 return Err(ModuleError::DuplicateFunctionName(name));
192 }
193
194 // Check for duplicate function address
195 if self.address_to_id.contains_key(&address) {
196 let existing_id = self.address_to_id.get(&address).unwrap().clone();
197 let existing_name = existing_id.name.unwrap_or("{entry function}".to_string());
198 return Err(ModuleError::DuplicateFunctionAddress(
199 address,
200 existing_name,
201 ));
202 }
203
204 // Create new function struct
205 self.functions.push(Function::new(function_id.clone()));
206 self.id_to_index
207 .insert(function_id.clone(), self.functions.len() - 1);
208 self.name_to_id
209 .insert(Some(name.clone()), function_id.clone());
210 self.address_to_id.insert(address, function_id.clone());
211
212 Ok(function_id)
213 }
214
215 /// Decompile the module.
216 ///
217 /// # Arguments
218 /// - `ctx`: The EmitContext to use for decompilation.
219 ///
220 /// # Returns
221 /// - A Result containing the decompiled module if successful, or an error if not.
222 pub fn decompile(&self, ctx: EmitContext) -> Result<String, FunctionDecompilerError> {
223 let mut decompiled = String::new();
224 for function in self.functions.iter() {
225 let mut decompiler = FunctionDecompilerBuilder::new(function).build();
226 let decompiled_function = decompiler.decompile(ctx)?;
227 decompiled.push_str(&decompiled_function);
228 decompiled.push('\n');
229 }
230 Ok(decompiled)
231 }
232
233 /// Check if the function exists in the module
234 ///
235 /// # Arguments
236 /// - `name`: The name of the function to check.
237 ///
238 /// # Returns
239 /// - A boolean indicating if the function exists.
240 ///
241 /// # Example
242 /// ```
243 /// use gbf_core::module::ModuleBuilder;
244 ///
245 /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
246 /// let function_id = module.create_function("test_function", 123).unwrap();
247 /// assert!(module.has_function("test_function"));
248 /// ```
249 pub fn has_function<N: Into<String>>(&self, name: N) -> bool {
250 let name = name.into();
251 self.name_to_id.contains_key(&Some(name))
252 }
253
254 /// Get function by name
255 ///
256 /// # Arguments
257 /// - `name`: The name of the function to retrieve.
258 ///
259 /// # Returns
260 /// - A reference to the function, if it exists.
261 ///
262 /// # Errors
263 /// - `ModuleError::FunctionNotFoundByName` if the function does not exist.
264 ///
265 /// # Example
266 /// ```
267 /// use gbf_core::module::ModuleBuilder;
268 ///
269 /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
270 /// let function_id = module.create_function("test_function", 123).unwrap();
271 /// let function = module.get_function_by_name("test_function").unwrap();
272 /// ```
273 pub fn get_function_by_name<N: Into<String>>(&self, name: N) -> Result<&Function, ModuleError> {
274 let name = name.into();
275 let id = self.get_function_id_by_name(name)?;
276 self.get_function_by_id(&id)
277 }
278
279 /// Get the entry function of the module.
280 ///
281 /// # Returns
282 /// - A reference to the entry function.
283 ///
284 /// # Example
285 /// ```
286 /// use gbf_core::module::ModuleBuilder;
287 ///
288 /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
289 /// let function_id = module.create_function("test_function", 123).unwrap();
290 /// let entry_function = module.get_entry_function();
291 /// ```
292 pub fn get_entry_function(&self) -> &Function {
293 // Get the function at address 0
294 self.functions.first().expect("Entry function must exist")
295 }
296
297 /// Get the entry function id of the module (mutable).
298 ///
299 /// # Returns
300 /// - A mutable reference to the entry function.
301 ///
302 /// # Example
303 /// ```
304 /// use gbf_core::module::ModuleBuilder;
305 ///
306 /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
307 /// let function_id = module.create_function("test_function", 123).unwrap();
308 /// let entry_function = module.get_entry_function_mut();
309 /// ```
310 pub fn get_entry_function_mut(&mut self) -> &mut Function {
311 // Get the function at address 0
312 self.functions
313 .get_mut(0)
314 .expect("Entry function must exist")
315 }
316
317 /// Get function by name (mutable)
318 ///
319 /// # Arguments
320 /// - `name`: The name of the function to retrieve.
321 ///
322 /// # Returns
323 /// - A mutable reference to the function, if it exists.
324 ///
325 /// # Errors
326 /// - `ModuleError::FunctionNotFoundByName` if the function does not exist.
327 ///
328 /// # Example
329 /// ```
330 /// use gbf_core::module::ModuleBuilder;
331 ///
332 /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
333 /// let function_id = module.create_function("test_function", 123).unwrap();
334 /// let function = module.get_function_by_name_mut("test_function").unwrap();
335 /// ```
336 pub fn get_function_by_name_mut<N: Into<String>>(
337 &mut self,
338 name: N,
339 ) -> Result<&mut Function, ModuleError> {
340 let name = name.into();
341 let id = self.get_function_id_by_name(name)?;
342 self.get_function_by_id_mut(&id)
343 }
344
345 /// Get function id by name
346 ///
347 /// # Arguments
348 /// - `name`: The name of the function to retrieve.
349 ///
350 /// # Returns
351 /// - The `FunctionId` of the function, if it exists.
352 ///
353 /// # Errors
354 /// - `ModuleError::FunctionNotFoundByName` if the function does not exist.
355 ///
356 /// # Example
357 /// ```
358 /// use gbf_core::module::ModuleBuilder;
359 ///
360 /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
361 /// let function_id = module.create_function("test_function", 123).unwrap();
362 /// let function_id = module.get_function_id_by_name("test_function").unwrap();
363 /// ```
364 pub fn get_function_id_by_name<N: Into<String>>(
365 &self,
366 name: N,
367 ) -> Result<FunctionId, ModuleError> {
368 let name = name.into();
369 self.name_to_id
370 .get(&Some(name.clone()))
371 .cloned()
372 .ok_or(ModuleError::FunctionNotFoundByName(name))
373 }
374
375 /// Get the number of functions in the module.
376 ///
377 /// # Returns
378 /// - The number of functions in the module.
379 ///
380 /// # Example
381 /// ```
382 /// use gbf_core::module::ModuleBuilder;
383 ///
384 /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
385 /// let function_id = module.create_function("test_function", 123).unwrap();
386 /// assert_eq!(module.len(), 2);
387 /// ```
388 pub fn len(&self) -> usize {
389 self.functions.len()
390 }
391
392 /// Check if the `Module` is empty.
393 ///
394 /// # Returns
395 /// - A boolean indicating if the `Module` is empty.
396 ///
397 /// # Example
398 /// ```
399 /// use gbf_core::module::ModuleBuilder;
400 ///
401 /// let module = ModuleBuilder::new().name("test.gs2").build().unwrap();
402 /// assert!(!module.is_empty());
403 /// ```
404 pub fn is_empty(&self) -> bool {
405 // The module will always have an entry function, so this is always false
406 self.functions.is_empty()
407 }
408}
409
410/// Internal API for `Module`.
411impl Module {
412 /// Load bytecode into the module using a reader.
413 ///
414 /// # Arguments
415 /// - `reader`: The reader to use to load the bytecode.
416 ///
417 /// # Errors
418 /// - `ModuleError::BytecodeLoaderError` if the bytecode loader fails to load the bytecode.
419 /// - `ModuleError::EntryModuleDefinedMoreThanOnce` if the entry function is already set.
420 fn load<R: std::io::Read>(&mut self, reader: R) -> Result<(), ModuleError> {
421 let loaded_bytecode = bytecode_loader::BytecodeLoaderBuilder::new(reader).build()?;
422
423 // Iterate through each instruction in the bytecode
424 for (offset, instruction) in loaded_bytecode.instructions.iter().enumerate() {
425 // Check if instruction is even reachable. If it's not, we can skip it
426 if !loaded_bytecode.is_instruction_reachable(offset) {
427 continue;
428 }
429 let function_name = loaded_bytecode.get_function_name_for_address(offset);
430 // The precondition above guarantees that this will always be true
431 assert!(function_name.is_ok());
432 let function_name = function_name.unwrap().clone();
433
434 if let Some(function_name) = function_name.clone() {
435 if !self.has_function(function_name.clone()) {
436 let function_name_clone = function_name.clone();
437 let offset = loaded_bytecode
438 .function_map
439 .get(&Some(function_name_clone))
440 .expect("Function must exist in the function map");
441 self.create_function(function_name, *offset)?;
442 }
443 }
444
445 let is_entry = function_name.is_none();
446
447 // Get the function reference. If the function is an entry function, we will use the entry function, otherwise we will use the function name
448 let function = if is_entry {
449 self.get_entry_function_mut()
450 } else {
451 self.get_function_by_name_mut(function_name.unwrap())?
452 };
453
454 // Get the start address for the basic block
455 let start_address = loaded_bytecode.find_block_start_address(offset);
456
457 // Create new basic block if it doesn't exist
458 if !function.basic_block_exists_by_address(start_address) {
459 // We won't run into this error because we are not making an entry block here
460 function
461 .create_block(BasicBlockType::Normal, start_address)
462 .expect("Block collisions are not possible");
463 }
464
465 // Get the basic block reference
466 let block = function
467 .get_basic_block_by_start_address_mut(start_address)
468 .unwrap();
469
470 // Add the instruction to the basic block
471 block.add_instruction(instruction.clone());
472 }
473
474 // To the entry block, let's create a new basic block with address set to the length of the bytecode
475 // This is the block that will be used to represent the end of the module
476 let entry = self.get_entry_function_mut();
477 entry
478 .create_block(
479 BasicBlockType::ModuleEnd,
480 loaded_bytecode.instructions.len() as Gs2BytecodeAddress,
481 )
482 .unwrap();
483
484 // Iterate through each function that was created. For each function, we will iterate through
485 // each basic block and find the terminator instruction. Based on the terminator opcode,
486 // we will connect edges in the graph.
487 for function in self.functions.iter_mut() {
488 let block_data: Vec<_> = function
489 .iter()
490 .map(|block| (block.id, block.last_instruction().cloned()))
491 .collect();
492
493 for (id, terminator) in block_data {
494 Self::process_block_edges(function, id, terminator);
495 }
496 }
497 Ok(())
498 }
499
500 fn process_block_edges(
501 function: &mut Function,
502 id: BasicBlockId,
503 terminator: Option<Instruction>,
504 ) {
505 if let Some(terminator) = terminator {
506 let terminator_opcode = terminator.opcode;
507 let terminator_operand = terminator.operand;
508 let terminator_address = terminator.address;
509 if terminator_opcode.has_jump_target() {
510 if let Some(branch_address) =
511 terminator_operand.and_then(|o| o.get_number_value().ok())
512 {
513 let branch_block_id = function
514 .get_basic_block_id_by_start_address(branch_address as Gs2BytecodeAddress)
515 .expect("Block must exist");
516 function.add_edge(id, branch_block_id).unwrap();
517 }
518 }
519
520 // If appropriate, connect the next block
521 if terminator_opcode.connects_to_next_block() {
522 let next_block_id = function
523 .get_basic_block_id_by_start_address(terminator_address + 1)
524 .expect("Block must exist");
525 function.add_edge(id, next_block_id).unwrap();
526 }
527 }
528 }
529
530 /// Get a function by its `FunctionId`.
531 ///
532 /// # Arguments
533 /// - `id`: The `FunctionId` of the function to retrieve.
534 ///
535 /// # Returns
536 /// - A reference to the function, if it exists.
537 ///
538 /// # Errors
539 /// - `ModuleError::FunctionNotFoundById` if the function does not exist.
540 fn get_function_by_id(&self, id: &FunctionId) -> Result<&Function, ModuleError> {
541 let index = self
542 .id_to_index
543 .get(id)
544 .ok_or(ModuleError::FunctionNotFoundById(id.clone()))?;
545
546 // Provides fast sequential access, but panics if the index is out of bounds
547 Ok(&self.functions[*index])
548 }
549
550 /// Get a mutable reference to a function by its `FunctionId`.
551 ///
552 /// # Arguments
553 /// - `id`: The `FunctionId` of the function to retrieve.
554 ///
555 /// # Returns
556 /// - A mutable reference to the function, if it exists.
557 ///
558 /// # Errors
559 /// - `ModuleError::FunctionNotFoundById` if the function does not exist.
560 fn get_function_by_id_mut(&mut self, id: &FunctionId) -> Result<&mut Function, ModuleError> {
561 let index = self
562 .id_to_index
563 .get(id)
564 .ok_or(ModuleError::FunctionNotFoundById(id.clone()))?;
565
566 // Provides fast sequential access, but panics if the index is out of bounds
567 Ok(&mut self.functions[*index])
568 }
569}
570
571// === Implementations ===
572
573/// Display implementation for `Module`.
574impl Display for Module {
575 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
576 write!(f, "{}", self.name.as_deref().unwrap_or("Unnamed Module"))
577 }
578}
579
580/// Default implementation for `ModuleBuilder`.
581impl Default for ModuleBuilder {
582 fn default() -> Self {
583 Self::new()
584 }
585}
586
587/// Deref implementation for `Module`.
588impl std::ops::Deref for Module {
589 type Target = Vec<Function>;
590
591 fn deref(&self) -> &Self::Target {
592 &self.functions
593 }
594}
595
596/// Index implementation for `Module`.
597impl std::ops::Index<usize> for Module {
598 type Output = Function;
599
600 fn index(&self, index: usize) -> &Self::Output {
601 &self.functions[index]
602 }
603}
604
605/// Immutable IntoIterator implementation for `Module`.
606impl<'a> IntoIterator for &'a Module {
607 type Item = &'a Function;
608 type IntoIter = std::slice::Iter<'a, Function>;
609
610 fn into_iter(self) -> Self::IntoIter {
611 self.functions.iter()
612 }
613}
614
615/// Mutable IntoIterator implementation for `Module`.
616impl<'a> IntoIterator for &'a mut Module {
617 type Item = &'a mut Function;
618 type IntoIter = std::slice::IterMut<'a, Function>;
619
620 fn into_iter(self) -> Self::IntoIter {
621 self.functions.iter_mut()
622 }
623}
624
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a module whose bytecode is read from an in-memory copy of `bytes`.
    fn build_from_bytes(bytes: &[u8]) -> Result<Module, ModuleError> {
        ModuleBuilder::new()
            .reader(Box::new(std::io::Cursor::new(bytes.to_vec())))
            .build()
    }

    #[test]
    fn load_bytecode() {
        // Bytecode that is expected to load successfully.
        let loadable: &[u8] = &[
            0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00,
        ];
        assert!(build_from_bytes(loadable).is_ok());

        // Failure case: only the first eight bytes of the input above.
        let truncated: &[u8] = &[0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04];
        assert!(build_from_bytes(truncated).is_err());
    }
}
650}