gbf_core/
module.rs

1#![deny(missing_docs)]
2
3use serde::Serialize;
4use std::{
5    collections::HashMap,
6    fmt::{self, Display, Formatter},
7};
8use thiserror::Error;
9
10use crate::{
11    basic_block::{BasicBlockId, BasicBlockType},
12    bytecode_loader::{self, BytecodeLoaderError},
13    decompiler::{
14        ast::visitors::emit_context::EmitContext,
15        function_decompiler::{FunctionDecompilerBuilder, FunctionDecompilerError},
16    },
17    function::{Function, FunctionId},
18    instruction::Instruction,
19    utils::Gs2BytecodeAddress,
20};
21
22/// Error type for module operations.
23#[derive(Error, Debug, Clone, Serialize)]
24pub enum ModuleError {
25    /// Error for when a function is not found in the module.
26    #[error("Function not found: {0}")]
27    FunctionNotFoundById(FunctionId),
28
29    /// Error for when a function is not found in the module.
30    #[error("Function not found: {0}")]
31    FunctionNotFoundByName(String),
32
33    /// When a function is created with a name that already exists.
34    #[error("Function with name {0} already exists.")]
35    DuplicateFunctionName(String),
36
37    /// When a function is created with an address that already exists.
38    #[error("Function with address {0} already exists with the name {1}.")]
39    DuplicateFunctionAddress(Gs2BytecodeAddress, String),
40
41    /// Error for when the bytecode loader fails to load bytecode.
42    #[error("BytecodeLoaderError: {0}")]
43    BytecodeLoaderError(#[from] BytecodeLoaderError),
44}
45
/// Builder that gathers the optional settings used to construct a `Module`.
pub struct ModuleBuilder {
    /// Name to give the module; `None` leaves the module unnamed.
    name: Option<String>,
    /// Source of bytecode; when set, `build` loads it into the new module.
    reader: Option<Box<dyn std::io::Read>>,
}
51
52/// Public API for `ModuleBuilder`.
53impl ModuleBuilder {
54    /// Create a new `ModuleBuilder`.
55    ///
56    /// # Arguments
57    /// - `name`: The name of the module.
58    ///
59    /// # Returns
60    /// - A new `ModuleBuilder` instance.
61    ///
62    /// # Example
63    /// ```
64    /// use gbf_core::module::ModuleBuilder;
65    ///
66    /// let builder = ModuleBuilder::new();
67    /// ```
68    pub fn new() -> Self {
69        Self {
70            name: None,
71            reader: None,
72        }
73    }
74    /// Set the name of the module.
75    ///
76    /// # Arguments
77    /// - `name`: The name of the module.
78    ///
79    /// # Returns
80    /// - A reference to the builder.
81    ///
82    /// # Example
83    /// ```
84    /// use gbf_core::module::ModuleBuilder;
85    ///
86    /// let builder = ModuleBuilder::new().name("test");
87    /// ```
88    pub fn name<N: Into<String>>(mut self, name: N) -> Self {
89        self.name = Some(name.into());
90        self
91    }
92
93    /// Set the reader for the module.
94    ///
95    /// # Arguments
96    /// - `reader`: The reader to use for the module.
97    ///
98    /// # Returns
99    /// - A reference to the builder.
100    ///
101    /// # Example
102    /// ```
103    /// use gbf_core::module::ModuleBuilder;
104    ///
105    /// let builder = ModuleBuilder::new().reader(Box::new(std::io::Cursor::new(vec![0x00, 0x01])));
106    /// ```
107    pub fn reader(mut self, reader: Box<dyn std::io::Read>) -> Self {
108        self.reader = Some(reader);
109        self
110    }
111
112    /// Build the `Module` from the builder.
113    ///
114    /// # Returns
115    /// - A new `Module` instance.
116    ///
117    /// # Example
118    /// ```
119    /// use gbf_core::module::ModuleBuilder;
120    ///
121    /// let module = ModuleBuilder::new().name("test").build().unwrap();
122    /// ```
123    pub fn build(self) -> Result<Module, ModuleError> {
124        let mut module = Module {
125            name: self.name,
126            functions: Vec::new(),
127            id_to_index: HashMap::new(),
128            name_to_id: HashMap::new(),
129            address_to_id: HashMap::new(),
130        };
131
132        // Create entry function
133        let fun_id = FunctionId::new_without_name(module.functions.len(), 0);
134
135        // Create new function struct
136        module.functions.push(Function::new(fun_id.clone()));
137        module.id_to_index.insert(fun_id.clone(), 0);
138        module.name_to_id.insert(None, fun_id.clone());
139        module.address_to_id.insert(0, fun_id.clone());
140
141        if let Some(reader) = self.reader {
142            module.load(reader)?;
143        }
144
145        Ok(module)
146    }
147}
148
149/// Represents a GS2 module in a bytecode system. A module contains
150/// functions, strings, and other data.
151pub struct Module {
152    /// The name of the module.
153    pub name: Option<String>,
154    /// A list of functions in the module, which provides fast sequential access.
155    functions: Vec<Function>,
156    /// A map of function IDs to their index in the functions vector.
157    id_to_index: HashMap<FunctionId, usize>,
158    /// A map of function names to their IDs.
159    name_to_id: HashMap<Option<String>, FunctionId>,
160    /// A map of function addresses to their IDs.
161    address_to_id: HashMap<Gs2BytecodeAddress, FunctionId>,
162}
163
164/// Public API for `Module`.
165impl Module {
166    /// Create a new function in the module.
167    ///
168    /// # Returns
169    /// - The `FunctionId` of the new function.
170    ///
171    /// # Errors
172    /// - `ModuleError::EntryModuleDefinedMoreThanOnce` if the entry function is already set.
173    ///
174    /// # Example
175    /// ```
176    /// use gbf_core::module::ModuleBuilder;
177    ///
178    /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
179    /// let function_id = module.create_function("test_function", 123).unwrap();
180    /// ```
181    pub fn create_function<N: Into<String>>(
182        &mut self,
183        name: N,
184        address: Gs2BytecodeAddress,
185    ) -> Result<FunctionId, ModuleError> {
186        let name = name.into();
187        let function_id = FunctionId::new(self.functions.len(), Some(name.clone()), address);
188
189        // Check for duplicate function name
190        if self.name_to_id.contains_key(&Some(name.clone())) {
191            return Err(ModuleError::DuplicateFunctionName(name));
192        }
193
194        // Check for duplicate function address
195        if self.address_to_id.contains_key(&address) {
196            let existing_id = self.address_to_id.get(&address).unwrap().clone();
197            let existing_name = existing_id.name.unwrap_or("{entry function}".to_string());
198            return Err(ModuleError::DuplicateFunctionAddress(
199                address,
200                existing_name,
201            ));
202        }
203
204        // Create new function struct
205        self.functions.push(Function::new(function_id.clone()));
206        self.id_to_index
207            .insert(function_id.clone(), self.functions.len() - 1);
208        self.name_to_id
209            .insert(Some(name.clone()), function_id.clone());
210        self.address_to_id.insert(address, function_id.clone());
211
212        Ok(function_id)
213    }
214
215    /// Decompile the module.
216    ///
217    /// # Arguments
218    /// - `ctx`: The EmitContext to use for decompilation.
219    ///
220    /// # Returns
221    /// - A Result containing the decompiled module if successful, or an error if not.
222    pub fn decompile(&self, ctx: EmitContext) -> Result<String, FunctionDecompilerError> {
223        let mut decompiled = String::new();
224        for function in self.functions.iter() {
225            let mut decompiler = FunctionDecompilerBuilder::new(function).build();
226            let decompiled_function = decompiler.decompile(ctx)?;
227            decompiled.push_str(&decompiled_function);
228            decompiled.push('\n');
229        }
230        Ok(decompiled)
231    }
232
233    /// Check if the function exists in the module
234    ///
235    /// # Arguments
236    /// - `name`: The name of the function to check.
237    ///
238    /// # Returns
239    /// - A boolean indicating if the function exists.
240    ///
241    /// # Example
242    /// ```
243    /// use gbf_core::module::ModuleBuilder;
244    ///
245    /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
246    /// let function_id = module.create_function("test_function", 123).unwrap();
247    /// assert!(module.has_function("test_function"));
248    /// ```
249    pub fn has_function<N: Into<String>>(&self, name: N) -> bool {
250        let name = name.into();
251        self.name_to_id.contains_key(&Some(name))
252    }
253
254    /// Get function by name
255    ///
256    /// # Arguments
257    /// - `name`: The name of the function to retrieve.
258    ///
259    /// # Returns
260    /// - A reference to the function, if it exists.
261    ///
262    /// # Errors
263    /// - `ModuleError::FunctionNotFoundByName` if the function does not exist.
264    ///
265    /// # Example
266    /// ```
267    /// use gbf_core::module::ModuleBuilder;
268    ///
269    /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
270    /// let function_id = module.create_function("test_function", 123).unwrap();
271    /// let function = module.get_function_by_name("test_function").unwrap();
272    /// ```
273    pub fn get_function_by_name<N: Into<String>>(&self, name: N) -> Result<&Function, ModuleError> {
274        let name = name.into();
275        let id = self.get_function_id_by_name(name)?;
276        self.get_function_by_id(&id)
277    }
278
279    /// Get the entry function of the module.
280    ///
281    /// # Returns
282    /// - A reference to the entry function.
283    ///
284    /// # Example
285    /// ```
286    /// use gbf_core::module::ModuleBuilder;
287    ///
288    /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
289    /// let function_id = module.create_function("test_function", 123).unwrap();
290    /// let entry_function = module.get_entry_function();
291    /// ```
292    pub fn get_entry_function(&self) -> &Function {
293        // Get the function at address 0
294        self.functions.first().expect("Entry function must exist")
295    }
296
297    /// Get the entry function id of the module (mutable).
298    ///
299    /// # Returns
300    /// - A mutable reference to the entry function.
301    ///
302    /// # Example
303    /// ```
304    /// use gbf_core::module::ModuleBuilder;
305    ///
306    /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
307    /// let function_id = module.create_function("test_function", 123).unwrap();
308    /// let entry_function = module.get_entry_function_mut();
309    /// ```
310    pub fn get_entry_function_mut(&mut self) -> &mut Function {
311        // Get the function at address 0
312        self.functions
313            .get_mut(0)
314            .expect("Entry function must exist")
315    }
316
317    /// Get function by name (mutable)
318    ///
319    /// # Arguments
320    /// - `name`: The name of the function to retrieve.
321    ///
322    /// # Returns
323    /// - A mutable reference to the function, if it exists.
324    ///
325    /// # Errors
326    /// - `ModuleError::FunctionNotFoundByName` if the function does not exist.
327    ///
328    /// # Example
329    /// ```
330    /// use gbf_core::module::ModuleBuilder;
331    ///
332    /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
333    /// let function_id = module.create_function("test_function", 123).unwrap();
334    /// let function = module.get_function_by_name_mut("test_function").unwrap();
335    /// ```
336    pub fn get_function_by_name_mut<N: Into<String>>(
337        &mut self,
338        name: N,
339    ) -> Result<&mut Function, ModuleError> {
340        let name = name.into();
341        let id = self.get_function_id_by_name(name)?;
342        self.get_function_by_id_mut(&id)
343    }
344
345    /// Get function id by name
346    ///
347    /// # Arguments
348    /// - `name`: The name of the function to retrieve.
349    ///
350    /// # Returns
351    /// - The `FunctionId` of the function, if it exists.
352    ///
353    /// # Errors
354    /// - `ModuleError::FunctionNotFoundByName` if the function does not exist.
355    ///
356    /// # Example
357    /// ```
358    /// use gbf_core::module::ModuleBuilder;
359    ///
360    /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
361    /// let function_id = module.create_function("test_function", 123).unwrap();
362    /// let function_id = module.get_function_id_by_name("test_function").unwrap();
363    /// ```
364    pub fn get_function_id_by_name<N: Into<String>>(
365        &self,
366        name: N,
367    ) -> Result<FunctionId, ModuleError> {
368        let name = name.into();
369        self.name_to_id
370            .get(&Some(name.clone()))
371            .cloned()
372            .ok_or(ModuleError::FunctionNotFoundByName(name))
373    }
374
375    /// Get the number of functions in the module.
376    ///
377    /// # Returns
378    /// - The number of functions in the module.
379    ///
380    /// # Example
381    /// ```
382    /// use gbf_core::module::ModuleBuilder;
383    ///
384    /// let mut module = ModuleBuilder::new().name("test.gs2").build().unwrap();
385    /// let function_id = module.create_function("test_function", 123).unwrap();
386    /// assert_eq!(module.len(), 2);
387    /// ```
388    pub fn len(&self) -> usize {
389        self.functions.len()
390    }
391
392    /// Check if the `Module` is empty.
393    ///
394    /// # Returns
395    /// - A boolean indicating if the `Module` is empty.
396    ///
397    /// # Example
398    /// ```
399    /// use gbf_core::module::ModuleBuilder;
400    ///
401    /// let module = ModuleBuilder::new().name("test.gs2").build().unwrap();
402    /// assert!(!module.is_empty());
403    /// ```
404    pub fn is_empty(&self) -> bool {
405        // The module will always have an entry function, so this is always false
406        self.functions.is_empty()
407    }
408}
409
410/// Internal API for `Module`.
411impl Module {
412    /// Load bytecode into the module using a reader.
413    ///
414    /// # Arguments
415    /// - `reader`: The reader to use to load the bytecode.
416    ///
417    /// # Errors
418    /// - `ModuleError::BytecodeLoaderError` if the bytecode loader fails to load the bytecode.
419    /// - `ModuleError::EntryModuleDefinedMoreThanOnce` if the entry function is already set.
420    fn load<R: std::io::Read>(&mut self, reader: R) -> Result<(), ModuleError> {
421        let loaded_bytecode = bytecode_loader::BytecodeLoaderBuilder::new(reader).build()?;
422
423        // Iterate through each instruction in the bytecode
424        for (offset, instruction) in loaded_bytecode.instructions.iter().enumerate() {
425            // Check if instruction is even reachable. If it's not, we can skip it
426            if !loaded_bytecode.is_instruction_reachable(offset) {
427                continue;
428            }
429            let function_name = loaded_bytecode.get_function_name_for_address(offset);
430            // The precondition above guarantees that this will always be true
431            assert!(function_name.is_ok());
432            let function_name = function_name.unwrap().clone();
433
434            if let Some(function_name) = function_name.clone() {
435                if !self.has_function(function_name.clone()) {
436                    let function_name_clone = function_name.clone();
437                    let offset = loaded_bytecode
438                        .function_map
439                        .get(&Some(function_name_clone))
440                        .expect("Function must exist in the function map");
441                    self.create_function(function_name, *offset)?;
442                }
443            }
444
445            let is_entry = function_name.is_none();
446
447            // Get the function reference. If the function is an entry function, we will use the entry function, otherwise we will use the function name
448            let function = if is_entry {
449                self.get_entry_function_mut()
450            } else {
451                self.get_function_by_name_mut(function_name.unwrap())?
452            };
453
454            // Get the start address for the basic block
455            let start_address = loaded_bytecode.find_block_start_address(offset);
456
457            // Create new basic block if it doesn't exist
458            if !function.basic_block_exists_by_address(start_address) {
459                // We won't run into this error because we are not making an entry block here
460                function
461                    .create_block(BasicBlockType::Normal, start_address)
462                    .expect("Block collisions are not possible");
463            }
464
465            // Get the basic block reference
466            let block = function
467                .get_basic_block_by_start_address_mut(start_address)
468                .unwrap();
469
470            // Add the instruction to the basic block
471            block.add_instruction(instruction.clone());
472        }
473
474        // To the entry block, let's create a new basic block with address set to the length of the bytecode
475        // This is the block that will be used to represent the end of the module
476        let entry = self.get_entry_function_mut();
477        entry
478            .create_block(
479                BasicBlockType::ModuleEnd,
480                loaded_bytecode.instructions.len() as Gs2BytecodeAddress,
481            )
482            .unwrap();
483
484        // Iterate through each function that was created. For each function, we will iterate through
485        // each basic block and find the terminator instruction. Based on the terminator opcode,
486        // we will connect edges in the graph.
487        for function in self.functions.iter_mut() {
488            let block_data: Vec<_> = function
489                .iter()
490                .map(|block| (block.id, block.last_instruction().cloned()))
491                .collect();
492
493            for (id, terminator) in block_data {
494                Self::process_block_edges(function, id, terminator);
495            }
496        }
497        Ok(())
498    }
499
500    fn process_block_edges(
501        function: &mut Function,
502        id: BasicBlockId,
503        terminator: Option<Instruction>,
504    ) {
505        if let Some(terminator) = terminator {
506            let terminator_opcode = terminator.opcode;
507            let terminator_operand = terminator.operand;
508            let terminator_address = terminator.address;
509            if terminator_opcode.has_jump_target() {
510                if let Some(branch_address) =
511                    terminator_operand.and_then(|o| o.get_number_value().ok())
512                {
513                    let branch_block_id = function
514                        .get_basic_block_id_by_start_address(branch_address as Gs2BytecodeAddress)
515                        .expect("Block must exist");
516                    function.add_edge(id, branch_block_id).unwrap();
517                }
518            }
519
520            // If appropriate, connect the next block
521            if terminator_opcode.connects_to_next_block() {
522                let next_block_id = function
523                    .get_basic_block_id_by_start_address(terminator_address + 1)
524                    .expect("Block must exist");
525                function.add_edge(id, next_block_id).unwrap();
526            }
527        }
528    }
529
530    /// Get a function by its `FunctionId`.
531    ///
532    /// # Arguments
533    /// - `id`: The `FunctionId` of the function to retrieve.
534    ///
535    /// # Returns
536    /// - A reference to the function, if it exists.
537    ///
538    /// # Errors
539    /// - `ModuleError::FunctionNotFoundById` if the function does not exist.
540    fn get_function_by_id(&self, id: &FunctionId) -> Result<&Function, ModuleError> {
541        let index = self
542            .id_to_index
543            .get(id)
544            .ok_or(ModuleError::FunctionNotFoundById(id.clone()))?;
545
546        // Provides fast sequential access, but panics if the index is out of bounds
547        Ok(&self.functions[*index])
548    }
549
550    /// Get a mutable reference to a function by its `FunctionId`.
551    ///
552    /// # Arguments
553    /// - `id`: The `FunctionId` of the function to retrieve.
554    ///
555    /// # Returns
556    /// - A mutable reference to the function, if it exists.
557    ///
558    /// # Errors
559    /// - `ModuleError::FunctionNotFoundById` if the function does not exist.
560    fn get_function_by_id_mut(&mut self, id: &FunctionId) -> Result<&mut Function, ModuleError> {
561        let index = self
562            .id_to_index
563            .get(id)
564            .ok_or(ModuleError::FunctionNotFoundById(id.clone()))?;
565
566        // Provides fast sequential access, but panics if the index is out of bounds
567        Ok(&mut self.functions[*index])
568    }
569}
570
571// === Implementations ===
572
573/// Display implementation for `Module`.
574impl Display for Module {
575    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
576        write!(f, "{}", self.name.as_deref().unwrap_or("Unnamed Module"))
577    }
578}
579
580/// Default implementation for `ModuleBuilder`.
581impl Default for ModuleBuilder {
582    fn default() -> Self {
583        Self::new()
584    }
585}
586
587/// Deref implementation for `Module`.
588impl std::ops::Deref for Module {
589    type Target = Vec<Function>;
590
591    fn deref(&self) -> &Self::Target {
592        &self.functions
593    }
594}
595
596/// Index implementation for `Module`.
597impl std::ops::Index<usize> for Module {
598    type Output = Function;
599
600    fn index(&self, index: usize) -> &Self::Output {
601        &self.functions[index]
602    }
603}
604
605/// Immutable IntoIterator implementation for `Module`.
606impl<'a> IntoIterator for &'a Module {
607    type Item = &'a Function;
608    type IntoIter = std::slice::Iter<'a, Function>;
609
610    fn into_iter(self) -> Self::IntoIter {
611        self.functions.iter()
612    }
613}
614
615/// Mutable IntoIterator implementation for `Module`.
616impl<'a> IntoIterator for &'a mut Module {
617    type Item = &'a mut Function;
618    type IntoIter = std::slice::IterMut<'a, Function>;
619
620    fn into_iter(self) -> Self::IntoIter {
621        self.functions.iter_mut()
622    }
623}
624
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a module whose bytecode is read from an in-memory copy of `bytes`.
    fn build_from_bytes(bytes: &[u8]) -> Result<Module, ModuleError> {
        let reader = std::io::Cursor::new(bytes.to_vec());
        ModuleBuilder::new().reader(Box::new(reader)).build()
    }

    #[test]
    fn load_bytecode() {
        // Input that is expected to load successfully.
        let accepted = [
            0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00,
        ];
        assert!(build_from_bytes(&accepted).is_ok());

        // Failure case: only the first eight bytes of the input above.
        let rejected = [0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04];
        assert!(build_from_bytes(&rejected).is_err());
    }
}