1#![deny(missing_docs)]
2
3use std::backtrace::Backtrace;
4
5use cyclic_region_reducer::CyclicRegionReducer;
6use if_region_reducer::IfRegionReducer;
7use linear_region_reducer::LinearRegionReducer;
8use petgraph::{
9 algo::dominators::simple_fast,
10 graph::{DiGraph, NodeIndex},
11 visit::{DfsPostOrder, Walker},
12};
13use region::{Region, RegionId, RegionType};
14use serde::{Deserialize, Serialize};
15use tail_region_reducer::TailRegionReducer;
16use vbranch::VirtualBranchReducer;
17
18use crate::{
19 cfg_dot::{CfgDot, CfgDotConfig, DotRenderableGraph, NodeResolver},
20 opcode::Opcode,
21 utils::{GBF_GREEN, GBF_RED, GBF_YELLOW},
22};
23
24use super::ast::{AstKind, AstNodeError};
25
26use thiserror::Error;
27
28pub mod cyclic_region_reducer;
30pub mod if_region_reducer;
32pub mod linear_region_reducer;
34pub mod region;
36pub mod tail_region_reducer;
38pub mod vbranch;
40
41pub trait RegionReducer {
43 fn reduce_region(
45 &mut self,
46 analysis: &mut StructureAnalysis,
47 region_id: RegionId,
48 ) -> Result<bool, StructureAnalysisError>;
49}
50
51#[derive(Debug, Error, Serialize)]
53pub enum StructureAnalysisError {
54 #[error("Region not found: {:?}", region_id)]
56 RegionNotFound {
57 region_id: RegionId,
59
60 #[serde(skip)]
62 backtrace: Backtrace,
63 },
64
65 #[error("Entry region not found")]
67 EntryRegionNotFound {
68 #[serde(skip)]
70 backtrace: Backtrace,
71 },
72
73 #[error("Maximum number of iterations reached: {max_iterations}")]
75 MaxIterationsReached {
76 max_iterations: usize,
78 #[serde(skip)]
80 backtrace: Backtrace,
81 },
82
83 #[error("Expected condition not found")]
85 ExpectedConditionNotFound {
86 #[serde(skip)]
88 backtrace: Backtrace,
89 },
90
91 #[error("An AST node error occurred")]
93 AstNodeError {
94 source: Box<AstNodeError>,
96
97 #[serde(skip)]
99 backtrace: Backtrace,
100 },
101
102 #[error("A structure analysis error occurred: {message}")]
104 Other {
105 message: String,
107
108 #[serde(skip)]
110 backtrace: Backtrace,
111 },
112}
113
114impl StructureAnalysisError {
115 pub fn backtrace(&self) -> &Backtrace {
117 match self {
118 StructureAnalysisError::RegionNotFound { backtrace, .. } => backtrace,
119 StructureAnalysisError::EntryRegionNotFound { backtrace } => backtrace,
120 StructureAnalysisError::MaxIterationsReached { backtrace, .. } => backtrace,
121 StructureAnalysisError::ExpectedConditionNotFound { backtrace } => backtrace,
122 StructureAnalysisError::AstNodeError { backtrace, .. } => backtrace,
123 StructureAnalysisError::Other { backtrace, .. } => backtrace,
124 }
125 }
126}
127
128#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Copy)]
130pub enum ControlFlowEdgeType {
131 Branch,
133 Fallthrough,
135}
136
137#[derive(Default)]
139pub struct StructureAnalysis {
140 regions: Vec<Region>,
142 region_graph: DiGraph<RegionId, ControlFlowEdgeType>,
144 debug_mode: bool,
146 snapshots: Vec<String>,
148 max_iterations: usize,
150 region_to_highlight: Option<RegionId>,
152 is_marked: bool,
154}
155
156impl StructureAnalysis {
157 pub fn new(debug_mode: bool, structure_max_iterations: usize) -> Self {
159 Self {
160 regions: Vec::new(),
161 region_graph: DiGraph::new(),
162 debug_mode,
163 snapshots: Vec::new(),
164 max_iterations: structure_max_iterations,
165 region_to_highlight: None,
166 is_marked: false,
167 }
168 }
169
170 pub fn add_region(&mut self, region_type: RegionType) -> RegionId {
172 let region_id = RegionId::new(self.regions.len());
173 self.regions.push(Region::new(region_type, region_id));
174 self.region_graph.add_node(region_id);
175 region_id
176 }
177
178 pub fn connect_regions(
180 &mut self,
181 from: RegionId,
182 to: RegionId,
183 edge_type: ControlFlowEdgeType,
184 ) -> Result<(), StructureAnalysisError> {
185 let from_node = self.get_node_index(from)?;
186 let to_node = self.get_node_index(to)?;
187 if self.region_graph.find_edge(from_node, to_node).is_none() {
189 self.region_graph.add_edge(from_node, to_node, edge_type);
190 }
191 Ok(())
192 }
193
194 pub fn get_region(&self, region_id: RegionId) -> Result<&Region, StructureAnalysisError> {
196 self.regions
197 .get(region_id.index)
198 .ok_or(StructureAnalysisError::RegionNotFound {
199 region_id,
200 backtrace: Backtrace::capture(),
201 })
202 }
203
204 pub fn get_region_mut(
206 &mut self,
207 region_id: RegionId,
208 ) -> Result<&mut Region, StructureAnalysisError> {
209 self.regions
210 .get_mut(region_id.index)
211 .ok_or(StructureAnalysisError::RegionNotFound {
212 region_id,
213 backtrace: Backtrace::capture(),
214 })
215 }
216
217 pub fn get_entry_region(&self) -> RegionId {
219 RegionId::new(0)
222 }
223
224 pub fn get_node_index(&self, region_id: RegionId) -> Result<NodeIndex, StructureAnalysisError> {
226 self.region_graph
227 .node_indices()
228 .find(|&idx| self.region_graph[idx] == region_id)
229 .ok_or(StructureAnalysisError::RegionNotFound {
230 region_id,
231 backtrace: Backtrace::capture(),
232 })
233 }
234
235 pub fn get_region_type(
237 &self,
238 region_id: RegionId,
239 ) -> Result<RegionType, StructureAnalysisError> {
240 self.get_region(region_id)
241 .map(|region| region.get_region_type())
242 }
243
244 pub fn get_branch_opcode(
246 &self,
247 region_id: RegionId,
248 ) -> Result<Option<Opcode>, StructureAnalysisError> {
249 self.get_region(region_id)
250 .map(|region| region.get_branch_opcode())
251 }
252
253 pub fn get_region_id(&self, node_index: NodeIndex) -> Result<RegionId, StructureAnalysisError> {
255 self.region_graph
256 .node_weight(node_index)
257 .cloned()
258 .ok_or(StructureAnalysisError::Other {
259 message: "Node index not found".to_string(),
260 backtrace: Backtrace::capture(),
261 })
262 }
263
264 pub fn execute(&mut self) -> Result<(), StructureAnalysisError> {
266 self.capture_snapshot(None);
268
269 let mut iterations = 0;
270
271 while self.region_graph.node_count() > 1 {
273 if iterations > self.max_iterations {
275 return Err(StructureAnalysisError::MaxIterationsReached {
276 max_iterations: self.max_iterations,
277 backtrace: Backtrace::capture(),
278 });
279 }
280
281 let old_node_count = self.region_graph.node_count();
282
283 let entry_region_id = self.get_entry_region();
285 let entry_region_node_index = self.get_node_index(entry_region_id)?;
286
287 let dfs = DfsPostOrder::new(&self.region_graph, entry_region_node_index);
288 let nodes: Vec<RegionId> = dfs
290 .iter(&self.region_graph)
291 .collect::<Vec<NodeIndex>>()
292 .iter()
293 .map(|node| self.get_region_id(*node))
294 .collect::<Result<Vec<_>, _>>()?;
295
296 for region_id in nodes {
298 if self.regions[region_id.index].get_region_type() == RegionType::Inactive {
300 continue;
301 }
302 loop {
303 self.is_marked = false;
305
306 let mut did_reduce = self.reduce_acyclic_region(region_id)?;
308
309 if !did_reduce && self.is_cyclic(region_id)? {
310 did_reduce = CyclicRegionReducer.reduce_region(self, region_id)?;
311 }
312
313 if !did_reduce {
314 break;
315 } else {
316 self.after_reduce(region_id);
317 assert!(self.is_marked);
318 }
319 }
320 }
321
322 let new_node_count = self.region_graph.node_count();
323
324 if old_node_count == new_node_count && new_node_count > 1 {
326 self.post_reduce()?;
328 }
329
330 iterations += 1;
331 }
332
333 Ok(())
334 }
335
336 pub fn push_to_region<T>(&mut self, region_id: RegionId, node: T)
338 where
339 T: Into<AstKind>,
340 {
341 let region = self
342 .regions
343 .get_mut(region_id.index)
344 .expect("Region not found");
345 region.push_node(node.into());
346 }
347
348 pub fn push_unresolved_to_region(&mut self, region_id: RegionId, node: AstKind) {
350 let region = self
351 .regions
352 .get_mut(region_id.index)
353 .expect("Region not found");
354 region.push_unresolved_node(node);
355 }
356
357 pub fn get_single_successor(
365 &self,
366 region_id: RegionId,
367 ) -> Result<Option<RegionId>, StructureAnalysisError> {
368 let successors = self.get_successors(region_id)?;
369
370 if successors.len() != 1 {
371 return Ok(None);
372 }
373
374 Ok(Some(successors[0].0))
375 }
376
377 pub fn get_single_predecessor(
385 &self,
386 region_id: RegionId,
387 ) -> Result<Option<RegionId>, StructureAnalysisError> {
388 let preds = self.get_predecessors(region_id)?;
389
390 if preds.len() != 1 {
391 return Ok(None);
392 }
393
394 Ok(Some(preds[0]))
395 }
396
397 pub fn is_cyclic(&self, region_id: RegionId) -> Result<bool, StructureAnalysisError> {
406 let preds = self.get_predecessors(region_id)?;
407
408 for pred in preds {
409 if pred == region_id || self.is_back_edge(pred, region_id)? {
410 return Ok(true);
411 }
412 }
413
414 Ok(false)
415 }
416
417 pub fn is_back_edge(
427 &self,
428 region: RegionId,
429 successor: RegionId,
430 ) -> Result<bool, StructureAnalysisError> {
431 self.dominates_strictly(successor, region)
432 }
433
434 pub fn dominates_strictly(
444 &self,
445 dominator: RegionId,
446 dominatee: RegionId,
447 ) -> Result<bool, StructureAnalysisError> {
448 let entry_node = self.get_node_index(self.get_entry_region())?;
449 let dominator_node = self.get_node_index(dominator)?;
450 let dominatee_node = self.get_node_index(dominatee)?;
451
452 let dominators = simple_fast(&self.region_graph, entry_node);
455
456 let doms = dominators.strict_dominators(dominator_node);
457
458 Ok(doms
460 .map(|doms| doms.collect::<Vec<_>>().contains(&dominatee_node))
461 .unwrap_or(false))
462 }
463
464 pub fn get_single_linear_successor(
472 &self,
473 region_id: RegionId,
474 ) -> Result<Option<RegionId>, StructureAnalysisError> {
475 let region_type = self.regions[region_id.index].get_region_type();
477
478 if region_type != RegionType::Linear {
480 return Ok(None);
481 }
482
483 self.get_single_successor(region_id)
484 }
485
486 pub fn has_single_predecessor(&self, id: RegionId) -> Result<bool, StructureAnalysisError> {
494 let node_index = self.get_node_index(id)?;
495 Ok(self
496 .region_graph
497 .neighbors_directed(node_index, petgraph::Direction::Incoming)
498 .count()
499 == 1)
500 }
501
502 pub fn remove_edge(
512 &mut self,
513 from_region_id: RegionId,
514 to_region_id: RegionId,
515 ) -> Result<(), StructureAnalysisError> {
516 let region_node = self.get_node_index(from_region_id)?;
517 let successor_node = self.get_node_index(to_region_id)?;
518 let edge_index = self
519 .region_graph
520 .find_edge(region_node, successor_node)
521 .ok_or(StructureAnalysisError::Other {
522 message: "Edge not found".to_string(),
523 backtrace: Backtrace::capture(),
524 })?;
525 assert!(self.region_graph.remove_edge(edge_index).is_some());
527
528 Ok(())
529 }
530
531 pub fn get_successors(
539 &self,
540 region_id: RegionId,
541 ) -> Result<Vec<(RegionId, ControlFlowEdgeType)>, StructureAnalysisError> {
542 let region_node = self.get_node_index(region_id)?;
543 self.region_graph
544 .neighbors_directed(region_node, petgraph::Direction::Outgoing)
545 .map(|node| {
546 let region_id = self.get_region_id(node)?;
547 let edge = self
548 .region_graph
549 .find_edge(region_node, node)
550 .ok_or_else(|| StructureAnalysisError::Other {
551 message: "Edge not found".to_string(),
552 backtrace: Backtrace::capture(),
553 })?;
554 let edge_weight = self.region_graph.edge_weight(edge).ok_or_else(|| {
555 StructureAnalysisError::Other {
556 message: "Edge weight not found".to_string(),
557 backtrace: Backtrace::capture(),
558 }
559 })?;
560 Ok((region_id, *edge_weight))
561 })
562 .collect()
563 }
564
565 pub fn get_predecessors(
573 &self,
574 region_id: RegionId,
575 ) -> Result<Vec<RegionId>, StructureAnalysisError> {
576 let region_node = self.get_node_index(region_id)?;
577 self.region_graph
578 .neighbors_directed(region_node, petgraph::Direction::Incoming)
579 .map(|node| self.get_region_id(node))
580 .collect()
581 }
582
583 pub fn remove_node(&mut self, region_id: RegionId) -> Result<(), StructureAnalysisError> {
588 let node_index = self.get_node_index(region_id)?;
589 assert!(self.region_graph.remove_node(node_index).is_some());
590
591 self.regions[region_id.index].set_region_type(RegionType::Inactive);
593
594 Ok(())
595 }
596
597 pub fn get_snapshots(&self) -> Result<&Vec<String>, StructureAnalysisError> {
599 if !self.debug_mode {
600 return Err(StructureAnalysisError::Other {
601 message: "Debug mode is not enabled".to_string(),
602 backtrace: Backtrace::capture(),
603 });
604 }
605
606 Ok(&self.snapshots)
607 }
608
609 pub fn before_reduce(&mut self, region_id: RegionId) {
611 self.capture_region_snapshot(region_id);
612 self.is_marked = true;
613 }
614
615 pub fn after_reduce(&mut self, region_id: RegionId) {
617 self.capture_region_snapshot(region_id);
618 }
619
620 pub fn capture_snapshot(&mut self, region_to_highlight: Option<RegionId>) {
622 if !self.debug_mode {
623 return;
624 }
625 self.region_to_highlight = region_to_highlight;
626 let dot = self.render_dot(CfgDotConfig::default());
627 self.snapshots.push(dot);
628 self.region_to_highlight = None;
629 }
630
631 pub fn capture_region_snapshot(&mut self, region_to_highlight: RegionId) {
636 self.capture_snapshot(Some(region_to_highlight));
637 }
638}
639
640impl StructureAnalysis {
642 fn reduce_acyclic_region(
644 &mut self,
645 region_id: RegionId,
646 ) -> Result<bool, StructureAnalysisError> {
647 let region =
649 self.regions
650 .get(region_id.index)
651 .ok_or(StructureAnalysisError::RegionNotFound {
652 region_id,
653 backtrace: Backtrace::capture(),
654 })?;
655 Ok(match region.get_region_type() {
656 RegionType::Linear => LinearRegionReducer.reduce_region(self, region_id)?,
657 RegionType::Tail => false,
658 RegionType::Inactive => Err(StructureAnalysisError::Other {
659 message: "Inactive region".to_string(),
660 backtrace: Backtrace::capture(),
661 })?,
662 RegionType::ControlFlow => IfRegionReducer.reduce_region(self, region_id)?,
663 })
664 }
665
666 fn post_reduce(&mut self) -> Result<bool, StructureAnalysisError> {
668 let entry_region_id = self.get_entry_region();
669 let entry_region_node_index = self.get_node_index(entry_region_id)?;
670 let dfs = DfsPostOrder::new(&self.region_graph, entry_region_node_index);
671 let nodes: Vec<RegionId> = dfs
673 .iter(&self.region_graph)
674 .collect::<Vec<NodeIndex>>()
675 .iter()
676 .map(|node| self.get_region_id(*node))
677 .collect::<Result<Vec<_>, _>>()?;
678
679 for region_id in nodes {
681 if self.regions[region_id.index].get_region_type() == RegionType::Inactive {
683 continue;
684 }
685
686 if TailRegionReducer.reduce_region(self, region_id)? {
687 return Ok(true);
688 }
689
690 if VirtualBranchReducer.reduce_region(self, region_id)? {
691 return Ok(true);
692 }
693 }
694
695 Ok(false)
696 }
697}
698
699impl DotRenderableGraph for StructureAnalysis {
701 fn render_dot(&self, config: CfgDotConfig) -> String {
706 let dot = CfgDot { config };
707 dot.render(&self.region_graph, self)
708 }
709}
710
711impl NodeResolver for StructureAnalysis {
712 type NodeData = Region;
713
714 fn resolve(&self, node_index: NodeIndex) -> Option<&Self::NodeData> {
715 let region_id = self.get_region_id(node_index).ok()?;
716 self.regions.get(region_id.index)
717 }
718
719 fn resolve_edge_color(&self, n1: NodeIndex, n2: NodeIndex) -> String {
720 let edge = self.region_graph.find_edge(n1, n2);
722 if let Some(edge) = edge {
723 match self.region_graph.edge_weight(edge) {
724 Some(ControlFlowEdgeType::Branch) => GBF_GREEN.to_string(),
725 Some(ControlFlowEdgeType::Fallthrough) => GBF_RED.to_string(),
726 None => GBF_RED.to_string(),
727 }
728 } else {
729 GBF_YELLOW.to_string()
730 }
731 }
732
733 fn resolve_border_color(&self, index: NodeIndex) -> Option<String> {
734 let region_id = self.get_region_id(index).ok()?;
735
736 if self.region_to_highlight == Some(region_id) {
737 Some(GBF_GREEN.to_string())
738 } else {
739 None
740 }
741 }
742}
743
744#[cfg(test)]
745mod tests {
746 use super::*;
747 use crate::decompiler::ast::{new_assignment, new_id};
748
749 #[test]
750 fn test_remove_edge() -> Result<(), StructureAnalysisError> {
751 let mut structure_analysis = StructureAnalysis::new(false, 100);
752
753 let entry_region = structure_analysis.add_region(RegionType::Linear);
754 let region_1 = structure_analysis.add_region(RegionType::Linear);
755 let region_2 = structure_analysis.add_region(RegionType::Tail);
756
757 structure_analysis
759 .push_to_region(entry_region, new_assignment(new_id("foo"), new_id("bar")));
760 structure_analysis
762 .get_region_mut(entry_region)?
763 .set_jump_expr(Some(new_id("foo").into()));
764 structure_analysis.push_to_region(region_1, new_assignment(new_id("foo2"), new_id("bar2")));
765 structure_analysis.push_to_region(region_1, new_assignment(new_id("foo3"), new_id("bar3")));
766 structure_analysis.push_to_region(region_2, new_assignment(new_id("foo4"), new_id("bar4")));
767 structure_analysis.push_to_region(region_2, new_assignment(new_id("foo5"), new_id("bar5")));
768 structure_analysis.connect_regions(
769 entry_region,
770 region_1,
771 ControlFlowEdgeType::Fallthrough,
772 )?;
773 structure_analysis.connect_regions(entry_region, region_2, ControlFlowEdgeType::Branch)?;
774 structure_analysis.connect_regions(region_1, region_2, ControlFlowEdgeType::Fallthrough)?;
775
776 structure_analysis.remove_edge(entry_region, region_1)?;
780 structure_analysis.remove_edge(region_1, region_2)?;
782 structure_analysis.remove_node(region_1)?;
784 let successors = structure_analysis.get_successors(entry_region)?;
786 assert_eq!(successors.len(), 1);
788 assert_eq!(successors[0].0, region_2);
790 structure_analysis.execute()?;
792 assert_eq!(structure_analysis.region_graph.node_count(), 1);
794
795 Ok(())
796 }
797}