20 Commits
v0.1 ... master

Author SHA1 Message Date
Boris Batteux
87fe12ca9c Change default value to detect a dispatcher when at least 3 exit blocks are present 2021-02-25 16:35:01 +01:00
Boris Batteux
68c9473608 Allow specifying maturities at the instruction simplification rule level 2021-02-25 16:33:56 +01:00
Boris Batteux
a57a48800f Count non-side-effect modifications separately so that they don't trigger a new unflattening pass 2021-02-25 16:33:12 +01:00
Boris Batteux
5a452443e7 Add an option to not call combine_blocks since we noticed several crashes when it is activated 2021-02-25 16:30:29 +01:00
Boris Batteux
3de70b50a0 Reduce verbosity 2021-02-25 16:26:05 +01:00
Boris Batteux
d3443e9102 Handle cases where a constant is accessed by address 2021-02-25 16:19:38 +01:00
Boris Batteux
b45c7ea84b Merging 2021-02-18 15:50:28 +01:00
Boris Batteux
9f2cdcb524 Add a requirement for Python 3.7 or higher to support annotations 2021-02-18 15:45:39 +01:00
Boris Batteux
982f96af82 Add a new method to simplify control flow obfuscation 2021-02-18 15:44:31 +01:00
Boris Batteux
a7e39214f2 Add a rule to simplify constant earlier in decompilation 2021-02-18 15:33:34 +01:00
Boris Batteux
388f7fa241 Add a rule to simplify constant earlier in decompilation 2021-02-18 15:33:05 +01:00
Boris Batteux
fd923d1d42 Also return None for all types of Exception 2021-02-18 15:31:13 +01:00
Boris Batteux
4046716de9 Mark MBA as dirty if we remove some blocks while cleaning 2021-02-18 15:28:52 +01:00
Boris Batteux
e7820ac63b combine_blocks already calls remove_empty_blocks
Calling combine_blocks twice can make IDA crash sometimes
2021-02-18 15:28:21 +01:00
Boris Batteux
49a91daee4 Add new XOR rules 2021-02-18 15:25:53 +01:00
Boris Batteux
1885f43bf5 Allow specifying the size of the mop operand manually 2021-02-18 15:23:48 +01:00
Boris Batteux
74c8427040 Ignore logs 2020-11-30 09:11:43 +01:00
boris
a668e5d896 Use Windows-Linux compatible dir name 2020-11-27 15:18:03 +01:00
boris
ef56bc2cd0 Cleaning 2020-11-27 15:10:34 +01:00
boris
83b51c4620 Fix error on Windows 2020-11-27 15:10:18 +01:00
19 changed files with 317 additions and 32 deletions

View File

@@ -17,7 +17,7 @@ It was designed with the following goals in mind:
# Installation
**Only IDA v7.5 or later is supported** (since we need the microcode Python API)
**Only IDA v7.5 or later is supported with Python 3.7 and higher** (since we need the microcode Python API)
Copy this repository in `.idapro/plugins`

View File

@@ -262,6 +262,7 @@ def create_block(blk: mblock_t, blk_ins: List[minsn_t], is_0_way: bool = False)
new_blk = insert_nop_blk(blk)
for ins in blk_ins:
tmp_ins = minsn_t(ins)
tmp_ins.setaddr(new_blk.tail.ea)
new_blk.insert_into_block(tmp_ins, new_blk.tail)
if is_0_way:
@@ -438,17 +439,20 @@ def mba_remove_simple_goto_blocks(mba: mbl_array_t) -> int:
return nb_change
def mba_deep_cleaning(mba: mbl_array_t) -> int:
def mba_deep_cleaning(mba: mbl_array_t, call_mba_combine_block=True) -> int:
if mba.maturity < MMAT_CALLS:
# Doing this optimization before MMAT_CALLS may create blocks where a call instruction is not the last instruction
# IDA does not like that and will raise a 50864 error
return 0
mba.remove_empty_blocks()
mba.combine_blocks()
nb_change = mba_remove_simple_goto_blocks(mba)
if nb_change > 0:
mba.remove_empty_blocks()
if call_mba_combine_block:
# Ideally we want IDA to simplify the graph for us with combine_blocks
# However, we observed several crashes when this option is activated
# (especially when it is used during O-LLVM unflattening)
# TODO: investigate the root cause of this issue
mba.combine_blocks()
else:
mba.remove_empty_blocks()
nb_change = mba_remove_simple_goto_blocks(mba)
return nb_change

View File

@@ -4,7 +4,7 @@ import json
class D810Configuration(object):
def __init__(self):
self.config_dir = os.path.join(os.getenv("HOME"), ".idapro", "plugins", "d810", "conf")
self.config_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
self.config_file = os.path.join(self.config_dir, "options.json")
with open(self.config_file, "r") as fp:
self._options = json.load(fp)
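The config-path change above matches the "Fix error on Windows" / "Use Windows-Linux compatible dir name" commits. A minimal sketch of the difference, assuming a stock Windows environment where HOME is not set (Windows normally provides USERPROFILE instead):

import os

# Old lookup: on Windows, HOME is usually undefined, so os.getenv("HOME") returns None
# and os.path.join(None, ".idapro", ...) raises a TypeError when the plugin loads.
home = os.getenv("HOME")

# New lookup: anchor the configuration directory to the plugin module itself,
# which resolves the same way on Windows and Linux.
config_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
config_file = os.path.join(config_dir, "options.json")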

View File

@@ -357,6 +357,12 @@ class MicroCodeInterpreter(object):
raise e
else:
return None
except Exception as e:
emulator_log.error("Unexpected exception while computing constant mop value: '{0}': {1}".format(format_mop_t(mop), e))
if raise_exception:
raise e
else:
return None
class MopMapping(object):

View File

@@ -135,12 +135,14 @@ MSB_TABLE = {1: 0x80, 2: 0x8000, 4: 0x80000000, 8: 0x8000000000000000}
# Hex-Rays mop equality checking
def equal_bnot_cst(lo: mop_t, ro: mop_t) -> bool:
def equal_bnot_cst(lo: mop_t, ro: mop_t, mop_size=None) -> bool:
if (lo.t != mop_n) or (ro.t != mop_n):
return False
if lo.size != ro.size:
return False
return lo.nnn.value ^ ro.nnn.value == AND_TABLE[lo.size]
if mop_size is None:
mop_size = lo.size
return lo.nnn.value ^ ro.nnn.value == AND_TABLE[mop_size]
def equal_bnot_mop(lo: mop_t, ro: mop_t, test_two_sides=True) -> bool:
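A minimal sketch of why the new mop_size parameter is useful, assuming AND_TABLE maps an operand size in bytes to its all-ones mask (e.g. {1: 0xFF, 2: 0xFFFF, ...}); when an operand has been widened by an xdu, the two constants only complement each other over the original, narrower width:

AND_TABLE = {1: 0xFF, 2: 0xFFFF, 4: 0xFFFFFFFF, 8: 0xFFFFFFFFFFFFFFFF}  # assumed definition

# Two 1-byte constants are bitwise complements when their XOR equals the all-ones mask.
assert 0x0F ^ 0xF0 == AND_TABLE[1]

# After an xdu, the same constants live in 4-byte mops (0x0000000F / 0x000000F0):
# the check fails at the widened size but still holds at the original 1-byte size,
# which is exactly what passing mop_size=1 expresses.
assert (0x0000000F ^ 0x000000F0) != AND_TABLE[4]
assert (0x0000000F ^ 0x000000F0) == AND_TABLE[1]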

View File

@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
import os
import json
import logging
import idaapi
@@ -67,7 +68,7 @@ class PluginConfigurationFileForm_t(QtWidgets.QDialog):
def choose_log_dir(self):
logger.debug("Calling save_rule_configuration")
log_dir = QtWidgets.QFileDialog.getExistingDirectory(self, "Open Directory", "/home",
log_dir = QtWidgets.QFileDialog.getExistingDirectory(self, "Open Directory", os.path.expanduser("~"),
QtWidgets.QFileDialog.ShowDirsOnly |
QtWidgets.QFileDialog.DontResolveSymlinks)
if log_dir != "":

View File

@@ -54,7 +54,7 @@ qualname=D810.tracker
propagate=0
[logger_D810Emulator]
level=INFO
level=WARNING
handlers=defaultFileHandler
qualname=D810.emulator
propagate=0

View File

@@ -3,7 +3,7 @@ import shutil
import logging
import logging.config
LOG_CONFIG_FILENAME = "log.ini"""
LOG_CONFIG_FILENAME = "log.ini"
LOG_FILENAME = "d810.log"
Z3_TEST_FILENAME = "z3_check_instructions_substitution.py"

View File

@@ -19,6 +19,7 @@
"d810.optimizers.instructions.pattern_matching.rewrite_sub",
"d810.optimizers.instructions.pattern_matching.rewrite_xor",
"d810.optimizers.instructions.pattern_matching.weird",
"d810.optimizers.instructions.pattern_matching.experimental",
"d810.optimizers.instructions.pattern_matching",
"d810.optimizers.instructions.chain.handler",
"d810.optimizers.instructions.chain.chain_rules",
@@ -46,6 +47,7 @@
"d810.optimizers.flow.flattening.unflattener_fake_jump",
"d810.optimizers.flow.flattening.unflattener_switch_case",
"d810.optimizers.flow.flattening.unflattener_indirect",
"d810.optimizers.flow.flattening.fix_pred_cond_jump_block",
"d810.optimizers.flow.flattening",
"d810.optimizers.flow",
"d810.hexrays_helpers",

View File

@@ -2,5 +2,7 @@ from d810.optimizers.flow.flattening.unflattener import Unflattener
from d810.optimizers.flow.flattening.unflattener_switch_case import UnflattenerSwitchCase
from d810.optimizers.flow.flattening.unflattener_indirect import UnflattenerTigressIndirect
from d810.optimizers.flow.flattening.unflattener_fake_jump import UnflattenerFakeJump
from d810.optimizers.flow.flattening.fix_pred_cond_jump_block import FixPredecessorOfConditionalJumpBlock
UNFLATTENING_BLK_RULES = [Unflattener(), UnflattenerSwitchCase(), UnflattenerTigressIndirect(), UnflattenerFakeJump()]
UNFLATTENING_BLK_RULES = [Unflattener(), UnflattenerSwitchCase(), UnflattenerTigressIndirect(), UnflattenerFakeJump(),
FixPredecessorOfConditionalJumpBlock()]

View File

@@ -0,0 +1,161 @@
import logging
from typing import List, Tuple
from ida_hexrays import *
from d810.tracker import MopTracker
from d810.cfg_utils import duplicate_block, make_2way_block_goto, update_blk_successor
from d810.hexrays_formatters import format_minsn_t, dump_microcode_for_debug
from d810.optimizers.flow.flattening.utils import get_all_possibles_values
from d810.optimizers.flow.flattening.generic import GenericUnflatteningRule
from d810.utils import unsigned_to_signed
unflat_logger = logging.getLogger('D810.unflat')
JMP_OPCODE_HANDLED = [m_jnz, m_jz, m_jae, m_jb, m_ja, m_jbe, m_jge, m_jg, m_jl, m_jle]
class FixPredecessorOfConditionalJumpBlock(GenericUnflatteningRule):
DESCRIPTION = "Detect if a predecessor of a conditional block always takes the same path and patch it (works for O-LLVM style control flow flattening)"
DEFAULT_UNFLATTENING_MATURITIES = [MMAT_CALLS, MMAT_GLBOPT1, MMAT_GLBOPT2]
DEFAULT_MAX_PASSES = 100
def is_jump_taken(self, jmp_blk: mblock_t, pred_comparison_values: List[int]) -> Tuple[bool, bool]:
if len(pred_comparison_values) == 0:
return False, False
jmp_ins = jmp_blk.tail
compared_value = jmp_ins.r.nnn.value
compared_value_size = jmp_ins.r.size
is_jmp_always_taken = False
is_jmp_never_taken = False
if jmp_ins.opcode == m_jnz:
is_jmp_always_taken = all([possible_value != compared_value for possible_value in pred_comparison_values])
is_jmp_never_taken = all([possible_value == compared_value for possible_value in pred_comparison_values])
elif jmp_ins.opcode == m_jz:
is_jmp_always_taken = all([possible_value == compared_value for possible_value in pred_comparison_values])
is_jmp_never_taken = all([possible_value != compared_value for possible_value in pred_comparison_values])
elif jmp_ins.opcode == m_jae:
is_jmp_always_taken = all([possible_value >= compared_value for possible_value in pred_comparison_values])
is_jmp_never_taken = all([possible_value < compared_value for possible_value in pred_comparison_values])
elif jmp_ins.opcode == m_jb:
is_jmp_always_taken = all([possible_value < compared_value for possible_value in pred_comparison_values])
is_jmp_never_taken = all([possible_value >= compared_value for possible_value in pred_comparison_values])
elif jmp_ins.opcode == m_ja:
is_jmp_always_taken = all([possible_value > compared_value for possible_value in pred_comparison_values])
is_jmp_never_taken = all([possible_value <= compared_value for possible_value in pred_comparison_values])
elif jmp_ins.opcode == m_jbe:
is_jmp_always_taken = all([possible_value <= compared_value for possible_value in pred_comparison_values])
is_jmp_never_taken = all([possible_value > compared_value for possible_value in pred_comparison_values])
elif jmp_ins.opcode == m_jg:
is_jmp_always_taken = all([unsigned_to_signed(possible_value, compared_value_size) > unsigned_to_signed(
compared_value, compared_value_size) for possible_value in pred_comparison_values])
is_jmp_never_taken = all([unsigned_to_signed(possible_value, compared_value_size) <= unsigned_to_signed(
compared_value, compared_value_size) for possible_value in pred_comparison_values])
elif jmp_ins.opcode == m_jge:
is_jmp_always_taken = all([unsigned_to_signed(possible_value, compared_value_size) >= unsigned_to_signed(
compared_value, compared_value_size) for possible_value in pred_comparison_values])
is_jmp_never_taken = all([unsigned_to_signed(possible_value, compared_value_size) < unsigned_to_signed(
compared_value, compared_value_size) for possible_value in pred_comparison_values])
elif jmp_ins.opcode == m_jl:
is_jmp_always_taken = all([unsigned_to_signed(possible_value, compared_value_size) < unsigned_to_signed(
compared_value, compared_value_size) for possible_value in pred_comparison_values])
is_jmp_never_taken = all([unsigned_to_signed(possible_value, compared_value_size) >= unsigned_to_signed(
compared_value, compared_value_size) for possible_value in pred_comparison_values])
elif jmp_ins.opcode == m_jle:
is_jmp_always_taken = all([unsigned_to_signed(possible_value, compared_value_size) <= unsigned_to_signed(
compared_value, compared_value_size) for possible_value in pred_comparison_values])
is_jmp_never_taken = all([unsigned_to_signed(possible_value, compared_value_size) > unsigned_to_signed(
compared_value, compared_value_size) for possible_value in pred_comparison_values])
return is_jmp_always_taken, is_jmp_never_taken
def sort_predecessors(self, blk):
# this function sorts the blk predecessors into three lists:
# - A list of predecessors where the jump is always taken
# - A list of predecessors where the jump is never taken
# - A list of predecessors where we don't know
pred_jmp_always_taken = []
pred_jmp_never_taken = []
pred_jmp_unk = []
op_compared = mop_t(blk.tail.l)
blk_preset_list = [x for x in blk.predset]
for pred_serial in blk_preset_list:
cmp_variable_tracker = MopTracker([op_compared], max_nb_block=100, max_path=1000)
cmp_variable_tracker.reset()
pred_blk = blk.mba.get_mblock(pred_serial)
pred_histories = cmp_variable_tracker.search_backward(pred_blk, pred_blk.tail)
pred_values = get_all_possibles_values(pred_histories, [op_compared])
pred_values = [x[0] for x in pred_values]
unflat_logger.info("Pred {0} has {1} possible path ({2} different cst): {3}"
.format(pred_blk.serial, len(pred_values), len(set(pred_values)), pred_values))
if None in pred_values:
pred_jmp_unk.append(pred_blk)
continue
is_jmp_always_taken, is_jmp_never_taken = self.is_jump_taken(blk, pred_values)
if is_jmp_always_taken and is_jmp_never_taken:
# this should never happen
unflat_logger.error("It seems that I am stupid: '{0}' is always taken and not taken when coming from {1}: {2}".format(format_minsn_t(blk.tail), pred_blk.serial, pred_values))
pred_jmp_unk.append(pred_blk)
continue
if is_jmp_always_taken:
unflat_logger.info("It seems that '{0}' is always taken when coming from {1}: {2}".format(format_minsn_t(blk.tail), pred_blk.serial, pred_values))
pred_jmp_always_taken.append(pred_blk)
if is_jmp_never_taken:
unflat_logger.info("It seems that '{0}' is never taken when coming from {1}: {2}".format(format_minsn_t(blk.tail), pred_blk.serial, pred_values))
pred_jmp_never_taken.append(pred_blk)
return pred_jmp_always_taken, pred_jmp_never_taken, pred_jmp_unk
def analyze_blk(self, blk: mblock_t) -> int:
if (blk.tail is None) or blk.tail.opcode not in JMP_OPCODE_HANDLED:
return 0
if blk.tail.r.t != mop_n:
return 0
unflat_logger.info("Checking if block {0} can be simplified: {1}".format(blk.serial, format_minsn_t(blk.tail)))
pred_jmp_always_taken, pred_jmp_never_taken, pred_jmp_unk = self.sort_predecessors(blk)
unflat_logger.info("Block {0} has {1} preds: {2} always jmp, {3} never jmp, {4} unk".format(blk.serial, blk.npred(), len(pred_jmp_always_taken), len(pred_jmp_never_taken), len(pred_jmp_unk)))
nb_change = 0
if len(pred_jmp_always_taken) > 0:
dump_microcode_for_debug(self.mba, self.log_dir, "{0}_{1}_before_jmp_always_fix".format(self.cur_maturity_pass, blk.serial))
for pred_blk in pred_jmp_always_taken:
new_jmp_block, new_default_block = duplicate_block(blk)
make_2way_block_goto(new_jmp_block, blk.tail.d.b)
update_blk_successor(pred_blk, blk.serial, new_jmp_block.serial)
dump_microcode_for_debug(self.mba, self.log_dir, "{0}_{1}_after_jmp_always_fix".format(self.cur_maturity_pass, blk.serial))
nb_change += len(pred_jmp_always_taken)
if len(pred_jmp_never_taken) > 0:
dump_microcode_for_debug(self.mba, self.log_dir, "{0}_{1}_before_jmp_never_fix".format(self.cur_maturity_pass, blk.serial))
for pred_blk in pred_jmp_never_taken:
new_jmp_block, new_default_block = duplicate_block(blk)
make_2way_block_goto(new_jmp_block, blk.serial + 1)
update_blk_successor(pred_blk, blk.serial, new_jmp_block.serial)
dump_microcode_for_debug(self.mba, self.log_dir, "{0}_{1}_after_jmp_never_fix".format(self.cur_maturity_pass, blk.serial))
nb_change += len(pred_jmp_never_taken)
return nb_change
def optimize(self, blk: mblock_t) -> int:
self.mba = blk.mba
if not self.check_if_rule_should_be_used(blk):
return 0
self.last_pass_nb_patch_done = self.analyze_blk(blk)
if self.last_pass_nb_patch_done > 0:
self.mba.mark_chains_dirty()
self.mba.optimize_local(0)
self.mba.verify(True)
return self.last_pass_nb_patch_done
def check_if_rule_should_be_used(self, blk: mblock_t) -> bool:
if self.cur_maturity != self.mba.maturity:
self.cur_maturity = self.mba.maturity
self.cur_maturity_pass = 0
if self.cur_maturity not in self.maturities:
return False
if (self.DEFAULT_MAX_PASSES is not None) and (self.cur_maturity_pass >= self.DEFAULT_MAX_PASSES):
return False
if (blk.tail is None) or blk.tail.opcode not in JMP_OPCODE_HANDLED:
return False
if blk.tail.r.t != mop_n:
return False
self.cur_maturity_pass += 1
return True
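A standalone illustration of the classification performed by is_jump_taken, written with plain integers instead of Hex-Rays operands (the helper below is a stand-in for the m_jnz branch only, not part of the plugin):

def classify_jnz(pred_comparison_values, compared_value):
    # Mirrors the m_jnz case above: the jump is always taken if no predecessor value
    # can equal the compared constant, never taken if every value equals it.
    always_taken = all(v != compared_value for v in pred_comparison_values)
    never_taken = all(v == compared_value for v in pred_comparison_values)
    return always_taken, never_taken

# A predecessor that can only produce the state values 5 or 7 always takes 'jnz x, 3',
# so its edge can be redirected straight to the jump target.
assert classify_jnz([5, 7], 3) == (True, False)
# A predecessor that always produces 3 never takes the jump: redirect it to the fall-through block.
assert classify_jnz([3], 3) == (False, True)
# If both outcomes are possible, the predecessor stays in the "unknown" list and is not patched.
assert classify_jnz([3, 5], 3) == (False, False)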

View File

@@ -300,6 +300,7 @@ class GenericDispatcherUnflatteningRule(GenericUnflatteningRule):
self.dispatcher_list = []
self.max_duplication_passes = self.DEFAULT_MAX_DUPLICATION_PASSES
self.max_passes = self.DEFAULT_MAX_PASSES
self.non_significant_changes = 0
def check_if_rule_should_be_used(self, blk: mblock_t) -> bool:
if not super().check_if_rule_should_be_used(blk):
@@ -327,12 +328,20 @@ class GenericDispatcherUnflatteningRule(GenericUnflatteningRule):
def ensure_all_dispatcher_fathers_are_direct(self) -> int:
nb_change = 0
for dispatcher_info in self.dispatcher_list:
nb_change += self.ensure_dispatcher_fathers_are_direct(dispatcher_info)
dispatcher_father_list = [self.mba.get_mblock(x) for x in dispatcher_info.entry_block.blk.predset]
for dispatcher_father in dispatcher_father_list:
nb_change += ensure_child_has_an_unconditional_father(dispatcher_father,
dispatcher_info.entry_block.blk)
return nb_change
def ensure_dispatcher_fathers_are_direct(self, dispatcher_info: GenericDispatcherInfo) -> int:
nb_change = 0
dispatcher_father_list = [self.mba.get_mblock(x) for x in dispatcher_info.entry_block.blk.predset]
for dispatcher_father in dispatcher_father_list:
nb_change += ensure_child_has_an_unconditional_father(dispatcher_father, dispatcher_info.entry_block.blk)
return nb_change
def register_initialization_variables(self, mop_tracker):
pass
@@ -358,6 +367,11 @@ class GenericDispatcherUnflatteningRule(GenericUnflatteningRule):
raise NotDuplicableFatherException("Dispatcher {0} predecessor {1} is not duplicable: {2}"
.format(dispatcher_entry_block.serial, dispatcher_father.serial,
father_histories_cst))
for father_history_cst in father_histories_cst:
if None in father_history_cst:
raise NotDuplicableFatherException("Dispatcher {0} predecessor {1} has None value: {2}"
.format(dispatcher_entry_block.serial, dispatcher_father.serial,
father_histories_cst))
unflat_logger.info("Dispatcher {0} predecessor {1} is resolvable: {2}"
.format(dispatcher_entry_block.serial, dispatcher_father.serial, father_histories_cst))
@@ -407,9 +421,9 @@ class GenericDispatcherUnflatteningRule(GenericUnflatteningRule):
.format(dispatcher_father.serial, mop_searched_values_list))
def remove_flattening(self) -> int:
total_nb_change = ensure_last_block_is_goto(self.mba)
total_nb_change += self.ensure_all_dispatcher_fathers_are_direct()
nb_flattened_branches = 0
total_nb_change = 0
self.non_significant_changes = ensure_last_block_is_goto(self.mba)
self.non_significant_changes += self.ensure_all_dispatcher_fathers_are_direct()
for dispatcher_info in self.dispatcher_list:
dump_microcode_for_debug(self.mba, self.log_dir, "unflat_{0}_dispatcher_{1}_before_duplication"
.format(self.cur_maturity_pass, dispatcher_info.entry_block.serial))
@@ -447,7 +461,7 @@ class GenericDispatcherUnflatteningRule(GenericUnflatteningRule):
if not self.check_if_rule_should_be_used(blk):
return 0
self.last_pass_nb_patch_done = 0
unflat_logger.info("Unflattening at maturity {0} path {1}".format(self.cur_maturity, self.cur_maturity_pass))
unflat_logger.info("Unflattening at maturity {0} pass {1}".format(self.cur_maturity, self.cur_maturity_pass))
dump_microcode_for_debug(self.mba, self.log_dir, "unflat_{0}_start".format(self.cur_maturity_pass))
self.retrieve_all_dispatchers()
if len(self.dispatcher_list) == 0:
@@ -458,12 +472,12 @@ class GenericDispatcherUnflatteningRule(GenericUnflatteningRule):
for dispatcher_info in self.dispatcher_list:
dispatcher_info.print_info()
self.last_pass_nb_patch_done = self.remove_flattening()
unflat_logger.info("Unflattening at maturity {0} path {1}: {2} changes"
unflat_logger.info("Unflattening at maturity {0} pass {1}: {2} changes"
.format(self.cur_maturity, self.cur_maturity_pass, self.last_pass_nb_patch_done))
mba_deep_cleaning(self.mba)
nb_clean = mba_deep_cleaning(self.mba, False)
dump_microcode_for_debug(self.mba, self.log_dir, "unflat_{0}_after_cleaning".format(self.cur_maturity_pass))
if self.last_pass_nb_patch_done > 0:
if self.last_pass_nb_patch_done + nb_clean + self.non_significant_changes > 0:
self.mba.mark_chains_dirty()
self.mba.optimize_local(0)
self.mba.verify(True)
self.mba.verify(True)
return self.last_pass_nb_patch_done

View File

@@ -29,6 +29,7 @@ class OllvmDispatcherInfo(GenericDispatcherInfo):
self.comparison_values.append(num_mop.nnn.value)
self._explore_children(self.entry_block)
dispatcher_blk_with_external_father = self._get_dispatcher_blocks_with_external_father()
# TODO: I think this can be wrong because we are too permissive in detection of dispatcher blocks
if len(dispatcher_blk_with_external_father) != 0:
return False
return True
@@ -103,7 +104,7 @@ class OllvmDispatcherInfo(GenericDispatcherInfo):
class OllvmDispatcherCollector(GenericDispatcherCollector):
DISPATCHER_CLASS = OllvmDispatcherInfo
DEFAULT_DISPATCHER_MIN_INTERNAL_BLOCK = 2
DEFAULT_DISPATCHER_MIN_EXIT_BLOCK = 2
DEFAULT_DISPATCHER_MIN_EXIT_BLOCK = 3
DEFAULT_DISPATCHER_MIN_COMPARISON_VALUE = 2

View File

@@ -3,6 +3,7 @@ import os
from d810.ast import minsn_to_ast
from d810.hexrays_formatters import format_minsn_t, format_mop_t, maturity_to_string
from d810.optimizers.handler import DEFAULT_INSTRUCTION_MATURITIES
from d810.optimizers.instructions.analysis.handler import InstructionAnalysisRule
from d810.optimizers.instructions.analysis.utils import get_possible_patterns
@@ -12,6 +13,7 @@ class ExampleGuessingRule(InstructionAnalysisRule):
def __init__(self):
super().__init__()
self.maturities = DEFAULT_INSTRUCTION_MATURITIES
self.cur_maturity = None
self.min_nb_var = 1
self.max_nb_var = 3

View File

@@ -63,11 +63,18 @@ class SetGlobalVariablesToZeroIfDetectedReadOnly(EarlyRule):
return True
def check_candidate(self, candidate):
if candidate["ro_dword"].mop.t != mop_v:
return False
mem_read_address = candidate["ro_dword"].mop.g
if not self.is_read_only_inited_var(mem_read_address):
mem_read_address = None
if candidate["ro_dword"].mop.t == mop_v:
mem_read_address = candidate["ro_dword"].mop.g
elif candidate["ro_dword"].mop.t == mop_a:
if candidate["ro_dword"].mop.a.t == mop_v:
mem_read_address = candidate["ro_dword"].mop.a.g
if mem_read_address is None:
return False
if not self.is_read_only_inited_var(mem_read_address):
return False
candidate.add_constant_leaf("val_res", 0, candidate["ro_dword"].mop.size)
return True

View File

@@ -3,7 +3,7 @@ import logging
from typing import List
from ida_hexrays import *
from d810.optimizers.handler import OptimizationRule, DEFAULT_INSTRUCTION_MATURITIES
from d810.optimizers.handler import OptimizationRule
from d810.hexrays_formatters import format_minsn_t
from d810.ast import minsn_to_ast, AstNode
from d810.errors import D810Exception
@@ -16,7 +16,7 @@ optimizer_logger = logging.getLogger('D810.optimizer')
class InstructionOptimizationRule(OptimizationRule):
def __init__(self):
super().__init__()
self.maturities = DEFAULT_INSTRUCTION_MATURITIES
self.maturities = []
def check_and_replace(self, blk, ins):
return None
@@ -97,6 +97,8 @@ class InstructionOptimizer(object):
if not is_valid_rule_class:
return False
optimizer_logger.debug("Adding rule {0}".format(rule))
if len(rule.maturities) == 0:
rule.maturities = self.maturities
self.rules.add(rule)
self.rules_usage_info[rule.name] = 0
return True
@@ -114,9 +116,11 @@ class InstructionOptimizer(object):
def get_optimized_instruction(self, blk: mblock_t, ins: minsn_t):
if blk is not None:
self.cur_maturity = blk.mba.maturity
if self.cur_maturity not in self.maturities:
return None
# if self.cur_maturity not in self.maturities:
# return None
for rule in self.rules:
if self.cur_maturity not in rule.maturities:
continue
try:
new_ins = rule.check_and_replace(blk, ins)
if new_ins is not None:

View File

@@ -12,6 +12,7 @@ from d810.optimizers.instructions.pattern_matching.rewrite_or import *
from d810.optimizers.instructions.pattern_matching.rewrite_sub import *
from d810.optimizers.instructions.pattern_matching.rewrite_xor import *
from d810.optimizers.instructions.pattern_matching.weird import *
from d810.optimizers.instructions.pattern_matching.experimental import *
PATTERN_MATCHING_RULES = [x() for x in get_all_subclasses(PatternMatchingRule)]

View File

@@ -0,0 +1,36 @@
from ida_hexrays import *
from d810.optimizers.instructions.pattern_matching.handler import PatternMatchingRule
from d810.ast import AstLeaf, AstConstant, AstNode
from d810.hexrays_formatters import format_mop_t
class ReplaceMovHigh(PatternMatchingRule):
PATTERN = AstNode(m_mov,
AstConstant('c_0'))
REPLACEMENT_PATTERN = AstNode(m_or, AstConstant("new_c_0"), AstNode(m_and, AstLeaf("new_reg"), AstConstant("mask")))
def check_candidate(self, candidate):
# IDA does not do constant propagation for patterns such as:
# mov #0x65A4.2, r6.2
# mov #0x210F.2, r6^2.2
# jz r0.4, r6.4
# Thus, we try to detect a mov to r6^2 and replace it with (or #0x210F0000.4, r6.4 & 0x0000ffff.4, r6.4)
# By doing that, IDA's constant propagation will work again.
if candidate.dst_mop.t != mop_r:
return False
dst_reg_name = format_mop_t(candidate.dst_mop)
if dst_reg_name is None:
return False
if "^2" in dst_reg_name:
if candidate["c_0"].mop.size != 2:
return False
candidate.add_constant_leaf("new_c_0", candidate["c_0"].value << 16, 4)
candidate.add_constant_leaf("mask", 0xffff, 4)
new_dst_reg = mop_t()
new_dst_reg.make_reg(candidate.dst_mop.r - 2, 4)
candidate.add_leaf("new_reg", new_dst_reg)
candidate.dst_mop = new_dst_reg
return True
else:
return False
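The arithmetic behind the ReplaceMovHigh rewrite, checked with plain Python integers (the 0xDEAD upper half is a hypothetical pre-existing register value; the rewrite discards it anyway):

# mov #0x210F.2, r6^2.2 writes the top half of a 4-byte register; the rule rewrites it as
# (r6.4 & 0x0000FFFF) | (0x210F << 16), turning the whole register into one constant expression.
reg = 0xDEAD65A4                      # low half already holds 0x65A4 from the first mov
new_c_0 = 0x210F << 16                # the 2-byte constant shifted into position ("new_c_0")
mask = 0x0000FFFF                     # keeps the low half ("mask")
rewritten = (reg & mask) | new_c_0
assert rewritten == 0x210F65A4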

View File

@@ -2,7 +2,7 @@ from ida_hexrays import *
from d810.optimizers.instructions.pattern_matching.handler import PatternMatchingRule
from d810.ast import AstLeaf, AstConstant, AstNode
from d810.hexrays_helpers import equal_bnot_mop, SUB_TABLE
from d810.hexrays_helpers import equal_bnot_mop, equal_bnot_cst, SUB_TABLE
class Xor_HackersDelightRule_1(PatternMatchingRule):
@@ -270,6 +270,48 @@ class Xor_Rule_3(PatternMatchingRule):
REPLACEMENT_PATTERN = AstNode(m_xor, AstNode(m_bnot, AstLeaf('x_0')), AstLeaf('x_1'))
class Xor_Rule_4(PatternMatchingRule):
PATTERN = AstNode(m_or,
AstNode(m_and,
AstLeaf("x_0"),
AstLeaf("bnot_x_1")),
AstNode(m_and,
AstLeaf("bnot_x_0"),
AstLeaf("x_1")))
REPLACEMENT_PATTERN = AstNode(m_xor,
AstLeaf('x_0'),
AstLeaf("x_1"))
def check_candidate(self, candidate):
if not equal_bnot_mop(candidate["x_0"].mop, candidate["bnot_x_0"].mop):
return False
if not equal_bnot_mop(candidate["x_1"].mop, candidate["bnot_x_1"].mop):
return False
return True
class Xor_Rule_4_WithXdu(PatternMatchingRule):
PATTERN = AstNode(m_or,
AstNode(m_and,
AstLeaf("x_0"),
AstConstant("bnot_c_1")),
AstNode(m_and,
AstNode(m_bnot, AstLeaf("x_0")),
AstConstant("c_1")))
REPLACEMENT_PATTERN = AstNode(m_xor,
AstLeaf("x_0"),
AstLeaf("c_1"))
def check_candidate(self, candidate):
if candidate["x_0"].mop.t != mop_d:
return False
if candidate["x_0"].mop.d.opcode != m_xdu:
return False
return equal_bnot_cst(candidate["c_1"].mop, candidate["bnot_c_1"].mop, mop_size=candidate["x_0"].mop.d.l.size)
class XorAlmost_Rule_1(PatternMatchingRule):
PATTERN = AstNode(m_sub,
AstNode(m_add,