diff --git a/plugins/warp/src/container/disk.rs b/plugins/warp/src/container/disk.rs index 02da0f3034..b0f48c29a2 100644 --- a/plugins/warp/src/container/disk.rs +++ b/plugins/warp/src/container/disk.rs @@ -27,7 +27,11 @@ pub struct DiskContainer { impl DiskContainer { pub fn new(name: String, sources: HashMap) -> Self { - Self { name, sources, writable: true } + Self { + name, + sources, + writable: true, + } } pub fn new_from_dir(dir_path: PathBuf) -> Self { diff --git a/plugins/workflow_objc/src/activities/mod.rs b/plugins/workflow_objc/src/activities/mod.rs index f1132d034b..98c9770f8a 100644 --- a/plugins/workflow_objc/src/activities/mod.rs +++ b/plugins/workflow_objc/src/activities/mod.rs @@ -1,3 +1,4 @@ pub mod inline_stubs; pub mod objc_msg_send_calls; +pub mod remove_memory_management; pub mod super_init; diff --git a/plugins/workflow_objc/src/activities/remove_memory_management.rs b/plugins/workflow_objc/src/activities/remove_memory_management.rs new file mode 100644 index 0000000000..470c315209 --- /dev/null +++ b/plugins/workflow_objc/src/activities/remove_memory_management.rs @@ -0,0 +1,192 @@ +use binaryninja::{ + architecture::{Architecture as _, CoreRegister, Register as _, RegisterInfo as _}, + binary_view::{BinaryView, BinaryViewExt as _}, + low_level_il::{ + expression::{ExpressionHandler, LowLevelILExpressionKind}, + function::{LowLevelILFunction, Mutable, NonSSA}, + instruction::{ + InstructionHandler, LowLevelILInstruction, LowLevelILInstructionKind, + LowLevelInstructionIndex, + }, + lifting::LowLevelILLabel, + LowLevelILRegisterKind, + }, + workflow::AnalysisContext, +}; + +use crate::{error::ILLevel, metadata::GlobalState, Error}; + +// TODO: We should also handle `objc_retain_x` / `objc_release_x` variants +// that use a custom calling convention. +const IGNORABLE_MEMORY_MANAGEMENT_FUNCTIONS: &[&[u8]] = &[ + b"_objc_autorelease", + b"_objc_autoreleaseReturnValue", + b"_objc_release", + b"_objc_retain", + b"_objc_retainAutorelease", + b"_objc_retainAutoreleaseReturnValue", + b"_objc_retainAutoreleasedReturnValue", + b"_objc_retainBlock", + b"_objc_unsafeClaimAutoreleasedReturnValue", +]; + +fn is_call_to_ignorable_memory_management_function<'func>( + view: &binaryninja::binary_view::BinaryView, + instr: &'func LowLevelILInstruction<'func, Mutable, NonSSA>, +) -> bool { + let target = match instr.kind() { + LowLevelILInstructionKind::Call(call) | LowLevelILInstructionKind::TailCall(call) => { + let LowLevelILExpressionKind::ConstPtr(address) = call.target().kind() else { + return false; + }; + address.value() + } + LowLevelILInstructionKind::Goto(target) => target.address(), + _ => return false, + }; + let Some(symbol) = view.symbol_by_address(target) else { + return false; + }; + + let symbol_name = symbol.full_name(); + let symbol_name = symbol_name.to_bytes(); + + // Remove any j_ prefix that the shared cache workflow adds to stub functions. + let symbol_name = symbol_name.strip_prefix(b"j_").unwrap_or(symbol_name); + + IGNORABLE_MEMORY_MANAGEMENT_FUNCTIONS.contains(&symbol_name) +} + +fn process_instruction( + bv: &BinaryView, + llil: &LowLevelILFunction, + insn: &LowLevelILInstruction, + link_register: LowLevelILRegisterKind, + link_register_size: usize, +) -> Result { + if !is_call_to_ignorable_memory_management_function(bv, insn) { + return Ok(false); + } + + // TODO: Removing calls to `objc_release` can sometimes leave behind a load of a struct field + // that appears to be unused. It's not clear whether we should be trying to detect and remove + // those here, or if some later analysis pass should be cleaning them up but isn't. + + match insn.kind() { + LowLevelILInstructionKind::TailCall(_) => unsafe { + llil.set_current_address(insn.address()); + llil.replace_expression( + insn.expr_idx(), + llil.ret(llil.reg(link_register_size, link_register)), + ); + }, + LowLevelILInstructionKind::Call(_) => unsafe { + // The memory management functions that are currently supported either return void + // or return their first argument. For arm64, the first argument is passed in `x0` + // and results are returned in `x0`, so we can replace the call with a nop. We'll need + // to revisit this to support other architectures, and to support the `objc_retain_x` + // `objc_release_x` functions that accept their argument in a different register. + llil.set_current_address(insn.address()); + llil.replace_expression(insn.expr_idx(), llil.nop()); + }, + LowLevelILInstructionKind::Goto(_) if insn.index.0 == 0 => unsafe { + // If the `objc_retain` is the first instruction in the function, this function + // can only contain the call to the memory management function since when the + // memory management function returns, it will return to this function's caller. + llil.set_current_address(insn.address()); + llil.replace_expression( + insn.expr_idx(), + llil.ret(llil.reg(link_register_size, link_register)), + ); + }, + LowLevelILInstructionKind::Goto(_) => { + // The shared cache workflow inlines calls to stub functions, which causes them + // to show up as a `lr = ; goto ;` + // sequence. We need to remove the load of `lr` and update the `goto` to jump + // to the next instruction. + + let Some(prev) = + llil.instruction_from_index(LowLevelInstructionIndex(insn.index.0 - 1)) + else { + return Ok(false); + }; + + let target = match prev.kind() { + LowLevelILInstructionKind::SetReg(op) if op.dest_reg() == link_register => { + let LowLevelILExpressionKind::ConstPtr(value) = op.source_expr().kind() else { + return Ok(false); + }; + value.value() + } + _ => return Ok(false), + }; + + let Some(LowLevelInstructionIndex(target_idx)) = llil.instruction_index_at(target) + else { + return Ok(false); + }; + + // TODO: Manually creating a label like this is fragile and relies on a) knowledge of + // how labels are used by core, and b) that the target is the first instruction in + // a basic block. We should do this differently. + let mut label = LowLevelILLabel::new(); + label.operand = target_idx; + + unsafe { + llil.set_current_address(prev.address()); + llil.replace_expression(prev.expr_idx(), llil.nop()); + llil.set_current_address(insn.address()); + llil.replace_expression(insn.expr_idx(), llil.goto(&mut label)); + } + } + _ => return Ok(false), + } + + Ok(true) +} + +pub fn process(ac: &AnalysisContext) -> Result<(), Error> { + let view = ac.view(); + if GlobalState::should_ignore_view(&view) { + return Ok(()); + } + + let func = ac.function(); + + let Some(link_register) = func.arch().link_reg() else { + return Ok(()); + }; + let link_register_size = link_register.info().size(); + let link_register = LowLevelILRegisterKind::Arch(link_register); + + let Some(llil) = (unsafe { ac.llil_function() }) else { + return Err(Error::MissingIL { + level: ILLevel::Low, + func_start: func.start(), + }); + }; + + let mut function_changed = false; + for block in llil.basic_blocks().iter() { + for insn in block.iter() { + match process_instruction(&view, &llil, &insn, link_register, link_register_size) { + Ok(true) => function_changed = true, + Ok(_) => {} + Err(err) => { + log::error!( + "Error processing instruction at {:#x}: {}", + insn.address(), + err + ); + continue; + } + } + } + } + + if function_changed { + // Regenerate SSA form after modifications + llil.generate_ssa_form(); + } + Ok(()) +} diff --git a/plugins/workflow_objc/src/activities/super_init.rs b/plugins/workflow_objc/src/activities/super_init.rs index fc6676e924..cfd3ecdc79 100644 --- a/plugins/workflow_objc/src/activities/super_init.rs +++ b/plugins/workflow_objc/src/activities/super_init.rs @@ -4,8 +4,8 @@ use binaryninja::{ function::Function, medium_level_il::{ operation::{ - Constant, LiftedCallSsa, LiftedLoadSsa, LiftedSetVarSsa, LiftedSetVarSsaField, - LiftedVarPhi, Var, VarSsa, + Constant, LiftedCallSsa, LiftedLoadSsa, LiftedSetVarSsa, LiftedSetVarSsaField, Var, + VarSsa, }, MediumLevelILFunction, MediumLevelILLiftedInstruction, MediumLevelILLiftedInstructionKind, }, @@ -169,18 +169,9 @@ fn return_type_for_super_init(call: &Call, view: &BinaryView) -> Option src, - MediumLevelILLiftedInstructionKind::VarPhi(LiftedVarPhi { .. }) => { + _ => { // The Swift compiler generates code that conditionally assigns to the receiver field of `objc_super`. - // TODO: Recognize that pattern and handle it. log::debug!( - " found phi node for definition of `objc_super` variable at {:#0x} {:?}", - super_param_def.address, - super_param_def - ); - return None; - } - _ => { - log::error!( "Unexpected variable definition kind at {:#0x} {:#x?}", super_param_def.address, super_param_def diff --git a/plugins/workflow_objc/src/workflow.rs b/plugins/workflow_objc/src/workflow.rs index f411951bf4..f1054411bf 100644 --- a/plugins/workflow_objc/src/workflow.rs +++ b/plugins/workflow_objc/src/workflow.rs @@ -69,9 +69,28 @@ pub fn register_activities() -> Result<(), WorkflowRegistrationError> { run(activities::super_init::process), ); + let remove_memory_management_activity = Activity::new_with_action( + activity::Config::action( + "core.function.objectiveC.removeMemoryManagement", + "Obj-C: Remove reference counting calls", + "Remove calls to objc_retain / objc_release / objc_autorelease to simplify the resulting higher-level ILs", + ) + .eligibility( + activity::Eligibility::auto_with_default(false).matching_all_predicates(&[ + activity::ViewType::in_(["Mach-O", "DSCView"]).into(), + activity::Platform::in_(["mac-aarch64", "ios-aarch64"]).into() + ]) + ), + run(activities::remove_memory_management::process), + ); + workflow .activity_after(&inline_stubs_activity, "core.function.translateTailCalls")? .activity_after(&objc_msg_send_calls_activity, &inline_stubs_activity.name())? + .activity_before( + &remove_memory_management_activity, + "core.function.generateMediumLevelIL", + )? .activity_after(&super_init_activity, "core.function.generateMediumLevelIL")? .register_with_config(WORKFLOW_INFO)?; diff --git a/rust/src/workflow/activity.rs b/rust/src/workflow/activity.rs index a893e79830..a9afc2f107 100644 --- a/rust/src/workflow/activity.rs +++ b/rust/src/workflow/activity.rs @@ -453,6 +453,52 @@ impl From for Predicate { } } +/// A predicate that checks the platform of the [`BinaryView`](crate::binary_view::BinaryView). +#[must_use] +pub enum Platform { + In(Vec), + NotIn(Vec), +} + +impl Platform { + /// Creates a new predicate that checks if the platform of the [`BinaryView`](crate::binary_view::BinaryView) + /// _is_ in the provided list. + pub fn in_(values: I) -> Self + where + I: IntoIterator, + S: AsRef, + { + Platform::In(values.into_iter().map(|s| s.as_ref().to_string()).collect()) + } + + /// Creates a new predicate that checks if the platform of the [`BinaryView`](crate::binary_view::BinaryView) + /// _is not_ in the provided list. + pub fn not_in(values: I) -> Self + where + I: IntoIterator, + S: AsRef, + { + Platform::NotIn(values.into_iter().map(|s| s.as_ref().to_string()).collect()) + } +} + +impl From for Predicate { + fn from(predicate: Platform) -> Self { + match predicate { + Platform::In(value) => Predicate { + predicate_type: PredicateType::Platform, + operator: Operator::In, + value: serde_json::json!(value), + }, + Platform::NotIn(value) => Predicate { + predicate_type: PredicateType::Platform, + operator: Operator::NotIn, + value: serde_json::json!(value), + }, + } + } +} + /// A predicate that evaluates the value of a specific setting. #[must_use] pub struct Setting { @@ -533,6 +579,7 @@ impl From for Predicate { enum PredicateType { Setting { identifier: String }, ViewType, + Platform, } #[derive(Deserialize, Serialize, Debug, Copy, Clone)]