diff --git a/Cargo.lock b/Cargo.lock index 3b05cdec61..93b6283d1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -274,6 +274,17 @@ dependencies = [ "syn", ] +[[package]] +name = "bstr" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.19.0" @@ -3488,6 +3499,7 @@ version = "0.1.0" dependencies = [ "binaryninja", "binaryninjacore-sys", + "bstr", "dashmap", "log", "once_cell", diff --git a/plugins/workflow_objc/Cargo.toml b/plugins/workflow_objc/Cargo.toml index 09a2d3a725..f6392d9f3b 100644 --- a/plugins/workflow_objc/Cargo.toml +++ b/plugins/workflow_objc/Cargo.toml @@ -14,3 +14,4 @@ log = "0.4" dashmap = { version = "6.1", features = ["rayon"]} once_cell = "1.20" thiserror = "2.0" +bstr = "1.12" diff --git a/plugins/workflow_objc/src/activities/mod.rs b/plugins/workflow_objc/src/activities/mod.rs index 1134690014..f1132d034b 100644 --- a/plugins/workflow_objc/src/activities/mod.rs +++ b/plugins/workflow_objc/src/activities/mod.rs @@ -1,2 +1,3 @@ pub mod inline_stubs; pub mod objc_msg_send_calls; +pub mod super_init; diff --git a/plugins/workflow_objc/src/activities/objc_msg_send_calls.rs b/plugins/workflow_objc/src/activities/objc_msg_send_calls.rs index 239b606c0e..213d831447 100644 --- a/plugins/workflow_objc/src/activities/objc_msg_send_calls.rs +++ b/plugins/workflow_objc/src/activities/objc_msg_send_calls.rs @@ -12,6 +12,7 @@ use binaryninja::{ }; use crate::{ + error::ILLevel, metadata::{GlobalState, Selector}, Error, }; @@ -31,11 +32,17 @@ pub fn process(ac: &AnalysisContext) -> Result<(), Error> { let func_start = ac.function().start(); let Some(llil) = (unsafe { ac.llil_function() }) else { - return Err(Error::MissingLowLevelIL { func_start }); + return Err(Error::MissingIL { + level: ILLevel::Low, + func_start, + }); }; let Some(ssa) = llil.ssa_form() else { - return Err(Error::MissingSsaForm { func_start }); + return Err(Error::MissingSsaForm { + level: ILLevel::Low, + func_start, + }); }; let func = ac.function(); diff --git a/plugins/workflow_objc/src/activities/objc_msg_send_calls/adjust_call_type.rs b/plugins/workflow_objc/src/activities/objc_msg_send_calls/adjust_call_type.rs index 21a3d35fd6..d9cb134321 100644 --- a/plugins/workflow_objc/src/activities/objc_msg_send_calls/adjust_call_type.rs +++ b/plugins/workflow_objc/src/activities/objc_msg_send_calls/adjust_call_type.rs @@ -11,9 +11,7 @@ use binaryninja::{ }; use super::MessageSendType; -use crate::{metadata::Selector, Error}; - -const HEURISTIC_CONFIDENCE: u8 = 192; +use crate::{metadata::Selector, workflow::Confidence, Error}; fn named_type(bv: &BinaryView, name: &str) -> Option> { bv.type_by_name(name) @@ -68,7 +66,7 @@ pub fn process_call( let func_type = Type::function(&return_type, params, false); func.set_auto_call_type_adjustment( insn.address(), - Conf::new(func_type, HEURISTIC_CONFIDENCE).as_ref(), + Conf::new(func_type, Confidence::ObjCMsgSend as u8).as_ref(), Some(arch), ); diff --git a/plugins/workflow_objc/src/activities/super_init.rs b/plugins/workflow_objc/src/activities/super_init.rs new file mode 100644 index 0000000000..ab4586173d --- /dev/null +++ b/plugins/workflow_objc/src/activities/super_init.rs @@ -0,0 +1,331 @@ +use binaryninja::{ + binary_view::{BinaryView, BinaryViewBase, BinaryViewExt as _}, + confidence::Conf, + function::Function, + medium_level_il::{ + operation::{ + Constant, LiftedCallSsa, LiftedLoadSsa, LiftedSetVarSsa, LiftedSetVarSsaField, + LiftedVarPhi, Var, VarSsa, + }, + MediumLevelILFunction, MediumLevelILLiftedInstruction, MediumLevelILLiftedInstructionKind, + }, + rc::Ref, + types::Type, + variable::{RegisterValueType, SSAVariable}, + workflow::AnalysisContext, +}; +use bstr::{BStr, ByteSlice}; + +use crate::{ + error::ILLevel, + metadata::{GlobalState, Selector}, + workflow::Confidence, + Error, +}; + +// The `j_` prefix is for stub functions in the shared cache. +// It is added by the shared cache workflow. +const OBJC_MSG_SEND_SUPER_FUNCTIONS: &[&[u8]] = &[ + b"_objc_msgSendSuper2", + b"j__objc_msgSendSuper2", + b"_objc_msgSendSuper", + b"j__objc_msgSendSuper", +]; + +fn ssa_variable_value_or_load_of_constant_pointer( + function: &MediumLevelILFunction, + var: &SSAVariable, +) -> Option { + let value = function.ssa_variable_value(var); + match value.state { + RegisterValueType::ConstantPointerValue => return Some(value.value as u64), + RegisterValueType::UndeterminedValue => {} + _ => return None, + } + + let def = function.ssa_variable_definition(var)?; + let MediumLevelILLiftedInstructionKind::SetVarSsa(set_var) = def.lift().kind else { + return None; + }; + + let MediumLevelILLiftedInstructionKind::LoadSsa(LiftedLoadSsa { src, .. }) = set_var.src.kind + else { + return None; + }; + + match src.kind { + MediumLevelILLiftedInstructionKind::ConstPtr(Constant { constant }) => Some(constant), + _ => None, + } +} + +/// If `instr` is a constant pointer or is a variable whose value is loaded from a constant pointer, +/// return that pointer address. +fn match_constant_pointer_or_load_of_constant_pointer( + instr: &MediumLevelILLiftedInstruction, +) -> Option { + match instr.kind { + MediumLevelILLiftedInstructionKind::ConstPtr(Constant { constant }) => Some(constant), + MediumLevelILLiftedInstructionKind::VarSsa(var) => { + ssa_variable_value_or_load_of_constant_pointer(&instr.function, &var.src) + } + _ => None, + } +} + +#[allow(clippy::struct_field_names)] +struct Call<'a> { + pub instr: &'a MediumLevelILLiftedInstruction, + pub call: &'a LiftedCallSsa, + pub target: Ref, +} + +/// Returns a `Call` if `instr` is a call or tail call to a function whose name appears in `function_names` +fn match_call_to_function_named<'a>( + instr: &'a MediumLevelILLiftedInstruction, + view: &'a BinaryView, + function_names: &'a [&[u8]], +) -> Option> { + let (MediumLevelILLiftedInstructionKind::TailcallSsa(ref call) + | MediumLevelILLiftedInstructionKind::CallSsa(ref call)) = instr.kind + else { + return None; + }; + + let MediumLevelILLiftedInstructionKind::ConstPtr(Constant { + constant: call_target, + }) = call.dest.kind + else { + return None; + }; + + let target_function = view.function_at(&instr.function.function().platform(), call_target)?; + let function_name = target_function.symbol().full_name(); + if !function_names.contains(&function_name.to_bytes()) { + return None; + } + + Some(Call { + instr, + call, + target: target_function, + }) +} + +fn class_name_from_symbol_name(symbol_name: &BStr) -> Option<&BStr> { + // The symbol name for the `objc_class_t` can have different names depending + // on factors such as being local or external, and whether the reference + // is from the shared cache or a standalone Mach-O file. + Some(if symbol_name.starts_with(b"cls_") { + &symbol_name[4..] + } else if symbol_name.starts_with(b"clsRef_") { + &symbol_name[7..] + } else if symbol_name.starts_with(b"_OBJC_CLASS_$_") { + &symbol_name[14..] + } else { + return None; + }) +} + +/// Detect the return type for a call to `objc_msgSendSuper2` where the selector is in the `init` family. +/// Returns `None` if selector is not in the `init` family or the return type cannot be determined. +fn return_type_for_super_init(call: &Call, view: &BinaryView) -> Option> { + // Expecting to see at least `objc_super` and a selector. + if call.call.params.len() < 2 { + return None; + } + + let selector_addr = match_constant_pointer_or_load_of_constant_pointer(&call.call.params[1])?; + let selector = Selector::from_address(view, selector_addr).ok()?; + + // TODO: This will match `initialize` and `initiate` which are not init methods. + if !selector.name.starts_with("init") { + return None; + } + + let super_param = &call.call.params[0]; + let MediumLevelILLiftedInstructionKind::VarSsa(VarSsa { + src: super_param_var, + }) = super_param.kind + else { + log::debug!( + "Unhandled super paramater format at {:#0x} {:?}", + super_param.address, + super_param + ); + return None; + }; + + // Parameter is an SSA variable. Find its definitions to find when it was assigned. + // From there we can determine the values it was assigned. + let Some(super_param_def) = call + .instr + .function + .ssa_variable_definition(&super_param_var) + else { + log::debug!(" could not find definition of variable?"); + return None; + }; + + let src = match super_param_def.lift().kind { + MediumLevelILLiftedInstructionKind::SetVarSsa(LiftedSetVarSsa { src, .. }) => src, + MediumLevelILLiftedInstructionKind::VarPhi(LiftedVarPhi { .. }) => { + // The Swift compiler generates code that conditionally assigns to the receiver field of `objc_super`. + // TODO: Recognize that pattern and handle it. + log::debug!( + " found phi node for definition of `objc_super` variable at {:#0x} {:?}", + super_param_def.address, + super_param_def + ); + return None; + } + _ => { + log::error!( + "Unexpected variable definition kind at {:#0x} {:#x?}", + super_param_def.address, + super_param_def + ); + return None; + } + }; + + let src_var = match src.kind { + MediumLevelILLiftedInstructionKind::AddressOf(Var { src: src_var }) => src_var, + MediumLevelILLiftedInstructionKind::VarSsa(_) + | MediumLevelILLiftedInstructionKind::Sub(_) => { + // The Swift compiler generates code that initializes the `objc_super` variable in more varied ways. + log::debug!( + " found non-address-of variable definition of `objc_super` variable at {:#0x} {:?}", + super_param_def.address, + super_param_def + ); + return None; + } + _ => { + log::error!( + "Unexpected source of variable definition at {:#0x} {:x?}", + super_param_def.address, + super_param_def + ); + return None; + } + }; + + // `src_var` is a `struct objc_super`. Find constant values assigned to the `super_class` field (second field). + let super_class_constants: Vec<_> = + call.instr + .function + .variable_definitions(&src_var) + .into_iter() + .filter_map(|def| { + let def = def.lift(); + let src = match def.kind { + MediumLevelILLiftedInstructionKind::SetVarAliasedField( + LiftedSetVarSsaField { src, offset, .. }, + ) if offset == view.address_size() as u64 => src, + _ => { + return None; + } + }; + + match src.kind { + MediumLevelILLiftedInstructionKind::ConstPtr(Constant { constant }) => { + Some(constant) + } + _ => None, + } + }) + .collect(); + + // In the common case there are either zero or one assignments to the `super_class` field. + // If there are zero, that likely means the assigned value was not a constant. Handling + // that is above my pay grade. + let &[super_class_ptr] = &super_class_constants[..] else { + log::debug!( + "Unexpected number of assignments to super class found for {:#0x}: {:#0x?}", + src.address, + super_class_constants + ); + return None; + }; + + let Some(super_class_symbol) = view.symbol_by_address(super_class_ptr) else { + log::debug!("No symbol found for super class at {super_class_ptr:#0x}"); + return None; + }; + + let super_class_symbol_name = super_class_symbol.full_name(); + let Some(class_name) = + class_name_from_symbol_name(super_class_symbol_name.to_bytes().as_bstr()) + else { + log::debug!("Unable to extract class name from symbol name: {super_class_symbol_name:?}"); + return None; + }; + + let Some(class_type) = view.type_by_name(class_name.to_str_lossy()) else { + log::debug!("No type found for class named {class_name:?}"); + return None; + }; + + Some(Type::pointer(&call.target.arch(), &class_type)) +} + +/// Adjust the return type of the call represented by `call`. +fn adjust_return_type_of_call(call: &Call<'_>, return_type: &Type) { + let function = call.instr.function.function(); + + // We're changing only the return type, so preserve other aspects of any existing call type adjustment. + let target_function_type = if let Some(existing_call_type_adjustment) = + function.call_type_adjustment(call.instr.address, None) + { + existing_call_type_adjustment.contents + } else { + call.target.function_type() + }; + + // There's nothing to do if the return type is already correct + if let Some(conf) = target_function_type.return_value() { + if &*conf.contents == return_type { + return; + } + } + + let adjusted_call_type = target_function_type + .to_builder() + .set_child_type(return_type) + .finalize(); + + function.set_auto_call_type_adjustment( + call.instr.address, + Conf::new(&*adjusted_call_type, Confidence::SuperInit as u8), + None, + ); +} + +fn process_instruction(instr: &MediumLevelILLiftedInstruction, view: &BinaryView) -> Option<()> { + let call = match_call_to_function_named(instr, view, OBJC_MSG_SEND_SUPER_FUNCTIONS)?; + + adjust_return_type_of_call(&call, return_type_for_super_init(&call, view)?.as_ref()); + Some(()) +} + +pub fn process(ac: &AnalysisContext) -> Result<(), Error> { + let bv = ac.view(); + if GlobalState::should_ignore_view(&bv) { + return Ok(()); + } + + let mlil = ac.mlil_function().ok_or(Error::MissingIL { + level: ILLevel::Medium, + func_start: ac.function().start(), + })?; + let mlil_ssa = mlil.ssa_form(); + + for block in &mlil_ssa.basic_blocks() { + for instr in block.iter() { + process_instruction(&instr.lift(), &bv); + } + } + + Ok(()) +} diff --git a/plugins/workflow_objc/src/error.rs b/plugins/workflow_objc/src/error.rs index 7532822662..8353d015de 100644 --- a/plugins/workflow_objc/src/error.rs +++ b/plugins/workflow_objc/src/error.rs @@ -11,13 +11,20 @@ impl From<()> for WorkflowRegistrationError { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ILLevel { + Low, + Medium, + High, +} + #[derive(Error, Debug)] pub enum Error { - #[error("Unable to retrieve low-level IL for function at {func_start:#x}")] - MissingLowLevelIL { func_start: u64 }, + #[error("Unable to retrieve {level:?} IL for function at {func_start:#x}")] + MissingIL { level: ILLevel, func_start: u64 }, - #[error("Unable to retrieve low-level SSA IL for function at {func_start:#x}")] - MissingSsaForm { func_start: u64 }, + #[error("Unable to retrieve {level:?} SSA IL for function at {func_start:#x}")] + MissingSsaForm { level: ILLevel, func_start: u64 }, #[error("Unexpected LLIL operation at address {address:#x} (expected {expected})")] UnexpectedLlilOperation { address: u64, expected: String }, diff --git a/plugins/workflow_objc/src/workflow.rs b/plugins/workflow_objc/src/workflow.rs index 090a8e5e4e..f411951bf4 100644 --- a/plugins/workflow_objc/src/workflow.rs +++ b/plugins/workflow_objc/src/workflow.rs @@ -2,6 +2,14 @@ use binaryninja::workflow::{activity, Activity, AnalysisContext, Workflow}; use crate::{activities, error::WorkflowRegistrationError}; +/// Base confidence levels for types applied by each of the activities in this workflow. +/// These are ordered such that later activities can override types applied by earlier activities. +#[repr(u8)] +pub enum Confidence { + ObjCMsgSend = 96, + SuperInit = 100, +} + const WORKFLOW_INFO: &str = r#"{ "title": "Objective-C", "description": "Enhanced analysis for Objective-C code.", @@ -48,9 +56,23 @@ pub fn register_activities() -> Result<(), WorkflowRegistrationError> { run(activities::inline_stubs::process), ); + let super_init_activity = Activity::new_with_action( + activity::Config::action( + "core.function.objectiveC.types.superInit", + "Obj-C: Adjust return types of [super init…] calls", + "Adjust the return type of calls to objc_msgSendSuper2 where the selector is in the init family.", + ) + .eligibility( + activity::Eligibility::auto().predicate( + activity::ViewType::in_(["Mach-O", "DSCView"]), + )), + run(activities::super_init::process), + ); + workflow .activity_after(&inline_stubs_activity, "core.function.translateTailCalls")? .activity_after(&objc_msg_send_calls_activity, &inline_stubs_activity.name())? + .activity_after(&super_init_activity, "core.function.generateMediumLevelIL")? .register_with_config(WORKFLOW_INFO)?; Ok(()) diff --git a/rust/src/function.rs b/rust/src/function.rs index 558f8d937f..c4061328a7 100644 --- a/rust/src/function.rs +++ b/rust/src/function.rs @@ -1697,12 +1697,7 @@ impl Function { let mut owned_raw_var_ty = Conf::<&Type>::into_raw(var_type.into()); let name = name.to_cstr(); unsafe { - BNCreateUserStackVariable( - self.handle, - offset, - &mut owned_raw_var_ty, - name.as_ptr(), - ) + BNCreateUserStackVariable(self.handle, offset, &mut owned_raw_var_ty, name.as_ptr()) } } @@ -1750,12 +1745,7 @@ impl Function { let mut owned_raw_var_ty = Conf::<&Type>::into_raw(var_type.into()); let name = name.to_cstr(); unsafe { - BNCreateAutoStackVariable( - self.handle, - offset, - &mut owned_raw_var_ty, - name.as_ptr(), - ) + BNCreateAutoStackVariable(self.handle, offset, &mut owned_raw_var_ty, name.as_ptr()) } } diff --git a/rust/src/types.rs b/rust/src/types.rs index 33a0858323..036e53f50c 100644 --- a/rust/src/types.rs +++ b/rust/src/types.rs @@ -101,6 +101,27 @@ impl TypeBuilder { self } + pub fn set_child_type<'a, T: Into>>(&self, ty: T) -> &Self { + let mut type_with_confidence = Conf::<&Type>::into_raw(ty.into()); + unsafe { BNTypeBuilderSetChildType(self.handle, &mut type_with_confidence) }; + self + } + + /// This is an alias for [`Self::set_child_type`]. + pub fn set_target<'a, T: Into>>(&self, ty: T) -> &Self { + self.set_child_type(ty) + } + + /// This is an alias for [`Self::set_child_type`]. + pub fn set_element_type<'a, T: Into>>(&self, ty: T) -> &Self { + self.set_child_type(ty) + } + + /// This is an alias for [`Self::set_child_type`]. + pub fn set_return_value<'a, T: Into>>(&self, ty: T) -> &Self { + self.set_child_type(ty) + } + // Readable properties pub fn type_class(&self) -> TypeClass {