diff --git a/demangle.cpp b/demangle.cpp index 8f13c2132..bb7eb1326 100644 --- a/demangle.cpp +++ b/demangle.cpp @@ -47,8 +47,19 @@ namespace BinaryNinja { bool DemangleMS(Architecture* arch, const std::string& mangledName, Ref& outType, QualifiedName& outVarName, BinaryView* view) { - const bool simplify = Settings::Instance()->Get("analysis.types.templateSimplifier", view); - return DemangleMS(arch, mangledName, outType, outVarName, simplify); + BNType* localType = nullptr; + char** localVarName = nullptr; + size_t localSize = 0; + if (!BNDemangleMSWithOptions(arch->GetObject(), mangledName.c_str(), &localType, &localVarName, &localSize, + view ? view->GetObject() : nullptr)) + return false; + outType = localType ? new Type(localType) : nullptr; + for (size_t i = 0; i < localSize; i++) + { + outVarName.push_back(localVarName[i]); + } + BNFreeDemangledName(&localVarName, localSize); + return true; } bool DemangleMS(Architecture* arch, const std::string& mangledName, Ref& outType, QualifiedName& outVarName, diff --git a/demangler/gnu3/demangle_gnu3.cpp b/demangler/gnu3/demangle_gnu3.cpp index f2130a4e1..f3c1a8e60 100644 --- a/demangler/gnu3/demangle_gnu3.cpp +++ b/demangler/gnu3/demangle_gnu3.cpp @@ -58,7 +58,7 @@ static inline void rtrim(string &s) } -static size_t TotalStringSize(const _STD_VECTOR<_STD_STRING>& v) +static size_t TotalStringSize(const StringList& v) { size_t n = 0; for (const auto& s : v) @@ -67,6 +67,25 @@ static size_t TotalStringSize(const _STD_VECTOR<_STD_STRING>& v) } +static string JoinNameSegments(const StringList& name) +{ + if (name.empty()) + return {}; + if (name.size() == 1) + return name[0]; + + string out; + out.reserve(TotalStringSize(name) + (name.size() - 1) * 2); + out += name[0]; + for (size_t i = 1; i < name.size(); i++) + { + out += "::"; + out += name[i]; + } + return out; +} + + static string GetTemplateString(const vector& args) { // Pre-calculate total length to avoid reallocations @@ -333,7 +352,6 @@ void DemangleGNU3::Reset(Architecture* arch, const string& mangledName) { m_reader.Reset(mangledName); m_arch = arch; - m_varName.clear(); m_substitute.clear(); m_templateSubstitute.clear(); m_functionSubstitute.clear(); @@ -351,7 +369,7 @@ void DemangleGNU3::Reset(Architecture* arch, const string& mangledName) } -DemangledTypeNode DemangleGNU3::CreateUnknownType(const QualifiedName& s) +DemangledTypeNode DemangleGNU3::CreateUnknownType(const StringList& s) { return DemangledTypeNode::NamedType(UnknownNamedTypeClass, s); } @@ -359,7 +377,7 @@ DemangledTypeNode DemangleGNU3::CreateUnknownType(const QualifiedName& s) DemangledTypeNode DemangleGNU3::CreateUnknownType(const string& s) { - return DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{s}); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{s}); } @@ -590,7 +608,6 @@ DemangledTypeNode DemangleGNU3::DemangleType() DemangledTypeNode type; bool cnst = false, vltl = false, rstrct = false; bool substitute = false; - QualifiedName name; DemangleCVQualifiers(cnst, vltl, rstrct); @@ -602,7 +619,7 @@ DemangledTypeNode DemangleGNU3::DemangleType() if (vltl) type.SetVolatile(true); if (rstrct) - type.SetPointerSuffix({RestrictSuffix}); + type.SetPointerSuffixBits(1u << RestrictSuffix); PushType(type); return type; } @@ -663,20 +680,20 @@ DemangledTypeNode DemangleGNU3::DemangleType() if (m_reader.Peek() == 's') { m_reader.Consume(); - type = DemangledTypeNode::NamedType(StructNamedTypeClass, _STD_VECTOR<_STD_STRING>{DemangleSourceName()}); + type = DemangledTypeNode::NamedType(StructNamedTypeClass, StringList{DemangleSourceName()}); break; } else if (m_reader.Peek() == 'u') { m_reader.Consume(); - type = DemangledTypeNode::NamedType(UnionNamedTypeClass, _STD_VECTOR<_STD_STRING>{DemangleSourceName()}); + type = DemangledTypeNode::NamedType(UnionNamedTypeClass, StringList{DemangleSourceName()}); break; } else if (m_reader.Peek() == 'e') { m_reader.Consume(); - type = DemangledTypeNode::NamedType(EnumNamedTypeClass, QualifiedName({DemangleSourceName()}), - m_arch->GetDefaultIntegerSize(), m_arch->GetDefaultIntegerSize()); + type = DemangledTypeNode::NamedType(EnumNamedTypeClass, StringList{DemangleSourceName()}, + m_arch->GetDefaultIntegerSize()); break; } @@ -786,7 +803,6 @@ DemangledTypeNode DemangleGNU3::DemangleType() string fullName = member.GetStringBeforeName() + "(" + memberName.GetString() + "::*)" + member.GetStringAfterName(); //member.SetScope(NonStaticScope); //DemangledTypeNode ptr = DemangledTypeNode::PointerType(m_arch, member, cnst, vltl); - //QualifiedName qn({memberName.GetString(), "*"}); type = CreateUnknownType(fullName); substitute = true; break; @@ -805,7 +821,7 @@ DemangledTypeNode DemangleGNU3::DemangleType() case 'c': type = CreateUnknownType("decltype(auto)"); break; //decltype(auto) case 'n': { - static const QualifiedName stdNullptrTName(vector{"std", "nullptr_t"}); + static const StringList stdNullptrTName(vector{"std", "nullptr_t"}); type = CreateUnknownType(stdNullptrTName); break; } @@ -905,19 +921,19 @@ DemangledTypeNode DemangleGNU3::DemangleType() DemangledTypeNode DemangleGNU3::DemangleSubstitution() { - static const QualifiedName stdAllocatorName(vector{"std", "allocator"}); - static const QualifiedName stdBasicStringName(vector{"std", "basic_string"}); - static const QualifiedName stdIostreamName(vector{"std", "iostream"}); - static const QualifiedName stdIstreamName(vector{"std", "istream"}); - static const QualifiedName stdOstreamName(vector{"std", "ostream"}); - static const QualifiedName stdStringName(vector{"std", "string"}); - static const QualifiedName stdName(vector{"std"}); + static const StringList stdAllocatorName(vector{"std", "allocator"}); + static const StringList stdBasicStringName(vector{"std", "basic_string"}); + static const StringList stdIostreamName(vector{"std", "iostream"}); + static const StringList stdIstreamName(vector{"std", "istream"}); + static const StringList stdOstreamName(vector{"std", "ostream"}); + static const StringList stdStringName(vector{"std", "string"}); + static const StringList stdName(vector{"std"}); indent() MyLogDebug("%s: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); char elm; elm = m_reader.Read(); - QualifiedName name; + StringList name; size_t number = 0; switch (elm) { @@ -1023,7 +1039,7 @@ string DemangleGNU3::DemanglePrimaryExpression() MyLogDebug("%s: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); char elm1 = '\0'; string out; - QualifiedName tmpList; + StringList tmpList; bool oldTopLevel; //expr-primary if (m_reader.PeekString(2) == "_Z") @@ -1397,7 +1413,7 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() } -QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() +StringList DemangleGNU3::DemangleBaseUnresolvedName() { // ::= # unresolved name // ::= on # unresolved operator-function-id @@ -1407,7 +1423,7 @@ QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() indent() MyLogDebug("%s: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); - QualifiedName out; + StringList out; if (m_reader.Length() > 1) { const string str = m_reader.PeekString(2); @@ -1819,7 +1835,7 @@ string DemangleGNU3::DemangleExpression() { out += DemangleSourceName(); // Push bare name (before template args) to substitution table. - PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{out})); + PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{out})); if (m_reader.Peek() == 'I') { vector args; @@ -1828,13 +1844,13 @@ string DemangleGNU3::DemangleExpression() DemangleTemplateArgs(args); out += GetTemplateString(args); // Also push the template instantiation (name+args). - PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{out})); + PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{out})); } out += "::"; }while (m_reader.Peek() != 'E'); m_reader.Consume(); - out += DemangleBaseUnresolvedName().GetString(); + out += JoinNameSegments(DemangleBaseUnresolvedName()); return out; } if (isdigit(m_reader.Peek())) @@ -1874,7 +1890,7 @@ string DemangleGNU3::DemangleExpression() // so check rather than unconditionally consuming. if (m_reader.Peek() == 'E') m_reader.Consume(); - out += DemangleBaseUnresolvedName().GetString(); + out += JoinNameSegments(DemangleBaseUnresolvedName()); return out; } else @@ -1916,7 +1932,7 @@ string DemangleGNU3::DemangleExpression() } } // peek is not a digit: fall back for operator-names ("on") / destructor-names ("dn"). - out += DemangleBaseUnresolvedName().GetString(); + out += JoinNameSegments(DemangleBaseUnresolvedName()); } return out; default: @@ -2027,7 +2043,7 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) indent(); MyLogDebug("%s:: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); - DemangledTypeNode type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, QualifiedName()); + DemangledTypeNode type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{}); bool cnst = false, vltl = false, rstrct = false; bool ref = false; bool rvalueRef = false; @@ -2099,15 +2115,16 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) if (!isTemplate) { type.SetNameType(newType.GetNameType()); - auto aNames = type.GetTypeName(); - auto bNames = newType.GetTypeName(); - _STD_VECTOR<_STD_STRING> newName; + const auto& aNames = type.GetTypeName(); + const auto& bNames = newType.GetTypeName(); + StringList newName; newName.reserve(aNames.size() + bNames.size()); newName.insert(newName.end(), aNames.begin(), aNames.end()); newName.insert(newName.end(), bNames.begin(), bNames.end()); if (TotalStringSize(newName) > MAX_DEMANGLE_LENGTH) throw DemangleException("Detected adversarial mangled string"); - type.SetNTR(type.GetNTRClass(), newName); + type.SetTypeName(std::move(newName)); + type.SetNTRType(newType.GetNTRClass()); type.SetHasTemplateArguments(false); } // Consume any ABI tags (B ) following this name component. @@ -2163,7 +2180,7 @@ DemangledTypeNode DemangleGNU3::DemangleLocalName() indent(); MyLogDebug("%s '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); DemangledTypeNode type; - QualifiedName varName; + StringList varName; // The local function has its own template scope. Save the outer template // substitution table and set m_topLevel = true so that when the local // function's template args are parsed (e.g. handleMessageDelayed), @@ -2201,16 +2218,17 @@ DemangledTypeNode DemangleGNU3::DemangleLocalName() type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, varName); auto aNames = type.GetTypeName(); auto bNames = tmpType.GetTypeName(); - _STD_VECTOR<_STD_STRING> newName; + StringList newName; newName.reserve(aNames.size() + bNames.size()); newName.insert(newName.end(), aNames.begin(), aNames.end()); newName.insert(newName.end(), bNames.begin(), bNames.end()); if (TotalStringSize(newName) > MAX_DEMANGLE_LENGTH) throw DemangleException("Detected adversarial mangled string"); type.SetTypeName(std::move(newName)); + type.SetNTRType(tmpType.GetNTRClass()); type.SetConst(tmpType.IsConst()); type.SetVolatile(tmpType.IsVolatile()); - type.SetPointerSuffix(tmpType.GetPointerSuffix()); + type.SetPointerSuffixBits(tmpType.GetPointerSuffixBits()); } else { @@ -2331,7 +2349,7 @@ DemangledTypeNode DemangleGNU3::DemangleName() } -DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) +DemangledTypeNode DemangleGNU3::DemangleSymbol(StringList& varName) { indent(); MyLogDebug("%s: %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); @@ -2341,7 +2359,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) ParamList params; bool cnst = false, vltl = false, rstrct = false; bool oldTopLevel; - QualifiedName name; + StringList name; /* ::= @@ -2387,7 +2405,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{name.GetString() + " [transaction clone]" + t.GetStringAfterName()}); + StringList{JoinNameSegments(name) + " [transaction clone]" + t.GetStringAfterName()}); } case 'V': { @@ -2576,7 +2594,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) annotation += ']'; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{routineName + annotation}); + StringList{routineName + annotation}); } default: throw DemangleException(); @@ -2628,7 +2646,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"covariant_return_thunk_to_" + name.GetString() + t.GetStringAfterName()}); + StringList{"covariant_return_thunk_to_" + JoinNameSegments(name) + t.GetStringAfterName()}); } case 'C': { @@ -2638,7 +2656,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) throw DemangleException(); return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"construction_vtable_for_" + DemangleTypeString() + "-in-" + t.GetString()}); + StringList{"construction_vtable_for_" + DemangleTypeString() + "-in-" + t.GetString()}); } case 'D': LogWarn("Unsupported: 'typeinfo common proxy'\n"); @@ -2656,7 +2674,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"non-virtual_thunk_to_" + name.GetString() + t.GetStringAfterName()}); + StringList{"non-virtual_thunk_to_" + JoinNameSegments(name) + t.GetStringAfterName()}); } case 'H': // TLS init function { @@ -2665,11 +2683,11 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"tls_init_function_for_" + t.GetTypeAndName(name)}); + StringList{"tls_init_function_for_" + t.GetTypeAndName(name)}); } case 'I': return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"typeinfo_for_" + DemangleTypeString()}); + StringList{"typeinfo_for_" + DemangleTypeString()}); case 'J': LogWarn("Unsupported: 'java class'\n"); throw DemangleException(); @@ -2684,7 +2702,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) { DemangledTypeNode t = DemangleType(); return DemangledTypeNode::NamedType(StructNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"VTT_for_" + t.GetString()}); + StringList{"VTT_for_" + t.GetString()}); } case 'v': // virtual thunk { @@ -2699,11 +2717,11 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"virtual_thunk_to_" + name.GetString() + t.GetStringAfterName()}); + StringList{"virtual_thunk_to_" + JoinNameSegments(name) + t.GetStringAfterName()}); } case 'V': //Vtable return DemangledTypeNode::NamedType(StructNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"vtable_for_" + DemangleTypeString()}); + StringList{"vtable_for_" + DemangleTypeString()}); case 'W': // TLS wrapper function { oldTopLevel = m_topLevel; @@ -2711,7 +2729,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"tls_wrapper_function_for_" + t.GetTypeAndName(name)}); + StringList{"tls_wrapper_function_for_" + t.GetTypeAndName(name)}); } default: throw DemangleException(); @@ -2735,7 +2753,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) varName = type.GetTypeName(); cnst = type.IsConst(); vltl = type.IsVolatile(); - auto suffix = type.GetPointerSuffix(); + auto suffix = type.GetPointerSuffixBits(); if (m_reader.Peek() == 'J') { m_reader.Consume(); @@ -2828,14 +2846,14 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) if (isReturnTypeUnknown) type.SetReturnTypeConfidence(BN_MINIMUM_CONFIDENCE); - type.SetPointerSuffix(suffix); + type.SetPointerSuffixBits(suffix); type.SetConst(cnst); type.SetVolatile(vltl); if (rstrct) - type.SetPointerSuffix({RestrictSuffix}); + type.SetPointerSuffixBits(1u << RestrictSuffix); // PrintTables(); - MyLogDebug("Done: %s%s%s\n", type.GetStringBeforeName().c_str(), varName.GetString().c_str(), + MyLogDebug("Done: %s%s%s\n", type.GetStringBeforeName().c_str(), JoinNameSegments(varName).c_str(), type.GetStringAfterName().c_str()); dedent(); @@ -2892,117 +2910,130 @@ bool DemangleGNU3Static::DemangleGlobalHeader(string& name, string& header) } -bool DemangleGNU3Static::DemangleStringGNU3(Architecture* arch, const string& name, Ref& outType, QualifiedName& outVarName) +namespace { - // Handle _block_invoke[.N] and _block_invoke_N suffixes (Clang/Apple block invocations). - // E.g. ____ZN4dyld5_mainEPK12macho_headermiPPKcS5_S5_Pm_block_invoke.110 - // -> "invocation_function_for_block_in_dyld::_main(...)" - static const string blockInvokeSuffix = "_block_invoke"; - size_t blockPos = name.rfind(blockInvokeSuffix); - if (blockPos != string::npos) - { - // Verify the suffix is _block_invoke optionally followed by [._] only - string tail = name.substr(blockPos + blockInvokeSuffix.size()); - bool validSuffix = tail.empty(); - if (!validSuffix && (tail[0] == '.' || tail[0] == '_')) - { - size_t i = 1; - while (i < tail.size() && isdigit((unsigned char)tail[i])) - i++; - validSuffix = (i == tail.size() && i > 1); - } - if (validSuffix) - { - // Extract the base symbol: everything before _block_invoke - string base = name.substr(0, blockPos); - // Normalize leading underscores: find 'Z' after underscores, keep one '_' before it - size_t zPos = base.find_first_not_of('_'); - if (zPos != string::npos && base[zPos] == 'Z') + static bool DemangleStringGNU3Segments( + Architecture* arch, const string& name, Ref& outType, StringList& outVarName) + { + // Handle _block_invoke[.N] and _block_invoke_N suffixes (Clang/Apple block invocations). + // E.g. ____ZN4dyld5_mainEPK12macho_headermiPPKcS5_S5_Pm_block_invoke.110 + // -> "invocation_function_for_block_in_dyld::_main(...)" + static const string blockInvokeSuffix = "_block_invoke"; + size_t blockPos = name.rfind(blockInvokeSuffix); + if (blockPos != string::npos) + { + // Verify the suffix is _block_invoke optionally followed by [._] only + string tail = name.substr(blockPos + blockInvokeSuffix.size()); + bool validSuffix = tail.empty(); + if (!validSuffix && (tail[0] == '.' || tail[0] == '_')) { - string normalized = "_" + base.substr(zPos); - Ref baseType; - QualifiedName baseName; - if (DemangleStringGNU3(arch, normalized, baseType, baseName)) + size_t i = 1; + while (i < tail.size() && isdigit((unsigned char)tail[i])) + i++; + validSuffix = (i == tail.size() && i > 1); + } + if (validSuffix) + { + // Extract the base symbol: everything before _block_invoke + string base = name.substr(0, blockPos); + // Normalize leading underscores: find 'Z' after underscores, keep one '_' before it + size_t zPos = base.find_first_not_of('_'); + if (zPos != string::npos && base[zPos] == 'Z') { - outVarName.clear(); - outVarName.push_back("invocation_function_for_block_in_" + baseName.GetString()); - outType = baseType; - return true; + string normalized = "_" + base.substr(zPos); + Ref baseType; + StringList baseName; + if (DemangleStringGNU3Segments(arch, normalized, baseType, baseName)) + { + outVarName.clear(); + outVarName.push_back("invocation_function_for_block_in_" + JoinNameSegments(baseName)); + outType = baseType; + return true; + } } } } - } - // Handle macOS thread-local variable initializer suffix: $tlv$init - // E.g. __ZL9recursive$tlv$init -> demangle "__ZL9recursive" then annotate. - static const string tlvInitSuffix = "$tlv$init"; - if (name.size() > tlvInitSuffix.size() && - name.compare(name.size() - tlvInitSuffix.size(), tlvInitSuffix.size(), tlvInitSuffix) == 0) - { - string base = name.substr(0, name.size() - tlvInitSuffix.size()); - Ref baseType; - QualifiedName baseName; - if (DemangleStringGNU3(arch, base, baseType, baseName)) + // Handle macOS thread-local variable initializer suffix: $tlv$init + // E.g. __ZL9recursive$tlv$init -> demangle "__ZL9recursive" then annotate. + static const string tlvInitSuffix = "$tlv$init"; + if (name.size() > tlvInitSuffix.size() && + name.compare(name.size() - tlvInitSuffix.size(), tlvInitSuffix.size(), tlvInitSuffix) == 0) { - outVarName = baseName; - if (outVarName.size() > 0) - outVarName[outVarName.size() - 1] += "$tlv$init"; - else - outVarName.push_back("$tlv$init"); - outType = baseType; - return true; + string base = name.substr(0, name.size() - tlvInitSuffix.size()); + Ref baseType; + StringList baseName; + if (DemangleStringGNU3Segments(arch, base, baseType, baseName)) + { + outVarName = std::move(baseName); + if (outVarName.size() > 0) + outVarName[outVarName.size() - 1] += "$tlv$init"; + else + outVarName.push_back("$tlv$init"); + outType = baseType; + return true; + } } - } - string encoding = name; - string header; - bool foundHeader = DemangleGlobalHeader(encoding, header); - - if (!encoding.compare(0, 2, "_Z")) - encoding = encoding.substr(2); - else if (!encoding.compare(0, 3, "__Z")) - encoding = encoding.substr(3); - else if (foundHeader && !header.empty()) - { - outVarName.clear(); - outVarName.push_back(header); - outVarName.push_back(encoding); - outType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, outVarName).Finalize(); - return true; - } - else - return false; + string encoding = name; + string header; + bool foundHeader = DemangleGNU3Static::DemangleGlobalHeader(encoding, header); - thread_local DemangleGNU3 demangle(arch, encoding); - demangle.Reset(arch, encoding); - try - { - outType = demangle.DemangleSymbol(outVarName).Finalize(); + if (!encoding.compare(0, 2, "_Z")) + encoding = encoding.substr(2); + else if (!encoding.compare(0, 3, "__Z")) + encoding = encoding.substr(3); + else if (foundHeader && !header.empty()) + { + outVarName.clear(); + outVarName.push_back(header); + outVarName.push_back(encoding); + outType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, outVarName).Finalize(); + return true; + } + else + return false; - if (outVarName.size() == 0) + thread_local DemangleGNU3 demangle(arch, encoding); + demangle.Reset(arch, encoding); + try { - if (outType->GetClass() == NamedTypeReferenceClass && outType->GetNamedTypeReference()->GetTypeReferenceClass() == UnknownNamedTypeClass) - { - outVarName = outType->GetTypeName(); - outType = nullptr; - } - else if (outType->GetClass() == NamedTypeReferenceClass) + outType = demangle.DemangleSymbol(outVarName).Finalize(); + + if (outVarName.size() == 0) { - auto typeName = outType->GetTypeName(); - if (typeName.size() > 0) - outVarName = "_" + typeName[typeName.size() - 1]; + if (outType->GetClass() == NamedTypeReferenceClass && outType->GetNamedTypeReference()->GetTypeReferenceClass() == UnknownNamedTypeClass) + { + const auto typeName = outType->GetTypeName(); + outVarName = StringList(typeName.begin(), typeName.end()); + outType = nullptr; + } + else if (outType->GetClass() == NamedTypeReferenceClass) + { + auto typeName = outType->GetTypeName(); + if (typeName.size() > 0) + outVarName = StringList{"_" + typeName[typeName.size() - 1]}; + } } - } - if (foundHeader && !header.empty()) + if (foundHeader && !header.empty()) + outVarName.insert(outVarName.begin(), header); + } + catch (std::exception&) { - outVarName.insert(outVarName.begin(), header); + return false; } + return true; } - catch (std::exception&) - { +} + + +bool DemangleGNU3Static::DemangleStringGNU3(Architecture* arch, const string& name, Ref& outType, QualifiedName& outVarName) +{ + StringList outVarNameSegments; + if (!DemangleStringGNU3Segments(arch, name, outType, outVarNameSegments)) return false; - } + outVarName = QualifiedName(outVarNameSegments); return true; } diff --git a/demangler/gnu3/demangle_gnu3.h b/demangler/gnu3/demangle_gnu3.h index 8ee0fca62..261987c49 100644 --- a/demangler/gnu3/demangle_gnu3.h +++ b/demangler/gnu3/demangle_gnu3.h @@ -95,7 +95,6 @@ class DemangleGNU3 { using ParamList = _STD_VECTOR; - BN::QualifiedName m_varName; DemangleGNU3Reader m_reader; BN::Architecture* m_arch; _STD_VECTOR m_substitute; @@ -118,10 +117,10 @@ class DemangleGNU3 bool m_inLocalName; struct ForwardRef { size_t index; }; _STD_VECTOR m_pendingForwardRefs; - void ResolveForwardTemplateRefs(DemangledTypeNode& type, const _STD_VECTOR<_STD_STRING>& args); + void ResolveForwardTemplateRefs(DemangledTypeNode& type, const StringList& args); static _STD_STRING ForwardRefPlaceholder(size_t index); enum SymbolType { Function, FunctionWithReturn, Data, VTable, Rtti, Name}; - BN::QualifiedName DemangleBaseUnresolvedName(); + StringList DemangleBaseUnresolvedName(); DemangledTypeNode DemangleUnresolvedType(); _STD_STRING DemangleUnarySuffixExpression(const _STD_STRING& op); _STD_STRING DemangleUnaryPrefixExpression(const _STD_STRING& op); @@ -140,7 +139,7 @@ class DemangleGNU3 void DemangleCVQualifiers(bool& cnst, bool& vltl, bool& rstrct); DemangledTypeNode DemangleSubstitution(); DemangledTypeNode DemangleTemplateSubstitution(); - void DemangleTemplateArgs(_STD_VECTOR<_STD_STRING>& args, bool* hadNonTypeArg = nullptr); + void DemangleTemplateArgs(StringList& args, bool* hadNonTypeArg = nullptr); DemangledTypeNode DemangleFunction(bool cnst, bool vltl); DemangledTypeNode DemangleType(); int64_t DemangleNumber(); @@ -149,7 +148,7 @@ class DemangleGNU3 void PushType(const DemangledTypeNode& type); const DemangledTypeNode& GetType(size_t ref); - DemangledTypeNode CreateUnknownType(const BN::QualifiedName& s); + DemangledTypeNode CreateUnknownType(const StringList& s); DemangledTypeNode CreateUnknownType(const _STD_STRING& s); static void ExtendTypeName(DemangledTypeNode& type, const _STD_STRING& extend); @@ -161,8 +160,7 @@ class DemangleGNU3 public: DemangleGNU3(BN::Architecture* arch, const _STD_STRING& mangledName); void Reset(BN::Architecture* arch, const _STD_STRING& mangledName); - DemangledTypeNode DemangleSymbol(BN::QualifiedName& varName); - BN::QualifiedName GetVarName() const { return m_varName; } + DemangledTypeNode DemangleSymbol(StringList& varName); }; diff --git a/demangler/gnu3/demangled_type_node.cpp b/demangler/gnu3/demangled_type_node.cpp index 89bfb2fad..ca69a838d 100644 --- a/demangler/gnu3/demangled_type_node.cpp +++ b/demangler/gnu3/demangled_type_node.cpp @@ -23,14 +23,72 @@ using namespace BinaryNinja; using namespace std; #endif +namespace +{ + static constexpr uint8_t DemangledPtr64Bit = 1u << 0; + static constexpr uint8_t DemangledUnalignedBit = 1u << 1; + static constexpr uint8_t DemangledRestrictBit = 1u << 2; + static constexpr uint8_t DemangledReferenceBit = 1u << 3; + static constexpr uint8_t DemangledLvalueBit = 1u << 4; + + static void AppendPointerSuffixToken(string& out, const char* token) + { + if (!out.empty() && out.back() != ' ') + out += ' '; + out += token; + } + + static string JoinNameList(const StringList& name) + { + if (name.empty()) + return {}; + if (name.size() == 1) + return name[0]; + + size_t total = (name.size() - 1) * 2; + for (const auto& segment : name) + total += segment.size(); + + string out; + out.reserve(total); + out += name[0]; + for (size_t i = 1; i < name.size(); i++) + { + out += "::"; + out += name[i]; + } + return out; + } + +} + +#define HAS_POINTER_SUFFIX(bit) ((m_pointerSuffixBits & (bit)) != 0) + +static const char* CallingConventionString[] = +{ + "", + "__cdecl", + "__pascal", + "__thiscall", + "__stdcall", + "__fastcall", + "__clrcall", + "__eabi", + "__vectorcall", + "__swiftcall", + "__swiftasync" +}; + DemangledTypeNode::DemangledTypeNode() - : m_typeClass(VoidTypeClass), m_width(0), m_alignment(0), - m_const(false), m_volatile(false), m_signed(false), - m_hasTemplateArgs(false), m_nameType(NoNameType), - m_ntrClass(UnknownNamedTypeClass), - m_pointerReference(PointerReferenceType), m_elements(0), - m_returnTypeConfidence(BN_DEFAULT_CONFIDENCE) + : m_typeClass(VoidTypeClass), m_ntrClass(UnknownNamedTypeClass), + m_pointerReference(PointerReferenceType), m_nameType(NoNameType), + m_callingConventionName(NoCallingConvention), m_pointerSuffixBits(0), + m_returnTypeConfidence(BN_FULL_CONFIDENCE), + m_const(false), m_volatile(false), m_signed(false), m_hasVariableArgs(false), + m_hasTemplateArgs(false), m_width(0), + m_isMemberPointer(false), + m_elements(0) { } @@ -73,6 +131,25 @@ DemangledTypeNode DemangledTypeNode::FloatType(size_t width, const string& altNa } +DemangledTypeNode DemangledTypeNode::WideCharType(size_t width, const string& altName) +{ + DemangledTypeNode n; + n.m_typeClass = WideCharTypeClass; + n.m_width = width; + n.m_altName = altName; + return n; +} + + +DemangledTypeNode DemangledTypeNode::ValueType(const string& value) +{ + DemangledTypeNode n; + n.m_typeClass = ValueTypeClass; + n.m_altName = value; + return n; +} + + DemangledTypeNode DemangledTypeNode::VarArgsType() { DemangledTypeNode n; @@ -95,6 +172,22 @@ DemangledTypeNode DemangledTypeNode::PointerType(Architecture* arch, DemangledTy } +DemangledTypeNode DemangledTypeNode::MemberPointerType(Architecture* arch, DemangledTypeNode child, + StringList ownerName, bool cnst, bool vltl) +{ + DemangledTypeNode n; + n.m_typeClass = PointerTypeClass; + n.m_width = arch->GetAddressSize(); + n.m_childType = std::make_shared(std::move(child)); + n.m_const = cnst; + n.m_volatile = vltl; + n.m_pointerReference = PointerReferenceType; + n.m_memberPointerOwnerName = std::move(ownerName); + n.m_isMemberPointer = true; + return n; +} + + DemangledTypeNode DemangledTypeNode::ArrayType(DemangledTypeNode child, uint64_t count) { DemangledTypeNode n; @@ -116,44 +209,62 @@ DemangledTypeNode DemangledTypeNode::FunctionType(DemangledTypeNode retType, } +void DemangledTypeNode::SetImplicitThisParameter(DemangledTypeNode type) +{ + m_implicitThisParameterType = std::make_shared(std::move(type)); +} + + DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, - vector nameSegments, size_t width, size_t align) + StringList nameSegments, size_t width, bool isSigned) { DemangledTypeNode n; n.m_typeClass = NamedTypeReferenceClass; n.m_ntrClass = cls; - n.m_nameSegments = std::make_shared>(std::move(nameSegments)); + n.m_nameSegments = std::move(nameSegments); n.m_width = width; - n.m_alignment = align; + n.m_signed = isSigned; return n; } - -DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, - const QualifiedName& name, size_t width, size_t align) -{ - return NamedType(cls, vector(name.begin(), name.end()), width, align); -} - - -void DemangledTypeNode::SetNTR(BNNamedTypeReferenceClass cls, vector nameSegments) +uint8_t DemangledTypeNode::PointerSuffixBit(BNPointerSuffix ps) { - m_ntrClass = cls; - m_nameSegments = std::make_shared>(std::move(nameSegments)); + switch (ps) + { + case Ptr64Suffix: + return DemangledPtr64Bit; + case UnalignedSuffix: + return DemangledUnalignedBit; + case RestrictSuffix: + return DemangledRestrictBit; + case ReferenceSuffix: + return DemangledReferenceBit; + case LvalueSuffix: + return DemangledLvalueBit; + default: + return 0; + } } -void DemangledTypeNode::SetNTR(BNNamedTypeReferenceClass cls, const QualifiedName& name) +void DemangledTypeNode::AddPointerSuffixes(TypeBuilder& tb, bool omitPtr64) const { - SetNTR(cls, vector(name.begin(), name.end())); + if (HAS_POINTER_SUFFIX(DemangledPtr64Bit) && !omitPtr64) + tb.AddPointerSuffix(Ptr64Suffix); + if (HAS_POINTER_SUFFIX(DemangledUnalignedBit)) + tb.AddPointerSuffix(UnalignedSuffix); + if (HAS_POINTER_SUFFIX(DemangledRestrictBit)) + tb.AddPointerSuffix(RestrictSuffix); + if (HAS_POINTER_SUFFIX(DemangledReferenceBit)) + tb.AddPointerSuffix(ReferenceSuffix); + if (HAS_POINTER_SUFFIX(DemangledLvalueBit)) + tb.AddPointerSuffix(LvalueSuffix); } string DemangledTypeNode::GetTypeNameString() const { - if (!m_nameSegments) - return {}; - const auto& segs = *m_nameSegments; + const auto& segs = m_nameSegments; size_t n = segs.size(); if (n == 0) return {}; @@ -179,45 +290,49 @@ string DemangledTypeNode::GetTypeNameString() const size_t DemangledTypeNode::NameStringSize() const { - if (!m_nameSegments) - return 0; size_t total = 0; - for (const auto& s : *m_nameSegments) + for (const auto& s : m_nameSegments) total += s.size(); return total; } -string DemangledTypeNode::GetModifierString() const +void DemangledTypeNode::AppendModifiers(string& out) const { if (m_const && m_volatile) - return "const volatile"; - if (m_const) - return "const"; - if (m_volatile) - return "volatile"; - return ""; + out += " const volatile"; + else if (m_const) + out += " const"; + else if (m_volatile) + out += " volatile"; } -string DemangledTypeNode::GetPointerSuffixString() const +void DemangledTypeNode::AppendPointerSuffix(string& out) const { - static const char* suffixStrings[] = { - "__ptr64", - "__unaligned", - "__restrict", - "&", - "&&" - }; + if (HAS_POINTER_SUFFIX(DemangledUnalignedBit)) + AppendPointerSuffixToken(out, "__unaligned"); + if (HAS_POINTER_SUFFIX(DemangledRestrictBit)) + AppendPointerSuffixToken(out, "__restrict"); + if (HAS_POINTER_SUFFIX(DemangledReferenceBit)) + AppendPointerSuffixToken(out, "&"); + if (HAS_POINTER_SUFFIX(DemangledLvalueBit)) + AppendPointerSuffixToken(out, "&&"); +} - string out; - for (auto& s : m_pointerSuffix) + +void DemangledTypeNode::AppendTypeName(string& out) const +{ + const auto& segs = m_nameSegments; + size_t n = segs.size(); + if (n == 0) + return; + out += segs[0]; + for (size_t i = 1; i < n; i++) { - if (!out.empty() && out.back() != ' ') - out += ' '; - out += suffixStrings[s]; + out += "::"; + out += segs[i]; } - return out; } @@ -239,27 +354,36 @@ string DemangledTypeNode::GetStringAfterName() const void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* parentType) const { - string modifiers = GetModifierString(); - string ptrSuffix = GetPointerSuffixString(); - switch (m_typeClass) { case FunctionTypeClass: + { // Return type before name if (m_childType) { - if (!out.empty() && out.back() != ' ') - out += " "; + if (!out.empty() && out.back() != ' ' && out.back() != '(') + out += ' '; m_childType->AppendBeforeName(out, this); } // If parent is a pointer, add "(" for function pointer syntax if (parentType && parentType->m_typeClass == PointerTypeClass) { if (!out.empty() && out.back() != ' ') - out += " "; - out += "("; + out += ' '; + out += '('; + } + if (static_cast(m_callingConventionName) < (sizeof(CallingConventionString) / sizeof(CallingConventionString[0]))) + { + const char* callingConvention = CallingConventionString[static_cast(m_callingConventionName)]; + if (callingConvention[0] != 0) + { + if (!out.empty() && out.back() != ' ' && out.back() != '(') + out += ' '; + out += callingConvention; + } } break; + } case IntegerTypeClass: if (!m_altName.empty()) @@ -267,11 +391,18 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p else if (m_signed && m_width == 1) out += "char"; else if (m_signed) - out += "int" + to_string(m_width * 8) + "_t"; + { + out += "int"; + out += to_string(m_width * 8); + out += "_t"; + } else - out += "uint" + to_string(m_width * 8) + "_t"; - if (!modifiers.empty()) - out += " " + modifiers; + { + out += "uint"; + out += to_string(m_width * 8); + out += "_t"; + } + AppendModifiers(out); break; case FloatTypeClass: @@ -283,22 +414,22 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p case 4: out += "float"; break; case 8: out += "double"; break; case 10: out += "long double"; break; - default: out += "float" + to_string(m_width * 8); break; + default: + out += "float"; + out += to_string(m_width * 8); + break; } - if (!modifiers.empty()) - out += " " + modifiers; + AppendModifiers(out); break; case BoolTypeClass: out += "bool"; - if (!modifiers.empty()) - out += " " + modifiers; + AppendModifiers(out); break; case VoidTypeClass: out += "void"; - if (!modifiers.empty()) - out += " " + modifiers; + AppendModifiers(out); break; case VarArgsTypeClass: @@ -308,26 +439,37 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p case PointerTypeClass: if (m_childType) m_childType->AppendBeforeName(out, this); - switch (m_pointerReference) + if (m_isMemberPointer) { - case ReferenceReferenceType: out += "&"; break; - case PointerReferenceType: out += "*"; break; - case RValueReferenceType: out += "&&"; break; - default: break; + if (!out.empty() && out.back() != ' ' && out.back() != '(') + out += ' '; + out += JoinNameList(m_memberPointerOwnerName); + out += "::*"; + } + else + { + switch (m_pointerReference) + { + case ReferenceReferenceType: out += '&'; break; + case PointerReferenceType: out += '*'; break; + case RValueReferenceType: out += "&&"; break; + default: break; + } + } + if ((m_pointerSuffixBits & (DemangledUnalignedBit | DemangledRestrictBit | + DemangledReferenceBit | DemangledLvalueBit)) != 0) + { + out += ' '; + AppendPointerSuffix(out); } - if (!ptrSuffix.empty()) - out += " " + ptrSuffix; - if (!modifiers.empty()) - out += " " + modifiers; + AppendModifiers(out); break; case ArrayTypeClass: if (m_childType) m_childType->AppendBeforeName(out, this); if (parentType && parentType->m_typeClass == PointerTypeClass) - { out += " ("; - } break; case NamedTypeReferenceClass: @@ -339,9 +481,20 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p case EnumNamedTypeClass: out += "enum "; break; default: break; } - out += GetTypeNameString(); - if (!modifiers.empty()) - out += " " + modifiers; + AppendTypeName(out); + AppendModifiers(out); + break; + + case WideCharTypeClass: + if (!m_altName.empty()) + out += m_altName; + else + out += "wchar_t"; + AppendModifiers(out); + break; + + case ValueTypeClass: + out += m_altName; break; default: @@ -358,30 +511,33 @@ static string FormatArrayCount(uint64_t elements) void DemangledTypeNode::AppendAfterName(string& out, const DemangledTypeNode* parentType) const { - string modifiers = GetModifierString(); - string ptrSuffix = GetPointerSuffixString(); - switch (m_typeClass) { case FunctionTypeClass: { // Close the "(" from before-name if parent is pointer if (parentType && parentType->m_typeClass == PointerTypeClass) - out += ")"; + out += ')'; - out += "("; + out += '('; for (size_t i = 0; i < m_params.size(); i++) { if (i != 0) out += ", "; if (m_params[i].type) - out += m_params[i].type->GetString(); + m_params[i].type->AppendString(out); } - out += ")"; - if (!modifiers.empty()) - out += " " + modifiers; - if (!ptrSuffix.empty()) - out += ptrSuffix; + if (m_hasVariableArgs) + { + if (!m_params.empty()) + out += ", "; + out += "..."; + } + out += ')'; + AppendModifiers(out); + if ((m_pointerSuffixBits & (DemangledUnalignedBit | DemangledRestrictBit | + DemangledReferenceBit | DemangledLvalueBit)) != 0) + AppendPointerSuffix(out); // Return type's after-name tokens if (m_childType) m_childType->AppendAfterName(out, this); @@ -404,24 +560,47 @@ void DemangledTypeNode::AppendAfterName(string& out, const DemangledTypeNode* pa } -string DemangledTypeNode::GetString() const +void DemangledTypeNode::AppendString(string& out) const { - const string before = GetStringBeforeName(); - const string after = GetStringAfterName(); - if (!before.empty() && !after.empty() && before.back() != ' ' && before.back() != '*' - && before.back() != '&' && after.front() != ' ' && after.front() != '[' - && m_childType && m_childType->m_typeClass != FunctionTypeClass) + size_t beforeEnd = out.size(); + AppendBeforeName(out); + beforeEnd = out.size(); // track where "before" ends + + string after; + AppendAfterName(after); + + if (!after.empty() && beforeEnd > 0) { - return before + " " + after; + char lastBefore = out[beforeEnd - 1]; + if (lastBefore != ' ' && lastBefore != '*' && lastBefore != '&' + && after.front() != ' ' && after.front() != '[' + && m_childType && m_childType->m_typeClass != FunctionTypeClass) + { + out += ' '; + } } - return before + after; + out += after; +} + + +string DemangledTypeNode::GetString() const +{ + string out; + AppendString(out); + return out; } string DemangledTypeNode::GetTypeAndName(const QualifiedName& name) const +{ + return GetTypeAndName(vector(name.begin(), name.end())); +} + + +string DemangledTypeNode::GetTypeAndName(const StringList& name) const { const string before = GetStringBeforeName(); - const string qName = name.GetString(); + const string qName = JoinNameList(name); const string after = GetStringAfterName(); if ((!before.empty() && !qName.empty() && before.back() != ' ' && qName.front() != ' ') || (!before.empty() && !after.empty() && before.back() != ' ' && after.front() != ' ')) @@ -477,10 +656,28 @@ Ref DemangledTypeNode::Finalize() const case VarArgsTypeClass: return TypeBuilder::VarArgsType().Finalize(); + case WideCharTypeClass: + { + if (!m_const && !m_volatile) + return Type::WideCharType(m_width, m_altName); + TypeBuilder tb = TypeBuilder::WideCharType(m_width, m_altName); + tb.SetConst(m_const); + tb.SetVolatile(m_volatile); + return tb.Finalize(); + } + + case ValueTypeClass: + return Type::ValueType(m_altName); + case PointerTypeClass: { Ref child = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); - return TypeBuilder::PointerType(m_width, child, m_const, m_volatile, m_pointerReference).Finalize(); + TypeBuilder tb = TypeBuilder::PointerType(m_width, child, m_const, m_volatile, m_pointerReference); + AddPointerSuffixes(tb, true); + Ref normalized = tb.Finalize(); + if (m_isMemberPointer) + return Type::NamedType(QualifiedName({GetString()}), normalized.GetPtr()); + return normalized; } case ArrayTypeClass: @@ -498,31 +695,37 @@ Ref DemangledTypeNode::Finalize() const { Ref retType = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); vector finalParams; - finalParams.reserve(m_params.size()); + finalParams.reserve(m_params.size() + (m_implicitThisParameterType ? 1 : 0)); + if (m_implicitThisParameterType) + finalParams.push_back({"this", m_implicitThisParameterType->Finalize(), true, Variable()}); for (auto& p : m_params) { Ref pType = p.type ? p.type->Finalize() : Ref(Type::VoidType()); finalParams.push_back({p.name, pType, true, Variable()}); } - TypeBuilder tb = TypeBuilder::FunctionType(retType->WithConfidence(m_returnTypeConfidence), nullptr, finalParams); + Confidence> callingConvention; + if (m_callingConvention) + callingConvention = Confidence>(m_callingConvention, BN_FULL_CONFIDENCE); + TypeBuilder tb = TypeBuilder::FunctionType( + retType->WithConfidence(static_cast(m_returnTypeConfidence)), callingConvention, finalParams, + Confidence(m_hasVariableArgs, m_hasVariableArgs ? BN_DEFAULT_CONFIDENCE : 0)); tb.SetConst(m_const); tb.SetVolatile(m_volatile); - for (auto ps : m_pointerSuffix) - tb.AddPointerSuffix(ps); + AddPointerSuffixes(tb); tb.SetNameType(m_nameType); + if (m_callingConventionName != NoCallingConvention) + tb.SetCallingConventionName(m_callingConventionName); return tb.Finalize(); } case NamedTypeReferenceClass: { + QualifiedName name(m_nameSegments); TypeBuilder tb = TypeBuilder::NamedType( - NamedTypeReference::GenerateAutoDemangledTypeReference( - m_ntrClass, QualifiedName(m_nameSegments ? *m_nameSegments : vector{})), - m_width, m_alignment > 0 ? m_alignment : 1); + NamedTypeReference::GenerateAutoDemangledTypeReference(m_ntrClass, name), m_width, 1); tb.SetConst(m_const); tb.SetVolatile(m_volatile); - for (auto ps : m_pointerSuffix) - tb.AddPointerSuffix(ps); + AddPointerSuffixes(tb); tb.SetNameType(m_nameType); tb.SetHasTemplateArguments(m_hasTemplateArgs); return tb.Finalize(); @@ -532,3 +735,6 @@ Ref DemangledTypeNode::Finalize() const return Type::VoidType(); } } + +#undef HAS_POINTER_SUFFIX +#undef GetClass diff --git a/demangler/gnu3/demangled_type_node.h b/demangler/gnu3/demangled_type_node.h index 62ad9004a..c3ee08f8b 100644 --- a/demangler/gnu3/demangled_type_node.h +++ b/demangler/gnu3/demangled_type_node.h @@ -40,16 +40,12 @@ #endif #endif +#include #include -#ifdef BINARYNINJACORE_LIBRARY -#include "binaryninjacore_global.h" -#define _STD_SET BinaryNinjaCore::set -#else -#include -#define _STD_SET std::set -#endif -// Lightweight type representation for the GNU3 demangler. +typedef _STD_VECTOR<_STD_STRING> StringList; + +// Lightweight type representation for demanglers (GNU3 and MSVC). // This object serves as an abstraction layer between C++'s type system and our own. // It also removes a source of a lot of reallocation of NamedTypeReference BinaryNinja::Type objects // and only creates real Type objects when Finalize() is called. @@ -73,99 +69,97 @@ class DemangledTypeNode static DemangledTypeNode BoolType(); static DemangledTypeNode IntegerType(size_t width, bool isSigned, const _STD_STRING& altName = ""); static DemangledTypeNode FloatType(size_t width, const _STD_STRING& altName = ""); + static DemangledTypeNode WideCharType(size_t width, const _STD_STRING& altName = ""); + static DemangledTypeNode ValueType(const _STD_STRING& value); static DemangledTypeNode VarArgsType(); static DemangledTypeNode PointerType(BN::Architecture* arch, DemangledTypeNode child, bool cnst, bool vltl, BNReferenceType refType); + static DemangledTypeNode MemberPointerType(BN::Architecture* arch, DemangledTypeNode child, + StringList ownerName, bool cnst, bool vltl); static DemangledTypeNode ArrayType(DemangledTypeNode child, uint64_t count); static DemangledTypeNode FunctionType(DemangledTypeNode retType, std::nullptr_t, _STD_VECTOR params); static DemangledTypeNode NamedType(BNNamedTypeReferenceClass cls, - _STD_VECTOR<_STD_STRING> nameSegments, size_t width = 0, size_t align = 0); - static DemangledTypeNode NamedType(BNNamedTypeReferenceClass cls, - const BN::QualifiedName& name, size_t width = 0, size_t align = 0); + StringList nameSegments, size_t width = 0, bool isSigned = false); - // Getters BNTypeClass GetClass() const { return m_typeClass; } #ifdef BINARYNINJACORE_LIBRARY BNTypeClass GetTypeClass() const { return m_typeClass; } #endif - const _STD_VECTOR<_STD_STRING>& GetTypeName() const - { - if (!m_nameSegments) - { - static const _STD_VECTOR<_STD_STRING> empty; - return empty; - } - return *m_nameSegments; - } - _STD_VECTOR<_STD_STRING>& GetMutableTypeName() - { - if (!m_nameSegments) - m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(); - else if (m_nameSegments.use_count() > 1) - m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(*m_nameSegments); - return *m_nameSegments; - } + const StringList& GetTypeName() const { return m_nameSegments; } + StringList& GetMutableTypeName() { return m_nameSegments; } _STD_STRING GetTypeNameString() const; size_t NameStringSize() const; bool IsConst() const { return m_const; } bool IsVolatile() const { return m_volatile; } + bool HasVariableArguments() const { return m_hasVariableArgs; } BNNameType GetNameType() const { return m_nameType; } bool HasTemplateArguments() const { return m_hasTemplateArgs; } - const _STD_SET& GetPointerSuffix() const { return m_pointerSuffix; } + uint8_t GetPointerSuffixBits() const { return m_pointerSuffixBits; } + bool HasPointerSuffix(BNPointerSuffix ps) const { return (m_pointerSuffixBits & PointerSuffixBit(ps)) != 0; } BNNamedTypeReferenceClass GetNTRClass() const { return m_ntrClass; } + bool IsMemberPointer() const { return m_isMemberPointer; } + const StringList& GetMemberPointerOwnerName() const { return m_memberPointerOwnerName; } - // Setters - void SetTypeName(_STD_VECTOR<_STD_STRING> name) { m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(std::move(name)); } + void SetTypeName(StringList name) { m_nameSegments = std::move(name); } void SetConst(bool c) { m_const = c; } void SetVolatile(bool v) { m_volatile = v; } + void SetHasVariableArguments(bool v) { m_hasVariableArgs = v; } void SetNameType(BNNameType nt) { m_nameType = nt; } void SetHasTemplateArguments(bool t) { m_hasTemplateArgs = t; } - void SetPointerSuffix(const _STD_SET& s) { m_pointerSuffix = s; } - void AddPointerSuffix(BNPointerSuffix ps) { m_pointerSuffix.insert(ps); } - void SetReturnTypeConfidence(uint8_t c) { m_returnTypeConfidence = c; } - - // Named type reference operations - void SetNTR(BNNamedTypeReferenceClass cls, _STD_VECTOR<_STD_STRING> nameSegments); - void SetNTR(BNNamedTypeReferenceClass cls, const BN::QualifiedName& name); - - // String formatting + void SetPointerSuffixBits(uint8_t bits) { m_pointerSuffixBits = bits; } + void AddPointerSuffix(BNPointerSuffix ps) { m_pointerSuffixBits |= PointerSuffixBit(ps); } + void SetReturnTypeConfidence(int8_t c) { m_returnTypeConfidence = c; } + void SetCallingConventionName(BNCallingConventionName cc) { m_callingConventionName = cc; } + void SetCallingConvention(BN::Ref cc) { m_callingConvention = std::move(cc); } + void SetNTRType(BNNamedTypeReferenceClass cls) { m_ntrClass = cls; } + void SetImplicitThisParameter(DemangledTypeNode type); + + void AppendString(_STD_STRING& out) const; _STD_STRING GetString() const; _STD_STRING GetStringBeforeName() const; _STD_STRING GetStringAfterName() const; + _STD_STRING GetTypeAndName(const StringList& name) const; _STD_STRING GetTypeAndName(const BN::QualifiedName& name) const; - // Conversion to real Type BN::Ref Finalize() const; private: BNTypeClass m_typeClass; - size_t m_width; - size_t m_alignment; + BNNamedTypeReferenceClass m_ntrClass; + BNReferenceType m_pointerReference; + BNNameType m_nameType; + BNCallingConventionName m_callingConventionName; + BN::Ref m_callingConvention; + uint8_t m_pointerSuffixBits; + uint8_t m_returnTypeConfidence; bool m_const; bool m_volatile; bool m_signed; + bool m_hasVariableArgs; bool m_hasTemplateArgs; - BNNameType m_nameType; - _STD_SET m_pointerSuffix; + std::shared_ptr m_implicitThisParameterType; + + size_t m_width; _STD_STRING m_altName; - // Named type ref data - BNNamedTypeReferenceClass m_ntrClass; - std::shared_ptr<_STD_VECTOR<_STD_STRING>> m_nameSegments; + StringList m_nameSegments; + StringList m_memberPointerOwnerName; + bool m_isMemberPointer; // Child type (for pointer/array/function return) std::shared_ptr m_childType; - BNReferenceType m_pointerReference; uint64_t m_elements; // Function params _STD_VECTOR m_params; - uint8_t m_returnTypeConfidence; // Helpers for string formatting - _STD_STRING GetModifierString() const; - _STD_STRING GetPointerSuffixString() const; + static uint8_t PointerSuffixBit(BNPointerSuffix ps); + void AddPointerSuffixes(BN::TypeBuilder& tb, bool omitPtr64 = true) const; + void AppendModifiers(_STD_STRING& out) const; + void AppendPointerSuffix(_STD_STRING& out) const; + void AppendTypeName(_STD_STRING& out) const; void AppendBeforeName(_STD_STRING& out, const DemangledTypeNode* parentType = nullptr) const; void AppendAfterName(_STD_STRING& out, const DemangledTypeNode* parentType = nullptr) const; }; diff --git a/demangler/msvc/CMakeLists.txt b/demangler/msvc/CMakeLists.txt index b12559916..3536c899a 100644 --- a/demangler/msvc/CMakeLists.txt +++ b/demangler/msvc/CMakeLists.txt @@ -5,7 +5,9 @@ project(demangle_msvc) file(GLOB SOURCES CONFIGURE_DEPENDS *.cpp *.c - *.h) + *.h + ../gnu3/demangled_type_node.cpp + ../gnu3/demangled_type_node.h) if(DEMO) add_library(${PROJECT_NAME} STATIC ${SOURCES}) diff --git a/demangler/msvc/demangle_msvc.cpp b/demangler/msvc/demangle_msvc.cpp index 0489c341a..86fb8e136 100644 --- a/demangler/msvc/demangle_msvc.cpp +++ b/demangler/msvc/demangle_msvc.cpp @@ -28,181 +28,212 @@ using namespace std; #endif -#define MAX_DEMANGLE_LENGTH 4096 +#define MAX_DEMANGLE_LENGTH 32768 -Demangle::Reader::Reader(string data) +static int64_t SignExtendInt32(int64_t value) { - m_data = data; - //Check for non-ascii characters - for (auto a : m_data) - { - if (a < 0x20 || a > 0x7e) - throw DemangleException(); - } -} - - -string Demangle::Reader::PeekString(size_t count) -{ - if (count > Length()) - throw DemangleException(); - return m_data.substr(0, count); -} - - -char Demangle::Reader::Peek() -{ - if (1 > Length()) - throw DemangleException(); - return (char)m_data[0]; -} - - -const char* Demangle::Reader::GetRaw() -{ - return m_data.c_str(); -} - - -char Demangle::Reader::Read() -{ - if (1 > Length()) - throw DemangleException(); - char out = m_data[0]; - m_data = m_data.substr(1); - return out; + uint64_t lowBits = static_cast(value) & 0xffffffffULL; + if ((lowBits & 0x80000000ULL) != 0) + return static_cast(lowBits) - 0x100000000LL; + return static_cast(lowBits); } +// Define MSVC_DEMANGLE_DEBUG to enable trace logging +#ifdef MSVC_DEMANGLE_DEBUG +#define MSVC_TRACE(...) LogTrace(__VA_ARGS__) +#else +#define MSVC_TRACE(...) do {} while(0) +#endif string Demangle::Reader::ReadString(size_t count) { - if (count > Length()) + if (m_ptr + count >= m_end) throw DemangleException(); - string out = m_data.substr(0, count); - m_data = m_data.substr(count + 1); + string out(m_ptr, count); + m_ptr += count + 1; // skip count chars + sentinel return out; } string Demangle::Reader::ReadUntil(char sentinal) { - size_t pos = m_data.find_first_of(sentinal); - if (pos == string::npos) + const char* found = (const char*)memchr(m_ptr, sentinal, m_end - m_ptr); + if (!found) throw DemangleException(); - return ReadString(pos); + size_t count = found - m_ptr; + return ReadString(count); } -void Demangle::Reader::Consume(size_t count) -{ - if (count > Length()) - throw DemangleException(); - m_data = m_data.substr(count); -} - - -size_t Demangle::Reader::Length() -{ - return m_data.length(); -} - - -const TypeBuilder& Demangle::BackrefList::GetTypeBackref(size_t reference) +const DemangledTypeNode& Demangle::BackrefList::GetTypeBackref(size_t reference) { if (reference < typeList.size()) return typeList[reference]; - // LogDebug("type: %llx - : %d/%d\n", this, typeList.size(), reference); throw DemangleException(string("Backref too large " + std::to_string(reference))); } -string Demangle::BackrefList::GetStringBackref(size_t reference) +const string& Demangle::BackrefList::GetStringBackref(size_t reference) { - // LogDebug("type: %llx - ref: %d\n", this, reference); if (reference < nameList.size()) return nameList[reference]; - LogDebug("type: %p - Backref too large: %zu/%zu\n", this, nameList.size(), reference); + MSVC_TRACE("type: %p - Backref too large: %zu/%zu\n", this, nameList.size(), reference); throw DemangleException(string("Backref too large " + std::to_string(reference))); } -void Demangle::BackrefList::PushTypeBackref(TypeBuilder t) +void Demangle::BackrefList::PushTypeBackref(DemangledTypeNode t) { - // LogDebug("this: %llx - TypeBackref: %lld %s\n", this, nameList.size(), t.GetString().c_str()); if (typeList.size() <= 9) - typeList.push_back(t); + typeList.push_back(std::move(t)); } -void Demangle::BackrefList::PushStringBackref(string& s) +void Demangle::BackrefList::PushStringBackref(const string& s) { if (s.size() > MAX_DEMANGLE_LENGTH) throw DemangleException(); - LogDebug("this: %p - Backref: %zu - %s\n", this, nameList.size(), s.c_str()); + MSVC_TRACE("this: %p - Backref: %zu - %s\n", this, nameList.size(), s.c_str()); for (const auto& name : nameList) if (name == s) return; - nameList.push_back(s); + if (nameList.size() <= 9) + nameList.push_back(s); } -void Demangle::BackrefList::PushFrontStringBackref(string& s) +void Demangle::BackrefList::PushTemplateSpecialization(const string& s) { if (s.size() > MAX_DEMANGLE_LENGTH) throw DemangleException(); - // LogDebug("this: %llx - F-Backref: %lld - %s\n", this, nameList.size(), s.c_str()); - nameList.insert(nameList.begin(), s); + templateList.push_back(s); } -Demangle::Demangle(Architecture* arch, string mangledName) : - reader(mangledName), + +Demangle::Demangle(Architecture* arch, const string& mangledName) : + m_mangledName(mangledName), + reader(m_mangledName), m_arch(arch), m_platform(nullptr), m_view(nullptr) { - m_logger = LogRegistry::CreateLogger("MSVCDemangle"); - //m_logger->ResetIndent(); } -Demangle::Demangle(Ref platform, string mangledName) : - reader(mangledName), +Demangle::Demangle(Ref platform, const string& mangledName) : + m_mangledName(mangledName), + reader(m_mangledName), m_arch(platform->GetArchitecture()), m_platform(platform), m_view(nullptr) { - m_logger = LogRegistry::CreateLogger("MSVCDemangle"); - //m_logger->ResetIndent(); } -Demangle::Demangle(Ref view, string mangledName) : - reader(mangledName), +Demangle::Demangle(Ref view, const string& mangledName) : + m_mangledName(mangledName), + reader(m_mangledName), m_view(view) { m_platform = view->GetDefaultPlatform(); if (!m_platform) throw DemangleException(); m_arch = m_platform->GetArchitecture(); - m_logger = LogRegistry::CreateLogger("MSVCDemangle"); - //m_logger->ResetIndent(); } -TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, QualifiedName& name) +void Demangle::Reset(Architecture* arch, const string& mangledName) +{ + m_mangledName = mangledName; + reader.Reset(m_mangledName); + m_backrefList.Clear(); + m_arch = arch; + m_platform = nullptr; + m_view = nullptr; + m_varName.clear(); + m_templateParamDepth = 0; +} + + +void Demangle::RewriteTemplateBackrefName(NameList& typeName, const BackrefList& nameBackrefList) const +{ + if (typeName.empty()) + return; + + string& baseName = typeName.back(); + if (baseName.find('<') != string::npos) + return; + + for (auto it = nameBackrefList.templateList.rbegin(); it != nameBackrefList.templateList.rend(); ++it) + { + const string& candidate = *it; + size_t templateStart = candidate.find('<'); + if ((templateStart == string::npos) || (templateStart != baseName.size())) + continue; + if (candidate.compare(0, templateStart, baseName) != 0) + continue; + baseName = candidate; + return; + } +} + +DemangledTypeNode Demangle::DemangleReferencedSymbolValue(BackrefList& varList) +{ + // Template argument backrefs are scoped. A referenced symbol nested inside + // a non-type template argument can see backrefs already introduced by the + // surrounding template argument list, but any backrefs created while parsing + // that nested symbol must not leak back into the outer template list. + BackrefList symbolBackrefs = varList; + NameList savedVarName = m_varName; + + try + { + auto context = DemangleSymbol(symbolBackrefs); + string value = "&" + context.type.GetTypeAndName(m_varName); + m_varName = std::move(savedVarName); + + return DemangledTypeNode::ValueType(value); + } + catch (...) + { + m_varName = std::move(savedVarName); + throw; + } +} + + +DemangledTypeNode Demangle::DemangleAutoNonTypeTemplateParam(BackrefList& varList) +{ + if (reader.Peek() == '0') + { + reader.Consume(); + int64_t value; + DemangleNumber(value); + return DemangledTypeNode::ValueType(to_string(value)); + } + if (reader.Peek() == '1') + { + reader.Consume(); + return DemangleReferencedSymbolValue(varList); + } + throw DemangleException(); +} + + +DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, NameList& name, bool includeImplicitThis) { - m_logger->LogDebug("%s: '%s' - %lu\n", __FUNCTION__, reader.GetRaw(), varList.nameList.size()); - TypeBuilder newType; - bool _const = false, _volatile = false, isMember = false; //TODO: use this info, _signed = false; + MSVC_TRACE("%s: '%s' - %lu\n", __FUNCTION__, reader.GetRaw(), varList.nameList.size()); + DemangledTypeNode newType; + bool _const = false, _volatile = false, isMember = false; BNReferenceType refType; BNTypeClass typeClass = IntegerTypeClass; BNStructureVariant structType; - QualifiedName varName; - QualifiedName typeName; + NameList varName; + NameList typeName; BNNameType classFunctionType; - - size_t width; + size_t width = 0; + bool _enumSigned = false; char elm = reader.Read(); switch (elm) { @@ -218,18 +249,18 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali _const = false; _volatile = true; break; - case 'C': return TypeBuilder::IntegerType(1, true); - case 'D': return TypeBuilder::IntegerType(1, true); - case 'E': return TypeBuilder::IntegerType(1, false); - case 'F': return TypeBuilder::IntegerType(2, true); - case 'G': return TypeBuilder::IntegerType(2, false); - case 'H': return TypeBuilder::IntegerType(4, true); - case 'I': return TypeBuilder::IntegerType(4, false); - case 'J': return TypeBuilder::IntegerType(4, true, "long"); - case 'K': return TypeBuilder::IntegerType(4, false, "unsigned long"); - case 'M': return TypeBuilder::FloatType(4); - case 'N': return TypeBuilder::FloatType(8); - case 'O': return TypeBuilder::FloatType(10, "long double"); + case 'C': return DemangledTypeNode::IntegerType(1, true, "signed char"); + case 'D': return DemangledTypeNode::IntegerType(1, true); + case 'E': return DemangledTypeNode::IntegerType(1, false); + case 'F': return DemangledTypeNode::IntegerType(2, true); + case 'G': return DemangledTypeNode::IntegerType(2, false); + case 'H': return DemangledTypeNode::IntegerType(4, true); + case 'I': return DemangledTypeNode::IntegerType(4, false); + case 'J': return DemangledTypeNode::IntegerType(4, true, "long"); + case 'K': return DemangledTypeNode::IntegerType(4, false, "unsigned long"); + case 'M': return DemangledTypeNode::FloatType(4); + case 'N': return DemangledTypeNode::FloatType(8); + case 'O': return DemangledTypeNode::FloatType(10, "long double"); case 'P': // * typeClass = PointerTypeClass; refType = PointerReferenceType; @@ -261,55 +292,123 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali typeClass = EnumerationTypeClass; switch (reader.Read()) { - case '0': width = 1; /* TODO: use these _signed = true; */ break; - case '1': width = 1; /* TODO: use these _signed = false; */ break; - case '2': width = 2; /* TODO: use these _signed = true; */ break; - case '3': width = 2; /* TODO: use these _signed = false; */ break; - case '4': width = 4; /* TODO: use these _signed = true; */ break; - case '5': width = 4; /* TODO: use these _signed = false; */ break; - case '6': width = 4; /* TODO: use these _signed = true; */ break; - case '7': width = 4; /* TODO: use these _signed = false; */ break; + case '0': width = 1; _enumSigned = true; break; + case '1': width = 1; _enumSigned = false; break; + case '2': width = 2; _enumSigned = true; break; + case '3': width = 2; _enumSigned = false; break; + case '4': width = 4; _enumSigned = true; break; + case '5': width = 4; _enumSigned = false; break; + case '6': width = 4; _enumSigned = true; break; + case '7': width = 4; _enumSigned = false; break; default: throw DemangleException(); } break; - case 'X': return TypeBuilder::VoidType(); break; + case 'X': return DemangledTypeNode::VoidType(); break; case 'Y': - throw DemangleException(); //TODO: handle cointerfaces - case 'Z': return TypeBuilder::VarArgsType(); break; + { + // Multi-dimensional array type: Y...@ + int64_t nDimensions; + DemangleNumber(nDimensions); + _STD_VECTOR elementList; + while (nDimensions--) + { + int64_t element = 0; + DemangleNumber(element); + elementList.push_back(element); + } + NameList arrayName; + newType = DemangleVarType(varList, false, arrayName); + for (auto i = elementList.rbegin(); i != elementList.rend(); i++) + { + newType = DemangledTypeNode::ArrayType(std::move(newType), *i); + } + if (!isReturn) + varList.PushTypeBackref(newType); + return newType; + } + case 'Z': return DemangledTypeNode::VarArgsType(); + case '?': + { + if (reader.Peek() >= '0' && reader.Peek() <= '9') + { + size_t reference = reader.Read() - '0'; + try + { + return varList.GetTypeBackref(reference); + } + catch (const DemangleException&) + { + if (reference == 2) + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"auto"}); + throw; + } + } + if (reader.Peek() != '<') + throw DemangleException(); + + string placeholder = reader.ReadUntil('@'); + if (reader.Peek() == '@') + reader.Consume(); + if (placeholder == "") + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"auto"}); + if (placeholder == "") + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"decltype(auto)"}); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{placeholder}); + } case '_': switch (reader.Read()) { - case 'D': newType = TypeBuilder::IntegerType(1, true); break; - case 'E': newType = TypeBuilder::IntegerType(1, false); break; - case 'F': newType = TypeBuilder::IntegerType(2, true); break; - case 'G': newType = TypeBuilder::IntegerType(2, false); break; - case 'H': newType = TypeBuilder::IntegerType(4, true); break; - case 'I': newType = TypeBuilder::IntegerType(4, false); break; - case 'J': newType = TypeBuilder::IntegerType(8, true); break; - case 'K': newType = TypeBuilder::IntegerType(8, false); break; - case 'L': newType = TypeBuilder::IntegerType(16, true); break; - case 'M': newType = TypeBuilder::IntegerType(16, false); break; - case 'N': newType = TypeBuilder::BoolType(); break; + case 'D': newType = DemangledTypeNode::IntegerType(1, true); break; + case 'E': newType = DemangledTypeNode::IntegerType(1, false); break; + case 'F': newType = DemangledTypeNode::IntegerType(2, true); break; + case 'G': newType = DemangledTypeNode::IntegerType(2, false); break; + case 'H': newType = DemangledTypeNode::IntegerType(4, true); break; + case 'I': newType = DemangledTypeNode::IntegerType(4, false); break; + case 'J': newType = DemangledTypeNode::IntegerType(8, true); break; + case 'K': newType = DemangledTypeNode::IntegerType(8, false); break; + case 'L': newType = DemangledTypeNode::IntegerType(16, true); break; + case 'M': newType = DemangledTypeNode::IntegerType(16, false); break; + case 'N': newType = DemangledTypeNode::BoolType(); break; case 'O': { - QualifiedName name; - //m_logger->Indent(); + NameList name; auto childType = DemangleVarType(varList, false, name); - //m_logger->Dedent(); - newType = TypeBuilder::ArrayType(childType.Finalize(), 0); + newType = DemangledTypeNode::ArrayType(std::move(childType), 0); break; } - case 'S': newType = TypeBuilder::IntegerType(2, true, "char16_t"); break; - case 'U': newType = TypeBuilder::IntegerType(4, true, "char32_t"); break; - case 'W': newType = TypeBuilder::IntegerType(2, false, "wchar_t"); break; - case 'X': typeClass = StructureTypeClass; structType = ClassStructureType; break; //Coclass - case 'Y': typeClass = StructureTypeClass; structType = ClassStructureType; break; //Cointerface + case 'S': newType = DemangledTypeNode::IntegerType(2, true, "char16_t"); break; + case 'U': newType = DemangledTypeNode::IntegerType(4, true, "char32_t"); break; + case 'W': newType = DemangledTypeNode::IntegerType(2, false, "wchar_t"); break; + // `_P` (auto) and `_T` (decltype(auto)) are placeholder return-type + // encodings. For normal source code they are deduced at the function + // definition and mangled as the deduced type — you will not see `_P` + // or `_T` from something like `auto foo() { return 0; }` (that becomes + // `?foo@@YAHXZ`). They do appear in compiler-emitted symbols for + // function templates whose declared return type is literally `auto` + // or `decltype(auto)` and which are mangled before/without deduction + // settling on a concrete type — e.g. `??$seq@HX@llvm@@YA?A_PH@Z` + // (llvm::seq) or `??$_Get_unwrapped@...@std@@YA?A_T...@Z`. Handle + // them as named-type placeholders so downstream type consumers get + // something sensible (rather than a `` demangle) even though + // the underlying type is not expressible as a Binary Ninja Type. + case 'P': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"auto"}); break; + case 'Q': newType = DemangledTypeNode::IntegerType(1, true, "char8_t"); break; // C++20 char8_t + case 'T': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"decltype(auto)"}); break; + // NOTE: `_X` and `_Y` were previously mapped to coclass/cointerface + // here, but those encodings are not emitted by any real toolchain. + // LLVM's MicrosoftDemangle / MicrosoftMangle and Wine's undname + // reimplementation none of them recognize `_X` or `_Y` as type + // codes. Real cointerface is plain `Y@@` (no underscore) at + // the top-level type switch, grouped with T/U/V; coclass has no + // dedicated mangling and is emitted as `V@@` (class). Let + // `_X` / `_Y` fall through to the `default: throw` so malformed + // input is rejected instead of producing a bogus class type. default: throw DemangleException(); } break; case '$': - if (reader.PeekString(2) == "$Q") // && + if (reader.PeekMatch("$Q", 2)) // && { reader.Consume(2); typeClass = PointerTypeClass; @@ -317,7 +416,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali _const = false; _volatile = false; } - else if (reader.PeekString(2) == "$R") // && volatile + else if (reader.PeekMatch("$R", 2)) // && volatile { reader.Consume(2); typeClass = PointerTypeClass; @@ -325,45 +424,94 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali _const = false; _volatile = true; } - else if (reader.PeekString(2) == "$A") + else if (reader.PeekMatch("$A", 2)) { reader.Consume(2); char num = reader.Read(); - if (num == 8) - return DemangleFunction(NoNameType, true, varList); - if (num == '6' || num == '7') - return DemangleFunction(NoNameType, false, varList); + if (num >= '6' && num <= '9') + { + // For member function types (8/9), skip the class scope marker @@ + if ((num == '8' || num == '9') && reader.Length() >= 2 + && reader.Peek() == '@' && reader.PeekAt(1) == '@') + reader.Consume(2); + return DemangleFunction(NoNameType, num >= '7', varList, NoneFunctionClass, false); + } throw DemangleException(); } - else if (reader.PeekString(2) == "$C") + else if (reader.PeekMatch("$C", 2)) { reader.Consume(2); DemangleModifiers(_const, _volatile, isMember); - QualifiedName name; - //m_logger->Indent(); + NameList name; newType = DemangleVarType(varList, false, name); - //m_logger->Dedent(); newType.SetConst(_const); newType.SetVolatile(_volatile); return newType; } - else if (reader.PeekString(2) == "$T") + else if (reader.PeekMatch("$T", 2)) + { + reader.Consume(2); + auto t = DemangledTypeNode::NamedType(UnknownNamedTypeClass, vector{"std::nullptr"}); + if (!isReturn) + varList.PushTypeBackref(t); + return t; + } + else if (reader.PeekMatch("$B", 2)) { + // $$B is a type modifier (managed/const) - strip and parse underlying type reader.Consume(2); - return TypeBuilder::ValueType("std::nullptr"); + NameList name; + return DemangleVarType(varList, isReturn, name); } else if (reader.Peek() == '0') { reader.Consume(); int64_t value; DemangleNumber(value); - return TypeBuilder::ValueType(to_string(value)); + return DemangledTypeNode::ValueType(to_string(value)); + } + else if (reader.Peek() == 'D') + { + // $D - template type alias / anonymous type parameter + reader.Consume(); + NameList name; + return DemangleVarType(varList, isReturn, name); + } + else if (reader.Peek() == 'M') + { + // $M - C++17 `auto` non-type template parameter. + // The encoded type is the deduced type for the following bare + // non-type payload and is not itself printed as a template arg. + reader.Consume(); + NameList autoTypeName; + DemangleVarType(varList, false, autoTypeName); + return DemangleAutoNonTypeTemplateParam(varList); + } + else if (reader.Peek() == 'H' || reader.Peek() == 'I' || reader.Peek() == 'J') + { + // $H/$I/$J - member function pointer value as a non-type template + // parameter. Format: $H@; + // $I has two adjustment numbers, $J has three. + char kind = reader.Read(); + BackrefList symbolBackrefs = varList; + auto context = DemangleSymbol(symbolBackrefs); + _STD_STRING value = "{" + context.type.GetTypeAndName(m_varName); + + // Read adjustment number(s) — NOT $-prefixed, just raw numbers. + int adjustments = (kind == 'H') ? 1 : (kind == 'I') ? 2 : 3; + for (int i = 0; i < adjustments && reader.Length() > 0 && reader.Peek() != '@'; i++) + { + int64_t adj; + DemangleNumber(adj); + value += "," + to_string(adj); + } + value += "}"; + return DemangledTypeNode::ValueType(value); } else if (reader.Peek() == '1') { reader.Consume(); - auto context = DemangleSymbol(); - return TypeBuilder::PointerType(m_arch, context.type.Finalize()); + return DemangleReferencedSymbolValue(varList); } else throw DemangleException(); @@ -379,7 +527,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali case '8': case '9': //Make a copy of the item in the backref list. Exit early since we don't want this added to the backref list. - m_logger->LogDebug("Backref %u %lu", elm - '0', varList.typeList.size()); + MSVC_TRACE("Backref %u %lu", elm - '0', varList.typeList.size()); return varList.GetTypeBackref(elm - '0'); default: throw DemangleException(); @@ -397,65 +545,51 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali case '3': case '4': case '5': + case '7': + case '9': throw DemangleException(); case '6': { - if (refType != PointerReferenceType) //No references to functions - { - throw DemangleException(); - } - reader.Consume(); - auto childType = DemangleFunction(NoNameType, false, varList); - newType = TypeBuilder::PointerType(m_arch, - childType.Finalize(), - _const, - _volatile, - refType); - break; - } - case '7': //Function pointer - case '9': //Class Function pointer - { - if (refType != PointerReferenceType) //No references to functions - { - throw DemangleException(); - } reader.Consume(); - auto childType = DemangleFunction(NoNameType, true, varList); - newType = TypeBuilder::PointerType(m_arch, - childType.Finalize(), - _const, - _volatile, - refType); + auto childType = DemangleFunction(NoNameType, false, varList, NoneFunctionClass, false); + newType = DemangledTypeNode::PointerType(m_arch, + std::move(childType), + _const, + _volatile, + refType); break; } case '8': //Named class function pointer { - if (refType != PointerReferenceType) //No references to functions - { - throw DemangleException(); - } reader.Consume(); - DemangleName(name, classFunctionType, varList); - name.push_back(""); - auto childType = DemangleFunction(NoNameType, true, varList); - newType = TypeBuilder::PointerType(m_arch, - childType.Finalize(), - _const, - _volatile, - refType); + NameList ownerName; + DemangleName(ownerName, classFunctionType, varList, true); + RewriteTemplateBackrefName(ownerName, varList); + auto childType = DemangleFunction(NoNameType, true, varList, NoneFunctionClass, false); + newType = DemangledTypeNode::MemberPointerType(m_arch, + std::move(childType), + std::move(ownerName), + _const, + _volatile); break; } default: // Non-numeric { - m_logger->LogDebug("Demangle pointer subtype: '%s'\n", reader.GetRaw()); - TypeBuilder child; + MSVC_TRACE("Demangle pointer subtype: '%s'\n", reader.GetRaw()); + DemangledTypeNode child; bool _const2 = false, _volatile2 = false, isMember = false; + NameList ownerName; auto suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); DemangleModifiers(_const2, _volatile2, isMember); + if (isMember) + { + DemangleName(ownerName, classFunctionType, varList, true); + RewriteTemplateBackrefName(ownerName, varList); + } if (reader.Peek() == 'Y') //Multi-dimentional array { - m_logger->LogDebug("Demangle multi-dimentional array"); + MSVC_TRACE("Demangle multi-dimentional array"); int64_t nDimentions; reader.Consume(); DemangleNumber(nDimentions); @@ -466,69 +600,65 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali DemangleNumber(element); elementList.push_back(element); } - QualifiedName name; - //m_logger->Indent(); + NameList name; child = DemangleVarType(varList, false, name); - //m_logger->Dedent(); for (auto i = elementList.rbegin(); i != elementList.rend(); i++) { - child = TypeBuilder::ArrayType(child.Finalize(), *i); + child = DemangledTypeNode::ArrayType(std::move(child), *i); } } else { - QualifiedName name; - //m_logger->Indent(); - child = DemangleVarType(varList, true, name); - //m_logger->Dedent(); + NameList name; + child = DemangleVarType(varList, true, name, includeImplicitThis && !isMember); } child.SetConst(_const2); child.SetVolatile(_volatile2); - newType = TypeBuilder::PointerType(m_arch, - child.Finalize(), - _const, - _volatile, - refType); - - newType.SetPointerSuffix(suffix); - m_logger->LogDebug("Name: %s\n", newType.GetString().c_str()); + if (isMember) + { + newType = DemangledTypeNode::MemberPointerType( + m_arch, std::move(child), std::move(ownerName), _const, _volatile); + } + else + { + newType = DemangledTypeNode::PointerType(m_arch, + std::move(child), + _const, + _volatile, + refType); + } + + newType.SetPointerSuffixBits(suffix); + MSVC_TRACE("Name: %s\n", newType.GetString().c_str()); break; } } break; } case EnumerationTypeClass: - m_logger->LogDebug("Demangle enumeration\n"); - //m_logger->Indent(); - DemangleName(typeName, classFunctionType, varList); - //m_logger->Dedent(); - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference(EnumNamedTypeClass, typeName), - width, width); + MSVC_TRACE("Demangle enumeration\n"); + DemangleName(typeName, classFunctionType, varList, true); + newType = DemangledTypeNode::NamedType(EnumNamedTypeClass, typeName, width, _enumSigned); break; case StructureTypeClass: - m_logger->LogDebug("Demangle structure\n"); - //m_logger->Indent(); - DemangleName(typeName, classFunctionType, varList); - //m_logger->Dedent(); + MSVC_TRACE("Demangle structure\n"); + DemangleName(typeName, classFunctionType, varList, true); + RewriteTemplateBackrefName(typeName, varList); switch (structType) { case ClassStructureType: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - ClassNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(ClassNamedTypeClass, typeName); break; case StructStructureType: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(StructNamedTypeClass, typeName); break; case UnionStructureType: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - UnionNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(UnionNamedTypeClass, typeName); break; default: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - UnknownNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, typeName); break; } break; @@ -545,7 +675,7 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali void Demangle::DemangleNumber(int64_t& num) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); num = 0; int mult = 1; if (reader.Peek() == '?') @@ -580,7 +710,7 @@ void Demangle::DemangleNumber(int64_t& num) void Demangle::DemangleChar(char& ch) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); // Basic char is just the char if (reader.Peek() != '?') { @@ -593,7 +723,7 @@ void Demangle::DemangleChar(char& ch) // Hex char is ?$XX for 2 hex digits XX if (reader.Peek() == '$') { - m_logger->LogDebug("%s: Hex digit '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Hex digit '%s'\n", __FUNCTION__, reader.GetRaw()); reader.Consume(); char c1 = reader.Peek(); @@ -613,7 +743,7 @@ void Demangle::DemangleChar(char& ch) return; } - m_logger->LogDebug("%s: Table lookup '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Table lookup '%s'\n", __FUNCTION__, reader.GetRaw()); // Otherwise it's a lookup based on some big table // Thanks, LLVM! @@ -697,49 +827,95 @@ void Demangle::DemangleWideChar(uint16_t& wch) } -void Demangle::DemangleVariableList(vector& paramList, BackrefList& varList) +void Demangle::DemangleVariableList(vector& paramList, BackrefList& varList, bool typeBackrefs) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; - set suffix; - for (size_t i = 0; reader.Peek() != 'Z'; i++) + uint8_t suffix = 0; + for (;;) { bool hasModifiers = false; + if (reader.Peek() == 'Z') + { + if (reader.Length() >= 2 && reader.PeekAt(1) == 'Z') + { + paramList.push_back({"", std::make_shared(DemangledTypeNode::VarArgsType())}); + reader.Consume(); + continue; + } + break; + } if (reader.Peek() == '@') { reader.Consume(); break; } + else if (reader.Length() >= 4 && reader.PeekMatch("$$$V", 4)) + { + // $$$V = empty expanded type / template-template pack (post-MSVC2015 mangling). + // See clang/lib/AST/MicrosoftMangle.cpp: for MSVC2015-compat this emits $$V, + // otherwise $$$V. + reader.Consume(4); + continue; + } + else if (reader.Length() >= 3 && (reader.PeekMatch("$$V", 3) || reader.PeekMatch("$$Z", 3))) + { + // $$V = empty expanded type / template-template pack (MSVC2015-compat mangling). + // $$Z = separator between two consecutive packs (emitted between non-empty packs, + // not as a lone template argument). LLVM's demangler leniently skips it in + // any position; we follow suit. + // NB: $$S is NOT emitted by any known toolchain - only $S (single $) is a real + // token, handled below. + reader.Consume(3); + continue; + } + else if (reader.Length() >= 2 && reader.PeekMatch("$S", 2)) + { + // $S = empty expanded non-type template pack + // (e.g. `template` or `template` instantiated with zero args). + reader.Consume(2); + continue; + } else if (reader.Peek() == '?') { reader.Consume(); suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); DemangleModifiers(_const, _volatile, isMember); hasModifiers = true; } - FunctionParameter vt; - QualifiedName name; - m_logger->LogDebug("Argument %d: %s", i, reader.GetRaw()); - //m_logger->Indent(); - TypeBuilder type = DemangleVarType(varList, false, name); - //m_logger->Dedent(); + NameList name; + MSVC_TRACE("Argument %zu: %s", paramList.size(), reader.GetRaw()); + size_t typeListSizeAtEntry = varList.typeList.size(); + DemangledTypeNode type = DemangleVarType(varList, false, name); + // Template argument lists may use temporary backrefs created while + // parsing an argument, and later arguments may refer to them. However, + // the completed top-level argument itself is not added as a later type + // backref in the same template argument list. DemangleVarType appends + // that completed type last, so preserve any intermediate entries and + // drop only the final top-level type. + if (!typeBackrefs && varList.typeList.size() > typeListSizeAtEntry) + varList.typeList.pop_back(); if (hasModifiers) { type.SetConst(_const); type.SetVolatile(_volatile); - type.SetPointerSuffix(suffix); + type.SetPointerSuffixBits(suffix); } - vt.name = name.GetString(); - vt.type = type.Finalize(); - vt.defaultLocation = true; - paramList.push_back(vt); - m_logger->LogDebug("Argument %zu: '%s' - '%s'\n", i, vt.type->GetString().c_str(), reader.GetRaw()); + DemangledTypeNode::Param vt; + if (name.size() == 1) + vt.name = std::move(name[0]); + else if (name.size() > 1) + vt.name = JoinNameList(name); + vt.type = std::make_shared(std::move(type)); + paramList.push_back(std::move(vt)); + MSVC_TRACE("Argument %zu: '%s' - '%s'\n", paramList.size() - 1, paramList.back().type->GetString().c_str(), reader.GetRaw()); } if (reader.Peek() == 'Z') reader.Consume(); - m_logger->LogDebug("%s: done '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: done '%s'\n", __FUNCTION__, reader.GetRaw()); } @@ -751,6 +927,12 @@ Demangle::NameType Demangle::GetNameType() if (reader.Peek()== '?') { reader.Consume(); + // Check for ??@ (MD5 hashed name) after consuming both ?s + if (reader.Peek() == '@') + { + reader.Consume(); // consume '@' + return NameString; // ReadUntil('@') will get the hash + } return GetNameType(); } else if (reader.Peek() == '$') @@ -773,16 +955,23 @@ Demangle::NameType Demangle::GetNameType() reader.Consume(); return NameReturn; } - else if (reader.PeekString(2) == "_R") + else if (reader.Length() >= 3 && reader.Peek() == 'A' && reader.PeekAt(1) == '0' && reader.PeekAt(2) == 'x') + { + reader.Consume(); + return NameAnonymousNamespace; + } + else if (reader.PeekMatch("_R", 2)) { reader.Consume(2); return NameRtti; } - // else if (reader.PeekString(3) == "__E") - // { - // reader.Consume(2); - // return NameDynamicInitializer; - // } + else if (reader.Peek() >= 'a' && reader.Peek() <= 'z') + { + // Lowercase after ? indicates a non-standard extension name + // (e.g., ??null$initializer$ for thread-safe static init guards) + // All standard MSVC operator codes use uppercase/digits/_ + return NameString; + } else { return NameLookup; @@ -806,27 +995,27 @@ void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, BackrefList& nameBackrefList, string& out) { - TypeBuilder rtti; + DemangledTypeNode rtti; switch (reader.Read()) { case '0': { - if (reader.Peek() != '?') - throw DemangleException(); - reader.Consume(); - bool _const = false, _volatile = false, isMember = false; - auto suffix = DemanglePointerSuffix(); - DemangleModifiers(_const, _volatile, isMember); + uint8_t suffix = 0; + if (reader.Peek() == '?') + { + reader.Consume(); + suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); + DemangleModifiers(_const, _volatile, isMember); + } - QualifiedName name; - //m_logger->Indent(); + NameList name; rtti = DemangleVarType(nameBackrefList, false, name); - //m_logger->Dedent(); rtti.SetConst(_const); rtti.SetVolatile(_volatile); - rtti.SetPointerSuffix(suffix); - out = rtti.GetString() + " `RTTI Type Descriptor' "; + rtti.SetPointerSuffixBits(suffix); + out = rtti.GetString() + " `RTTI Type Descriptor'"; classFunctionType = RttiTypeDescriptor; break; } @@ -838,7 +1027,7 @@ void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, DemangleNumber(num); if (i > 0) { - out += ","; + out += ", "; } out += to_string(num); } @@ -864,10 +1053,13 @@ void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); switch (reader.Read()) { case '?': functionType = NoNameType; break; + case '0': functionType = ConstructorNameType; break; + case '1': functionType = ConstructorNameType; out = "~"; break; // destructor + case 'B': functionType = OperatorReturnTypeNameType; out = "operator"; break; // conversion operator case '2': functionType = OperatorNewNameType; break; case '3': functionType = OperatorDeleteNameType; break; case '4': functionType = OperatorAssignNameType; break; @@ -903,7 +1095,7 @@ void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) case 'Z': functionType = OperatorMinusEqualNameType; break; case '_': { - m_logger->LogDebug(" %s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE(" %s: '%s'\n", __FUNCTION__, reader.GetRaw()); switch (reader.Read()) { case '0': functionType = OperatorDivideEqualNameType; break; @@ -942,20 +1134,41 @@ void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) case 'W': // Fallthrough case 'Z': functionType = NoNameType; break; case '_': - m_logger->LogDebug(" %s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE(" %s: '%s'\n", __FUNCTION__, reader.GetRaw()); switch (reader.Read()) { case 'A': functionType = ManagedVectorConstructorIteratorNameType; break; case 'B': functionType = ManagedVectorDestructorIteratorNameType; break; case 'C': functionType = EHVectorCopyConstructorIteratorNameType; break; - case 'D': functionType = EHVectorVBaseConstructorIteratorNameType; break; - case 'E': functionType = DynamicInitializerNameType; break; - case 'F': functionType = DynamicAtExitDestructorNameType; break; + // ??__D is the *copy* variant per LLVM (MicrosoftDemangle.cpp:701). + // Previously routed to EHVectorVBaseConstructorIteratorNameType + // (the non-copy enum used by ??_O), which dropped the "copy" word. + case 'D': functionType = EHVectorVBaseCopyConstructorIteratorNameType; break; + // ??__E and ??__F are not reached here — they're handled at the + // top level in DemangleSymbol, matching LLVM's special-intrinsic + // dispatch. See DemangleDynamicInitFini. + case 'E': // fall through — unreachable in practice + case 'F': functionType = (reader.PeekAt(-1) == 'E') ? DynamicInitializerNameType : DynamicAtExitDestructorNameType; break; case 'G': functionType = VectorCopyConstructorIteratorNameType; break; case 'H': functionType = VectorVBaseCopyConstructorIteratorNameType; break; case 'I': functionType = ManagedVectorCopyConstructorIteratorNameType; break; - case 'J': functionType = LocalStaticGuardNameType; break; - case 'K': functionType = UserDefinedLiteralOperatorNameType; break; + case 'J': functionType = LocalStaticThreadGuardNameType; break; + case 'K': + { + // User-defined literal operator: ??__K@ + // LLVM's demangleLiteralOperatorIdentifier consumes a simple + // string terminated by '@' as the literal suffix and renders it + // as `operator ""`. The outer DemangleName loop then + // picks up any enclosing scope chain as a normal prefix. + functionType = UserDefinedLiteralOperatorNameType; + _STD_STRING suffix = reader.ReadUntil('@'); + if (suffix.empty()) + throw DemangleException("??__K requires a non-empty literal suffix"); + out = "operator \"\"" + suffix; + break; + } + case 'L': functionType = NoNameType; out = "operator co_await"; break; + case 'M': functionType = NoNameType; out = "operator<=>"; break; // spaceship operator default: throw DemangleException("Demangle Lookup Failed"); // fall through } break; @@ -966,16 +1179,16 @@ void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) } default: throw DemangleException("Demangle Lookup Failed"); } - out = Type::GetNameTypeString(functionType); + if (out.empty()) + out = Type::GetNameTypeString(functionType); } string Demangle::DemangleTemplateInstantiationName(BackrefList& nameBackrefList) { string out; - BackrefList templateBackref; + MSVC_TRACE("DemangleTemplateInstantiationName: '%s'\n", reader.GetRaw()); reader.Consume(2); - m_logger->LogDebug("DemangleTemplateInstantiationName: '%s'\n", reader.GetRaw()); if (reader.Peek() >= '0' && reader.Peek() <= '9') { out = nameBackrefList.GetStringBackref(reader.Read() - '0'); @@ -989,48 +1202,104 @@ string Demangle::DemangleTemplateInstantiationName(BackrefList& nameBackrefList) } -string Demangle::DemangleTemplateParams(vector& params, BackrefList& nameBackrefList, string& out) +string Demangle::DemangleTemplateInstantiationNameInLocalContext(BackrefList& nameBackrefList) { - //m_logger->Indent(); - DemangleVariableList(params, nameBackrefList); - //m_logger->Dedent(); - m_logger->LogDebug("VariableList done\n"); - out += "<"; - for (size_t i = 0; i < params.size(); i++) + string out; + vector params; + BNNameType dummyFunctionType = NoNameType; + NameList dummyNameList; + BackrefList outerBackrefs; + bool backrefEligible = true; + MSVC_TRACE("DemangleTemplateInstantiationNameInLocalContext: '%s'\n", reader.GetRaw()); + + std::swap(outerBackrefs.typeList, nameBackrefList.typeList); + std::swap(outerBackrefs.nameList, nameBackrefList.nameList); + std::swap(outerBackrefs.templateList, nameBackrefList.templateList); + + try { - if (i == 0) - { - out += params[i].type->GetString(); - } - else - { - out += "," + params[i].type->GetString(); - } + reader.Consume(2); + out = DemangleUnqualifiedSymbolName(dummyNameList, nameBackrefList, dummyFunctionType, backrefEligible); + if (backrefEligible && dummyFunctionType == NoNameType) + nameBackrefList.PushStringBackref(out); + DemangleTemplateParams(params, nameBackrefList, out); + } + catch (...) + { + std::swap(outerBackrefs.typeList, nameBackrefList.typeList); + std::swap(outerBackrefs.nameList, nameBackrefList.nameList); + std::swap(outerBackrefs.templateList, nameBackrefList.templateList); + throw; } - if (out[out.size()-1] == '>') - out += " "; //Be c++03 compliant where we can - out += ">"; + std::swap(outerBackrefs.typeList, nameBackrefList.typeList); + std::swap(outerBackrefs.nameList, nameBackrefList.nameList); + std::swap(outerBackrefs.templateList, nameBackrefList.templateList); + + nameBackrefList.PushTemplateSpecialization(out); nameBackrefList.PushStringBackref(out); return out; } -// void Demangle::DemangleInitFiniStub(bool destructor, QualifiedName& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType) -// { -// bool isStatic = false; -// if (reader.Peek() == '?') -// { -// reader.Consume(); -// isStatic = true; -// } -// string out = DemangleUnqualifiedSymbolName(nameList, nameBackrefList, classFunctionType); -// } +void Demangle::DemangleTemplateParams(vector& params, BackrefList& nameBackrefList, string& out) +{ + params.clear(); + const bool nestedTemplateContext = (m_templateParamDepth > 0); + struct NameBackrefScopeGuard + { + BackrefList& backrefs; + size_t typeCount; + size_t nameCount; + ~NameBackrefScopeGuard() + { + backrefs.typeList.resize(typeCount); + backrefs.nameList.resize(nameCount); + } + }; + struct TemplateDepthGuard + { + size_t& depth; + TemplateDepthGuard(size_t& depth): depth(depth) { depth++; } + ~TemplateDepthGuard() { depth--; } + }; -string Demangle::DemangleUnqualifiedSymbolName(QualifiedName& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType) + { + TemplateDepthGuard depthGuard(m_templateParamDepth); + NameBackrefScopeGuard scopeGuard { + nameBackrefList, + nameBackrefList.typeList.size(), + nameBackrefList.nameList.size() + }; + + DemangleVariableList(params, nameBackrefList, false); + } + + // Reserve space to reduce reallocation during template string building + out.reserve(out.size() + params.size() * 16 + 2); + out += '<'; + for (size_t i = 0; i < params.size(); i++) + { + if (i > 0) + out += ','; + if (params[i].type) + params[i].type->AppendString(out); + } + if (out.back() == '>') + out += ' '; // C++03 compat: >> → > > + out += '>'; + nameBackrefList.PushTemplateSpecialization(out); + if (nestedTemplateContext) + nameBackrefList.PushStringBackref(out); +} + + +string Demangle::DemangleUnqualifiedSymbolName(NameList& nameList, BackrefList& nameBackrefList, + BNNameType& classFunctionType, bool& backrefEligible) { + backrefEligible = true; string out; - if (reader.PeekString(2) == "?$") + if (reader.PeekMatch("?$", 2)) { reader.Consume(2); out = DemangleTemplateInstantiationName(nameBackrefList); @@ -1040,6 +1309,9 @@ string Demangle::DemangleUnqualifiedSymbolName(QualifiedName& nameList, BackrefL { reader.Consume(); DemangleTypeNameLookup(out, classFunctionType); + // Lookup-based operator names are not normal identifier components and + // should not satisfy later scope backrefs such as strong_ordering@0@. + backrefEligible = false; } else if (reader.Peek() >= '0' && reader.Peek() <= '9') { @@ -1053,9 +1325,9 @@ string Demangle::DemangleUnqualifiedSymbolName(QualifiedName& nameList, BackrefL } -TypeBuilder Demangle::DemangleString() +DemangledTypeNode Demangle::DemangleString() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); // ??_C@_@ if (reader.Peek() != '_') { @@ -1068,6 +1340,8 @@ TypeBuilder Demangle::DemangleString() switch (reader.Peek()) { case '1': + case '2': // UTF-16/UTF-32 encoding variants + case '3': isWideChar = true; break; case '0': @@ -1087,7 +1361,7 @@ TypeBuilder Demangle::DemangleString() } uint64_t length = (uint64_t)lengthRaw; - m_logger->LogDebug("%s: Before CRC32 '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Before CRC32 '%s'\n", __FUNCTION__, reader.GetRaw()); // CRC32 (ignored) while (reader.Peek() != '@') @@ -1100,18 +1374,24 @@ TypeBuilder Demangle::DemangleString() bool truncated = false; string name = ""; - TypeBuilder type; + string literalPrefix; + DemangledTypeNode type; // String bytes if (isWideChar) { - m_logger->LogDebug("%s: Wide string '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Wide string '%s'\n", __FUNCTION__, reader.GetRaw()); string utf8name; - truncated = (length > 64); + literalPrefix = "L"; + // Track the last wide char so we can detect missing null terminator. + uint16_t lastWch = 1; + size_t wcharCount = 0; while (reader.Peek() != '@') { uint16_t wch; DemangleWideChar(wch); + lastWch = wch; + wcharCount++; uint8_t chs[2]; chs[0] = wch & 0xFF; @@ -1122,12 +1402,18 @@ TypeBuilder Demangle::DemangleString() } reader.Consume(); + // MSVC string literals always mangle their trailing null. A payload + // that doesn't end in a wide null means the original was too long to + // fit in the mangling and was truncated. Matches LLVM's demangler. + if (wcharCount == 0 || lastWch != 0) + truncated = true; + name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); - type = Type::ArrayType(Type::WideCharType(2), length / 2); + type = DemangledTypeNode::ArrayType(DemangledTypeNode::WideCharType(2), length / 2); } else { - m_logger->LogDebug("%s: Non-wide string '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Non-wide string '%s'\n", __FUNCTION__, reader.GetRaw()); uint64_t numNulls = 0; size_t endNulls = 0; vector chars; @@ -1152,57 +1438,61 @@ TypeBuilder Demangle::DemangleString() { truncated = true; } + // MSVC includes the trailing '\0' in the mangled payload. If the last + // byte isn't a null, the original string was truncated to fit the + // encoding's size limit — LLVM signals this with a `...` suffix. + if (!chars.empty() && chars.back() != 0) + truncated = true; // Now time to guess encoding if (chars.size() % 1 != 0) { - m_logger->LogDebug("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, chars.data(), chars.size() - endNulls); - type = Type::ArrayType(Type::IntegerType(1, true), length); + type = DemangledTypeNode::ArrayType(DemangledTypeNode::IntegerType(1, true), length); } else { if (chars.size() % 4 == 0 && numNulls > length * 2 / 3) { - m_logger->LogDebug("%s: Looks like UTF32 '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Looks like UTF32 '%s'\n", __FUNCTION__, reader.GetRaw()); string utf8name; for (size_t i = 0; i < chars.size() - endNulls; i += 4) { utf8name += Unicode::UTF32ToUTF8(chars.data() + i); } name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); - type = Type::ArrayType(Type::WideCharType(4), length / 4); + literalPrefix = "U"; + type = DemangledTypeNode::ArrayType(DemangledTypeNode::WideCharType(4), length / 4); } else if (numNulls > length / 3) { - m_logger->LogDebug("%s: Looks like UTF16 '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Looks like UTF16 '%s'\n", __FUNCTION__, reader.GetRaw()); string utf8name; for (size_t i = 0; i < chars.size() - endNulls; i += 2) { utf8name += Unicode::UTF16ToUTF8(chars.data() + i, 2); } name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); - type = Type::ArrayType(Type::WideCharType(2), length / 2); + literalPrefix = "L"; + type = DemangledTypeNode::ArrayType(DemangledTypeNode::WideCharType(2), length / 2); } else { - m_logger->LogDebug("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, chars.data(), chars.size() - endNulls); - type = Type::ArrayType(Type::IntegerType(1, true), length); + type = DemangledTypeNode::ArrayType(DemangledTypeNode::IntegerType(1, true), length); } } } - if (truncated) - { - name += "..."; - } - m_varName.push_back(name); + m_varName.clear(); + m_varName.push_back(fmt::bnformat("{}\"{}\"{}", literalPrefix, name, truncated ? "..." : "")); return type; } -TypeBuilder Demangle::DemangleTypeInfoName() +DemangledTypeNode Demangle::DemangleTypeInfoName() { if (reader.Read() != '?') throw DemangleException("Unknown raw name type"); @@ -1211,106 +1501,364 @@ TypeBuilder Demangle::DemangleTypeInfoName() bool isMember = false; DemangleModifiers(_const, _volatile, isMember); - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - QualifiedName name; - TypeBuilder type = DemangleVarType(m_backrefList, false, name); + NameList name; + DemangledTypeNode type = DemangleVarType(m_backrefList, false, name); type.SetConst(_const); type.SetVolatile(_volatile); switch (type.GetClass()) { case NamedTypeReferenceClass: - m_varName = type.GetNamedTypeReference()->GetName(); - return type; + { + // Match LLVM's demangler: a raw type-info name (.?A...) renders as + // ` `RTTI Type Descriptor Name''`. Bake the type + // keyword + name into the symbol's qualified name via m_varName, + // then return a fresh NamedType marked RttiTypeDescriptor so BN's + // core type formatter skips its own class/struct prefix — this + // mirrors the treatment of ??_R0 in DemangleNameTypeRtti case '0'. + string rendered = type.GetString() + " `RTTI Type Descriptor Name'"; + m_varName = { rendered }; + DemangledTypeNode newType = DemangledTypeNode::NamedType( + StructNamedTypeClass, + NameList(type.GetTypeName().begin(), type.GetTypeName().end())); + newType.SetNameType(RttiTypeDescriptor); + return newType; + } default: throw DemangleException("Unexpected type of RTTI Type Name"); } } -void Demangle::DemangleName(QualifiedName& nameList, +void Demangle::DemangleName(NameList& nameList, BNNameType& classFunctionType, - BackrefList& nameBackrefList) + BackrefList& nameBackrefList, + bool typeNameContext) { + size_t nameListSizeAtEntry = nameList.size(); + bool pendingConstructorTemplateName = false; + + auto finalizeConstructorTemplateName = [&]() { + if (!pendingConstructorTemplateName) + return; + + if (nameList.size() <= nameListSizeAtEntry + 1) + throw DemangleException("Constructor template missing class scope"); + + string& constructorTemplateName = nameList.back(); + if (constructorTemplateName.empty() || constructorTemplateName[0] != '<') + throw DemangleException("Invalid constructor template name"); + + // `??$?0...@Class@@` is a templated constructor. LLVM models `?0` as a + // structor identifier and attaches the parsed enclosing class to it after + // the qualified name is complete; Wine's undname does the same as a string + // post-process. We store names as strings, so apply that post-process here: + // `?0` becomes `Class`. + constructorTemplateName = nameList[nameList.size() - 2] + constructorTemplateName; + }; + + auto tryDemangleEscapedLookupScopeName = [&]() -> bool + { + if (nameList.size() <= nameListSizeAtEntry) + return false; + + const char* start = reader.GetRaw(); + if (reader.Length() < 4) + return false; + + char prefix = start[0]; + if (!((prefix >= 'A' && prefix <= 'Z') || (prefix == '_'))) + return false; + if (start[1] == '@' || start[1] == '?') + return false; + + const char* limit = start + reader.Length(); + const char* end = nullptr; + for (const char* cur = start + 1; (cur + 1) < limit; cur++) + { + if ((cur[0] == '@') && (cur[1] == '@')) + { + end = cur; + break; + } + } + if (!end) + return false; + + vector escapedNames; + const char* componentStart = start; + while (componentStart < end) + { + const char* componentEnd = componentStart; + while ((componentEnd < end) && (*componentEnd != '@')) + { + char ch = *componentEnd; + if (ch == '?') + return false; + if (!((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') + || (ch >= 'a' && ch <= 'z') || (ch == '_') || (ch == '$'))) + { + return false; + } + componentEnd++; + } + if (componentEnd == componentStart) + return false; + + escapedNames.emplace_back(componentStart, componentEnd - componentStart); + componentStart = componentEnd + 1; + } + + for (const auto& escapedName: escapedNames) + { + nameList.insert(nameList.begin(), escapedName); + nameBackrefList.PushStringBackref(escapedName); + } + reader.SetRaw(end + 2); + return true; + }; + + auto decodeEncodedNumber = [&](const string& encoded) -> int64_t + { + if (encoded.empty()) + throw DemangleException("Empty encoded number"); + + size_t offset = 0; + int mult = 1; + if (encoded[offset] == '?') + { + mult = -1; + offset++; + } + if (offset >= encoded.size()) + throw DemangleException("Truncated encoded number"); + + if (encoded[offset] >= '0' && encoded[offset] <= '9') + { + if (offset + 1 != encoded.size()) + throw DemangleException("Decimal encoded number has trailing characters"); + return mult * (encoded[offset] + 1 - '0'); + } + + int64_t num = 0; + for (; offset < encoded.size(); offset++) + { + char a = encoded[offset]; + num *= 16; + if (a >= 'A' && a <= 'P') + num += a - 'A'; + else + throw DemangleException("Invalid encoded hex digit"); + } + return num * mult; + }; + + auto functionTypeHasPointerSuffix = [&](char ft) -> bool + { + return ft != 'C' && ft != 'D' && ft != 'K' && ft != 'L' + && ft != 'S' && ft != 'T' && ft != 'Y' && ft != 'Z'; + }; + + auto formatFunctionScopeSignature = [&](const DemangledTypeNode& type, const NameList& scopeName) -> string + { + string out = type.GetTypeAndName(scopeName); + while (!out.empty() && out.back() == ' ') + out.pop_back(); + return out; + }; + + auto appendLocalScope = [&](int64_t scopeOrdinal) -> void + { + NameList scopeName; + BNNameType scopeFunctionType = NoNameType; + DemangleName(scopeName, scopeFunctionType, nameBackrefList, typeNameContext); + + if (reader.Length() == 0) + throw DemangleException("Missing local scope function encoding"); + + char ft = reader.Read(); + if (ft < 'A' || ft > 'Z') + throw DemangleException("Invalid local scope function encoding"); + + DemangledTypeNode scopeType = DemangleFunction( + scopeFunctionType, functionTypeHasPointerSuffix(ft), nameBackrefList); + + nameList.insert(nameList.begin(), "`" + to_string(scopeOrdinal) + "'"); + nameList.insert(nameList.begin(), "`" + formatFunctionScopeSignature(scopeType, scopeName) + "'"); + }; + string out; BNNameType functionType; BNNameType dummyFunctionType; - vector params; + vector params; while(1) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); switch (GetNameType()) { case NameString: - m_logger->LogDebug("Demangle String\n"); + MSVC_TRACE("Demangle String\n"); DemangleNameTypeString(out); nameList.insert(nameList.begin(), out); - m_logger->LogDebug("Pushing backref NameString %s", out.c_str()); + MSVC_TRACE("Pushing backref NameString %s", out.c_str()); nameBackrefList.PushStringBackref(out); - m_logger->LogDebug("nameList.front(): %s\n", nameList.front().c_str()); - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("nameList.front(): %s\n", nameList.front().c_str()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); break; case NameLookup: - m_logger->LogDebug("Demangle Lookup\n"); + { + MSVC_TRACE("Demangle Lookup\n"); + if (nameList.size() > nameListSizeAtEntry) + { + const char* saved = reader.GetRaw(); + try + { + int64_t scopeOrdinal; + DemangleNumber(scopeOrdinal); + if (reader.Length() >= 2 && reader.Peek() == '?' && reader.PeekAt(1) == '?') + { + appendLocalScope(scopeOrdinal); + break; + } + } + catch (...) + { + } + reader.SetRaw(saved); + } + if (tryDemangleEscapedLookupScopeName()) + break; DemangleTypeNameLookup(out, functionType); classFunctionType = functionType; nameList.insert(nameList.begin(), out); + // Check if this is a scope specifier. Scope specifiers are ? + // followed by either @?? or directly ?? (for digit scopes like ?3??func@...) + // When nameList has prior components, the operator name is actually a scope index + // Also handle dynamic init/dtor wrapping ??@ (MD5 hash) + if (reader.Length() >= 4 && reader.PeekMatch("??@", 3)) + { + reader.Consume(3); // consume ??@ + _STD_STRING hash = reader.ReadUntil('@'); + nameList.insert(nameList.begin(), "??@" + hash + "@"); + // Consume the trailing @ (name terminator) — the ??@hash@ pattern + // is followed by @@ (end of scoped name) before the function type + if (reader.Length() > 0 && reader.Peek() == '@') + reader.Consume(); + break; + } + break; + } + case NameAnonymousNamespace: + { + DemangleNameTypeString(out); // discard compiler-generated hash + nameList.insert(nameList.begin(), "`anonymous namespace'"); break; + } case NameBackref: - m_logger->LogDebug("Demangle Backref"); + MSVC_TRACE("Demangle Backref"); out = nameBackrefList.GetStringBackref(reader.Read() - '0'); - m_logger->LogDebug("Demangle Backref: %s", out.c_str()); + MSVC_TRACE("Demangle Backref: %s", out.c_str()); nameList.insert(nameList.begin(), out); break; case NameTemplate: { - m_logger->LogDebug("Demangle Template: '%s'\n", reader.GetRaw()); - BackrefList templateBackref; - out = DemangleUnqualifiedSymbolName(nameList, templateBackref, functionType); - m_logger->LogDebug("Pushing backref NameTemplate %s", out.c_str()); - templateBackref.PushStringBackref(out); - m_logger->LogDebug("Demangling Template variables %s\n", reader.GetRaw()); - DemangleTemplateParams(params, templateBackref, out); + MSVC_TRACE("Demangle Template: '%s'\n", reader.GetRaw()); + BNNameType functionType = NoNameType; + bool backrefEligible = true; + if (typeNameContext || (m_templateParamDepth > 0) || (nameList.size() > nameListSizeAtEntry)) + { + const char* saved = reader.GetRaw(); + reader.SetRaw(saved - 2); + out = DemangleTemplateInstantiationNameInLocalContext(nameBackrefList); + } + else + { + out = DemangleUnqualifiedSymbolName(nameList, nameBackrefList, functionType, backrefEligible); + if (backrefEligible && functionType == NoNameType) + { + MSVC_TRACE("Pushing backref NameTemplate %s", out.c_str()); + nameBackrefList.PushStringBackref(out); + } + MSVC_TRACE("Demangling Template variables %s\n", reader.GetRaw()); + DemangleTemplateParams(params, nameBackrefList, out); + if (functionType == ConstructorNameType) + { + classFunctionType = ConstructorNameType; + pendingConstructorTemplateName = true; + } + } nameList.insert(nameList.begin(), out); - nameBackrefList.PushStringBackref(out); break; } case NameConstructor: - m_logger->LogDebug("NameConstructor\n"); - classFunctionType = ConstructorNameType; - DemangleName(nameList, dummyFunctionType, nameBackrefList); + { + MSVC_TRACE("NameConstructor\n"); + bool isScope = (nameList.size() > nameListSizeAtEntry); + if (!isScope) + classFunctionType = ConstructorNameType; + if (isScope) + { + appendLocalScope(1); + break; + } + DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); if (nameList.size() == 0) throw DemangleException(); nameList.push_back(nameList[nameList.size()-1]); return; + } case NameDestructor: - classFunctionType = ConstructorNameType; - m_logger->LogDebug("NameDestructor\n"); - DemangleName(nameList, dummyFunctionType, nameBackrefList); + { + MSVC_TRACE("NameDestructor\n"); + bool isScope = (nameList.size() > nameListSizeAtEntry); + if (!isScope) + classFunctionType = ConstructorNameType; + if (isScope) + { + appendLocalScope(2); + break; + } + DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); if (nameList.size() == 0) throw DemangleException(); nameList.push_back("~" + nameList[nameList.size()-1]); return; + } case NameRtti: - m_logger->LogDebug("NameRtti\n"); + MSVC_TRACE("NameRtti\n"); DemangleNameTypeRtti(classFunctionType, nameBackrefList, out); nameList.insert(nameList.begin(), out); break; - // case NameDynamicInitializer: - // m_logger->LogDebug("NameDynamicInitializer\n"); - // DemangleInitFiniStub(false); - // break; - // case NameDynamicAtExitDestructor: - // m_logger->LogDebug("NameDynamicAtExitDestructor\n"); - // DemangleInitFiniStub(false); - // break; case NameReturn: - m_logger->LogDebug("NameReturn\n"); + { + MSVC_TRACE("NameReturn\n"); + if (nameList.size() > nameListSizeAtEntry && reader.Length() >= 1) + { + const char* saved = reader.GetRaw(); + try + { + _STD_STRING scopeSuffix; + DemangleNameTypeString(scopeSuffix); + if (reader.Length() >= 2 && reader.Peek() == '?' && reader.PeekAt(1) == '?') + { + appendLocalScope(decodeEncodedNumber("B" + scopeSuffix)); + break; + } + } + catch (...) + { + } + reader.SetRaw(saved); + } classFunctionType = OperatorReturnTypeNameType; - if (reader.PeekString(2) == "?$") + if (reader.PeekMatch("?$", 2)) { - out = DemangleTemplateInstantiationName(nameBackrefList); + if (m_templateParamDepth > 0) + out = DemangleTemplateInstantiationNameInLocalContext(nameBackrefList); + else + out = DemangleTemplateInstantiationName(nameBackrefList); DemangleTemplateParams(params, nameBackrefList, out); } else @@ -1320,58 +1868,26 @@ void Demangle::DemangleName(QualifiedName& nameList, } nameList.insert(nameList.begin(), out); break; + } default: throw DemangleException(); } - if (nameList.StringSize() > MAX_DEMANGLE_LENGTH) + if (NameListStringSize(nameList) > MAX_DEMANGLE_LENGTH) throw DemangleException(); if (reader.Peek() == '@') { reader.Consume(); + finalizeConstructorTemplateName(); return; } } } -Ref Demangle::GetCallingConventionForType(BNCallingConventionName ccName) -{ - string name; - switch (ccName) - { - case NoCallingConvention: name = ""; break; - case CdeclCallingConvention: name = "cdecl"; break; - case PascalCallingConvention: name = "pascal"; break; - case ThisCallCallingConvention: name = "thiscall"; break; - case STDCallCallingConvention: name = "stdcall"; break; - case FastcallCallingConvention: name = "fastcall"; break; - case CLRCallCallingConvention: name = "clrcall"; break; - case EabiCallCallingConvention: name = "eabi"; break; - case VectorCallCallingConvention: name = "vectorcall"; break; - case SwiftCallingConvention: name = "swiftcall"; break; - case SwiftAsyncCallingConvention: name = "swiftasync"; break; - default: break; - } - if (m_platform) - { - for (const auto& cc : m_platform->GetCallingConventions()) - { - if (cc->GetName() == name) - return cc; - } - } - - for (const auto& cc : m_arch->GetCallingConventions()) - { - if (cc->GetName() == name) - return cc; - } - return nullptr; -} BNCallingConventionName Demangle::DemangleCallingConvention() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); switch (reader.Read()) { case 'A': //Exported function @@ -1397,10 +1913,50 @@ BNCallingConventionName Demangle::DemangleCallingConvention() } } -set Demangle::DemanglePointerSuffix() + +Ref Demangle::ResolveCallingConvention(BNCallingConventionName cc) const +{ + switch (cc) + { + case CdeclCallingConvention: + if (m_platform && m_platform->GetCdeclCallingConvention()) + return m_platform->GetCdeclCallingConvention(); + if (m_arch && m_arch->GetCdeclCallingConvention()) + return m_arch->GetCdeclCallingConvention(); + return m_arch ? m_arch->GetCallingConventionByName("cdecl") : nullptr; + case STDCallCallingConvention: + if (m_platform && m_platform->GetStdcallCallingConvention()) + return m_platform->GetStdcallCallingConvention(); + if (m_arch && m_arch->GetStdcallCallingConvention()) + return m_arch->GetStdcallCallingConvention(); + return m_arch ? m_arch->GetCallingConventionByName("stdcall") : nullptr; + case FastcallCallingConvention: + if (m_platform && m_platform->GetFastcallCallingConvention()) + return m_platform->GetFastcallCallingConvention(); + if (m_arch && m_arch->GetFastcallCallingConvention()) + return m_arch->GetFastcallCallingConvention(); + return m_arch ? m_arch->GetCallingConventionByName("fastcall") : nullptr; + case ThisCallCallingConvention: + if (m_arch) + return m_arch->GetCallingConventionByName("thiscall"); + return nullptr; + default: + return nullptr; + } +} + + +void Demangle::ConsumeExtendedModifierPrefix() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - set suffix; + while (reader.PeekMatch("$A", 2)) + reader.Consume(2); +} + + +uint8_t Demangle::DemanglePointerSuffix() +{ + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + uint8_t suffix = 0; if (reader.Peek() == '@') return suffix; @@ -1408,15 +1964,15 @@ set Demangle::DemanglePointerSuffix() for (int i = 0; i < 5; i++, elm = reader.Peek()) { if (elm == 'E') - suffix.insert(suffix.end(), Ptr64Suffix); + suffix |= (1u << Ptr64Suffix); else if (elm == 'F') - suffix.insert(suffix.end(), UnalignedSuffix); + suffix |= (1u << UnalignedSuffix); else if (elm == 'G') - suffix.insert(suffix.end(), ReferenceSuffix); + suffix |= (1u << ReferenceSuffix); else if (elm == 'H') - suffix.insert(suffix.end(), LvalueSuffix); + suffix |= (1u << LvalueSuffix); else if (elm == 'I') - suffix.insert(suffix.end(), RestrictSuffix); + suffix |= (1u << RestrictSuffix); else break; reader.Consume(1); @@ -1426,7 +1982,7 @@ set Demangle::DemanglePointerSuffix() void Demangle::DemangleModifiers(bool& _const, bool& _volatile, bool &isMember) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); if (reader.Peek() == '@') return; @@ -1492,12 +2048,13 @@ void Demangle::DemangleModifiers(bool& _const, bool& _volatile, bool &isMember) } -TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& nameBackrefList, int funcClass) +DemangledTypeNode Demangle::DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& nameBackrefList, + int funcClass, bool includeImplicitThis) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; - set suffix; - TypeBuilder returnType; + uint8_t suffix = 0; + DemangledTypeNode returnType; BNCallingConventionName cc; //Demangle adjustor which we don't do anything with for now @@ -1519,6 +2076,9 @@ TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointe DemangleNumber(vbOffsetOffset); DemangleNumber(vtorDispOffset); DemangleNumber(staticOffset); + vbptrOffset = SignExtendInt32(vbptrOffset); + vbOffsetOffset = SignExtendInt32(vbOffsetOffset); + vtorDispOffset = SignExtendInt32(vtorDispOffset); m_varName.back() += "`vtordispex{" + to_string(vbptrOffset) + ", " + to_string(vbOffsetOffset) + ", " + to_string(vtorDispOffset) + ", " + to_string(staticOffset) + "}'"; } else @@ -1527,6 +2087,7 @@ TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointe int64_t staticOffset; DemangleNumber(vtorDispOffset); DemangleNumber(staticOffset); + vtorDispOffset = SignExtendInt32(vtorDispOffset); m_varName.back() += "`vtordisp{" + to_string(vtorDispOffset) + ", " + to_string(staticOffset) + "}'"; } } @@ -1534,6 +2095,7 @@ TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointe if (pointerSuffix) { suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); DemangleModifiers(_const, _volatile, isMember); } if (reader.Peek() == '?') @@ -1545,13 +2107,13 @@ TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointe //No return type shouldHaveReturnType = false; reader.Consume(); - m_logger->LogDebug("Function has no return type %s", reader.GetRaw()); + MSVC_TRACE("Function has no return type %s", reader.GetRaw()); } else { //Demangle function return type bool return_const = false, return_volatile = false, isMember = false; - set return_suffix; + uint8_t return_suffix = 0; bool hasModifiers = false; //Check for modifiers before return type if (reader.Peek() == '?') @@ -1562,133 +2124,112 @@ TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointe hasModifiers = true; } - QualifiedName name; - m_logger->LogDebug("Demangle function return type %s", reader.GetRaw()); - //m_logger->Indent(); + NameList name; + MSVC_TRACE("Demangle function return type %s", reader.GetRaw()); returnType = DemangleVarType(nameBackrefList, true, name); - m_logger->LogDebug("Return type: %s", returnType.GetString().c_str()); - //m_logger->Dedent(); + MSVC_TRACE("Return type: %s", returnType.GetString().c_str()); + // '...' (varargs) is only legal as the trailing parameter marker, + // never as a return type. Reject so we don't build a bogus type. + if (returnType.GetClass() == VarArgsTypeClass) + throw DemangleException("Varargs ('Z') is not a valid function return type"); if (hasModifiers) { returnType.SetConst(return_const); returnType.SetVolatile(return_volatile); - returnType.SetPointerSuffix(return_suffix); + returnType.SetPointerSuffixBits(return_suffix); } } if (reader.Peek() == '@') reader.Consume(); - m_logger->LogDebug("\tDemangle Function Parameters %s", reader.GetRaw()); - vector params; - bool needsThisPtr = false; - if (cc == ThisCallCallingConvention) - { - needsThisPtr = true; - } - if (funcClass != NoneFunctionClass) - { - if ((funcClass & VirtualFunctionClass) == VirtualFunctionClass - || (funcClass & StaticThunkFunctionClass) == StaticThunkFunctionClass - || (funcClass & VirtualThunkFunctionClass) == VirtualThunkFunctionClass) - { - needsThisPtr = true; - } - else if ((funcClass & StaticFunctionClass) != StaticFunctionClass - && (funcClass & GlobalFunctionClass) != GlobalFunctionClass) - { - needsThisPtr = true; - } - } - - if (needsThisPtr) - { - // Insert implicit "this" parameter for thiscall - // TODO: Replace this with calling convention / platform callbacks to insert thisptr (ask rss) - QualifiedName thisName = m_varName; - if (thisName.size() > 0) - thisName.erase(thisName.end() - 1); - params.push_back(FunctionParameter("this", Type::PointerType(m_arch, Type::NamedType(thisName, Type::VoidType())), true, {})); - } + MSVC_TRACE("\tDemangle Function Parameters %s", reader.GetRaw()); + vector params; + bool needsThisPtr = includeImplicitThis + && funcClass != NoneFunctionClass + && (funcClass & StaticFunctionClass) != StaticFunctionClass + && (funcClass & GlobalFunctionClass) != GlobalFunctionClass; - DemangleVariableList(params, m_backrefList); + DemangleVariableList(params, nameBackrefList); - if (params.size() >= 1 && params.back().type->GetClass() == VoidTypeClass) + if (params.size() >= 1 && params.back().type && params.back().type->GetClass() == VoidTypeClass) params.pop_back(); - // TODO: fix calling convention - Ref returnTypeObj; - if (shouldHaveReturnType) - returnTypeObj = returnType.Finalize(); - else - returnTypeObj = Type::VoidType(); - TypeBuilder newType = TypeBuilder::FunctionType(returnTypeObj, nullptr, params); + if (!shouldHaveReturnType) + returnType = DemangledTypeNode::VoidType(); + DemangledTypeNode newType = DemangledTypeNode::FunctionType(std::move(returnType), nullptr, std::move(params)); newType.SetConst(_const); newType.SetVolatile(_volatile); - newType.SetPointerSuffix(suffix); + newType.SetPointerSuffixBits(suffix); newType.SetNameType(classFunctionType); newType.SetCallingConventionName(cc); - auto convention = GetCallingConventionForType(cc); - if (convention) - newType.SetCallingConvention(convention); + if (auto callingConvention = ResolveCallingConvention(cc)) + newType.SetCallingConvention(callingConvention); + if (needsThisPtr) + { + NameList thisName = m_varName; + if (classFunctionType != OperatorReturnTypeNameType && !thisName.empty()) + thisName.pop_back(); + auto thisNamedType = DemangledTypeNode::NamedType(TypedefNamedTypeClass, std::move(thisName)); + newType.SetImplicitThisParameter(DemangledTypeNode::PointerType( + m_arch, std::move(thisNamedType), false, false, PointerReferenceType)); + } - m_logger->LogDebug("Successfully Created Function Type!\n"); + MSVC_TRACE("Successfully Created Function Type!\n"); return newType; } -TypeBuilder Demangle::DemangleData() +DemangledTypeNode Demangle::DemangleData(BackrefList& varList) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; - QualifiedName name; - //m_logger->Indent(); - TypeBuilder newType = DemangleVarType(m_backrefList, false, name); - //m_logger->Dedent(); + NameList name; + DemangledTypeNode newType = DemangleVarType(varList, false, name); auto suffix = DemanglePointerSuffix(); DemangleModifiers(_const, _volatile, isMember); - newType.SetConst(_const); - newType.SetVolatile(_volatile); - newType.SetPointerSuffix(suffix); + if (newType.GetClass() != PointerTypeClass) + { + newType.SetConst(_const); + newType.SetVolatile(_volatile); + newType.SetPointerSuffixBits(suffix); + } return newType; } -TypeBuilder Demangle::DemanagleRTTI(BNNameType nameType) +DemangledTypeNode Demangle::DemanagleRTTI(BNNameType nameType) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; if (reader.Length() > 0) DemangleModifiers(_const, _volatile, isMember); - QualifiedName typeName = m_varName; - m_logger->LogDebug("new struct type\n"); - TypeBuilder newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, typeName)); + NameList typeName = m_varName; + MSVC_TRACE("new struct type\n"); + DemangledTypeNode newType = DemangledTypeNode::NamedType(StructNamedTypeClass, typeName); newType.SetNameType(nameType); newType.SetConst(_const); newType.SetVolatile(_volatile); - m_logger->LogDebug("log: %s\n", newType.GetString().c_str()); + MSVC_TRACE("log: %s\n", newType.GetString().c_str()); return newType; } -TypeBuilder Demangle::DemangleVTable() +DemangledTypeNode Demangle::DemangleVTable(BackrefList& nameBackrefList) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; DemangleModifiers(_const, _volatile, isMember); - TypeBuilder newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, m_varName)); + DemangledTypeNode newType = DemangledTypeNode::NamedType(StructNamedTypeClass, m_varName); if (reader.Peek() != '@') { - QualifiedName typeName; + NameList typeName; BNNameType classFunctionType = NoNameType; - DemangleName(typeName, classFunctionType, m_backrefList); + DemangleName(typeName, classFunctionType, nameBackrefList, true); string suffix = m_varName.back(); - m_varName.back() += "{for `" + typeName.GetString() + "'}"; + m_varName.back() += "{for `" + JoinNameList(typeName) + "'}"; typeName.push_back(suffix); - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(StructNamedTypeClass, typeName); } newType.SetConst(_const); newType.SetVolatile(_volatile); @@ -1697,13 +2238,151 @@ TypeBuilder Demangle::DemangleVTable() } +// ??__E (dynamic initializer) / ??__F (dynamic atexit destructor). +// +// LLVM dispatches these at the top level via demangleSpecialIntrinsic --> +// demangleInitFiniStub. The mangling wraps another symbol (either a variable +// or a function) and emits a new function stub that initializes/destroys it: +// +// ??__E function form, e.g. ??__Efoo@@YAXXZ +// ??__E?@@ variable form, e.g. ??__E?foo@@3HA@@YAXXZ +// +// LLVM's output places the descriptor (`dynamic initializer for ''`) +// at file scope — not as a member of the target's enclosing class — and +// interpolates the target name inside backticks/quotes. For the variable +// form, it additionally renders the variable's type inside the inner +// backtick pair: `dynamic initializer for `int foo''. +Demangle::DemangleContext Demangle::DemangleDynamicInitFini(bool isDtor, BackrefList& backrefList) +{ + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + + // /d2FH4 may replace a long wrapped target with an MD5 name (??@@). + // Parse it before the optional '?' marker below; otherwise the first '?' + // of the hash spelling is mistaken for IsKnownStaticDataMember. + NameList innerNameList; + BNNameType innerClassFunctionType = NoNameType; + bool isMD5Name = false; + if (reader.Length() >= 3 && reader.PeekMatch("??@", 3)) + { + reader.Consume(3); + _STD_STRING hash = reader.ReadUntil('@'); + innerNameList.push_back("??@" + hash + "@"); + isMD5Name = true; + } + + // Optional leading '?' flags the "known static data member" form. LLVM + // calls this IsKnownStaticDataMember — when present, the mangling is + // required to carry two trailing '@' before the outer function encoding + // rather than one. + bool isKnownStaticDataMember = false; + if (!isMD5Name && reader.Length() > 0 && reader.Peek() == '?') + { + reader.Consume(); + isKnownStaticDataMember = true; + } + + // Parse the inner symbol's qualified name exactly as any other symbol + // would. DemangleName handles locally-scoped pieces, anonymous namespaces, + // templates, etc. so a target like + // instance@?1??Get@Globals@@SAAEAU1@XZ@ + // resolves correctly. + if (!isMD5Name) + DemangleName(innerNameList, innerClassFunctionType, backrefList); + + const char* prefix = isDtor + ? "`dynamic atexit destructor for " + : "`dynamic initializer for "; + BNNameType classFunctionType = isDtor + ? DynamicAtExitDestructorNameType + : DynamicInitializerNameType; + + _STD_STRING descriptor; + + if (reader.Length() == 0) + throw DemangleException("Truncated ??__E/??__F"); + + char next = reader.Peek(); + if (next >= '0' && next <= '4') + { + // Variable form: <@-terminators> + // . We don't attach the storage class to + // anything — it exists only to disambiguate variable-vs-function + // inside the wrapper and to match the mangling grammar. + reader.Consume(); // storage class + DemangledTypeNode varType = DemangleData(backrefList); + _STD_STRING varTypeStr = varType.GetString(); + _STD_STRING innerJoined = JoinNameList(innerNameList); + descriptor = _STD_STRING(prefix) + "`" + varTypeStr + " " + innerJoined + "''"; + + // Consume the @-terminators between the inner variable encoding and + // the outer function encoding. LLVM requires two when the optional + // leading '?' was present, one otherwise. + int atCount = isKnownStaticDataMember ? 2 : 1; + for (int i = 0; i < atCount; i++) + { + if (reader.Length() == 0 || reader.Read() != '@') + throw DemangleException("Expected '@' terminator in ??__E/??__F variable form"); + } + } + else + { + // Function form: the inner symbol's function encoding follows + // directly. The outer stub reuses that encoding (there's no separate + // outer signature). + if (isKnownStaticDataMember) + throw DemangleException("??__E/??__F with leading '?' but no variable form"); + if (isMD5Name) + { + while (reader.Length() > 0 && reader.Peek() == '@') + reader.Consume(); + } + _STD_STRING innerJoined = JoinNameList(innerNameList); + descriptor = _STD_STRING(prefix) + "'" + innerJoined + "''"; + } + + // Replace the symbol's qualified name with just the descriptor — this is + // what puts the output at file scope with no enclosing class prefix. + m_varName = { descriptor }; + + // Parse the outer function encoding. MSVC emits a global cdecl stub + // ('Y'/'Z') in practice but we dispatch through the full table for + // robustness (private/public/static/etc.). + if (reader.Length() == 0) + throw DemangleException("Truncated ??__E/??__F outer function encoding"); + char funcType = reader.Read(); + switch (funcType) + { + case 'A': return { DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; + case 'B': return { DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; + case 'C': return { DemangleFunction(classFunctionType, false, backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; + case 'D': return { DemangleFunction(classFunctionType, false, backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; + case 'I': return { DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; + case 'J': return { DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; + case 'K': return { DemangleFunction(classFunctionType, false, backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; + case 'L': return { DemangleFunction(classFunctionType, false, backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; + case 'Q': return { DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass), PublicAccess, NoScope }; + case 'R': return { DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass), PublicAccess, NoScope }; + case 'S': return { DemangleFunction(classFunctionType, false, backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; + case 'T': return { DemangleFunction(classFunctionType, false, backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; + case 'Y': return { DemangleFunction(classFunctionType, false, backrefList, GlobalFunctionClass), NoAccess, NoScope }; + case 'Z': return { DemangleFunction(classFunctionType, false, backrefList, GlobalFunctionClass), NoAccess, NoScope }; + default: + throw DemangleException(_STD_STRING("Unexpected outer function type '") + funcType + "' in ??__E/??__F"); + } +} + Demangle::DemangleContext Demangle::DemangleSymbol() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - //m_logger->Indent(); + return DemangleSymbol(m_backrefList); +} + + +Demangle::DemangleContext Demangle::DemangleSymbol(BackrefList& backrefList) +{ + MSVC_TRACE("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); BNNameType classFunctionType = NoNameType; - QualifiedName varName; + NameList varName; if (reader.Peek() == '.') { @@ -1717,8 +2396,28 @@ Demangle::DemangleContext Demangle::DemangleSymbol() throw DemangleException(); } - DemangleName(varName, classFunctionType, m_backrefList); - m_logger->LogDebug("Done demangling Name: '%s' - '%s'", varName.GetString().c_str(), reader.GetRaw()); + // MD5-hashed names: ??@<32hex>@ + if (reader.Length() >= 2 && reader.PeekMatch("?@", 2)) + { + reader.Consume(2); // consume ?@ + _STD_STRING hash = reader.ReadUntil('@'); + m_varName.push_back("??@" + hash + "@"); + return { DemangledTypeNode::VoidType(), NoAccess, NoScope }; + } + + // Special intrinsics dispatched at the top level (matches LLVM's + // demangleSpecialIntrinsic). ??__E/??__F have a non-uniform grammar + // that the normal DemangleName scope-chain loop can't express — the + // bytes after the code are a wrapped inner symbol, not scope prefixes. + if (reader.Length() >= 4 && (reader.PeekMatch("?__E", 4) || reader.PeekMatch("?__F", 4))) + { + bool isDtor = reader.PeekAt(3) == 'F'; + reader.Consume(4); // consume ?__E or ?__F + return DemangleDynamicInitFini(isDtor, backrefList); + } + + DemangleName(varName, classFunctionType, backrefList); + MSVC_TRACE("Done demangling Name: '%s' - '%s'", JoinNameList(varName).c_str(), reader.GetRaw()); m_varName = varName; DemangleContext context; @@ -1729,47 +2428,77 @@ Demangle::DemangleContext Demangle::DemangleSymbol() return context; } + // ??__J (local static thread guard) is a *variable*, not a function, per + // LLVM's demangleLocalStaticGuard. The only valid storage-class suffixes + // are '4' (IsVisible=false, followed by type encoding like 'IA' for int) + // and '5' (IsVisible=true). Any other byte here indicates a malformed + // symbol — e.g. the function-form ??__JFoo@@YAXXZ that earlier permissive + // code would accept and misrender. + if (classFunctionType == LocalStaticThreadGuardNameType) + { + if (reader.Length() == 0) + throw DemangleException("Truncated ??__J"); + char next = reader.Peek(); + if (next != '4' && next != '5') + throw DemangleException("??__J requires variable storage class ('4' or '5'), got '" + _STD_STRING(1, next) + "'"); + } + char funcType = reader.Read(); switch(funcType) { - case '0': context = {DemangleData(), PrivateAccess, StaticScope }; break; - case '1': context = {DemangleData(), ProtectedAccess, StaticScope }; break; - case '2': context = {DemangleData(), PublicAccess, StaticScope }; break; - case '3': context = {DemangleData(), NoAccess, NoScope }; break; - case '4': context = {DemangleData(), NoAccess, NoScope }; break; - case '5': context = {DemangleVTable(), NoAccess, NoScope }; break; - case '6': context = {DemangleVTable(), NoAccess, NoScope }; break; - case '7': context = {DemangleVTable(), NoAccess, NoScope }; break; + case '0': context = {DemangleData(backrefList), PrivateAccess, StaticScope }; break; + case '1': context = {DemangleData(backrefList), ProtectedAccess, StaticScope }; break; + case '2': context = {DemangleData(backrefList), PublicAccess, StaticScope }; break; + case '3': context = {DemangleData(backrefList), NoAccess, NoScope }; break; + case '4': context = {DemangleData(backrefList), NoAccess, NoScope }; break; + case '5': context = {DemangleVTable(backrefList), NoAccess, NoScope }; break; + case '6': context = {DemangleVTable(backrefList), NoAccess, NoScope }; break; + case '7': context = {DemangleVTable(backrefList), NoAccess, NoScope }; break; case '8': context = {DemanagleRTTI(classFunctionType), NoAccess, NoScope }; break; case '9': context = {DemanagleRTTI(classFunctionType), NoAccess, NoScope }; break; - case 'A': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; break; - case 'B': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; break; - case 'C': context = {DemangleFunction(classFunctionType, false, m_backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; break; - case 'D': context = {DemangleFunction(classFunctionType, false, m_backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; break; - case 'E': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | VirtualFunctionClass), PrivateAccess, VirtualScope}; break; - case 'F': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | VirtualFunctionClass), PrivateAccess, VirtualScope}; break; - case 'G': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | StaticThunkFunctionClass), PrivateAccess, ThunkScope }; break; - case 'H': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | StaticThunkFunctionClass), PrivateAccess, ThunkScope }; break; - case 'I': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; break; - case 'J': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; break; - case 'K': context = {DemangleFunction(classFunctionType, false, m_backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; break; - case 'L': context = {DemangleFunction(classFunctionType, false, m_backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; break; - case 'M': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | VirtualFunctionClass), ProtectedAccess, VirtualScope}; break; - case 'N': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | VirtualFunctionClass), ProtectedAccess, VirtualScope}; break; - case 'O': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | StaticThunkFunctionClass), ProtectedAccess, ThunkScope }; break; - case 'P': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | StaticThunkFunctionClass), ProtectedAccess, ThunkScope }; break; - case 'Q': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass), PublicAccess, NoScope }; break; - case 'R': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass), PublicAccess, NoScope }; break; - case 'S': context = {DemangleFunction(classFunctionType, false, m_backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; break; - case 'T': context = {DemangleFunction(classFunctionType, false, m_backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; break; - case 'U': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | VirtualFunctionClass), PublicAccess, VirtualScope}; break; - case 'V': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | VirtualFunctionClass), PublicAccess, VirtualScope}; break; - case 'W': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | StaticThunkFunctionClass), PublicAccess, ThunkScope }; break; - case 'X': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | StaticThunkFunctionClass), PublicAccess, ThunkScope }; break; - case 'Y': context = {DemangleFunction(classFunctionType, false, m_backrefList, GlobalFunctionClass), NoAccess, NoScope }; break; - case 'Z': context = {DemangleFunction(classFunctionType, false, m_backrefList, GlobalFunctionClass), NoAccess, NoScope }; break; + case 'A': context = {DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; break; + case 'B': context = {DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; break; + case 'C': context = {DemangleFunction(classFunctionType, false, backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; break; + case 'D': context = {DemangleFunction(classFunctionType, false, backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; break; + case 'E': context = {DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass | VirtualFunctionClass), PrivateAccess, VirtualScope}; break; + case 'F': context = {DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass | VirtualFunctionClass), PrivateAccess, VirtualScope}; break; + case 'G': context = {DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass | StaticThunkFunctionClass), PrivateAccess, ThunkScope }; break; + case 'H': context = {DemangleFunction(classFunctionType, true, backrefList, PrivateFunctionClass | StaticThunkFunctionClass), PrivateAccess, ThunkScope }; break; + case 'I': context = {DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; break; + case 'J': context = {DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; break; + case 'K': context = {DemangleFunction(classFunctionType, false, backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; break; + case 'L': context = {DemangleFunction(classFunctionType, false, backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; break; + case 'M': context = {DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass | VirtualFunctionClass), ProtectedAccess, VirtualScope}; break; + case 'N': context = {DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass | VirtualFunctionClass), ProtectedAccess, VirtualScope}; break; + case 'O': context = {DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass | StaticThunkFunctionClass), ProtectedAccess, ThunkScope }; break; + case 'P': context = {DemangleFunction(classFunctionType, true, backrefList, ProtectedFunctionClass | StaticThunkFunctionClass), ProtectedAccess, ThunkScope }; break; + case 'Q': context = {DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass), PublicAccess, NoScope }; break; + case 'R': context = {DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass), PublicAccess, NoScope }; break; + case 'S': context = {DemangleFunction(classFunctionType, false, backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; break; + case 'T': context = {DemangleFunction(classFunctionType, false, backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; break; + case 'U': context = {DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass | VirtualFunctionClass), PublicAccess, VirtualScope}; break; + case 'V': context = {DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass | VirtualFunctionClass), PublicAccess, VirtualScope}; break; + case 'W': context = {DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass | StaticThunkFunctionClass), PublicAccess, ThunkScope }; break; + case 'X': context = {DemangleFunction(classFunctionType, true, backrefList, PublicFunctionClass | StaticThunkFunctionClass), PublicAccess, ThunkScope }; break; + case 'Y': context = {DemangleFunction(classFunctionType, false, backrefList, GlobalFunctionClass), NoAccess, NoScope }; break; + case 'Z': context = {DemangleFunction(classFunctionType, false, backrefList, GlobalFunctionClass), NoAccess, NoScope }; break; case '$': { + if (reader.Peek() == 'B') + { + // Vcall thunk: $B + reader.Consume(); + int64_t offset; + DemangleNumber(offset); + m_varName.back() = "`vcall'{" + to_string(offset) + ", {flat}}'"; + // Consume calling convention char + this-type flag char + if (reader.Length() >= 1) + reader.Consume(); // calling convention (A=cdecl, etc.) + if (reader.Length() >= 1 && reader.Peek() != '@') + reader.Consume(); // this-type flag + context = {DemangledTypeNode::VoidType(), NoAccess, NoScope}; + break; + } int funcClass = VirtualThunkFunctionClass; if (reader.Peek() == 'R') { @@ -1779,12 +2508,12 @@ Demangle::DemangleContext Demangle::DemangleSymbol() char thunkType = reader.Read(); switch (thunkType) { - case '0': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PrivateFunctionClass), PrivateAccess, ThunkScope}; break; - case '1': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PrivateFunctionClass), PrivateAccess, ThunkScope}; break; - case '2': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | ProtectedFunctionClass), ProtectedAccess, ThunkScope}; break; - case '3': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | ProtectedFunctionClass), ProtectedAccess, ThunkScope}; break; - case '4': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PublicFunctionClass), PublicAccess, ThunkScope}; break; - case '5': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PublicFunctionClass), PublicAccess, ThunkScope}; break; + case '0': context = {DemangleFunction(classFunctionType, true, backrefList, funcClass | VirtualFunctionClass | PrivateFunctionClass), PrivateAccess, ThunkScope}; break; + case '1': context = {DemangleFunction(classFunctionType, true, backrefList, funcClass | VirtualFunctionClass | PrivateFunctionClass), PrivateAccess, ThunkScope}; break; + case '2': context = {DemangleFunction(classFunctionType, true, backrefList, funcClass | VirtualFunctionClass | ProtectedFunctionClass), ProtectedAccess, ThunkScope}; break; + case '3': context = {DemangleFunction(classFunctionType, true, backrefList, funcClass | VirtualFunctionClass | ProtectedFunctionClass), ProtectedAccess, ThunkScope}; break; + case '4': context = {DemangleFunction(classFunctionType, true, backrefList, funcClass | VirtualFunctionClass | PublicFunctionClass), PublicAccess, ThunkScope}; break; + case '5': context = {DemangleFunction(classFunctionType, true, backrefList, funcClass | VirtualFunctionClass | PublicFunctionClass), PublicAccess, ThunkScope}; break; default: throw DemangleException("Unknown virtual thunk type " + string(1, thunkType)); } break; @@ -1794,12 +2523,20 @@ Demangle::DemangleContext Demangle::DemangleSymbol() return context; } +std::pair, QualifiedName> Demangle::Finalize() +{ + DemangleContext context = DemangleSymbol(); + return {context.type.Finalize(), QualifiedName(m_varName)}; +} + bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref& outType, QualifiedName& outVarName, const Ref& view) { outType = nullptr; if (mangledName.empty() || (mangledName[0] != '?' && mangledName[0] != '.')) return false; + if (view) + return DemangleMS(mangledName, outType, outVarName, view); return DemangleMS(arch, mangledName, outType, outVarName); } @@ -1809,9 +2546,32 @@ bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref(view)); return DemangleMS(arch, mangledName, outType, outVarName); } +bool Demangle::DemangleMS(Platform* platform, const string& mangledName, Ref& outType, + QualifiedName& outVarName) +{ + outType = nullptr; + if (!platform || mangledName.empty() || (mangledName[0] != '?' && mangledName[0] != '.')) + return false; + try + { + Demangle demangle(Ref(platform), mangledName); + auto result = demangle.Finalize(); + outType = std::move(result.first); + outVarName = std::move(result.second); + } + catch (DemangleException &e) + { + LogDebugForException(e, "Demangling Failed '%s' '%s;", mangledName.c_str(), e.what()); + return false; + } + return true; +} + bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref& outType, QualifiedName& outVarName) { @@ -1820,11 +2580,11 @@ bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref& outType, return false; try { + // Can't use thread_local here — BinaryView overload needs platform/view state Demangle demangle(view, mangledName); - // For now we're throwing away MemberScope and MemberAccess - outType = demangle.DemangleSymbol().type.Finalize(); - outVarName = demangle.GetVarName(); - + auto result = demangle.Finalize(); + outType = std::move(result.first); + outVarName = std::move(result.second); } catch (DemangleException &e) { @@ -1857,6 +2617,15 @@ bool Demangle::DemangleMS(const string& mangledName, Ref& outType, return true; } +bool Demangle::DemangleMS(const string& mangledName, Ref& outType, + QualifiedName& outVarName, BinaryView* view) +{ + outType = nullptr; + if (!view) + return false; + return DemangleMS(mangledName, outType, outVarName, Ref(view)); +} + class MSDemangler: public Demangler { diff --git a/demangler/msvc/demangle_msvc.h b/demangler/msvc/demangle_msvc.h index c2eeb79f6..31256450b 100644 --- a/demangler/msvc/demangle_msvc.h +++ b/demangler/msvc/demangle_msvc.h @@ -15,6 +15,7 @@ #pragma once #include #include +#include // XXX: Compiled directly into the core for performance reasons // Will still work fine compiled independently, just at about a @@ -38,6 +39,12 @@ #define _STD_SET std::set #endif +#ifdef BINARYNINJACORE_LIBRARY +#include "demangler/gnu3/demangled_type_node.h" +#else +#include "../gnu3/demangled_type_node.h" +#endif + class DemangleException: public std::exception { _STD_STRING m_message; @@ -63,7 +70,8 @@ class Demangle NameDynamicInitializer, NameDynamicAtExitDestructor, NameLocalStaticThreadGuard, - NameLocalVftable + NameLocalVftable, + NameAnonymousNamespace }; enum FunctionClass @@ -81,93 +89,186 @@ class Demangle VirtualThunkExFunctionClass = 1 << 9, }; +public: + struct DemangleContext + { + DemangledTypeNode type; + BNMemberAccess access; + BNMemberScope scope; + }; + +private: class Reader { public: - Reader(_STD_STRING data); - _STD_STRING PeekString(size_t count=1); - char Peek(); - const char* GetRaw(); - char Read(); - _STD_STRING ReadString(size_t count=1); + Reader(const _STD_STRING& data) : m_ptr(data.c_str()), m_end(data.c_str() + data.size()) + { + for (const char* p = m_ptr; p < m_end; p++) + if (*p < 0x20 || *p > 0x7e) + throw DemangleException(); + } + void Reset(const _STD_STRING& data) + { + m_ptr = data.c_str(); + m_end = data.c_str() + data.size(); + for (const char* p = m_ptr; p < m_end; p++) + if (*p < 0x20 || *p > 0x7e) + throw DemangleException(); + } + bool PeekMatch(const char* str, size_t len) const + { + if (len > Length()) + throw DemangleException(); + if (len == 2) + return m_ptr[0] == str[0] && m_ptr[1] == str[1]; + if (len == 3) + return m_ptr[0] == str[0] && m_ptr[1] == str[1] && m_ptr[2] == str[2]; + return memcmp(m_ptr, str, len) == 0; + } + char PeekAt(size_t offset) const + { + if (m_ptr + offset >= m_end) + throw DemangleException(); + return m_ptr[offset]; + } + char Peek() const + { + if (m_ptr >= m_end) + throw DemangleException(); + return *m_ptr; + } + const char* GetRaw() const { return m_ptr; } + void SetRaw(const char* p) { m_ptr = p; } + char Read() + { + if (m_ptr >= m_end) + throw DemangleException(); + return *m_ptr++; + } + void Consume(size_t count = 1) + { + if (m_ptr + count > m_end) + throw DemangleException(); + m_ptr += count; + } + size_t Length() const { return (size_t)(m_end - m_ptr); } + _STD_STRING ReadString(size_t count); _STD_STRING ReadUntil(char sentinal); - void Consume(size_t count=1); - size_t Length(); private: - _STD_STRING m_data; + const char* m_ptr; + const char* m_end; }; class BackrefList { public: - _STD_VECTOR typeList; - _STD_VECTOR<_STD_STRING> nameList; - const BN::TypeBuilder& GetTypeBackref(size_t reference); - _STD_STRING GetStringBackref(size_t reference); - void PushTypeBackref(BN::TypeBuilder t); - void PushStringBackref(_STD_STRING& s); - void PushFrontStringBackref(_STD_STRING& s); + _STD_VECTOR typeList; + StringList nameList; + StringList templateList; + void Clear() { typeList.clear(); nameList.clear(); templateList.clear(); } + const DemangledTypeNode& GetTypeBackref(size_t reference); + const _STD_STRING& GetStringBackref(size_t reference); + void PushTypeBackref(DemangledTypeNode t); + void PushStringBackref(const _STD_STRING& s); + void PushTemplateSpecialization(const _STD_STRING& s); }; + // Internal name list type - avoids QualifiedName overhead during parsing + typedef StringList NameList; + + static size_t NameListStringSize(const NameList& nl) + { + size_t total = 0; + for (const auto& s : nl) + total += s.size(); + if (nl.size() > 1) + total += (nl.size() - 1) * 2; // "::" separators + return total; + } + + static _STD_STRING JoinNameList(const NameList& nl) + { + if (nl.empty()) return {}; + if (nl.size() == 1) return nl[0]; + _STD_STRING out; + out.reserve(NameListStringSize(nl)); + out = nl[0]; + for (size_t i = 1; i < nl.size(); i++) + { + out += ':'; + out += ':'; + out += nl[i]; + } + return out; + } + + _STD_STRING m_mangledName; // Owns the string; Reader points into it Reader reader; BackrefList m_backrefList; BN::Architecture* m_arch; BN::Ref m_platform; BN::Ref m_view; - BN::QualifiedName m_varName; - BN::Ref m_logger; + NameList m_varName; + size_t m_templateParamDepth = 0; NameType GetNameType(); - BN::TypeBuilder DemangleVarType(BackrefList& varList, bool isReturn, BN::QualifiedName& name); + void RewriteTemplateBackrefName(NameList& typeName, const BackrefList& nameBackrefList) const; + DemangledTypeNode DemangleReferencedSymbolValue(BackrefList& varList); + DemangledTypeNode DemangleAutoNonTypeTemplateParam(BackrefList& varList); + DemangledTypeNode DemangleVarType(BackrefList& varList, bool isReturn, NameList& name, + bool includeImplicitThis = true); void DemangleNumber(int64_t& num); void DemangleChar(char& ch); void DemangleWideChar(uint16_t& wch); void DemangleModifiers(bool& _const, bool& _volatile, bool& isMember); - _STD_SET DemanglePointerSuffix(); - void DemangleVariableList(_STD_VECTOR& paramList, BackrefList& varList); + uint8_t DemanglePointerSuffix(); + void DemangleVariableList(_STD_VECTOR& paramList, BackrefList& varList, bool typeBackrefs = true); void DemangleNameTypeRtti(BNNameType& classFunctionType, BackrefList& nameBackrefList, _STD_STRING& out, _STD_STRING& rttiTypeName); void DemangleTypeNameLookup(_STD_STRING& out, BNNameType& functionType); void DemangleNameTypeString(_STD_STRING& out); - void DemangleNameTypeBackref(_STD_STRING& out, const _STD_VECTOR<_STD_STRING>& backrefList); - void DemangleName(BN::QualifiedName& nameList, + void DemangleName(NameList& nameList, BNNameType& classFunctionType, - BackrefList& nameBackrefList); - BN::Ref GetCallingConventionForType(BNCallingConventionName ccName); + BackrefList& nameBackrefList, + bool typeNameContext = false); BNCallingConventionName DemangleCallingConvention(); - BN::TypeBuilder DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& varList, int funcClass = NoneFunctionClass); - BN::TypeBuilder DemangleData(); + BN::Ref ResolveCallingConvention(BNCallingConventionName cc) const; + void ConsumeExtendedModifierPrefix(); + DemangledTypeNode DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& varList, + int funcClass = NoneFunctionClass, bool includeImplicitThis = true); + DemangledTypeNode DemangleData(BackrefList& varList); void DemangleNameTypeRtti(BNNameType& classFunctionType, BackrefList& nameBackrefList, _STD_STRING& out); - BN::TypeBuilder DemangleVTable(); - BN::TypeBuilder DemanagleRTTI(BNNameType classFunctionType); + DemangledTypeNode DemangleVTable(BackrefList& nameBackrefList); + DemangledTypeNode DemanagleRTTI(BNNameType classFunctionType); + _STD_STRING DemangleTemplateInstantiationNameInLocalContext(BackrefList& nameBackrefList); _STD_STRING DemangleTemplateInstantiationName(BackrefList& nameBackrefList); - _STD_STRING DemangleTemplateParams(_STD_VECTOR& params, BackrefList& nameBackrefList, _STD_STRING& out); - _STD_STRING DemangleUnqualifiedSymbolName(BN::QualifiedName& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType); - BN::TypeBuilder DemangleString(); - BN::TypeBuilder DemangleTypeInfoName(); + void DemangleTemplateParams(_STD_VECTOR& params, BackrefList& nameBackrefList, _STD_STRING& out); + _STD_STRING DemangleUnqualifiedSymbolName(NameList& nameList, BackrefList& nameBackrefList, + BNNameType& classFunctionType, bool& backrefEligible); + DemangledTypeNode DemangleString(); + DemangledTypeNode DemangleTypeInfoName(); + DemangleContext DemangleDynamicInitFini(bool isDtor, BackrefList& backrefList); + DemangleContext DemangleSymbol(BackrefList& backrefList); public: - struct DemangleContext - { - BN::TypeBuilder type; - BNMemberAccess access; - BNMemberScope scope; - }; - Demangle(BN::Architecture* arch, _STD_STRING mangledName); - Demangle(BN::Ref view, _STD_STRING mangledName); - Demangle(BN::Ref platform, _STD_STRING mangledName); + Demangle(BN::Architecture* arch, const _STD_STRING& mangledName); + Demangle(BN::Ref view, const _STD_STRING& mangledName); + Demangle(BN::Ref platform, const _STD_STRING& mangledName); + void Reset(BN::Architecture* arch, const _STD_STRING& mangledName); DemangleContext DemangleSymbol(); - BN::QualifiedName GetVarName() const { return m_varName; } + std::pair, BN::QualifiedName> Finalize(); // Be careful not to accidentally implicitly cast a BinaryView* to a bool static bool DemangleMS(BN::Architecture* arch, const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName, const BN::Ref& view); static bool DemangleMS(BN::Architecture* arch, const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName, BN::BinaryView* view); + static bool DemangleMS(BN::Platform* platform, const _STD_STRING& mangledName, BN::Ref& outType, + BN::QualifiedName& outVarName); static bool DemangleMS(BN::Architecture* arch, const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName); @@ -176,4 +277,3 @@ class Demangle static bool DemangleMS(const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName, BN::BinaryView* view); }; - diff --git a/plugins/pdb-ng/src/symbol_parser.rs b/plugins/pdb-ng/src/symbol_parser.rs index a4ee4a6c6..b7f53b596 100644 --- a/plugins/pdb-ng/src/symbol_parser.rs +++ b/plugins/pdb-ng/src/symbol_parser.rs @@ -39,7 +39,7 @@ use crate::PDBParserInstance; use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterId}; use binaryninja::binary_view::BinaryViewBase; use binaryninja::confidence::{Conf, MAX_CONFIDENCE, MIN_CONFIDENCE}; -use binaryninja::demangle::demangle_ms; +use binaryninja::demangle::demangle_ms_with_view; use binaryninja::rc::Ref; use binaryninja::types::{FunctionParameter, QualifiedName, StructureBuilder, Type, TypeClass}; use binaryninja::variable::{Variable, VariableSourceType}; @@ -1820,7 +1820,7 @@ impl<'a, S: Source<'a> + 'a> PDBParserInstance<'a, S> { raw_name: &String, rva: Rva, ) -> Result<(Option>>, Option)> { - let (mut t, mut name) = match demangle_ms(&self.arch, raw_name, true) { + let (mut t, mut name) = match demangle_ms_with_view(&self.arch, raw_name, Some(self.bv)) { Some((name, Some(t))) => (Some(Conf::new(t, DEMANGLE_CONFIDENCE)), name), Some((name, _)) => (None, name), _ => (None, QualifiedName::new(vec![raw_name.clone()])), diff --git a/plugins/rtti/rtti.cpp b/plugins/rtti/rtti.cpp index fde5a6ab4..f713aef77 100644 --- a/plugins/rtti/rtti.cpp +++ b/plugins/rtti/rtti.cpp @@ -3,6 +3,20 @@ using namespace BinaryNinja; using namespace BinaryNinja::RTTI; +namespace +{ + std::string NormalizeRTTIClassName(std::string name) + { + size_t beginFind = name.find_first_of(' '); + if (beginFind != std::string::npos) + name.erase(0, beginFind + 1); + size_t endFind = name.find(" `RTTI Type Descriptor Name'"); + if (endFind != std::string::npos) + name.erase(endFind, name.length()); + return name; + } +} + Ref RTTI::GetRealSymbol(BinaryView *view, uint64_t relocAddr, uint64_t symAddr) { @@ -24,9 +38,9 @@ std::optional RTTI::DemangleNameMS(BinaryView* view, bool allowMang { QualifiedName demangledName = {}; Ref outType = {}; - if (!DemangleMS(view->GetDefaultArchitecture(), mangledName, outType, demangledName, true)) + if (!DemangleMS(view->GetDefaultArchitecture(), mangledName, outType, demangledName, view)) return DemangleNameLLVM(allowMangled, mangledName); - return demangledName.GetString(); + return NormalizeRTTIClassName(demangledName.GetString()); } @@ -90,14 +104,7 @@ std::optional RTTI::DemangleNameLLVM(bool allowMangled, const std:: Ref outType = {}; if (!DemangleLLVM(mangledName, demangledName, true)) return allowMangled ? std::optional(mangledName) : std::nullopt; - auto demangledNameStr = demangledName.GetString(); - size_t beginFind = demangledNameStr.find_first_of(' '); - if (beginFind != std::string::npos) - demangledNameStr.erase(0, beginFind + 1); - size_t endFind = demangledNameStr.find(" `RTTI Type Descriptor Name'"); - if (endFind != std::string::npos) - demangledNameStr.erase(endFind, demangledNameStr.length()); - return demangledNameStr; + return NormalizeRTTIClassName(demangledName.GetString()); } diff --git a/rust/src/demangle.rs b/rust/src/demangle.rs index 1f9f8941c..d6aad25ce 100644 --- a/rust/src/demangle.rs +++ b/rust/src/demangle.rs @@ -165,6 +165,46 @@ pub fn demangle_ms( } } +pub fn demangle_ms_with_view( + arch: &CoreArchitecture, + mangled_name: &str, + view: Option<&BinaryView>, +) -> Option<(QualifiedName, Option>)> { + let mangled_name = mangled_name.to_cstr(); + let mut out_type: *mut BNType = std::ptr::null_mut(); + let mut out_name: *mut *mut std::os::raw::c_char = std::ptr::null_mut(); + let mut out_size: usize = 0; + let res = unsafe { + BNDemangleMSWithOptions( + arch.handle, + mangled_name.as_ptr(), + &mut out_type, + &mut out_name, + &mut out_size, + view.map(|v| v.handle).unwrap_or(std::ptr::null_mut()), + ) + }; + + match res { + true => { + assert!(!out_name.is_null()); + let names: Vec<_> = unsafe { ArrayGuard::::new(out_name, out_size, ()) } + .iter() + .map(str::to_string) + .collect(); + unsafe { BNFreeDemangledName(&mut out_name, out_size) }; + + let out_type = match out_type.is_null() { + true => None, + false => Some(unsafe { Type::ref_from_raw(out_type) }), + }; + + Some((names.into(), out_type)) + } + false => None, + } +} + #[derive(PartialEq, Eq, Hash)] pub struct Demangler { pub(crate) handle: *mut BNDemangler, diff --git a/type.cpp b/type.cpp index 5aa679a16..6410b9f11 100644 --- a/type.cpp +++ b/type.cpp @@ -3442,4 +3442,3 @@ fmt::format_context::iterator fmt::formatter::format( return fmt::format_to(ctx.out(), "{}{}", obj.GetStringBeforeName(), obj.GetStringAfterName()); } } - diff --git a/view/pe/coffview.cpp b/view/pe/coffview.cpp index e009b91db..a36aff64a 100644 --- a/view/pe/coffview.cpp +++ b/view/pe/coffview.cpp @@ -1531,7 +1531,7 @@ void COFFView::AddCOFFSymbol(BNSymbolType type, const string& dll, const string& { QualifiedName demangledName; Ref demangledType; - if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, nullptr, m_simplifyTemplates)) + if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, this, m_simplifyTemplates)) { shortName = demangledName.GetString(); fullName = shortName; diff --git a/view/pe/peview.cpp b/view/pe/peview.cpp index 718822ba9..7fb74b57b 100644 --- a/view/pe/peview.cpp +++ b/view/pe/peview.cpp @@ -3562,7 +3562,7 @@ void PEView::AddPESymbol(BNSymbolType type, const string& dll, const string& nam { QualifiedName demangledName; Ref demangledType; - if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, nullptr, m_simplifyTemplates)) + if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, this, m_simplifyTemplates)) { shortName = demangledName.GetString(); fullName = shortName;