diff --git a/llvm/include/llvm/Support/Signals.h b/llvm/include/llvm/Support/Signals.h index 6ce26acdd458e..18df195bacd81 100644 --- a/llvm/include/llvm/Support/Signals.h +++ b/llvm/include/llvm/Support/Signals.h @@ -14,10 +14,25 @@ #ifndef LLVM_SUPPORT_SIGNALS_H #define LLVM_SUPPORT_SIGNALS_H +#include "llvm/Config/llvm-config.h" #include "llvm/Support/Compiler.h" #include #include +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +namespace llvm { +// Typedefs that are convenient but only used by the stack-trace-collection code +// added if DebugLoc origin-tracking is enabled. +using AddressSet = DenseSet>; +using SymbolizedAddressMap = + DenseMap, DenseMapInfo, + detail::DenseMapPair>>; +} +#endif + namespace llvm { class StringRef; class raw_ostream; @@ -57,6 +72,28 @@ LLVM_ABI void DisableSystemDialogsOnCrash(); /// specified, the entire frame is printed. LLVM_ABI void PrintStackTrace(raw_ostream &OS, int Depth = 0); +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +#ifdef NDEBUG +#error DebugLoc origin-tracking should not be enabled in Release builds. +#endif +/// Populates the given array with a stack trace of the current program, up to +/// MaxDepth frames. Returns the number of frames returned, which will be +/// inserted into \p StackTrace from index 0. All entries after the returned +/// depth will be unmodified. NB: This is only intended to be used for +/// introspection of LLVM by Debugify, will not be enabled in release builds, +/// and should not be relied on for other purposes. +template +int getStackTrace(std::array &StackTrace); + +/// Takes a set of \p Addresses, symbolizes them and stores the result in the +/// provided \p SymbolizedAddresses map. +/// NB: This is only intended to be used for introspection of LLVM by +/// Debugify, will not be enabled in release builds, and should not be relied +/// on for other purposes. +void symbolizeAddresses(AddressSet &Addresses, + SymbolizedAddressMap &SymbolizedAddresses); +#endif + // Run all registered signal handlers. LLVM_ABI void RunSignalHandlers(); diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp index 9f9030e79d104..a9c61f23497a5 100644 --- a/llvm/lib/Support/Signals.cpp +++ b/llvm/lib/Support/Signals.cpp @@ -31,7 +31,6 @@ #include "llvm/Support/raw_ostream.h" #include #include -#include //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system @@ -137,47 +136,28 @@ static FormattedNumber format_ptr(void *PC) { return format_hex((uint64_t)PC, PtrWidth); } -/// Helper that launches llvm-symbolizer and symbolizes a backtrace. -LLVM_ATTRIBUTE_USED -static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace, - int Depth, llvm::raw_ostream &OS) { - if (DisableSymbolicationFlag || getenv(DisableSymbolizationEnv)) - return false; - - // Don't recursively invoke the llvm-symbolizer binary. - if (Argv0.contains("llvm-symbolizer")) - return false; - - // FIXME: Subtract necessary number from StackTrace entries to turn return addresses - // into actual instruction addresses. - // Use llvm-symbolizer tool to symbolize the stack traces. First look for it - // alongside our binary, then in $PATH. - ErrorOr LLVMSymbolizerPathOrErr = std::error_code(); - if (const char *Path = getenv(LLVMSymbolizerPathEnv)) { - LLVMSymbolizerPathOrErr = sys::findProgramByName(Path); - } else if (!Argv0.empty()) { - StringRef Parent = llvm::sys::path::parent_path(Argv0); - if (!Parent.empty()) - LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer", Parent); - } - if (!LLVMSymbolizerPathOrErr) - LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer"); - if (!LLVMSymbolizerPathOrErr) - return false; - const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr; - - // If we don't know argv0 or the address of main() at this point, try - // to guess it anyway (it's possible on some platforms). - std::string MainExecutableName = - sys::fs::exists(Argv0) ? (std::string)std::string(Argv0) - : sys::fs::getMainExecutable(nullptr, nullptr); +/// Reads a file \p Filename written by llvm-symbolizer containing function +/// names and source locations for the addresses in \p AddressList and returns +/// the strings in a vector of pairs, where the first pair element is the index +/// of the corresponding entry in AddressList and the second is the symbolized +/// frame, in a format based on the sanitizer stack trace printer, with the +/// exception that it does not write out frame numbers (i.e. "#2 " for the +/// third address), as it is not assumed that \p AddressList corresponds to a +/// single stack trace. +/// There may be multiple returned entries for a single \p AddressList entry if +/// that frame address corresponds to one or more inlined frames; in this case, +/// all frames for an address will appear contiguously and in-order. +std::optional, 0>> +collectAddressSymbols(void **AddressList, unsigned AddressCount, + const char *MainExecutableName, + const std::string &LLVMSymbolizerPath) { BumpPtrAllocator Allocator; StringSaver StrPool(Allocator); - std::vector Modules(Depth, nullptr); - std::vector Offsets(Depth, 0); - if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(), - MainExecutableName.c_str(), StrPool)) - return false; + SmallVector Modules(AddressCount, nullptr); + SmallVector Offsets(AddressCount, 0); + if (!findModulesAndOffsets(AddressList, AddressCount, Modules.data(), Offsets.data(), + MainExecutableName, StrPool)) + return {}; int InputFD; SmallString<32> InputFile, OutputFile; sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile); @@ -187,9 +167,9 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace, { raw_fd_ostream Input(InputFD, true); - for (int i = 0; i < Depth; i++) { - if (Modules[i]) - Input << Modules[i] << " " << (void*)Offsets[i] << "\n"; + for (unsigned AddrIdx = 0; AddrIdx < AddressCount; AddrIdx++) { + if (Modules[AddrIdx]) + Input << Modules[AddrIdx] << " " << (void*)Offsets[AddrIdx] << "\n"; } } @@ -206,53 +186,148 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace, int RunResult = sys::ExecuteAndWait(LLVMSymbolizerPath, Args, std::nullopt, Redirects); if (RunResult != 0) - return false; + return {}; - // This report format is based on the sanitizer stack trace printer. See - // sanitizer_stacktrace_printer.cc in compiler-rt. + + SmallVector, 0> Result; auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str()); if (!OutputBuf) - return false; + return {}; StringRef Output = OutputBuf.get()->getBuffer(); SmallVector Lines; Output.split(Lines, "\n"); - auto CurLine = Lines.begin(); - int frame_no = 0; - for (int i = 0; i < Depth; i++) { - auto PrintLineHeader = [&]() { - OS << right_justify(formatv("#{0}", frame_no++).str(), - std::log10(Depth) + 2) - << ' ' << format_ptr(StackTrace[i]) << ' '; - }; - if (!Modules[i]) { - PrintLineHeader(); - OS << '\n'; + auto *CurLine = Lines.begin(); + // Lines contains the output from llvm-symbolizer, which should contain for + // each address with a module in order of appearance, one or more lines + // containing the function name and line associated with that address, + // followed by an empty line. + // For each address, adds an output entry for every real or inlined frame at + // that address. For addresses without known modules, we have a single entry + // containing just the formatted address; for all other output entries, we + // output the function entry if it is known, and either the line number if it + // is known or the module+address offset otherwise. + for (unsigned AddrIdx = 0; AddrIdx < AddressCount; AddrIdx++) { + if (!Modules[AddrIdx]) { + auto &SymbolizedFrame = + Result.emplace_back(std::make_pair(AddrIdx, "")); + raw_string_ostream OS(SymbolizedFrame.second); + OS << format_ptr(AddressList[AddrIdx]); continue; } // Read pairs of lines (function name and file/line info) until we // encounter empty line. for (;;) { if (CurLine == Lines.end()) - return false; + return {}; StringRef FunctionName = *CurLine++; if (FunctionName.empty()) break; - PrintLineHeader(); + auto &SymbolizedFrame = + Result.emplace_back(std::make_pair(AddrIdx, "")); + raw_string_ostream OS(SymbolizedFrame.second); + OS << format_ptr(AddressList[AddrIdx]) << ' '; if (!FunctionName.starts_with("??")) OS << FunctionName << ' '; if (CurLine == Lines.end()) - return false; + return {}; StringRef FileLineInfo = *CurLine++; if (!FileLineInfo.starts_with("??")) OS << FileLineInfo; else - OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")"; - OS << "\n"; + OS << "(" << Modules[AddrIdx] << '+' << format_hex(Offsets[AddrIdx], 0) << ")"; } } + return Result; +} + +ErrorOr getLLVMSymbolizerPath(StringRef Argv0 = {}) { + ErrorOr LLVMSymbolizerPathOrErr = std::error_code(); + if (const char *Path = getenv(LLVMSymbolizerPathEnv)) { + LLVMSymbolizerPathOrErr = sys::findProgramByName(Path); + } else if (!Argv0.empty()) { + StringRef Parent = llvm::sys::path::parent_path(Argv0); + if (!Parent.empty()) + LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer", Parent); + } + if (!LLVMSymbolizerPathOrErr) + LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer"); + return LLVMSymbolizerPathOrErr; +} + +/// Helper that launches llvm-symbolizer and symbolizes a backtrace. +LLVM_ATTRIBUTE_USED +static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace, + int Depth, llvm::raw_ostream &OS) { + if (DisableSymbolicationFlag || getenv(DisableSymbolizationEnv)) + return false; + + // Don't recursively invoke the llvm-symbolizer binary. + if (Argv0.contains("llvm-symbolizer")) + return false; + + // FIXME: Subtract necessary number from StackTrace entries to turn return addresses + // into actual instruction addresses. + // Use llvm-symbolizer tool to symbolize the stack traces. First look for it + // alongside our binary, then in $PATH. + ErrorOr LLVMSymbolizerPathOrErr = getLLVMSymbolizerPath(Argv0); + if (!LLVMSymbolizerPathOrErr) + return false; + const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr; + + // If we don't know argv0 or the address of main() at this point, try + // to guess it anyway (it's possible on some platforms). + std::string MainExecutableName = + sys::fs::exists(Argv0) ? (std::string)std::string(Argv0) + : sys::fs::getMainExecutable(nullptr, nullptr); + + auto SymbolizedAddressesOpt = collectAddressSymbols( + StackTrace, Depth, MainExecutableName.c_str(), LLVMSymbolizerPath); + if (!SymbolizedAddressesOpt) + return false; + for (unsigned FrameNo = 0; FrameNo < SymbolizedAddressesOpt->size(); + ++FrameNo) { + OS << right_justify(formatv("#{0}", FrameNo).str(), std::log10(Depth) + 2) + << ' ' << (*SymbolizedAddressesOpt)[FrameNo].second << '\n'; + } return true; } +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +void sys::symbolizeAddresses(AddressSet &Addresses, + SymbolizedAddressMap &SymbolizedAddresses) { + assert(!DisableSymbolicationFlag && !getenv(DisableSymbolizationEnv) && + "Debugify origin stacktraces require symbolization to be enabled."); + + // Convert Set of Addresses to ordered list. + SmallVector AddressList(Addresses.begin(), Addresses.end()); + if (AddressList.empty()) + return; + llvm::sort(AddressList); + + // Use llvm-symbolizer tool to symbolize the stack traces. First look for it + // alongside our binary, then in $PATH. + ErrorOr LLVMSymbolizerPathOrErr = getLLVMSymbolizerPath(); + if (!LLVMSymbolizerPathOrErr) + report_fatal_error("Debugify origin stacktraces require llvm-symbolizer"); + const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr; + + // Try to guess the main executable name, since we don't have argv0 available + // here. + std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr); + + auto SymbolizedAddressesOpt = collectAddressSymbols( + AddressList.begin(), AddressList.size(), + MainExecutableName.c_str(), LLVMSymbolizerPath); + if (!SymbolizedAddressesOpt) + return; + for (auto SymbolizedFrame : *SymbolizedAddressesOpt) { + SmallVector &SymbolizedAddrs = SymbolizedAddresses[AddressList[SymbolizedFrame.first]]; + SymbolizedAddrs.push_back(SymbolizedFrame.second); + } + return; +} +#endif + static bool printMarkupContext(raw_ostream &OS, const char *MainExecutableName); LLVM_ATTRIBUTE_USED diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc index 6668a2953b3b2..a4525a5903649 100644 --- a/llvm/lib/Support/Unix/Signals.inc +++ b/llvm/lib/Support/Unix/Signals.inc @@ -507,6 +507,21 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) { return 0; } +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +#if !defined(HAVE_BACKTRACE) +#error DebugLoc origin-tracking currently requires `backtrace()`. +#endif +namespace llvm { +namespace sys { +template +int getStackTrace(std::array &StackTrace) { + return backtrace(StackTrace.data(), MaxDepth); +} +template int getStackTrace<16ul>(std::array &); +} // namespace sys +} // namespace llvm +#endif + /// If this is an ELF platform, we can find all loaded modules and their virtual /// addresses with dl_iterate_phdr. static bool findModulesAndOffsets(void **StackTrace, int Depth, diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc index f11ad09f37139..441b66a294e42 100644 --- a/llvm/lib/Support/Windows/Signals.inc +++ b/llvm/lib/Support/Windows/Signals.inc @@ -9,6 +9,7 @@ // This file provides the Win32 specific implementation of the Signals class. // //===----------------------------------------------------------------------===// +#include "llvm/Config/llvm-config.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ExitCodes.h" #include "llvm/Support/FileSystem.h" @@ -542,6 +543,10 @@ void sys::PrintStackTraceOnErrorSignal(StringRef Argv0, extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord); #endif +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +#error DebugLoc origin-tracking currently unimplemented for Windows. +#endif + static void LocalPrintStackTrace(raw_ostream &OS, PCONTEXT C) { STACKFRAME64 StackFrame{}; CONTEXT Context{};