From df2371188f2114b642b49919f134bb07401475d7 Mon Sep 17 00:00:00 2001 From: Bharathi Seshadri Date: Wed, 11 Oct 2023 06:50:36 +0000 Subject: [PATCH] Clang/LLD: Create metadata file Signed-off-by: Bharathi Seshadri --- .../include/clang/Basic/CodeGenOptions.h | 5 +++ .../clang/lib/Frontend/CompilerInstance.cpp | 1 + .../clang/test/CodeGen/omnibor_test.c | 11 +++-- llvm-project/clang/test/Driver/unknown-arg.c | 2 + llvm-project/clang/tools/driver/cc1_main.cpp | 42 ++++++++++-------- llvm-project/lld/Common/Strings.cpp | 14 ++++++ llvm-project/lld/ELF/Config.h | 2 + llvm-project/lld/ELF/Driver.cpp | 25 ++++------- llvm-project/lld/ELF/SyntheticSections.cpp | 42 ++++++++++-------- llvm-project/lld/ELF/Writer.cpp | 43 +++++++++++++++++++ llvm-project/lld/include/lld/Common/Strings.h | 1 + llvm-project/lld/test/ELF/omnibor_test.s | 8 ++++ 12 files changed, 139 insertions(+), 57 deletions(-) diff --git a/llvm-project/clang/include/clang/Basic/CodeGenOptions.h b/llvm-project/clang/include/clang/Basic/CodeGenOptions.h index 796f834feb..5507875b2b 100644 --- a/llvm-project/clang/include/clang/Basic/CodeGenOptions.h +++ b/llvm-project/clang/include/clang/Basic/CodeGenOptions.h @@ -187,11 +187,16 @@ class CodeGenOptions : public CodeGenOptionsBase { /// if non-empty. std::string OmniborCommandLine; + /// Output filename for the compilation (same as the one in FrontendOpts). + std::string OutputFile; + /// List of dependent source/header files. /// This is shared with DependencyOuputOptions. /// This has same contents as Dependencies in DependencyCollector. 
std::shared_ptr> BomDependencies; + std::string OmniborMetadataContents; + std::map DebugPrefixMap; std::map CoveragePrefixMap; diff --git a/llvm-project/clang/lib/Frontend/CompilerInstance.cpp b/llvm-project/clang/lib/Frontend/CompilerInstance.cpp index 2df101fa4b..818dbf2e2d 100644 --- a/llvm-project/clang/lib/Frontend/CompilerInstance.cpp +++ b/llvm-project/clang/lib/Frontend/CompilerInstance.cpp @@ -503,6 +503,7 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) { addDependencyCollector(std::make_shared(DepOpts)); CodeGenOptions &CGOpts = getCodeGenOpts(); CGOpts.BomDependencies = DepOpts.BomDependencies; + CGOpts.OutputFile = getFrontendOpts().OutputFile; } if (!DepOpts.DOTOutputFile.empty()) AttachDependencyGraphGen(*PP, DepOpts.DOTOutputFile, diff --git a/llvm-project/clang/test/CodeGen/omnibor_test.c b/llvm-project/clang/test/CodeGen/omnibor_test.c index 8454e3727a..e5943001e0 100644 --- a/llvm-project/clang/test/CodeGen/omnibor_test.c +++ b/llvm-project/clang/test/CodeGen/omnibor_test.c @@ -5,7 +5,9 @@ // RUN: llvm-readelf -n omnibor.o | FileCheck --check-prefix=BOM_NOTE_SECTION %s // RUN: cat %t/objects/gitoid_blob_sha1/0b/e9b91c3e456c910b32afc862ffd073b7c61d5f | FileCheck --check-prefix=BOM_FILE_SHA1_CONTENTS %s // RUN: cat %t/objects/gitoid_blob_sha256/58/ed318e337ad6260f471c086e9c9985543c5fbd507c3b44924f5af37bd0ea96 | FileCheck --check-prefix=BOM_FILE_SHA256_CONTENTS %s -// RUN: cat %t/metadata/llvm/17274786fd44c95cae7ccb2d0b29ca1738c3cbb1 | FileCheck --check-prefix=METADATA_CONTENTS %s + +// RUN: ARTIFACT_ID=`git hash-object omnibor.o` +// RUN: cat %t/metadata/llvm/$ARTIFACT_ID | FileCheck --check-prefix=METADATA_CONTENTS %s // RUN: %clang -c -frecord-omnibor=%t -o %t/omnibor_1.o %S/Inputs/omnibor.c -I%S/Inputs/omnibor.h // RUN: llvm-readelf -n %t/omnibor_1.o | FileCheck --check-prefix=BOM_NOTE_SECTION %s @@ -49,6 +51,7 @@ // BOM_FILE_SHA256_CONTENTS: blob 465be75836446b37f31c5db1f29a8689f37cd5625d4b30237877703fc1070f5e // 
BOM_FILE_SHA256_CONTENTS: blob 8ad020236bd23736a75699ba4e83f52593d61c4c9f8d5932b57fce011f832bf8 -// METADATA_CONTENTS: output: omnibor.o -// METADATA_CONTENTS_NEXT: input {{.*}}/omnibor.c -// METADATA_CONTENTS_NEXT: input {{.*}}/omnibor.h +// METADATA_CONTENTS: output: {{.*}} path: {{.*}}omnibor.o +// METADATA_CONTENTS-NEXT: input: {{.*}} path: {{.*}}/omnibor.c +// METADATA_CONTENTS-NEXT: input: {{.*}} path: {{.*}}/omnibor.h + diff --git a/llvm-project/clang/test/Driver/unknown-arg.c b/llvm-project/clang/test/Driver/unknown-arg.c index c6d2c04546..4136ad81cb 100644 --- a/llvm-project/clang/test/Driver/unknown-arg.c +++ b/llvm-project/clang/test/Driver/unknown-arg.c @@ -22,6 +22,8 @@ // RUN: FileCheck %s --check-prefix=CC1AS-DID-YOU-MEAN // RUN: not %clang -cc1asphalt -help 2>&1 | \ // RUN: FileCheck %s --check-prefix=UNKNOWN-INTEGRATED +// RUN: not %clang -### -frecord-omnibor -S -target x86_64-unknown-linux %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=UNKNOWN-OMNIBOR // RUN: not %clang -### -frecord-omnibor -S -target x86_64-unknown-linux %s 2>&1 | FileCheck %s --check-prefix=UNKNOWN-OMNIBOR diff --git a/llvm-project/clang/tools/driver/cc1_main.cpp b/llvm-project/clang/tools/driver/cc1_main.cpp index aee5741ba5..768bfca6cc 100644 --- a/llvm-project/clang/tools/driver/cc1_main.cpp +++ b/llvm-project/clang/tools/driver/cc1_main.cpp @@ -39,7 +39,6 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/SHA1.h" -#include "llvm/Support/SHA256.h" #include "llvm/Support/Signals.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/TimeProfiler.h" @@ -306,7 +305,9 @@ int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { // later errors use the default handling behavior instead. 
llvm::remove_fatal_error_handler(); - if (!Clang->getCodeGenOpts().RecordOmniBor.empty()) { + SmallString<128> OutFile(Clang->getFrontendOpts().OutputFile); + if (!Clang->getCodeGenOpts().RecordOmniBor.empty() && + (strcmp(OutFile.c_str(), "-") != 0)) { SmallString<128> gitoidPath(Clang->getCodeGenOpts().RecordOmniBor); llvm::sys::path::append(gitoidPath, "metadata/llvm"); auto EC = @@ -314,30 +315,30 @@ int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { if (EC) llvm::errs() << "\nCannot create metadata file "; - std::vector MetadataLines; - std::string outLine("\noutput: "); - outLine.append(Clang->getFrontendOpts().OutputFile); - MetadataLines.push_back(outLine); - + std::string MetadataLines; if (Clang->getCodeGenOpts().BomDependencies->size()) { std::vector &Deps = *(Clang->getCodeGenOpts().BomDependencies); for (auto file : Deps) { - std::string Line = "\ninput " + file; - MetadataLines.push_back(Line); + std::string Line = "\ninput: " + getSHA1Hash(file) + " path: " + file; + MetadataLines.append(Line); } } - // Metadata contents - std::string MetadataContents; - for (auto line : MetadataLines) - MetadataContents.append(line); + std::string MetadataHdrContents; + llvm::sys::fs::make_absolute(OutFile); + MetadataHdrContents.append("output: "); + MetadataHdrContents.append(getSHA1Hash(OutFile.c_str())); + MetadataHdrContents.append(" path: "); + MetadataHdrContents.append(OutFile.c_str()); - // Write command line - if (!Clang->getCodeGenOpts().OmniborCommandLine.empty()) { - MetadataContents.append("\nbuild_cmd: "); - MetadataContents.append(Clang->getCodeGenOpts().OmniborCommandLine); - } + std::string BuildCmdContents; + BuildCmdContents.append("\nbuild_cmd: "); + BuildCmdContents.append(Clang->getCodeGenOpts().OmniborCommandLine); + BuildCmdContents.append("\n==== End of raw info for this process\n"); + + // TODO: Optimize this to avoid recomputing the hash for the input + // dependencies. 
// Write metadata std::string artifact_id = getSHA1Hash(Clang->getFrontendOpts().OutputFile); @@ -346,8 +347,11 @@ int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { llvm::raw_fd_ostream OSM(MetadataFile, EC, llvm::sys::fs::OF_TextWithCRLF); if (EC) { llvm::errs() << MetadataFile << EC.message(); + } else { + OSM << MetadataHdrContents; + OSM << MetadataLines; + OSM << BuildCmdContents; } - OSM << MetadataContents; } // When running with -disable-free, don't do any destruction or shutdown. diff --git a/llvm-project/lld/Common/Strings.cpp b/llvm-project/lld/Common/Strings.cpp index 6e5478e335..981ad53d18 100644 --- a/llvm-project/lld/Common/Strings.cpp +++ b/llvm-project/lld/Common/Strings.cpp @@ -45,6 +45,20 @@ bool StringMatcher::match(StringRef s) const { return false; } +std::string lld::convertToHex(StringRef Input) { + static const char *const LUT = "0123456789abcdef"; + size_t Length = Input.size(); + + std::string Output; + Output.reserve(2 * Length); + for (size_t i = 0; i < Length; ++i) { + const unsigned char c = Input[i]; + Output.push_back(LUT[c >> 4]); + Output.push_back(LUT[c & 15]); + } + return Output; +} + // Converts a hex string (e.g. "deadbeef") to a vector. std::vector lld::parseHex(StringRef s) { std::vector hex; diff --git a/llvm-project/lld/ELF/Config.h b/llvm-project/lld/ELF/Config.h index 5f270d32c1..3af46658c0 100644 --- a/llvm-project/lld/ELF/Config.h +++ b/llvm-project/lld/ELF/Config.h @@ -177,6 +177,8 @@ struct Configuration { bool gcSections; bool gdbIndex; std::string OmniBorDir; + std::string SHA1_MetadataContents; + std::string CommandLine; bool gnuHash = false; bool gnuUnique; bool hasDynSymTab; diff --git a/llvm-project/lld/ELF/Driver.cpp b/llvm-project/lld/ELF/Driver.cpp index 4d35d914c2..7091d8f00d 100644 --- a/llvm-project/lld/ELF/Driver.cpp +++ b/llvm-project/lld/ELF/Driver.cpp @@ -972,6 +972,14 @@ static bool isValidReportString(StringRef arg) { // Initializes Config members by the command line options. 
static void readConfigs(opt::InputArgList &args) { + + // Collect the command line arguments for ld.lld + config->CommandLine.append("ld.lld "); + for (unsigned i = 0; i < args.getNumInputArgStrings(); i++) { + config->CommandLine.append(args.getArgString(i)); + config->CommandLine.append(" "); + } + errorHandler().verbose = args.hasArg(OPT_verbose); errorHandler().vsDiagnostics = args.hasArg(OPT_visual_studio_diagnostics_format, false); @@ -1046,7 +1054,6 @@ static void readConfigs(opt::InputArgList &args) { } } if (!gitOidPath.empty()) { - llvm::sys::path::append(gitOidPath, "objects"); config->OmniBorDir = gitOidPath.str().str(); auto EC = llvm::sys::fs::create_directories(config->OmniBorDir, true); if (EC) @@ -1758,22 +1765,6 @@ static void handleLibcall(StringRef name) { sym->extract(); } -// TODO: Move this to a common location. This is -// being used by both clang and lld. -static std::string convertToHex(StringRef Input) { - static const char *const LUT = "0123456789abcdef"; - size_t Length = Input.size(); - - std::string Output; - Output.reserve(2 * Length); - for (size_t i = 0; i < Length; ++i) { - const unsigned char c = Input[i]; - Output.push_back(LUT[c >> 4]); - Output.push_back(LUT[c & 15]); - } - return Output; -} - // Handle --dependency-file=. 
If that option is given, lld creates a // file at a given path with the following contents: // diff --git a/llvm-project/lld/ELF/SyntheticSections.cpp b/llvm-project/lld/ELF/SyntheticSections.cpp index d2afe2ae01..73afcd42fb 100644 --- a/llvm-project/lld/ELF/SyntheticSections.cpp +++ b/llvm-project/lld/ELF/SyntheticSections.cpp @@ -64,6 +64,9 @@ struct Omnibor { }; using FileHashBomMap = std::map; +// Metadata contents compatible to bomsh +std::string MetadataContents; + constexpr size_t MergeNoTailSection::numShards; static uint64_t readUint(uint8_t *buf) { @@ -131,24 +134,21 @@ template void BomSection::writeTo(uint8_t *buf) { memcpy(buf + 20, sha256_contents.data(), sha256_contents.size()); } -// TODO: Move this function. -static std::string convertToHex(StringRef Input) { - static const char *const LUT = "0123456789abcdef"; - size_t Length = Input.size(); - - std::string Output; - Output.reserve(2 * Length); - for (size_t i = 0; i < Length; ++i) { - const unsigned char c = Input[i]; - Output.push_back(LUT[c >> 4]); - Output.push_back(LUT[c & 15]); - } - return Output; -} - static void genArtifactIds(FileHashBomMap &BomMap) { struct Omnibor bomData; + std::string OutFilename(config->outputFile.str()); + // StringRef BomFile = StringRef(OutFilename); + + // Emit gitbom info for Outputfile + // Getting the hash of the outputfile is tricky as the file has not been + // written at this point. 
+ MetadataContents.append("\noutput: "); + MetadataContents.append(" path: "); + SmallString<128> OutFile(OutFilename); + llvm::sys::fs::make_absolute(OutFile); + MetadataContents.append(OutFile.c_str()); + for (StringRef path : config->dependencyFiles) { llvm::SHA1 SHA1_Hash; llvm::SHA256 SHA256_Hash; @@ -166,6 +166,14 @@ static void genArtifactIds(FileHashBomMap &BomMap) { auto Result_sha1 = SHA1_Hash.final(); bomData.sha1_artifact_id = convertToHex(Result_sha1); + // Collect Metadata + config->SHA1_MetadataContents.append("\ninput: "); + config->SHA1_MetadataContents.append(bomData.sha1_artifact_id); + config->SHA1_MetadataContents.append(" path: "); + SmallString<128> InFilename(path); + llvm::sys::fs::make_absolute(InFilename); + config->SHA1_MetadataContents.append(InFilename.c_str()); + // sha256 SHA256_Hash.update(StringRef(initData)); SHA256_Hash.update(fileBuf.get()->getBuffer()); @@ -204,7 +212,7 @@ static std::string createSHA1_BomFile(FileHashBomMap &BomMap) { SmallString<128> gitOidPath; gitOidPath = StringRef(config->OmniBorDir); - llvm::sys::path::append(gitOidPath, "gitoid_blob_sha1"); + llvm::sys::path::append(gitOidPath, "objects/gitoid_blob_sha1"); llvm::sys::path::append(gitOidPath, gitOid.substr(0, 2)); std::error_code EC; EC = llvm::sys::fs::create_directories(gitOidPath, true); @@ -248,7 +256,7 @@ static std::string createSHA256_BomFile(FileHashBomMap &BomMap) { SmallString<128> gitOidPath; gitOidPath = StringRef(config->OmniBorDir); - llvm::sys::path::append(gitOidPath, "gitoid_blob_sha256"); + llvm::sys::path::append(gitOidPath, "objects/gitoid_blob_sha256"); llvm::sys::path::append(gitOidPath, gitOid.substr(0, 2)); std::error_code EC; EC = llvm::sys::fs::create_directories(gitOidPath, true); diff --git a/llvm-project/lld/ELF/Writer.cpp b/llvm-project/lld/ELF/Writer.cpp index b7877674b2..b5e533d786 100644 --- a/llvm-project/lld/ELF/Writer.cpp +++ b/llvm-project/lld/ELF/Writer.cpp @@ -535,6 +535,23 @@ template void 
elf::createSyntheticSections() { add(*in.strTab); } +static std::string getSHA1Hash(std::string Filename) { + llvm::ErrorOr> fileBuf = + llvm::MemoryBuffer::getFile(Filename, /*IsText=*/true); + if (!fileBuf) { + error("\n Cannot open " + Filename); + return std::string(); + } + + llvm::SHA1 Hash; + std::string initData = + "blob " + std::to_string(fileBuf.get()->getBufferSize()) + '\0'; + Hash.update(StringRef(initData)); + Hash.update(fileBuf.get()->getBuffer()); + auto Result = Hash.final(); + return convertToHex(Result); +} + // The main function of the writer. template void Writer::run() { copyLocalSymbols(); @@ -612,6 +629,32 @@ template void Writer::run() { if (auto e = buffer->commit()) error("failed to write to the output file: " + toString(std::move(e))); } + + // Write Omnibor metadata + std::error_code EC; + if (config->outputFile.str().empty() || config->OmniBorDir.empty()) + return; + SmallString<128> MetadataFile(config->OmniBorDir); + MetadataFile.append("/metadata/llvm/"); + EC = llvm::sys::fs::create_directories(MetadataFile, true); + SmallString<128> OutputArtifact(config->outputFile.str()); + llvm::sys::fs::make_absolute(OutputArtifact); + std::string OutputArtifactSHA1 = getSHA1Hash(config->outputFile.str()); + MetadataFile.append(OutputArtifactSHA1); + llvm::raw_fd_ostream OSM(MetadataFile, EC, llvm::sys::fs::OF_TextWithCRLF); + if (EC) { + error("\n failed to create metadata file " + MetadataFile.str().str()); + return; + } + std::string MetadataHeader; + MetadataHeader.append("output: "); + MetadataHeader.append(OutputArtifactSHA1 + + " path: " + OutputArtifact.c_str()); + config->SHA1_MetadataContents.append("\nbuild_cmd: " + config->CommandLine); + config->SHA1_MetadataContents.append( + "\n==== End of raw info for this process\n"); + OSM << MetadataHeader; + OSM << config->SHA1_MetadataContents; } template diff --git a/llvm-project/lld/include/lld/Common/Strings.h b/llvm-project/lld/include/lld/Common/Strings.h index 
ece8018927..e54a611142 100644 --- a/llvm-project/lld/include/lld/Common/Strings.h +++ b/llvm-project/lld/include/lld/Common/Strings.h @@ -26,6 +26,7 @@ inline std::string demangle(llvm::StringRef symName, bool shouldDemangle) { return std::string(symName); } +std::string convertToHex(llvm::StringRef Input); std::vector parseHex(llvm::StringRef s); bool isValidCIdentifier(llvm::StringRef s); diff --git a/llvm-project/lld/test/ELF/omnibor_test.s b/llvm-project/lld/test/ELF/omnibor_test.s index d090776383..23f8f7ec42 100644 --- a/llvm-project/lld/test/ELF/omnibor_test.s +++ b/llvm-project/lld/test/ELF/omnibor_test.s @@ -4,6 +4,8 @@ # RUN: llvm-readelf -n %t/omnibor.exe | FileCheck --check-prefix=BOM_NOTE_SECTION %s # RUN: cat %t/objects/gitoid_blob_sha1/62/db4ab2efaf0b3df12e5004ff2b002936b94697 | FileCheck --check-prefix=BOM_FILE_SHA1 %s # RUN: cat %t/objects/gitoid_blob_sha256/6b/a8bcf3a1c3c7cafcfba5994ca7ee29bd47234f8616991717677836ce00dd95 | FileCheck --check-prefix=BOM_FILE_SHA256 %s +# RUN: ARTIFACT_ID=`git hash-object %t/omnibor.exe` +# RUN: cat %t/metadata/llvm/$ARTIFACT_ID | FileCheck --check-prefix=METADATA_CONTENTS %s # RUN: rm -rf %t/objects # RUN: ld.lld %t/omnibor.o -e main --omnibor=%t/omnibor_dir -o %t/omnibor_1.exe @@ -40,3 +42,9 @@ # BOM_NOTE_SECTION-NEXT: SHA1 GitOID: 62db4ab2efaf0b3df12e5004ff2b002936b94697 # BOM_NOTE_SECTION-NEXT: 0x00000020 NT_GITOID_SHA256 # BOM_NOTE_SECTION-NEXT: SHA256 GitOID: 6ba8bcf3a1c3c7cafcfba5994ca7ee29bd47234f8616991717677836ce00dd95 + +# METADATA_CONTENTS: output: {{.*}} path: {{.*}}omnibor.exe +# METADATA_CONTENTS: input: {{.*}} path: {{.*}}omnibor.o +# METADATA_CONTENTS: build_cmd: ld.lld {{.*}} +# METADATA_CONTENTS: ==== End of raw info for this process +