Skip to content

Commit

Permalink
Merge pull request #33 from bharsesh/main
Browse files Browse the repository at this point in the history
Clang/LLD: Create metadata file
  • Loading branch information
edwarnicke authored Oct 20, 2023
2 parents 5a309af + df23711 commit 53fc4b2
Show file tree
Hide file tree
Showing 12 changed files with 139 additions and 57 deletions.
5 changes: 5 additions & 0 deletions llvm-project/clang/include/clang/Basic/CodeGenOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,16 @@ class CodeGenOptions : public CodeGenOptionsBase {
/// if non-empty.
std::string OmniborCommandLine;

/// Output filename for the compilation (same as the one in FrontendOpts).
std::string OutputFile;

/// List of dependent source/header files.
/// This is shared with DependencyOutputOptions.
/// This has the same contents as Dependencies in DependencyCollector.
std::shared_ptr<std::vector<std::string>> BomDependencies;

std::string OmniborMetadataContents;

std::map<std::string, std::string> DebugPrefixMap;
std::map<std::string, std::string> CoveragePrefixMap;

Expand Down
1 change: 1 addition & 0 deletions llvm-project/clang/lib/Frontend/CompilerInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,7 @@ void CompilerInstance::createPreprocessor(TranslationUnitKind TUKind) {
addDependencyCollector(std::make_shared<BomDependencyGenerator>(DepOpts));
CodeGenOptions &CGOpts = getCodeGenOpts();
CGOpts.BomDependencies = DepOpts.BomDependencies;
CGOpts.OutputFile = getFrontendOpts().OutputFile;
}
if (!DepOpts.DOTOutputFile.empty())
AttachDependencyGraphGen(*PP, DepOpts.DOTOutputFile,
Expand Down
11 changes: 7 additions & 4 deletions llvm-project/clang/test/CodeGen/omnibor_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
// RUN: llvm-readelf -n omnibor.o | FileCheck --check-prefix=BOM_NOTE_SECTION %s
// RUN: cat %t/objects/gitoid_blob_sha1/0b/e9b91c3e456c910b32afc862ffd073b7c61d5f | FileCheck --check-prefix=BOM_FILE_SHA1_CONTENTS %s
// RUN: cat %t/objects/gitoid_blob_sha256/58/ed318e337ad6260f471c086e9c9985543c5fbd507c3b44924f5af37bd0ea96 | FileCheck --check-prefix=BOM_FILE_SHA256_CONTENTS %s
// RUN: cat %t/metadata/llvm/17274786fd44c95cae7ccb2d0b29ca1738c3cbb1 | FileCheck --check-prefix=METADATA_CONTENTS %s

// RUN: ARTIFACT_ID=`git hash-object omnibor.o`
// RUN: cat %t/metadata/llvm/$ARTIFACT_ID | FileCheck --check-prefix=METADATA_CONTENTS %s

// RUN: %clang -c -frecord-omnibor=%t -o %t/omnibor_1.o %S/Inputs/omnibor.c -I%S/Inputs/omnibor.h
// RUN: llvm-readelf -n %t/omnibor_1.o | FileCheck --check-prefix=BOM_NOTE_SECTION %s
Expand Down Expand Up @@ -49,6 +51,7 @@
// BOM_FILE_SHA256_CONTENTS: blob 465be75836446b37f31c5db1f29a8689f37cd5625d4b30237877703fc1070f5e
// BOM_FILE_SHA256_CONTENTS: blob 8ad020236bd23736a75699ba4e83f52593d61c4c9f8d5932b57fce011f832bf8

// METADATA_CONTENTS: output: omnibor.o
// METADATA_CONTENTS_NEXT: input {{.*}}/omnibor.c
// METADATA_CONTENTS_NEXT: input {{.*}}/omnibor.h
// METADATA_CONTENTS: output: {{.*}} path: {{.*}}omnibor.o
// METADATA_CONTENTS-NEXT: input: {{.*}} path: {{.*}}/omnibor.c
// METADATA_CONTENTS-NEXT: input: {{.*}} path: {{.*}}/omnibor.h

2 changes: 2 additions & 0 deletions llvm-project/clang/test/Driver/unknown-arg.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
// RUN: FileCheck %s --check-prefix=CC1AS-DID-YOU-MEAN
// RUN: not %clang -cc1asphalt -help 2>&1 | \
// RUN: FileCheck %s --check-prefix=UNKNOWN-INTEGRATED
// RUN: not %clang -### -frecord-omnibor -S -target x86_64-unknown-linux %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=UNKNOWN-OMNIBOR

// RUN: not %clang -### -frecord-omnibor -S -target x86_64-unknown-linux %s 2>&1 | FileCheck %s --check-prefix=UNKNOWN-OMNIBOR

Expand Down
42 changes: 23 additions & 19 deletions llvm-project/clang/tools/driver/cc1_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/SHA256.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/TimeProfiler.h"
Expand Down Expand Up @@ -306,38 +305,40 @@ int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
// later errors use the default handling behavior instead.
llvm::remove_fatal_error_handler();

if (!Clang->getCodeGenOpts().RecordOmniBor.empty()) {
SmallString<128> OutFile(Clang->getFrontendOpts().OutputFile);
if (!Clang->getCodeGenOpts().RecordOmniBor.empty() &&
(strcmp(OutFile.c_str(), "-") != 0)) {
SmallString<128> gitoidPath(Clang->getCodeGenOpts().RecordOmniBor);
llvm::sys::path::append(gitoidPath, "metadata/llvm");
auto EC =
llvm::sys::fs::create_directories(gitoidPath, /*IgnoreExisting=*/true);
if (EC)
llvm::errs() << "\nCannot create metadata file ";

std::vector<std::string> MetadataLines;
std::string outLine("\noutput: ");
outLine.append(Clang->getFrontendOpts().OutputFile);
MetadataLines.push_back(outLine);

std::string MetadataLines;
if (Clang->getCodeGenOpts().BomDependencies->size()) {
std::vector<std::string> &Deps =
*(Clang->getCodeGenOpts().BomDependencies);
for (auto file : Deps) {
std::string Line = "\ninput " + file;
MetadataLines.push_back(Line);
std::string Line = "\ninput: " + getSHA1Hash(file) + " path: " + file;
MetadataLines.append(Line);
}
}

// Metadata contents
std::string MetadataContents;
for (auto line : MetadataLines)
MetadataContents.append(line);
std::string MetadataHdrContents;
llvm::sys::fs::make_absolute(OutFile);
MetadataHdrContents.append("output: ");
MetadataHdrContents.append(getSHA1Hash(OutFile.c_str()));
MetadataHdrContents.append(" path: ");
MetadataHdrContents.append(OutFile.c_str());

// Write command line
if (!Clang->getCodeGenOpts().OmniborCommandLine.empty()) {
MetadataContents.append("\nbuild_cmd: ");
MetadataContents.append(Clang->getCodeGenOpts().OmniborCommandLine);
}
std::string BuildCmdContents;
BuildCmdContents.append("\nbuild_cmd: ");
BuildCmdContents.append(Clang->getCodeGenOpts().OmniborCommandLine);
BuildCmdContents.append("\n==== End of raw info for this process\n");

// TODO: Optimize this to avoid recomputing the hash for the input
// dependencies.

// Write metadata
std::string artifact_id = getSHA1Hash(Clang->getFrontendOpts().OutputFile);
Expand All @@ -346,8 +347,11 @@ int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
llvm::raw_fd_ostream OSM(MetadataFile, EC, llvm::sys::fs::OF_TextWithCRLF);
if (EC) {
llvm::errs() << MetadataFile << EC.message();
} else {
OSM << MetadataHdrContents;
OSM << MetadataLines;
OSM << BuildCmdContents;
}
OSM << MetadataContents;
}

// When running with -disable-free, don't do any destruction or shutdown.
Expand Down
14 changes: 14 additions & 0 deletions llvm-project/lld/Common/Strings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,20 @@ bool StringMatcher::match(StringRef s) const {
return false;
}

// Returns the lowercase hexadecimal spelling of the bytes in Input. Every
// input byte yields exactly two output characters, high nibble first.
std::string lld::convertToHex(StringRef Input) {
  static const char HexDigits[] = "0123456789abcdef";

  std::string Hex;
  Hex.reserve(Input.size() * 2);
  for (const char Ch : Input) {
    // Go through unsigned char so bytes >= 0x80 index the table correctly.
    const auto Byte = static_cast<unsigned char>(Ch);
    Hex += HexDigits[Byte >> 4];
    Hex += HexDigits[Byte & 0x0f];
  }
  return Hex;
}

// Converts a hex string (e.g. "deadbeef") to a vector.
std::vector<uint8_t> lld::parseHex(StringRef s) {
std::vector<uint8_t> hex;
Expand Down
2 changes: 2 additions & 0 deletions llvm-project/lld/ELF/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ struct Configuration {
bool gcSections;
bool gdbIndex;
std::string OmniBorDir;
std::string SHA1_MetadataContents;
std::string CommandLine;
bool gnuHash = false;
bool gnuUnique;
bool hasDynSymTab;
Expand Down
25 changes: 8 additions & 17 deletions llvm-project/lld/ELF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,14 @@ static bool isValidReportString(StringRef arg) {

// Initializes Config members by the command line options.
static void readConfigs(opt::InputArgList &args) {

// Collect the command line arguments for ld.lld
config->CommandLine.append("ld.lld ");
for (unsigned i = 0; i < args.getNumInputArgStrings(); i++) {
config->CommandLine.append(args.getArgString(i));
config->CommandLine.append(" ");
}

errorHandler().verbose = args.hasArg(OPT_verbose);
errorHandler().vsDiagnostics =
args.hasArg(OPT_visual_studio_diagnostics_format, false);
Expand Down Expand Up @@ -1046,7 +1054,6 @@ static void readConfigs(opt::InputArgList &args) {
}
}
if (!gitOidPath.empty()) {
llvm::sys::path::append(gitOidPath, "objects");
config->OmniBorDir = gitOidPath.str().str();
auto EC = llvm::sys::fs::create_directories(config->OmniBorDir, true);
if (EC)
Expand Down Expand Up @@ -1758,22 +1765,6 @@ static void handleLibcall(StringRef name) {
sym->extract();
}

// TODO: Move this to a common location. This is
// being used by both clang and lld.
//
// Encodes each byte of Input as two lowercase hexadecimal digits (high nibble
// first) and returns the resulting string.
static std::string convertToHex(StringRef Input) {
  static const char *const Digits = "0123456789abcdef";
  const size_t NumBytes = Input.size();

  // Size the result up front and fill both digits of each byte by index.
  std::string Encoded(2 * NumBytes, '\0');
  for (size_t Idx = 0; Idx != NumBytes; ++Idx) {
    const unsigned char Byte = Input[Idx];
    Encoded[2 * Idx] = Digits[Byte >> 4];
    Encoded[2 * Idx + 1] = Digits[Byte & 15];
  }
  return Encoded;
}

// Handle --dependency-file=<path>. If that option is given, lld creates a
// file at a given path with the following contents:
//
Expand Down
42 changes: 25 additions & 17 deletions llvm-project/lld/ELF/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ struct Omnibor {
};
using FileHashBomMap = std::map<std::string, struct Omnibor>;

// Metadata contents compatible with bomsh
std::string MetadataContents;

constexpr size_t MergeNoTailSection::numShards;

static uint64_t readUint(uint8_t *buf) {
Expand Down Expand Up @@ -131,24 +134,21 @@ template <class ELFT> void BomSection<ELFT>::writeTo(uint8_t *buf) {
memcpy(buf + 20, sha256_contents.data(), sha256_contents.size());
}

// TODO: Move this function.
//
// Produces the lowercase hex text for the bytes of Input: two characters per
// byte, most significant nibble first.
static std::string convertToHex(StringRef Input) {
  constexpr char Nibbles[] = "0123456789abcdef";

  std::string Text;
  Text.reserve(Input.size() * 2);
  // Iterating as unsigned char keeps the nibble arithmetic non-negative.
  for (unsigned char Byte : Input) {
    Text.push_back(Nibbles[Byte / 16]);
    Text.push_back(Nibbles[Byte % 16]);
  }
  return Text;
}

static void genArtifactIds(FileHashBomMap &BomMap) {
struct Omnibor bomData;

std::string OutFilename(config->outputFile.str());
// StringRef BomFile = StringRef(OutFilename);

// Emit gitbom info for Outputfile
// Getting the hash of the outputfile is tricky as the file has not been
// written at this point.
MetadataContents.append("\noutput: ");
MetadataContents.append(" path: ");
SmallString<128> OutFile(OutFilename);
llvm::sys::fs::make_absolute(OutFile);
MetadataContents.append(OutFile.c_str());

for (StringRef path : config->dependencyFiles) {
llvm::SHA1 SHA1_Hash;
llvm::SHA256 SHA256_Hash;
Expand All @@ -166,6 +166,14 @@ static void genArtifactIds(FileHashBomMap &BomMap) {
auto Result_sha1 = SHA1_Hash.final();
bomData.sha1_artifact_id = convertToHex(Result_sha1);

// Collect Metadata
config->SHA1_MetadataContents.append("\ninput: ");
config->SHA1_MetadataContents.append(bomData.sha1_artifact_id);
config->SHA1_MetadataContents.append(" path: ");
SmallString<128> InFilename(path);
llvm::sys::fs::make_absolute(InFilename);
config->SHA1_MetadataContents.append(InFilename.c_str());

// sha256
SHA256_Hash.update(StringRef(initData));
SHA256_Hash.update(fileBuf.get()->getBuffer());
Expand Down Expand Up @@ -204,7 +212,7 @@ static std::string createSHA1_BomFile(FileHashBomMap &BomMap) {

SmallString<128> gitOidPath;
gitOidPath = StringRef(config->OmniBorDir);
llvm::sys::path::append(gitOidPath, "gitoid_blob_sha1");
llvm::sys::path::append(gitOidPath, "objects/gitoid_blob_sha1");
llvm::sys::path::append(gitOidPath, gitOid.substr(0, 2));
std::error_code EC;
EC = llvm::sys::fs::create_directories(gitOidPath, true);
Expand Down Expand Up @@ -248,7 +256,7 @@ static std::string createSHA256_BomFile(FileHashBomMap &BomMap) {

SmallString<128> gitOidPath;
gitOidPath = StringRef(config->OmniBorDir);
llvm::sys::path::append(gitOidPath, "gitoid_blob_sha256");
llvm::sys::path::append(gitOidPath, "objects/gitoid_blob_sha256");
llvm::sys::path::append(gitOidPath, gitOid.substr(0, 2));
std::error_code EC;
EC = llvm::sys::fs::create_directories(gitOidPath, true);
Expand Down
43 changes: 43 additions & 0 deletions llvm-project/lld/ELF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,23 @@ template <class ELFT> void elf::createSyntheticSections() {
add(*in.strTab);
}

static std::string getSHA1Hash(std::string Filename) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> fileBuf =
llvm::MemoryBuffer::getFile(Filename, /*IsText=*/true);
if (!fileBuf) {
error("\n Cannot open " + Filename);
return std::string();
}

llvm::SHA1 Hash;
std::string initData =
"blob " + std::to_string(fileBuf.get()->getBufferSize()) + '\0';
Hash.update(StringRef(initData));
Hash.update(fileBuf.get()->getBuffer());
auto Result = Hash.final();
return convertToHex(Result);
}

// The main function of the writer.
template <class ELFT> void Writer<ELFT>::run() {
copyLocalSymbols();
Expand Down Expand Up @@ -612,6 +629,32 @@ template <class ELFT> void Writer<ELFT>::run() {
if (auto e = buffer->commit())
error("failed to write to the output file: " + toString(std::move(e)));
}

// Write Omnibor metadata
std::error_code EC;
if (config->outputFile.str().empty() || config->OmniBorDir.empty())
return;
SmallString<128> MetadataFile(config->OmniBorDir);
MetadataFile.append("/metadata/llvm/");
EC = llvm::sys::fs::create_directories(MetadataFile, true);
SmallString<128> OutputArtifact(config->outputFile.str());
llvm::sys::fs::make_absolute(OutputArtifact);
std::string OutputArtifactSHA1 = getSHA1Hash(config->outputFile.str());
MetadataFile.append(OutputArtifactSHA1);
llvm::raw_fd_ostream OSM(MetadataFile, EC, llvm::sys::fs::OF_TextWithCRLF);
if (EC) {
error("\n failed to create metadata file " + MetadataFile.str().str());
return;
}
std::string MetadataHeader;
MetadataHeader.append("output: ");
MetadataHeader.append(OutputArtifactSHA1 +
" path: " + OutputArtifact.c_str());
config->SHA1_MetadataContents.append("\nbuild_cmd: " + config->CommandLine);
config->SHA1_MetadataContents.append(
"\n==== End of raw info for this process\n");
OSM << MetadataHeader;
OSM << config->SHA1_MetadataContents;
}

template <class ELFT, class RelTy>
Expand Down
1 change: 1 addition & 0 deletions llvm-project/lld/include/lld/Common/Strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ inline std::string demangle(llvm::StringRef symName, bool shouldDemangle) {
return std::string(symName);
}

std::string convertToHex(llvm::StringRef Input);
std::vector<uint8_t> parseHex(llvm::StringRef s);
bool isValidCIdentifier(llvm::StringRef s);

Expand Down
8 changes: 8 additions & 0 deletions llvm-project/lld/test/ELF/omnibor_test.s
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
# RUN: llvm-readelf -n %t/omnibor.exe | FileCheck --check-prefix=BOM_NOTE_SECTION %s
# RUN: cat %t/objects/gitoid_blob_sha1/62/db4ab2efaf0b3df12e5004ff2b002936b94697 | FileCheck --check-prefix=BOM_FILE_SHA1 %s
# RUN: cat %t/objects/gitoid_blob_sha256/6b/a8bcf3a1c3c7cafcfba5994ca7ee29bd47234f8616991717677836ce00dd95 | FileCheck --check-prefix=BOM_FILE_SHA256 %s
# RUN: ARTIFACT_ID=`git hash-object %t/omnibor.exe`
# RUN: cat %t/metadata/llvm/$ARTIFACT_ID | FileCheck --check-prefix=METADATA_CONTENTS %s

# RUN: rm -rf %t/objects
# RUN: ld.lld %t/omnibor.o -e main --omnibor=%t/omnibor_dir -o %t/omnibor_1.exe
Expand Down Expand Up @@ -40,3 +42,9 @@
# BOM_NOTE_SECTION-NEXT: SHA1 GitOID: 62db4ab2efaf0b3df12e5004ff2b002936b94697
# BOM_NOTE_SECTION-NEXT: 0x00000020 NT_GITOID_SHA256
# BOM_NOTE_SECTION-NEXT: SHA256 GitOID: 6ba8bcf3a1c3c7cafcfba5994ca7ee29bd47234f8616991717677836ce00dd95

# METADATA_CONTENTS: output: {{.*}} path: {{.*}}omnibor.exe
# METADATA_CONTENTS: input: {{.*}} path: {{.*}}omnibor.o
# METADATA_CONTENTS: build_cmd: ld.lld {{.*}}
# METADATA_CONTENTS: ==== End of raw info for this process

0 comments on commit 53fc4b2

Please sign in to comment.