Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cmake: Add make_cpio, a script for generating reproducible cpio(5) archives #67

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 9 additions & 47 deletions cmake-tool/helpers/cpio.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,10 @@

include_guard(GLOBAL)

# Probes whether `cpio -o` accepts a command-line argument and caches the
# answer.
#   var  - name of the cache variable that receives the result
#   flag - the cpio argument to try, e.g. "--reproducible"
# After the call, ${var} holds ${flag} if the probe command succeeded and the
# empty string if it failed. If ${var} is already defined the probe is skipped
# and the existing value is kept.
# Example: CheckCPIOArgument(cpio_reproducible_flag "--reproducible")
function(CheckCPIOArgument var flag)
    # A previous configure run (or the caller) already decided; nothing to do.
    if(DEFINED ${var})
        return()
    endif()

    # cpio reads the names of the files to archive from stdin, so the probe
    # needs a real file to name.
    set(cpio_probe_file "${CMAKE_CURRENT_BINARY_DIR}/cpio-testfile")
    file(WRITE "${cpio_probe_file}" "Testfile contents")
    execute_process(
        COMMAND bash -c "echo cpio-testfile | cpio ${flag} -o"
        WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
        OUTPUT_QUIET ERROR_QUIET
        RESULT_VARIABLE result
    )
    # A non-zero exit status (or a launch error string) means the flag is not
    # usable on this host.
    if(result)
        set(${var} "" CACHE INTERNAL "")
        message(STATUS "CPIO test ${var} FAILED")
    else()
        set(${var} "${flag}" CACHE INTERNAL "")
        message(STATUS "CPIO test ${var} PASSED")
    endif()
    file(REMOVE "${cpio_probe_file}")
endfunction()
# Path of the make_cpio.py helper that lives next to this module.
# CACHE INTERNAL already implies FORCE, so the entry is rewritten on every
# configure run without spelling FORCE out.
set(MAKE_CPIO_TOOL "${CMAKE_CURRENT_LIST_DIR}/make_cpio.py" CACHE INTERNAL "")

# Function for declaring rules to build a cpio archive that can be linked
# into another target
Expand All @@ -43,25 +25,7 @@ function(MakeCPIO output_name input_files)
if(NOT "${MAKE_CPIO_CPIO_SYMBOL}" STREQUAL "")
set(archive_symbol ${MAKE_CPIO_CPIO_SYMBOL})
endif()
# Check that the reproducible flag is available. Don't use it if it isn't.
CheckCPIOArgument(cpio_reproducible_flag "--reproducible")
set(append "")
set(
commands
"bash;-c;cpio ${cpio_reproducible_flag} --quiet --create -H newc --file=${CMAKE_CURRENT_BINARY_DIR}/archive.${output_name}.cpio;&&"
)
foreach(file IN LISTS input_files)
# Try and generate reproducible cpio meta-data as we do this:
# - touch -d @0 file sets the modified time to 0
# - --owner=root:root sets user and group values to 0:0
# - --reproducible creates reproducible archives with consistent inodes and device numbering
list(
APPEND
commands
"bash;-c; mkdir -p temp_${output_name} && cd temp_${output_name} && cp -a ${file} . && touch -d @0 `basename ${file}` && echo `basename ${file}` | cpio --append ${cpio_reproducible_flag} --owner=+0:+0 --quiet -o -H newc --file=${CMAKE_CURRENT_BINARY_DIR}/archive.${output_name}.cpio && rm `basename ${file}` && cd ../ && rmdir temp_${output_name};&&"
)
endforeach()
list(APPEND commands "true")

separate_arguments(cmake_c_flags_sep NATIVE_COMMAND "${CMAKE_C_FLAGS}")
if(CMAKE_C_COMPILER_ID STREQUAL "Clang")
list(APPEND cmake_c_flags_sep "${CMAKE_C_COMPILE_OPTIONS_TARGET}${CMAKE_C_COMPILER_TARGET}")
Expand All @@ -70,18 +34,16 @@ function(MakeCPIO output_name input_files)
add_custom_command(
OUTPUT ${output_name}
COMMAND rm -f archive.${output_name}.cpio
COMMAND ${commands}
COMMAND ${MAKE_CPIO_TOOL} ${CMAKE_CURRENT_BINARY_DIR}/archive.${output_name}.cpio
${input_files}
COMMAND
sh -c
"echo 'X.section ._archive_cpio,\"aw\"X.globl ${archive_symbol}, ${archive_symbol}_endX${archive_symbol}:X.incbin \"archive.${output_name}.cpio\"X${archive_symbol}_end:X' | tr X '\\n'"
> ${output_name}.S
COMMAND
${CMAKE_C_COMPILER} ${cmake_c_flags_sep} -c -o ${output_name} ${output_name}.S
COMMAND ${CMAKE_C_COMPILER} ${cmake_c_flags_sep} -c -o ${output_name} ${output_name}.S
DEPENDS ${input_files} ${MAKE_CPIO_DEPENDS}
VERBATIM
BYPRODUCTS
archive.${output_name}.cpio
${output_name}.S
BYPRODUCTS archive.${output_name}.cpio ${output_name}.S
COMMENT "Generate CPIO archive ${output_name}"
)
endfunction(MakeCPIO)
221 changes: 221 additions & 0 deletions cmake-tool/helpers/make_cpio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
#!/usr/bin/env python3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This introduces a dependency on libarchive. Is this dependency encoded somewhere?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume it's the same as the dependency here: https://github.com/seL4/seL4/blob/master/tools/python-deps/setup.py#L39

#-
# SPDX-License-Identifier: BSD-2-Clause
#
# Copyright 2020, Data61
# Commonwealth Scientific and Industrial Research Organisation (CSIRO)
# ABN 41 687 119 230.
#
# This software may be distributed and modified according to the terms
# of the BSD 2-Clause license. Note that NO WARRANTY is provided.
# See "LICENSE_BSD2.txt" for details.
#
# @TAG(DATA61_BSD)
#

#
# make_cpio --- create a newc-style cpio(5) archive without metadata
#
# previously, in an attempt to create a ``reproducible'' cpio(5) archive
# (i.e., without any especially-variable metadata), and as described in
# `cmake-tool/helpers/cpio.cmake':
#
# > Try and generate reproducible cpio meta-data as we do this:
# > - touch -d @0 file sets the modified time to 0
# > - --owner=root:root sets user and group values to 0:0
# > - --reproducible creates reproducible archives with consistent
# > inodes and device numbering
#
# that is, for every file to be archived: a copy was made, so that
# archiving it would throw away its partially- or fully-qualified path
# name; a GNU extension to touch(1) threw its timestamps away; and, via
# a GNU extension to cpio(1), its owner, group, device, and inode number
# were thrown away.
#
# there must be a better way!
#
# % make_cpio archive.cpio ../kernel.elf ../kernel.dtb
#
# (I wrote this in C first, and then was defeated by the cmake build
# system in my attempt to add it to the build. this is a very literal
# translation of that C into Python, which I don't know very well.)
#
# 2020-09-03 Jashank Jeremy <[email protected]>
#

import ctypes
import ctypes.util
from ctypes import c_char_p, c_int, c_int64, c_longlong, c_size_t, c_ssize_t, c_uint, c_void_p, c_wchar_p
import os
import stat
import sys
from typing import Any, List, NoReturn, Optional

# constants from <sysexits.h>, used here as process exit statuses:
EX_USAGE : int = 64
EX_SOFTWARE : int = 70

# constants from <archive.h>: status codes that libarchive calls return.
# main() compares call results against ARCHIVE_OK.
ARCHIVE_EOF = 1 # Found end of archive.
ARCHIVE_OK = 0 # Operation was successful.
ARCHIVE_RETRY = -10 # Retry might succeed.
ARCHIVE_WARN = -20 # Partial success.
ARCHIVE_FAILED = -25 # Current operation cannot complete.
ARCHIVE_FATAL = -30 # No more operations are possible.

def main (args: List[str]) -> int:
    """Create a newc-format cpio(5) archive with normalised metadata.

    `args` is the whole argument vector: the program name, the archive
    to create, then zero or more input files.  Every regular input file
    is stored under its base name with uid, gid, device and inode number
    set to 0 and all timestamps unset.

    An input that cannot be archived (no usable base name, cannot be
    opened or stat'ed, not a regular file) is reported on standard error
    and skipped; the remaining inputs are still archived.

    Returns 0 once the archive has been written and closed, or -1 if the
    archive could not be opened or finalised.  Exits with EX_USAGE when
    no archive name was given and with EX_SOFTWARE when libarchive
    cannot allocate a writer object.
    """
    if len(args) < 2:
        errx(EX_USAGE, "usage: make_cpio archive-file file...")
    archive_file, files = args[1], args[2:]

    A = load_libarchive()

    ark = A.archive_write_new()
    if ark is None:
        err(EX_SOFTWARE, "couldn't write archive object")

    if A.archive_write_set_format_cpio_newc(ark) != ARCHIVE_OK or \
       A.archive_write_open_filename_w(ark, archive_file) != ARCHIVE_OK:
        warnx(f'{archive_file}: {_archive_error(A, ark)}')
        A.archive_write_fail(ark)
        A.archive_write_free(ark)
        return -1

    for file in files:
        _add_file(A, ark, file)

    # Closing flushes the trailer and any buffered data; a failure here
    # (e.g. a full disk) means the archive on disk is not usable.
    status = 0
    if A.archive_write_close(ark) != ARCHIVE_OK:
        warnx(f'{archive_file}: {_archive_error(A, ark)}')
        status = -1
    A.archive_write_free(ark)
    return status


def _archive_error (A: Any, ark: Any) -> str:
    """Return libarchive's latest error message for `ark` as text."""
    raw = A.archive_error_string(ark)
    if raw is None:
        return 'unknown error'
    # The binding hands back bytes; decode so messages don't read b'...'.
    return raw.decode('utf-8', errors='replace')


def _add_file (A: Any, ark: Any, file: str) -> None:
    """Append one regular file to the open archive, or warn and skip it."""
    # Only the final path component is recorded.  A bare name without
    # any directory part is a perfectly good input; only a path with an
    # empty final component (e.g. 'dir/') has nothing to record.
    basename = os.path.basename(file)
    if not basename:
        warnx(f'skipping {file}: nonsense filename')
        return

    # os.open() signals failure by raising OSError, never by returning -1.
    try:
        fd = os.open(file, os.O_RDONLY)
    except OSError:
        warnx(f"skipping {file}: couldn't open")
        return

    entry = None
    try:
        try:
            sb = os.fstat(fd)
        except OSError:
            warnx(f"skipping {file}: couldn't stat")
            return

        if not stat.S_ISREG(sb.st_mode):
            warnx(f'skipping {file}: not a regular file')
            return

        entry = A.archive_entry_new()
        if entry is None:
            warnx(f"skipping {file}: couldn't make archive entry object")
            return

        # os.fstat() returns a Python stat_result, not a `struct stat',
        # so archive_entry_copy_stat(3) cannot be used; set the few
        # fields we want by hand and blank out everything variable.
        A.archive_entry_set_size(entry, sb.st_size)
        A.archive_entry_set_mode(entry, sb.st_mode)
        A.archive_entry_update_pathname_utf8(entry, basename.encode('utf8'))
        A.archive_entry_set_uid(entry, 0)
        A.archive_entry_set_gid(entry, 0)
        A.archive_entry_unset_ctime(entry)
        A.archive_entry_unset_birthtime(entry)
        A.archive_entry_unset_mtime(entry)
        A.archive_entry_unset_atime(entry)
        A.archive_entry_set_dev(entry, 0)
        A.archive_entry_set_ino64(entry, 0)

        if A.archive_write_header(ark, entry) != ARCHIVE_OK:
            warnx(f"abandoning {file}: couldn't write header: {_archive_error(A, ark)}")
            return

        bufsiz = 4096
        while True:
            buf = os.read(fd, bufsiz)
            if not buf:
                break
            # A negative return is a libarchive error code.
            if A.archive_write_data(ark, buf, len(buf)) < 0:
                warnx(f"abandoning {file}: couldn't write data: {_archive_error(A, ark)}")
                break

        if A.archive_write_finish_entry(ark) != ARCHIVE_OK:
            warnx(f"finishing '{file}': {_archive_error(A, ark)}")
    finally:
        # Runs on every exit path above, so neither the entry object nor
        # the descriptor can leak.
        if entry is not None:
            A.archive_entry_free(entry)
        os.close(fd)


def load_libarchive():
    """Locate libarchive, load it through ctypes and declare its prototypes.

    A non-empty LIBARCHIVE environment variable names the library to
    load; otherwise ctypes.util.find_library('archive') is consulted.
    Exits with EX_SOFTWARE when neither yields a path.  Returns the
    loaded library object.
    """
    path = os.environ.get('LIBARCHIVE')
    if not path:
        path = ctypes.util.find_library('archive')
    if path is None:
        errx(EX_SOFTWARE, "cannot find libarchive")

    lib = ctypes.cdll.LoadLibrary(path)
    declare_libarchive_types(lib)
    return lib

#
# the python binding doesn't expose parts of the `libarchive' api.
# because why would you ever want to write python.
#

# Opaque handles: `struct archive *' and `struct archive_entry *'.
c_archive_p = c_void_p
c_archive_entry_p = c_void_p

def declare_libarchive_types(a: ctypes.CDLL):
    """Attach argument and return types to the libarchive functions used here.

    `a` is the loaded libarchive library.  For each function listed
    below, the attribute `archive_<name>` of `a` gets its `argtypes` and
    `restype` set.  Each function appears exactly once; a restype of
    None means the return value is not used.
    """
    signatures = [
        # (name without the `archive_' prefix, argtypes, restype)
        ('write_new', [], c_archive_p),
        ('write_set_format_cpio_newc', [c_archive_p], c_int),
        ('write_open_filename_w', [c_archive_p, c_wchar_p], c_int),
        ('entry_new', [], c_archive_entry_p),
        ('entry_free', [c_archive_entry_p], None),
        ('entry_set_size', [c_archive_entry_p, c_longlong], None),
        ('entry_set_mode', [c_archive_entry_p, c_int], None),
        ('entry_update_pathname_utf8', [c_archive_entry_p, c_char_p], None),
        ('entry_set_uid', [c_archive_entry_p, c_longlong], None),
        ('entry_set_gid', [c_archive_entry_p, c_longlong], None),
        ('entry_unset_ctime', [c_archive_entry_p], None),
        ('entry_unset_birthtime', [c_archive_entry_p], None),
        ('entry_unset_mtime', [c_archive_entry_p], None),
        ('entry_unset_atime', [c_archive_entry_p], None),
        ('entry_set_dev', [c_archive_entry_p, c_uint], None),
        ('entry_set_ino64', [c_archive_entry_p, c_int64], None),
        ('write_header', [c_archive_p, c_archive_entry_p], c_int),
        ('write_data', [c_archive_p, c_void_p, c_size_t], c_ssize_t),
        ('write_finish_entry', [c_archive_p], c_int),
        ('error_string', [c_archive_p], c_char_p),
        ('write_close', [c_archive_p], c_int),
        ('write_fail', [c_archive_p], None),
        ('write_free', [c_archive_p], None),
    ]
    for name, argty, retty in signatures:
        fn = getattr(a, 'archive_' + name)
        fn.argtypes = argty
        fn.restype = retty


########################################################################

def warnx (message: str) -> None:
    """Write `message` followed by a newline to standard error."""
    print(message, file=sys.stderr)

def warn (message: str) -> None:
    """Like warnx(), with the placeholder suffix ': <errno>' appended."""
    warnx(message + ': <errno>')

def errx (code: int, message: str) -> NoReturn:
    """Report `message` via warnx(), then exit with status `code`."""
    warnx(message)
    raise SystemExit(code)

def err (code: int, message: str) -> NoReturn:
    """Report `message` via warn(), then exit with status `code`."""
    warn(message)
    raise SystemExit(code)

# Script entry point: pass the raw argument vector to main() and use its
# return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))