Skip to content

Commit

Permalink
refactor: better logging and checks for initial ndjson file checks in…
Browse files Browse the repository at this point in the history
… preprocessing (#490)
  • Loading branch information
Taepper authored Jun 18, 2024
1 parent ba97f0d commit d10a2f7
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/silo/preprocessing/metadata_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,13 @@ bool MetadataInfo::isNdjsonFileEmpty(const std::filesystem::path& ndjson_file) {
duckdb::DuckDB duck_db(nullptr);
duckdb::Connection connection(duck_db);

auto result = connection.Query(fmt::format(
const std::string query = fmt::format(
"SELECT COUNT(*) "
"FROM (SELECT * FROM read_json_auto(\"{}\") LIMIT 1);",
ndjson_file.string()
));
);
SPDLOG_DEBUG("ndjson emptiness-check: {}", query);
auto result = connection.Query(query);

auto row_count_value = result->GetValue<int64_t>(0, 0);
const int64_t row_count = duckdb::BigIntValue::Get(row_count_value);
Expand Down
9 changes: 9 additions & 0 deletions src/silo/preprocessing/preprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,21 @@ void Preprocessor::buildTablesFromNdjsonInput(const std::filesystem::path& file_
boost::join(MetadataInfo::getMetadataSQLTypes(database_config), ",")
));

SPDLOG_DEBUG("build - checking whether the file '{}' exists: ", file_name.string());
if (!std::filesystem::exists(file_name)) {
throw silo::preprocessing::PreprocessingException(
fmt::format("The specified input file {} does not exist.", file_name.string())
);
}

SPDLOG_DEBUG("build - checking whether the file '{}' is not a directory: ", file_name.string());
if (std::filesystem::is_directory(file_name)) {
throw silo::preprocessing::PreprocessingException(
fmt::format("The specified input file {} is a directory.", file_name.string())
);
}

SPDLOG_DEBUG("build - checking whether the file '{}' is empty: ", file_name.string());
if (MetadataInfo::isNdjsonFileEmpty(file_name)) {
SPDLOG_WARN(
"The specified input file {} is empty. Ignoring its content.", file_name.string()
Expand Down

0 comments on commit d10a2f7

Please sign in to comment.