Skip to content

Commit

Permalink
fix schema bugs and enable cloud scan.
Browse files Browse the repository at this point in the history
  • Loading branch information
ColinLeeo committed Oct 15, 2024
1 parent 4627878 commit 2945e34
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 127 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/cloud_code_scan.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Workflow that runs the Alipay Cloud DevOps code scan action for pull requests.
name: Alipay Cloud Devops Codescan
on:
# NOTE(review): pull_request_target runs in the context of the base repository, which is
# what makes the secrets used below available for PRs from forks — confirm the scan
# action never checks out or executes untrusted PR code with those secrets.
pull_request_target:

jobs:
deployment:
runs-on: ubuntu-latest
steps:
- name: codeScan
# Run the step only when the workflow executes in TuGraph-family/tugraph-db.
if: ${{ github.repository == 'TuGraph-family/tugraph-db' }}
# NOTE(review): the action is referenced by the moving `@main` ref rather than a
# pinned tag or commit — confirm this is intended.
uses: TuGraph-family/alipay-cloud-devops-codescan@main
with:
# Credentials are taken from the repository secrets ALI_PID and ALI_PK.
parent_uid: ${{ secrets.ALI_PID }}
private_key: ${{ secrets.ALI_PK }}
11 changes: 5 additions & 6 deletions deps/geax-front-end/cmake/Modules/FindAntlr4.cmake
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#if ("$ENV{JAVA_HOME}" STREQUAL "")
# set(Java_ROOT ${GEAX_THIRD_PARTY_DIR}/jdk-11.0.2)
#else()
# set(Java_ROOT "$ENV{JAVA_HOME}")
#endif()
set(Java_ROOT /usr/lib/jvm/java-11-openjdk-11.0.13.0.8-1.el8_4.x86_64)
if ("$ENV{JAVA_HOME}" STREQUAL "")
set(Java_ROOT ${GEAX_THIRD_PARTY_DIR}/jdk-11.0.2)
else()
set(Java_ROOT "$ENV{JAVA_HOME}")
endif()
set(Java_JAVA_EXECUTABLE ${Java_ROOT}/bin/java)
find_package(Java QUIET COMPONENTS Runtime)

Expand Down
68 changes: 68 additions & 0 deletions src/core/field_extractor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,74 @@

namespace lgraph {
namespace _detail {

/**
 * Writes the raw bytes of `data` into this field's slot inside `record`.
 *
 * The destination is `record.Data()` advanced by `GetFieldOffset(record, def_.id)`,
 * and exactly `data.Size()` bytes are copied.
 *
 * Debug-only checks:
 *  - the field is not a variable-length field;
 *  - `data.Size()` equals the size of the field's declared type;
 *  - `data.Size()` equals the size currently occupied by the field in `record`.
 *
 * \param [in,out] record The record whose field bytes are overwritten.
 * \param          data   The value providing the bytes to copy.
 */
void FieldExtractor::_SetFixedSizeValueRaw(Value& record, const Value& data) const {
    // Raw fixed-size writes must never target a variable-length field.
    FMA_DBG_ASSERT(!is_vfield_);

    const auto n_bytes = data.Size();
    // The incoming value has to match both the schema type size and the stored size.
    FMA_DBG_CHECK_EQ(n_bytes, field_data_helper::FieldTypeSize(def_.type));
    FMA_DBG_CHECK_EQ(n_bytes, GetDataSize(record));

    // Locate the field inside the record buffer and overwrite it in place.
    char* dst = (char*)record.Data() + GetFieldOffset(record, def_.id);
    memcpy(dst, data.Data(), n_bytes);
}

/**
 * Sets or clears this field's bit in the record's null array.
 *
 * For a non-optional field nothing is written: asking for null throws, asking for
 * non-null is a no-op. For an optional field, bit `def_.id` of the array returned by
 * `GetNullArray(record)` is set when `is_null` is true and cleared otherwise.
 * Although `record` is taken by const reference, its buffer is modified through that
 * pointer.
 *
 * \param record  The record whose null array is updated.
 * \param is_null True to mark the field as null, false to mark it as present.
 *
 * \throws FieldCannotBeSetNullException if the field is not optional and `is_null` is true.
 */
void FieldExtractor::SetIsNull(const Value& record, const bool is_null) const {
    if (!def_.optional) {
        if (!is_null) return;
        throw FieldCannotBeSetNullException(Name());
    }

    // One bit per field: byte index is id / 8, bit index inside the byte is id % 8.
    char& slot = GetNullArray(record)[def_.id / 8];
    const int bit = 0x1 << (def_.id % 8);
    if (is_null) {
        slot |= bit;
    } else {
        slot &= ~bit;
    }
}

size_t FieldExtractor::GetDataSize(const Value& record) const {
if (is_vfield_) {
DataOffset var_offset = ::lgraph::_detail::UnalignedGet<DataOffset>(
record.Data() + GetFieldOffset(record, def_.id));
// The length is stored at the beginning of the variable-length field data area.
return ::lgraph::_detail::UnalignedGet<DataOffset>(record.Data() + var_offset);
} else {
return GetFieldOffset(record, def_.id + 1) - GetFieldOffset(record, def_.id);
}
}

/**
 * Reads the FieldId stored at `count_offset_` bytes from the start of `record`.
 *
 * \param record The record to read from.
 * \return The stored count value.
 */
FieldId FieldExtractor::GetRecordCount(const Value& record) const {
    auto* count_ptr = record.Data() + count_offset_;
    return ::lgraph::_detail::UnalignedGet<FieldId>(count_ptr);
}

/**
 * Returns the position inside `record` at which the slot of field `id` starts.
 * Fixed-length and variable-length fields are treated the same way here.
 *
 * Field 0 starts right after the offset section, i.e. after the null array
 * (`(count + 7) / 8` bytes) and `count` DataOffset entries. For any other id the
 * start position is read from entry `id - 1` of the offset section.
 *
 * \param record The record to inspect.
 * \param id     The field id whose start position is requested.
 * \return The start position, measured from the beginning of the record.
 */
size_t FieldExtractor::GetFieldOffset(const Value& record, const FieldId id) const {
    const uint16_t count = GetRecordCount(record);
    // The offset section begins immediately after the null array.
    const auto offset_section = nullarray_offset_ + (count + 7) / 8;

    if (0 == id) {
        // Field 0 has no stored entry; it begins where the offset section ends.
        return offset_section + count * sizeof(DataOffset);
    }

    const size_t entry_pos = offset_section + (id - 1) * sizeof(DataOffset);
    return ::lgraph::_detail::UnalignedGet<DataOffset>(record.Data() + entry_pos);
}

size_t FieldExtractor::GetOffsetPosition(const Value& record, const FieldId id) const {
const FieldId count = GetRecordCount(record);
if (0 == id) {
return 0;
}
return nullarray_offset_ + (count + 7) / 8 + (id - 1) * sizeof(DataOffset);
}
/**
 * Returns a pointer to the start of this field's slot inside `record`, i.e.
 * `record.Data()` advanced by `GetFieldOffset(record, def_.id)`.
 *
 * \param record The record to inspect.
 * \return Pointer into the record's buffer.
 */
void* FieldExtractor::GetFieldPointer(const Value& record) const {
    const size_t field_start = GetFieldOffset(record, def_.id);
    char* base = (char*)record.Data();
    return base + field_start;
}

/**
* Print the string representation of the field. For digital types, it prints
* it into ASCII string; for NBytes and String, it just copies the content of
Expand Down
84 changes: 17 additions & 67 deletions src/core/field_extractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ class FieldExtractor {

void SetLabelInRecord(const bool label_in_record) {
label_in_record_ = label_in_record;
count_offset_ = sizeof(VersionId) + label_in_record ? sizeof(LabelId) : 0;
count_offset_ = sizeof(VersionId) + (label_in_record ? sizeof(LabelId) : 0);
nullarray_offset_ = count_offset_ + sizeof(FieldId);
}

Expand Down Expand Up @@ -353,43 +353,19 @@ class FieldExtractor {
return Value(decoded);
}

template <FieldType FT>
void _ParseStringAndSet(Value& record, const std::string& data) const;

void SetVariableOffset(Value& record, FieldId id, DataOffset offset) const {
size_t off = GetFieldOffset(record, id);
::lgraph::_detail::UnalignedSet<DataOffset>(record.Data() + off, offset);
}

void _SetFixedSizeValueRaw(Value& record, const Value& data) const {
// "Cannot call SetField(Value&, const T&) on a variable length field";
FMA_DBG_ASSERT(!is_vfield_);
// "Type size mismatch"
FMA_DBG_CHECK_EQ(data.Size(), field_data_helper::FieldTypeSize(def_.type));
FMA_DBG_CHECK_EQ(data.Size(), GetDataSize(record));
// copy the buffer so we don't accidentally overwrite memory
char* ptr = (char*)record.Data() + GetFieldOffset(record, def_.id);
memcpy(ptr, data.Data(), data.Size());
}
void _SetFixedSizeValueRaw(Value& record, const Value& data) const;

// set field value to null
void SetIsNull(const Value& record, const bool is_null) const {
if (!def_.optional) {
if (is_null) throw FieldCannotBeSetNullException(Name());
return;
}
// set the Kth bit from NullArray
char* arr = GetNullArray(record);
if (is_null) {
arr[def_.id / 8] |= (0x1 << (def_.id % 8));
} else {
arr[def_.id / 8] &= ~(0x1 << (def_.id % 8));
}
}
void SetIsNull(const Value& record, const bool is_null) const;

/**
* Extracts field data from the record to the buffer pointed to by data. This
* is for internal use only, the size MUST match the data size defined in schema.
* is for internal use only, the size MUST match the data size in record.
*
* \param record The record.
* \param [in,out] data If non-null, the data.
Expand All @@ -399,54 +375,28 @@ class FieldExtractor {
*/
void GetCopyRaw(const Value& record, void* data, size_t size) const {
size_t off = GetFieldOffset(record, def_.id);
FMA_DBG_ASSERT(off + size <= record.Size());
memcpy(data, record.Data() + off, size);
}

char* GetNullArray(const Value& record) const { return record.Data() + nullarray_offset_; }

size_t GetDataSize(const Value& record) const {
if (is_vfield_) {
DataOffset var_offset = ::lgraph::_detail::UnalignedGet<DataOffset>(
record.Data() + GetFieldOffset(record, def_.id));
DataOffset var_data_offset =
::lgraph::_detail::UnalignedGet<DataOffset>(record.Data() + var_offset);
// The length is stored at the beginning of the variable-length field data area.
return ::lgraph::_detail::UnalignedGet<DataOffset>(record.Data() + var_data_offset);
off = ::lgraph::_detail::UnalignedGet<DataOffset>(record.Data() + off);
FMA_DBG_ASSERT(off + size + sizeof(DataOffset) <= record.Size());
memcpy(data, record.Data() + off + sizeof(DataOffset), size);
} else {
return GetFieldOffset(record, def_.id + 1) - GetFieldOffset(record, def_.id);
FMA_DBG_ASSERT(off + size <= record.Size());
memcpy(data, record.Data() + off, size);
}
}

FieldId GetRecordCount(const Value& record) const {
return ::lgraph::_detail::UnalignedGet<FieldId>(record.Data() + count_offset_);
}
char* GetNullArray(const Value& record) const { return record.Data() + nullarray_offset_; }

size_t GetDataSize(const Value& record) const;

FieldId GetRecordCount(const Value& record) const;

/** Retrieve the starting position of the Field data for the given ID.
* Note that both fixed-length and variable-length data are not distinguished here.
*/
size_t GetFieldOffset(const Value& record, const FieldId id) const {
const uint16_t count = GetRecordCount(record);
if (0 == id) {
// The starting position of Field0 is at the end of the offset section.
return nullarray_offset_ + (count + 7) / 8 + count * sizeof(DataOffset);
}

size_t offset = 0;
offset = nullarray_offset_ + (count + 7) / 8 + (id - 1) * sizeof(DataOffset);
return ::lgraph::_detail::UnalignedGet<DataOffset>(record.Data() + offset);
}

size_t GetOffsetPosition(const Value& record, const FieldId id) const {
const FieldId count = GetRecordCount(record);
if (0 == id) {
return 0;
}
return nullarray_offset_ + (count + 7) / 8 + (id - 1) * sizeof(DataOffset);
}
void* GetFieldPointer(const Value& record) const {
return (char*)record.Data() + GetFieldOffset(record, def_.id);
}
size_t GetFieldOffset(const Value& record, const FieldId id) const;
size_t GetOffsetPosition(const Value& record, const FieldId id) const;
void* GetFieldPointer(const Value& record) const;
};

} // namespace _detail
Expand Down
30 changes: 21 additions & 9 deletions src/core/schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -737,14 +737,17 @@ void Schema::_SetVariableLengthValue(Value& record, const Value& data,
record.Resize(record.Size());

// move data to the correct position
int32_t diff = data.Size() + sizeof(uint32_t) - fsize;
int32_t diff = data.Size() - fsize;
if (diff > 0) {
record.Resize(record.Size() + diff);
memmove(rptr + variable_offset + sizeof(data), rptr + variable_offset + fsize,
record.Size() - (variable_offset + sizeof(data)));
rptr = (char*)record.Data();
memmove(rptr + variable_offset + sizeof(DataOffset) + data.Size(),
rptr + variable_offset + sizeof(DataOffset) + fsize,
record.Size() - (variable_offset + sizeof(DataOffset) + data.Size()));
} else {
memmove(rptr + variable_offset + sizeof(data), rptr + variable_offset + fsize,
record.Size() - (variable_offset + fsize));
memmove(rptr + variable_offset + sizeof(DataOffset) + data.Size(),
rptr + variable_offset + sizeof(DataOffset) + fsize,
record.Size() - (variable_offset + sizeof(DataOffset) + fsize));
record.Resize(record.Size() + diff);
}

Expand All @@ -758,7 +761,7 @@ void Schema::_SetVariableLengthValue(Value& record, const Value& data,
// update offset of other variable fields
size_t count = extractor->GetRecordCount(record);
// adjust offset of other fields
for (size_t i = extractor->GetFieldId(); i < count; i++) {
for (size_t i = extractor->GetFieldId() + 1; i < count; i++) {
if (fields_[i].IsFixedType()) continue;
size_t offset = extractor->GetFieldOffset(record, i);
size_t var_offset = ::lgraph::_detail::UnalignedGet<DataOffset>(rptr + offset);
Expand All @@ -783,7 +786,6 @@ void Schema::_ParseStringAndSet(Value& record, const std::string& data,
typedef typename field_data_helper::FieldType2StorageType<FT>::type ST;
CT s{};
size_t tmp = fma_common::TextParserUtils::ParseT<CT>(data.data(), data.data() + data.size(), s);
// error maybe there
if (_F_UNLIKELY(tmp != data.size())) throw ParseStringException(extractor->Name(), data, FT);
return SetFixedSizeValue(record, static_cast<ST>(s), extractor);
}
Expand Down Expand Up @@ -1038,12 +1040,11 @@ void Schema::SetSchema(bool is_vertex, size_t n_fields, const FieldSpec* fields,
const EdgeConstraints& edge_constraints) {
lgraph::CheckValidFieldNum(n_fields);
fields_.clear();
blob_fields_.clear();
name_to_idx_.clear();
fields_.reserve(n_fields);
for (size_t i = 0; i < n_fields; i++) {
fields_.emplace_back(fields[i]);
fields_[i].SetLabelInRecord(label_in_record_);
name_to_idx_[fields[i].name] = fields[i].id;
}
std::sort(fields_.begin(), fields_.end(),
[](const _detail::FieldExtractor& a, const _detail::FieldExtractor& b) {
Expand All @@ -1055,6 +1056,17 @@ void Schema::SetSchema(bool is_vertex, size_t n_fields, const FieldSpec* fields,
throw FieldIdConflictException(fields_[i].Name(), fields_[i - 1].Name());
}
}
for (auto& f : fields_) {
if (f.Type() == FieldType::NUL) throw FieldCannotBeNullTypeException(f.Name());
if (_F_UNLIKELY(name_to_idx_.find(f.Name()) != name_to_idx_.end()))
throw FieldAlreadyExistsException(f.Name());
name_to_idx_[f.Name()] = f.GetFieldId();
if (f.Type() == FieldType::BLOB) {
blob_fields_.push_back(f.GetFieldId());
}
f.SetLabelInRecord(label_in_record_);
}

is_vertex_ = is_vertex;
primary_field_ = primary;
temporal_field_ = temporal;
Expand Down
41 changes: 2 additions & 39 deletions src/core/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -626,45 +626,8 @@ class Schema {
s = BinaryRead(buf, detach_property_);
if (!s) return 0;
bytes_read += s;
FieldId pro_count = 0;
fields_.reserve(fds.size());
name_to_idx_.clear();
indexed_fields_.clear();
fulltext_fields_.clear();
bool found_primary = false;
for (const auto& f : fds) {
fields_[f.id] = _detail::FieldExtractor(f);
fields_[f.id].SetLabelInRecord(label_in_record_);
if (f.id >= pro_count) {
pro_count = f.id;
}
if (_F_UNLIKELY(name_to_idx_.find(f.name) != name_to_idx_.end())) {
throw FieldAlreadyExistsException(f.name);
}
name_to_idx_[f.name] = f.id;
if (fields_[f.id].GetVertexIndex() || fields_[f.id].GetEdgeIndex()) {
indexed_fields_.emplace_hint(indexed_fields_.end(), f.id);
if (f.name == primary_field_) {
FMA_ASSERT(!found_primary);
found_primary = true;
}
}
if (fields_[f.id].FullTextIndexed()) {
fulltext_fields_.emplace(f.id);
}
}

if (is_vertex_ && !indexed_fields_.empty()) {
FMA_ASSERT(found_primary);
}

if (pro_count != fds.size() - 1) {
std::string err_msg =
FMA_FMT("Schema fields deserialize error, fields num: {}, max id: {}.",
_detail::MAX_GRAPH_SIZE, fds.size(), pro_count);
throw std::runtime_error(err_msg);
}

SetSchema(is_vertex_, fds, primary_field_, temporal_field_, temporal_order_,
edge_constraints_);
return bytes_read;
}

Expand Down
14 changes: 8 additions & 6 deletions test/test_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ TEST_F(TestSchema, GetFieldId) {

TEST_F(TestSchema, DumpRecord) {
Value v_old("name");

Value v_new("name1");
Schema schema(false);
Schema schema_1(true);
Expand Down Expand Up @@ -194,12 +195,13 @@ TEST_F(TestSchema, DumpRecord) {
std::vector<std::string> value{"peter", "101", "65.25", "49", "fifth avenue"};
Value record = schema.CreateRecord(fid.size(), fid.data(), value.data());
// UT_LOG() << "record: " << schema.DumpRecord(record);
schema.GetFieldId("float");
schema.GetFieldExtractor("name");
schema.GetFieldExtractor("uid");
schema.GetFieldExtractor("weight");
schema.GetFieldExtractor("age");
schema.GetFieldExtractor("addr");

UT_EXPECT_EQ(schema.GetFieldId("float"), 5);
UT_EXPECT_EQ(schema.GetFieldExtractor("name")->FieldToString(record), "peter");
UT_EXPECT_EQ(schema.GetFieldExtractor("uid")->FieldToString(record), "101");
UT_EXPECT_EQ(schema.GetFieldExtractor("weight")->FieldToString(record), "6.525e1");
UT_EXPECT_EQ(schema.GetFieldExtractor("age")->FieldToString(record), "49");
UT_EXPECT_EQ(schema.GetFieldExtractor("addr")->FieldToString(record), "fifth avenue");
UT_EXPECT_THROW_CODE(schema.GetFieldExtractor("hash"), FieldNotFound);
UT_EXPECT_THROW_CODE(schema.GetFieldExtractor(1024), FieldNotFound);
const _detail::FieldExtractor fe_temp = *(schema.GetFieldExtractor("name"));
Expand Down

0 comments on commit 2945e34

Please sign in to comment.