diff --git a/.msvc/lgraph_core/lgraph_core.vcxproj b/.msvc/lgraph_core/lgraph_core.vcxproj
index 1ba212759b..b843b70418 100644
--- a/.msvc/lgraph_core/lgraph_core.vcxproj
+++ b/.msvc/lgraph_core/lgraph_core.vcxproj
@@ -25,6 +25,10 @@
+
+
+
+
diff --git a/.msvc/lgraph_core/lgraph_core.vcxproj.filters b/.msvc/lgraph_core/lgraph_core.vcxproj.filters
index 600c55863f..1d317bc410 100644
--- a/.msvc/lgraph_core/lgraph_core.vcxproj.filters
+++ b/.msvc/lgraph_core/lgraph_core.vcxproj.filters
@@ -114,6 +114,10 @@
 Source Files
+
+
+ Source Files
+
 Source Files
diff --git a/include/lgraph/lgraph_types.h b/include/lgraph/lgraph_types.h
index aa9c50e33d..482e55fecb 100644
--- a/include/lgraph/lgraph_types.h
+++ b/include/lgraph/lgraph_types.h
@@ -1324,8 +1324,16 @@ struct FieldSpec {
           inited_value(true),
           default_value(dv),
           set_default_value(true) {}
-    // explicit FieldSpecV2(const FieldSpec& spec)
-    //     : name(spec.name), type(spec.type), optional(spec.optional), deleted(false), id(0) {}
+    FieldSpec(const FieldSpec& spec)  // copy constructor: member-wise copy of every field
+        : name(spec.name),
+          type(spec.type),
+          optional(spec.optional),
+          deleted(spec.deleted),  // was reset to false, so a copy silently "undeleted" the field
+          id(spec.id),            // was reset to 0, so a copy lost the field id
+          init_value(spec.init_value),
+          inited_value(spec.inited_value),
+          default_value(spec.default_value),
+          set_default_value(spec.set_default_value) {}
 
     inline bool operator==(const FieldSpec& rhs) const {
         return name == rhs.name && type == rhs.type && optional == rhs.optional &&
diff --git a/src/BuildLGraphApi.cmake b/src/BuildLGraphApi.cmake
index ac813cb97c..5ecce9ea9e 100644
--- a/src/BuildLGraphApi.cmake
+++ b/src/BuildLGraphApi.cmake
@@ -27,6 +27,7 @@ set(LGRAPH_CORE_SRC
         core/audit_logger.cpp
         core/data_type.cpp
         core/edge_index.cpp
+        core/field_extractor_base.cpp
         core/field_extractor.cpp
         core/field_extractor_v2.cpp
         core/full_text_index.cpp
diff --git a/src/BuildLGraphApiForJNI.cmake b/src/BuildLGraphApiForJNI.cmake
index ce5ba046b8..154c902db5 100644
--- a/src/BuildLGraphApiForJNI.cmake
+++ b/src/BuildLGraphApiForJNI.cmake
@@ -31,6 +31,7 @@ set(LGRAPH_CORE_SRC
core/audit_logger.cpp core/data_type.cpp core/edge_index.cpp + core/field_extractor_base.cpp core/field_extractor.cpp core/field_extractor_v2.cpp core/full_text_index.cpp diff --git a/src/core/field_extractor.cpp b/src/core/field_extractor.cpp index bfc4096f87..4052a847a5 100644 --- a/src/core/field_extractor.cpp +++ b/src/core/field_extractor.cpp @@ -27,6 +27,32 @@ namespace _detail { * \return ErrorCode::OK if succeeds * FIELD_PARSE_FAILED. */ + +bool FieldExtractor::GetIsNull(const Value& record) const { + if (!IsOptional()) { + return false; + } else { + // get the Kth bit from NullArray + char* arr = GetNullArray(record); + return arr[null_bit_off_ / 8] & (0x1 << (null_bit_off_ % 8)); + } +} + +// set field value to null +void FieldExtractor::SetIsNull(const Value& record, bool is_null) const { + if (!IsOptional()) { + if (is_null) throw FieldCannotBeSetNullException(Name()); + return; + } + // set the Kth bit from NullArray + char* arr = GetNullArray(record); + if (is_null) { + arr[null_bit_off_ / 8] |= (0x1 << (null_bit_off_ % 8)); + } else { + arr[null_bit_off_ / 8] &= ~(0x1 << (null_bit_off_ % 8)); + } +} + template void FieldExtractor::_ParseStringAndSet(Value& record, const std::string& data) const { typedef typename field_data_helper::FieldType2CType::type CT; @@ -50,7 +76,7 @@ void FieldExtractor::_ParseStringAndSet(Value& record, // check whether the point data is valid; if (!::lgraph_api::TryDecodeEWKB(data, ::lgraph_api::SpatialType::POINT)) throw ParseStringException(Name(), data, FieldType::POINT); - // FMA_DBG_CHECK_EQ(sizeof(data), field_data_helper::FieldTypeSize(def_.type)); + // FMA_DBG_CHECK_EQ(sizeof(data), field_data_helper::FieldTypeSize(Type())); size_t Size = record.Size(); record.Resize(Size); char* ptr = (char*)record.Data() + offset_.data_off; @@ -59,7 +85,7 @@ void FieldExtractor::_ParseStringAndSet(Value& record, template <> void FieldExtractor::_ParseStringAndSet(Value& record, - const std::string& data) const { + const 
std::string& data) const { // check whether the linestring data is valid; if (!::lgraph_api::TryDecodeEWKB(data, ::lgraph_api::SpatialType::LINESTRING)) throw ParseStringException(Name(), data, FieldType::LINESTRING); @@ -68,7 +94,7 @@ void FieldExtractor::_ParseStringAndSet(Value& record, template <> void FieldExtractor::_ParseStringAndSet(Value& record, - const std::string& data) const { + const std::string& data) const { if (!::lgraph_api::TryDecodeEWKB(data, ::lgraph_api::SpatialType::POLYGON)) throw ParseStringException(Name(), data, FieldType::POLYGON); return _SetVariableLengthValue(record, Value::ConstRef(data)); @@ -76,7 +102,7 @@ void FieldExtractor::_ParseStringAndSet(Value& record, template <> void FieldExtractor::_ParseStringAndSet(Value& record, - const std::string& data) const { + const std::string& data) const { ::lgraph_api::SpatialType s; // throw ParseStringException in this function; try { @@ -92,7 +118,7 @@ void FieldExtractor::_ParseStringAndSet(Value& record, template <> void FieldExtractor::_ParseStringAndSet(Value& record, - const std::string& data) const { + const std::string& data) const { std::vector vec; // check if there are only numbers and commas std::regex nonNumbersAndCommas("[^0-9,.]"); @@ -131,15 +157,15 @@ void FieldExtractor::_ParseStringAndSet(Value& record, * FIELD_PARSE_FAILED. 
*/ void FieldExtractor::ParseAndSet(Value& record, const std::string& data) const { - if (data.empty() && (field_data_helper::IsFixedLengthFieldType(def_.type) - || def_.type == FieldType::LINESTRING || def_.type == FieldType::POLYGON - || def_.type == FieldType::SPATIAL || def_.type == FieldType::FLOAT_VECTOR)) { + if (data.empty() && + (IsFixedType() || Type() == FieldType::LINESTRING || Type() == FieldType::POLYGON || + Type() == FieldType::SPATIAL || Type() == FieldType::FLOAT_VECTOR)) { SetIsNull(record, true); return; } // empty string is treated as non-NULL SetIsNull(record, false); - switch (def_.type) { + switch (Type()) { case FieldType::BOOL: return _ParseStringAndSet(record, data); case FieldType::INT8: @@ -177,7 +203,7 @@ void FieldExtractor::ParseAndSet(Value& record, const std::string& data) const { case FieldType::NUL: LOG_ERROR() << "NUL FieldType"; } - LOG_ERROR() << "Data type " << field_data_helper::FieldTypeName(def_.type) << " not handled"; + LOG_ERROR() << "Data type " << field_data_helper::FieldTypeName(Type()) << " not handled"; } // parse data from FieldData and set field @@ -190,7 +216,7 @@ void FieldExtractor::ParseAndSet(Value& record, const FieldData& data) const { #define _SET_FIXED_TYPE_VALUE_FROM_FD(ft) \ do { \ - if (data.type == def_.type) { \ + if (data.type == Type()) { \ return SetFixedSizeValue(record, \ field_data_helper::GetStoredValue(data)); \ } else { \ @@ -201,7 +227,7 @@ void FieldExtractor::ParseAndSet(Value& record, const FieldData& data) const { } \ } while (0) - switch (def_.type) { + switch (Type()) { case FieldType::BOOL: _SET_FIXED_TYPE_VALUE_FROM_FD(BOOL); case FieldType::INT8: @@ -248,39 +274,39 @@ void FieldExtractor::ParseAndSet(Value& record, const FieldData& data) const { } case FieldType::LINESTRING: { - if (data.type != FieldType::LINESTRING && data.type != FieldType::STRING) - throw ParseFieldDataException(Name(), data, Type()); - if (!::lgraph_api::TryDecodeEWKB(*data.data.buf, 
::lgraph_api::SpatialType::LINESTRING)) + if (data.type != FieldType::LINESTRING && data.type != FieldType::STRING) + throw ParseFieldDataException(Name(), data, Type()); + if (!::lgraph_api::TryDecodeEWKB(*data.data.buf, ::lgraph_api::SpatialType::LINESTRING)) throw ParseStringException(Name(), *data.data.buf, FieldType::LINESTRING); - return _SetVariableLengthValue(record, Value::ConstRef(*data.data.buf)); + return _SetVariableLengthValue(record, Value::ConstRef(*data.data.buf)); } case FieldType::POLYGON: { - if (data.type != FieldType::POLYGON && data.type != FieldType::STRING) - throw ParseFieldDataException(Name(), data, Type()); - if (!::lgraph_api::TryDecodeEWKB(*data.data.buf, ::lgraph_api::SpatialType::POLYGON)) + if (data.type != FieldType::POLYGON && data.type != FieldType::STRING) + throw ParseFieldDataException(Name(), data, Type()); + if (!::lgraph_api::TryDecodeEWKB(*data.data.buf, ::lgraph_api::SpatialType::POLYGON)) throw ParseStringException(Name(), *data.data.buf, FieldType::POLYGON); - return _SetVariableLengthValue(record, Value::ConstRef(*data.data.buf)); + return _SetVariableLengthValue(record, Value::ConstRef(*data.data.buf)); } case FieldType::SPATIAL: { - if (data.type != FieldType::SPATIAL && data.type != FieldType::STRING) - throw ParseFieldDataException(Name(), data, Type()); - ::lgraph_api::SpatialType s; + if (data.type != FieldType::SPATIAL && data.type != FieldType::STRING) + throw ParseFieldDataException(Name(), data, Type()); + ::lgraph_api::SpatialType s; - // throw ParseStringException in this function; - try { - s = ::lgraph_api::ExtractType(*data.data.buf); - } catch (...) { - throw ParseStringException(Name(), *data.data.buf, FieldType::SPATIAL); - } + // throw ParseStringException in this function; + try { + s = ::lgraph_api::ExtractType(*data.data.buf); + } catch (...) 
{ + throw ParseStringException(Name(), *data.data.buf, FieldType::SPATIAL); + } - if (!::lgraph_api::TryDecodeEWKB(*data.data.buf, s)) + if (!::lgraph_api::TryDecodeEWKB(*data.data.buf, s)) throw ParseStringException(Name(), *data.data.buf, FieldType::SPATIAL); - return _SetVariableLengthValue(record, Value::ConstRef(*data.data.buf)); + return _SetVariableLengthValue(record, Value::ConstRef(*data.data.buf)); } case FieldType::FLOAT_VECTOR: { @@ -290,103 +316,8 @@ void FieldExtractor::ParseAndSet(Value& record, const FieldData& data) const { return _SetVariableLengthValue(record, Value::ConstRef(*data.data.vp)); } default: - LOG_ERROR() << "Data type " << field_data_helper::FieldTypeName(def_.type) - << " not handled"; - } -} - -/** - * Print the string representation of the field. For digital types, it prints - * it into ASCII string; for NBytes and String, it just copies the content of - * the field into the string. - * - * \param record The record. - * - * \return String representation of the field. 
- */ -std::string FieldExtractor::FieldToString(const Value& record) const { - if (GetIsNull(record)) return "\"null\""; - std::string ret; - -#define _COPY_FIELD_AND_RETURN_STR_(record, ft) \ - do { \ - typename field_data_helper::FieldType2StorageType::type d = 0; \ - typedef typename field_data_helper::FieldType2CType::type CT; \ - GetCopy(record, d); \ - return fma_common::StringFormatter::Format("{}", static_cast(d)); \ - } while (0) - - switch (def_.type) { - case FieldType::BOOL: - _COPY_FIELD_AND_RETURN_STR_(record, BOOL); - case FieldType::INT8: - _COPY_FIELD_AND_RETURN_STR_(record, INT8); - case FieldType::INT16: - _COPY_FIELD_AND_RETURN_STR_(record, INT16); - case FieldType::INT32: - _COPY_FIELD_AND_RETURN_STR_(record, INT32); - case FieldType::INT64: - _COPY_FIELD_AND_RETURN_STR_(record, INT64); - case FieldType::FLOAT: - _COPY_FIELD_AND_RETURN_STR_(record, FLOAT); - case FieldType::DOUBLE: - _COPY_FIELD_AND_RETURN_STR_(record, DOUBLE); - case FieldType::DATE: - { - int32_t i; - GetCopy(record, i); - return Date(i).ToString(); - } - case FieldType::DATETIME: - { - int64_t i; - GetCopy(record, i); - return DateTime(i).ToString(); - } - case FieldType::STRING: - { - std::string ret(GetDataSize(record), 0); - GetCopyRaw(record, &ret[0], ret.size()); - return ret; - } - case FieldType::BLOB: - { - // std::string ret(GetDataSize(record), 0); - // GetCopyRaw(record, &ret[0], ret.size()); - // return ::lgraph_api::base64::Encode(ret.substr(2)); - return fma_common::StringFormatter::Format("[BLOB]"); - } - case FieldType::POINT: - case FieldType::LINESTRING: - case FieldType::POLYGON: - case FieldType::SPATIAL: - { - std::string ret(GetDataSize(record), 0); - GetCopyRaw(record, &ret[0], ret.size()); - return ret; - } - case FieldType::FLOAT_VECTOR: - { - std::string vec_str; - for (size_t i = 0; i < record.AsType>().size(); i++) { - auto floatnum = record.AsType>().at(i); - if (record.AsType>().at(i) > 999999) { - vec_str += std::to_string(floatnum).substr(0, 
7); - } else { - vec_str += std::to_string(floatnum).substr(0, 8); - } - vec_str += ','; - } - if (!vec_str.empty()) { - vec_str.pop_back(); - } - return vec_str; - } - case lgraph_api::NUL: - break; + LOG_ERROR() << "Data type " << field_data_helper::FieldTypeName(Type()) << " not handled"; } - LOG_ERROR() << "Data type " << field_data_helper::FieldTypeName(def_.type) << " not handled"; - return ""; } // sets variable length value to the field diff --git a/src/core/field_extractor.h b/src/core/field_extractor.h index 764e874e19..24b057a6fb 100644 --- a/src/core/field_extractor.h +++ b/src/core/field_extractor.h @@ -14,11 +14,11 @@ #pragma once +#include "core/field_extractor_base.h" + #include "core/blob_manager.h" #include "core/field_data_helper.h" #include "core/vertex_index.h" -#include "core/edge_index.h" -#include "core/schema_common.h" #include "core/vector_index.h" #include "core/vsag_hnsw.h" @@ -27,18 +27,10 @@ class Schema; namespace _detail { -#define ENABLE_IF_FIXED_FIELD(_TYPE_, _RT_) \ - template \ - typename std::enable_if< \ - std::is_integral<_TYPE_>::value || std::is_floating_point<_TYPE_>::value, _RT_>::type - /** A field extractor can be used to get/set a field in the record. 
*/
-class FieldExtractor {
+class FieldExtractor : public FieldExtractorBase {
     friend class lgraph::Schema;
-    // type information
-    FieldSpec def_;
     // layout
-    size_t field_id_ = 0;
     bool is_vfield_ = false;
     union {
         size_t data_off = 0;
@@ -50,164 +42,54 @@ class FieldExtractor {
     } offset_;
     size_t nullable_array_off_ = 0;  // offset of nullable array in record
     size_t null_bit_off_ = 0;
-    // index
-    std::unique_ptr<VertexIndex> vertex_index_;
-    std::unique_ptr<EdgeIndex> edge_index_;
-    // fulltext index
-    bool fulltext_indexed_ = false;
-    // vector index
-    std::shared_ptr<VectorIndex> vector_index_;
 
  public:
-    FieldExtractor() : null_bit_off_(0), vertex_index_(nullptr),
-                       edge_index_(nullptr), vector_index_(nullptr) {}
-
-    ~FieldExtractor() {}
+    FieldExtractor() : FieldExtractorBase() {}
 
-    FieldExtractor(const FieldExtractor& rhs) {
-        def_ = rhs.def_;
-        field_id_ = rhs.field_id_;
-        is_vfield_ = rhs.is_vfield_;
+    FieldExtractor(const FieldExtractor& rhs) : FieldExtractorBase(rhs.GetFieldSpec()) {  // NOTE(review): base is rebuilt from the FieldSpec only
+        is_vfield_ = !rhs.IsFixedType();
         offset_ = rhs.offset_;
         nullable_array_off_ = rhs.nullable_array_off_;
         null_bit_off_ = rhs.null_bit_off_;
-        vertex_index_.reset(rhs.vertex_index_ ? new VertexIndex(*rhs.vertex_index_) : nullptr);
-        edge_index_.reset(rhs.edge_index_ ? new EdgeIndex(*rhs.edge_index_) : nullptr);
-        fulltext_indexed_ = rhs.fulltext_indexed_;
-        vector_index_ = rhs.vector_index_;
     }
 
-    FieldExtractor& operator=(const FieldExtractor& rhs) {
-        if (this == &rhs) return *this;
-        def_ = rhs.def_;
-        field_id_ = rhs.field_id_;
-        is_vfield_ = rhs.is_vfield_;
+    FieldExtractor(FieldExtractor&& rhs) noexcept : FieldExtractorBase(std::move(rhs)) {
+        is_vfield_ = !IsFixedType();  // read our own spec; rhs has already been handed to the base
         offset_ = rhs.offset_;
         null_bit_off_ = rhs.null_bit_off_;
         nullable_array_off_ = rhs.nullable_array_off_;
-        vertex_index_.reset(rhs.vertex_index_ ? new VertexIndex(*rhs.vertex_index_) : nullptr);
-        edge_index_.reset(rhs.edge_index_ ? new EdgeIndex(*rhs.edge_index_) : nullptr);
-        fulltext_indexed_ = rhs.fulltext_indexed_;
-        vector_index_ = rhs.vector_index_;
-        return *this;
     }
 
-    FieldExtractor(FieldExtractor&& rhs) noexcept {
-        def_ = std::move(rhs.def_);
-        field_id_ = rhs.field_id_;
-        is_vfield_ = rhs.is_vfield_;
+    FieldExtractor& operator=(const FieldExtractor& rhs) {
+        if (this == &rhs) return *this;
+        FieldExtractorBase::operator=(rhs);  // copy-assign the base; std::move on a const reference only obscured that
+        is_vfield_ = !rhs.IsFixedType();     // variable-length means "not fixed", same as the constructors
         offset_ = rhs.offset_;
         null_bit_off_ = rhs.null_bit_off_;
         nullable_array_off_ = rhs.nullable_array_off_;
-        vertex_index_ = std::move(rhs.vertex_index_);
-        edge_index_ = std::move(rhs.edge_index_);
-        rhs.vertex_index_ = nullptr;
-        rhs.edge_index_ = nullptr;
-        fulltext_indexed_ = rhs.fulltext_indexed_;
-        vector_index_ = std::move(rhs.vector_index_);
-        rhs.vector_index_ = nullptr;
+        return *this;
     }
 
     FieldExtractor& operator=(FieldExtractor&& rhs) noexcept {
         if (this == &rhs) return *this;
-        def_ = std::move(rhs.def_);
-        field_id_ = rhs.field_id_;
-        is_vfield_ = rhs.is_vfield_;
+        FieldExtractorBase::operator=(std::move(rhs));
+        is_vfield_ = !IsFixedType();  // was rhs.IsFixedType(): inverted, and read from the moved-from object
         offset_ = rhs.offset_;
         null_bit_off_ = rhs.null_bit_off_;
         nullable_array_off_ = rhs.nullable_array_off_;
-        vertex_index_ = std::move(rhs.vertex_index_);
-        edge_index_ = std::move(rhs.edge_index_);
-        fulltext_indexed_ = rhs.fulltext_indexed_;
-        vector_index_ = std::move(rhs.vector_index_);
         return *this;
     }
 
+    ~FieldExtractor() override = default;
+
     // for test only
-    explicit FieldExtractor(const FieldSpec& d) noexcept : def_(d) {
-        is_vfield_ = !field_data_helper::IsFixedLengthFieldType(d.type);
-        vertex_index_ = nullptr;
-        edge_index_ = nullptr;
-        vector_index_ = nullptr;
+    explicit FieldExtractor(const FieldSpec& d) noexcept : FieldExtractorBase(d) {
         null_bit_off_ = 0;
+        is_vfield_ = !field_data_helper::IsFixedLengthFieldType(d.type);
         if (is_vfield_) SetVLayoutInfo(d.optional ?
1 : 0, 1, 0); } - const FieldSpec& GetFieldSpec() const { return def_; } - - bool GetIsNull(const Value& record) const { - if (!def_.optional) { - return false; - } else { - // get the Kth bit from NullArray - char* arr = GetNullArray(record); - return arr[null_bit_off_ / 8] & (0x1 << (null_bit_off_ % 8)); - } - } - - /** - * Extract a field from record into data of type T. T must be fixed-length - * type. - * - * \param record The record in which fields are stored. - * \param data Place where the extracted data will be stored. - * - * Assert fails if data is corrupted. - */ - ENABLE_IF_FIXED_FIELD(T, void) GetCopy(const Value& record, T& data) const { - FMA_DBG_ASSERT(field_data_helper::FieldTypeSize(def_.type) == sizeof(T)); - FMA_DBG_ASSERT(offset_.data_off + field_data_helper::FieldTypeSize(def_.type) <= - record.Size()); - memcpy(&data, (char*)record.Data() + offset_.data_off, sizeof(T)); - } - - /** - * Extracts a copy of field into the string. - * - * \param record The record. - * \param [in,out] data The result data. - * - * Assert fails if data is corrupted. - */ - void GetCopy(const Value& record, std::string& data) const { - FMA_DBG_ASSERT(Type() != FieldType::BLOB); - data.resize(GetDataSize(record)); - GetCopyRaw(record, &data[0], data.size()); - } - - /** - * Extracts field data from the record - * - * \param record The record. - * \param [in,out] data The result. - * - * Assert fails if data is corrupted. - */ - void GetCopy(const Value& record, Value& data) const { - data.Resize(GetDataSize(record)); - GetCopyRaw(record, data.Data(), data.Size()); - } - - // Gets a const reference of the field. 
- // Formatted data is returned for blob, which means [is_large_blob] [blob_data | blob_key] - Value GetConstRef(const Value& record) const { - if (GetIsNull(record)) return Value(); - return Value((char*)GetFieldPointer(record), GetDataSize(record)); - } - - // gets a const ref to the blob content - // get_blob_by_key is a function that accepts BlobKey and returns Value containing blob content - template - Value GetBlobConstRef(const Value& record, const GetBlobByKeyFunc& get_blob_by_key) const { - FMA_DBG_ASSERT(Type() == FieldType::BLOB); - if (GetIsNull(record)) return Value(); - Value v((char*)GetFieldPointer(record), GetDataSize(record)); - if (BlobManager::IsLargeBlob(v)) { - return get_blob_by_key(BlobManager::GetLargeBlobKey(v)); - } else { - return BlobManager::GetSmallBlobContent(v); - } - } + bool GetIsNull(const Value& record) const override; // parse a string as input and then set field in record // cannot be used for blobs since they need formatting @@ -226,7 +108,7 @@ class FieldExtractor { const StoreBlobAndGetKeyFunc& store_blob) const { FMA_DBG_ASSERT(Type() == FieldType::BLOB); bool is_null; - Value v = ParseBlob(data, is_null); + Value v = FieldExtractorBase::ParseBlob(data, is_null); SetIsNull(record, is_null); if (is_null) return; if (v.Size() <= _detail::MAX_IN_PLACE_BLOB_SIZE) { @@ -251,46 +133,7 @@ class FieldExtractor { } } - const std::string& Name() const { return def_.name; } - - FieldType Type() const { return def_.type; } - - size_t TypeSize() const { return field_data_helper::FieldTypeSize(def_.type); } - - size_t DataSize(const Value& record) const { return GetDataSize(record); } - - bool IsOptional() const { return def_.optional; } - - /** - * Print the string representation of the field. For digital types, it prints - * it into ASCII string; for NBytes and String, it just copies the content of - * the field into the string. - * - * \param record The record. - * - * \return String representation of the field. 
- */ - std::string FieldToString(const Value& record) const; - - VertexIndex* GetVertexIndex() const { return vertex_index_.get(); } - - EdgeIndex* GetEdgeIndex() const { return edge_index_.get(); } - - bool FullTextIndexed() const { return fulltext_indexed_; } - - VectorIndex* GetVectorIndex() const { return vector_index_.get(); } - - size_t GetFieldId() const { return field_id_; } - private: - void SetVertexIndex(VertexIndex* index) { vertex_index_.reset(index); } - - void SetEdgeIndex(EdgeIndex* edgeindex) { edge_index_.reset(edgeindex); } - - void SetVectorIndex(VectorIndex* vectorindex) { vector_index_.reset(vectorindex); } - - void SetFullTextIndex(bool fulltext_indexed) { fulltext_indexed_ = fulltext_indexed; } - void SetFixedLayoutInfo(size_t offset) { is_vfield_ = false; offset_.data_off = offset; @@ -307,43 +150,9 @@ class FieldExtractor { void SetNullableArrayOff(size_t offset) { nullable_array_off_ = offset; } - void SetFieldId(size_t n) { field_id_ = n; } - //----------------------- // record accessors - // get a const ref of raw blob data - inline Value ParseBlob(const FieldData& fd, bool& is_null) const { - if (fd.type == FieldType::NUL) { - is_null = true; - return Value(); - } - is_null = false; - if (fd.type == FieldType::BLOB) { - return Value::ConstRef(*fd.data.buf); - } - if (fd.type == FieldType::STRING) { - std::string decoded; - const std::string& s = *fd.data.buf; - if (!::lgraph_api::base64::TryDecode(s.data(), s.size(), decoded)) - throw ParseStringException(Name(), s, Type()); - return Value(decoded); - } else { - throw ParseIncompatibleTypeException(Name(), fd.type, FieldType::BLOB); - return Value(); - } - } - - inline Value ParseBlob(const std::string& str, bool& is_null) const { - // string input is always seen as non-NULL - is_null = false; - // decode str as base64 - std::string decoded; - if (!::lgraph_api::base64::TryDecode(str.data(), str.size(), decoded)) - throw ParseStringException(Name(), str, Type()); - return 
Value(decoded); - } - template void _ParseStringAndSet(Value& record, const std::string& data) const; @@ -373,7 +182,7 @@ class FieldExtractor { // "Cannot call SetField(Value&, const T&) on a variable length field"; FMA_DBG_ASSERT(!is_vfield_); // "Type size mismatch" - FMA_DBG_CHECK_EQ(sizeof(data), field_data_helper::FieldTypeSize(def_.type)); + FMA_DBG_CHECK_EQ(sizeof(data), TypeSize()); // copy the buffer so we don't accidentally overwrite memory record.Resize(record.Size()); char* ptr = (char*)record.Data() + offset_.data_off; @@ -384,26 +193,14 @@ class FieldExtractor { // "Cannot call SetField(Value&, const T&) on a variable length field"; FMA_DBG_ASSERT(!is_vfield_); // "Type size mismatch" - FMA_DBG_CHECK_EQ(data.Size(), field_data_helper::FieldTypeSize(def_.type)); + FMA_DBG_CHECK_EQ(data.Size(), TypeSize()); // copy the buffer so we don't accidentally overwrite memory char* ptr = (char*)record.Data() + offset_.data_off; memcpy(ptr, data.Data(), data.Size()); } // set field value to null - void SetIsNull(Value& record, bool is_null) const { - if (!def_.optional) { - if (is_null) throw FieldCannotBeSetNullException(Name()); - return; - } - // set the Kth bit from NullArray - char* arr = GetNullArray(record); - if (is_null) { - arr[null_bit_off_ / 8] |= (0x1 << (null_bit_off_ % 8)); - } else { - arr[null_bit_off_ / 8] &= ~(0x1 << (null_bit_off_ % 8)); - } - } + void SetIsNull(const Value& record, bool is_null) const override; /** * Extracts field data from the record to the buffer pointed to by data. This @@ -415,23 +212,25 @@ class FieldExtractor { * * Assert fails if data is corrupted. 
*/ - void GetCopyRaw(const Value& record, void* data, size_t size) const { + void GetCopyRaw(const Value& record, void* data, size_t size) const override { size_t off = GetFieldOffset(record); FMA_DBG_ASSERT(off + size <= record.Size()); memcpy(data, record.Data() + off, size); } - char* GetNullArray(const Value& record) const { return record.Data() + nullable_array_off_; } + char* GetNullArray(const Value& record) const override { + return record.Data() + nullable_array_off_; + } - size_t GetDataSize(const Value& record) const { + size_t GetDataSize(const Value& record) const override { if (is_vfield_) { return GetNextOffset(record) - GetFieldOffset(record); } else { - return field_data_helper::FieldTypeSize(def_.type); + return TypeSize(); } } - size_t GetFieldOffset(const Value& record) const { + size_t GetFieldOffset(const Value& record) const override { if (is_vfield_) { size_t off = (offset_.idx == 0) @@ -453,11 +252,11 @@ class FieldExtractor { offset_.idx * sizeof(DataOffset)); return off; } else { - return offset_.data_off + field_data_helper::FieldTypeSize(def_.type); + return offset_.data_off + TypeSize(); } } - void* GetFieldPointer(const Value& record) const { + void* GetFieldPointer(const Value& record) const override { return (char*)record.Data() + GetFieldOffset(record); } }; diff --git a/src/core/field_extractor_base.cpp b/src/core/field_extractor_base.cpp new file mode 100644 index 0000000000..775ef6c662 --- /dev/null +++ b/src/core/field_extractor_base.cpp @@ -0,0 +1,124 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+#include "core/field_extractor_base.h"
+
+namespace lgraph {
+
+namespace _detail {
+
+FieldExtractorBase::~FieldExtractorBase() = default;
+
+/**
+ * Copies the raw bytes of the field into a string.
+ *
+ * \param          record  The record.
+ * \param [in,out] data    Resized to GetDataSize(record) and filled via GetCopyRaw.
+ *
+ * Must not be used on BLOB fields (debug assertion).
+ */
+void FieldExtractorBase::GetCopy(const Value& record, std::string& data) const {
+    FMA_DBG_ASSERT(Type() != FieldType::BLOB);
+    data.resize(GetDataSize(record));
+    GetCopyRaw(record, &data[0], data.size());
+}
+
+/**
+ * Copies the raw bytes of the field into a Value.
+ *
+ * \param          record  The record.
+ * \param [in,out] data    Resized to GetDataSize(record) and filled via GetCopyRaw.
+ */
+void FieldExtractorBase::GetCopy(const Value& record, Value& data) const {
+    data.Resize(GetDataSize(record));
+    GetCopyRaw(record, data.Data(), data.Size());
+}
+
+/**
+ * Builds a Value from the field pointer and data size reported for this record.
+ *
+ * \param record  The record.
+ *
+ * \return An empty Value when the field is null, otherwise a Value constructed from
+ *         GetFieldPointer(record) and GetDataSize(record).
+ */
+Value FieldExtractorBase::GetConstRef(const Value& record) const {
+    if (GetIsNull(record)) return Value();
+    return Value(static_cast<char*>(GetFieldPointer(record)), GetDataSize(record));
+}
+
+/**
+ * Returns the string representation of the field.
+ *
+ * Numeric types are formatted as text, DATE/DATETIME use their ToString(), STRING and
+ * the spatial types are copied byte-for-byte, BLOB is rendered as the placeholder
+ * "[BLOB]", and FLOAT_VECTOR becomes a comma-separated list.
+ *
+ * \param record  The record.
+ *
+ * \return String representation of the field; "\"null\"" when the field is null and an
+ *         empty string (after logging an error) for an unhandled type.
+ */
+std::string FieldExtractorBase::FieldToString(const Value& record) const {
+    if (GetIsNull(record)) return "\"null\"";
+
+// Reads a fixed-size field into its storage type and formats it through its C type.
+#define _COPY_FIELD_AND_RETURN_STR_(record, ft)                                           \
+    do {                                                                                  \
+        typename field_data_helper::FieldType2StorageType<FieldType::ft>::type d = 0;     \
+        typedef typename field_data_helper::FieldType2CType<FieldType::ft>::type CT;      \
+        GetCopy(record, d);                                                               \
+        return fma_common::StringFormatter::Format("{}", static_cast<CT>(d));             \
+    } while (0)
+
+    switch (def_.type) {
+    case FieldType::BOOL:
+        _COPY_FIELD_AND_RETURN_STR_(record, BOOL);
+    case FieldType::INT8:
+        _COPY_FIELD_AND_RETURN_STR_(record, INT8);
+    case FieldType::INT16:
+        _COPY_FIELD_AND_RETURN_STR_(record, INT16);
+    case FieldType::INT32:
+        _COPY_FIELD_AND_RETURN_STR_(record, INT32);
+    case FieldType::INT64:
+        _COPY_FIELD_AND_RETURN_STR_(record, INT64);
+    case FieldType::FLOAT:
+        _COPY_FIELD_AND_RETURN_STR_(record, FLOAT);
+    case FieldType::DOUBLE:
+        _COPY_FIELD_AND_RETURN_STR_(record, DOUBLE);
+    case FieldType::DATE:
+        {
+            int32_t i;
+            GetCopy(record, i);
+            return Date(i).ToString();
+        }
+    case FieldType::DATETIME:
+        {
+            int64_t i;
+            GetCopy(record, i);
+            return DateTime(i).ToString();
+        }
+    case FieldType::STRING:
+        {
+            std::string ret(GetDataSize(record), 0);
+            GetCopyRaw(record, &ret[0], ret.size());
+            return ret;
+        }
+    case FieldType::BLOB:
+        {
+            // std::string ret(GetDataSize(record), 0);
+            // GetCopyRaw(record, &ret[0], ret.size());
+            // return ::lgraph_api::base64::Encode(ret.substr(2));
+            return fma_common::StringFormatter::Format("[BLOB]");
+        }
+    case FieldType::POINT:
+    case FieldType::LINESTRING:
+    case FieldType::POLYGON:
+    case FieldType::SPATIAL:
+        {
+            std::string ret(GetDataSize(record), 0);
+            GetCopyRaw(record, &ret[0], ret.size());
+            return ret;
+        }
+    case FieldType::FLOAT_VECTOR:
+        {
+            // Decode the vector once; it used to be re-decoded three times per element.
+            // NOTE(review): this decodes the whole record rather than this field's bytes,
+            // exactly as before -- confirm that is intended.
+            const std::vector<float> floats = record.AsType<std::vector<float>>();
+            std::string vec_str;
+            for (const float floatnum : floats) {
+                // keep 7 characters of the textual form for values above 999999, else 8
+                const size_t keep = floatnum > 999999 ? 7 : 8;
+                vec_str += std::to_string(floatnum).substr(0, keep);
+                vec_str += ',';
+            }
+            if (!vec_str.empty()) {
+                vec_str.pop_back();  // drop the trailing comma
+            }
+            return vec_str;
+        }
+    case lgraph_api::NUL:
+        break;
+    }
+#undef _COPY_FIELD_AND_RETURN_STR_
+    LOG_ERROR() << "Data type " << field_data_helper::FieldTypeName(def_.type) << " not handled";
+    return "";
+}
+
+}  // namespace _detail
+}  // namespace lgraph
diff --git a/src/core/field_extractor_base.h b/src/core/field_extractor_base.h
new file mode 100644
index 0000000000..af17d98857
--- /dev/null
+++ b/src/core/field_extractor_base.h
@@ -0,0 +1,261 @@
+/**
+ * Copyright 2022 AntGroup CO., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "core/blob_manager.h"
+#include "core/field_data_helper.h"
+#include "core/vertex_index.h"
+#include "core/edge_index.h"
+#include "core/schema_common.h"
+#include "core/vector_index.h"
+#include "core/vsag_hnsw.h"
+
+namespace lgraph {
+class Schema;
+
+namespace _detail {
+
+// Declares a member template that only participates in overload resolution when
+// _TYPE_ is an integral or floating-point type; _RT_ is the return type.
+#define ENABLE_IF_FIXED_FIELD(_TYPE_, _RT_)                                                \
+    template <typename _TYPE_>                                                             \
+    typename std::enable_if<                                                               \
+        std::is_integral<_TYPE_>::value || std::is_floating_point<_TYPE_>::value, _RT_>::type
+
+/**
+ * Common part of a field extractor: the field specification plus the index handles
+ * attached to the field. Record layout access is left to derived classes through the
+ * pure virtual accessors declared further down.
+ *
+ * NOTE(review): no copy constructor is declared, so it is implicitly deleted by the
+ * user-declared move constructor; copies are only possible through copy assignment.
+ */
+class FieldExtractorBase {
+    friend class lgraph::Schema;
+    FieldSpec def_;
+    FieldId field_id_ = 0;
+    bool is_vfield_ = false;
+    std::unique_ptr<VertexIndex> vertex_index_;
+    std::unique_ptr<EdgeIndex> edge_index_;
+    // fulltext index
+    bool fulltext_indexed_ = false;
+    // vector index
+    std::shared_ptr<VectorIndex> vector_index_;
+
+ public:
+    FieldExtractorBase() : vertex_index_(nullptr), edge_index_(nullptr), vector_index_(nullptr) {}
+    virtual ~FieldExtractorBase();
+
+    // Builds an extractor for the given spec with no index attached.
+    explicit FieldExtractorBase(const FieldSpec& def) {
+        // Assigned rather than copy-constructed so that every FieldSpec member,
+        // including id and deleted, is taken over verbatim.
+        def_ = def;
+        is_vfield_ = !field_data_helper::IsFixedLengthFieldType(def.type);
+        vertex_index_ = nullptr;
+        edge_index_ = nullptr;
+        vector_index_ = nullptr;
+    }
+
+    // Takes over the spec and the index handles of rhs.
+    FieldExtractorBase(FieldExtractorBase&& rhs) noexcept {
+        // GetFieldSpec() returns a const reference, so std::move on it was a silent no-op;
+        // use the member directly.
+        def_ = std::move(rhs.def_);
+        field_id_ = rhs.field_id_;
+        is_vfield_ = rhs.is_vfield_;
+        fulltext_indexed_ = rhs.fulltext_indexed_;
+        // Moving a unique_ptr/shared_ptr leaves the source empty, so no explicit reset of
+        // rhs is needed (the old code reset vertex_index_ twice and never vector_index_).
+        vertex_index_ = std::move(rhs.vertex_index_);
+        edge_index_ = std::move(rhs.edge_index_);
+        vector_index_ = std::move(rhs.vector_index_);
+    }
+
+    // Move assignment: same transfer as the move constructor.
+    FieldExtractorBase& operator=(FieldExtractorBase&& rhs) noexcept {
+        if (this == &rhs) return *this;
+        def_ = std::move(rhs.def_);
+        field_id_ = rhs.field_id_;
+        is_vfield_ = rhs.is_vfield_;
+        fulltext_indexed_ = rhs.fulltext_indexed_;
+        vertex_index_ = std::move(rhs.vertex_index_);
+        edge_index_ = std::move(rhs.edge_index_);
+        vector_index_ = std::move(rhs.vector_index_);
+        return *this;  // was missing: flowing off the end of a non-void function is UB
+    }
+
+    // Copy assignment: vertex/edge indexes are deep-copied, the vector index is shared.
+    FieldExtractorBase& operator=(const FieldExtractorBase& rhs) {
+        if (this == &rhs) return *this;
+        def_ = rhs.def_;
+        field_id_ = rhs.field_id_;
+        is_vfield_ = rhs.is_vfield_;
+        vertex_index_.reset(rhs.vertex_index_ ? new VertexIndex(*rhs.vertex_index_) : nullptr);
+        edge_index_.reset(rhs.edge_index_ ? new EdgeIndex(*rhs.edge_index_) : nullptr);
+        fulltext_indexed_ = rhs.fulltext_indexed_;
+        vector_index_ = rhs.vector_index_;
+        return *this;  // was missing: flowing off the end of a non-void function is UB
+    }
+
+    // Get fieldExtractor.
+    const FieldSpec& GetFieldSpec() const { return def_; }
+    const std::string& Name() const { return def_.name; }
+    FieldType Type() const { return def_.type; }
+    size_t TypeSize() const { return field_data_helper::FieldTypeSize(def_.type); }
+    FieldData GetDefaultValue() const { return def_.default_value; }
+    FieldData GetInitedValue() const { return def_.init_value; }
+    bool HasDefaultValue() const { return def_.set_default_value; }
+    bool HasInitedValue() const { return def_.inited_value; }
+    bool IsOptional() const { return def_.optional; }
+    bool IsFixedType() const { return field_data_helper::IsFixedLengthFieldType(def_.type); }
+    bool IsDeleted() const { return def_.deleted; }
+    VertexIndex* GetVertexIndex() const { return vertex_index_.get(); }
+    EdgeIndex* GetEdgeIndex() const { return edge_index_.get(); }
+    bool FullTextIndexed() const { return fulltext_indexed_; }
+    VectorIndex* GetVectorIndex() const { return vector_index_.get(); }
+    // The id is stored in the spec (def_.id), not in field_id_.
+    FieldId GetFieldId() const { return def_.id; }
+
+    // Set fieldExtractor.
+ + void SetFieldId(FieldId id) { def_.id = id; } + void SetVertexIndex(VertexIndex* index) { vertex_index_.reset(index); } + void SetEdgeIndex(EdgeIndex* edgeindex) { edge_index_.reset(edgeindex); } + void SetVectorIndex(VectorIndex* vectorindex) { vector_index_.reset(vectorindex); } + void SetFullTextIndex(bool fulltext_indexed) { fulltext_indexed_ = fulltext_indexed; } + void MarkDeleted() { + def_.deleted = true; + // free data space when marked deleted + def_.init_value.~FieldData(); + def_.default_value.~FieldData(); + def_.inited_value = false; + def_.set_default_value = false; + } + + void SetDefaultValue(const FieldData& data) { + def_.default_value = FieldData(data); + def_.set_default_value = true; + } + + void SetInitValue(const FieldData& data) { + def_.init_value = FieldData(data); + def_.inited_value = true; + } + + // record related. + + // Get + size_t DataSize(const Value& record) const { return GetDataSize(record); } + virtual bool GetIsNull(const Value& record) const = 0; + virtual size_t GetDataSize(const Value& record) const = 0; + virtual void* GetFieldPointer(const Value& record) const = 0; + virtual void GetCopyRaw(const Value& record, void* data, size_t size) const = 0; + virtual size_t GetFieldOffset(const Value& record) const = 0; + virtual char* GetNullArray(const Value& record) const = 0; + + // Get copy + ENABLE_IF_FIXED_FIELD(T, void) GetCopy(const Value& record, T& data) const { + FMA_DBG_ASSERT(field_data_helper::FieldTypeSize(def_.type) == sizeof(T)); + size_t offset = GetFieldOffset(record); + size_t size = GetDataSize(record); + if (size == sizeof(T)) { + memcpy(&data, (char*)record.Data() + offset, sizeof(T)); + } else { + ConvertData(&data, (char*)record.Data() + offset, size); + } + } + void GetCopy(const Value& record, std::string& data) const; + void GetCopy(const Value& record, Value& data) const; + Value GetConstRef(const Value& record) const; + + // Blob etc. 
+ template + Value GetBlobConstRef(const Value& record, const GetBlobByKeyFunc& get_blob_by_key) const { + FMA_DBG_ASSERT(Type() == FieldType::BLOB); + if (GetIsNull(record)) return Value(); + Value v((char*)GetFieldPointer(record), GetDataSize(record)); + if (BlobManager::IsLargeBlob(v)) { + return get_blob_by_key(BlobManager::GetLargeBlobKey(v)); + } else { + return BlobManager::GetSmallBlobContent(v); + } + } + + inline Value ParseBlob(const std::string& str, bool& is_null) const { + // string input is always seen as non-NULL + is_null = false; + // decode str as base64 + std::string decoded; + if (!::lgraph_api::base64::TryDecode(str.data(), str.size(), decoded)) + throw ParseStringException(Name(), str, Type()); + return Value(decoded); + } + + // get a const ref of raw blob data + inline Value ParseBlob(const FieldData& fd, bool& is_null) const { + if (fd.type == FieldType::NUL) { + is_null = true; + return Value(); + } + is_null = false; + if (fd.type == FieldType::BLOB) { + return Value::ConstRef(*fd.data.buf); + } + if (fd.type == FieldType::STRING) { + std::string decoded; + const std::string& s = *fd.data.buf; + if (!::lgraph_api::base64::TryDecode(s.data(), s.size(), decoded)) + throw ParseStringException(Name(), s, Type()); + return Value(decoded); + } else { + throw ParseIncompatibleTypeException(Name(), fd.type, FieldType::BLOB); + return Value(); + } + } + + // set record. + virtual void SetIsNull(const Value& record, bool is_null) const = 0; + + std::string FieldToString(const Value& record) const; + + /** + * Convert data for integral and floating types. + * If we change the data type of floating-point or integer values + * (i.e., by altering their defined length), we need to adjust their values accordingly. + * For example, when converting from INT64 to INT8 (a relatively rare operation), + * we need to return an appropriate value within the range of the new type. 
+ * This approach allows us to retain the original value when modifying the data type, + * without requiring a complete scan of the data to generate a new field. + */ + ENABLE_IF_FIXED_FIELD(T, void) ConvertData(T* dst, const char* data, size_t size) const { + if (std::is_integral_v) { + int64_t temp = 0; + switch (size) { + case 1: + temp = *reinterpret_cast(data); + break; + case 2: + temp = *reinterpret_cast(data); + break; + case 4: + temp = *reinterpret_cast(data); + break; + case 8: + temp = *reinterpret_cast(data); + break; + default: + FMA_ASSERT(false) << "Invalid size"; + } + + if (temp > std::numeric_limits::max()) { + *dst = std::numeric_limits::max(); + } else if (temp < std::numeric_limits::min()) { + *dst = std::numeric_limits::min(); + } else { + *dst = static_cast(temp); + } + } else if (std::is_floating_point_v) { + switch (size) { + case 4: + *dst = static_cast(*reinterpret_cast(data)); + break; + case 8: + *dst = static_cast(*reinterpret_cast(data)); + break; + default: + FMA_ASSERT(false) << "Invalid size"; + } + } + } +}; + +} // namespace _detail +} // namespace lgraph diff --git a/src/core/field_extractor_v2.cpp b/src/core/field_extractor_v2.cpp index 9abaade52d..b29975b6ed 100644 --- a/src/core/field_extractor_v2.cpp +++ b/src/core/field_extractor_v2.cpp @@ -17,97 +17,12 @@ namespace lgraph { namespace _detail { bool FieldExtractorV2::GetIsNull(const Value& record) const { - if (!def_.optional) { + if (!IsOptional()) { return false; } // get the Kth bit from NullArray const char* arr = GetNullArray(record); - return arr[def_.id / 8] & (0x1 << (def_.id % 8)); -} - -std::string FieldExtractorV2::FieldToString(const Value& record) const { - if (GetIsNull(record)) return "\"null\""; - std::string ret; - -#define _COPY_FIELD_AND_RETURN_STR_(record, ft) \ - do { \ - typename field_data_helper::FieldType2StorageType::type d = 0; \ - typedef typename field_data_helper::FieldType2CType::type CT; \ - GetCopy(record, d); \ - return 
fma_common::StringFormatter::Format("{}", static_cast(d)); \ - } while (0) - - switch (def_.type) { - case FieldType::BOOL: - _COPY_FIELD_AND_RETURN_STR_(record, BOOL); - case FieldType::INT8: - _COPY_FIELD_AND_RETURN_STR_(record, INT8); - case FieldType::INT16: - _COPY_FIELD_AND_RETURN_STR_(record, INT16); - case FieldType::INT32: - _COPY_FIELD_AND_RETURN_STR_(record, INT32); - case FieldType::INT64: - _COPY_FIELD_AND_RETURN_STR_(record, INT64); - case FieldType::FLOAT: - _COPY_FIELD_AND_RETURN_STR_(record, FLOAT); - case FieldType::DOUBLE: - _COPY_FIELD_AND_RETURN_STR_(record, DOUBLE); - case FieldType::DATE: - { - int32_t i; - GetCopy(record, i); - return Date(i).ToString(); - } - case FieldType::DATETIME: - { - int64_t i; - GetCopy(record, i); - return DateTime(i).ToString(); - } - case FieldType::STRING: - { - std::string ret(GetDataSize(record), 0); - GetCopyRaw(record, &ret[0], ret.size()); - return ret; - } - case FieldType::BLOB: - { - // std::string ret(GetDataSize(record), 0); - // GetCopyRaw(record, &ret[0], ret.size()); - // return ::lgraph_api::base64::Encode(ret.substr(2)); - return fma_common::StringFormatter::Format("[BLOB]"); - } - case FieldType::POINT: - case FieldType::LINESTRING: - case FieldType::POLYGON: - case FieldType::SPATIAL: - { - std::string ret(GetDataSize(record), 0); - GetCopyRaw(record, &ret[0], ret.size()); - return ret; - } - case FieldType::FLOAT_VECTOR: - { - std::string vec_str; - for (size_t i = 0; i < record.AsType>().size(); i++) { - auto floatnum = record.AsType>().at(i); - if (record.AsType>().at(i) > 999999) { - vec_str += std::to_string(floatnum).substr(0, 7); - } else { - vec_str += std::to_string(floatnum).substr(0, 8); - } - vec_str += ','; - } - if (!vec_str.empty()) { - vec_str.pop_back(); - } - return vec_str; - } - case lgraph_api::NUL: - break; - } - LOG_ERROR() << "Data type " << field_data_helper::FieldTypeName(def_.type) << " not handled"; - return ""; + return arr[GetFieldId() / 8] & (0x1 << (GetFieldId() % 
8)); } void FieldExtractorV2::SetLabelInRecord(bool label_in_record) { @@ -117,20 +32,9 @@ void FieldExtractorV2::SetLabelInRecord(bool label_in_record) { nullarray_offset_ = count_offset_ + sizeof(FieldId); } -void FieldExtractorV2::GetCopy(const Value& record, std::string& data) const { - FMA_DBG_ASSERT(Type() != FieldType::BLOB); - data.resize(GetDataSize(record)); - GetCopyRaw(record, &data[0], data.size()); -} - -void FieldExtractorV2::GetCopy(const Value& record, Value& data) const { - data.Resize(GetDataSize(record)); - GetCopyRaw(record, data.Data(), data.Size()); -} - void FieldExtractorV2::GetCopyRaw(const Value& record, void* data, size_t size) const { - size_t off = GetFieldOffset(record, def_.id); - if (is_vfield_) { + size_t off = GetFieldOffset(record); + if (!IsFixedType()) { off = ::lgraph::_detail::UnalignedGet(record.Data() + off); FMA_DBG_ASSERT(off + size + sizeof(DataOffset) <= record.Size()); memcpy(data, record.Data() + off + sizeof(DataOffset), size); @@ -145,13 +49,13 @@ FieldId FieldExtractorV2::GetRecordCount(const Value& record) const { } size_t FieldExtractorV2::GetDataSize(const Value& record) const { - if (is_vfield_) { - DataOffset var_offset = ::lgraph::_detail::UnalignedGet( - record.Data() + GetFieldOffset(record, def_.id)); + if (!IsFixedType()) { + DataOffset var_offset = + ::lgraph::_detail::UnalignedGet(record.Data() + GetFieldOffset(record)); // The length is stored at the beginning of the variable-length field data area. 
return ::lgraph::_detail::UnalignedGet(record.Data() + var_offset); } else { - return GetFieldOffset(record, def_.id + 1) - GetFieldOffset(record, def_.id); + return GetFieldOffset(record, GetFieldId() + 1) - GetFieldOffset(record, GetFieldId()); } } size_t FieldExtractorV2::GetFieldOffset(const Value& record, const FieldId id) const { @@ -173,24 +77,24 @@ size_t FieldExtractorV2::GetOffsetPosition(const Value& record, const FieldId id } void* FieldExtractorV2::GetFieldPointer(const Value& record) const { - if (is_vfield_) { - DataOffset var_offset = ::lgraph::_detail::UnalignedGet( - record.Data() + GetFieldOffset(record, def_.id)); + if (!IsFixedType()) { + DataOffset var_offset = + ::lgraph::_detail::UnalignedGet(record.Data() + GetFieldOffset(record)); return (char*)record.Data() + sizeof(uint32_t) + var_offset; } - return (char*)record.Data() + GetFieldOffset(record, def_.id); + return (char*)record.Data() + GetFieldOffset(record); } void FieldExtractorV2::SetIsNull(const Value& record, bool is_null) const { - if (!def_.optional) { + if (!IsOptional()) { if (is_null) throw FieldCannotBeSetNullException(Name()); } // set the Kth bit from NullArray char* arr = GetNullArray(record); if (is_null) { - arr[def_.id / 8] |= (0x1 << (def_.id % 8)); + arr[GetFieldId() / 8] |= (0x1 << (GetFieldId() % 8)); } else { - arr[def_.id / 8] &= ~(0x1 << (def_.id % 8)); + arr[GetFieldId() / 8] &= ~(0x1 << (GetFieldId() % 8)); } } @@ -201,18 +105,18 @@ void FieldExtractorV2::SetVariableOffset(Value& record, FieldId id, DataOffset o void FieldExtractorV2::_SetFixedSizeValueRaw(Value& record, const Value& data) const { // "Cannot call SetField(Value&, const T&) on a variable length field"; - FMA_DBG_ASSERT(!is_vfield_); + FMA_DBG_ASSERT(IsFixedType()); // "Type size mismatch" - FMA_DBG_CHECK_EQ(data.Size(), field_data_helper::FieldTypeSize(def_.type)); + FMA_DBG_CHECK_EQ(data.Size(), TypeSize()); FMA_DBG_CHECK_EQ(data.Size(), GetDataSize(record)); // copy the buffer so we don't 
accidentally overwrite memory - char* ptr = (char*)record.Data() + GetFieldOffset(record, def_.id); + char* ptr = (char*)record.Data() + GetFieldOffset(record); memcpy(ptr, data.Data(), data.Size()); } void FieldExtractorV2::_SetVariableValueRaw(Value& record, const Value& data) const { - FMA_DBG_ASSERT(is_vfield_); - DataOffset foff = GetFieldOffset(record, def_.id); + FMA_DBG_ASSERT(!IsFixedType()); + DataOffset foff = GetFieldOffset(record); DataOffset ff = ::lgraph::_detail::UnalignedGet(record.Data() + foff); ::lgraph::_detail::UnalignedSet(record.Data() + ff, data.Size()); memcpy(record.Data() + ff + sizeof(DataOffset), data.Data(), data.Size()); diff --git a/src/core/field_extractor_v2.h b/src/core/field_extractor_v2.h index 966f4648c7..dcb75924af 100644 --- a/src/core/field_extractor_v2.h +++ b/src/core/field_extractor_v2.h @@ -14,11 +14,10 @@ #pragma once -#include "core/blob_manager.h" +#include "core/field_extractor_base.h" + #include "core/field_data_helper.h" #include "core/vertex_index.h" -#include "core/edge_index.h" -#include "core/schema_common.h" #include "core/vector_index.h" #include "core/vsag_hnsw.h" @@ -26,81 +25,29 @@ namespace lgraph { class Schema; namespace _detail { - -#define ENABLE_IF_FIXED_FIELD(_TYPE_, _RT_) \ - template \ - typename std::enable_if< \ - std::is_integral<_TYPE_>::value || std::is_floating_point<_TYPE_>::value, _RT_>::type - /** A field extractor can be used to get a field in the record. 
*/ -class FieldExtractorV2 { +class FieldExtractorV2 : public FieldExtractorBase { friend class lgraph::Schema; - // type information - FieldSpec def_; - // is variable property field - bool is_vfield_ = false; bool label_in_record_ = true; - // index - std::unique_ptr vertex_index_; - std::unique_ptr edge_index_; - // fulltext index - bool fulltext_indexed_ = false; - // vector index - std::shared_ptr vector_index_; size_t count_offset_ = sizeof(VersionId) + sizeof(LabelId); size_t nullarray_offset_ = sizeof(VersionId) + sizeof(LabelId) + sizeof(FieldId); public: - FieldExtractorV2() : vertex_index_(nullptr), edge_index_(nullptr) {} + FieldExtractorV2() : FieldExtractorBase() {} - ~FieldExtractorV2() {} - - explicit FieldExtractorV2(const FieldExtractorV2& rhs) { - def_ = rhs.def_; - is_vfield_ = rhs.is_vfield_; - vertex_index_.reset(rhs.vertex_index_ ? new VertexIndex(*rhs.vertex_index_) : nullptr); - edge_index_.reset(rhs.edge_index_ ? new EdgeIndex(*rhs.edge_index_) : nullptr); - fulltext_indexed_ = rhs.fulltext_indexed_; - vector_index_ = rhs.vector_index_; + FieldExtractorV2(const FieldExtractorV2& rhs) : FieldExtractorBase(rhs.GetFieldSpec()) { nullarray_offset_ = rhs.nullarray_offset_; count_offset_ = rhs.count_offset_; } - explicit FieldExtractorV2(FieldExtractorV2&& rhs) noexcept { - def_ = std::move(rhs.def_); - is_vfield_ = rhs.is_vfield_; - vertex_index_ = std::move(rhs.vertex_index_); - edge_index_ = std::move(rhs.edge_index_); - rhs.vertex_index_ = nullptr; - rhs.edge_index_ = nullptr; - fulltext_indexed_ = rhs.fulltext_indexed_; - vector_index_ = std::move(rhs.vector_index_); - rhs.vector_index_ = nullptr; + FieldExtractorV2(FieldExtractorV2&& rhs) noexcept : FieldExtractorBase(std::move(rhs)) { count_offset_ = rhs.count_offset_; nullarray_offset_ = rhs.nullarray_offset_; } - explicit FieldExtractorV2(const FieldSpec& d) noexcept : def_(d) { - is_vfield_ = !field_data_helper::IsFixedLengthFieldType(d.type); - vertex_index_ = nullptr; - edge_index_ = 
nullptr; - } - - FieldExtractorV2(const FieldSpec& d, FieldId id) noexcept : def_(d) { - is_vfield_ = !field_data_helper::IsFixedLengthFieldType(d.type); - vertex_index_ = nullptr; - edge_index_ = nullptr; - SetFieldId(id); - } - FieldExtractorV2& operator=(const FieldExtractorV2& rhs) { if (this == &rhs) return *this; - def_ = rhs.def_; - is_vfield_ = rhs.is_vfield_; - vertex_index_.reset(rhs.vertex_index_ ? new VertexIndex(*rhs.vertex_index_) : nullptr); - edge_index_.reset(rhs.edge_index_ ? new EdgeIndex(*rhs.edge_index_) : nullptr); - fulltext_indexed_ = rhs.fulltext_indexed_; - vector_index_ = rhs.vector_index_; + FieldExtractorBase::operator=(rhs); nullarray_offset_ = rhs.nullarray_offset_; count_offset_ = rhs.count_offset_; return *this; @@ -108,287 +55,76 @@ class FieldExtractorV2 { FieldExtractorV2& operator=(FieldExtractorV2&& rhs) noexcept { if (this == &rhs) return *this; - def_ = std::move(rhs.def_); - is_vfield_ = rhs.is_vfield_; - vertex_index_ = std::move(rhs.vertex_index_); - edge_index_ = std::move(rhs.edge_index_); - fulltext_indexed_ = rhs.fulltext_indexed_; - vector_index_ = std::move(rhs.vector_index_); + FieldExtractorBase::operator=(std::move(rhs)); nullarray_offset_ = rhs.nullarray_offset_; count_offset_ = rhs.count_offset_; return *this; } - // get - - const FieldSpec& GetFieldSpec() const { return def_; } - - bool GetIsNull(const Value& record) const; - - FieldData GetDefaultValue() const { return def_.default_value; } - - FieldData GetInitedValue() const { return def_.init_value; } - - bool HasDefaultValue() const { return def_.set_default_value; } - - bool HasInitedValue() const { return def_.inited_value; } - - const std::string& Name() const { return def_.name; } - - FieldType Type() const { return def_.type; } - - size_t TypeSize() const { return field_data_helper::FieldTypeSize(def_.type); } - - size_t DataSize(const Value& record) const { return GetDataSize(record); } - - bool IsOptional() const { return def_.optional; } - - bool 
IsFixedType() const { return field_data_helper::IsFixedLengthFieldType(def_.type); } - - bool IsDeleted() const { return def_.deleted; } - - VertexIndex* GetVertexIndex() const { return vertex_index_.get(); } - - EdgeIndex* GetEdgeIndex() const { return edge_index_.get(); } - - bool FullTextIndexed() const { return fulltext_indexed_; } - - VectorIndex* GetVectorIndex() const { return vector_index_.get(); } - - FieldId GetFieldId() const { return def_.id; } - - template - Value GetBlobConstRef(const Value& record, const GetBlobByKeyFunc& get_blob_by_key) const { - FMA_DBG_ASSERT(Type() == FieldType::BLOB); - if (GetIsNull(record)) return Value(); - Value v((char*)GetFieldPointer(record), GetDataSize(record)); - if (BlobManager::IsLargeBlob(v)) { - return get_blob_by_key(BlobManager::GetLargeBlobKey(v)); - } else { - return BlobManager::GetSmallBlobContent(v); - } - } - - inline Value ParseBlob(const std::string& str, bool& is_null) const { - // string input is always seen as non-NULL - is_null = false; - // decode str as base64 - std::string decoded; - if (!::lgraph_api::base64::TryDecode(str.data(), str.size(), decoded)) - throw ParseStringException(Name(), str, Type()); - return Value(decoded); - } - - // get a const ref of raw blob data - inline Value ParseBlob(const FieldData& fd, bool& is_null) const { - if (fd.type == FieldType::NUL) { - is_null = true; - return Value(); - } - is_null = false; - if (fd.type == FieldType::BLOB) { - return Value::ConstRef(*fd.data.buf); - } - if (fd.type == FieldType::STRING) { - std::string decoded; - const std::string& s = *fd.data.buf; - if (!::lgraph_api::base64::TryDecode(s.data(), s.size(), decoded)) - throw ParseStringException(Name(), s, Type()); - return Value(decoded); - } else { - throw ParseIncompatibleTypeException(Name(), fd.type, FieldType::BLOB); - return Value(); - } - } - - /** - * Print the string representation of the field. 
For digital types, it prints - * it into ASCII string; for NBytes and String, it just copies the content of - * the field into the string. - * - * \param record The record. - * - * \return String representation of the field. - */ - std::string FieldToString(const Value& record) const; - - // set - void SetDefaultValue(const FieldData& data) { - def_.default_value = FieldData(data); - def_.set_default_value = true; - } + explicit FieldExtractorV2(const FieldSpec& d) noexcept : FieldExtractorBase(d) {} - void SetInitValue(const FieldData& data) { - def_.init_value = FieldData(data); - def_.inited_value = true; + FieldExtractorV2(const FieldSpec& d, const FieldId id) noexcept : FieldExtractorBase(d) { + SetFieldId(id); } - void MarkDeleted() { - def_.deleted = true; - // free data space when marked deleted - def_.init_value.~FieldData(); - def_.default_value.~FieldData(); - def_.inited_value = false; - def_.set_default_value = false; - } + ~FieldExtractorV2() override = default; + // Get FieldExtractor. + bool GetIsNull(const Value& record) const override; + // Set FieldExtractor. void SetLabelInRecord(bool label_in_record); - + // Set record. void SetRecordCount(Value& record, FieldId count) const { memcpy(record.Data() + count_offset_, &count, sizeof(FieldId)); } // set null in the record. - void SetIsNull(const Value& record, bool is_null) const; - - /** - * Extract a field from record into data of type T. T must be fixed-length - * type. - * - * \param record The record in which fields are stored. - * \param data Place where the extracted data will be stored. - * - * Assert fails if data is corrupted. 
- */ - ENABLE_IF_FIXED_FIELD(T, void) GetCopy(const Value& record, T& data) const { - FMA_DBG_ASSERT(field_data_helper::FieldTypeSize(def_.type) == sizeof(T)); - size_t offset = GetFieldOffset(record, def_.id); - size_t size = GetDataSize(record); - if (size == sizeof(T)) { - memcpy(&data, (char*)record.Data() + offset, sizeof(T)); - } else { - ConvertData(&data, (char*)record.Data() + offset, size); - } - } - - /** - * Extracts a copy of field into the string. - * - * \param record The record. - * \param [in,out] data The result data. - * - * Assert fails if data is corrupted. - */ - void GetCopy(const Value& record, std::string& data) const; - - /** - * Extracts field data from the record - * - * \param record The record. - * \param [in,out] data The result. - * - * Assert fails if data is corrupted. - */ - void GetCopy(const Value& record, Value& data) const; - - void GetCopyRaw(const Value& record, void* data, size_t size) const; - - // Gets a const reference of the field. - // Formatted data is returned for blob, which means [is_large_blob] [blob_data | blob_key] - Value GetConstRef(const Value& record) const { - if (GetIsNull(record)) return Value(); - return Value((char*)GetFieldPointer(record), GetDataSize(record)); - } - - /** - * Convert data for integral and floating types. - * If we change the data type of floating-point or integer values - * (i.e., by altering their defined length), we need to adjust their values accordingly. - * For example, when converting from INT64 to INT8 (a relatively rare operation), - * we need to return an appropriate value within the range of the new type. - * This approach allows us to retain the original value when modifying the data type, - * without requiring a complete scan of the data to generate a new field. 
- */ - ENABLE_IF_FIXED_FIELD(T, void) ConvertData(T* dst, const char* data, size_t size) const { - if (std::is_integral_v) { - int64_t temp = 0; - switch (size) { - case 1: - temp = *reinterpret_cast(data); - break; - case 2: - temp = *reinterpret_cast(data); - break; - case 4: - temp = *reinterpret_cast(data); - break; - case 8: - temp = *reinterpret_cast(data); - break; - default: - FMA_ASSERT(false) << "Invalid size"; - } - - if (temp > std::numeric_limits::max()) { - *dst = std::numeric_limits::max(); - } else if (temp < std::numeric_limits::min()) { - *dst = std::numeric_limits::min(); - } else { - *dst = static_cast(temp); - } - } else if (std::is_floating_point_v) { - switch (size) { - case 4: - *dst = static_cast(*reinterpret_cast(data)); - break; - case 8: - *dst = static_cast(*reinterpret_cast(data)); - break; - default: - FMA_ASSERT(false) << "Invalid size"; - } - } - } - - /** Retrieve the starting position of the Field data for the given ID. - * Note that both fixed-length and variable-length data are not distinguished here. - */ - size_t GetFieldOffset(const Value& record, const FieldId id) const; - - // return the position of the field's offset. - size_t GetOffsetPosition(const Value& record, const FieldId id) const; - - // return field num in the record. - FieldId GetRecordCount(const Value& record) const; - - // set variable's offset, they are stored at fixed-data area. - void SetVariableOffset(Value& record, FieldId id, DataOffset offset) const; - - // set fixed length data, only if length of the data in record equal its definition. - void _SetFixedSizeValueRaw(Value& record, const Value& data) const; - - // for test only. 
-    void _SetVariableValueRaw(Value& record, const Value& data) const;
+    void SetIsNull(const Value& record, bool is_null) const override;
 
+    // Store `data` into this field of `record`. Only valid for fixed-length fields whose
+    // type size equals sizeof(T); both conditions are checked in debug builds.
     ENABLE_IF_FIXED_FIELD(T, void) SetFixedSizeValue(Value& record, const T& data) const {
         // "Cannot call SetField(Value&, const T&) on a variable length field";
-        FMA_DBG_ASSERT(!is_vfield_);
+        FMA_DBG_ASSERT(IsFixedType());
         // "Type size mismatch"
-        FMA_DBG_CHECK_EQ(sizeof(data), field_data_helper::FieldTypeSize(def_.type));
+        FMA_DBG_CHECK_EQ(sizeof(data), TypeSize());
         // copy the buffer so we don't accidentally overwrite memory
         record.Resize(record.Size());
-        char* ptr = (char*)record.Data() + GetFieldOffset(record, def_.id);
+        char* ptr = (char*)record.Data() + GetFieldOffset(record);
         ::lgraph::_detail::UnalignedSet(ptr, data);
     }
 
-  private:
-    void SetVertexIndex(VertexIndex* index) { vertex_index_.reset(index); }
+    // set variable's offset, they are stored at fixed-data area.
+    void SetVariableOffset(Value& record, FieldId id, DataOffset offset) const;
+    // set fixed length data, only if length of the data in record equal its definition.
+    void _SetFixedSizeValueRaw(Value& record, const Value& data) const;
 
-    void SetEdgeIndex(EdgeIndex* edgeindex) { edge_index_.reset(edgeindex); }
+    // for test only.
+    void _SetVariableValueRaw(Value& record, const Value& data) const;
+    // Get record.
+    void GetCopyRaw(const Value& record, void* data, size_t size) const override;
+    /** Retrieve the starting position of the Field data for the given ID.
+     * Note that both fixed-length and variable-length data are not distinguished here.
+     */
+    size_t GetFieldOffset(const Value& record, const FieldId id) const;
 
-    void SetVectorIndex(VectorIndex* vectorindex) { vector_index_.reset(vectorindex); }
+    size_t GetFieldOffset(const Value& record) const override {
+        return GetFieldOffset(record, GetFieldId());
+    }
 
-    void SetFullTextIndex(bool fulltext_indexed) { fulltext_indexed_ = fulltext_indexed; }
+    // return the position of the field's offset.
+ size_t GetOffsetPosition(const Value& record, FieldId id) const; - void SetFieldId(uint16_t n) { def_.id = n; } + // return field num in the record. + FieldId GetRecordCount(const Value& record) const; // return null array pointer. - char* GetNullArray(const Value& record) const { return record.Data() + nullarray_offset_; } + char* GetNullArray(const Value& record) const override { + return record.Data() + nullarray_offset_; + } - size_t GetDataSize(const Value& record) const; + size_t GetDataSize(const Value& record) const override; - void* GetFieldPointer(const Value& record) const; + void* GetFieldPointer(const Value& record) const override; }; } // namespace _detail diff --git a/src/core/lightning_graph.cpp b/src/core/lightning_graph.cpp index fe8b53ef4c..776310f25a 100644 --- a/src/core/lightning_graph.cpp +++ b/src/core/lightning_graph.cpp @@ -222,7 +222,7 @@ bool LightningGraph::AddLabel(const std::string& label, size_t n_fields, const F Schema* schema = sm->GetSchema(label); FMA_DBG_ASSERT(schema); const auto& primary_field = dynamic_cast(options).primary_field; - const _detail::FieldExtractor* extractor = schema->GetFieldExtractor(primary_field); + const _detail::FieldExtractorBase* extractor = schema->GetFieldExtractorBase(primary_field); FMA_DBG_ASSERT(extractor); std::unique_ptr index; index_manager_->AddVertexIndex(txn.GetTxn(), label, primary_field, extractor->Type(), @@ -358,10 +358,10 @@ bool LightningGraph::DelLabel(const std::string& label, bool is_vertex, size_t* for (auto& fid : indexed_fids) { if (is_vertex) { index_manager_->DeleteVertexIndex(txn.GetTxn(), label, - schema->GetFieldExtractor(fid)->Name()); + schema->GetFieldExtractorBase(fid)->Name()); } else { index_manager_->DeleteEdgeIndex(txn.GetTxn(), label, - schema->GetFieldExtractor(fid)->Name()); + schema->GetFieldExtractorBase(fid)->Name()); } } // delete detached property table @@ -370,7 +370,7 @@ bool LightningGraph::DelLabel(const std::string& label, bool is_vertex, size_t* 
std::vector indexes; auto indexed_fids = schema->GetIndexedFields(); for (auto fid : indexed_fids) { - indexes.push_back(schema->GetFieldExtractor(fid)->GetVertexIndex()); + indexes.push_back(schema->GetFieldExtractorBase(fid)->GetVertexIndex()); } // get unique index VertexIndex* unique_idx = nullptr; @@ -473,7 +473,7 @@ bool LightningGraph::DelLabel(const std::string& label, bool is_vertex, size_t* Schema* to_unindex = schema_with_no_index->v_schema_manager.GetSchema(lid); for (auto& fid : indexed_fids) { index_manager_->DeleteVertexIndex(txn.GetTxn(), label, - schema->GetFieldExtractor(fid)->Name()); + schema->GetFieldExtractorBase(fid)->Name()); to_unindex->UnVertexIndex(fid); } } else { @@ -489,7 +489,7 @@ bool LightningGraph::DelLabel(const std::string& label, bool is_vertex, size_t* Schema* to_unedgeindex = schema_with_no_index->e_schema_manager.GetSchema(lid); for (auto& fid : indexed_fids) { index_manager_->DeleteEdgeIndex(txn.GetTxn(), label, - schema->GetFieldExtractor(fid)->Name()); + schema->GetFieldExtractorBase(fid)->Name()); to_unedgeindex->UnEdgeIndex(fid); } } @@ -785,7 +785,7 @@ bool LightningGraph::AlterLabelDelFields(const std::string& label, // get fids of the fields in new schema std::vector new_fids; std::vector old_field_pos; - std::vector blob_deleted_fes; + std::vector blob_deleted_fes; // make new schema auto setup_and_gen_new_schema = [&](Schema* curr_schema) -> Schema { @@ -836,7 +836,7 @@ bool LightningGraph::AlterLabelDelFields(const std::string& label, auto fids = curr_schema->GetFieldIds(del_fields); // delete indexes if necessary for (auto& f : fids) { - auto* extractor = curr_schema->GetFieldExtractor(f); + auto* extractor = curr_schema->GetFieldExtractorBase(f); if (extractor->GetVertexIndex()) { // delete vertex index index_manager_->DeleteVertexIndex(txn.GetTxn(), label, extractor->Name()); @@ -900,8 +900,8 @@ bool LightningGraph::AlterLabelAddFields(const std::string& label, Schema new_schema(*curr_schema); 
new_schema.AddFields(to_add); for (size_t i = 0; i < to_add.size(); i++) { - auto* extractor = const_cast( - new_schema.GetFieldExtractorV2(to_add[i].name)); + auto extractor = + new_schema.GetFieldExtractorBase(to_add[i].name); extractor->SetInitValue(default_values[i]); } return new_schema; @@ -915,7 +915,7 @@ bool LightningGraph::AlterLabelAddFields(const std::string& label, src_fids.reserve(curr_schema->GetNumFields()); for (size_t i = 0; i < new_schema.GetNumFields(); i++) { size_t fid = 0; - if (curr_schema->TryGetFieldId(new_schema.GetFieldExtractor(i)->Name(), fid)) { + if (curr_schema->TryGetFieldId(new_schema.GetFieldExtractorBase(i)->Name(), fid)) { dst_fids.push_back(i); src_fids.push_back(fid); } @@ -936,7 +936,8 @@ bool LightningGraph::AlterLabelAddFields(const std::string& label, new_schema->CopyFieldsRaw(new_prop, dst_fids, curr_schema, old_prop, src_fids); for (size_t i = 0; i < new_fids.size(); i++) { size_t fid = new_fids[i]; - auto* extr = new_schema->GetFieldExtractor(fid); + auto* extr = + new_schema->GetFieldExtractor(fid); if (extr->Type() == FieldType::BLOB) { extr->ParseAndSetBlob(new_prop, default_values[i], [&](const Value& v) { return blob_manager_->Add(txn.GetTxn(), v); @@ -1122,7 +1123,7 @@ bool LightningGraph::_AddEmptyIndex(const std::string& label, const std::string& Schema* schema = is_vertex ? 
new_schema->v_schema_manager.GetSchema(label) : new_schema->e_schema_manager.GetSchema(label); if (!schema) throw LabelNotExistException(label); - const _detail::FieldExtractor* extractor = schema->GetFieldExtractor(field); + const _detail::FieldExtractorBase* extractor = schema->GetFieldExtractorBase(field); if ((extractor->GetVertexIndex() && is_vertex) || (extractor->GetEdgeIndex() && !is_vertex)) return false; // index already exist if (is_vertex) { @@ -1365,7 +1366,7 @@ void LightningGraph::BatchBuildIndex(Transaction& txn, SchemaInfo* new_schema_in if (is_vertex) { SchemaManager* schema_manager = &new_schema_info->v_schema_manager; auto v_schema = schema_manager->GetSchema(label_id); - auto* field_extractor = v_schema->GetFieldExtractor(field_id); + auto* field_extractor = v_schema->GetFieldExtractorBase(field_id); FMA_DBG_ASSERT(field_extractor); VertexIndex* index = field_extractor->GetVertexIndex(); FMA_DBG_ASSERT(index); @@ -1445,7 +1446,7 @@ void LightningGraph::BatchBuildIndex(Transaction& txn, SchemaInfo* new_schema_in } else { SchemaManager* schema_manager = &new_schema_info->e_schema_manager; auto e_schema = schema_manager->GetSchema(label_id); - auto* field_extractor = e_schema->GetFieldExtractor(field_id); + auto* field_extractor = e_schema->GetFieldExtractorBase(field_id); FMA_DBG_ASSERT(field_extractor); EdgeIndex* edge_index = field_extractor->GetEdgeIndex(); FMA_DBG_ASSERT(edge_index); @@ -1578,8 +1579,9 @@ void LightningGraph::BatchBuildCompositeIndex(Transaction& txn, SchemaInfo* new_ prop = v_schema->GetDetachedVertexProperty(txn.GetTxn(), it.GetId()); } bool can_index = true; - for (const std::string &field : fields) { - const _detail::FieldExtractor* extractor = v_schema->GetFieldExtractor(field); + for (const std::string& field : fields) { + const _detail::FieldExtractorBase* extractor = + v_schema->GetFieldExtractorBase(field); if (extractor->GetIsNull(prop)) { can_index = false; break; @@ -1590,9 +1592,9 @@ void 
LightningGraph::BatchBuildCompositeIndex(Transaction& txn, SchemaInfo* new_ } std::vector values; std::vector types; - for (auto &field : fields) { - values.emplace_back(v_schema->GetFieldExtractor(field)->GetConstRef(prop)); - types.emplace_back(v_schema->GetFieldExtractor(field)->Type()); + for (auto& field : fields) { + values.emplace_back(v_schema->GetFieldExtractorBase(field)->GetConstRef(prop)); + types.emplace_back(v_schema->GetFieldExtractorBase(field)->Type()); } key_vids.emplace_back(values, types, it.GetId()); } @@ -1789,7 +1791,7 @@ void LightningGraph::RebuildFullTextIndex(const std::set& v_lids, } std::vector> kvs; for (auto& idx : fulltext_filelds) { - auto fe = schema->GetFieldExtractor(idx); + auto fe = schema->GetFieldExtractorBase(idx); if (fe->GetIsNull(properties)) continue; kvs.emplace_back(fe->Name(), fe->FieldToString(properties)); } @@ -1812,7 +1814,7 @@ void LightningGraph::RebuildFullTextIndex(const std::set& v_lids, Schema* schema = curr_schema_info->e_schema_manager.GetSchema(lid); const auto& fulltext_filelds = schema->GetFullTextFields(); for (auto& idx : fulltext_filelds) { - auto fe = schema->GetFieldExtractor(idx); + auto fe = schema->GetFieldExtractorBase(idx); Value properties; if (schema->DetachProperty()) { properties = schema->GetDetachedEdgeProperty(txn.GetTxn(), euid); @@ -1853,7 +1855,7 @@ bool LightningGraph::AddFullTextIndex(bool is_vertex, const std::string& label, if (!schema) { THROW_CODE(InputError, "label \"{}\" does not exist.", label); } - const _detail::FieldExtractor* extractor = schema->GetFieldExtractor(field); + const _detail::FieldExtractorBase* extractor = schema->GetFieldExtractorBase(field); if (!extractor) { THROW_CODE(InputError, "field \"{}\":\"{}\" does not exist.", label, field); } @@ -1933,7 +1935,7 @@ bool LightningGraph::BlockingAddCompositeIndex(const std::string& label, } std::vector field_types; for (const std::string &field : fields) { - const _detail::FieldExtractor* extractor = 
schema->GetFieldExtractor(field); + const _detail::FieldExtractorBase* extractor = schema->GetFieldExtractorBase(field); if (!extractor) { if (is_vertex) THROW_CODE(InputError, "Vertex field \"{}\":\"{}\" does not exist.", label, field); @@ -1974,8 +1976,8 @@ bool LightningGraph::BlockingAddCompositeIndex(const std::string& label, std::vector values; std::vector types; for (auto &field : fields) { - values.emplace_back(schema->GetFieldExtractor(field)->GetConstRef(prop)); - types.emplace_back(schema->GetFieldExtractor(field)->Type()); + values.emplace_back(schema->GetFieldExtractorBase(field)->GetConstRef(prop)); + types.emplace_back(schema->GetFieldExtractorBase(field)->Type()); } index->Add(txn.GetTxn(), composite_index_helper::GenerateCompositeIndexKey(values), vid); @@ -2000,7 +2002,7 @@ bool LightningGraph::BlockingAddCompositeIndex(const std::string& label, end_vid = txn.GetLooseNumVertex(); // vid range not known, try getting from index VertexIndex* idx = - schema->GetFieldExtractor(schema->GetPrimaryField())->GetVertexIndex(); + schema->GetFieldExtractorBase(schema->GetPrimaryField())->GetVertexIndex(); FMA_DBG_ASSERT(idx); VertexId beg = std::numeric_limits::max(); VertexId end = 0; @@ -2037,7 +2039,7 @@ bool LightningGraph::BlockingAddIndex(const std::string& label, const std::strin else THROW_CODE(InputError, "Edge label \"{}\" does not exist.", label); } - const _detail::FieldExtractor* extractor = schema->GetFieldExtractor(field); + const _detail::FieldExtractorBase* extractor = schema->GetFieldExtractorBase(field); if (!extractor) { if (is_vertex) THROW_CODE(InputError, "Vertex field \"{}\":\"{}\" does not exist.", label, field); @@ -2102,7 +2104,7 @@ bool LightningGraph::BlockingAddIndex(const std::string& label, const std::strin end_vid = txn.GetLooseNumVertex(); // vid range not known, try getting from index VertexIndex* idx = - schema->GetFieldExtractor(schema->GetPrimaryField())->GetVertexIndex(); + 
schema->GetFieldExtractorBase(schema->GetPrimaryField())->GetVertexIndex(); FMA_DBG_ASSERT(idx); VertexId beg = std::numeric_limits::max(); VertexId end = 0; @@ -2163,7 +2165,7 @@ bool LightningGraph::BlockingAddIndex(const std::string& label, const std::strin // vid range not known, try getting from index auto& indexed_fields = schema->GetIndexedFields(); for (size_t pos : indexed_fields) { - auto fe = schema->GetFieldExtractor(pos); + auto fe = schema->GetFieldExtractorBase(pos); if (!fe->IsOptional()) { EdgeIndex* idx = fe->GetEdgeIndex(); FMA_DBG_ASSERT(idx); @@ -2256,7 +2258,7 @@ bool LightningGraph::BlockingAddVectorIndex(bool is_vertex, const std::string& l if (!schema) { THROW_CODE(InputError, "Vertex label \"{}\" does not exist.", label); } - const _detail::FieldExtractor* extractor = schema->GetFieldExtractor(field); + const _detail::FieldExtractorBase* extractor = schema->GetFieldExtractorBase(field); if (!extractor) { THROW_CODE(InputError, "Vertex field \"{}\":\"{}\" does not exist.", label, field); } @@ -2339,7 +2341,7 @@ void LightningGraph::_DumpIndex(const IndexSpec& spec, VertexId first_vertex, auto vit = txn.GetVertexIterator(first_vertex, true); auto schema = txn.curr_schema_->v_schema_manager.GetSchema(lid); FMA_DBG_ASSERT(schema); - auto extractor = schema->GetFieldExtractor(spec.field); + auto extractor = schema->GetFieldExtractorBase(spec.field); FMA_DBG_ASSERT(extractor); for (; vit.IsValid(); vit.Next()) { Value v = vit.GetProperty(); @@ -2427,7 +2429,7 @@ void LightningGraph::_DumpIndex(const IndexSpec& spec, VertexId first_vertex, auto start_lid = SchemaManager::GetRecordLabelId(v); auto schema = txn.curr_schema_->e_schema_manager.GetSchema(lid); FMA_DBG_ASSERT(schema); - auto extractor = schema->GetFieldExtractor(spec.field); + auto extractor = schema->GetFieldExtractorBase(spec.field); FMA_DBG_ASSERT(extractor); for (; vit.IsValid(); vit.Next()) { Value v = vit.GetProperty(); @@ -2703,7 +2705,7 @@ bool LightningGraph::IsIndexed(const 
std::string& label, const std::string& fiel const Schema* s = is_vertex ? curr_schema->v_schema_manager.GetSchema(label) : curr_schema->e_schema_manager.GetSchema(label); if (!s) throw LabelNotExistException(label); - auto fe = s->GetFieldExtractor(field); + auto fe = s->GetFieldExtractorBase(field); if (is_vertex) { VertexIndex* idx = fe->GetVertexIndex(); @@ -2738,7 +2740,7 @@ bool LightningGraph::DeleteFullTextIndex(bool is_vertex, const std::string& labe if (!schema) { THROW_CODE(InputError, "label \"{}\" does not exist.", label); } - const _detail::FieldExtractor* extractor = schema->GetFieldExtractor(field); + const _detail::FieldExtractorBase* extractor = schema->GetFieldExtractorBase(field); if (!extractor) { THROW_CODE(InputError, "field \"{}\":\"{}\" does not exist.", label, field); } @@ -2765,7 +2767,7 @@ bool LightningGraph::DeleteIndex(const std::string& label, const std::string& fi if (field == schema->GetPrimaryField()) { throw PrimaryIndexCannotBeDeletedException(field); } - const _detail::FieldExtractor* extractor = schema->GetFieldExtractor(field); + const _detail::FieldExtractorBase* extractor = schema->GetFieldExtractorBase(field); bool index_exist = (is_vertex && extractor->GetVertexIndex()) || (!is_vertex && extractor->GetEdgeIndex()); if (!index_exist) return false; @@ -2838,7 +2840,7 @@ bool LightningGraph::DeleteVectorIndex( throw PrimaryIndexCannotBeDeletedException(field); } std::unique_ptr old_schema_backup(new SchemaInfo(*curr_schema.Get())); - const _detail::FieldExtractor* extractor = schema->GetFieldExtractor(field); + const _detail::FieldExtractorBase* extractor = schema->GetFieldExtractorBase(field); if (!extractor->GetVectorIndex()) { return false; } diff --git a/src/core/schema.cpp b/src/core/schema.cpp index 497242a490..63dfcb7fb9 100644 --- a/src/core/schema.cpp +++ b/src/core/schema.cpp @@ -48,15 +48,15 @@ void Schema::DeleteVertexFullTextIndex(VertexId vid, std::vector& void Schema::DeleteVertexIndex(KvTransaction& txn, 
VertexId vid, const Value& record) { for (auto& idx : indexed_fields_) { auto& fe = fields_[idx]; - if (fe.GetIsNull(record)) continue; - if (fe.Type() != FieldType::FLOAT_VECTOR) { - VertexIndex* index = fe.GetVertexIndex(); + if (fe->GetIsNull(record)) continue; + if (fe->Type() != FieldType::FLOAT_VECTOR) { + VertexIndex* index = fe->GetVertexIndex(); FMA_ASSERT(index); // update field index - if (!index->Delete(txn, fe.GetConstRef(record), vid)) { + if (!index->Delete(txn, fe->GetConstRef(record), vid)) { THROW_CODE(InputError, "Failed to un-index vertex [{}] with field " "value [{}:{}]: index value does not exist.", - vid, fe.Name(), fe.FieldToString(record)); + vid, fe->Name(), fe->FieldToString(record)); } } } @@ -73,11 +73,11 @@ void Schema::DeleteVertexCompositeIndex(lgraph::KvTransaction& txn, bool is_add_index = true; std::vector keys; for (int i = 0; i < (int)ids.size(); i++) { - if (fields_[std::stoi(ids[i])].GetIsNull(record)) { + if (fields_[std::stoi(ids[i])]->GetIsNull(record)) { is_add_index = false; break; } - keys.emplace_back(fields_[std::stoi(ids[i])].GetConstRef(record)); + keys.emplace_back(fields_[std::stoi(ids[i])]->GetConstRef(record)); } if (!is_add_index) continue; auto composite_index = kv.second; @@ -86,8 +86,8 @@ void Schema::DeleteVertexCompositeIndex(lgraph::KvTransaction& txn, std::vector field_names; std::vector field_values; for (int i = 0; i < (int)ids.size(); i++) { - field_names.push_back(fields_[std::stoi(ids[i])].Name()); - field_values.push_back(fields_[std::stoi(ids[i])].FieldToString(record)); + field_names.push_back(fields_[std::stoi(ids[i])]->Name()); + field_values.push_back(fields_[std::stoi(ids[i])]->FieldToString(record)); } THROW_CODE(InputError, "Failed to index vertex [{}] with field value {}:{}: " @@ -102,14 +102,14 @@ void Schema::DeleteCreatedVertexIndex(KvTransaction& txn, VertexId vid, const Va const std::vector& created) { for (auto& idx : created) { auto& fe = fields_[idx]; - if (fe.GetIsNull(record)) 
continue; - VertexIndex* index = fe.GetVertexIndex(); + if (fe->GetIsNull(record)) continue; + VertexIndex* index = fe->GetVertexIndex(); FMA_ASSERT(index); // the aim of this method is delete the index that has been created - if (!index->Delete(txn, fe.GetConstRef(record), vid)) { + if (!index->Delete(txn, fe->GetConstRef(record), vid)) { THROW_CODE(InputError, "Failed to un-index vertex [{}] with field " "value [{}:{}]: index value does not exist.", - vid, fe.Name(), fe.FieldToString(record)); + vid, fe->Name(), fe->FieldToString(record)); } } } @@ -127,8 +127,8 @@ void Schema::AddEdgeToFullTextIndex(EdgeUid euid, const Value& record, entry.lid = euid.lid; for (auto& idx : fulltext_fields_) { auto& fe = fields_[idx]; - if (fe.GetIsNull(record)) continue; - entry.kvs.emplace_back(fe.Name(), fe.FieldToString(record)); + if (fe->GetIsNull(record)) continue; + entry.kvs.emplace_back(fe->Name(), fe->FieldToString(record)); } buffers.emplace_back(std::move(entry)); } @@ -144,8 +144,8 @@ void Schema::AddVertexToFullTextIndex(VertexId vid, const Value& record, entry.lid = label_id_; for (auto& idx : fulltext_fields_) { auto& fe = fields_[idx]; - if (fe.GetIsNull(record)) continue; - entry.kvs.emplace_back(fe.Name(), fe.FieldToString(record)); + if (fe->GetIsNull(record)) continue; + entry.kvs.emplace_back(fe->Name(), fe->FieldToString(record)); } buffers.emplace_back(std::move(entry)); } @@ -155,15 +155,15 @@ void Schema::AddVertexToIndex(KvTransaction& txn, VertexId vid, const Value& rec created.reserve(fields_.size()); for (auto& idx : indexed_fields_) { auto& fe = fields_[idx]; - if (fe.GetIsNull(record)) continue; - if (fe.Type() != FieldType::FLOAT_VECTOR) { - VertexIndex* index = fe.GetVertexIndex(); + if (fe->GetIsNull(record)) continue; + if (fe->Type() != FieldType::FLOAT_VECTOR) { + VertexIndex* index = fe->GetVertexIndex(); FMA_ASSERT(index); // update field index - if (!index->Add(txn, fe.GetConstRef(record), vid)) { + if (!index->Add(txn, 
fe->GetConstRef(record), vid)) { THROW_CODE(InputError, "Failed to index vertex [{}] with field value [{}:{}]: index value already exists.", - vid, fe.Name(), fe.FieldToString(record)); + vid, fe->Name(), fe->FieldToString(record)); } } created.push_back(idx); @@ -181,11 +181,11 @@ void Schema::AddVertexToCompositeIndex(lgraph::KvTransaction& txn, lgraph::Verte bool is_add_index = true; std::vector keys; for (int i = 0; i < (int)ids.size(); i++) { - if (fields_[std::stoi(ids[i])].GetIsNull(record)) { + if (fields_[std::stoi(ids[i])]->GetIsNull(record)) { is_add_index = false; break; } - keys.emplace_back(fields_[std::stoi(ids[i])].GetConstRef(record)); + keys.emplace_back(fields_[std::stoi(ids[i])]->GetConstRef(record)); } if (!is_add_index) continue; auto composite_index = kv.second; @@ -194,8 +194,8 @@ void Schema::AddVertexToCompositeIndex(lgraph::KvTransaction& txn, lgraph::Verte std::vector field_names; std::vector field_values; for (int i = 0; i < (int)ids.size(); i++) { - field_names.push_back(fields_[std::stoi(ids[i])].Name()); - field_values.push_back(fields_[std::stoi(ids[i])].FieldToString(record)); + field_names.push_back(fields_[std::stoi(ids[i])]->Name()); + field_values.push_back(fields_[std::stoi(ids[i])]->FieldToString(record)); } THROW_CODE(InputError, "Failed to index vertex [{}] with field value {}:{}: " @@ -226,11 +226,7 @@ std::vector> Schema::GetRelationalCompositeIndexKey( if (flag && !visited.count(kv.first)) { std::vector field_names; for (const auto& id : field_ids) { - if (fast_alter_schema) { - field_names.push_back(fieldsV2_[std::stoi(id)].Name()); - } else { - field_names.push_back(fields_[std::stoi(id)].Name()); - } + field_names.push_back(fields_[std::stoi(id)]->Name()); } result.push_back(field_names); visited.insert(kv.first); @@ -243,24 +239,13 @@ std::vector> Schema::GetRelationalCompositeIndexKey( bool Schema::VertexUniqueIndexConflict(KvTransaction& txn, const Value& record) { for (auto& idx : indexed_fields_) { VertexIndex* 
index; - if (fast_alter_schema) { - auto &fe = fieldsV2_[idx]; - index = fe.GetVertexIndex(); - if (fe.GetIsNull(record)) continue; - FMA_ASSERT(index); - if (!index->IsUnique()) continue; - if (index->UniqueIndexConflict(txn, fe.GetConstRef(record))) { - return true; - } - } else { - auto& fe = fields_[idx]; - index = fe.GetVertexIndex(); - if (fe.GetIsNull(record)) continue; - FMA_ASSERT(index); - if (!index->IsUnique()) continue; - if (index->UniqueIndexConflict(txn, fe.GetConstRef(record))) { - return true; - } + auto& fe = fields_[idx]; + index = fe->GetVertexIndex(); + if (fe->GetIsNull(record)) continue; + FMA_ASSERT(index); + if (!index->IsUnique()) continue; + if (index->UniqueIndexConflict(txn, fe->GetConstRef(record))) { + return true; } } return false; @@ -268,30 +253,16 @@ bool Schema::VertexUniqueIndexConflict(KvTransaction& txn, const Value& record) void Schema::DeleteEdgeIndex(KvTransaction& txn, const EdgeUid& euid, const Value& record) { for (auto& idx : indexed_fields_) { - if (fast_alter_schema) { - auto& fe = fieldsV2_[idx]; - if (fe.GetIsNull(record)) continue; - EdgeIndex* index = fe.GetEdgeIndex(); - FMA_ASSERT(index); - // update field index - if (!index->Delete(txn, fe.GetConstRef(record), euid)) { - THROW_CODE(InputError, - "Failed to un-index edge with field " - "value [{}:{}]: index value does not exist.", - fe.Name(), fe.FieldToString(record)); - } - } else { - auto& fe = fields_[idx]; - if (fe.GetIsNull(record)) continue; - EdgeIndex* index = fe.GetEdgeIndex(); - FMA_ASSERT(index); - // update field index - if (!index->Delete(txn, fe.GetConstRef(record), euid)) { - THROW_CODE(InputError, - "Failed to un-index edge with field " - "value [{}:{}]: index value does not exist.", - fe.Name(), fe.FieldToString(record)); - } + auto& fe = fields_[idx]; + if (fe->GetIsNull(record)) continue; + EdgeIndex* index = fe->GetEdgeIndex(); + FMA_ASSERT(index); + // update field index + if (!index->Delete(txn, fe->GetConstRef(record), euid)) { + 
THROW_CODE(InputError, + "Failed to un-index edge with field " + "value [{}:{}]: index value does not exist.", + fe->Name(), fe->FieldToString(record)); } } } @@ -299,118 +270,62 @@ void Schema::DeleteEdgeIndex(KvTransaction& txn, const EdgeUid& euid, const Valu void Schema::DeleteCreatedEdgeIndex(KvTransaction& txn, const EdgeUid& euid, const Value& record, const std::vector& created) { for (auto& idx : created) { - if (fast_alter_schema) { - auto& fe = fieldsV2_[idx]; - if (fe.GetIsNull(record)) continue; - EdgeIndex* index = fe.GetEdgeIndex(); - FMA_ASSERT(index); - // the aim of this method is delete the index that has been created - if (!index->Delete(txn, fe.GetConstRef(record), euid)) { - THROW_CODE(InputError, - "Failed to un-index edge with field " - "value [{}:{}]: index value does not exist.", - fe.Name(), fe.FieldToString(record)); - } - } else { - auto& fe = fields_[idx]; - if (fe.GetIsNull(record)) continue; - EdgeIndex* index = fe.GetEdgeIndex(); - FMA_ASSERT(index); - // the aim of this method is delete the index that has been created - if (!index->Delete(txn, fe.GetConstRef(record), euid)) { - THROW_CODE(InputError, - "Failed to un-index edge with field " - "value [{}:{}]: index value does not exist.", - fe.Name(), fe.FieldToString(record)); - } + auto& fe = fields_[idx]; + if (fe->GetIsNull(record)) continue; + EdgeIndex* index = fe->GetEdgeIndex(); + FMA_ASSERT(index); + // the aim of this method is delete the index that has been created + if (!index->Delete(txn, fe->GetConstRef(record), euid)) { + THROW_CODE(InputError, + "Failed to un-index edge with field " + "value [{}:{}]: index value does not exist.", + fe->Name(), fe->FieldToString(record)); } } } void Schema::AddEdgeToIndex(KvTransaction& txn, const EdgeUid& euid, const Value& record, std::vector& created) { - created.reserve(fast_alter_schema ? 
fieldsV2_.size() : fields_.size()); + created.reserve(fields_.size()); for (auto& idx : indexed_fields_) { - if (fast_alter_schema) { - auto& fe = fields_[idx]; - if (fe.GetIsNull(record)) continue; - EdgeIndex* index = fe.GetEdgeIndex(); - FMA_ASSERT(index); - // update field index - if (!index->Add(txn, fe.GetConstRef(record), euid)) { - THROW_CODE( - InputError, - "Failed to index edge with field value [{}:{}]: index value already exists.", - fe.Name(), fe.FieldToString(record)); - } - created.push_back(idx); - - } else { - auto& fe = fields_[idx]; - if (fe.GetIsNull(record)) continue; - EdgeIndex* index = fe.GetEdgeIndex(); - FMA_ASSERT(index); - // update field index - if (!index->Add(txn, fe.GetConstRef(record), euid)) { - THROW_CODE( - InputError, - "Failed to index edge with field value [{}:{}]: index value already exists.", - fe.Name(), fe.FieldToString(record)); - } - created.push_back(idx); + auto& fe = fields_[idx]; + if (fe->GetIsNull(record)) continue; + EdgeIndex* index = fe->GetEdgeIndex(); + FMA_ASSERT(index); + // update field index + if (!index->Add(txn, fe->GetConstRef(record), euid)) { + THROW_CODE(InputError, + "Failed to index edge with field value [{}:{}]: index value already exists.", + fe->Name(), fe->FieldToString(record)); } + created.push_back(idx); } } void Schema::AddVectorToVectorIndex(KvTransaction& txn, VertexId vid, const Value& record) { for (auto& idx : vector_index_fields_) { - if (fast_alter_schema) { - auto& fe = fieldsV2_[idx]; - if (fe.GetIsNull(record)) continue; - VectorIndex* index = fe.GetVectorIndex(); - auto dim = index->GetVecDimension(); - std::vector> floatvector; - std::vector vids; - floatvector.push_back(fe.GetConstRef(record).AsType>()); - vids.push_back(vid); - if (floatvector.back().size() != (size_t)dim) { - THROW_CODE(InputError, - "vector index dimension mismatch, vector size:{}, dim:{}", - floatvector.back().size(), dim); - } - index->Add(floatvector, vids); - } else { - auto& fe = fields_[idx]; - if 
(fe.GetIsNull(record)) continue; - VectorIndex* index = fe.GetVectorIndex(); - auto dim = index->GetVecDimension(); - std::vector> floatvector; - std::vector vids; - floatvector.push_back(fe.GetConstRef(record).AsType>()); - vids.push_back(vid); - if (floatvector.back().size() != (size_t)dim) { - THROW_CODE(InputError, - "vector index dimension mismatch, vector size:{}, dim:{}", - floatvector.back().size(), dim); - } - index->Add(floatvector, vids); + auto& fe = fields_[idx]; + if (fe->GetIsNull(record)) continue; + VectorIndex* index = fe->GetVectorIndex(); + auto dim = index->GetVecDimension(); + std::vector> floatvector; + std::vector vids; + floatvector.push_back(fe->GetConstRef(record).AsType>()); + vids.push_back(vid); + if (floatvector.back().size() != (size_t)dim) { + THROW_CODE(InputError, "vector index dimension mismatch, vector size:{}, dim:{}", + floatvector.back().size(), dim); } + index->Add(floatvector, vids); } } void Schema::DeleteVectorIndex(KvTransaction& txn, VertexId vid, const Value& record) { for (auto& idx : vector_index_fields_) { - if (fast_alter_schema) { - auto& fe = fieldsV2_[idx]; - if (fe.GetIsNull(record)) continue; - VectorIndex* index = fe.GetVectorIndex(); - index->Remove({vid}); - } else { - auto& fe = fields_[idx]; - if (fe.GetIsNull(record)) continue; - VectorIndex* index = fe.GetVectorIndex(); - index->Remove({vid}); - } + auto& fe = fields_[idx]; + if (fe->GetIsNull(record)) continue; + VectorIndex* index = fe->GetVectorIndex(); + index->Remove({vid}); } } @@ -630,7 +545,7 @@ FieldData Schema::GetFieldDataFromField(const _detail::FieldExtractorV2* extract } void Schema::ParseAndSet(Value& record, const FieldData& data, - const _detail::FieldExtractorV2* extractor) const { + _detail::FieldExtractorBase* extractor) const { bool data_is_null = data.type == FieldType::NUL; extractor->SetIsNull(record, data_is_null); if (data_is_null) return; @@ -691,7 +606,7 @@ void Schema::ParseAndSet(Value& record, const FieldData& data, 
record.Resize(record.Size()); char* ptr = - (char*)record.Data() + extractor->GetFieldOffset(record, extractor->GetFieldId()); + (char*)record.Data() + extractor->GetFieldOffset(record); memcpy(ptr, (*data.data.buf).data(), 50); return; } @@ -747,7 +662,7 @@ void Schema::ParseAndSet(Value& record, const FieldData& data, template void Schema::_ParseStringAndSet(Value& record, const std::string& data, - const ::lgraph::_detail::FieldExtractorV2* extractor) const { + ::lgraph::_detail::FieldExtractorBase* extractor) const { typedef typename field_data_helper::FieldType2CType::type CT; typedef typename field_data_helper::FieldType2StorageType::type ST; CT s{}; @@ -759,28 +674,28 @@ void Schema::_ParseStringAndSet(Value& record, const std::string& data, template <> void Schema::_ParseStringAndSet( Value& record, const std::string& data, - const ::lgraph::_detail::FieldExtractorV2* extractor) const { + ::lgraph::_detail::FieldExtractorBase* extractor) const { return _SetVariableLengthValue(record, Value::ConstRef(data), extractor); } template <> void Schema::_ParseStringAndSet( Value& record, const std::string& data, - const ::lgraph::_detail::FieldExtractorV2* extractor) const { + ::lgraph::_detail::FieldExtractorBase* extractor) const { // check whether the point data is valid; if (!::lgraph_api::TryDecodeEWKB(data, ::lgraph_api::SpatialType::POINT)) throw ParseStringException(extractor->Name(), data, FieldType::POINT); // FMA_DBG_CHECK_EQ(sizeof(data), field_data_helper::FieldTypeSize(def_.type)); size_t Size = record.Size(); record.Resize(Size); - char* ptr = (char*)record.Data() + extractor->GetFieldOffset(record, extractor->GetFieldId()); + char* ptr = (char*)record.Data() + extractor->GetFieldOffset(record); memcpy(ptr, data.data(), 50); } template <> void Schema::_ParseStringAndSet( Value& record, const std::string& data, - const ::lgraph::_detail::FieldExtractorV2* extractor) const { + ::lgraph::_detail::FieldExtractorBase* extractor) const { // check whether 
the linestring data is valid; if (!::lgraph_api::TryDecodeEWKB(data, ::lgraph_api::SpatialType::LINESTRING)) throw ParseStringException(extractor->Name(), data, FieldType::LINESTRING); @@ -790,7 +705,7 @@ void Schema::_ParseStringAndSet( template <> void Schema::_ParseStringAndSet( Value& record, const std::string& data, - const ::lgraph::_detail::FieldExtractorV2* extractor) const { + ::lgraph::_detail::FieldExtractorBase* extractor) const { if (!::lgraph_api::TryDecodeEWKB(data, ::lgraph_api::SpatialType::POLYGON)) throw ParseStringException(extractor->Name(), data, FieldType::POLYGON); return _SetVariableLengthValue(record, Value::ConstRef(data), extractor); @@ -799,7 +714,7 @@ void Schema::_ParseStringAndSet( template <> void Schema::_ParseStringAndSet( Value& record, const std::string& data, - const ::lgraph::_detail::FieldExtractorV2* extractor) const { + ::lgraph::_detail::FieldExtractorBase* extractor) const { ::lgraph_api::SpatialType s; // throw ParseStringException in this function; try { @@ -816,7 +731,7 @@ void Schema::_ParseStringAndSet( template <> void Schema::_ParseStringAndSet( Value& record, const std::string& data, - const ::lgraph::_detail::FieldExtractorV2* extractor) const { + ::lgraph::_detail::FieldExtractorBase* extractor) const { std::vector vec; // check if there are only numbers and commas std::regex nonNumbersAndCommas("[^0-9,.]"); @@ -851,7 +766,7 @@ void Schema::_ParseStringAndSet( * \param data The string representation of the data. */ void Schema::ParseAndSet(Value& record, const std::string& data, - const ::lgraph::_detail::FieldExtractorV2* extractor) const { + ::lgraph::_detail::FieldExtractorBase* extractor) const { if (data.empty() && (extractor->IsFixedType() || extractor->Type() == FieldType::LINESTRING || extractor->Type() == FieldType::POLYGON || extractor->Type() == FieldType::SPATIAL || @@ -908,17 +823,18 @@ void Schema::ParseAndSet(Value& record, const std::string& data, * * \param record The record. 
* \param data Value to be set. - * \param extractor The field extractor pointer. + * \param extr The field extractor pointer. */ void Schema::_SetVariableLengthValue(Value& record, const Value& data, - const ::lgraph::_detail::FieldExtractorV2* extractor) const { - FMA_DBG_ASSERT(extractor->is_vfield_); + ::lgraph::_detail::FieldExtractorBase* extractor) const { + _detail::FieldExtractorV2* extr = dynamic_cast<_detail::FieldExtractorV2*>(extractor); + FMA_DBG_ASSERT(extr->is_vfield_); if (data.Size() > _detail::MAX_STRING_SIZE) - throw DataSizeTooLargeException(extractor->Name(), data.Size(), _detail::MAX_STRING_SIZE); - size_t foff = extractor->GetFieldOffset(record, extractor->GetFieldId()); + throw DataSizeTooLargeException(extr->Name(), data.Size(), _detail::MAX_STRING_SIZE); + size_t foff = extr->GetFieldOffset(record); char* rptr = (char*)record.Data(); size_t variable_offset = ::lgraph::_detail::UnalignedGet(rptr + foff); - size_t fsize = extractor->GetDataSize(record); + size_t fsize = extr->GetDataSize(record); // realloc record with original size to make sure we own the memory record.Resize(record.Size()); @@ -946,25 +862,26 @@ void Schema::_SetVariableLengthValue(Value& record, const Value& data, memcpy(rptr + variable_offset + sizeof(uint32_t), data.Data(), data.Size()); // update offset of other veriable fields - size_t count = extractor->GetRecordCount(record); + size_t count = extr->GetRecordCount(record); // adjust offset of other fields - for (size_t i = extractor->GetFieldId() + 1; i < count; i++) { - if (fieldsV2_[i].IsFixedType()) continue; - size_t offset = extractor->GetFieldOffset(record, i); + for (size_t i = extr->GetFieldId() + 1; i < count; i++) { + if (fields_[i]->IsFixedType()) continue; + size_t offset = extr->GetFieldOffset(record, i); size_t var_offset = ::lgraph::_detail::UnalignedGet(rptr + offset); ::lgraph::_detail::UnalignedSet(rptr + offset, var_offset + diff); } } - void Schema::CopyFieldsRaw(Value& dst, const std::vector 
fids_in_dst, const Schema* src_schema, const Value& src, const std::vector fids_in_src) { FMA_DBG_ASSERT(fids_in_dst.size() == fids_in_src.size()); dst.Resize(dst.Size()); for (size_t i = 0; i < fids_in_dst.size(); i++) { - const _detail::FieldExtractor* dst_fe = GetFieldExtractor(fids_in_dst[i]); - const _detail::FieldExtractor* src_fe = src_schema->GetFieldExtractor(fids_in_src[i]); + const _detail::FieldExtractor* dst_fe = + static_cast<_detail::FieldExtractor*>(GetFieldExtractor(fids_in_dst[i])); + const _detail::FieldExtractor* src_fe = + static_cast<_detail::FieldExtractor*>(src_schema->GetFieldExtractor(fids_in_src[i])); dst_fe->CopyDataRaw(dst, src, src_fe); } } @@ -979,26 +896,26 @@ void Schema::RefreshLayout() { blob_fields_.clear(); for (size_t i = 0; i < fields_.size(); i++) { auto& f = fields_[i]; - if (f.Type() == FieldType::NUL) throw FieldCannotBeNullTypeException(f.Name()); - if (f.Type() == FieldType::BLOB) blob_fields_.push_back(i); + if (f->Type() == FieldType::NUL) throw FieldCannotBeNullTypeException(f->Name()); + if (f->Type() == FieldType::BLOB) blob_fields_.push_back(i); } // if label is included in record, data starts after LabelId size_t data_start_off = label_in_record_ ? 
sizeof(LabelId) : 0; // setup name_to_fields name_to_idx_.clear(); for (size_t i = 0; i < fields_.size(); i++) { - auto& f = fields_[i]; - f.SetFieldId(i); - f.SetNullableArrayOff(data_start_off); - if (_F_UNLIKELY(name_to_idx_.find(f.Name()) != name_to_idx_.end())) - throw FieldAlreadyExistsException(f.Name()); - name_to_idx_[f.Name()] = i; + auto f = (_detail::FieldExtractor*)fields_[i].get(); + f->SetFieldId(i); + f->SetNullableArrayOff(data_start_off); + if (_F_UNLIKELY(name_to_idx_.find(f->Name()) != name_to_idx_.end())) + throw FieldAlreadyExistsException(f->Name()); + name_to_idx_[f->Name()] = i; } // layout nullable array n_nullable_ = 0; for (auto& f : fields_) { - if (f.IsOptional()) { - f.SetNullableOff(n_nullable_); + if (f->IsOptional()) { + (static_cast<_detail::FieldExtractor*>(f.get()))->SetNullableOff(n_nullable_); n_nullable_++; } } @@ -1007,10 +924,10 @@ void Schema::RefreshLayout() { n_fixed_ = 0; n_variable_ = 0; for (auto& f : fields_) { - if (field_data_helper::IsFixedLengthFieldType(f.Type())) { + if (field_data_helper::IsFixedLengthFieldType(f->Type())) { n_fixed_++; - f.SetFixedLayoutInfo(v_offset_start_); - v_offset_start_ += f.TypeSize(); + (static_cast<_detail::FieldExtractor*>(f.get()))->SetFixedLayoutInfo(v_offset_start_); + v_offset_start_ += f->TypeSize(); } else { n_variable_++; } @@ -1018,16 +935,17 @@ void Schema::RefreshLayout() { // now, layout the variable fields size_t vidx = 0; for (auto& f : fields_) { - if (!field_data_helper::IsFixedLengthFieldType(f.Type())) - f.SetVLayoutInfo(v_offset_start_, n_variable_, vidx++); + if (!field_data_helper::IsFixedLengthFieldType(f->Type())) + (static_cast<_detail::FieldExtractor*>(f.get())) + ->SetVLayoutInfo(v_offset_start_, n_variable_, vidx++); } // finally, check the indexed fields indexed_fields_.clear(); bool found_primary = false; for (auto& f : fields_) { - if (!f.GetVertexIndex() && !f.GetEdgeIndex()) continue; - indexed_fields_.emplace_hint(indexed_fields_.end(), 
f.GetFieldId()); - if (f.Name() == primary_field_) { + if (!f->GetVertexIndex() && !f->GetEdgeIndex()) continue; + indexed_fields_.emplace_hint(indexed_fields_.end(), f->GetFieldId()); + if (f->Name() == primary_field_) { FMA_ASSERT(!found_primary); found_primary = true; } @@ -1039,8 +957,8 @@ void Schema::RefreshLayout() { fulltext_fields_.clear(); for (auto& f : fields_) { - if (!f.FullTextIndexed()) continue; - fulltext_fields_.emplace(f.GetFieldId()); + if (!f->FullTextIndexed()) continue; + fulltext_fields_.emplace(f->GetFieldId()); } } @@ -1048,23 +966,23 @@ void Schema::RefreshLayoutForFastSchema() { FMA_ASSERT(fast_alter_schema); blob_fields_.clear(); name_to_idx_.clear(); - for (size_t i = 0; i < fieldsV2_.size(); i++) { - auto& f = fieldsV2_[i]; - if (f.IsDeleted()) continue; - f.SetLabelInRecord(label_in_record_); - if (f.Type() == FieldType::NUL) throw FieldCannotBeNullTypeException(f.Name()); - if (f.Type() == FieldType::BLOB) blob_fields_.push_back(i); - if (_F_UNLIKELY(name_to_idx_.find(f.Name()) != name_to_idx_.end())) - throw FieldAlreadyExistsException(f.Name()); - name_to_idx_[f.Name()] = i; + for (size_t i = 0; i < fields_.size(); i++) { + auto f = static_cast<_detail::FieldExtractorV2*>(fields_[i].get()); + if (f->IsDeleted()) continue; + f->SetLabelInRecord(label_in_record_); + if (f->Type() == FieldType::NUL) throw FieldCannotBeNullTypeException(f->Name()); + if (f->Type() == FieldType::BLOB) blob_fields_.push_back(i); + if (_F_UNLIKELY(name_to_idx_.find(f->Name()) != name_to_idx_.end())) + throw FieldAlreadyExistsException(f->Name()); + name_to_idx_[f->Name()] = i; } indexed_fields_.clear(); bool found_primary = false; - for (auto& f : fieldsV2_) { - if (!f.GetVertexIndex() && !f.GetEdgeIndex()) continue; - indexed_fields_.emplace_hint(indexed_fields_.end(), f.GetFieldId()); - if (f.Name() == primary_field_) { + for (auto& f : fields_) { + if (!f->GetVertexIndex() && !f->GetEdgeIndex()) continue; + 
indexed_fields_.emplace_hint(indexed_fields_.end(), f->GetFieldId()); + if (f->Name() == primary_field_) { FMA_ASSERT(!found_primary); found_primary = true; } @@ -1075,9 +993,9 @@ void Schema::RefreshLayoutForFastSchema() { } fulltext_fields_.clear(); - for (auto& f : fieldsV2_) { - if (!f.FullTextIndexed()) continue; - fulltext_fields_.emplace(f.GetFieldId()); + for (auto& f : fields_) { + if (!f->FullTextIndexed()) continue; + fulltext_fields_.emplace(f->GetFieldId()); } } @@ -1112,15 +1030,15 @@ Value Schema::CreateEmptyRecord(size_t size_hint) const { } } } else { - size_t num_fields = fieldsV2_.size(); + size_t num_fields = fields_.size(); // version - [label] - count - null_array - offset_array size_t min_size = sizeof(VersionId) + (label_in_record_ ? sizeof(LabelId) : 0) + sizeof(FieldId) + (num_fields + 7) / 8 + num_fields * sizeof(DataOffset); // Fixed-value and Variable-value. Variable-value will store an offset at Fixed-value area // and assume the length of every variable value is 0; - for (const auto& field : fieldsV2_) { + for (const auto& field : fields_) { min_size += - field.IsFixedType() ? field.TypeSize() : (sizeof(DataOffset) + sizeof(uint32_t)); + field->IsFixedType() ? field->TypeSize() : (sizeof(DataOffset) + sizeof(uint32_t)); } v.Resize(min_size); @@ -1156,27 +1074,26 @@ Value Schema::CreateEmptyRecord(size_t size_hint) const { // field0 do not need to store its offset. for (size_t i = 1; i < num_fields; i++) { data_offset += - fieldsV2_[i - 1].IsFixedType() ? fieldsV2_[i - 1].TypeSize() : sizeof(DataOffset); + fields_[i - 1]->IsFixedType() ? fields_[i - 1]->TypeSize() : sizeof(DataOffset); ::lgraph::_detail::UnalignedSet(offset_ptr, data_offset); offset_ptr += sizeof(DataOffset); } // the latest offset marks the end of the fixed-area. - data_offset += fieldsV2_[num_fields - 1].IsFixedType() - ? fieldsV2_[num_fields - 1].TypeSize() - : sizeof(DataOffset); + data_offset += fields_[num_fields - 1]->IsFixedType() ? 
fields_[num_fields - 1]->TypeSize() + : sizeof(DataOffset); ::lgraph::_detail::UnalignedSet(offset_ptr, data_offset); // 7. Set variable fields offset. They are stored at fixed-area, and their sizes are all // zero. - for (const auto& field : fieldsV2_) { - if (!field.IsFixedType()) { + for (const auto& field : fields_) { + if (!field->IsFixedType()) { DataOffset var_offset = 0; // variable fields offset. - if (field.GetFieldId() == 0) { + if (field->GetFieldId() == 0) { var_offset = offset + num_fields * sizeof(DataOffset); } else { var_offset = ::lgraph::_detail::UnalignedGet( - ptr + offset_begin + (field.GetFieldId() - 1) * sizeof(DataOffset)); + ptr + offset_begin + (field->GetFieldId() - 1) * sizeof(DataOffset)); } ::lgraph::_detail::UnalignedSet(ptr + var_offset, data_offset); @@ -1275,7 +1192,6 @@ void Schema::DeleteDetachedEdgeProperty(KvTransaction& txn, const EdgeUid& eid) void Schema::ClearFields() { label_.clear(); fields_.clear(); - fieldsV2_.clear(); name_to_idx_.clear(); n_fixed_ = 0; n_variable_ = 0; @@ -1306,24 +1222,22 @@ void Schema::SetSchema(bool is_vertex, size_t n_fields, const FieldSpec* fields, name_to_idx_.clear(); // assign id to fields, starting from fixed length types // then variable length types + fields_.clear(); + fields_.reserve(n_fields); if (!fast_alter_schema) { - fields_.clear(); - fields_.reserve(n_fields); for (size_t i = 0; i < n_fields; i++) { const FieldSpec& fs = fields[i]; - if (field_data_helper::IsFixedLengthFieldType(fs.type)) fields_.emplace_back(fs); + if (field_data_helper::IsFixedLengthFieldType(fs.type)) + fields_.push_back(std::make_shared<_detail::FieldExtractor>(fs)); } for (size_t i = 0; i < n_fields; i++) { const FieldSpec& fs = fields[i]; if (!field_data_helper::IsFixedLengthFieldType(fs.type)) - fields_.push_back(_detail::FieldExtractor(fs)); + fields_.push_back(std::make_shared<_detail::FieldExtractor>(fs)); } } else { - fieldsV2_.clear(); - fieldsV2_.reserve(n_fields); for (size_t i = 0; i < n_fields; 
i++) { - fieldsV2_.emplace_back(FieldSpec(fields[i])); - fieldsV2_.back().SetFieldId(i); + fields_.push_back(std::make_shared<_detail::FieldExtractorV2>(fields[i], i)); } } is_vertex_ = is_vertex; @@ -1363,13 +1277,13 @@ void Schema::DelFields(const std::vector& del_fields) { } auto composite_index_key = GetRelationalCompositeIndexKey(del_ids); - for (const auto &k : composite_index_key) { + for (const auto& k : composite_index_key) { UnVertexCompositeIndex(k); } if (fast_alter_schema) { for (size_t del_id : del_ids) { - fieldsV2_[del_id].MarkDeleted(); + fields_[del_id]->MarkDeleted(); } } else { del_ids.push_back(fields_.size()); @@ -1392,19 +1306,19 @@ void Schema::AddFields(const std::vector& add_fields) { f.name == KeyWordFunc::GetStrFromKeyWord(KeyWord::SRC_ID) || f.name == KeyWordFunc::GetStrFromKeyWord(KeyWord::DST_ID)) { THROW_CODE(InputError, - "Label[{}]: Property name cannot be \"SKIP\" or \"SRC_ID\" or \"DST_ID\"", label_); + "Label[{}]: Property name cannot be \"SKIP\" or \"SRC_ID\" or \"DST_ID\"", + label_); } if (_F_UNLIKELY(name_to_idx_.find(f.name) != name_to_idx_.end())) throw FieldAlreadyExistsException(f.name); if (fast_alter_schema) { - FieldId id = fieldsV2_.size(); - fieldsV2_.push_back(_detail::FieldExtractorV2(FieldSpec(f), id)); - lgraph::CheckValidFieldNum(fieldsV2_.size()); + fields_.push_back( + std::make_shared<_detail::FieldExtractorV2>(FieldSpec(f), fields_.size())); } else { - fields_.push_back(_detail::FieldExtractor(f)); - lgraph::CheckValidFieldNum(fields_.size()); + fields_.push_back(std::make_shared<_detail::FieldExtractor>(FieldSpec(f))); } } + lgraph::CheckValidFieldNum(fields_.size()); RefreshLayout(); } @@ -1418,17 +1332,19 @@ void Schema::ModFields(const std::vector& mod_fields) { UnVertexIndex(fid); UnEdgeIndex(fid); if (fast_alter_schema) { - auto& extractor = fieldsV2_[fid]; - extractor = _detail::FieldExtractorV2(FieldSpec(f)); - extractor.SetFieldId(fid); + auto& extractor = fields_[fid]; + extractor.reset(); + 
extractor = std::make_shared<_detail::FieldExtractorV2>(f); + extractor->SetFieldId(fid); } else { auto& extractor = fields_[fid]; - extractor = _detail::FieldExtractor(f); + extractor.reset(); + extractor = std::make_shared<_detail::FieldExtractor>(f); } mod_ids.push_back(fid); } auto composite_index_key = GetRelationalCompositeIndexKey(mod_ids); - for (const auto &k : composite_index_key) { + for (const auto& k : composite_index_key) { UnVertexCompositeIndex(k); } RefreshLayout(); @@ -1438,23 +1354,16 @@ std::vector Schema::GetFieldSpecPtrs() const { std::vector schema; schema.reserve(fields_.size()); for (auto& f : fields_) { - schema.push_back(&f.GetFieldSpec()); + schema.push_back(&f->GetFieldSpec()); } return schema; } std::vector Schema::GetFieldSpecs() const { std::vector schema; - if (fast_alter_schema) { - schema.reserve(fieldsV2_.size()); - for (auto& f : fieldsV2_) { - schema.emplace_back(f.GetFieldSpec()); - } - return schema; - } schema.reserve(fields_.size()); for (auto& f : fields_) { - schema.emplace_back(f.GetFieldSpec()); + schema.emplace_back(f->GetFieldSpec()); } return schema; } @@ -1462,68 +1371,41 @@ std::vector Schema::GetFieldSpecs() const { std::map Schema::GetFieldSpecsAsMap() const { std::map ret; for (auto& kv : name_to_idx_) { - ret.emplace_hint( - ret.end(), - std::make_pair(kv.first, fast_alter_schema ? 
fieldsV2_[kv.second].GetFieldSpec() - : fields_[kv.second].GetFieldSpec())); + ret.emplace_hint(ret.end(), std::make_pair(kv.first, fields_[kv.second]->GetFieldSpec())); } return ret; } -const _detail::FieldExtractor* Schema::GetFieldExtractor(size_t field_num) const { +_detail::FieldExtractorBase* Schema::GetFieldExtractorBase(size_t field_num) const { if (_F_UNLIKELY(field_num >= fields_.size())) throw FieldNotFoundException(field_num); - return &fields_[field_num]; + return fields_[field_num].get(); } -const _detail::FieldExtractor* Schema::TryGetFieldExtractor(size_t field_num) const { +_detail::FieldExtractorBase* Schema::TryGetFieldExtractorBase(size_t field_num) const { if (_F_UNLIKELY(field_num >= fields_.size())) return nullptr; - return &fields_[field_num]; + return fields_[field_num].get(); } -const _detail::FieldExtractorV2* Schema::GetFieldExtractorV2(size_t field_num) const { - if (_F_UNLIKELY(field_num >= fieldsV2_.size())) throw FieldNotFoundException(field_num); - return &fieldsV2_[field_num]; -} - -const _detail::FieldExtractorV2* Schema::TryGetFieldExtractorV2(size_t field_num) const { - if (_F_UNLIKELY(field_num >= fieldsV2_.size())) return nullptr; - return &fieldsV2_[field_num]; -} - -const _detail::FieldExtractor* Schema::GetFieldExtractor(const std::string& field_name) const { - auto it = name_to_idx_.find(field_name); - if (_F_UNLIKELY(it == name_to_idx_.end())) throw FieldNotFoundException(field_name); - return &fields_[it->second]; -} - -const _detail::FieldExtractor* Schema::TryGetFieldExtractor(const std::string& field_name) const { - auto it = name_to_idx_.find(field_name); - if (_F_UNLIKELY(it == name_to_idx_.end())) return nullptr; - return &fields_[it->second]; -} - -const _detail::FieldExtractorV2* Schema::GetFieldExtractorV2(const std::string& field_name) const { +_detail::FieldExtractorBase* Schema::GetFieldExtractorBase(const std::string& field_name) const { auto it = name_to_idx_.find(field_name); if (_F_UNLIKELY(it == 
name_to_idx_.end())) throw FieldNotFoundException(field_name); - return &fieldsV2_[it->second]; + return fields_[it->second].get(); } -const _detail::FieldExtractorV2* Schema::TryGetFieldExtractorV2( - const std::string& field_name) const { +_detail::FieldExtractorBase* Schema::TryGetFieldExtractorBase(const std::string& field_name) const { auto it = name_to_idx_.find(field_name); if (_F_UNLIKELY(it == name_to_idx_.end())) return nullptr; - return &fieldsV2_[it->second]; + return fields_[it->second].get(); } std::vector Schema::GetCompositeIndexSpec() const { std::vector compositeIndexSpecList; - for (const auto &kv : composite_index_map) { + for (const auto& kv : composite_index_map) { std::vector ids; boost::split(ids, kv.first, boost::is_any_of(_detail::COMPOSITE_INDEX_KEY_SEPARATOR)); std::vector fields; for (int i = 0; i < (int)ids.size(); i++) { - fields.emplace_back(fast_alter_schema ? this->fieldsV2_[std::stoi(ids[i])].Name() : - this->fields_[std::stoi(ids[i])].Name()); + fields.emplace_back(this->fields_[std::stoi(ids[i])]->Name()); } compositeIndexSpecList.push_back({label_, fields, kv.second->type_}); } @@ -1531,10 +1413,7 @@ std::vector Schema::GetCompositeIndexSpec() const { } size_t Schema::GetFieldId(const std::string& name) const { - if (fast_alter_schema) { - return GetFieldExtractorV2(name)->GetFieldId(); - } - auto fe = GetFieldExtractor(name); + auto fe = GetFieldExtractorBase(name); return fe->GetFieldId(); } @@ -1556,19 +1435,12 @@ std::vector Schema::GetFieldIds(const std::vector& names) c std::string Schema::DumpRecord(const Value& record) const { std::string ret = "{"; - if (fast_alter_schema) { - for (size_t i = 0; i < fieldsV2_.size(); i++) { - auto& f = fieldsV2_[i]; - ret.append(f.Name()).append("=").append(f.FieldToString(record)); - if (i != fieldsV2_.size() - 1) ret.append(", "); - } - } else { - for (size_t i = 0; i < fields_.size(); i++) { - auto& f = fields_[i]; - ret.append(f.Name()).append("=").append(f.FieldToString(record)); 
- if (i != fields_.size() - 1) ret.append(", "); - } + for (size_t i = 0; i < fields_.size(); i++) { + auto& f = fields_[i]; + ret.append(f->Name()).append("=").append(f->FieldToString(record)); + if (i != fields_.size() - 1) ret.append(", "); } + ret.append("}"); return ret; } diff --git a/src/core/schema.h b/src/core/schema.h index 0317b9f9b5..d730e69b57 100644 --- a/src/core/schema.h +++ b/src/core/schema.h @@ -28,6 +28,7 @@ #include "core/data_type.h" #include "core/field_extractor.h" #include "core/field_extractor_v2.h" +#include "core/field_extractor_base.h" #include "core/schema_common.h" #include "core/value.h" #include "core/full_text_index.h" @@ -95,8 +96,7 @@ class Schema { bool deleted_ = false; bool is_vertex_ = false; - std::vector<_detail::FieldExtractor> fields_; - std::vector<_detail::FieldExtractorV2> fieldsV2_; + std::vector> fields_; std::unordered_map name_to_idx_; size_t n_fixed_ = 0; size_t n_variable_ = 0; @@ -225,7 +225,9 @@ class Schema { // mod fields, assuming fields are already de-duplicated void ModFields(const std::vector& mod_fields); - const std::vector<_detail::FieldExtractor>& GetFields() const { return fields_; } + const std::vector>& GetFields() const { + return fields_; + } //----------------------- // const accessors @@ -262,20 +264,39 @@ class Schema { std::map GetFieldSpecsAsMap() const; - size_t GetNumFields() const { return fast_alter_schema ? 
fieldsV2_.size() : fields_.size(); } + size_t GetNumFields() const { return fields_.size(); } bool GetFastAlterSchema() const {return fast_alter_schema;} - const _detail::FieldExtractor* GetFieldExtractor(size_t field_num) const; - const _detail::FieldExtractor* TryGetFieldExtractor(size_t field_num) const; - - const _detail::FieldExtractor* GetFieldExtractor(const std::string& field_name) const; - const _detail::FieldExtractor* TryGetFieldExtractor(const std::string& field_name) const; + _detail::FieldExtractorBase* GetFieldExtractorBase(size_t field_num) const; + _detail::FieldExtractorBase* TryGetFieldExtractorBase(size_t field_num) const; + _detail::FieldExtractorBase* GetFieldExtractorBase(const std::string& field_name) const; + _detail::FieldExtractorBase* TryGetFieldExtractorBase(const std::string& field_name) const; - const _detail::FieldExtractorV2* GetFieldExtractorV2(size_t field_num) const; - const _detail::FieldExtractorV2* TryGetFieldExtractorV2(size_t field_num) const; + _detail::FieldExtractorV2* GetFieldExtractorV2(size_t field_num) const { + return dynamic_cast<_detail::FieldExtractorV2*>(GetFieldExtractorBase(field_num)); + } + _detail::FieldExtractorV2* TryGetFieldExtractorV2(size_t field_num) const { + return dynamic_cast<_detail::FieldExtractorV2*>(GetFieldExtractorBase(field_num)); + } + _detail::FieldExtractorV2* GetFieldExtractorV2(const std::string& field_name) const { + return dynamic_cast<_detail::FieldExtractorV2*>(GetFieldExtractorBase(field_name)); + } + _detail::FieldExtractorV2* TryGetFieldExtractorV2(const std::string& field_name) const { + return dynamic_cast<_detail::FieldExtractorV2*>(GetFieldExtractorBase(field_name)); + } - const _detail::FieldExtractorV2* GetFieldExtractorV2(const std::string& field_name) const; - const _detail::FieldExtractorV2* TryGetFieldExtractorV2(const std::string& field_name) const; + _detail::FieldExtractor* GetFieldExtractor(size_t field_num) const { + return 
dynamic_cast<_detail::FieldExtractor*>(GetFieldExtractorBase(field_num)); + } + _detail::FieldExtractor* TryGetFieldExtractor(size_t field_num) const { + return dynamic_cast<_detail::FieldExtractor*>(GetFieldExtractorBase(field_num)); + } + _detail::FieldExtractor* GetFieldExtractor(const std::string& field_name) const { + return dynamic_cast<_detail::FieldExtractor*>(GetFieldExtractorBase(field_name)); + } + _detail::FieldExtractor* TryGetFieldExtractor(const std::string& field_name) const { + return dynamic_cast<_detail::FieldExtractor*>(GetFieldExtractorBase(field_name)); + } size_t GetFieldId(const std::string& name) const; @@ -379,30 +400,32 @@ class Schema { typename std::enable_if::type GetField( const Value& record, const FieldT& field_name_or_num, const GetBlobByKeyFunc& get_blob) const { + _detail::FieldExtractorBase* extractor = TryGetFieldExtractorBase(field_name_or_num); + if (!extractor) return FieldData(); + if (fast_alter_schema) { - const ::lgraph::_detail::FieldExtractorV2* extractor = - TryGetFieldExtractorV2(field_name_or_num); - if (!extractor) return FieldData(); - if (extractor->GetRecordCount(record) < extractor->GetFieldId() + 1) { + if (dynamic_cast<_detail::FieldExtractorV2*>(extractor)->GetRecordCount(record) < + extractor->GetFieldId() + 1) { if (extractor->HasInitedValue()) { return extractor->GetInitedValue(); } return FieldData(); } - if (extractor->GetIsNull(record)) return FieldData(); - if (_F_UNLIKELY(extractor->Type()) == FieldType::BLOB) { - return GetFieldDataFromBlobField(extractor, record, get_blob); - } else { - return GetFieldDataFromField(extractor, record); - } } - auto extractor = TryGetFieldExtractor(field_name_or_num); - if (!extractor) return FieldData(); + if (extractor->GetIsNull(record)) return FieldData(); - if (_F_UNLIKELY(extractor->Type() == FieldType::BLOB)) - return GetFieldDataFromBlobField(extractor, record, get_blob); - else - return GetFieldDataFromField(extractor, record); + if 
(_F_UNLIKELY(extractor->Type() == FieldType::BLOB)) { + if (fast_alter_schema) + return GetFieldDataFromBlobField( + dynamic_cast<_detail::FieldExtractorV2*>(extractor), record, get_blob); + return GetFieldDataFromBlobField(dynamic_cast<_detail::FieldExtractor*>(extractor), + record, get_blob); + } else { + if (fast_alter_schema) + return GetFieldDataFromField(dynamic_cast<_detail::FieldExtractorV2*>(extractor), + record); + return GetFieldDataFromField(dynamic_cast<_detail::FieldExtractor*>(extractor), record); + } } // Create a record given properties as string or FieldData. @@ -413,34 +436,22 @@ class Schema { FMA_DBG_ASSERT(!HasBlob()); // TODO(anyone): optimize Value v = CreateEmptyRecord(); - if (fast_alter_schema) { - std::vector is_set(fieldsV2_.size(), false); - for (size_t i = 0; i < n_fields; i++) { - const FieldT& name_or_num = fields[i]; - const DataT& data = values[i]; - const _detail::FieldExtractorV2* extr = GetFieldExtractorV2(name_or_num); - is_set[extr->GetFieldId()] = true; - ParseAndSet(v, data, extr); - } - for (size_t i = 0; i < fieldsV2_.size(); i++) { - auto& f = fieldsV2_[i]; - if (_F_UNLIKELY(!f.IsOptional() && !is_set[i])) - throw FieldCannotBeSetNullException(f.Name()); - } - return v; - } std::vector is_set(fields_.size(), false); for (size_t i = 0; i < n_fields; i++) { const FieldT& name_or_num = fields[i]; const DataT& data = values[i]; - const _detail::FieldExtractor* extr = GetFieldExtractor(name_or_num); + _detail::FieldExtractorBase* extr = GetFieldExtractorBase(name_or_num); is_set[extr->GetFieldId()] = true; - extr->ParseAndSet(v, data); + if (fast_alter_schema) { + ParseAndSet(v, data, extr); + } else { + dynamic_cast<_detail::FieldExtractor*>(extr)->ParseAndSet(v, data); + } } for (size_t i = 0; i < fields_.size(); i++) { auto& f = fields_[i]; - if (_F_UNLIKELY(!f.IsOptional() && !is_set[i])) - throw FieldCannotBeSetNullException(f.Name()); + if (_F_UNLIKELY(!f->IsOptional() && !is_set[i])) + throw 
FieldCannotBeSetNullException(f->Name()); } return v; } @@ -453,42 +464,31 @@ class Schema { const StoreLargeBlobFunc& on_large_blob) { FMA_DBG_ASSERT(HasBlob()); Value prop = CreateEmptyRecord(); - if (fast_alter_schema) { - std::vector is_set(fieldsV2_.size(), false); - for (size_t i = 0; i < n_fields; i++) { - const FT& name_or_num = fields[i]; - const DT& data = values[i]; - const _detail::FieldExtractorV2* extr = GetFieldExtractorV2(name_or_num); - is_set[extr->GetFieldId()] = true; - if (_F_UNLIKELY(extr->Type() == FieldType::BLOB)) { - ParseAndSetBlob(prop, data, on_large_blob, extr); - } else { - ParseAndSet(prop, data, extr); - } - } - for (size_t i = 0; i < fields_.size(); i++) { - auto& f = fieldsV2_[i]; - if (_F_UNLIKELY(!f.IsOptional() && !is_set[i])) - throw FieldCannotBeSetNullException(f.Name()); - } - return prop; - } std::vector is_set(fields_.size(), false); for (size_t i = 0; i < n_fields; i++) { const FT& name_or_num = fields[i]; const DT& data = values[i]; - const _detail::FieldExtractor* extr = GetFieldExtractor(name_or_num); + _detail::FieldExtractorBase* extr = GetFieldExtractorBase(name_or_num); is_set[extr->GetFieldId()] = true; if (_F_UNLIKELY(extr->Type() == FieldType::BLOB)) { - extr->ParseAndSetBlob(prop, data, on_large_blob); + if (fast_alter_schema) { + ParseAndSetBlob(prop, data, on_large_blob, extr); + } else { + dynamic_cast<_detail::FieldExtractor*>(extr)->ParseAndSetBlob(prop, data, + on_large_blob); + } } else { - extr->ParseAndSet(prop, data); + if (fast_alter_schema) { + ParseAndSet(prop, data, extr); + } else { + dynamic_cast<_detail::FieldExtractor*>(extr)->ParseAndSet(prop, data); + } } } for (size_t i = 0; i < fields_.size(); i++) { auto& f = fields_[i]; - if (_F_UNLIKELY(!f.IsOptional() && !is_set[i])) - throw FieldCannotBeSetNullException(f.Name()); + if (_F_UNLIKELY(!f->IsOptional() && !is_set[i])) + throw FieldCannotBeSetNullException(f->Name()); } return prop; } @@ -496,13 +496,13 @@ class Schema { // 
-------------------- // fieldextractor v2 related void ParseAndSet(Value& record, const FieldData& data, - const _detail::FieldExtractorV2* extractor) const; + _detail::FieldExtractorBase* extractor) const; void ParseAndSet(Value& record, const std::string& data, - const _detail::FieldExtractorV2* extractor) const; + _detail::FieldExtractorBase* extractor) const; template void ParseAndSetBlob(Value& record, const DataT& data, const StoreBlobAndGetKeyFunc& store_blob, - const _detail::FieldExtractorV2* extr) const { + _detail::FieldExtractorBase* extr) const { FMA_DBG_ASSERT(extr->Type() == FieldType::BLOB); bool is_null; Value v = extr->ParseBlob(data, is_null); @@ -519,21 +519,22 @@ class Schema { template void _ParseStringAndSet(Value& record, const std::string& data, - const ::lgraph::_detail::FieldExtractorV2* extractor) const; + ::lgraph::_detail::FieldExtractorBase* extractor) const; void _SetVariableLengthValue(Value& record, const Value& data, - const ::lgraph::_detail::FieldExtractorV2* extractor) const; + ::lgraph::_detail::FieldExtractorBase* extr) const; ENABLE_IF_FIXED_FIELD(T, void) SetFixedSizeValue(Value& record, const T& data, - const ::lgraph::_detail::FieldExtractorV2* extractor) const { + ::lgraph::_detail::FieldExtractorBase* extractor) const { + _detail::FieldExtractorV2* extr = dynamic_cast<_detail::FieldExtractorV2*>(extractor); // "Cannot call SetField(Value&, const T&) on a variable length field"; - FMA_DBG_ASSERT(extractor->IsFixedType()); + FMA_DBG_ASSERT(extr->IsFixedType()); // "Type size mismatch" - FMA_DBG_CHECK_EQ(sizeof(data), extractor->TypeSize()); + FMA_DBG_CHECK_EQ(sizeof(data), extr->TypeSize()); // copy the buffer so we don't accidentally overwrite memory - int data_size = extractor->GetDataSize(record); - size_t offset = extractor->GetFieldOffset(record, extractor->GetFieldId()); + int data_size = extr->GetDataSize(record); + size_t offset = extr->GetFieldOffset(record); char* ptr = (char*)record.Data(); if 
(_F_LIKELY(data_size == sizeof(data))) { record.Resize(record.Size()); @@ -558,19 +559,19 @@ class Schema { ::lgraph::_detail::UnalignedSet(ptr + offset, data); // Update the offset of the subsequent fields. - for (FieldId i = extractor->GetFieldId() + 1; i < extractor->GetRecordCount(record) + 1; + for (FieldId i = extr->GetFieldId() + 1; i < extr->GetRecordCount(record) + 1; ++i) { - size_t off = extractor->GetOffsetPosition(record, i); + size_t off = extr->GetOffsetPosition(record, i); size_t property_offset = ::lgraph::_detail::UnalignedGet(record.Data() + off); ::lgraph::_detail::UnalignedSet(ptr + off, property_offset + diff); } // Update the offset of veriable length fields. - for (FieldId i = extractor->GetRecordCount(record) + 1; - i < extractor->GetRecordCount(record); i++) { - if (fieldsV2_[i].IsFixedType()) continue; - size_t off = extractor->GetFieldOffset(record, i); + for (FieldId i = extr->GetRecordCount(record) + 1; + i < extr->GetRecordCount(record); i++) { + if (fields_[i]->IsFixedType()) continue; + size_t off = extr->GetFieldOffset(record, i); size_t property_offset = ::lgraph::_detail::UnalignedGet(record.Data() + off); ::lgraph::_detail::UnalignedSet(ptr + off, property_offset + diff); @@ -589,46 +590,46 @@ class Schema { void MarkVertexIndexed(size_t field_idx, VertexIndex* index) { FMA_DBG_ASSERT(field_idx < fields_.size()); indexed_fields_.insert(field_idx); - fields_[field_idx].SetVertexIndex(index); + fields_[field_idx]->SetVertexIndex(index); } void MarkEdgeIndexed(size_t field_idx, EdgeIndex* edge_index) { FMA_DBG_ASSERT(field_idx < fields_.size()); indexed_fields_.insert(field_idx); - fields_[field_idx].SetEdgeIndex(edge_index); + fields_[field_idx]->SetEdgeIndex(edge_index); } void MarkVectorIndexed(size_t field_idx, VectorIndex* index) { FMA_DBG_ASSERT(field_idx < fields_.size()); vector_index_fields_.insert(field_idx); - fields_[field_idx].SetVectorIndex(index); + fields_[field_idx]->SetVectorIndex(index); } bool 
IsVertexIndex(size_t field_idx) { FMA_DBG_ASSERT(field_idx < fields_.size()); - return fields_[field_idx].GetVertexIndex() == nullptr; + return fields_[field_idx]->GetVertexIndex() == nullptr; } bool IsEdgeIndex(size_t field_idx) { FMA_DBG_ASSERT(field_idx < fields_.size()); - return fields_[field_idx].GetEdgeIndex() == nullptr; + return fields_[field_idx]->GetEdgeIndex() == nullptr; } bool IsVectorIndex(size_t field_idx) { FMA_DBG_ASSERT(field_idx < fields_.size()); - return fields_[field_idx].GetVectorIndex() == nullptr; + return fields_[field_idx]->GetVectorIndex() == nullptr; } void UnVertexIndex(size_t field_idx) { FMA_DBG_ASSERT(field_idx < fields_.size()); indexed_fields_.erase(field_idx); - fields_[field_idx].SetVertexIndex(nullptr); + fields_[field_idx]->SetVertexIndex(nullptr); } void UnEdgeIndex(size_t field_idx) { FMA_DBG_ASSERT(field_idx < fields_.size()); indexed_fields_.erase(field_idx); - fields_[field_idx].SetEdgeIndex(nullptr); + fields_[field_idx]->SetEdgeIndex(nullptr); } void UnVertexCompositeIndex(const std::vector &fields) { @@ -638,7 +639,7 @@ class Schema { void UnVectorIndex(size_t field_idx) { FMA_DBG_ASSERT(field_idx < fields_.size()); vector_index_fields_.erase(field_idx); - fields_[field_idx].SetVectorIndex(nullptr); + fields_[field_idx]->SetVectorIndex(nullptr); } void MarkFullTextIndexed(size_t field_idx, bool fulltext_indexed) { @@ -648,7 +649,7 @@ class Schema { } else { fulltext_fields_.emplace(field_idx); } - fields_[field_idx].SetFullTextIndex(fulltext_indexed); + fields_[field_idx]->SetFullTextIndex(fulltext_indexed); } const std::unordered_set& GetIndexedFields() const { return indexed_fields_; } diff --git a/src/core/schema_common.h b/src/core/schema_common.h index 604ad825fa..190956b266 100644 --- a/src/core/schema_common.h +++ b/src/core/schema_common.h @@ -143,16 +143,14 @@ class BinaryReader : public BinaryReaderForFie template class BinaryWriter : public BinaryWriterForFieldData {}; - - template struct BinaryReader { 
static size_t Read(StreamT& stream, lgraph::FieldSpec& fs) { return BinaryRead(stream, fs.name) + BinaryRead(stream, fs.type) + BinaryRead(stream, fs.optional) + BinaryRead(stream, fs.deleted) + - BinaryRead(stream, fs.id) + BinaryRead(stream, fs.init_value) + - BinaryRead(stream, fs.inited_value) + BinaryRead(stream, fs.default_value) + - BinaryRead(stream, fs.set_default_value); + BinaryRead(stream, fs.id) + BinaryRead(stream, fs.inited_value) + + BinaryRead(stream, fs.init_value) + BinaryRead(stream, fs.set_default_value) + + BinaryRead(stream, fs.default_value); } }; diff --git a/src/cypher/procedure/procedure.cpp b/src/cypher/procedure/procedure.cpp index f39a9ca2a5..e312dcb14f 100644 --- a/src/cypher/procedure/procedure.cpp +++ b/src/cypher/procedure/procedure.cpp @@ -2210,10 +2210,10 @@ void BuiltinProcedure::DbmsGraphGetGraphSchema(RTContext *ctx, const Record *rec node["detach_property"] = s->DetachProperty(); for (auto& fd : s->GetFields()) { nlohmann::json property; - property["name"] = fd.Name(); - property["type"] = lgraph_api::to_string(fd.Type()); - property["optional"] = fd.IsOptional(); - auto vi = fd.GetVertexIndex(); + property["name"] = fd->Name(); + property["type"] = lgraph_api::to_string(fd->Type()); + property["optional"] = fd->IsOptional(); + auto vi = fd->GetVertexIndex(); if (vi) { property["index"] = true; property["unique"] = vi->IsUnique(); @@ -2234,10 +2234,10 @@ void BuiltinProcedure::DbmsGraphGetGraphSchema(RTContext *ctx, const Record *rec } for (auto& fd : s->GetFields()) { nlohmann::json property; - property["name"] = fd.Name(); - property["type"] = lgraph_api::to_string(fd.Type()); - property["optional"] = fd.IsOptional(); - auto vi = fd.GetEdgeIndex(); + property["name"] = fd->Name(); + property["type"] = lgraph_api::to_string(fd->Type()); + property["optional"] = fd->IsOptional(); + auto vi = fd->GetEdgeIndex(); if (vi) { property["index"] = true; property["unique"] = vi->IsUnique(); diff --git 
a/src/restful/server/json_convert.h b/src/restful/server/json_convert.h index e41aefc238..867d60f1e6 100644 --- a/src/restful/server/json_convert.h +++ b/src/restful/server/json_convert.h @@ -35,7 +35,7 @@ #include "core/global_config.h" #include "core/task_tracker.h" #include "core/schema.h" -#include "core/field_extractor.h" +#include "core/field_extractor_base.h" #include "db/acl.h" #include "plugin/plugin_desc.h" #include "server/state_machine.h" @@ -606,16 +606,17 @@ inline web::json::value ValueToJson(const std::vector& fields) { +inline web::json::value ValueToJson( + const std::vector>& fields) { auto arr = web::json::value::array(); for (int idx = 0; idx < (int)fields.size(); ++idx) { web::json::value js; - js[_TU("name")] = ValueToJson(fields[idx].GetFieldSpec().name); - js[_TU("type")] = ValueToJson(to_string(fields[idx].GetFieldSpec().type)); - js[_TU("optional")] = ValueToJson(fields[idx].GetFieldSpec().optional); - if (fields[idx].GetVertexIndex()) { + js[_TU("name")] = ValueToJson(fields[idx]->GetFieldSpec().name); + js[_TU("type")] = ValueToJson(to_string(fields[idx]->GetFieldSpec().type)); + js[_TU("optional")] = ValueToJson(fields[idx]->GetFieldSpec().optional); + if (fields[idx]->GetVertexIndex()) { js[_TU("index")] = ValueToJson(true); - switch (fields[idx].GetVertexIndex()->GetType()) { + switch (fields[idx]->GetVertexIndex()->GetType()) { case IndexType::NonuniqueIndex: js[_TU("unique")] = ValueToJson(false); break; @@ -626,9 +627,9 @@ inline web::json::value ValueToJson(const std::vectorGetEdgeIndex()) { js[_TU("index")] = ValueToJson(true); - switch (fields[idx].GetEdgeIndex()->GetType()) { + switch (fields[idx]->GetEdgeIndex()->GetType()) { case IndexType::NonuniqueIndex: js[_TU("unique")] = ValueToJson(false); js[_TU("pair_unique")] = ValueToJson(false);