Skip to content

Commit

Permalink
refine extractor.
Browse files Browse the repository at this point in the history
  • Loading branch information
ColinLeeo committed Oct 29, 2024
1 parent fa92edd commit b4007d9
Show file tree
Hide file tree
Showing 17 changed files with 915 additions and 1,268 deletions.
4 changes: 4 additions & 0 deletions .msvc/lgraph_core/lgraph_core.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
<ClCompile Include="..\..\src\core\data_type.cpp" />
<ClCompile Include="..\..\src\core\edge_index.cpp" />
<ClCompile Include="..\..\src\core\field_extractor.cpp" />

<ClCompile Include="..\..\src\core\field_extractor_v2.cpp" />

<ClCompile Include="..\..\src\core\field_extractor_base.cpp" />
<ClCompile Include="..\..\src\core\full_text_index.cpp" />
<ClCompile Include="..\..\src\core\global_config.cpp" />
<ClCompile Include="..\..\src\core\graph.cpp" />
Expand Down
4 changes: 4 additions & 0 deletions .msvc/lgraph_core/lgraph_core.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@
<ClCompile Include="..\..\src\core\field_extractor.cpp">
<Filter>Source Files</Filter>
</ClCompile>

<ClCompile Include="..\..\src\core\field_extractor_base.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\core\data_type.cpp">
<Filter>Source Files</Filter>
</ClCompile>
Expand Down
12 changes: 10 additions & 2 deletions include/lgraph/lgraph_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -1324,8 +1324,16 @@ struct FieldSpec {
inited_value(true),
default_value(dv),
set_default_value(true) {}
// explicit FieldSpecV2(const FieldSpec& spec)
// : name(spec.name), type(spec.type), optional(spec.optional), deleted(false), id(0) {}
/**
 * @brief   Copy constructor.
 *
 * Copies name, type, optional, init_value, inited_value, default_value and
 * set_default_value from @p spec. The `deleted` flag and `id` are NOT copied:
 * the new object always starts with deleted == false and id == 0.
 *
 * NOTE(review): because `deleted` and `id` are reset, a copy is not a
 * field-for-field duplicate of the source. Confirm that callers (including
 * containers that copy elements) do not rely on those two members surviving a
 * copy.
 *
 * @param   spec    The FieldSpec to copy from.
 */
FieldSpec(const FieldSpec& spec)
: name(spec.name),
type(spec.type),
optional(spec.optional),
// deliberately reset instead of taking spec.deleted / spec.id (see note above)
deleted(false),
id(0),
init_value(spec.init_value),
inited_value(spec.inited_value),
default_value(spec.default_value),
set_default_value(spec.set_default_value) {}

inline bool operator==(const FieldSpec& rhs) const {
return name == rhs.name && type == rhs.type && optional == rhs.optional &&
Expand Down
1 change: 1 addition & 0 deletions src/BuildLGraphApi.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ set(LGRAPH_CORE_SRC
core/audit_logger.cpp
core/data_type.cpp
core/edge_index.cpp
core/field_extractor_base.cpp
core/field_extractor.cpp
core/field_extractor_v2.cpp
core/full_text_index.cpp
Expand Down
1 change: 1 addition & 0 deletions src/BuildLGraphApiForJNI.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ set(LGRAPH_CORE_SRC
core/audit_logger.cpp
core/data_type.cpp
core/edge_index.cpp
core/field_extractor_base.cpp
core/field_extractor.cpp
core/field_extractor_v2.cpp
core/full_text_index.cpp
Expand Down
185 changes: 58 additions & 127 deletions src/core/field_extractor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,32 @@ namespace _detail {
* \return ErrorCode::OK if succeeds
* FIELD_PARSE_FAILED.
*/

bool FieldExtractor::GetIsNull(const Value& record) const {
if (!IsOptional()) {
return false;
} else {
// get the Kth bit from NullArray
char* arr = GetNullArray(record);
return arr[null_bit_off_ / 8] & (0x1 << (null_bit_off_ % 8));
}
}

// set field value to null
void FieldExtractor::SetIsNull(const Value& record, bool is_null) const {
if (!IsOptional()) {
if (is_null) throw FieldCannotBeSetNullException(Name());
return;
}
// set the Kth bit from NullArray
char* arr = GetNullArray(record);
if (is_null) {
arr[null_bit_off_ / 8] |= (0x1 << (null_bit_off_ % 8));
} else {
arr[null_bit_off_ / 8] &= ~(0x1 << (null_bit_off_ % 8));
}
}

template <FieldType FT>
void FieldExtractor::_ParseStringAndSet(Value& record, const std::string& data) const {
typedef typename field_data_helper::FieldType2CType<FT>::type CT;
Expand All @@ -50,7 +76,7 @@ void FieldExtractor::_ParseStringAndSet<FieldType::POINT>(Value& record,
// check whether the point data is valid;
if (!::lgraph_api::TryDecodeEWKB(data, ::lgraph_api::SpatialType::POINT))
throw ParseStringException(Name(), data, FieldType::POINT);
// FMA_DBG_CHECK_EQ(sizeof(data), field_data_helper::FieldTypeSize(def_.type));
// FMA_DBG_CHECK_EQ(sizeof(data), field_data_helper::FieldTypeSize(Type()));
size_t Size = record.Size();
record.Resize(Size);
char* ptr = (char*)record.Data() + offset_.data_off;
Expand All @@ -59,7 +85,7 @@ void FieldExtractor::_ParseStringAndSet<FieldType::POINT>(Value& record,

template <>
void FieldExtractor::_ParseStringAndSet<FieldType::LINESTRING>(Value& record,
const std::string& data) const {
const std::string& data) const {
// check whether the linestring data is valid;
if (!::lgraph_api::TryDecodeEWKB(data, ::lgraph_api::SpatialType::LINESTRING))
throw ParseStringException(Name(), data, FieldType::LINESTRING);
Expand All @@ -68,15 +94,15 @@ void FieldExtractor::_ParseStringAndSet<FieldType::LINESTRING>(Value& record,

template <>
void FieldExtractor::_ParseStringAndSet<FieldType::POLYGON>(Value& record,
const std::string& data) const {
const std::string& data) const {
if (!::lgraph_api::TryDecodeEWKB(data, ::lgraph_api::SpatialType::POLYGON))
throw ParseStringException(Name(), data, FieldType::POLYGON);
return _SetVariableLengthValue(record, Value::ConstRef(data));
}

template <>
void FieldExtractor::_ParseStringAndSet<FieldType::SPATIAL>(Value& record,
const std::string& data) const {
const std::string& data) const {
::lgraph_api::SpatialType s;
// throw ParseStringException in this function;
try {
Expand All @@ -92,7 +118,7 @@ void FieldExtractor::_ParseStringAndSet<FieldType::SPATIAL>(Value& record,

template <>
void FieldExtractor::_ParseStringAndSet<FieldType::FLOAT_VECTOR>(Value& record,
const std::string& data) const {
const std::string& data) const {
std::vector<float> vec;
// check if there are only numbers and commas
std::regex nonNumbersAndCommas("[^0-9,.]");
Expand Down Expand Up @@ -131,15 +157,15 @@ void FieldExtractor::_ParseStringAndSet<FieldType::FLOAT_VECTOR>(Value& record,
* FIELD_PARSE_FAILED.
*/
void FieldExtractor::ParseAndSet(Value& record, const std::string& data) const {
if (data.empty() && (field_data_helper::IsFixedLengthFieldType(def_.type)
|| def_.type == FieldType::LINESTRING || def_.type == FieldType::POLYGON
|| def_.type == FieldType::SPATIAL || def_.type == FieldType::FLOAT_VECTOR)) {
if (data.empty() &&
(IsFixedType() || Type() == FieldType::LINESTRING || Type() == FieldType::POLYGON ||
Type() == FieldType::SPATIAL || Type() == FieldType::FLOAT_VECTOR)) {
SetIsNull(record, true);
return;
}
// empty string is treated as non-NULL
SetIsNull(record, false);
switch (def_.type) {
switch (Type()) {
case FieldType::BOOL:
return _ParseStringAndSet<FieldType::BOOL>(record, data);
case FieldType::INT8:
Expand Down Expand Up @@ -177,7 +203,7 @@ void FieldExtractor::ParseAndSet(Value& record, const std::string& data) const {
case FieldType::NUL:
LOG_ERROR() << "NUL FieldType";
}
LOG_ERROR() << "Data type " << field_data_helper::FieldTypeName(def_.type) << " not handled";
LOG_ERROR() << "Data type " << field_data_helper::FieldTypeName(Type()) << " not handled";
}

// parse data from FieldData and set field
Expand All @@ -190,7 +216,7 @@ void FieldExtractor::ParseAndSet(Value& record, const FieldData& data) const {

#define _SET_FIXED_TYPE_VALUE_FROM_FD(ft) \
do { \
if (data.type == def_.type) { \
if (data.type == Type()) { \
return SetFixedSizeValue(record, \
field_data_helper::GetStoredValue<FieldType::ft>(data)); \
} else { \
Expand All @@ -201,7 +227,7 @@ void FieldExtractor::ParseAndSet(Value& record, const FieldData& data) const {
} \
} while (0)

switch (def_.type) {
switch (Type()) {
case FieldType::BOOL:
_SET_FIXED_TYPE_VALUE_FROM_FD(BOOL);
case FieldType::INT8:
Expand Down Expand Up @@ -248,39 +274,39 @@ void FieldExtractor::ParseAndSet(Value& record, const FieldData& data) const {
}
case FieldType::LINESTRING:
{
if (data.type != FieldType::LINESTRING && data.type != FieldType::STRING)
throw ParseFieldDataException(Name(), data, Type());
if (!::lgraph_api::TryDecodeEWKB(*data.data.buf, ::lgraph_api::SpatialType::LINESTRING))
if (data.type != FieldType::LINESTRING && data.type != FieldType::STRING)
throw ParseFieldDataException(Name(), data, Type());
if (!::lgraph_api::TryDecodeEWKB(*data.data.buf, ::lgraph_api::SpatialType::LINESTRING))
throw ParseStringException(Name(), *data.data.buf, FieldType::LINESTRING);

return _SetVariableLengthValue(record, Value::ConstRef(*data.data.buf));
return _SetVariableLengthValue(record, Value::ConstRef(*data.data.buf));
}
case FieldType::POLYGON:
{
if (data.type != FieldType::POLYGON && data.type != FieldType::STRING)
throw ParseFieldDataException(Name(), data, Type());
if (!::lgraph_api::TryDecodeEWKB(*data.data.buf, ::lgraph_api::SpatialType::POLYGON))
if (data.type != FieldType::POLYGON && data.type != FieldType::STRING)
throw ParseFieldDataException(Name(), data, Type());
if (!::lgraph_api::TryDecodeEWKB(*data.data.buf, ::lgraph_api::SpatialType::POLYGON))
throw ParseStringException(Name(), *data.data.buf, FieldType::POLYGON);

return _SetVariableLengthValue(record, Value::ConstRef(*data.data.buf));
return _SetVariableLengthValue(record, Value::ConstRef(*data.data.buf));
}
case FieldType::SPATIAL:
{
if (data.type != FieldType::SPATIAL && data.type != FieldType::STRING)
throw ParseFieldDataException(Name(), data, Type());
::lgraph_api::SpatialType s;
if (data.type != FieldType::SPATIAL && data.type != FieldType::STRING)
throw ParseFieldDataException(Name(), data, Type());
::lgraph_api::SpatialType s;

// throw ParseStringException in this function;
try {
s = ::lgraph_api::ExtractType(*data.data.buf);
} catch (...) {
throw ParseStringException(Name(), *data.data.buf, FieldType::SPATIAL);
}
// throw ParseStringException in this function;
try {
s = ::lgraph_api::ExtractType(*data.data.buf);
} catch (...) {
throw ParseStringException(Name(), *data.data.buf, FieldType::SPATIAL);
}

if (!::lgraph_api::TryDecodeEWKB(*data.data.buf, s))
if (!::lgraph_api::TryDecodeEWKB(*data.data.buf, s))
throw ParseStringException(Name(), *data.data.buf, FieldType::SPATIAL);

return _SetVariableLengthValue(record, Value::ConstRef(*data.data.buf));
return _SetVariableLengthValue(record, Value::ConstRef(*data.data.buf));
}
case FieldType::FLOAT_VECTOR:
{
Expand All @@ -290,103 +316,8 @@ void FieldExtractor::ParseAndSet(Value& record, const FieldData& data) const {
return _SetVariableLengthValue(record, Value::ConstRef(*data.data.vp));
}
default:
LOG_ERROR() << "Data type " << field_data_helper::FieldTypeName(def_.type)
<< " not handled";
}
}

/**
* Print the string representation of the field. For digital types, it prints
* it into ASCII string; for NBytes and String, it just copies the content of
* the field into the string.
*
* \param record The record.
*
* \return String representation of the field.
*/
std::string FieldExtractor::FieldToString(const Value& record) const {
if (GetIsNull(record)) return "\"null\"";
std::string ret;

#define _COPY_FIELD_AND_RETURN_STR_(record, ft) \
do { \
typename field_data_helper::FieldType2StorageType<FieldType::ft>::type d = 0; \
typedef typename field_data_helper::FieldType2CType<FieldType::ft>::type CT; \
GetCopy(record, d); \
return fma_common::StringFormatter::Format("{}", static_cast<CT>(d)); \
} while (0)

switch (def_.type) {
case FieldType::BOOL:
_COPY_FIELD_AND_RETURN_STR_(record, BOOL);
case FieldType::INT8:
_COPY_FIELD_AND_RETURN_STR_(record, INT8);
case FieldType::INT16:
_COPY_FIELD_AND_RETURN_STR_(record, INT16);
case FieldType::INT32:
_COPY_FIELD_AND_RETURN_STR_(record, INT32);
case FieldType::INT64:
_COPY_FIELD_AND_RETURN_STR_(record, INT64);
case FieldType::FLOAT:
_COPY_FIELD_AND_RETURN_STR_(record, FLOAT);
case FieldType::DOUBLE:
_COPY_FIELD_AND_RETURN_STR_(record, DOUBLE);
case FieldType::DATE:
{
int32_t i;
GetCopy(record, i);
return Date(i).ToString();
}
case FieldType::DATETIME:
{
int64_t i;
GetCopy(record, i);
return DateTime(i).ToString();
}
case FieldType::STRING:
{
std::string ret(GetDataSize(record), 0);
GetCopyRaw(record, &ret[0], ret.size());
return ret;
}
case FieldType::BLOB:
{
// std::string ret(GetDataSize(record), 0);
// GetCopyRaw(record, &ret[0], ret.size());
// return ::lgraph_api::base64::Encode(ret.substr(2));
return fma_common::StringFormatter::Format("[BLOB]");
}
case FieldType::POINT:
case FieldType::LINESTRING:
case FieldType::POLYGON:
case FieldType::SPATIAL:
{
std::string ret(GetDataSize(record), 0);
GetCopyRaw(record, &ret[0], ret.size());
return ret;
}
case FieldType::FLOAT_VECTOR:
{
std::string vec_str;
for (size_t i = 0; i < record.AsType<std::vector<float>>().size(); i++) {
auto floatnum = record.AsType<std::vector<float>>().at(i);
if (record.AsType<std::vector<float>>().at(i) > 999999) {
vec_str += std::to_string(floatnum).substr(0, 7);
} else {
vec_str += std::to_string(floatnum).substr(0, 8);
}
vec_str += ',';
}
if (!vec_str.empty()) {
vec_str.pop_back();
}
return vec_str;
}
case lgraph_api::NUL:
break;
LOG_ERROR() << "Data type " << field_data_helper::FieldTypeName(Type()) << " not handled";
}
LOG_ERROR() << "Data type " << field_data_helper::FieldTypeName(def_.type) << " not handled";
return "";
}

// sets variable length value to the field
Expand Down
Loading

0 comments on commit b4007d9

Please sign in to comment.