diff --git a/.github/workflows/pikiwidb.yml b/.github/workflows/pikiwidb.yml index 5d1a72574..2ca212d3d 100644 --- a/.github/workflows/pikiwidb.yml +++ b/.github/workflows/pikiwidb.yml @@ -6,24 +6,29 @@ on: branches: [ "unstable" ] jobs: - build_on_macos: - runs-on: macos-latest + check_format: + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Install Deps - run: | - brew install clang-format - - name: Build - run: | - sh build.sh + run: bash ci/build.sh - name: Check Format working-directory: ${{ github.workspace }}/build + run: make check-format + + build_on_macos: + runs-on: macos-latest + needs: check_format + + steps: + - uses: actions/checkout@v4 + + - name: Build run: | - make check-format + sh build.sh - name: GTest working-directory: ${{ github.workspace }}/build @@ -33,6 +38,7 @@ jobs: build_on_ubuntu: runs-on: ubuntu-latest + needs: check_format steps: - uses: actions/checkout@v4 @@ -41,10 +47,6 @@ jobs: run: | bash build.sh - - name: Check Format - working-directory: ${{ github.workspace }}/build - run: make check-format - - name: GTest working-directory: ${{ github.workspace }}/build # Execute tests defined by the CMake configuration. 
diff --git a/CMakeLists.txt b/CMakeLists.txt index 7764e4e9e..e2f553e36 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,6 +115,7 @@ enable_testing() ADD_SUBDIRECTORY(src/pstd) ADD_SUBDIRECTORY(src/net) +ADD_SUBDIRECTORY(src/storage) ADD_SUBDIRECTORY(src) ############################################################################# diff --git a/build_support/clang_format_exclusions.txt b/build_support/clang_format_exclusions.txt deleted file mode 100755 index e69de29bb..000000000 diff --git a/ci/build.sh b/ci/build.sh new file mode 100755 index 000000000..3aef7afe0 --- /dev/null +++ b/ci/build.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +BUILD_TIME=$(git log -1 --format=%ai) +BUILD_TIME=${BUILD_TIME: 0: 10} + +COMMIT_ID=$(git rev-parse HEAD) +SHORT_COMMIT_ID=${COMMIT_ID: 0: 8} + +BUILD_TYPE=release +VERBOSE=0 +CMAKE_FLAGS="" +MAKE_FLAGS="" +PREFIX="build" + +if [ -z "$SHORT_COMMIT_ID" ]; then + echo "no git commit id" + SHORT_COMMIT_ID="pikiwidb" +fi + +echo "BUILD_TIME:" $BUILD_TIME +echo "COMMIT_ID:" $SHORT_COMMIT_ID + +echo "BUILD_TYPE:" $BUILD_TYPE +echo "CMAKE_FLAGS:" $CMAKE_FLAGS +echo "MAKE_FLAGS:" $MAKE_FLAGS + +cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DBUILD_TIME=$BUILD_TIME -DGIT_COMMIT_ID=$SHORT_COMMIT_ID ${CMAKE_FLAGS} -S . -B ${PREFIX} diff --git a/cmake/rocksdb.cmake b/cmake/rocksdb.cmake index 4bd8045d8..fbe821867 100644 --- a/cmake/rocksdb.cmake +++ b/cmake/rocksdb.cmake @@ -2,12 +2,13 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. An additional grant # of patent rights can be found in the PATENTS file in the same directory. 
+ include_guard() FetchContent_Declare( rocksdb GIT_REPOSITORY https://github.com/facebook/rocksdb.git - GIT_TAG v8.6.7 + GIT_TAG v8.3.3 ) FetchContent_MakeAvailableWithArgs(rocksdb @@ -20,6 +21,10 @@ FetchContent_MakeAvailableWithArgs(rocksdb WITH_TRACE_TOOLS=OFF WITH_EXAMPLES=OFF ROCKSDB_BUILD_SHARED=OFF - WITH_GFLAGS=OFF WITH_LIBURING=OFF + WITH_LZ4=OFF + WITH_SNAPPY=OFF + WITH_ZLIB=OFF + WITH_ZSTD=OFF + WITH_GFLAGS=OFF ) diff --git a/src/base_cmd.h b/src/base_cmd.h index edaa9bb2b..d4f7d3831 100644 --- a/src/base_cmd.h +++ b/src/base_cmd.h @@ -35,6 +35,7 @@ const std::string kCmdNameSetex = "setex"; const std::string kCmdNamePsetex = "psetex"; const std::string kCmdNameSetnx = "setnx"; const std::string kCmdNameSetBit = "setbit"; +const std::string kCmdNameIncrbyfloat = "incrbyfloat"; const std::string kCmdNameGetBit = "getbit"; // multi @@ -46,6 +47,7 @@ const std::string kCmdNameDiscard = "discard"; // admin const std::string kCmdNameConfig = "config"; +const std::string kCmdNameFlushdb = "flushdb"; const std::string kCmdNameAppend = "append"; const std::string kCmdNameGetset = "getset"; @@ -55,6 +57,14 @@ const std::string kCmdNameBitCount = "bitcount"; const std::string kCmdNameAuth = "auth"; +// hash cmd +const std::string kCmdNameHSet = "hset"; +const std::string kCmdNameHGet = "hget"; +const std::string kCmdNameHMSet = "hmset"; +const std::string kCmdNameHMGet = "hmget"; +const std::string kCmdNameHGetAll = "hgetall"; +const std::string kCmdNameHKeys = "hkeys"; + enum CmdFlags { CmdFlagsWrite = (1 << 0), // May modify the dataset CmdFlagsReadonly = (1 << 1), // Doesn't modify the dataset diff --git a/src/cmd_admin.cc b/src/cmd_admin.cc index f2a85b331..613d4fafb 100644 --- a/src/cmd_admin.cc +++ b/src/cmd_admin.cc @@ -6,6 +6,7 @@ */ #include "cmd_admin.h" +#include "store.h" namespace pikiwidb { @@ -27,4 +28,16 @@ bool CmdConfigSet::DoInitial(PClient* client) { return true; } void CmdConfigSet::DoCmd(PClient* client) { client->AppendString("config cmd in 
development"); } +FlushdbCmd::FlushdbCmd(const std::string& name, int16_t arity) + : BaseCmd(name, arity, CmdFlagsAdmin | CmdFlagsWrite, AclCategoryWrite | AclCategoryAdmin) {} + +bool FlushdbCmd::DoInitial(PClient* client) { return true; } + +void FlushdbCmd::DoCmd(PClient* client) { + PSTORE.dirty_ += PSTORE.DBSize(); + PSTORE.ClearCurrentDB(); + Propagate(PSTORE.GetDB(), std::vector{"flushdb"}); + client->SetRes(CmdRes::kOk); +} + } // namespace pikiwidb \ No newline at end of file diff --git a/src/cmd_admin.h b/src/cmd_admin.h index 9da3627a8..556a5b85c 100644 --- a/src/cmd_admin.h +++ b/src/cmd_admin.h @@ -48,4 +48,15 @@ class CmdConfigSet : public BaseCmd { void DoCmd(PClient* client) override; }; +class FlushdbCmd : public BaseCmd { + public: + FlushdbCmd(const std::string& name, int16_t arity); + + protected: + bool DoInitial(PClient* client) override; + + private: + void DoCmd(PClient* client) override; +}; + } // namespace pikiwidb diff --git a/src/cmd_hash.cc b/src/cmd_hash.cc new file mode 100644 index 000000000..53a3b71dd --- /dev/null +++ b/src/cmd_hash.cc @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. 
+ */ + +#include "cmd_hash.h" + +#include "store.h" + +namespace pikiwidb { + +HSetCmd::HSetCmd(const std::string& name, int16_t arity) + : BaseCmd(name, arity, CmdFlagsWrite, AclCategoryWrite | AclCategoryHash) {} + +bool HSetCmd::DoInitial(PClient* client) { + if (client->argv_.size() % 2 != 0) { + client->SetRes(CmdRes::kWrongNum, kCmdNameHSet); + return false; + } + client->SetKey(client->argv_[1]); + return true; +} + +void HSetCmd::DoCmd(PClient* client) { + PObject* value = nullptr; + UnboundedBuffer reply; + PError err = PSTORE.GetValueByType(client->Key(), value, PType_hash); + if (err != PError_ok && err != PError_notExist) { + ReplyError(err, &reply); + client->SetRes(CmdRes::kSyntaxErr, "hset cmd error"); + return; + } + if (err == PError_notExist) { + value = PSTORE.SetValue(client->Key(), PObject::CreateHash()); + } + + auto new_cnt = 0; + auto hash = value->CastHash(); + for (size_t i = 2; i < client->argv_.size(); i += 2) { + auto field = client->argv_[i]; + auto value = client->argv_[i + 1]; + auto it = hash->find(field); + if (it == hash->end()) { + hash->insert(PHash::value_type(field, value)); + ++new_cnt; + } else { + it->second = value; + } + } + FormatInt(new_cnt, &reply); + client->AppendStringRaw(reply.ReadAddr()); +} + +HGetCmd::HGetCmd(const std::string& name, int16_t arity) + : BaseCmd(name, arity, CmdFlagsReadonly, AclCategoryRead | AclCategoryHash) {} + +bool HGetCmd::DoInitial(PClient* client) { + client->SetKey(client->argv_[1]); + return true; +} + +void HGetCmd::DoCmd(PClient* client) { + PObject* value = nullptr; + UnboundedBuffer reply; + PError err = PSTORE.GetValueByType(client->Key(), value, PType_hash); + if (err != PError_ok) { + ReplyError(err, &reply); + if (err == PError_notExist) { + client->AppendString(""); + } else { + client->SetRes(CmdRes::kSyntaxErr, "hget cmd error"); + } + return; + } + + auto hash = value->CastHash(); + auto it = hash->find(client->argv_[2]); + + if (it != hash->end()) { + FormatBulk(it->second, 
&reply); + } else { + FormatNull(&reply); + } + client->AppendStringRaw(reply.ReadAddr()); +} + +HMSetCmd::HMSetCmd(const std::string& name, int16_t arity) + : BaseCmd(name, arity, CmdFlagsWrite, AclCategoryWrite | AclCategoryHash) {} + +bool HMSetCmd::DoInitial(PClient* client) { + if (client->argv_.size() % 2 != 0) { + client->SetRes(CmdRes::kWrongNum, kCmdNameHMSet); + return false; + } + client->SetKey(client->argv_[1]); + return true; +} + +void HMSetCmd::DoCmd(PClient* client) { + PObject* value = nullptr; + UnboundedBuffer reply; + PError err = PSTORE.GetValueByType(client->Key(), value, PType_hash); + if (err != PError_ok && err != PError_notExist) { + ReplyError(err, &reply); + client->SetRes(CmdRes::kSyntaxErr, "hmset cmd error"); + return; + } + if (err == PError_notExist) { + value = PSTORE.SetValue(client->Key(), PObject::CreateHash()); + } + + auto hash = value->CastHash(); + for (size_t i = 2; i < client->argv_.size(); i += 2) { + auto field = client->argv_[i]; + auto value = client->argv_[i + 1]; + auto it = hash->find(field); + if (it == hash->end()) { + hash->insert(PHash::value_type(field, value)); + } else { + it->second = value; + } + } + FormatOK(&reply); + client->AppendStringRaw(reply.ReadAddr()); +} + +HMGetCmd::HMGetCmd(const std::string& name, int16_t arity) + : BaseCmd(name, arity, CmdFlagsReadonly, AclCategoryRead | AclCategoryHash) {} + +bool HMGetCmd::DoInitial(PClient* client) { + client->SetKey(client->argv_[1]); + return true; +} + +void HMGetCmd::DoCmd(PClient* client) { + PObject* value = nullptr; + UnboundedBuffer reply; + PError err = PSTORE.GetValueByType(client->Key(), value, PType_hash); + if (err != PError_ok) { + ReplyError(err, &reply); + if (err == PError_notExist) { + client->AppendString(""); + } else { + client->SetRes(CmdRes::kSyntaxErr, "hmget cmd error"); + } + return; + } + + auto hash = value->CastHash(); + PreFormatMultiBulk(client->argv_.size() - 2, &reply); + + for (size_t i = 2; i < client->argv_.size(); ++i) 
{ + auto it = hash->find(client->argv_[i]); + if (it != hash->end()) { + FormatBulk(it->second, &reply); + } else { + FormatNull(&reply); + } + } + client->AppendStringRaw(reply.ReadAddr()); +} + +HGetAllCmd::HGetAllCmd(const std::string& name, int16_t arity) + : BaseCmd(name, arity, CmdFlagsReadonly, AclCategoryRead | AclCategoryHash) {} + +bool HGetAllCmd::DoInitial(PClient* client) { + client->SetKey(client->argv_[1]); + return true; +} + +void HGetAllCmd::DoCmd(PClient* client) { + PObject* value = nullptr; + UnboundedBuffer reply; + PError err = PSTORE.GetValueByType(client->Key(), value, PType_hash); + if (err != PError_ok) { + ReplyError(err, &reply); + if (err == PError_notExist) { + client->AppendString(""); + } else { + client->SetRes(CmdRes::kSyntaxErr, "hgetall cmd error"); + } + return; + } + + auto hash = value->CastHash(); + PreFormatMultiBulk(2 * hash->size(), &reply); + + for (const auto& kv : *hash) { + FormatBulk(kv.first, &reply); + FormatBulk(kv.second, &reply); + } + client->AppendStringRaw(reply.ReadAddr()); +} + +HKeysCmd::HKeysCmd(const std::string& name, int16_t arity) + : BaseCmd(name, arity, CmdFlagsReadonly, AclCategoryRead | AclCategoryHash) {} + +bool HKeysCmd::DoInitial(PClient* client) { + client->SetKey(client->argv_[1]); + return true; +} + +void HKeysCmd::DoCmd(PClient* client) { + PObject* value = nullptr; + UnboundedBuffer reply; + PError err = PSTORE.GetValueByType(client->Key(), value, PType_hash); + if (err != PError_ok) { + ReplyError(err, &reply); + if (err == PError_notExist) { + client->AppendString(""); + } else { + client->SetRes(CmdRes::kSyntaxErr, "hkeys cmd error"); + } + return; + } + + auto hash = value->CastHash(); + PreFormatMultiBulk(hash->size(), &reply); + + for (const auto& kv : *hash) { + FormatBulk(kv.first, &reply); + } + client->AppendStringRaw(reply.ReadAddr()); +} + +} // namespace pikiwidb diff --git a/src/cmd_hash.h b/src/cmd_hash.h new file mode 100644 index 000000000..6ef91da34 --- /dev/null +++ 
b/src/cmd_hash.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#pragma once + +#include "base_cmd.h" + +namespace pikiwidb { + +class HSetCmd : public BaseCmd { + public: + HSetCmd(const std::string &name, int16_t arity); + + protected: + bool DoInitial(PClient *client) override; + + private: + void DoCmd(PClient *client) override; +}; + +class HGetCmd : public BaseCmd { + public: + HGetCmd(const std::string &name, int16_t arity); + + protected: + bool DoInitial(PClient *client) override; + + private: + void DoCmd(PClient *client) override; +}; + +class HMSetCmd : public BaseCmd { + public: + HMSetCmd(const std::string &name, int16_t arity); + + protected: + bool DoInitial(PClient *client) override; + + private: + void DoCmd(PClient *client) override; +}; + +class HMGetCmd : public BaseCmd { + public: + HMGetCmd(const std::string &name, int16_t arity); + + protected: + bool DoInitial(PClient *client) override; + + private: + void DoCmd(PClient *client) override; +}; + +class HGetAllCmd : public BaseCmd { + public: + HGetAllCmd(const std::string &name, int16_t arity); + + protected: + bool DoInitial(PClient *client) override; + + private: + void DoCmd(PClient *client) override; +}; + +class HKeysCmd : public BaseCmd { + public: + HKeysCmd(const std::string &name, int16_t arity); + + protected: + bool DoInitial(PClient *client) override; + + private: + void DoCmd(PClient *client) override; +}; + +} // namespace pikiwidb diff --git a/src/cmd_kv.cc b/src/cmd_kv.cc index 32d2c1d8a..0b271f5e0 100644 --- a/src/cmd_kv.cc +++ b/src/cmd_kv.cc @@ -6,7 +6,8 @@ */ #include "cmd_kv.h" -#include "pstd/pstd_string.h" +#include "common.h" +#include "pstd_string.h" #include "pstd_util.h" #include "store.h" @@ 
-413,10 +414,10 @@ bool IncrbyCmd::DoInitial(PClient* client) { } void IncrbyCmd::DoCmd(PClient* client) { - int64_t new_value_ = 0; + int64_t new_value = 0; int64_t by_ = 0; pstd::String2int(client->argv_[2].data(), client->argv_[2].size(), &by_); - PError err = PSTORE.Incrby(client->Key(), by_, &new_value_); + PError err = PSTORE.Incrby(client->Key(), by_, &new_value); switch (err) { case PError_type: client->SetRes(CmdRes::kInvalidInt); @@ -427,7 +428,7 @@ void IncrbyCmd::DoCmd(PClient* client) { client->AppendInteger(by_); break; case PError_ok: - client->AppendInteger(new_value_); + client->AppendInteger(new_value); break; default: client->SetRes(CmdRes::kErrOther, "incrby cmd error"); @@ -435,6 +436,40 @@ void IncrbyCmd::DoCmd(PClient* client) { } } +IncrbyfloatCmd::IncrbyfloatCmd(const std::string& name, int16_t arity) + : BaseCmd(name, arity, CmdFlagsWrite, AclCategoryWrite | AclCategoryString) {} + +bool IncrbyfloatCmd::DoInitial(PClient* client) { + long double by_ = 0.00f; + if (StrToLongDouble(client->argv_[2].data(), client->argv_[2].size(), &by_)) { + client->SetRes(CmdRes::kInvalidFloat); + return false; + } + client->SetKey(client->argv_[1]); + return true; +} + +void IncrbyfloatCmd::DoCmd(PClient* client) { + std::string new_value; + PError err = PSTORE.Incrbyfloat(client->argv_[1], client->argv_[2], &new_value); + switch (err) { + case PError_type: + client->SetRes(CmdRes::kInvalidFloat); + break; + case PError_notExist: // key not exist, set a new value + PSTORE.ClearExpire(client->Key()); // clear key's old ttl + PSTORE.SetValue(client->Key(), PObject::CreateString(client->argv_[2])); + client->AppendString(client->argv_[2]); + break; + case PError_ok: + client->AppendString(new_value); + break; + default: + client->SetRes(CmdRes::kErrOther, "incrbyfloat cmd error"); + break; + } +} + SetnxCmd::SetnxCmd(const std::string& name, int16_t arity) : BaseCmd(name, arity, CmdFlagsWrite, AclCategoryWrite | AclCategoryString) {} @@ -562,4 +597,4 @@ void 
SetBitCmd::DoCmd(PClient* client) { return; } -} // namespace pikiwidb \ No newline at end of file +} diff --git a/src/cmd_kv.h b/src/cmd_kv.h index 09f927766..229aa7a7e 100644 --- a/src/cmd_kv.h +++ b/src/cmd_kv.h @@ -182,4 +182,15 @@ class GetBitCmd : public BaseCmd { void DoCmd(PClient *client) override; }; +class IncrbyfloatCmd : public BaseCmd { + public: + IncrbyfloatCmd(const std::string &name, int16_t arity); + + protected: + bool DoInitial(PClient *client) override; + + private: + void DoCmd(PClient *client) override; +}; + } // namespace pikiwidb diff --git a/src/cmd_table_manager.cc b/src/cmd_table_manager.cc index 0adf49bc4..d606637dd 100644 --- a/src/cmd_table_manager.cc +++ b/src/cmd_table_manager.cc @@ -8,6 +8,7 @@ #include "cmd_table_manager.h" #include #include "cmd_admin.h" +#include "cmd_hash.h" #include "cmd_keys.h" #include "cmd_kv.h" @@ -28,6 +29,9 @@ void CmdTableManager::InitCmdTable() { cmds_->insert(std::make_pair(kCmdNameConfig, std::move(configPtr))); + std::unique_ptr flushdbPtr = std::make_unique(kCmdNameFlushdb, 1); + cmds_->insert(std::make_pair(kCmdNameFlushdb, std::move(flushdbPtr))); + // keyspace std::unique_ptr delPtr = std::make_unique(kCmdNameDel, -2); cmds_->insert(std::make_pair(kCmdNameDel, std::move(delPtr))); @@ -54,6 +58,8 @@ void CmdTableManager::InitCmdTable() { cmds_->insert(std::make_pair(kCmdNameBitCount, std::move(bitcountPtr))); std::unique_ptr incrbyPtr = std::make_unique(kCmdNameIncrby, 3); cmds_->insert(std::make_pair(kCmdNameIncrby, std::move(incrbyPtr))); + std::unique_ptr incrbyfloatPtr = std::make_unique(kCmdNameIncrbyfloat, 3); + cmds_->insert(std::make_pair(kCmdNameIncrbyfloat, std::move(incrbyfloatPtr))); std::unique_ptr strlenPtr = std::make_unique(kCmdNameStrlen, 2); cmds_->insert(std::make_pair(kCmdNameStrlen, std::move(strlenPtr))); std::unique_ptr setexPtr = std::make_unique(kCmdNameSetex, 4); @@ -66,6 +72,20 @@ void CmdTableManager::InitCmdTable() { cmds_->insert(std::make_pair(kCmdNameSetBit, 
std::move(setbitPtr))); std::unique_ptr getbitPtr = std::make_unique(kCmdNameGetBit, 3); cmds_->insert(std::make_pair(kCmdNameGetBit, std::move(getbitPtr))); + + // hash + std::unique_ptr hsetPtr = std::make_unique(kCmdNameHSet, -4); + cmds_->insert(std::make_pair(kCmdNameHSet, std::move(hsetPtr))); + std::unique_ptr hgetPtr = std::make_unique(kCmdNameHGet, 3); + cmds_->insert(std::make_pair(kCmdNameHGet, std::move(hgetPtr))); + std::unique_ptr hmsetPtr = std::make_unique(kCmdNameHMSet, -4); + cmds_->insert(std::make_pair(kCmdNameHMSet, std::move(hmsetPtr))); + std::unique_ptr hmgetPtr = std::make_unique(kCmdNameHMGet, -3); + cmds_->insert(std::make_pair(kCmdNameHMGet, std::move(hmgetPtr))); + std::unique_ptr hgetallPtr = std::make_unique(kCmdNameHGetAll, 2); + cmds_->insert(std::make_pair(kCmdNameHGetAll, std::move(hgetallPtr))); + std::unique_ptr hkeysPtr = std::make_unique(kCmdNameHKeys, 2); + cmds_->insert(std::make_pair(kCmdNameHKeys, std::move(hkeysPtr))); } std::pair CmdTableManager::GetCommand(const std::string& cmdName, PClient* client) { @@ -93,4 +113,4 @@ bool CmdTableManager::CmdExist(const std::string& cmd) const { uint32_t CmdTableManager::GetCmdId() { return ++cmdId_; } -} // namespace pikiwidb \ No newline at end of file +} // namespace pikiwidb diff --git a/src/common.cc b/src/common.cc index 1f407aed0..c1cbdcb46 100644 --- a/src/common.cc +++ b/src/common.cc @@ -6,12 +6,15 @@ */ #include "common.h" +#include #include #include #include #include +#include #include #include +#include #include "unbounded_buffer.h" namespace pikiwidb { @@ -44,6 +47,50 @@ struct PErrorInfo g_errorInfo[] = { int Double2Str(char* ptr, std::size_t nBytes, double val) { return snprintf(ptr, nBytes - 1, "%.6g", val); } +int StrToLongDouble(const char* s, size_t slen, long double* ldval) { + char* pEnd; + std::string t(s, slen); + if (t.find(' ') != std::string::npos) { + return -1; + } + long double d = strtold(s, &pEnd); + if (pEnd != s + slen) { + return -1; + } + + if 
(ldval) { + *ldval = d; + } + return 0; +} + +int LongDoubleToStr(long double ldval, std::string* value) { + if (isnan(ldval)) { + return -1; + } else if (isinf(ldval)) { + if (ldval > 0) { + *value = "inf"; + } else { + *value = "-inf"; + } + return -1; + } else { + std::ostringstream oss; + oss << std::setprecision(15) << ldval; + *value = oss.str(); + + // Remove trailing zeroes after the '.' + size_t dotPos = value->find('.'); + if (dotPos != std::string::npos) { + value->erase(value->find_last_not_of('0') + 1, std::string::npos); + if (value->back() == '.') { + value->pop_back(); + } + } + return 0; + } +} + bool TryStr2Long(const char* ptr, size_t nBytes, long& val) { bool negtive = false; size_t i = 0; diff --git a/src/common.h b/src/common.h index 572b7831a..bc0329509 100644 --- a/src/common.h +++ b/src/common.h @@ -94,6 +94,7 @@ enum PError { PError_moduleinit = 16, PError_moduleuninit = 17, PError_modulerepeat = 18, + PError_overflow = 19, PError_max, }; @@ -145,6 +146,8 @@ inline std::size_t Number2Str(char* ptr, std::size_t nBytes, T val) { } int Double2Str(char* ptr, std::size_t nBytes, double val); +int StrToLongDouble(const char* s, size_t slen, long double* ldval); +int LongDoubleToStr(long double ldval, std::string* value); bool TryStr2Long(const char* ptr, std::size_t nBytes, long& val); // only for decimal bool Strtol(const char* ptr, std::size_t nBytes, long* outVal); bool Strtoll(const char* ptr, std::size_t nBytes, long long* outVal); diff --git a/src/pstd/CMakeLists.txt b/src/pstd/CMakeLists.txt index 50537ac83..c9ef08f87 100644 --- a/src/pstd/CMakeLists.txt +++ b/src/pstd/CMakeLists.txt @@ -9,6 +9,10 @@ ADD_LIBRARY(pstd ${STD_SRC}) ADD_SUBDIRECTORY(tests) -TARGET_LINK_LIBRARIES(pstd; spdlog pthread) +TARGET_INCLUDE_DIRECTORIES(pstd + PRIVATE ${rocksdb_SOURCE_DIR}/include +) + +TARGET_LINK_LIBRARIES(pstd; spdlog pthread glog) SET_TARGET_PROPERTIES(pstd PROPERTIES LINKER_LANGUAGE CXX) diff --git a/src/pstd/env.cc b/src/pstd/env.cc new file 
mode 100644 index 000000000..6ac5f323c --- /dev/null +++ b/src/pstd/env.cc @@ -0,0 +1,688 @@ +/* +* Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#include "env.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#if __has_include() +#include +namespace filesystem = std::filesystem; +#elif __has_include() +#include +namespace filesystem = std::experimental::filesystem; +#endif + +#include + +namespace pstd { + +/* + * Set the resource limits of a process + */ + +/* + * 0: success. + * -1: set failed. + * -2: get resource limits failed. + */ +const size_t kPageSize = getpagesize(); + +int SetMaxFileDescriptorNum(int64_t max_file_descriptor_num) { + // Try to Set the number of file descriptor + struct rlimit limit; + if (getrlimit(RLIMIT_NOFILE, &limit) != -1) { + if (limit.rlim_cur < static_cast(max_file_descriptor_num)) { + // rlim_cur could be set by any user while rlim_max are + // changeable only by root. 
+ limit.rlim_cur = max_file_descriptor_num; + if (limit.rlim_cur > limit.rlim_max) { + limit.rlim_max = max_file_descriptor_num; + } + if (setrlimit(RLIMIT_NOFILE, &limit) != -1) { + return 0; + } else { + return -1; + }; + } else { + return 0; + } + } else { + return -2; + } +} + +/* + * size of initial mmap size + */ +size_t kMmapBoundSize = 1024 * 1024 * 4; + +void SetMmapBoundSize(size_t size) { kMmapBoundSize = size; } + +static Status IOError(const std::string& context, int err_number) { + return Status::IOError(context, strerror(err_number)); +} + +int CreateDir(const std::string& path) { + try { + if (filesystem::create_directory(path)) { + return 0; + } + } catch (const filesystem::filesystem_error& e) { + LOG(WARNING) << e.what(); + } catch (const std::exception& e) { + LOG(WARNING) << e.what(); + } + return -1; +} + +bool FileExists(const std::string& path) { + try { + return filesystem::exists(path); + } catch (const filesystem::filesystem_error& e) { + LOG(WARNING) << e.what(); + } catch (const std::exception& e) { + LOG(WARNING) << e.what(); + } + return false; +} + +bool DeleteFile(const std::string& fname) { + try { + return filesystem::remove(fname); + } catch (const filesystem::filesystem_error& e) { + LOG(WARNING) << e.what(); + } catch (const std::exception& e) { + LOG(WARNING) << e.what(); + } + return false; +} + +/** + ** CreatePath - ensure all directories in path exist + ** Algorithm takes the pessimistic view and works top-down to ensure + ** each directory in path exists, rather than optimistically creating + ** the last element and working backwards. 
+ */ +int CreatePath(const std::string& path, mode_t mode) { + try { + if (!filesystem::create_directories(path)) { + return -1; + } + filesystem::permissions(path, static_cast(mode)); + return 0; + } catch (const filesystem::filesystem_error& e) { + LOG(WARNING) << e.what(); + } catch (const std::exception& e) { + LOG(WARNING) << e.what(); + } + + return -1; +} + +int GetChildren(const std::string& dir, std::vector& result) { + result.clear(); + for (auto& de : filesystem::directory_iterator(dir)) { + result.emplace_back(de.path().filename()); + } + return 0; +} + +void GetDescendant(const std::string& dir, std::vector& result) { + result.clear(); + for (auto& de : filesystem::recursive_directory_iterator(dir)) { + result.emplace_back(de.path()); + } +} + +int RenameFile(const std::string& oldname, const std::string& newname) { + try { + filesystem::rename(oldname, newname); + return 0; + } catch (const filesystem::filesystem_error& e) { + LOG(WARNING) << e.what(); + } catch (const std::exception& e) { + LOG(WARNING) << e.what(); + } + return -1; +} + +int IsDir(const std::string& path) { + std::error_code ec; + if (filesystem::is_directory(path, ec)) { + return 0; + } else if (filesystem::is_regular_file(path, ec)) { + return 1; + } + return -1; +} + +int DeleteDir(const std::string& path) { + try { + if (filesystem::remove_all(path) == 0) { + return -1; + } + return 0; + } catch (const filesystem::filesystem_error& e) { + LOG(WARNING) << e.what(); + } catch (const std::exception& e) { + LOG(WARNING) << e.what(); + } + return -1; +} + +bool DeleteDirIfExist(const std::string& path) { + return !(IsDir(path) == 0 && DeleteDir(path) != 0); +} + +uint64_t Du(const std::string& path) { + uint64_t sum = 0; + try { + if (!filesystem::exists(path)) { + return 0; + } + if (filesystem::is_symlink(path)) { + filesystem::path symlink_path = filesystem::read_symlink(path); + sum = Du(symlink_path); + } else if (filesystem::is_directory(path)) { + for (const auto& entry : 
filesystem::directory_iterator(path)) { + if (entry.is_symlink()) { + sum += Du(filesystem::read_symlink(entry.path())); + } else if (entry.is_directory()) { + sum += Du(entry.path()); + } else if (entry.is_regular_file()) { + sum += entry.file_size(); + } + } + } else if (filesystem::is_regular_file(path)) { + sum = filesystem::file_size(path); + } + } catch (const filesystem::filesystem_error& ex) { + LOG(WARNING) << "Error accessing path: " << ex.what(); + } + + return sum; +} + +uint64_t NowMicros() { + auto now = std::chrono::system_clock::now(); + return std::chrono::duration_cast(now.time_since_epoch()).count(); +} + +void SleepForMicroseconds(int micros) { std::this_thread::sleep_for(std::chrono::microseconds(micros)); } + +SequentialFile::~SequentialFile() = default; + +class PosixSequentialFile : public SequentialFile { + private: + std::string filename_; + FILE* file_ = nullptr; + + public: + virtual void setUnBuffer() { setbuf(file_, nullptr); } + + PosixSequentialFile(std::string fname, FILE* f) : filename_(std::move(fname)), file_(f) { setbuf(file_, nullptr); } + + ~PosixSequentialFile() override { + if (file_) { + fclose(file_); + } + } + + Status Read(size_t n, Slice* result, char* scratch) override { + Status s; + size_t r = fread(scratch, 1, n, file_); + + *result = Slice(scratch, r); + + if (r < n) { + if (feof(file_) != 0) { + s = Status::EndFile(filename_, "end file"); + // We leave status as ok if we hit the end of the file + } else { + // A partial read with an error: return a non-ok status + s = IOError(filename_, errno); + } + } + return s; + } + + Status Skip(uint64_t n) override { + if (fseek(file_, static_cast(n), SEEK_CUR) != 0) { + return IOError(filename_, errno); + } + return Status::OK(); + } + + char* ReadLine(char* buf, int n) override { return fgets(buf, n, file_); } + + virtual Status Close() { + if (fclose(file_) != 0) { + return IOError(filename_, errno); + } + file_ = nullptr; + return Status::OK(); + } +}; + 
+WritableFile::~WritableFile() = default; + +// We preallocate up to an extra megabyte and use memcpy to append new +// data to the file. This is safe since we either properly close the +// file before reading from it, or for log files, the reading code +// knows enough to skip zero suffixes. +class PosixMmapFile : public WritableFile { + private: + std::string filename_; + int fd_ = -1; + size_t page_size_ = 0; + size_t map_size_ = 0; // How much extra memory to map at a time + char* base_ = nullptr; // The mapped region + char* limit_ = nullptr; // Limit of the mapped region + char* dst_ = nullptr; // Where to write next (in range [base_,limit_]) + char* last_sync_ = nullptr; // Where have we synced up to + uint64_t file_offset_ = 0; // Offset of base_ in file + uint64_t write_len_ = 0; // The data that written in the file + + // Have we done an munmap of unsynced data? + bool pending_sync_ = false; + + // Roundup x to a multiple of y + static size_t Roundup(size_t x, size_t y) { return ((x + y - 1) / y) * y; } + + static size_t TrimDown(size_t x, size_t y) { return (x / y) * y; } + size_t TruncateToPageBoundary(size_t s) { + s -= (s & (page_size_ - 1)); + assert((s % page_size_) == 0); + return s; + } + + bool UnmapCurrentRegion() { + bool result = true; + if (base_) { + if (last_sync_ < limit_) { + // Defer syncing this data until next Sync() call, if any + pending_sync_ = true; + } + if (munmap(base_, limit_ - base_) != 0) { + result = false; + } + file_offset_ += limit_ - base_; + base_ = nullptr; + limit_ = nullptr; + last_sync_ = nullptr; + dst_ = nullptr; + + // Increase the amount we map the next time, but capped at 1MB + if (map_size_ < (1 << 20)) { + map_size_ *= 2; + } + } + return result; + } + + bool MapNewRegion() { + assert(base_ == nullptr); +#if defined(__APPLE__) + if (ftruncate(fd_, file_offset_ + map_size_) != 0) { +#else + if (posix_fallocate(fd_, static_cast(file_offset_), static_cast(map_size_)) != 0) { +#endif + LOG(WARNING) << "ftruncate 
error"; + return false; + } + void* ptr = mmap(nullptr, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, static_cast(file_offset_)); + if (ptr == MAP_FAILED) { // NOLINT + LOG(WARNING) << "mmap failed"; + return false; + } + base_ = reinterpret_cast(ptr); + limit_ = base_ + map_size_; + dst_ = base_ + write_len_; + write_len_ = 0; + last_sync_ = base_; + return true; + } + + public: + PosixMmapFile(std::string fname, int fd, size_t page_size, uint64_t write_len = 0) + : filename_(std::move(fname)), + fd_(fd), + page_size_(page_size), + map_size_(Roundup(kMmapBoundSize, page_size)), + + write_len_(write_len) + { + if (write_len_ != 0) { + while (map_size_ < write_len_) { + map_size_ += (1024 * 1024); + } + } + assert((page_size & (page_size - 1)) == 0); + } + + ~PosixMmapFile() override { + if (fd_ >= 0) { + PosixMmapFile::Close(); + } + } + + Status Append(const Slice& data) override { + const char* src = data.data(); + size_t left = data.size(); + while (left > 0) { + assert(base_ <= dst_); + assert(dst_ <= limit_); + size_t avail = limit_ - dst_; + if (!avail) { + if (!UnmapCurrentRegion() || !MapNewRegion()) { + return IOError(filename_, errno); + } + } + size_t n = (left <= avail) ? 
left : avail; + memcpy(dst_, src, n); + dst_ += n; + src += n; + left -= n; + } + return Status::OK(); + } + + Status Close() override { + Status s; + size_t unused = limit_ - dst_; + if (!UnmapCurrentRegion()) { + s = IOError(filename_, errno); + } else if (unused > 0) { + // Trim the extra space at the end of the file + if (ftruncate(fd_, static_cast(file_offset_ - unused)) < 0) { + s = IOError(filename_, errno); + } + } + + if (close(fd_) < 0) { + if (s.ok()) { + s = IOError(filename_, errno); + } + } + + fd_ = -1; + base_ = nullptr; + limit_ = nullptr; + return s; + } + + Status Flush() override { return Status::OK(); } + + Status Sync() override { + Status s; + + if (pending_sync_) { + // Some unmapped data was not synced + pending_sync_ = false; +#if defined(__APPLE__) + if (fsync(fd_) < 0) { +#else + if (fdatasync(fd_) < 0) { +#endif + s = IOError(filename_, errno); + } + } + + if (dst_ > last_sync_) { + // Find the beginnings of the pages that contain the first and last + // bytes to be synced. 
+ size_t p1 = TruncateToPageBoundary(last_sync_ - base_); + size_t p2 = TruncateToPageBoundary(dst_ - base_ - 1); + last_sync_ = dst_; + if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) { + s = IOError(filename_, errno); + } + } + + return s; + } + + Status Trim(uint64_t target) override { + if (!UnmapCurrentRegion()) { + return IOError(filename_, errno); + } + + file_offset_ = target; + + if (!MapNewRegion()) { + return IOError(filename_, errno); + } + return Status::OK(); + } + + uint64_t Filesize() override { return write_len_ + file_offset_ + (dst_ - base_); } +}; + +RWFile::~RWFile() = default; + +class MmapRWFile : public RWFile { + public: + MmapRWFile(std::string fname, int fd, size_t page_size) + : filename_(std::move(fname)), fd_(fd), page_size_(page_size), map_size_(Roundup(65536, page_size)) { + DoMapRegion(); + } + + ~MmapRWFile() override { + if (fd_ >= 0) { + munmap(base_, map_size_); + } + } + + bool DoMapRegion() { +#if defined(__APPLE__) + if (ftruncate(fd_, map_size_) != 0) { +#else + if (posix_fallocate(fd_, 0, static_cast(map_size_)) != 0) { +#endif + return false; + } + void* ptr = mmap(nullptr, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0); + if (ptr == MAP_FAILED) { // NOLINT + return false; + } + base_ = reinterpret_cast(ptr); + return true; + } + + char* GetData() override { return base_; } + char* base() { return base_; } + + private: + static size_t Roundup(size_t x, size_t y) { return ((x + y - 1) / y) * y; } + std::string filename_; + int fd_ = -1; + size_t page_size_[[maybe_unused]] = 0; + size_t map_size_ = 0; + char* base_ = nullptr; +}; + +class PosixRandomRWFile : public RandomRWFile { + private: + const std::string filename_; + int fd_ = -1; + bool pending_sync_ = false; + bool pending_fsync_ = false; + // bool fallocate_with_keep_size_; + + public: + PosixRandomRWFile(std::string fname, int fd) + : filename_(std::move(fname)), fd_(fd) { + // fallocate_with_keep_size_ = options.fallocate_with_keep_size; + } + + 
~PosixRandomRWFile() override { + if (fd_ >= 0) { + // TODO(clang-tidy): Call virtual method during destruction bypasses virtual dispatch + // So I disabled next line clang-tidy check simply temporarily. + Close(); // NOLINT + } + } + + Status Write(uint64_t offset, const Slice& data) override { + const char* src = data.data(); + size_t left = data.size(); + Status s; + pending_sync_ = true; + pending_fsync_ = true; + + while (left != 0) { + ssize_t done = pwrite(fd_, src, left, static_cast(offset)); + if (done < 0) { + if (errno == EINTR) { + continue; + } + return IOError(filename_, errno); + } + + left -= done; + src += done; + offset += done; + } + + return Status::OK(); + } + + Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { + Status s; + ssize_t r = -1; + size_t left = n; + char* ptr = scratch; + while (left > 0) { + r = pread(fd_, ptr, left, static_cast(offset)); + if (r <= 0) { + if (errno == EINTR) { + continue; + } + break; + } + ptr += r; + offset += r; + left -= r; + } + *result = Slice(scratch, (r < 0) ? 0 : n - left); + if (r < 0) { + s = IOError(filename_, errno); + } + return s; + } + + Status Close() override { + Status s = Status::OK(); + if (fd_ >= 0 && close(fd_) < 0) { + s = IOError(filename_, errno); + } + fd_ = -1; + return s; + } + + Status Sync() override { +#if defined(__APPLE__) + if (pending_sync_ && fsync(fd_) < 0) { +#else + if (pending_sync_ && fdatasync(fd_) < 0) { +#endif + return IOError(filename_, errno); + } + pending_sync_ = false; + return Status::OK(); + } + + Status Fsync() override { + if (pending_fsync_ && fsync(fd_) < 0) { + return IOError(filename_, errno); + } + pending_fsync_ = false; + pending_sync_ = false; + return Status::OK(); + } + + // virtual Status Allocate(off_t offset, off_t len) override { + // TEST_KILL_RANDOM(rocksdb_kill_odds); + // int alloc_status = fallocate( + // fd_, fallocate_with_keep_size_ ? 
FALLOC_FL_KEEP_SIZE : 0, offset, len); + // if (alloc_status == 0) { + // return Status::OK(); + // } else { + // return IOError(filename_, errno); + // } + // } +}; + +Status NewSequentialFile(const std::string& fname, std::unique_ptr& result) { + FILE* f = fopen(fname.c_str(), "r"); + if (!f) { + return IOError(fname, errno); + } else { + result = std::make_unique(fname, f); + return Status::OK(); + } +} + +Status NewWritableFile(const std::string& fname, std::unique_ptr& result) { + Status s; + const int fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC | O_CLOEXEC, 0644); + if (fd < 0) { + s = IOError(fname, errno); + } else { + result = std::make_unique(fname, fd, kPageSize); + } + return s; +} + +Status NewRWFile(const std::string& fname, std::unique_ptr& result) { + Status s; + const int fd = open(fname.c_str(), O_CREAT | O_RDWR | O_CLOEXEC, 0644); + if (fd < 0) { + s = IOError(fname, errno); + } else { + result = std::make_unique(fname, fd, kPageSize); + } + return s; +} + +Status AppendWritableFile(const std::string& fname, std::unique_ptr& result, uint64_t write_len) { + Status s; + const int fd = open(fname.c_str(), O_RDWR | O_CLOEXEC, 0644); + if (fd < 0) { + s = IOError(fname, errno); + } else { + result = std::make_unique(fname, fd, kPageSize, write_len); + } + return s; +} + +Status NewRandomRWFile(const std::string& fname, std::unique_ptr& result) { + Status s; + const int fd = open(fname.c_str(), O_CREAT | O_RDWR, 0644); + if (fd < 0) { + s = IOError(fname, errno); + } else { + result = std::make_unique(fname, fd); + } + return s; +} + +} // namespace pstd diff --git a/src/pstd/env.h b/src/pstd/env.h new file mode 100644 index 000000000..6451c94f7 --- /dev/null +++ b/src/pstd/env.h @@ -0,0 +1,161 @@ +/* +* Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#pragma once + +#include +#include +#include +#include + +#include "pstd_status.h" +#include "noncopyable.h" + +namespace pstd { + +class WritableFile; +class SequentialFile; +class RWFile; +class RandomRWFile; + +/* + * Set the resource limits of a process + */ +int SetMaxFileDescriptorNum(int64_t max_file_descriptor_num); + +/* + * Set size of initial mmap size + */ +void SetMmapBoundSize(size_t size); + +extern const size_t kPageSize; + +/* + * File Operations + */ +int IsDir(const std::string& path); +int DeleteDir(const std::string& path); +bool DeleteDirIfExist(const std::string& path); +int CreateDir(const std::string& path); +int CreatePath(const std::string& path, mode_t mode = 0755); +uint64_t Du(const std::string& filename); + +/* + * Whether the file is exist + * If exist return true, else return false + */ +bool FileExists(const std::string& path); + +bool DeleteFile(const std::string& fname); + +int RenameFile(const std::string& oldname, const std::string& newname); + +class FileLock : public pstd::noncopyable { + public: + FileLock() = default; + virtual ~FileLock()= default;; + + int fd_ = -1; + std::string name_; +}; + +int GetChildren(const std::string& dir, std::vector& result); +void GetDescendant(const std::string& dir, std::vector& result); + +uint64_t NowMicros(); +void SleepForMicroseconds(int micros); + +Status NewSequentialFile(const std::string& fname, std::unique_ptr& result); + +Status NewWritableFile(const std::string& fname, std::unique_ptr& result); + +Status NewRWFile(const std::string& fname, std::unique_ptr& result); + +Status AppendSequentialFile(const std::string& fname, SequentialFile** result); + +Status AppendWritableFile(const std::string& fname, std::unique_ptr& result, uint64_t write_len = 0); + +Status NewRandomRWFile(const std::string& fname, std::unique_ptr& result); + +// A file abstraction for sequential writing. 
The implementation +// must provide buffering since callers may append small fragments +// at a time to the file. +class WritableFile : public pstd::noncopyable { + public: + WritableFile() = default; + virtual ~WritableFile(); + + virtual Status Append(const Slice& data) = 0; + virtual Status Close() = 0; + virtual Status Flush() = 0; + virtual Status Sync() = 0; + virtual Status Trim(uint64_t offset) = 0; + virtual uint64_t Filesize() = 0; +}; + +// A abstract for the sequential readable file +class SequentialFile { + public: + SequentialFile()= default;; + virtual ~SequentialFile(); + // virtual Status Read(size_t n, char *&result, char *scratch) = 0; + virtual Status Read(size_t n, Slice* result, char* scratch) = 0; + virtual Status Skip(uint64_t n) = 0; + // virtual Status Close() = 0; + virtual char* ReadLine(char* buf, int n) = 0; +}; + +class RWFile : public pstd::noncopyable { + public: + RWFile() = default; + virtual ~RWFile(); + virtual char* GetData() = 0; +}; + +// A file abstraction for random reading and writing. +class RandomRWFile : public pstd::noncopyable { + public: + RandomRWFile() = default; + virtual ~RandomRWFile() = default; + + // Write data from Slice data to file starting from offset + // Returns IOError on failure, but does not guarantee + // atomicity of a write. Returns OK status on success. + // + // Safe for concurrent use. + virtual Status Write(uint64_t offset, const Slice& data) = 0; + // Read up to "n" bytes from the file starting at "offset". + // "scratch[0..n-1]" may be written by this routine. Sets "*result" + // to the data that was read (including if fewer than "n" bytes were + // successfully read). May set "*result" to point at data in + // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when + // "*result" is used. If an error was encountered, returns a non-OK + // status. + // + // Safe for concurrent use by multiple threads. 
+ virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const = 0; + virtual Status Close() = 0; // closes the file + virtual Status Sync() = 0; // sync data + + /* + * Sync data and/or metadata as well. + * By default, sync only data. + * Override this method for environments where we need to sync + * metadata as well. + */ + virtual Status Fsync() { return Sync(); } + + /* + * Pre-allocate space for a file. + */ + virtual Status Allocate(off_t offset, off_t len) { + (void)offset; + (void)len; + return Status::OK(); + } +}; +} // namespace pstd diff --git a/src/pstd/lock_mgr.cc b/src/pstd/lock_mgr.cc new file mode 100644 index 000000000..930c6af54 --- /dev/null +++ b/src/pstd/lock_mgr.cc @@ -0,0 +1,179 @@ +/* +* Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. 
+ */ + +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS +#endif + +#include "lock_mgr.h" + +#include +#include +#include +#include +#include + +#include "mutex.h" + +namespace pstd::lock { + +struct LockMapStripe { + explicit LockMapStripe(const std::shared_ptr& factory) { + stripe_mutex = factory->AllocateMutex(); + stripe_cv = factory->AllocateCondVar(); + assert(stripe_mutex); + assert(stripe_cv); + } + + // Mutex must be held before modifying keys map + std::shared_ptr stripe_mutex; + + // Condition Variable per stripe for waiting on a lock + std::shared_ptr stripe_cv; + + // Locked keys + std::unordered_set keys; +}; + +// Map of #num_stripes LockMapStripes +struct LockMap { + explicit LockMap(size_t num_stripes, const std::shared_ptr& factory) : num_stripes_(num_stripes) { + lock_map_stripes_.reserve(num_stripes); + for (size_t i = 0; i < num_stripes; i++) { + auto stripe = std::make_shared(factory); + lock_map_stripes_.push_back(stripe); + } + } + + ~LockMap() = default; + + // Number of sepearate LockMapStripes to create, each with their own Mutex + const size_t num_stripes_; + + // Count of keys that are currently locked. + // (Only maintained if LockMgr::max_num_locks_ is positive.) 
+ std::atomic lock_cnt{0}; + + std::vector> lock_map_stripes_; + + size_t GetStripe(const std::string& key) const; +}; + +size_t LockMap::GetStripe(const std::string& key) const { + assert(num_stripes_ > 0); + size_t stripe = std::hash{}(key) % num_stripes_; + return stripe; +} + +LockMgr::LockMgr(size_t default_num_stripes, int64_t max_num_locks, const std::shared_ptr& mutex_factory) + : default_num_stripes_(default_num_stripes), + max_num_locks_(max_num_locks), + mutex_factory_(mutex_factory), + lock_map_(std::make_shared(default_num_stripes, mutex_factory)) {} + +LockMgr::~LockMgr() = default; + +Status LockMgr::TryLock(const std::string& key) { +#ifdef LOCKLESS + return Status::OK(); +#else + size_t stripe_num = lock_map_->GetStripe(key); + assert(lock_map_->lock_map_stripes_.size() > stripe_num); + auto stripe = lock_map_->lock_map_stripes_.at(stripe_num); + + return Acquire(stripe, key); +#endif +} + +// Helper function for TryLock(). +Status LockMgr::Acquire(const std::shared_ptr& stripe, const std::string& key) { + Status result; + + // we wait indefinitely to acquire the lock + result = stripe->stripe_mutex->Lock(); + + if (!result.ok()) { + // failed to acquire mutex + return result; + } + + // Acquire lock if we are able to + result = AcquireLocked(stripe, key); + + if (!result.ok()) { + // If we weren't able to acquire the lock, we will keep retrying + do { + result = stripe->stripe_cv->Wait(stripe->stripe_mutex); + if (result.ok()) { + result = AcquireLocked(stripe, key); + } + } while (!result.ok()); + } + + stripe->stripe_mutex->UnLock(); + + return result; +} + +// Try to lock this key after we have acquired the mutex. +// REQUIRED: Stripe mutex must be held. +Status LockMgr::AcquireLocked(const std::shared_ptr& stripe, const std::string& key) { + Status result; + // Check if this key is already locked + if (stripe->keys.find(key) != stripe->keys.end()) { + // Lock already held + result = Status::Busy("LockTimeout"); + } else { // Lock not held. 
+ // Check lock limit + if (max_num_locks_ > 0 && lock_map_->lock_cnt.load(std::memory_order_acquire) >= max_num_locks_) { + result = Status::Busy("LockLimit"); + } else { + // acquire lock + stripe->keys.insert(key); + + // Maintain lock count if there is a limit on the number of locks + if (max_num_locks_ != 0) { + lock_map_->lock_cnt++; + } + } + } + + return result; +} + +void LockMgr::UnLockKey(const std::string& key, const std::shared_ptr& stripe) { +#ifdef LOCKLESS +#else + auto stripe_iter = stripe->keys.find(key); + if (stripe_iter != stripe->keys.end()) { + // Found the key locked. unlock it. + stripe->keys.erase(stripe_iter); + if (max_num_locks_ > 0) { + // Maintain lock count if there is a limit on the number of locks. + assert(lock_map_->lock_cnt.load(std::memory_order_relaxed) > 0); + lock_map_->lock_cnt--; + } + } else { + // This key is either not locked or locked by someone else. + } +#endif +} + +void LockMgr::UnLock(const std::string& key) { + // Lock the mutex for the stripe that this key hashes to + size_t stripe_num = lock_map_->GetStripe(key); + assert(lock_map_->lock_map_stripes_.size() > stripe_num); + auto stripe = lock_map_->lock_map_stripes_.at(stripe_num); + + stripe->stripe_mutex->Lock(); + UnLockKey(key, stripe); + stripe->stripe_mutex->UnLock(); + + // Signal waiting threads to retry locking + stripe->stripe_cv->NotifyAll(); +} +} // namespace pstd::lock diff --git a/src/pstd/lock_mgr.h b/src/pstd/lock_mgr.h new file mode 100644 index 000000000..5d172b81e --- /dev/null +++ b/src/pstd/lock_mgr.h @@ -0,0 +1,57 @@ +/* +* Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. 
+ */ + +#pragma once + +#include +#include + +#include "mutex.h" +#include "noncopyable.h" + +namespace pstd { + +namespace lock { +struct LockMap; +struct LockMapStripe; + +class LockMgr : public pstd::noncopyable { +public: + LockMgr(size_t default_num_stripes, int64_t max_num_locks, const std::shared_ptr& factory); + + ~LockMgr(); + + // Attempt to lock key. If OK status is returned, the caller is responsible + // for calling UnLock() on this key. + Status TryLock(const std::string& key); + + // Unlock a key locked by TryLock(). + void UnLock(const std::string& key); + +private: + // Default number of lock map stripes + const size_t default_num_stripes_[[maybe_unused]]; + + // Limit on number of keys locked per column family + const int64_t max_num_locks_; + + // Used to allocate mutexes/condvars to use when locking keys + std::shared_ptr mutex_factory_; + + // Map to locked key info + std::shared_ptr lock_map_; + + Status Acquire(const std::shared_ptr& stripe, const std::string& key); + + Status AcquireLocked(const std::shared_ptr& stripe, const std::string& key); + + void UnLockKey(const std::string& key, const std::shared_ptr& stripe); + +}; + +} // namespace lock +} // namespace pstd diff --git a/src/pstd/mutex.h b/src/pstd/mutex.h new file mode 100644 index 000000000..b768d2b2a --- /dev/null +++ b/src/pstd/mutex.h @@ -0,0 +1,86 @@ +/* +* Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#pragma once + +#include + +#include "pstd_status.h" + +namespace pstd::lock { + +using Status = pstd::Status; + +class Mutex { + public: + virtual ~Mutex() = default; + + // Attempt to acquire lock. Return OK on success, or other Status on failure. + // If returned status is OK, Storage will eventually call UnLock(). 
+ virtual Status Lock() = 0; + + // Attempt to acquire lock. If timeout is non-negative, operation may be + // failed after this many microseconds. + // Returns OK on success, + // TimedOut if timed out, + // or other Status on failure. + // If returned status is OK, Storage will eventually call UnLock(). + virtual Status TryLockFor(int64_t timeout_time) = 0; + + // Unlock Mutex that was successfully locked by Lock() or TryLockUntil() + virtual void UnLock() = 0; +}; + +class CondVar { + public: + virtual ~CondVar() = default; + + // Block current thread until condition variable is notified by a call to + // Notify() or NotifyAll(). Wait() will be called with mutex locked. + // Returns OK if notified. + // Returns non-OK if Storage should stop waiting and fail the operation. + // May return OK spuriously even if not notified. + virtual Status Wait(std::shared_ptr mutex) = 0; + + // Block current thread until condition variable is notified by a call to + // Notify() or NotifyAll(), or if the timeout is reached. + // Wait() will be called with mutex locked. + // + // If timeout is non-negative, operation should be failed after this many + // microseconds. + // If implementing a custom version of this class, the implementation may + // choose to ignore the timeout. + // + // Returns OK if notified. + // Returns TimedOut if timeout is reached. + // Returns other status if Storage should otherwis stop waiting and + // fail the operation. + // May return OK spuriously even if not notified. + virtual Status WaitFor(std::shared_ptr mutex, int64_t timeout_time) = 0; + + // If any threads are waiting on *this, unblock at least one of the + // waiting threads. + virtual void Notify() = 0; + + // Unblocks all threads waiting on *this. + virtual void NotifyAll() = 0; +}; + +// Factory class that can allocate mutexes and condition variables. +class MutexFactory { + public: + // Create a Mutex object. + virtual std::shared_ptr AllocateMutex() = 0; + + // Create a CondVar object. 
+ virtual std::shared_ptr AllocateCondVar() = 0; + + virtual ~MutexFactory() = default; +}; + +} // namespace pstd::lock + diff --git a/src/pstd/mutex_impl.cc b/src/pstd/mutex_impl.cc new file mode 100644 index 000000000..80568b88c --- /dev/null +++ b/src/pstd/mutex_impl.cc @@ -0,0 +1,120 @@ +/* +* Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#include +#include + +#include "mutex.h" +#include "mutex_impl.h" + +namespace pstd::lock { + +class MutexImpl : public Mutex { + public: + MutexImpl() = default; + ~MutexImpl() override = default; + + Status Lock() override; + + Status TryLockFor(int64_t timeout_time) override; + + void UnLock() override { mutex_.unlock(); } + + friend class CondVarImpl; + + private: + std::mutex mutex_; +}; + +class CondVarImpl : public CondVar { + public: + CondVarImpl() = default; + ~CondVarImpl() override = default; + + Status Wait(std::shared_ptr mutex) override; + + Status WaitFor(std::shared_ptr mutex, int64_t timeout_time) override; + + void Notify() override { cv_.notify_one(); } + + void NotifyAll() override { cv_.notify_all(); } + + private: + std::condition_variable cv_; +}; + +std::shared_ptr MutexFactoryImpl::AllocateMutex() { return std::shared_ptr(new MutexImpl()); } + +std::shared_ptr MutexFactoryImpl::AllocateCondVar() { return std::shared_ptr(new CondVarImpl()); } + +Status MutexImpl::Lock() { + mutex_.lock(); + return Status::OK(); +} + +Status MutexImpl::TryLockFor(int64_t timeout_time) { + bool locked = true; + + if (timeout_time == 0) { + locked = mutex_.try_lock(); + } else { + // Previously, this code used a std::timed_mutex. However, this was changed + // due to known bugs in gcc versions < 4.9. 
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54562 + // + // Since this mutex isn't held for long and only a single mutex is ever + // held at a time, it is reasonable to ignore the lock timeout_time here + // and only check it when waiting on the condition_variable. + mutex_.lock(); + } + + if (!locked) { + // timeout acquiring mutex + return Status::Timeout("MutexTimeout"); + } + + return Status::OK(); +} + +Status CondVarImpl::Wait(std::shared_ptr mutex) { + auto mutex_impl = reinterpret_cast(mutex.get()); + + std::unique_lock lock(mutex_impl->mutex_, std::adopt_lock); + cv_.wait(lock); + + // Make sure unique_lock doesn't unlock mutex when it destructs + lock.release(); + + return Status::OK(); +} + +Status CondVarImpl::WaitFor(std::shared_ptr mutex, int64_t timeout_time) { + Status s; + + auto mutex_impl = reinterpret_cast(mutex.get()); + std::unique_lock lock(mutex_impl->mutex_, std::adopt_lock); + + if (timeout_time < 0) { + // If timeout is negative, do not use a timeout + cv_.wait(lock); + } else { + auto duration = std::chrono::microseconds(timeout_time); + auto cv_status = cv_.wait_for(lock, duration); + + // Check if the wait stopped due to timing out. + if (cv_status == std::cv_status::timeout) { + s = Status::Timeout("MutexTimeout"); + } + } + + // Make sure unique_lock doesn't unlock mutex when it destructs + lock.release(); + + // CV was signaled, or we spuriously woke up (but didn't time out) + return s; +} +} // namespace pstd::lock diff --git a/src/pstd/mutex_impl.h b/src/pstd/mutex_impl.h new file mode 100644 index 000000000..5165933f1 --- /dev/null +++ b/src/pstd/mutex_impl.h @@ -0,0 +1,23 @@ +/* +* Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. 
+ */ + +#pragma once + +#include "mutex.h" + +#include + +namespace pstd { +namespace lock { +// Default implementation of MutexFactory. +class MutexFactoryImpl : public MutexFactory { +public: + std::shared_ptr AllocateMutex() override; + std::shared_ptr AllocateCondVar() override; +}; +} // namespace lock +} // namespace pstd diff --git a/src/pstd/noncopyable.h b/src/pstd/noncopyable.h new file mode 100644 index 000000000..039c8d39c --- /dev/null +++ b/src/pstd/noncopyable.h @@ -0,0 +1,22 @@ +/* +* Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#pragma once + +namespace pstd { + +class noncopyable { +protected: + noncopyable() = default; + ~noncopyable() = default; + +private: + noncopyable(const noncopyable&) = delete; + void operator=(const noncopyable&) = delete; +}; + +} // namespace pstd diff --git a/src/pstd/pstd_coding.cc b/src/pstd/pstd_coding.cc new file mode 100644 index 000000000..6fc0496cd --- /dev/null +++ b/src/pstd/pstd_coding.cc @@ -0,0 +1,204 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#include "pstd_coding.h" +#include "pstd_slice.h" + +namespace pstd { + +void EncodeFixed16(char* buf, uint16_t value) { memcpy(buf, &value, sizeof(value)); } + +void EncodeFixed32(char* buf, uint32_t value) { memcpy(buf, &value, sizeof(value)); } + +void EncodeFixed64(char* buf, uint64_t value) { memcpy(buf, &value, sizeof(value)); } + +void PutFixed16(std::string* dst, uint16_t value) { + char buf[sizeof(value)]; + EncodeFixed16(buf, value); + dst->append(buf, sizeof(buf)); +} + +void PutFixed32(std::string* dst, uint32_t value) { + char buf[sizeof(value)]; + EncodeFixed32(buf, value); + dst->append(buf, sizeof(buf)); +} + +void PutFixed64(std::string* dst, uint64_t value) { + char buf[sizeof(value)]; + EncodeFixed64(buf, value); + dst->append(buf, sizeof(buf)); +} + +char* EncodeVarint32(char* dst, uint32_t v) { + // Operate on characters as unsigneds + auto ptr = reinterpret_cast(dst); + static const int B = 128; + if (v < (1 << 7)) { + *(ptr++) = v; + } else if (v < (1 << 14)) { + *(ptr++) = v | B; + *(ptr++) = v >> 7; + } else if (v < (1 << 21)) { + *(ptr++) = v | B; + *(ptr++) = (v >> 7) | B; + *(ptr++) = v >> 14; + } else if (v < (1 << 28)) { + *(ptr++) = v | B; + *(ptr++) = (v >> 7) | B; + *(ptr++) = (v >> 14) | B; + *(ptr++) = v >> 21; + } else { + *(ptr++) = v | B; + *(ptr++) = (v >> 7) | B; + *(ptr++) = (v >> 14) | B; + *(ptr++) = (v >> 21) | B; + *(ptr++) = v >> 28; + } + return reinterpret_cast(ptr); +} + +void PutVarint32(std::string* dst, uint32_t v) { + char buf[5]; + char* ptr = EncodeVarint32(buf, v); + dst->append(buf, ptr - buf); +} + +char* EncodeVarint64(char* dst, uint64_t v) { + static const int B = 128; + auto ptr = reinterpret_cast(dst); + while (v >= B) { + *(ptr++) = (v & (B - 1)) | B; + v >>= 7; + } + *(ptr++) = static_cast(v); + return reinterpret_cast(ptr); +} + +void PutVarint64(std::string* dst, uint64_t v) { + char buf[10]; + char* ptr = EncodeVarint64(buf, v); + dst->append(buf, ptr - buf); +} + +void 
PutLengthPrefixedString(std::string* dst, const std::string& value) { + PutVarint32(dst, value.size()); + dst->append(value.data(), value.size()); +} + +int VarintLength(uint64_t v) { + int len = 1; + while (v >= 128) { + v >>= 7; + len++; + } + return len; +} + +const char* GetVarint32PtrFallback(const char* p, const char* limit, uint32_t* value) { + uint32_t result = 0; + for (uint32_t shift = 0; shift <= 28 && p < limit; shift += 7) { + uint32_t byte = *(reinterpret_cast(p)); + p++; + if ((byte & 128) != 0U) { + // More bytes are present + result |= ((byte & 127) << shift); + } else { + result |= (byte << shift); + *value = result; + return reinterpret_cast(p); + } + } + return nullptr; +} + +bool GetVarint32(std::string* input, uint32_t* value) { + const char* p = input->data(); + const char* limit = p + input->size(); + const char* q = GetVarint32Ptr(p, limit, value); + if (!q) { + return false; + } else { + (*input).erase(0, q - p); + return true; + } +} + +bool GetVarint32(Slice* input, uint32_t* value) { + const char* p = input->data(); + const char* limit = p + input->size(); + const char* q = GetVarint32Ptr(p, limit, value); + if (!q) { + return false; + } else { + *input = Slice(q, limit - q); + return true; + } +} + +const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* value) { + uint64_t result = 0; + for (uint32_t shift = 0; shift <= 63 && p < limit; shift += 7) { + uint64_t byte = *(reinterpret_cast(p)); + p++; + if ((byte & 128) != 0U) { + // More bytes are present + result |= ((byte & 127) << shift); + } else { + result |= (byte << shift); + *value = result; + return reinterpret_cast(p); + } + } + return nullptr; +} + +bool GetVarint64(Slice* input, uint64_t* value) { + const char* p = input->data(); + const char* limit = p + input->size(); + const char* q = GetVarint64Ptr(p, limit, value); + if (!q) { + return false; + } else { + *input = Slice(q, limit - q); + return true; + } +} + +const char* GetLengthPrefixedSlice(const char* 
p, const char* limit, Slice* result) { + uint32_t len; + p = GetVarint32Ptr(p, limit, &len); + if (!p) { + return nullptr; + } + if (p + len > limit) { + return nullptr; + } + *result = Slice(p, len); + return p + len; +} + +bool GetLengthPrefixedSlice(Slice* input, Slice* result) { + uint32_t len; + if (GetVarint32(input, &len) && input->size() >= len) { + *result = Slice(input->data(), len); + input->remove_prefix(len); + return true; + } else { + return false; + } +} + +bool GetLengthPrefixedString(std::string* input, std::string* result) { + uint32_t len; + if (GetVarint32(input, &len) && input->size() >= len) { + *result = (*input).substr(0, len); + input->erase(0, len); + return true; + } else { + return false; + } +} + +} // namespace pstd diff --git a/src/pstd/pstd_coding.h b/src/pstd/pstd_coding.h new file mode 100644 index 000000000..206fd34fe --- /dev/null +++ b/src/pstd/pstd_coding.h @@ -0,0 +1,151 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. +// +// Endian-neutral encoding: +// * Fixed-length numbers are encoded with least-significant byte first +// * In addition we support variable length "varint" encoding +// * Strings are encoded prefixed by their length in varint format + +#pragma once + +#include +#include +#include + +#include "pstd_slice.h" + +namespace pstd { + +// Standard Put... 
routines append to a string +extern void PutFixed16(std::string* dst, uint16_t value); +extern void PutFixed32(std::string* dst, uint32_t value); +extern void PutFixed64(std::string* dst, uint64_t value); +extern void PutVarint32(std::string* dst, uint32_t value); +extern void PutVarint64(std::string* dst, uint64_t value); +extern void PutLengthPrefixedString(std::string* dst, const std::string& value); + +extern void GetFixed16(std::string* dst, uint16_t* value); +extern void GetFixed32(std::string* dst, uint32_t* value); +extern void GetFixed64(std::string* dst, uint64_t* value); +extern bool GetVarint32(std::string* input, uint32_t* value); +extern bool GetVarint64(std::string* input, uint64_t* value); + +extern void GetFixed16(Slice* dst, uint16_t* value); +extern void GetFixed32(Slice* dst, uint32_t* value); +extern void GetFixed64(Slice* dst, uint64_t* value); +extern bool GetVarint32(Slice* input, uint32_t* value); +extern bool GetVarint64(Slice* input, uint64_t* value); + +extern const char* GetLengthPrefixedSlice(const char* p, const char* limit, Slice* result); +extern bool GetLengthPrefixedSlice(Slice* input, Slice* result); +extern bool GetLengthPrefixedString(std::string* input, std::string* result); + +// Pointer-based variants of GetVarint... These either store a value +// in *v and return a pointer just past the parsed value, or return +// nullptr on error. These routines only look at bytes in the range +// [p..limit-1] +extern const char* GetVarint32Ptr(const char* p, const char* limit, uint32_t* v); +extern const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* v); + +// Returns the length of the varint32 or varint64 encoding of "v" +extern int VarintLength(uint64_t v); + +// Lower-level versions of Put... 
that write directly into a character buffer +// REQUIRES: dst has enough space for the value being written +extern void EncodeFixed16(char* buf, uint16_t value); +extern void EncodeFixed32(char* buf, uint32_t value); +extern void EncodeFixed64(char* buf, uint64_t value); + +// Lower-level versions of Put... that write directly into a character buffer +// and return a pointer just past the last byte written. +// REQUIRES: dst has enough space for the value being written +extern char* EncodeVarint32(char* dst, uint32_t value); +extern char* EncodeVarint64(char* dst, uint64_t value); + +// Lower-level versions of Get... that read directly from a character buffer +// without any bounds checking. + +inline uint16_t DecodeFixed16(const char* ptr) { + // Load the raw bytes + uint16_t result; + memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load + return result; +} + +inline uint32_t DecodeFixed32(const char* ptr) { + // Load the raw bytes + uint32_t result; + memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load + return result; +} + +inline uint64_t DecodeFixed64(const char* ptr) { + // Load the raw bytes + uint64_t result; + memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load + return result; +} + +inline void GetFixed16(std::string* dst, uint16_t* value) { + if (!dst || !value) { + return; + } + *value = DecodeFixed16(dst->data()); + dst->erase(0, sizeof(uint16_t)); +} + +inline void GetFixed32(std::string* dst, uint32_t* value) { + if (!dst || !value) { + return; + } + *value = DecodeFixed32(dst->data()); + dst->erase(0, sizeof(uint32_t)); +} + +inline void GetFixed64(std::string* dst, uint64_t* value) { + if (!dst || !value) { + return; + } + *value = DecodeFixed64(dst->data()); + dst->erase(0, sizeof(uint64_t)); +} + +inline void GetFixed16(Slice* dst, uint16_t* value) { + if (!dst || !value) { + return; + } + *value = DecodeFixed16(dst->data()); + dst->remove_prefix(sizeof(uint16_t) / 
sizeof(char)); +} + +inline void GetFixed32(Slice* dst, uint32_t* value) { + if (!dst || !value) { + return; + } + *value = DecodeFixed32(dst->data()); + dst->remove_prefix(sizeof(uint32_t) / sizeof(char)); +} + +inline void GetFixed64(Slice* dst, uint64_t* value) { + if (!dst || !value) { + return; + } + *value = DecodeFixed64(dst->data()); + dst->remove_prefix(sizeof(uint64_t) / sizeof(char)); +} + +// Internal routine for use by fallback path of GetVarint32Ptr +extern const char* GetVarint32PtrFallback(const char* p, const char* limit, uint32_t* value); +inline const char* GetVarint32Ptr(const char* p, const char* limit, uint32_t* value) { + if (p < limit) { + uint32_t result = *(reinterpret_cast(p)); + if ((result & 128) == 0) { + *value = result; + return p + 1; + } + } + return GetVarint32PtrFallback(p, limit, value); +} + +} // namespace pstd diff --git a/src/pstd/pstd_defer.h b/src/pstd/pstd_defer.h index ce8221bfd..4929c9323 100755 --- a/src/pstd/pstd_defer.h +++ b/src/pstd/pstd_defer.h @@ -1,7 +1,9 @@ -// Copyright (c) 2015-present, Qihoo, Inc. All rights reserved. -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. An additional grant -// of patent rights can be found in the PATENTS file in the same directory. +/* +* Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ #pragma once diff --git a/src/pstd/pstd_mutex.h b/src/pstd/pstd_mutex.h new file mode 100644 index 000000000..2c0a5f928 --- /dev/null +++ b/src/pstd/pstd_mutex.h @@ -0,0 +1,79 @@ +/* +* Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. 
+ * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "noncopyable.h" + +namespace pstd { + +using Mutex = std::mutex; +using CondVar = std::condition_variable; +using RWMutex = std::shared_mutex; + +using OnceType = std::once_flag; + +template +void InitOnce(OnceType& once, F&& f, Args&&... args) { + return std::call_once(once, std::forward(f), std::forward(args)...); +} + +class RefMutex : public pstd::noncopyable { +public: + RefMutex() = default; + ~RefMutex() = default; + + // Lock and Unlock will increase and decrease refs_, + // should check refs before Unlock + void Lock(); + void Unlock(); + + void Ref(); + void Unref(); + bool IsLastRef() { return refs_ == 1; } + +private: + std::mutex mu_; + int refs_ = 0; +}; + +class RecordMutex : public pstd::noncopyable { +public: + RecordMutex() = default; + ~RecordMutex(); + + void MultiLock(const std::vector& keys); + void Lock(const std::string& key); + void MultiUnlock(const std::vector& keys); + void Unlock(const std::string& key); + +private: + Mutex mutex_; + + std::unordered_map records_; +}; + +class RecordLock : public pstd::noncopyable { +public: + RecordLock(RecordMutex* mu, std::string key) : mu_(mu), key_(std::move(key)) { mu_->Lock(key_); } + ~RecordLock() { mu_->Unlock(key_); } + +private: + RecordMutex* const mu_; + std::string key_; +}; + +} // namespace pstd diff --git a/src/pstd/slice.h b/src/pstd/pstd_slice.h similarity index 100% rename from src/pstd/slice.h rename to src/pstd/pstd_slice.h diff --git a/src/pstd/status.cc b/src/pstd/pstd_status.cc similarity index 98% rename from src/pstd/status.cc rename to src/pstd/pstd_status.cc index a84231aef..c198b5aa0 100755 --- a/src/pstd/status.cc +++ 
b/src/pstd/pstd_status.cc @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#include "status.h" +#include "pstd_status.h" #include #include diff --git a/src/pstd/status.h b/src/pstd/pstd_status.h similarity index 99% rename from src/pstd/status.h rename to src/pstd/pstd_status.h index 2e777abfc..49051a45e 100755 --- a/src/pstd/status.h +++ b/src/pstd/pstd_status.h @@ -6,7 +6,7 @@ #pragma once #include -#include "slice.h" +#include "pstd_slice.h" namespace pstd { diff --git a/src/pstd/scope_record_lock.cc b/src/pstd/scope_record_lock.cc new file mode 100644 index 000000000..6ef391bd3 --- /dev/null +++ b/src/pstd/scope_record_lock.cc @@ -0,0 +1,78 @@ +/* +* Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. 
+ */ + +#include + +#include "scope_record_lock.h" + +namespace pstd::lock { + +MultiScopeRecordLock::MultiScopeRecordLock(const std::shared_ptr& lock_mgr, const std::vector& keys) + : lock_mgr_(lock_mgr), keys_(keys) { + std::string pre_key; + std::sort(keys_.begin(), keys_.end()); + if (!keys_.empty() && keys_[0].empty()) { + lock_mgr_->TryLock(pre_key); + } + + for (const auto& key : keys_) { + if (pre_key != key) { + lock_mgr_->TryLock(key); + pre_key = key; + } + } +} +MultiScopeRecordLock::~MultiScopeRecordLock() { + std::string pre_key; + if (!keys_.empty() && keys_[0].empty()) { + lock_mgr_->UnLock(pre_key); + } + + for (const auto& key : keys_) { + if (pre_key != key) { + lock_mgr_->UnLock(key); + pre_key = key; + } + } +} + +void MultiRecordLock::Lock(const std::vector& keys) { + std::vector internal_keys = keys; + std::sort(internal_keys.begin(), internal_keys.end()); + // init to be "" + std::string pre_key; + // consider internal_keys "" "" "a" + if (!internal_keys.empty()) { + lock_mgr_->TryLock(internal_keys.front()); + pre_key = internal_keys.front(); + } + + for (const auto& key : internal_keys) { + if (pre_key != key) { + lock_mgr_->TryLock(key); + pre_key = key; + } + } +} + +void MultiRecordLock::Unlock(const std::vector& keys) { + std::vector internal_keys = keys; + std::sort(internal_keys.begin(), internal_keys.end()); + std::string pre_key; + if (!internal_keys.empty()) { + lock_mgr_->UnLock(internal_keys.front()); + pre_key = internal_keys.front(); + } + + for (const auto& key : internal_keys) { + if (pre_key != key) { + lock_mgr_->UnLock(key); + pre_key = key; + } + } +} +} // namespace pstd::lock diff --git a/src/pstd/scope_record_lock.h b/src/pstd/scope_record_lock.h new file mode 100644 index 000000000..a17e16062 --- /dev/null +++ b/src/pstd/scope_record_lock.h @@ -0,0 +1,57 @@ +/* +* Copyright (c) 2023-present, Qihoo, Inc. All rights reserved. 
+ * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. An additional grant + * of patent rights can be found in the PATENTS file in the same directory. + */ + +#pragma once + +#include +#include +#include +#include + +#include "lock_mgr.h" +#include "noncopyable.h" +#include "rocksdb/slice.h" + +namespace pstd::lock { + +using Slice = rocksdb::Slice; + +class ScopeRecordLock final : public pstd::noncopyable { +public: + ScopeRecordLock(const std::shared_ptr& lock_mgr, const Slice& key) : lock_mgr_(lock_mgr), key_(key) { + lock_mgr_->TryLock(key_.ToString()); + } + ~ScopeRecordLock() { lock_mgr_->UnLock(key_.ToString()); } + +private: + std::shared_ptr const lock_mgr_; + Slice key_; +}; + +class MultiScopeRecordLock final : public pstd::noncopyable { +public: + MultiScopeRecordLock(const std::shared_ptr& lock_mgr, const std::vector& keys); + ~MultiScopeRecordLock(); + +private: + std::shared_ptr const lock_mgr_; + std::vector keys_; +}; + +class MultiRecordLock : public noncopyable { +public: + explicit MultiRecordLock(const std::shared_ptr& lock_mgr) : lock_mgr_(lock_mgr) {} + ~MultiRecordLock() = default; + + void Lock(const std::vector& keys); + void Unlock(const std::vector& keys); + +private: + std::shared_ptr const lock_mgr_; +}; + +} // namespace pstd::lock diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt new file mode 100644 index 000000000..3c28ee2af --- /dev/null +++ b/src/storage/CMakeLists.txt @@ -0,0 +1,20 @@ +#AUX_SOURCE_DIRECTORY(./src STORAGE_SRC) +FILE(GLOB STORAGE_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/src/*.h" + "${CMAKE_CURRENT_SOURCE_DIR}/include/storage/*.h" +) +SET(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin) +ADD_LIBRARY(storage ${STORAGE_SRC}) + +TARGET_INCLUDE_DIRECTORIES(storage + PUBLIC ${CMAKE_SOURCE_DIR}/src + PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} + PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include + PRIVATE 
${rocksdb_SOURCE_DIR}/ + PRIVATE ${rocksdb_SOURCE_DIR}/include +) + +TARGET_LINK_LIBRARIES(storage pstd glog rocksdb) + +SET_TARGET_PROPERTIES(storage PROPERTIES LINKER_LANGUAGE CXX) diff --git a/src/storage/detect_environment b/src/storage/detect_environment new file mode 100755 index 000000000..e00202072 --- /dev/null +++ b/src/storage/detect_environment @@ -0,0 +1,92 @@ +#!/bin/sh + +OUTPUT=$1 +if test -z "$OUTPUT"; then + echo "usage: $0 " >&2 + exit 1 +fi + +# Delete existing output, if it exists +rm -f "$OUTPUT" +touch "$OUTPUT" + +if test -z "$CXX"; then + CXX=g++ +fi + +# Test whether Snappy library is installed +# http://code.google.com/p/snappy/ +$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null < + int main() {} +EOF +if [ "$?" = 0 ]; then + ROCKSDB_LDFLAGS="$ROCKSDB_LDFLAGS -lsnappy" +fi + +# Test whether gflags library is installed +# http://gflags.github.io/gflags/ +# check if the namespace is gflags +$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null << EOF + #include + using namespace gflags; + int main() {} +EOF +if [ "$?" = 0 ]; then + ROCKSDB_LDFLAGS="$ROCKSDB_LDFLAGS -lgflags" +else + # check if namespace is google + $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null << EOF + #include + using namespace google; + int main() {} +EOF +if [ "$?" = 0 ]; then + ROCKSDB_LDFLAGS="$ROCKSDB_LDFLAGS -lgflags" +fi +fi + +# Test whether zlib library is installed +$CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < + int main() {} +EOF +if [ "$?" = 0 ]; then + ROCKSDB_LDFLAGS="$ROCKSDB_LDFLAGS -lz" +fi + +# Test whether bzip library is installed +$CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < + int main() {} +EOF +if [ "$?" = 0 ]; then + ROCKSDB_LDFLAGS="$ROCKSDB_LDFLAGS -lbz2" +fi + +# Test whether lz4 library is installed +$CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < + #include + int main() {} +EOF +if [ "$?" 
= 0 ]; then + ROCKSDB_LDFLAGS="$ROCKSDB_LDFLAGS -llz4" +fi + +# Test whether zstd library is installed +$CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < + int main() {} +EOF +if [ "$?" = 0 ]; then + ROCKSDB_LDFLAGS="$ROCKSDB_LDFLAGS -lzstd" +fi + + + +# Test processor nums +PROCESSOR_NUMS=$(cat /proc/cpuinfo | grep processor | wc -l) + +echo "ROCKSDB_LDFLAGS=$ROCKSDB_LDFLAGS" >> "$OUTPUT" +echo "PROCESSOR_NUMS=$PROCESSOR_NUMS" >> "$OUTPUT" diff --git a/src/storage/include/storage/build_version.h b/src/storage/include/storage/build_version.h new file mode 100644 index 000000000..a4474cbf6 --- /dev/null +++ b/src/storage/include/storage/build_version.h @@ -0,0 +1,12 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +// this variable tells us about the git revision +extern const char* blackwidow_build_git_sha; + +// Date on which the code was compiled: +extern const char* blackwidow_build_compile_date; diff --git a/src/storage/include/storage/storage.h b/src/storage/include/storage/storage.h new file mode 100644 index 000000000..0ea59b7b5 --- /dev/null +++ b/src/storage/include/storage/storage.h @@ -0,0 +1,1052 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "rocksdb/convenience.h" +#include "rocksdb/filter_policy.h" +#include "rocksdb/options.h" +#include "rocksdb/rate_limiter.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "rocksdb/table.h" + +#include "pstd/pstd_mutex.h" + +namespace storage { + +inline constexpr double ZSET_SCORE_MAX = std::numeric_limits::max(); +inline constexpr double ZSET_SCORE_MIN = std::numeric_limits::lowest(); + +inline const std::string PROPERTY_TYPE_ROCKSDB_CUR_SIZE_ALL_MEM_TABLES = "rocksdb.cur-size-all-mem-tables"; +inline const std::string PROPERTY_TYPE_ROCKSDB_ESTIMATE_TABLE_READER_MEM = "rocksdb.estimate-table-readers-mem"; +inline const std::string PROPERTY_TYPE_ROCKSDB_BACKGROUND_ERRORS = "rocksdb.background-errors"; + +inline const std::string ALL_DB = "all"; +inline const std::string STRINGS_DB = "strings"; +inline const std::string HASHES_DB = "hashes"; +inline const std::string LISTS_DB = "lists"; +inline const std::string ZSETS_DB = "zsets"; +inline const std::string SETS_DB = "sets"; + +inline constexpr size_t BATCH_DELETE_LIMIT = 100; +inline constexpr size_t COMPACT_THRESHOLD_COUNT = 2000; + +using Options = rocksdb::Options; +using BlockBasedTableOptions = rocksdb::BlockBasedTableOptions; +using Status = rocksdb::Status; +using Slice = rocksdb::Slice; + +class RedisStrings; +class RedisHashes; +class RedisSets; +class RedisLists; +class RedisZSets; +class HyperLogLog; +enum class OptionType; + +template +class LRUCache; + +struct StorageOptions { + rocksdb::Options options; + rocksdb::BlockBasedTableOptions table_options; + size_t block_cache_size = 0; + bool share_block_cache = false; + size_t statistics_max_size = 0; + size_t small_compaction_threshold = 5000; + Status ResetOptions(const OptionType& option_type, const std::unordered_map& options_map); +}; + +struct KeyValue { + std::string key; + std::string value; + bool operator==(const KeyValue& 
kv) const { return (kv.key == key && kv.value == value); } + bool operator<(const KeyValue& kv) const { return key < kv.key; } +}; + +struct KeyInfo { + uint64_t keys; + uint64_t expires; + uint64_t avg_ttl; + uint64_t invaild_keys; +}; + +struct ValueStatus { + std::string value; + Status status; + bool operator==(const ValueStatus& vs) const { return (vs.value == value && vs.status == status); } +}; + +struct FieldValue { + std::string field; + std::string value; + bool operator==(const FieldValue& fv) const { return (fv.field == field && fv.value == value); } +}; + +struct KeyVersion { + std::string key; + int32_t version; + bool operator==(const KeyVersion& kv) const { return (kv.key == key && kv.version == version); } +}; + +struct ScoreMember { + double score; + std::string member; + bool operator==(const ScoreMember& sm) const { return (sm.score == score && sm.member == member); } +}; + +enum BeforeOrAfter { Before, After }; + +enum DataType { kAll, kStrings, kHashes, kLists, kZSets, kSets }; + +const char DataTypeTag[] = {'a', 'k', 'h', 'l', 'z', 's'}; + +enum class OptionType { + kDB, + kColumnFamily, +}; + +enum ColumnFamilyType { kMeta, kData, kMetaAndData }; + +enum AGGREGATE { SUM, MIN, MAX }; + +enum BitOpType { kBitOpAnd = 1, kBitOpOr, kBitOpXor, kBitOpNot, kBitOpDefault }; + +enum Operation { kNone = 0, kCleanAll, kCleanStrings, kCleanHashes, kCleanZSets, kCleanSets, kCleanLists, kCompactKey }; + +struct BGTask { + DataType type; + Operation operation; + std::string argv; + + BGTask(const DataType& _type = DataType::kAll, const Operation& _operation = Operation::kNone, std::string _argv = "") + : type(_type), operation(_operation), argv(std::move(_argv)) {} +}; + +class Storage { + public: + Storage(); + ~Storage(); + + Status Open(const StorageOptions& storage_options, const std::string& db_path); + + Status GetStartKey(const DataType& dtype, int64_t cursor, std::string* start_key); + + Status StoreCursorStartKey(const DataType& dtype, int64_t 
cursor, const std::string& next_key); + + // Strings Commands + + // Set key to hold the string value. if key + // already holds a value, it is overwritten + Status Set(const Slice& key, const Slice& value); + + // Set key to hold the string value. if key exist + Status Setxx(const Slice& key, const Slice& value, int32_t* ret, int32_t ttl = 0); + + // Get the value of key. If the key does not exist + // the special value nil is returned + Status Get(const Slice& key, std::string* value); + + // Atomically sets key to value and returns the old value stored at key + // Returns an error when key exists but does not hold a string value. + Status GetSet(const Slice& key, const Slice& value, std::string* old_value); + + // Sets or clears the bit at offset in the string value stored at key + Status SetBit(const Slice& key, int64_t offset, int32_t value, int32_t* ret); + + // Returns the bit value at offset in the string value stored at key + Status GetBit(const Slice& key, int64_t offset, int32_t* ret); + + // Sets the given keys to their respective values + // MSET replaces existing values with new values + Status MSet(const std::vector& kvs); + + // Returns the values of all specified keys. For every key + // that does not hold a string value or does not exist, the + // special value nil is returned + Status MGet(const std::vector& keys, std::vector* vss); + + // Set key to hold string value if key does not exist + // return 1 if the key was set + // return 0 if the key was not set + Status Setnx(const Slice& key, const Slice& value, int32_t* ret, int32_t ttl = 0); + + // Sets the given keys to their respective values. + // MSETNX will not perform any operation at all even + // if just a single key already exists. 
+ Status MSetnx(const std::vector& kvs, int32_t* ret); + + // Set key to hold string new_value if key currently holds the given value + // return 1 if the key currently holds the given value And override success + // return 0 if the key doesn't exist And override fail + // return -1 if the key currently does not hold the given value And override fail + Status Setvx(const Slice& key, const Slice& value, const Slice& new_value, int32_t* ret, int32_t ttl = 0); + + // delete the key that holds a given value + // return 1 if the key currently holds the given value And delete success + // return 0 if the key doesn't exist And del fail + // return -1 if the key currently does not hold the given value And del fail + Status Delvx(const Slice& key, const Slice& value, int32_t* ret); + + // Set key to hold string value if key does not exist + // return the length of the string after it was modified by the command + Status Setrange(const Slice& key, int64_t start_offset, const Slice& value, int32_t* ret); + + // Returns the substring of the string value stored at key, + // determined by the offsets start and end (both are inclusive) + Status Getrange(const Slice& key, int64_t start_offset, int64_t end_offset, std::string* ret); + + // If key already exists and is a string, this command appends the value at + // the end of the string + // return the length of the string after the append operation + Status Append(const Slice& key, const Slice& value, int32_t* ret); + + // Count the number of set bits (population counting) in a string. 
+ // return the number of bits set to 1 + // note: if need to specified offset, set have_range to true + Status BitCount(const Slice& key, int64_t start_offset, int64_t end_offset, int32_t* ret, bool have_range); + + // Perform a bitwise operation between multiple keys + // and store the result in the destination key + Status BitOp(BitOpType op, const std::string& dest_key, const std::vector& src_keys, std::string &value_to_dest, int64_t* ret); + + // Return the position of the first bit set to 1 or 0 in a string + // BitPos key 0 + Status BitPos(const Slice& key, int32_t bit, int64_t* ret); + // BitPos key 0 [start] + Status BitPos(const Slice& key, int32_t bit, int64_t start_offset, int64_t* ret); + // BitPos key 0 [start] [end] + Status BitPos(const Slice& key, int32_t bit, int64_t start_offset, int64_t end_offset, int64_t* ret); + + // Decrements the number stored at key by decrement + // return the value of key after the decrement + Status Decrby(const Slice& key, int64_t value, int64_t* ret); + + // Increments the number stored at key by increment. + // If the key does not exist, it is set to 0 before performing the operation + Status Incrby(const Slice& key, int64_t value, int64_t* ret); + + // Increment the string representing a floating point number + // stored at key by the specified increment. + Status Incrbyfloat(const Slice& key, const Slice& value, std::string* ret); + + // Set key to hold the string value and set key to timeout after a given + // number of seconds + Status Setex(const Slice& key, const Slice& value, int32_t ttl); + + // Returns the length of the string value stored at key. An error + // is returned when key holds a non-string value. + Status Strlen(const Slice& key, int32_t* len); + + // PKSETEXAT has the same effect and semantic as SETEX, but instead of + // specifying the number of seconds representing the TTL (time to live), it + // takes an absolute Unix timestamp (seconds since January 1, 1970). 
A + // timestamp in the past will delete the key immediately. + Status PKSetexAt(const Slice& key, const Slice& value, int32_t timestamp); + + // Hashes Commands + + // Sets field in the hash stored at key to value. If key does not exist, a new + // key holding a hash is created. If field already exists in the hash, it is + // overwritten. + Status HSet(const Slice& key, const Slice& field, const Slice& value, int32_t* res); + + // Returns the value associated with field in the hash stored at key. + // the value associated with field, or nil when field is not present in the + // hash or key does not exist. + Status HGet(const Slice& key, const Slice& field, std::string* value); + + // Sets the specified fields to their respective values in the hash stored at + // key. This command overwrites any specified fields already existing in the + // hash. If key does not exist, a new key holding a hash is created. + Status HMSet(const Slice& key, const std::vector& fvs); + + // Returns the values associated with the specified fields in the hash stored + // at key. + // For every field that does not exist in the hash, a nil value is returned. + // Because a non-existing keys are treated as empty hashes, running HMGET + // against a non-existing key will return a list of nil values. + Status HMGet(const Slice& key, const std::vector& fields, std::vector* vss); + + // Returns all fields and values of the hash stored at key. In the returned + // value, every field name is followed by its value, so the length of the + // reply is twice the size of the hash. + Status HGetall(const Slice& key, std::vector* fvs); + + // Returns all field names in the hash stored at key. + Status HKeys(const Slice& key, std::vector* fields); + + // Returns all values in the hash stored at key. + Status HVals(const Slice& key, std::vector* values); + + // Sets field in the hash stored at key to value, only if field does not yet + // exist. If key does not exist, a new key holding a hash is created. 
If field + // already exists, this operation has no effect. + Status HSetnx(const Slice& key, const Slice& field, const Slice& value, int32_t* ret); + + // Returns the number of fields contained in the hash stored at key. + // Return 0 when key does not exist. + Status HLen(const Slice& key, int32_t* ret); + + // Returns the string length of the value associated with field in the hash + // stored at key. If the key or the field do not exist, 0 is returned. + Status HStrlen(const Slice& key, const Slice& field, int32_t* len); + + // Returns if field is an existing field in the hash stored at key. + // Return Status::Ok() if the hash contains field. + // Return Status::NotFound() if the hash does not contain field, + // or key does not exist. + Status HExists(const Slice& key, const Slice& field); + + // Increments the number stored at field in the hash stored at key by + // increment. If key does not exist, a new key holding a hash is created. If + // field does not exist the value is set to 0 before the operation is + // performed. + Status HIncrby(const Slice& key, const Slice& field, int64_t value, int64_t* ret); + + // Increment the specified field of a hash stored at key, and representing a + // floating point number, by the specified increment. If the increment value + // is negative, the result is to have the hash field value decremented instead + // of incremented. If the field does not exist, it is set to 0 before + // performing the operation. An error is returned if one of the following + // conditions occur: + // + // The field contains a value of the wrong type (not a string). + // The current field content or the specified increment are not parsable as a + // double precision floating point number. + Status HIncrbyfloat(const Slice& key, const Slice& field, const Slice& by, std::string* new_value); + + // Removes the specified fields from the hash stored at key. Specified fields + // that do not exist within this hash are ignored. 
If key does not exist, it + // is treated as an empty hash and this command returns 0. + Status HDel(const Slice& key, const std::vector& fields, int32_t* ret); + + // See SCAN for HSCAN documentation. + Status HScan(const Slice& key, int64_t cursor, const std::string& pattern, int64_t count, + std::vector* field_values, int64_t* next_cursor); + + // Iterate over a Hash table of fields + // return next_field that the user needs to use as the start_field argument + // in the next call + Status HScanx(const Slice& key, const std::string& start_field, const std::string& pattern, int64_t count, + std::vector* field_values, std::string* next_field); + + // Iterate over a Hash table of fields by specified range + // return next_field that the user needs to use as the start_field argument + // in the next call + Status PKHScanRange(const Slice& key, const Slice& field_start, const std::string& field_end, const Slice& pattern, + int32_t limit, std::vector* field_values, std::string* next_field); + + // Apart from the reversed ordering, PKHRSCANRANGE is similar to PKHScanRange + Status PKHRScanRange(const Slice& key, const Slice& field_start, const std::string& field_end, const Slice& pattern, + int32_t limit, std::vector* field_values, std::string* next_field); + + // Sets Commands + + // Add the specified members to the set stored at key. Specified members that + // are already a member of this set are ignored. If key does not exist, a new + // set is created before adding the specified members. + Status SAdd(const Slice& key, const std::vector& members, int32_t* ret); + + // Returns the set cardinality (number of elements) of the set stored at key. + Status SCard(const Slice& key, int32_t* ret); + + // Returns the members of the set resulting from the difference between the + // first set and all the successive sets. 
+ // + // For example: + // key1 = {a, b, c, d} + // key2 = {c} + // key3 = {a, c, e} + // SDIFF key1 key2 key3 = {b, d} + Status SDiff(const std::vector& keys, std::vector* members); + + // This command is equal to SDIFF, but instead of returning the resulting set, + // it is stored in destination. + // If destination already exists, it is overwritten. + // + // For example: + // destination = {}; + // key1 = {a, b, c, d} + // key2 = {c} + // key3 = {a, c, e} + // SDIFFSTORE destination key1 key2 key3 + // destination = {b, d} + Status SDiffstore(const Slice& destination, const std::vector& keys, std::vector& value_to_dest, int32_t* ret); + + // Returns the members of the set resulting from the intersection of all the + // given sets. + // + // For example: + // key1 = {a, b, c, d} + // key2 = {c} + // key3 = {a, c, e} + // SINTER key1 key2 key3 = {c} + Status SInter(const std::vector& keys, std::vector* members); + + // This command is equal to SINTER, but instead of returning the resulting + // set, it is stored in destination. + // If destination already exists, it is overwritten. + // + // For example: + // destination = {} + // key1 = {a, b, c, d} + // key2 = {a, c} + // key3 = {a, c, e} + // SINTERSTORE destination key1 key2 key3 + // destination = {a, c} + Status SInterstore(const Slice& destination, const std::vector& keys, std::vector& value_to_dest, int32_t* ret); + + // Returns if member is a member of the set stored at key. + Status SIsmember(const Slice& key, const Slice& member, int32_t* ret); + + // Returns all the members of the set value stored at key. + // This has the same effect as running SINTER with one argument key. + Status SMembers(const Slice& key, std::vector* members); + + // Remove the specified members from the set stored at key. Specified members + // that are not a member of this set are ignored. If key does not exist, it is + // treated as an empty set and this command returns 0. 
+ Status SRem(const Slice& key, const std::vector& members, int32_t* ret); + + // Removes and returns several random elements specified by count from the set value store at key. + Status SPop(const Slice& key, std::vector* members, int64_t count); + + // When called with just the key argument, return a random element from the + // set value stored at key. + // when called with the additional count argument, return an array of count + // distinct elements if count is positive. If called with a negative count the + // behavior changes and the command is allowed to return the same element + // multiple times. In this case the number of returned elements is the + // absolute value of the specified count + Status SRandmember(const Slice& key, int32_t count, std::vector* members); + + // Move member from the set at source to the set at destination. This + // operation is atomic. In every given moment the element will appear to be a + // member of source or destination for other clients. + // + // If the source set does not exist or does not contain the specified element, + // no operation is performed and 0 is returned. Otherwise, the element is + // removed from the source set and added to the destination set. When the + // specified element already exists in the destination set, it is only removed + // from the source set. + Status SMove(const Slice& source, const Slice& destination, const Slice& member, int32_t* ret); + + // Returns the members of the set resulting from the union of all the given + // sets. + // + // For example: + // key1 = {a, b, c, d} + // key2 = {c} + // key3 = {a, c, e} + // SUNION key1 key2 key3 = {a, b, c, d, e} + Status SUnion(const std::vector& keys, std::vector* members); + + // This command is equal to SUNION, but instead of returning the resulting + // set, it is stored in destination. + // If destination already exists, it is overwritten. 
+ //
+ // For example:
+ // key1 = {a, b}
+ // key2 = {c, d}
+ // key3 = {c, d, e}
+ // SUNIONSTORE destination key1 key2 key3
+ // destination = {a, b, c, d, e}
+ Status SUnionstore(const Slice& destination, const std::vector& keys, std::vector& value_to_dest, int32_t* ret);
+
+ // See SCAN for SSCAN documentation.
+ Status SScan(const Slice& key, int64_t cursor, const std::string& pattern, int64_t count,
+ std::vector* members, int64_t* next_cursor);
+
+ // Lists Commands
+
+ // Insert all the specified values at the head of the list stored at key. If
+ // key does not exist, it is created as empty list before performing the push
+ // operations.
+ Status LPush(const Slice& key, const std::vector& values, uint64_t* ret);
+
+ // Insert all the specified values at the tail of the list stored at key. If
+ // key does not exist, it is created as empty list before performing the push
+ // operation.
+ Status RPush(const Slice& key, const std::vector& values, uint64_t* ret);
+
+ // Returns the specified elements of the list stored at key. The offsets start
+ // and stop are zero-based indexes, with 0 being the first element of the list
+ // (the head of the list), 1 being the next element and so on.
+ Status LRange(const Slice& key, int64_t start, int64_t stop, std::vector* ret);
+
+ // Trim an existing list so that it will contain only the specified range of
+ // elements. Both start and stop are zero-based indexes, where 0 is the first
+ // element of the list (the head), 1 the next element and so on.
+ Status LTrim(const Slice& key, int64_t start, int64_t stop);
+
+ // Returns the length of the list stored at key. If key does not exist, it is
+ // interpreted as an empty list and 0 is returned. An error is returned when
+ // the value stored at key is not a list.
+ Status LLen(const Slice& key, uint64_t* len);
+
+ // Removes and returns the first elements of the list stored at key.
+ Status LPop(const Slice& key, int64_t count, std::vector* elements); + + // Removes and returns the last elements of the list stored at key. + Status RPop(const Slice& key, int64_t count, std::vector* elements); + + // Returns the element at index index in the list stored at key. The index is + // zero-based, so 0 means the first element, 1 the second element and so on. + // Negative indices can be used to designate elements starting at the tail of + // the list. Here, -1 means the last element, -2 means the penultimate and so + // forth. + Status LIndex(const Slice& key, int64_t index, std::string* element); + + // Inserts value in the list stored at key either before or after the + // reference value pivot. + // When key does not exist, it is considered an empty list and no operation is + // performed. + // An error is returned when key exists but does not hold a list value. + Status LInsert(const Slice& key, const BeforeOrAfter& before_or_after, const std::string& pivot, + const std::string& value, int64_t* ret); + + // Inserts value at the head of the list stored at key, only if key already + // exists and holds a list. In contrary to LPUSH, no operation will be + // performed when key does not yet exist. + Status LPushx(const Slice& key, const std::vector& values, uint64_t* len); + + // Inserts value at the tail of the list stored at key, only if key already + // exists and holds a list. In contrary to RPUSH, no operation will be + // performed when key does not yet exist. + Status RPushx(const Slice& key, const std::vector& values, uint64_t* len); + + // Removes the first count occurrences of elements equal to value from the + // list stored at key. The count argument influences the operation in the + // following ways: + // + // count > 0: Remove elements equal to value moving from head to tail. + // count < 0: Remove elements equal to value moving from tail to head. + // count = 0: Remove all elements equal to value. 
+ // For example, LREM list -2 "hello" will remove the last two occurrences of + // "hello" in the list stored at list. + // + // Note that non-existing keys are treated like empty lists, so when key does + // not exist, the command will always return 0. + Status LRem(const Slice& key, int64_t count, const Slice& value, uint64_t* ret); + + // Sets the list element at index to value. For more information on the index + // argument, see LINDEX. + // + // An error is returned for out of range indexes. + Status LSet(const Slice& key, int64_t index, const Slice& value); + + // Atomically returns and removes the last element (tail) of the list stored + // at source, and pushes the element at the first element (head) of the list + // stored at destination. + // + // For example: consider source holding the list a,b,c, and destination + // holding the list x,y,z. Executing RPOPLPUSH results in source holding a,b + // and destination holding c,x,y,z. + // + // If source does not exist, the value nil is returned and no operation is + // performed. If source and destination are the same, the operation is + // equivalent to removing the last element from the list and pushing it as + // first element of the list, so it can be considered as a list rotation + // command. + Status RPoplpush(const Slice& source, const Slice& destination, std::string* element); + + // Zsets Commands + + // Pop the maximum count score_members which have greater score in the sorted set. + // And return the result in the score_members,If the total number of the sorted + // set less than count, it will pop out the total number of sorted set. If two + // ScoreMember's score were the same, the lexicographic predominant elements will + // be pop out. + Status ZPopMax(const Slice& key, int64_t count, std::vector* score_members); + + // Pop the minimum count score_members which have less score in the sorted set. 
+ // And return the result in the score_members,If the total number of the sorted + // set less than count, it will pop out the total number of sorted set. If two + // ScoreMember's score were the same, the lexicographic predominant elements will + // not be pop out. + Status ZPopMin(const Slice& key, int64_t count, std::vector* score_members); + + // Adds all the specified members with the specified scores to the sorted set + // stored at key. It is possible to specify multiple score / member pairs. If + // a specified member is already a member of the sorted set, the score is + // updated and the element reinserted at the right position to ensure the + // correct ordering. + // + // If key does not exist, a new sorted set with the specified members as sole + // members is created, like if the sorted set was empty. If the key exists but + // does not hold a sorted set, an error is returned. + // The score values should be the string representation of a double precision + // floating point number. +inf and -inf values are valid values as well. + Status ZAdd(const Slice& key, const std::vector& score_members, int32_t* ret); + + // Returns the sorted set cardinality (number of elements) of the sorted set + // stored at key. + Status ZCard(const Slice& key, int32_t* ret); + + // Returns the number of elements in the sorted set at key with a score + // between min and max. + // + // The min and max arguments have the same semantic as described for + // ZRANGEBYSCORE. + // + // Note: the command has a complexity of just O(log(N)) because it uses + // elements ranks (see ZRANK) to get an idea of the range. Because of this + // there is no need to do a work proportional to the size of the range. + Status ZCount(const Slice& key, double min, double max, bool left_close, bool right_close, int32_t* ret); + + // Increments the score of member in the sorted set stored at key by + // increment. 
If member does not exist in the sorted set, it is added with + // increment as its score (as if its previous score was 0.0). If key does not + // exist, a new sorted set with the specified member as its sole member is + // created. + // + // An error is returned when key exists but does not hold a sorted set. + // + // The score value should be the string representation of a numeric value, and + // accepts double precision floating point numbers. It is possible to provide + // a negative value to decrement the score. + Status ZIncrby(const Slice& key, const Slice& member, double increment, double* ret); + + // Returns the specified range of elements in the sorted set stored at key. + // The elements are considered to be ordered from the lowest to the highest + // score. Lexicographical order is used for elements with equal score. + // + // See ZREVRANGE when you need the elements ordered from highest to lowest + // score (and descending lexicographical order for elements with equal score). + // + // Both start and stop are zero-based indexes, where 0 is the first element, 1 + // is the next element and so on. They can also be negative numbers indicating + // offsets from the end of the sorted set, with -1 being the last element of + // the sorted set, -2 the penultimate element and so on. + // + // start and stop are inclusive ranges, so for example ZRANGE myzset 0 1 will + // return both the first and the second element of the sorted set. + // + // Out of range indexes will not produce an error. If start is larger than the + // largest index in the sorted set, or start > stop, an empty list is + // returned. If stop is larger than the end of the sorted set Redis will treat + // it like it is the last element of the sorted set. + // + // It is possible to pass the WITHSCORES option in order to return the scores + // of the elements together with the elements. The returned list will contain + // value1,score1,...,valueN,scoreN instead of value1,...,valueN. 
Client + // libraries are free to return a more appropriate data type (suggestion: an + // array with (value, score) arrays/tuples). + Status ZRange(const Slice& key, int32_t start, int32_t stop, std::vector* score_members); + + // Returns all the elements in the sorted set at key with a score between min + // and max (including elements with score equal to min or max). The elements + // are considered to be ordered from low to high scores. + // + // The elements having the same score are returned in lexicographical order + // (this follows from a property of the sorted set implementation in Redis and + // does not involve further computation). + // + // The optional LIMIT argument can be used to only get a range of the matching + // elements (similar to SELECT LIMIT offset, count in SQL). Keep in mind that + // if offset is large, the sorted set needs to be traversed for offset + // elements before getting to the elements to return, which can add up to O(N) + // time complexity. + // + // The optional WITHSCORES argument makes the command return both the element + // and its score, instead of the element alone. This option is available since + // Redis 2.0. + // + // Exclusive intervals and infinity + // min and max can be -inf and +inf, so that you are not required to know the + // highest or lowest score in the sorted set to get all elements from or up to + // a certain score. + // + // By default, the interval specified by min and max is closed (inclusive). It + // is possible to specify an open interval (exclusive) by prefixing the score + // with the character (. For example: + // + // ZRANGEBYSCORE zset (1 5 + // Will return all elements with 1 < score <= 5 while: + // + // ZRANGEBYSCORE zset (5 (10 + // Will return all the elements with 5 < score < 10 (5 and 10 excluded). + // + // Return value + // Array reply: list of elements in the specified score range (optionally with + // their scores). 
+ Status ZRangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, + std::vector* score_members); + + // Returns all the elements in the sorted set at key with a score between min + // and max (including elements with score equal to min or max). The elements + // are considered to be ordered from low to high scores. + // + // The elements having the same score are returned in lexicographical order + // (this follows from a property of the sorted set implementation in Redis and + // does not involve further computation). + // + // The optional LIMIT argument can be used to only get a range of the matching + // elements (similar to SELECT LIMIT offset, count in SQL). Keep in mind that + // if offset is large, the sorted set needs to be traversed for offset + // elements before getting to the elements to return, which can add up to O(N) + // time complexity. + // + // The optional WITHSCORES argument makes the command return both the element + // and its score, instead of the element alone. This option is available since + // Redis 2.0. + // + // Exclusive intervals and infinity + // min and max can be -inf and +inf, so that you are not required to know the + // highest or lowest score in the sorted set to get all elements from or up to + // a certain score. + // + // By default, the interval specified by min and max is closed (inclusive). It + // is possible to specify an open interval (exclusive) by prefixing the score + // with the character (. For example: + // + // ZRANGEBYSCORE zset (1 5 + // Will return all elements with 1 < score <= 5 while: + // + // ZRANGEBYSCORE zset (5 (10 + // Will return all the elements with 5 < score < 10 (5 and 10 excluded). + // + // Return value + // Array reply: list of elements in the specified score range (optionally with + // their scores). 
+ Status ZRangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, int64_t count, + int64_t offset, std::vector* score_members); + + // Returns the rank of member in the sorted set stored at key, with the scores + // ordered from low to high. The rank (or index) is 0-based, which means that + // the member with the lowest score has rank 0. + // + // Use ZREVRANK to get the rank of an element with the scores ordered from + // high to low. + Status ZRank(const Slice& key, const Slice& member, int32_t* rank); + + // Removes the specified members from the sorted set stored at key. Non + // existing members are ignored. + // + // An error is returned when key exists and does not hold a sorted set. + Status ZRem(const Slice& key, const std::vector& members, int32_t* ret); + + // Removes all elements in the sorted set stored at key with rank between + // start and stop. Both start and stop are 0 -based indexes with 0 being the + // element with the lowest score. These indexes can be negative numbers, where + // they indicate offsets starting at the element with the highest score. For + // example: -1 is the element with the highest score, -2 the element with the + // second highest score and so forth. + Status ZRemrangebyrank(const Slice& key, int32_t start, int32_t stop, int32_t* ret); + + // Removes all elements in the sorted set stored at key with a score between + // min and max (inclusive). + Status ZRemrangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, int32_t* ret); + + // Returns the specified range of elements in the sorted set stored at key. + // The elements are considered to be ordered from the highest to the lowest + // score. Descending lexicographical order is used for elements with equal + // score. + // + // Apart from the reversed ordering, ZREVRANGE is similar to ZRANGE. 
+ Status ZRevrange(const Slice& key, int32_t start, int32_t stop, std::vector* score_members); + + // Returns all the elements in the sorted set at key with a score between max + // and min (including elements with score equal to max or min). In contrary to + // the default ordering of sorted sets, for this command the elements are + // considered to be ordered from high to low scores. + // + // The elements having the same score are returned in reverse lexicographical + // order. + // + // Apart from the reversed ordering, ZREVRANGEBYSCORE is similar to + // ZRANGEBYSCORE. + Status ZRevrangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, + std::vector* score_members); + + // Returns all the elements in the sorted set at key with a score between max + // and min (including elements with score equal to max or min). In contrary to + // the default ordering of sorted sets, for this command the elements are + // considered to be ordered from high to low scores. + // + // The elements having the same score are returned in reverse lexicographical + // order. + // + // Apart from the reversed ordering, ZREVRANGEBYSCORE is similar to + // ZRANGEBYSCORE. + Status ZRevrangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, int64_t count, + int64_t offset, std::vector* score_members); + + // Returns the rank of member in the sorted set stored at key, with the scores + // ordered from high to low. The rank (or index) is 0-based, which means that + // the member with the highest score has rank 0. + Status ZRevrank(const Slice& key, const Slice& member, int32_t* rank); + + // Returns the score of member in the sorted set at key. + // + // If member does not exist in the sorted set, or key does not exist, nil is + // returned. 
+ Status ZScore(const Slice& key, const Slice& member, double* ret); + + // Computes the union of numkeys sorted sets given by the specified keys, and + // stores the result in destination. It is mandatory to provide the number of + // input keys (numkeys) before passing the input keys and the other (optional) + // arguments. + // + // By default, the resulting score of an element is the sum of its scores in + // the sorted sets where it exists. + // + // Using the WEIGHTS option, it is possible to specify a multiplication factor + // for each input sorted set. This means that the score of every element in + // every input sorted set is multiplied by this factor before being passed to + // the aggregation function. When WEIGHTS is not given, the multiplication + // factors default to 1. + // + // With the AGGREGATE option, it is possible to specify how the results of the + // union are aggregated. This option defaults to SUM, where the score of an + // element is summed across the inputs where it exists. When this option is + // set to either MIN or MAX, the resulting set will contain the minimum or + // maximum score of an element across the inputs where it exists. + // + // If destination already exists, it is overwritten. + Status ZUnionstore(const Slice& destination, const std::vector& keys, const std::vector& weights, + AGGREGATE agg, std::map& value_to_dest, int32_t* ret); + + // Computes the intersection of numkeys sorted sets given by the specified + // keys, and stores the result in destination. It is mandatory to provide the + // number of input keys (numkeys) before passing the input keys and the other + // (optional) arguments. + // + // By default, the resulting score of an element is the sum of its scores in + // the sorted sets where it exists. 
Because intersection requires an element + // to be a member of every given sorted set, this results in the score of + // every element in the resulting sorted set to be equal to the number of + // input sorted sets. + // + // For a description of the WEIGHTS and AGGREGATE options, see ZUNIONSTORE. + // + // If destination already exists, it is overwritten. + Status ZInterstore(const Slice& destination, const std::vector& keys, const std::vector& weights, + AGGREGATE agg, std::vector& value_to_dest, int32_t* ret); + + // When all the elements in a sorted set are inserted with the same score, in + // order to force lexicographical ordering, this command returns all the + // elements in the sorted set at key with a value between min and max. + // + // If the elements in the sorted set have different scores, the returned + // elements are unspecified. + // + // The elements are considered to be ordered from lower to higher strings as + // compared byte-by-byte using the memcmp() C function. Longer strings are + // considered greater than shorter strings if the common part is identical. + // + // The optional LIMIT argument can be used to only get a range of the matching + // elements (similar to SELECT LIMIT offset, count in SQL). Keep in mind that + // if offset is large, the sorted set needs to be traversed for offset + // elements before getting to the elements to return, which can add up to O(N) + // time complexity. + Status ZRangebylex(const Slice& key, const Slice& min, const Slice& max, bool left_close, bool right_close, + std::vector* members); + + // When all the elements in a sorted set are inserted with the same score, in + // order to force lexicographical ordering, this command returns the number of + // elements in the sorted set at key with a value between min and max. + // + // The min and max arguments have the same meaning as described for + // ZRANGEBYLEX. 
+ // + // Note: the command has a complexity of just O(log(N)) because it uses + // elements ranks (see ZRANK) to get an idea of the range. Because of this + // there is no need to do a work proportional to the size of the range. + Status ZLexcount(const Slice& key, const Slice& min, const Slice& max, bool left_close, bool right_close, + int32_t* ret); + + // When all the elements in a sorted set are inserted with the same score, in + // order to force lexicographical ordering, this command removes all elements + // in the sorted set stored at key between the lexicographical range specified + // by min and max. + // + // The meaning of min and max are the same of the ZRANGEBYLEX command. + // Similarly, this command actually returns the same elements that ZRANGEBYLEX + // would return if called with the same min and max arguments. + Status ZRemrangebylex(const Slice& key, const Slice& min, const Slice& max, bool left_close, bool right_close, + int32_t* ret); + + // See SCAN for ZSCAN documentation. 
+ Status ZScan(const Slice& key, int64_t cursor, const std::string& pattern, int64_t count,
+ std::vector* score_members, int64_t* next_cursor);
+
+ // Keys Commands
+
+ // Note:
+ // Whenever any error happens, you need to check type_status for
+ // the error message
+
+ // Set a timeout on key
+ // return -1 operation exception errors happen in database
+ // return >=0 success
+ int32_t Expire(const Slice& key, int32_t ttl, std::map* type_status);
+
+ // Removes the specified keys
+ // return -1 operation exception errors happen in database
+ // return >=0 the number of keys that were removed
+ int64_t Del(const std::vector& keys, std::map* type_status);
+
+ // Removes the specified keys of the specified type
+ // return -1 operation exception errors happen in database
+ // return >= 0 the number of keys that were removed
+ int64_t DelByType(const std::vector& keys, const DataType& type);
+
+ // Iterate over a collection of elements
+ // return an updated cursor that the user need to use as the cursor argument
+ // in the next call
+ int64_t Scan(const DataType& dtype, int64_t cursor, const std::string& pattern, int64_t count,
+ std::vector* keys);
+
+ // Iterate over a collection of elements, obtaining the item which timeout
+ // conforms to the inequality (min_ttl < item_ttl < max_ttl)
+ // return an updated cursor that the user need to use as the cursor argument
+ // in the next call
+ int64_t PKExpireScan(const DataType& dtype, int64_t cursor, int32_t min_ttl, int32_t max_ttl, int64_t count,
+ std::vector* keys);
+
+ // Iterate over a collection of elements by specified range
+ // return a next_key that the user need to use as the key_start argument
+ // in the next call
+ Status PKScanRange(const DataType& data_type, const Slice& key_start, const Slice& key_end, const Slice& pattern,
+ int32_t limit, std::vector* keys, std::vector* kvs, std::string* next_key);
+
+ // Apart from the reversed ordering, PKRSCANRANGE is similar to PKScanRange
+ Status
PKRScanRange(const DataType& data_type, const Slice& key_start, const Slice& key_end, const Slice& pattern,
+ int32_t limit, std::vector* keys, std::vector* kvs, std::string* next_key);
+
+ // Traverses the database of the specified type, removing the Key that matches
+ // the pattern
+ Status PKPatternMatchDel(const DataType& data_type, const std::string& pattern, int32_t* ret);
+
+ // Iterate over a collection of elements
+ // return next_key that the user need to use as the start_key argument
+ // in the next call
+ Status Scanx(const DataType& data_type, const std::string& start_key, const std::string& pattern, int64_t count,
+ std::vector* keys, std::string* next_key);
+
+ // Returns if key exists.
+ // return -1 operation exception errors happen in database
+ // return >=0 the number of keys existing
+ int64_t Exists(const std::vector& keys, std::map* type_status);
+
+ // EXPIREAT has the same effect and semantic as EXPIRE, but instead of
+ // specifying the number of seconds representing the TTL (time to live), it
+ // takes an absolute Unix timestamp (seconds since January 1, 1970). A
+ // timestamp in the past will delete the key immediately.
+ // return -1 operation exception errors happen in database
+ // return 0 if key does not exist
+ // return >=1 if the timeout was set
+ int32_t Expireat(const Slice& key, int32_t timestamp, std::map* type_status);
+
+ // Remove the existing timeout on key, turning the key from volatile (a key
+ // with an expire set) to persistent (a key that will never expire as no
+ // timeout is associated).
+ // return -1 operation exception errors happen in database
+ // return 0 if key does not exist or does not have an associated timeout
+ // return >=1 if the timeout was removed
+ int32_t Persist(const Slice& key, std::map* type_status);
+
+ // Returns the remaining time to live of a key that has a timeout.
+ // return -3 operation exception errors happen in database
+ // return -2 if the key does not exist
+ // return -1 if the key exists but has no associated expire
+ // return > 0 TTL in seconds
+ std::map TTL(const Slice& key, std::map* type_status);
+
+ // Returns all the data types of the key;
+ // if single is true, the query will return the first one
+ Status GetType(const std::string& key, bool single, std::vector& types);
+
+ // Returns all the data types of the key
+ Status Type(const std::string& key, std::vector& types);
+
+ Status Keys(const DataType& data_type, const std::string& pattern, std::vector* keys);
+
+ // Dynamic switch WAL
+ void DisableWal(const bool is_wal_disable);
+
+ // Iterate through all the data in the database.
+ void ScanDatabase(const DataType& type);
+
+ // HyperLogLog
+ enum {
+ kMaxKeys = 255,
+ kPrecision = 17,
+ };
+ // Adds all the element arguments to the HyperLogLog data structure stored
+ // at the variable name specified as first argument.
+ Status PfAdd(const Slice& key, const std::vector& values, bool* update);
+
+ // When called with a single key, returns the approximated cardinality
+ // computed by the HyperLogLog data structure stored at the specified
+ // variable, which is 0 if the variable does not exist.
+ Status PfCount(const std::vector& keys, int64_t* result);
+
+ // Merge multiple HyperLogLog values into a unique value that will
+ // approximate the cardinality of the union of the observed Sets of the source
+ // HyperLogLog structures.
+ Status PfMerge(const std::vector& keys, std::string& value_to_dest); + + // Admin Commands + Status StartBGThread(); + Status RunBGTask(); + Status AddBGTask(const BGTask& bg_task); + + Status Compact(const DataType& type, bool sync = false); + Status DoCompact(const DataType& type); + Status CompactKey(const DataType& type, const std::string& key); + + Status SetMaxCacheStatisticKeys(uint32_t max_cache_statistic_keys); + Status SetSmallCompactionThreshold(uint32_t small_compaction_threshold); + + std::string GetCurrentTaskType(); + Status GetUsage(const std::string& property, uint64_t* result); + Status GetUsage(const std::string& property, std::map* type_result); + uint64_t GetProperty(const std::string& db_type, const std::string& property); + + Status GetKeyNum(std::vector* key_infos); + Status StopScanKeyNum(); + + rocksdb::DB* GetDBByType(const std::string& type); + + Status SetOptions(const OptionType& option_type, const std::string& db_type, + const std::unordered_map& options); + void GetRocksDBInfo(std::string& info); + + private: + std::unique_ptr strings_db_; + std::unique_ptr hashes_db_; + std::unique_ptr sets_db_; + std::unique_ptr zsets_db_; + std::unique_ptr lists_db_; + std::atomic is_opened_ = false; + + std::unique_ptr> cursors_store_; + + // Storage start the background thread for compaction task + pthread_t bg_tasks_thread_id_ = 0; + pstd::Mutex bg_tasks_mutex_; + pstd::CondVar bg_tasks_cond_var_; + std::queue bg_tasks_queue_; + + std::atomic current_task_type_ = kNone; + std::atomic bg_tasks_should_exit_ = false; + + // For scan keys in data base + std::atomic scan_keynum_exit_ = false; +}; + +} // namespace storage diff --git a/src/storage/include/storage/util.h b/src/storage/include/storage/util.h new file mode 100644 index 000000000..039888158 --- /dev/null +++ b/src/storage/include/storage/util.h @@ -0,0 +1,32 @@ +// Copyright (c) 2017-present The storage Authors. All rights reserved. 
+// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace storage { + +int Int64ToStr(char* dst, size_t dstlen, int64_t svalue); +int StrToInt64(const char* s, size_t slen, int64_t* value); +int StringMatch(const char* pattern, uint64_t pattern_len, const char* string, uint64_t string_len, int nocase); +int StrToLongDouble(const char* s, size_t slen, long double* ldval); +int LongDoubleToStr(long double ldval, std::string* value); +int do_mkdir(const char* path, mode_t mode); +int mkpath(const char* path, mode_t mode); +int delete_dir(const char* dirname); +int is_dir(const char* filename); +int CalculateMetaStartAndEndKey(const std::string& key, std::string* meta_start_key, std::string* meta_end_key); +int CalculateDataStartAndEndKey(const std::string& key, std::string* data_start_key, std::string* data_end_key); +bool isTailWildcard(const std::string& pattern); +void GetFilepath(const char* path, const char* filename, char* filepath); +bool DeleteFiles(const char* path); +} // namespace storage + diff --git a/src/storage/include/storage/version.h b/src/storage/include/storage/version.h new file mode 100644 index 000000000..59aab5799 --- /dev/null +++ b/src/storage/include/storage/version.h @@ -0,0 +1,10 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once + +#define STORAGE_MAJOR 1 +#define STORAGE_MINOR 0 +#define STORAGE_PATCH 0 diff --git a/src/storage/src/base_data_key_format.h b/src/storage/src/base_data_key_format.h new file mode 100644 index 000000000..696aec65a --- /dev/null +++ b/src/storage/src/base_data_key_format.h @@ -0,0 +1,119 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include "pstd/pstd_coding.h" + +namespace storage { +class BaseDataKey { + public: + BaseDataKey(const Slice& key, int32_t version, const Slice& data) + : key_(key), version_(version), data_(data) {} + + ~BaseDataKey() { + if (start_ != space_) { + delete[] start_; + } + } + + Slice Encode() { + size_t usize = key_.size() + data_.size(); + size_t needed = usize + sizeof(int32_t) * 2; + char* dst; + if (needed <= sizeof(space_)) { + dst = space_; + } else { + dst = new char[needed]; + + // Need to allocate space, delete previous space + if (start_ != space_) { + delete[] start_; + } + } + + start_ = dst; + pstd::EncodeFixed32(dst, key_.size()); + dst += sizeof(int32_t); + memcpy(dst, key_.data(), key_.size()); + dst += key_.size(); + pstd::EncodeFixed32(dst, version_); + dst += sizeof(int32_t); + memcpy(dst, data_.data(), data_.size()); + return Slice(start_, needed); + } + + private: + char space_[200]; + char* start_ = nullptr; + Slice key_; + int32_t version_ = -1; + Slice data_; +}; + +class ParsedBaseDataKey { + public: + explicit ParsedBaseDataKey(const std::string* key) { + const char* ptr = key->data(); + int32_t key_len = pstd::DecodeFixed32(ptr); + ptr += sizeof(int32_t); + key_ = Slice(ptr, key_len); + ptr += key_len; + version_ = pstd::DecodeFixed32(ptr); + ptr += sizeof(int32_t); + data_ = Slice(ptr, key->size() - key_len - 
sizeof(int32_t) * 2); + } + + explicit ParsedBaseDataKey(const Slice& key) { + const char* ptr = key.data(); + int32_t key_len = pstd::DecodeFixed32(ptr); + ptr += sizeof(int32_t); + key_ = Slice(ptr, key_len); + ptr += key_len; + version_ = pstd::DecodeFixed32(ptr); + ptr += sizeof(int32_t); + data_ = Slice(ptr, key.size() - key_len - sizeof(int32_t) * 2); + } + + virtual ~ParsedBaseDataKey() = default; + + Slice key() { return key_; } + + int32_t version() { return version_; } + + Slice data() { return data_; } + + protected: + Slice key_; + int32_t version_ = -1; + Slice data_; +}; + +class ParsedHashesDataKey : public ParsedBaseDataKey { + public: + explicit ParsedHashesDataKey(const std::string* key) : ParsedBaseDataKey(key) {} + explicit ParsedHashesDataKey(const Slice& key) : ParsedBaseDataKey(key) {} + Slice field() { return data_; } +}; + +class ParsedSetsMemberKey : public ParsedBaseDataKey { + public: + explicit ParsedSetsMemberKey(const std::string* key) : ParsedBaseDataKey(key) {} + explicit ParsedSetsMemberKey(const Slice& key) : ParsedBaseDataKey(key) {} + Slice member() { return data_; } +}; + +class ParsedZSetsMemberKey : public ParsedBaseDataKey { + public: + explicit ParsedZSetsMemberKey(const std::string* key) : ParsedBaseDataKey(key) {} + explicit ParsedZSetsMemberKey(const Slice& key) : ParsedBaseDataKey(key) {} + Slice member() { return data_; } +}; + +using HashesDataKey = BaseDataKey; +using SetsMemberKey = BaseDataKey; +using ZSetsMemberKey = BaseDataKey; + +} // namespace storage diff --git a/src/storage/src/base_filter.h b/src/storage/src/base_filter.h new file mode 100644 index 000000000..25180aef0 --- /dev/null +++ b/src/storage/src/base_filter.h @@ -0,0 +1,158 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include +#include +#include + +#include "rocksdb/compaction_filter.h" +#include "src/base_data_key_format.h" +#include "src/base_meta_value_format.h" +#include "src/debug.h" + +namespace storage { + +class BaseMetaFilter : public rocksdb::CompactionFilter { + public: + BaseMetaFilter() = default; + bool Filter(int level, const rocksdb::Slice& key, const rocksdb::Slice& value, std::string* new_value, + bool* value_changed) const override { + int64_t unix_time; + rocksdb::Env::Default()->GetCurrentTime(&unix_time); + auto cur_time = static_cast(unix_time); + ParsedBaseMetaValue parsed_base_meta_value(value); + TRACE("==========================START=========================="); + TRACE("[MetaFilter], key: %s, count = %d, timestamp: %d, cur_time: %d, version: %d", key.ToString().c_str(), + parsed_base_meta_value.count(), parsed_base_meta_value.timestamp(), cur_time, + parsed_base_meta_value.version()); + + if (parsed_base_meta_value.timestamp() != 0 && parsed_base_meta_value.timestamp() < cur_time && + parsed_base_meta_value.version() < cur_time) { + TRACE("Drop[Stale & version < cur_time]"); + return true; + } + if (parsed_base_meta_value.count() == 0 && parsed_base_meta_value.version() < cur_time) { + TRACE("Drop[Empty & version < cur_time]"); + return true; + } + TRACE("Reserve"); + return false; + } + + const char* Name() const override { return "BaseMetaFilter"; } +}; + +class BaseMetaFilterFactory : public rocksdb::CompactionFilterFactory { + public: + BaseMetaFilterFactory() = default; + std::unique_ptr CreateCompactionFilter( + const rocksdb::CompactionFilter::Context& context) override { + return std::unique_ptr(new BaseMetaFilter()); + } + const char* Name() const override { return "BaseMetaFilterFactory"; } +}; + +class BaseDataFilter : public rocksdb::CompactionFilter { + public: + BaseDataFilter(rocksdb::DB* db, std::vector* cf_handles_ptr) 
+ : db_(db), + cf_handles_ptr_(cf_handles_ptr) + {} + + bool Filter(int level, const Slice& key, const rocksdb::Slice& value, std::string* new_value, + bool* value_changed) const override { + ParsedBaseDataKey parsed_base_data_key(key); + TRACE("==========================START=========================="); + TRACE("[DataFilter], key: %s, data = %s, version = %d", parsed_base_data_key.key().ToString().c_str(), + parsed_base_data_key.data().ToString().c_str(), parsed_base_data_key.version()); + + if (parsed_base_data_key.key().ToString() != cur_key_) { + cur_key_ = parsed_base_data_key.key().ToString(); + std::string meta_value; + // destroyed when close the database, Reserve Current key value + if (cf_handles_ptr_->empty()) { + return false; + } + Status s = db_->Get(default_read_options_, (*cf_handles_ptr_)[0], cur_key_, &meta_value); + if (s.ok()) { + meta_not_found_ = false; + ParsedBaseMetaValue parsed_base_meta_value(&meta_value); + cur_meta_version_ = parsed_base_meta_value.version(); + cur_meta_timestamp_ = parsed_base_meta_value.timestamp(); + } else if (s.IsNotFound()) { + meta_not_found_ = true; + } else { + cur_key_ = ""; + TRACE("Reserve[Get meta_key faild]"); + return false; + } + } + + if (meta_not_found_) { + TRACE("Drop[Meta key not exist]"); + return true; + } + + int64_t unix_time; + rocksdb::Env::Default()->GetCurrentTime(&unix_time); + if (cur_meta_timestamp_ != 0 && cur_meta_timestamp_ < static_cast(unix_time)) { + TRACE("Drop[Timeout]"); + return true; + } + + if (cur_meta_version_ > parsed_base_data_key.version()) { + TRACE("Drop[data_key_version < cur_meta_version]"); + return true; + } else { + TRACE("Reserve[data_key_version == cur_meta_version]"); + return false; + } + } + + const char* Name() const override { return "BaseDataFilter"; } + + private: + rocksdb::DB* db_ = nullptr; + std::vector* cf_handles_ptr_ = nullptr; + rocksdb::ReadOptions default_read_options_; + mutable std::string cur_key_; + mutable bool meta_not_found_ = false; + 
mutable int32_t cur_meta_version_ = 0; + mutable int32_t cur_meta_timestamp_ = 0; +}; + +class BaseDataFilterFactory : public rocksdb::CompactionFilterFactory { + public: + BaseDataFilterFactory(rocksdb::DB** db_ptr, std::vector* handles_ptr) + : db_ptr_(db_ptr), cf_handles_ptr_(handles_ptr) {} + std::unique_ptr CreateCompactionFilter( + const rocksdb::CompactionFilter::Context& context) override { + return std::unique_ptr(new BaseDataFilter(*db_ptr_, cf_handles_ptr_)); + } + const char* Name() const override { return "BaseDataFilterFactory"; } + + private: + rocksdb::DB** db_ptr_ = nullptr; + std::vector* cf_handles_ptr_ = nullptr; +}; + +using HashesMetaFilter = BaseMetaFilter; +using HashesMetaFilterFactory = BaseMetaFilterFactory; +using HashesDataFilter = BaseDataFilter; +using HashesDataFilterFactory = BaseDataFilterFactory; + +using SetsMetaFilter = BaseMetaFilter; +using SetsMetaFilterFactory = BaseMetaFilterFactory; +using SetsMemberFilter = BaseDataFilter; +using SetsMemberFilterFactory = BaseDataFilterFactory; + +using ZSetsMetaFilter = BaseMetaFilter; +using ZSetsMetaFilterFactory = BaseMetaFilterFactory; +using ZSetsDataFilter = BaseDataFilter; +using ZSetsDataFilterFactory = BaseDataFilterFactory; + +} // namespace storage diff --git a/src/storage/src/base_meta_value_format.h b/src/storage/src/base_meta_value_format.h new file mode 100644 index 000000000..1c49e7987 --- /dev/null +++ b/src/storage/src/base_meta_value_format.h @@ -0,0 +1,146 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once + +#include + +#include "src/base_value_format.h" + +namespace storage { + +class BaseMetaValue : public InternalValue { + public: + explicit BaseMetaValue(const Slice& user_value) : InternalValue(user_value) {} + size_t AppendTimestampAndVersion() override { + size_t usize = user_value_.size(); + char* dst = start_; + memcpy(dst, user_value_.data(), usize); + dst += usize; + EncodeFixed32(dst, version_); + dst += sizeof(int32_t); + EncodeFixed32(dst, timestamp_); + return usize + 2 * sizeof(int32_t); + } + + int32_t UpdateVersion() { + int64_t unix_time; + rocksdb::Env::Default()->GetCurrentTime(&unix_time); + if (version_ >= static_cast(unix_time)) { + version_++; + } else { + version_ = static_cast(unix_time); + } + return version_; + } +}; + +class ParsedBaseMetaValue : public ParsedInternalValue { + public: + // Use this constructor after rocksdb::DB::Get(); + explicit ParsedBaseMetaValue(std::string* internal_value_str) : ParsedInternalValue(internal_value_str) { + if (internal_value_str->size() >= kBaseMetaValueSuffixLength) { + user_value_ = Slice(internal_value_str->data(), internal_value_str->size() - kBaseMetaValueSuffixLength); + version_ = DecodeFixed32(internal_value_str->data() + internal_value_str->size() - sizeof(int32_t) * 2); + timestamp_ = DecodeFixed32(internal_value_str->data() + internal_value_str->size() - sizeof(int32_t)); + } + count_ = DecodeFixed32(internal_value_str->data()); + } + + // Use this constructor in rocksdb::CompactionFilter::Filter(); + explicit ParsedBaseMetaValue(const Slice& internal_value_slice) : ParsedInternalValue(internal_value_slice) { + if (internal_value_slice.size() >= kBaseMetaValueSuffixLength) { + user_value_ = Slice(internal_value_slice.data(), internal_value_slice.size() - kBaseMetaValueSuffixLength); + version_ = DecodeFixed32(internal_value_slice.data() + internal_value_slice.size() - sizeof(int32_t) * 2); + timestamp_ = DecodeFixed32(internal_value_slice.data() + 
internal_value_slice.size() - sizeof(int32_t)); + } + count_ = DecodeFixed32(internal_value_slice.data()); + } + + void StripSuffix() override { + if (value_) { + value_->erase(value_->size() - kBaseMetaValueSuffixLength, kBaseMetaValueSuffixLength); + } + } + + void SetVersionToValue() override { + if (value_) { + char* dst = const_cast(value_->data()) + value_->size() - kBaseMetaValueSuffixLength; + EncodeFixed32(dst, version_); + } + } + + void SetTimestampToValue() override { + if (value_) { + char* dst = const_cast(value_->data()) + value_->size() - sizeof(int32_t); + EncodeFixed32(dst, timestamp_); + } + } + static const size_t kBaseMetaValueSuffixLength = 2 * sizeof(int32_t); + + int32_t InitialMetaValue() { + this->set_count(0); + this->set_timestamp(0); + return this->UpdateVersion(); + } + + int32_t count() { return count_; } + + bool check_set_count(size_t count) { + if (count > INT32_MAX) { + return false; + } + return true; + } + + void set_count(int32_t count) { + count_ = count; + if (value_) { + char* dst = const_cast(value_->data()); + EncodeFixed32(dst, count_); + } + } + + bool CheckModifyCount(int32_t delta) { + int64_t count = count_; + count += delta; + if (count < 0 || count > INT32_MAX) { + return false; + } + return true; + } + + void ModifyCount(int32_t delta) { + count_ += delta; + if (value_) { + char* dst = const_cast(value_->data()); + EncodeFixed32(dst, count_); + } + } + + int32_t UpdateVersion() { + int64_t unix_time; + rocksdb::Env::Default()->GetCurrentTime(&unix_time); + if (version_ >= static_cast(unix_time)) { + version_++; + } else { + version_ = static_cast(unix_time); + } + SetVersionToValue(); + return version_; + } + + private: + int32_t count_ = 0; +}; + +using HashesMetaValue = BaseMetaValue; +using ParsedHashesMetaValue = ParsedBaseMetaValue; +using SetsMetaValue = BaseMetaValue; +using ParsedSetsMetaValue = ParsedBaseMetaValue; +using ZSetsMetaValue = BaseMetaValue; +using ParsedZSetsMetaValue = ParsedBaseMetaValue; + 
+} // namespace storage diff --git a/src/storage/src/base_value_format.h b/src/storage/src/base_value_format.h new file mode 100644 index 000000000..292732a71 --- /dev/null +++ b/src/storage/src/base_value_format.h @@ -0,0 +1,126 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include + +#include "rocksdb/env.h" +#include "rocksdb/slice.h" +#include "src/coding.h" +#include "src/redis.h" + +namespace storage { + +class InternalValue { + public: + explicit InternalValue(const rocksdb::Slice& user_value) + : user_value_(user_value) {} + virtual ~InternalValue() { + if (start_ != space_) { + delete[] start_; + } + } + void set_timestamp(int32_t timestamp = 0) { timestamp_ = timestamp; } + Status SetRelativeTimestamp(int32_t ttl) { + int64_t unix_time; + rocksdb::Env::Default()->GetCurrentTime(&unix_time); + timestamp_ = static_cast(unix_time) + ttl; + if (timestamp_ != unix_time + static_cast(ttl)) { + return Status::InvalidArgument("invalid expire time"); + } + return Status::OK(); + } + void set_version(int32_t version = 0) { version_ = version; } + static const size_t kDefaultValueSuffixLength = sizeof(int32_t) * 2; + virtual rocksdb::Slice Encode() { + size_t usize = user_value_.size(); + size_t needed = usize + kDefaultValueSuffixLength; + char* dst; + if (needed <= sizeof(space_)) { + dst = space_; + } else { + dst = new char[needed]; + + // Need to allocate space, delete previous space + if (start_ != space_) { + delete[] start_; + } + } + start_ = dst; + size_t len = AppendTimestampAndVersion(); + return rocksdb::Slice(start_, len); + } + virtual size_t AppendTimestampAndVersion() = 0; + + protected: + char space_[200]; + char* start_ = nullptr; + rocksdb::Slice user_value_; + int32_t version_ 
= 0; + int32_t timestamp_ = 0; +}; + +class ParsedInternalValue { + public: + // Use this constructor after rocksdb::DB::Get(), since we use this in + // the implement of user interfaces and may need to modify the + // original value suffix, so the value_ must point to the string + explicit ParsedInternalValue(std::string* value) : value_(value) {} + + // Use this constructor in rocksdb::CompactionFilter::Filter(), + // since we use this in Compaction process, all we need to do is parsing + // the rocksdb::Slice, so don't need to modify the original value, value_ can be + // set to nullptr + explicit ParsedInternalValue(const rocksdb::Slice& value) {} + + virtual ~ParsedInternalValue() = default; + + rocksdb::Slice user_value() { return user_value_; } + + int32_t version() { return version_; } + + void set_version(int32_t version) { + version_ = version; + SetVersionToValue(); + } + + int32_t timestamp() { return timestamp_; } + + void set_timestamp(int32_t timestamp) { + timestamp_ = timestamp; + SetTimestampToValue(); + } + + void SetRelativeTimestamp(int32_t ttl) { + int64_t unix_time; + rocksdb::Env::Default()->GetCurrentTime(&unix_time); + timestamp_ = static_cast(unix_time) + ttl; + SetTimestampToValue(); + } + + bool IsPermanentSurvival() { return timestamp_ == 0; } + + bool IsStale() { + if (timestamp_ == 0) { + return false; + } + int64_t unix_time; + rocksdb::Env::Default()->GetCurrentTime(&unix_time); + return timestamp_ < unix_time; + } + + virtual void StripSuffix() = 0; + + protected: + virtual void SetVersionToValue() = 0; + virtual void SetTimestampToValue() = 0; + std::string* value_ = nullptr; + rocksdb::Slice user_value_; + int32_t version_ = 0 ; + int32_t timestamp_ = 0; +}; + +} // namespace storage diff --git a/src/storage/src/build_version.cc.in b/src/storage/src/build_version.cc.in new file mode 100644 index 000000000..1ad5231ac --- /dev/null +++ b/src/storage/src/build_version.cc.in @@ -0,0 +1,9 @@ +// Copyright (c) 2017-present, Qihoo, 
Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include "storage/build_version.h" +const char* storage_build_git_sha = "storage_build_git_sha:@@GIT_SHA@@"; +const char* storage_build_git_date = "storage_build_git_date:@@GIT_DATE_TIME@@"; +const char* storage_build_compile_date = __DATE__; diff --git a/src/storage/src/coding.h b/src/storage/src/coding.h new file mode 100644 index 000000000..13ae3ea54 --- /dev/null +++ b/src/storage/src/coding.h @@ -0,0 +1,81 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#if defined(__APPLE__) +# include // __BYTE_ORDER +# define __BYTE_ORDER __DARWIN_BYTE_ORDER +# define __LITTLE_ENDIAN __DARWIN_LITTLE_ENDIAN +#elif defined(__FreeBSD__) +# include // __BYTE_ORDER +#else +# include // __BYTE_ORDER +#endif + +#undef STORAGE_PLATFORM_IS_LITTLE_ENDIAN +#ifndef STORAGE_PLATFORM_IS_LITTLE_ENDIAN +# define STORAGE_PLATFORM_IS_LITTLE_ENDIAN (__BYTE_ORDER == __LITTLE_ENDIAN) +#endif +#include + +namespace storage { +static const bool kLittleEndian = STORAGE_PLATFORM_IS_LITTLE_ENDIAN; +#undef STORAGE_PLATFORM_IS_LITTLE_ENDIAN + +inline void EncodeFixed32(char* buf, uint32_t value) { + if (kLittleEndian) { + memcpy(buf, &value, sizeof(value)); + } else { + buf[0] = value & 0xff; + buf[1] = (value >> 8) & 0xff; + buf[2] = (value >> 16) & 0xff; + buf[3] = (value >> 24) & 0xff; + } +} + +inline void EncodeFixed64(char* buf, uint64_t value) { + if (kLittleEndian) { + memcpy(buf, &value, sizeof(value)); + } else { + buf[0] = value & 0xff; + buf[1] = (value >> 8) & 0xff; + 
buf[2] = (value >> 16) & 0xff; + buf[3] = (value >> 24) & 0xff; + buf[4] = (value >> 32) & 0xff; + buf[5] = (value >> 40) & 0xff; + buf[6] = (value >> 48) & 0xff; + buf[7] = (value >> 56) & 0xff; + } +} + +inline uint32_t DecodeFixed32(const char* ptr) { + if (kLittleEndian) { + // Load the raw bytes + uint32_t result; + memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load + return result; + } else { + return ((static_cast(static_cast(ptr[0]))) | + (static_cast(static_cast(ptr[1])) << 8) | + (static_cast(static_cast(ptr[2])) << 16) | + (static_cast(static_cast(ptr[3])) << 24)); + } +} + +inline uint64_t DecodeFixed64(const char* ptr) { + if (kLittleEndian) { + // Load the raw bytes + uint64_t result; + memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load + return result; + } else { + uint64_t lo = DecodeFixed32(ptr); + uint64_t hi = DecodeFixed32(ptr + 4); + return (hi << 32) | lo; + } +} + +} // namespace storage diff --git a/src/storage/src/custom_comparator.h b/src/storage/src/custom_comparator.h new file mode 100644 index 000000000..4df0813a0 --- /dev/null +++ b/src/storage/src/custom_comparator.h @@ -0,0 +1,266 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once + +#include "string" + +#include + +#include "src/coding.h" +#include "rocksdb/comparator.h" + +namespace storage { + +class ListsDataKeyComparatorImpl : public rocksdb::Comparator { + public: + ListsDataKeyComparatorImpl() = default; + + // keep compatible with blackwidow + const char* Name() const override { return "blackwidow.ListsDataKeyComparator"; } + + int Compare(const rocksdb::Slice& a, const rocksdb::Slice& b) const override { + assert(!a.empty() && !b.empty()); + const char* ptr_a = a.data(); + const char* ptr_b = b.data(); + auto a_size = static_cast(a.size()); + auto b_size = static_cast(b.size()); + int32_t key_a_len = DecodeFixed32(ptr_a); + int32_t key_b_len = DecodeFixed32(ptr_b); + ptr_a += sizeof(int32_t); + ptr_b += sizeof(int32_t); + rocksdb::Slice sets_key_a(ptr_a, key_a_len); + rocksdb::Slice sets_key_b(ptr_b, key_b_len); + ptr_a += key_a_len; + ptr_b += key_b_len; + if (sets_key_a != sets_key_b) { + return sets_key_a.compare(sets_key_b); + } + if (ptr_a - a.data() == a_size && ptr_b - b.data() == b_size) { + return 0; + } else if (ptr_a - a.data() == a_size) { + return -1; + } else if (ptr_b - b.data() == b_size) { + return 1; + } + + int32_t version_a = DecodeFixed32(ptr_a); + int32_t version_b = DecodeFixed32(ptr_b); + ptr_a += sizeof(int32_t); + ptr_b += sizeof(int32_t); + if (version_a != version_b) { + return version_a < version_b ? -1 : 1; + } + if (ptr_a - a.data() == a_size && ptr_b - b.data() == b_size) { + return 0; + } else if (ptr_a - a.data() == a_size) { + return -1; + } else if (ptr_b - b.data() == b_size) { + return 1; + } + + uint64_t index_a = DecodeFixed64(ptr_a); + uint64_t index_b = DecodeFixed64(ptr_b); + ptr_a += sizeof(uint64_t); + ptr_b += sizeof(uint64_t); + if (index_a != index_b) { + return index_a < index_b ? 
-1 : 1; + } else { + return 0; + } + } + + bool Equal(const rocksdb::Slice& a, const rocksdb::Slice& b) const override { return Compare(a, b) == 0; } + + void FindShortestSeparator(std::string* start, const rocksdb::Slice& limit) const override {} + + void FindShortSuccessor(std::string* key) const override {} +}; + +/* + * | | | | | | + * 4 Bytes Key Size Bytes 4 Bytes 8 Bytes + */ +class ZSetsScoreKeyComparatorImpl : public rocksdb::Comparator { + public: + // keep compatible with blackwidow + const char* Name() const override { return "blackwidow.ZSetsScoreKeyComparator"; } + int Compare(const rocksdb::Slice& a, const rocksdb::Slice& b) const override { + assert(a.size() > sizeof(int32_t)); + assert(a.size() >= DecodeFixed32(a.data()) + 2 * sizeof(int32_t) + sizeof(uint64_t)); + assert(b.size() > sizeof(int32_t)); + assert(b.size() >= DecodeFixed32(b.data()) + 2 * sizeof(int32_t) + sizeof(uint64_t)); + + const char* ptr_a = a.data(); + const char* ptr_b = b.data(); + auto a_size = static_cast(a.size()); + auto b_size = static_cast(b.size()); + int32_t key_a_len = DecodeFixed32(ptr_a); + int32_t key_b_len = DecodeFixed32(ptr_b); + rocksdb::Slice key_a_prefix(ptr_a, key_a_len + 2 * sizeof(int32_t)); + rocksdb::Slice key_b_prefix(ptr_b, key_b_len + 2 * sizeof(int32_t)); + ptr_a += key_a_len + 2 * sizeof(int32_t); + ptr_b += key_b_len + 2 * sizeof(int32_t); + int ret = key_a_prefix.compare(key_b_prefix); + if (ret) { + return ret; + } + + uint64_t a_i = DecodeFixed64(ptr_a); + uint64_t b_i = DecodeFixed64(ptr_b); + const void* ptr_a_score = reinterpret_cast(&a_i); + const void* ptr_b_score = reinterpret_cast(&b_i); + double a_score = *reinterpret_cast(ptr_a_score); + double b_score = *reinterpret_cast(ptr_b_score); + ptr_a += sizeof(uint64_t); + ptr_b += sizeof(uint64_t); + if (a_score != b_score) { + return a_score < b_score ? 
-1 : 1; + } else { + if (ptr_a - a.data() == a_size && ptr_b - b.data() == b_size) { + return 0; + } else if (ptr_a - a.data() == a_size) { + return -1; + } else if (ptr_b - b.data() == b_size) { + return 1; + } else { + rocksdb::Slice key_a_member(ptr_a, a_size - (ptr_a - a.data())); + rocksdb::Slice key_b_member(ptr_b, b_size - (ptr_b - b.data())); + ret = key_a_member.compare(key_b_member); + if (ret) { + return ret; + } + } + } + return 0; + } + + bool Equal(const rocksdb::Slice& a, const rocksdb::Slice& b) const override { return Compare(a, b) == 0; } + + void ParseAndPrintZSetsScoreKey(const std::string& from, const std::string& str) { + const char* ptr = str.data(); + + int32_t key_len = DecodeFixed32(ptr); + ptr += sizeof(int32_t); + + std::string key(ptr, key_len); + ptr += key_len; + + int32_t version = DecodeFixed32(ptr); + ptr += sizeof(int32_t); + + uint64_t key_score_i = DecodeFixed64(ptr); + const void* ptr_key_score = reinterpret_cast(&key_score_i); + double score = *reinterpret_cast(ptr_key_score); + ptr += sizeof(uint64_t); + + std::string member(ptr, str.size() - (key_len + 2 * sizeof(int32_t) + sizeof(uint64_t))); + LOG(INFO) << from.data() << ": total_len[" << str.size() << "], key_len[" << key_len << "], key[" << key.data() << "], " + << "version[ " << version << "], score[" << score << "], member[" << member.data() << "]"; + } + + // Advanced functions: these are used to reduce the space requirements + // for internal data structures like index blocks. + + // If *start < limit, changes *start to a short string in [start,limit). + // Simple comparator implementations may return with *start unchanged, + // i.e., an implementation of this method that does nothing is correct. 
+ void FindShortestSeparator(std::string* start, const rocksdb::Slice& limit) const override { + assert(start->size() > sizeof(int32_t)); + assert(start->size() >= DecodeFixed32(start->data()) + 2 * sizeof(int32_t) + sizeof(uint64_t)); + assert(limit.size() > sizeof(int32_t)); + assert(limit.size() >= DecodeFixed32(limit.data()) + 2 * sizeof(int32_t) + sizeof(uint64_t)); + + const char* ptr_start = start->data(); + const char* ptr_limit = limit.data(); + int32_t key_start_len = DecodeFixed32(ptr_start); + int32_t key_limit_len = DecodeFixed32(ptr_limit); + rocksdb::Slice key_start_prefix(ptr_start, key_start_len + 2 * sizeof(int32_t)); + rocksdb::Slice key_limit_prefix(ptr_limit, key_limit_len + 2 * sizeof(int32_t)); + ptr_start += key_start_len + 2 * sizeof(int32_t); + ptr_limit += key_limit_len + 2 * sizeof(int32_t); + if (key_start_prefix.compare(key_limit_prefix) != 0) { + return; + } + + uint64_t start_i = DecodeFixed64(ptr_start); + uint64_t limit_i = DecodeFixed64(ptr_limit); + const void* ptr_start_score = reinterpret_cast(&start_i); + const void* ptr_limit_score = reinterpret_cast(&limit_i); + double start_score = *reinterpret_cast(ptr_start_score); + double limit_score = *reinterpret_cast(ptr_limit_score); + ptr_start += sizeof(uint64_t); + ptr_limit += sizeof(uint64_t); + if (start_score < limit_score) { + if (start_score + 1 < limit_score) { + start->resize(key_start_len + 2 * sizeof(int32_t)); + start_score += 1; + const void* addr_start_score = reinterpret_cast(&start_score); + char dst[sizeof(uint64_t)]; + EncodeFixed64(dst, *reinterpret_cast(addr_start_score)); + start->append(dst, sizeof(uint64_t)); + } + return; + } + + std::string key_start_member(ptr_start, start->size() - (key_start_len + 2 * sizeof(int32_t) + sizeof(uint64_t))); + std::string key_limit_member(ptr_limit, limit.size() - (key_limit_len + 2 * sizeof(int32_t) + sizeof(uint64_t))); + // Find length of common prefix + size_t min_length = std::min(key_start_member.size(), 
key_limit_member.size()); + size_t diff_index = 0; + while ((diff_index < min_length) && (key_start_member[diff_index] == key_limit_member[diff_index])) { + diff_index++; + } + + if (diff_index >= min_length) { + // Do not shorten if one string is a prefix of the other + } else { + auto key_start_member_byte = static_cast(key_start_member[diff_index]); + auto key_limit_member_byte = static_cast(key_limit_member[diff_index]); + if (key_start_member_byte >= key_limit_member_byte) { + // Cannot shorten since limit is smaller than start or start is + // already the shortest possible. + return; + } + assert(key_start_member_byte < key_limit_member_byte); + + if (diff_index < key_limit_member.size() - 1 || key_start_member_byte + 1 < key_limit_member_byte) { + key_start_member[diff_index]++; + key_start_member.resize(diff_index + 1); + start->resize(key_start_len + 2 * sizeof(int32_t) + sizeof(uint64_t)); + start->append(key_start_member); + } else { + // v + // A A 1 A A A + // A A 2 + // + // Incrementing the current byte will make start bigger than limit, we + // will skip this byte, and find the first non 0xFF byte in start and + // increment it. + diff_index++; + + while (diff_index < key_start_member.size()) { + // Keep moving until we find the first non 0xFF byte to + // increment it + if (static_cast(key_start_member[diff_index]) < static_cast(0xff)) { + key_start_member[diff_index]++; + key_start_member.resize(diff_index + 1); + start->resize(key_start_len + 2 * sizeof(int32_t) + sizeof(uint64_t)); + start->append(key_start_member); + break; + } + diff_index++; + } + } + } + } + + // Changes *key to a short string >= *key. + // Simple comparator implementations may return with *key unchanged, + // i.e., an implementation of this method that does nothing is correct. 
+ void FindShortSuccessor(std::string* key) const override {} +}; + +} // namespace storage diff --git a/src/storage/src/debug.h b/src/storage/src/debug.h new file mode 100644 index 000000000..000c20017 --- /dev/null +++ b/src/storage/src/debug.h @@ -0,0 +1,14 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#ifndef NDEBUG +# define TRACE(M, ...) fprintf(stderr, "[TRACE] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__) +# define DEBUG(M, ...) fprintf(stderr, "[Debug] (%s:%d) " M "\n", __FILE__, __LINE__, ##__VA_ARGS__) +#else +# define TRACE(M, ...) {} +# define DEBUG(M, ...) {} +#endif // NDEBUG diff --git a/src/storage/src/lists_data_key_format.h b/src/storage/src/lists_data_key_format.h new file mode 100644 index 000000000..8bcc7b232 --- /dev/null +++ b/src/storage/src/lists_data_key_format.h @@ -0,0 +1,95 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once + +#include "pstd/pstd_coding.h" + +#include + +namespace storage { +class ListsDataKey { + public: + ListsDataKey(const rocksdb::Slice& key, int32_t version, uint64_t index) + : key_(key), version_(version), index_(index) {} + + ~ListsDataKey() { + if (start_ != space_) { + delete[] start_; + } + } + + rocksdb::Slice Encode() { + size_t usize = key_.size(); + size_t needed = usize + sizeof(int32_t) * 2 + sizeof(uint64_t); + char* dst; + if (needed <= sizeof(space_)) { + dst = space_; + } else { + dst = new char[needed]; + + // Need to allocate space, delete previous space + if (start_ != space_) { + delete[] start_; + } + } + start_ = dst; + pstd::EncodeFixed32(dst, key_.size()); + dst += sizeof(int32_t); + memcpy(dst, key_.data(), key_.size()); + dst += key_.size(); + pstd::EncodeFixed32(dst, version_); + dst += sizeof(int32_t); + pstd::EncodeFixed64(dst, index_); + return rocksdb::Slice(start_, needed); + } + + private: + char space_[200]; + char* start_ = nullptr; + rocksdb::Slice key_; + int32_t version_ = -1; + uint64_t index_ = 0; +}; + +class ParsedListsDataKey { + public: + explicit ParsedListsDataKey(const std::string* key) { + const char* ptr = key->data(); + int32_t key_len = pstd::DecodeFixed32(ptr); + ptr += sizeof(int32_t); + key_ = rocksdb::Slice(ptr, key_len); + ptr += key_len; + version_ = pstd::DecodeFixed32(ptr); + ptr += sizeof(int32_t); + index_ = pstd::DecodeFixed64(ptr); + } + + explicit ParsedListsDataKey(const rocksdb::Slice& key) { + const char* ptr = key.data(); + int32_t key_len = pstd::DecodeFixed32(ptr); + ptr += sizeof(int32_t); + key_ = rocksdb::Slice(ptr, key_len); + ptr += key_len; + version_ = pstd::DecodeFixed32(ptr); + ptr += sizeof(int32_t); + index_ = pstd::DecodeFixed64(ptr); + } + + virtual ~ParsedListsDataKey() = default; + + rocksdb::Slice key() { return key_; } + + int32_t version() { return version_; } + + uint64_t index() { return index_; } + + private: + rocksdb::Slice key_; + int32_t version_ = -1; + 
uint64_t index_ = 0; +}; + +} // namespace storage diff --git a/src/storage/src/lists_filter.h b/src/storage/src/lists_filter.h new file mode 100644 index 000000000..4da2124d9 --- /dev/null +++ b/src/storage/src/lists_filter.h @@ -0,0 +1,145 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include +#include +#include + +#include "rocksdb/compaction_filter.h" +#include "rocksdb/db.h" +#include "src/debug.h" +#include "src/lists_data_key_format.h" +#include "src/lists_meta_value_format.h" + +namespace storage { + +class ListsMetaFilter : public rocksdb::CompactionFilter { + public: + ListsMetaFilter() = default; + bool Filter(int level, const rocksdb::Slice& key, const rocksdb::Slice& value, std::string* new_value, + bool* value_changed) const override { + int64_t unix_time; + rocksdb::Env::Default()->GetCurrentTime(&unix_time); + auto cur_time = static_cast(unix_time); + ParsedListsMetaValue parsed_lists_meta_value(value); + TRACE("==========================START=========================="); + TRACE("[ListMetaFilter], key: %s, count = %llu, timestamp: %d, cur_time: %d, version: %d", key.ToString().c_str(), + parsed_lists_meta_value.count(), parsed_lists_meta_value.timestamp(), cur_time, + parsed_lists_meta_value.version()); + + if (parsed_lists_meta_value.timestamp() != 0 && parsed_lists_meta_value.timestamp() < cur_time && + parsed_lists_meta_value.version() < cur_time) { + TRACE("Drop[Stale & version < cur_time]"); + return true; + } + if (parsed_lists_meta_value.count() == 0 && parsed_lists_meta_value.version() < cur_time) { + TRACE("Drop[Empty & version < cur_time]"); + return true; + } + TRACE("Reserve"); + return false; + } + + const char* Name() const override { return "ListsMetaFilter"; } +}; 
+ +class ListsMetaFilterFactory : public rocksdb::CompactionFilterFactory { + public: + ListsMetaFilterFactory() = default; + std::unique_ptr CreateCompactionFilter( + const rocksdb::CompactionFilter::Context& context) override { + return std::unique_ptr(new ListsMetaFilter()); + } + const char* Name() const override { return "ListsMetaFilterFactory"; } +}; + +class ListsDataFilter : public rocksdb::CompactionFilter { + public: + ListsDataFilter(rocksdb::DB* db, std::vector* cf_handles_ptr) + : db_(db), + cf_handles_ptr_(cf_handles_ptr) + {} + + bool Filter(int level, const rocksdb::Slice& key, const rocksdb::Slice& value, std::string* new_value, + bool* value_changed) const override { + ParsedListsDataKey parsed_lists_data_key(key); + TRACE("==========================START=========================="); + TRACE("[DataFilter], key: %s, index = %llu, data = %s, version = %d", parsed_lists_data_key.key().ToString().c_str(), + parsed_lists_data_key.index(), value.ToString().c_str(), parsed_lists_data_key.version()); + + if (parsed_lists_data_key.key().ToString() != cur_key_) { + cur_key_ = parsed_lists_data_key.key().ToString(); + std::string meta_value; + // destroyed when close the database, Reserve Current key value + if (cf_handles_ptr_->empty()) { + return false; + } + rocksdb::Status s = db_->Get(default_read_options_, (*cf_handles_ptr_)[0], cur_key_, &meta_value); + if (s.ok()) { + meta_not_found_ = false; + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + cur_meta_version_ = parsed_lists_meta_value.version(); + cur_meta_timestamp_ = parsed_lists_meta_value.timestamp(); + } else if (s.IsNotFound()) { + meta_not_found_ = true; + } else { + cur_key_ = ""; + TRACE("Reserve[Get meta_key faild]"); + return false; + } + } + + if (meta_not_found_) { + TRACE("Drop[Meta key not exist]"); + return true; + } + + int64_t unix_time; + rocksdb::Env::Default()->GetCurrentTime(&unix_time); + if (cur_meta_timestamp_ != 0 && cur_meta_timestamp_ < 
static_cast(unix_time)) { + TRACE("Drop[Timeout]"); + return true; + } + + if (cur_meta_version_ > parsed_lists_data_key.version()) { + TRACE("Drop[list_data_key_version < cur_meta_version]"); + return true; + } else { + TRACE("Reserve[list_data_key_version == cur_meta_version]"); + return false; + } + } + + const char* Name() const override { return "ListsDataFilter"; } + + private: + rocksdb::DB* db_ = nullptr; + std::vector* cf_handles_ptr_ = nullptr; + rocksdb::ReadOptions default_read_options_; + mutable std::string cur_key_; + mutable bool meta_not_found_ = false; + mutable int32_t cur_meta_version_ = 0; + mutable int32_t cur_meta_timestamp_ = 0; +}; + +class ListsDataFilterFactory : public rocksdb::CompactionFilterFactory { + public: + ListsDataFilterFactory(rocksdb::DB** db_ptr, std::vector* handles_ptr) + : db_ptr_(db_ptr), cf_handles_ptr_(handles_ptr) {} + + std::unique_ptr CreateCompactionFilter( + const rocksdb::CompactionFilter::Context& context) override { + return std::unique_ptr(new ListsDataFilter(*db_ptr_, cf_handles_ptr_)); + } + const char* Name() const override { return "ListsDataFilterFactory"; } + + private: + rocksdb::DB** db_ptr_ = nullptr; + std::vector* cf_handles_ptr_ = nullptr; +}; + +} // namespace storage diff --git a/src/storage/src/lists_meta_value_format.h b/src/storage/src/lists_meta_value_format.h new file mode 100644 index 000000000..70f0ea922 --- /dev/null +++ b/src/storage/src/lists_meta_value_format.h @@ -0,0 +1,227 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once + +#include + +#include "src/base_value_format.h" + +namespace storage { + +const uint64_t InitalLeftIndex = 9223372036854775807; +const uint64_t InitalRightIndex = 9223372036854775808U; + +class ListsMetaValue : public InternalValue { + public: + explicit ListsMetaValue(const rocksdb::Slice& user_value) + : InternalValue(user_value), left_index_(InitalLeftIndex), right_index_(InitalRightIndex) {} + + size_t AppendTimestampAndVersion() override { + size_t usize = user_value_.size(); + char* dst = start_; + memcpy(dst, user_value_.data(), usize); + dst += usize; + EncodeFixed32(dst, version_); + dst += sizeof(int32_t); + EncodeFixed32(dst, timestamp_); + return usize + 2 * sizeof(int32_t); + } + + virtual size_t AppendIndex() { + char* dst = start_; + dst += user_value_.size() + 2 * sizeof(int32_t); + EncodeFixed64(dst, left_index_); + dst += sizeof(int64_t); + EncodeFixed64(dst, right_index_); + return 2 * sizeof(int64_t); + } + + static const size_t kDefaultValueSuffixLength = sizeof(int32_t) * 2 + sizeof(int64_t) * 2; + + rocksdb::Slice Encode() override { + size_t usize = user_value_.size(); + size_t needed = usize + kDefaultValueSuffixLength; + char* dst; + if (needed <= sizeof(space_)) { + dst = space_; + } else { + dst = new char[needed]; + } + start_ = dst; + size_t len = AppendTimestampAndVersion() + AppendIndex(); + return rocksdb::Slice(start_, len); + } + + int32_t UpdateVersion() { + int64_t unix_time; + rocksdb::Env::Default()->GetCurrentTime(&unix_time); + if (version_ >= static_cast(unix_time)) { + version_++; + } else { + version_ = static_cast(unix_time); + } + return version_; + } + + uint64_t left_index() { return left_index_; } + + void ModifyLeftIndex(uint64_t index) { left_index_ -= index; } + + uint64_t right_index() { return right_index_; } + + void ModifyRightIndex(uint64_t index) { right_index_ += index; } + + private: + uint64_t left_index_ = 0; + uint64_t right_index_ = 0; +}; + +class ParsedListsMetaValue : public 
ParsedInternalValue { + public: + // Use this constructor after rocksdb::DB::Get(); + explicit ParsedListsMetaValue(std::string* internal_value_str) + : ParsedInternalValue(internal_value_str) { + assert(internal_value_str->size() >= kListsMetaValueSuffixLength); + if (internal_value_str->size() >= kListsMetaValueSuffixLength) { + user_value_ = rocksdb::Slice(internal_value_str->data(), internal_value_str->size() - kListsMetaValueSuffixLength); + version_ = DecodeFixed32(internal_value_str->data() + internal_value_str->size() - sizeof(int32_t) * 2 - + sizeof(int64_t) * 2); + timestamp_ = DecodeFixed32(internal_value_str->data() + internal_value_str->size() - sizeof(int32_t) - + sizeof(int64_t) * 2); + left_index_ = DecodeFixed64(internal_value_str->data() + internal_value_str->size() - sizeof(int64_t) * 2); + right_index_ = DecodeFixed64(internal_value_str->data() + internal_value_str->size() - sizeof(int64_t)); + } + count_ = DecodeFixed64(internal_value_str->data()); + } + + // Use this constructor in rocksdb::CompactionFilter::Filter(); + explicit ParsedListsMetaValue(const rocksdb::Slice& internal_value_slice) + : ParsedInternalValue(internal_value_slice) { + assert(internal_value_slice.size() >= kListsMetaValueSuffixLength); + if (internal_value_slice.size() >= kListsMetaValueSuffixLength) { + user_value_ = rocksdb::Slice(internal_value_slice.data(), internal_value_slice.size() - kListsMetaValueSuffixLength); + version_ = DecodeFixed32(internal_value_slice.data() + internal_value_slice.size() - sizeof(int32_t) * 2 - + sizeof(int64_t) * 2); + timestamp_ = DecodeFixed32(internal_value_slice.data() + internal_value_slice.size() - sizeof(int32_t) - + sizeof(int64_t) * 2); + left_index_ = DecodeFixed64(internal_value_slice.data() + internal_value_slice.size() - sizeof(int64_t) * 2); + right_index_ = DecodeFixed64(internal_value_slice.data() + internal_value_slice.size() - sizeof(int64_t)); + } + count_ = DecodeFixed64(internal_value_slice.data()); + } + + void 
StripSuffix() override { + if (value_) { + value_->erase(value_->size() - kListsMetaValueSuffixLength, kListsMetaValueSuffixLength); + } + } + + void SetVersionToValue() override { + if (value_) { + char* dst = const_cast(value_->data()) + value_->size() - kListsMetaValueSuffixLength; + EncodeFixed32(dst, version_); + } + } + + void SetTimestampToValue() override { + if (value_) { + char* dst = const_cast(value_->data()) + value_->size() - sizeof(int32_t) - 2 * sizeof(int64_t); + EncodeFixed32(dst, timestamp_); + } + } + + void SetIndexToValue() { + if (value_) { + char* dst = const_cast(value_->data()) + value_->size() - 2 * sizeof(int64_t); + EncodeFixed64(dst, left_index_); + dst += sizeof(int64_t); + EncodeFixed64(dst, right_index_); + } + } + + static const size_t kListsMetaValueSuffixLength = 2 * sizeof(int32_t) + 2 * sizeof(int64_t); + + int32_t InitialMetaValue() { + this->set_count(0); + this->set_left_index(InitalLeftIndex); + this->set_right_index(InitalRightIndex); + this->set_timestamp(0); + return this->UpdateVersion(); + } + + uint64_t count() { return count_; } + + void set_count(uint64_t count) { + count_ = count; + if (value_) { + char* dst = const_cast(value_->data()); + EncodeFixed64(dst, count_); + } + } + + void ModifyCount(uint64_t delta) { + count_ += delta; + if (value_) { + char* dst = const_cast(value_->data()); + EncodeFixed64(dst, count_); + } + } + + int32_t UpdateVersion() { + int64_t unix_time; + rocksdb::Env::Default()->GetCurrentTime(&unix_time); + if (version_ >= static_cast(unix_time)) { + version_++; + } else { + version_ = static_cast(unix_time); + } + SetVersionToValue(); + return version_; + } + + uint64_t left_index() { return left_index_; } + + void set_left_index(uint64_t index) { + left_index_ = index; + if (value_) { + char* dst = const_cast(value_->data()) + value_->size() - 2 * sizeof(int64_t); + EncodeFixed64(dst, left_index_); + } + } + + void ModifyLeftIndex(uint64_t index) { + left_index_ -= index; + if (value_) { 
+ char* dst = const_cast(value_->data()) + value_->size() - 2 * sizeof(int64_t); + EncodeFixed64(dst, left_index_); + } + } + + uint64_t right_index() { return right_index_; } + + void set_right_index(uint64_t index) { + right_index_ = index; + if (value_) { + char* dst = const_cast(value_->data()) + value_->size() - sizeof(int64_t); + EncodeFixed64(dst, right_index_); + } + } + + void ModifyRightIndex(uint64_t index) { + right_index_ += index; + if (value_) { + char* dst = const_cast(value_->data()) + value_->size() - sizeof(int64_t); + EncodeFixed64(dst, right_index_); + } + } + + private: + uint64_t count_ = 0; + uint64_t left_index_ = 0; + uint64_t right_index_ = 0; +}; + +} // namespace storage diff --git a/src/storage/src/lock_mgr.h b/src/storage/src/lock_mgr.h new file mode 100644 index 000000000..e829e86ed --- /dev/null +++ b/src/storage/src/lock_mgr.h @@ -0,0 +1,19 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include +#include + +#include "pstd/lock_mgr.h" + +#include "src/mutex.h" + +namespace storage { + +using LockMgr = pstd::lock::LockMgr; + +} // namespace storage diff --git a/src/storage/src/lru_cache.h b/src/storage/src/lru_cache.h new file mode 100644 index 000000000..07eead022 --- /dev/null +++ b/src/storage/src/lru_cache.h @@ -0,0 +1,295 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once + +#include +#include +#include + +#include "rocksdb/status.h" + +#include "pstd/pstd_mutex.h" + +namespace storage { + +template +struct LRUHandle { + T1 key; + T2 value; + size_t charge; + LRUHandle* next; + LRUHandle* prev; +}; + +template +class HandleTable { + public: + HandleTable(); + ~HandleTable(); + + size_t TableSize(); + LRUHandle* Lookup(const T1& key); + LRUHandle* Remove(const T1& key); + LRUHandle* Insert(const T1& key, LRUHandle* handle); + + private: + std::unordered_map*> table_; +}; + +template +HandleTable::HandleTable() = default; + +template +HandleTable::~HandleTable() = default; + +template +size_t HandleTable::TableSize() { + return table_.size(); +} + +template +LRUHandle* HandleTable::Lookup(const T1& key) { + if (table_.find(key) != table_.end()) { + return table_[key]; + } else { + return nullptr; + } +} + +template +LRUHandle* HandleTable::Remove(const T1& key) { + LRUHandle* old = nullptr; + if (table_.find(key) != table_.end()) { + old = table_[key]; + table_.erase(key); + } + return old; +} + +template +LRUHandle* HandleTable::Insert(const T1& key, LRUHandle* const handle) { + LRUHandle* old = nullptr; + if (table_.find(key) != table_.end()) { + old = table_[key]; + table_.erase(key); + } + table_.insert({key, handle}); + return old; +} + +template +class LRUCache { + public: + LRUCache(); + ~LRUCache(); + + size_t Size(); + size_t TotalCharge(); + size_t Capacity(); + void SetCapacity(size_t capacity); + + rocksdb::Status Lookup(const T1& key, T2* value); + rocksdb::Status Insert(const T1& key, const T2& value, size_t charge = 1); + rocksdb::Status Remove(const T1& key); + rocksdb::Status Clear(); + + // Just for test + bool LRUAndHandleTableConsistent(); + bool LRUAsExpected(const std::vector>& expect); + + private: + void LRU_Trim(); + void LRU_Remove(LRUHandle* e); + void LRU_Append(LRUHandle* e); + void LRU_MoveToHead(LRUHandle* e); + bool FinishErase(LRUHandle* e); + + // Initialized before use. 
+ size_t capacity_ = 0; + size_t usage_ = 0; + size_t size_ = 0; + + pstd::Mutex mutex_; + + // Dummy head of LRU list. + // lru.prev is newest entry, lru.next is oldest entry. + LRUHandle lru_; + + HandleTable handle_table_; +}; + +template +LRUCache::LRUCache() { + // Make empty circular linked lists. + lru_.next = &lru_; + lru_.prev = &lru_; +} + +template +LRUCache::~LRUCache() { + Clear(); +} + +template +size_t LRUCache::Size() { + std::lock_guard l(mutex_); + return size_; +} + +template +size_t LRUCache::TotalCharge() { + std::lock_guard l(mutex_); + return usage_; +} + +template +size_t LRUCache::Capacity() { + std::lock_guard l(mutex_); + return capacity_; +} + +template +void LRUCache::SetCapacity(size_t capacity) { + std::lock_guard l(mutex_); + capacity_ = capacity; + LRU_Trim(); +} + +template +rocksdb::Status LRUCache::Lookup(const T1& key, T2* const value) { + std::lock_guard l(mutex_); + LRUHandle* handle = handle_table_.Lookup(key); + if (handle) { + LRU_MoveToHead(handle); + *value = handle->value; + } + return (!handle) ? rocksdb::Status::NotFound() : rocksdb::Status::OK(); +} + +template +rocksdb::Status LRUCache::Insert(const T1& key, const T2& value, size_t charge) { + std::lock_guard l(mutex_); + if (capacity_ == 0) { + return rocksdb::Status::Corruption("capacity is empty"); + } else { + auto handle = new LRUHandle(); + handle->key = key; + handle->value = value; + handle->charge = charge; + LRU_Append(handle); + size_++; + usage_ += charge; + FinishErase(handle_table_.Insert(key, handle)); + LRU_Trim(); + } + return rocksdb::Status::OK(); +} + +template +rocksdb::Status LRUCache::Remove(const T1& key) { + std::lock_guard l(mutex_); + bool erased = FinishErase(handle_table_.Remove(key)); + return erased ? 
rocksdb::Status::OK() : rocksdb::Status::NotFound(); +} + +template +rocksdb::Status LRUCache::Clear() { + std::lock_guard l(mutex_); + LRUHandle* old = nullptr; + while (lru_.next != &lru_) { + old = lru_.next; + bool erased = FinishErase(handle_table_.Remove(old->key)); + if (!erased) { // to avoid unused variable when compiled NDEBUG + assert(erased); + } + } + return rocksdb::Status::OK(); +} + +template +bool LRUCache::LRUAndHandleTableConsistent() { + size_t count = 0; + std::lock_guard l(mutex_); + LRUHandle* handle = nullptr; + LRUHandle* current = lru_.prev; + while (current != &lru_) { + handle = handle_table_.Lookup(current->key); + if (!handle || handle != current) { + return false; + } else { + count++; + current = current->prev; + } + } + return count == handle_table_.TableSize(); +} + +template +bool LRUCache::LRUAsExpected(const std::vector>& expect) { + if (Size() != expect.size()) { + return false; + } else { + size_t idx = 0; + LRUHandle* current = lru_.prev; + while (current != &lru_) { + if (current->key != expect[idx].first || current->value != expect[idx].second) { + return false; + } else { + idx++; + current = current->prev; + } + } + } + return true; +} + +template +void LRUCache::LRU_Trim() { + LRUHandle* old = nullptr; + while (usage_ > capacity_ && lru_.next != &lru_) { + old = lru_.next; + bool erased = FinishErase(handle_table_.Remove(old->key)); + if (!erased) { // to avoid unused variable when compiled NDEBUG + assert(erased); + } + } +} + +template +void LRUCache::LRU_Remove(LRUHandle* const e) { + e->next->prev = e->prev; + e->prev->next = e->next; +} + +template +void LRUCache::LRU_Append(LRUHandle* const e) { + // Make "e" newest entry by inserting just before lru_ + e->next = &lru_; + e->prev = lru_.prev; + e->prev->next = e; + e->next->prev = e; +} + +template +void LRUCache::LRU_MoveToHead(LRUHandle* const e) { + LRU_Remove(e); + LRU_Append(e); +} + +template +bool LRUCache::FinishErase(LRUHandle* const e) { + bool erased = 
false; + if (e) { + LRU_Remove(e); + size_--; + usage_ -= e->charge; + delete e; + erased = true; + } + return erased; +} + +} // namespace storage diff --git a/src/storage/src/murmurhash.cc b/src/storage/src/murmurhash.cc new file mode 100644 index 000000000..9c42fcb4e --- /dev/null +++ b/src/storage/src/murmurhash.cc @@ -0,0 +1,197 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +/* + Murmurhash from http://sites.google.com/site/murmurhash/ + + All code is released to the public domain. For business purposes, Murmurhash + is under the MIT license. +*/ +#include "src/murmurhash.h" + +#if defined(__x86_64__) + +// ------------------------------------------------------------------- +// +// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment +// and endian-ness issues if used across multiple platforms. 
#if defined(__x86_64__)

// -------------------------------------------------------------------
//
// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.
//
// 64-bit hash for 64-bit platforms

uint64_t MurmurHash64A(const void* key, int len, unsigned int seed) {
  const uint64_t m = 0xc6a4a7935bd1e995;  // mixing constant
  const int r = 47;                       // mixing shift

  uint64_t h = seed ^ (len * m);

  auto data = static_cast<const uint64_t*>(key);
  auto end = data + (len / 8);

  // Mix 8 bytes at a time.
  while (data != end) {
    uint64_t k = *data++;

    k *= m;
    k ^= k >> r;
    k *= m;

    h ^= k;
    h *= m;
  }

  // Fold in the trailing 0-7 bytes.
  auto data2 = reinterpret_cast<const unsigned char*>(data);

  switch (len & 7) {
    case 7:
      h ^= (static_cast<uint64_t>(data2[6])) << 48;
      [[fallthrough]];
    case 6:
      h ^= (static_cast<uint64_t>(data2[5])) << 40;
      [[fallthrough]];
    case 5:
      h ^= (static_cast<uint64_t>(data2[4])) << 32;
      [[fallthrough]];
    case 4:
      h ^= (static_cast<uint64_t>(data2[3])) << 24;
      [[fallthrough]];
    case 3:
      h ^= (static_cast<uint64_t>(data2[2])) << 16;
      [[fallthrough]];
    case 2:
      h ^= (static_cast<uint64_t>(data2[1])) << 8;
      [[fallthrough]];
    case 1:
      h ^= (static_cast<uint64_t>(data2[0]));
      h *= m;
  }

  h ^= h >> r;
  h *= m;
  h ^= h >> r;

  return h;
}

#elif defined(__i386__)

// -------------------------------------------------------------------
//
// Note - This code makes a few assumptions about how your machine behaves -
//
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
//
// And it has a few limitations -
//
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
//    machines.

unsigned int MurmurHash2(const void* key, int len, unsigned int seed) {
  // 'm' and 'r' are mixing constants generated offline.
  // They're not really 'magic', they just happen to work well.

  const unsigned int m = 0x5bd1e995;
  const int r = 24;

  // Initialize the hash to a 'random' value

  unsigned int h = seed ^ len;

  // Mix 4 bytes at a time into the hash

  auto data = static_cast<const unsigned char*>(key);

  while (len >= 4) {
    unsigned int k = *reinterpret_cast<const unsigned int*>(data);

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  // Handle the last few bytes of the input array

  switch (len) {
    case 3:
      h ^= data[2] << 16;
      [[fallthrough]];
    case 2:
      h ^= data[1] << 8;
      [[fallthrough]];
    case 1:
      h ^= data[0];
      h *= m;
  }

  // Do a few final mixes of the hash to ensure the last few
  // bytes are well-incorporated.

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}

#else

// -------------------------------------------------------------------
//
// Same as MurmurHash2, but endian- and alignment-neutral.
// Half the speed though, alas.

unsigned int MurmurHashNeutral2(const void* key, int len, unsigned int seed) {
  const unsigned int m = 0x5bd1e995;
  const int r = 24;

  unsigned int h = seed ^ len;

  auto data = static_cast<const unsigned char*>(key);

  while (len >= 4) {
    unsigned int k;

    // Byte-wise load: safe on any alignment and endianness-stable.
    k = data[0];
    k |= data[1] << 8;
    k |= data[2] << 16;
    k |= data[3] << 24;

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  switch (len) {
    case 3:
      h ^= data[2] << 16;
      [[fallthrough]];
    case 2:
      h ^= data[1] << 8;
      [[fallthrough]];
    case 1:
      h ^= data[0];
      h *= m;
  }

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}

#endif

// ===========================================================================
// File: src/storage/src/murmurhash.h  (new file)
// ===========================================================================
// Copyright (c) 2017-present, Qihoo, Inc.  All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +/* + Murmurhash from http://sites.google.com/site/murmurhash/ + + All code is released to the public domain. For business purposes, Murmurhash + is under the MIT license. +*/ +#pragma once + +#include +#include "rocksdb/slice.h" + +#if defined(__x86_64__) +# define MURMUR_HASH MurmurHash64A +uint64_t MurmurHash64A(const void* key, int len, unsigned int seed); +# define MurmurHash MurmurHash64A +typedef uint64_t murmur_t; + +#elif defined(__i386__) +# define MURMUR_HASH MurmurHash2 +unsigned int MurmurHash2(const void* key, int len, unsigned int seed); +# define MurmurHash MurmurHash2 +typedef unsigned int murmur_t; + +#else +# define MURMUR_HASH MurmurHashNeutral2 +unsigned int MurmurHashNeutral2(const void* key, int len, unsigned int seed); +# define MurmurHash MurmurHashNeutral2 +using murmur_t = unsigned int; +#endif + +// Allow slice to be hashable by murmur hash. +namespace storage { +using Slice = rocksdb::Slice; +struct murmur_hash { + size_t operator()(const Slice& slice) const { return MurmurHash(slice.data(), static_cast(slice.size()), 0); } +}; +} // namespace storage diff --git a/src/storage/src/mutex.h b/src/storage/src/mutex.h new file mode 100644 index 000000000..b6a25aab6 --- /dev/null +++ b/src/storage/src/mutex.h @@ -0,0 +1,22 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once + +#include + +#include "rocksdb/status.h" + +#include "pstd/mutex.h" + +namespace storage { + +using Status = rocksdb::Status; + +using Mutex = pstd::lock::Mutex; +using CondVar = pstd::lock::CondVar; +using MutexFactory = pstd::lock::MutexFactory; + +} // namespace storage diff --git a/src/storage/src/mutex_impl.h b/src/storage/src/mutex_impl.h new file mode 100644 index 000000000..e0a463957 --- /dev/null +++ b/src/storage/src/mutex_impl.h @@ -0,0 +1,18 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include "src/mutex.h" + +#include "pstd/mutex_impl.h" + +#include + +namespace storage { + +using MutexFactoryImpl = pstd::lock::MutexFactoryImpl; + +} // namespace storage diff --git a/src/storage/src/options_helper.cc b/src/storage/src/options_helper.cc new file mode 100644 index 000000000..b0783c35d --- /dev/null +++ b/src/storage/src/options_helper.cc @@ -0,0 +1,81 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#include "src/options_helper.h" + +#include + +namespace storage { + +// strToInt may throw exception +static bool strToInt(const std::string& value, int* num, int base = 10) { + size_t end; + *num = std::stoi(value, &end, base); + return end >= value.size(); +} + +// strToUint64 may throw exception +static bool strToUint64(const std::string& value, uint64_t* num, int base = 10) { + size_t end; + *num = std::stoull(value, &end, base); + return end >= value.size(); +} + +// strToUint32 may throw exception +static bool strToUint32(const std::string& value, uint32_t* num, int base = 10) { + uint64_t uint64Val; + if (!strToUint64(value, &uint64Val)) { + return false; + } + if ((uint64Val >> 32LL) == 0) { + *num = static_cast(uint64Val); + } else { + throw std::out_of_range(value); + } + return true; +} + +bool ParseOptionMember(const MemberType& member_type, const std::string& value, char* member_address) { + switch (member_type) { + case MemberType::kInt: { + int intVal; + if (!strToInt(value, &intVal)) { + return false; + } + *reinterpret_cast(member_address) = intVal; + break; + } + case MemberType::kUint: { + uint32_t uint32Val; + if (!strToUint32(value, &uint32Val)) { + return false; + } + *reinterpret_cast(member_address) = static_cast(uint32Val); + break; + } + case MemberType::kUint64T: { + uint64_t uint64Val; + if (!strToUint64(value, &uint64Val)) { + return false; + } + *reinterpret_cast(member_address) = uint64Val; + break; + } + case MemberType::kSizeT: { + uint64_t uint64Val; + if (!strToUint64(value, &uint64Val)) { + return false; + } + *reinterpret_cast(member_address) = static_cast(uint64Val); + break; + } + default: { + return false; + } + } + return true; +} + +} // namespace storage diff --git a/src/storage/src/options_helper.h b/src/storage/src/options_helper.h new file mode 100644 index 000000000..d9e4a724f --- /dev/null +++ b/src/storage/src/options_helper.h @@ -0,0 +1,71 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. 
+// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include + +#include + +namespace storage { + +enum class MemberType { + kInt, + kUint, + kUint64T, + kSizeT, + kUnknown, +}; + +struct MemberTypeInfo { + int offset; + MemberType type; +}; + +// offset_of is used to get the offset of a class data member with non standard-layout +// http://en.cppreference.com/w/cpp/concept/StandardLayoutType +// https://gist.github.com/graphitemaster/494f21190bb2c63c5516 +template +inline int offset_of(T1 T2::*member) { + static T2 obj; + return int(size_t(&(obj.*member)) - size_t(&obj)); +} + +static std::unordered_map mutable_db_options_member_type_info = { + {"max_background_jobs", {offsetof(struct rocksdb::DBOptions, max_background_jobs), MemberType::kInt}}, + {"max_background_compactions", {offsetof(struct rocksdb::DBOptions, max_background_compactions), MemberType::kInt}}, + // {"base_background_compactions", {offsetof(struct rocksdb::DBOptions, base_background_compactions), + // MemberType::kInt}}, + {"max_open_files", {offsetof(struct rocksdb::DBOptions, max_open_files), MemberType::kInt}}, + {"bytes_per_sync", {offsetof(struct rocksdb::DBOptions, bytes_per_sync), MemberType::kUint64T}}, + {"delayed_write_rate", {offsetof(struct rocksdb::DBOptions, delayed_write_rate), MemberType::kUint64T}}, + {"max_total_wal_size", {offsetof(struct rocksdb::DBOptions, max_total_wal_size), MemberType::kUint64T}}, + {"wal_bytes_per_sync", {offsetof(struct rocksdb::DBOptions, wal_bytes_per_sync), MemberType::kUint64T}}, + {"stats_dump_period_sec", {offsetof(struct rocksdb::DBOptions, stats_dump_period_sec), MemberType::kUint}}, +}; + +static std::unordered_map mutable_cf_options_member_type_info = { + {"max_write_buffer_number", {offset_of(&rocksdb::ColumnFamilyOptions::max_write_buffer_number), 
MemberType::kInt}}, + {"write_buffer_size", {offset_of(&rocksdb::ColumnFamilyOptions::write_buffer_size), MemberType::kSizeT}}, + {"target_file_size_base", {offset_of(&rocksdb::ColumnFamilyOptions::target_file_size_base), MemberType::kUint64T}}, + {"target_file_size_multiplier", + {offset_of(&rocksdb::ColumnFamilyOptions::target_file_size_multiplier), MemberType::kInt}}, + {"arena_block_size", {offset_of(&rocksdb::ColumnFamilyOptions::arena_block_size), MemberType::kSizeT}}, + {"level0_file_num_compaction_trigger", + {offset_of(&rocksdb::ColumnFamilyOptions::level0_file_num_compaction_trigger), MemberType::kInt}}, + {"level0_slowdown_writes_trigger", + {offset_of(&rocksdb::ColumnFamilyOptions::level0_slowdown_writes_trigger), MemberType::kInt}}, + {"level0_stop_writes_trigger", + {offset_of(&rocksdb::ColumnFamilyOptions::level0_stop_writes_trigger), MemberType::kInt}}, + {"max_compaction_bytes", {offset_of(&rocksdb::ColumnFamilyOptions::max_compaction_bytes), MemberType::kUint64T}}, + {"soft_pending_compaction_bytes_limit", + {offset_of(&rocksdb::ColumnFamilyOptions::soft_pending_compaction_bytes_limit), MemberType::kUint64T}}, + {"hard_pending_compaction_bytes_limit", + {offset_of(&rocksdb::ColumnFamilyOptions::hard_pending_compaction_bytes_limit), MemberType::kUint64T}}, +}; + +extern bool ParseOptionMember(const MemberType& member_type, const std::string& value, char* member_address); + +} // namespace storage diff --git a/src/storage/src/redis.cc b/src/storage/src/redis.cc new file mode 100644 index 000000000..6a7bea8e6 --- /dev/null +++ b/src/storage/src/redis.cc @@ -0,0 +1,172 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#include "src/redis.h" +#include + +namespace storage { + +Redis::Redis(Storage* const s, const DataType& type) + : storage_(s), + type_(type), + lock_mgr_(std::make_shared(1000, 0, std::make_shared())), + small_compaction_threshold_(5000) { + statistics_store_ = std::make_unique>(); + scan_cursors_store_ = std::make_unique>(); + scan_cursors_store_->SetCapacity(5000); + default_compact_range_options_.exclusive_manual_compaction = false; + default_compact_range_options_.change_level = true; + handles_.clear(); +} + +Redis::~Redis() { + std::vector tmp_handles = handles_; + handles_.clear(); + for (auto handle : tmp_handles) { + delete handle; + } + delete db_; +} + +Status Redis::GetScanStartPoint(const Slice& key, const Slice& pattern, int64_t cursor, std::string* start_point) { + std::string index_key = key.ToString() + "_" + pattern.ToString() + "_" + std::to_string(cursor); + return scan_cursors_store_->Lookup(index_key, start_point); +} + +Status Redis::StoreScanNextPoint(const Slice& key, const Slice& pattern, int64_t cursor, + const std::string& next_point) { + std::string index_key = key.ToString() + "_" + pattern.ToString() + "_" + std::to_string(cursor); + return scan_cursors_store_->Insert(index_key, next_point); +} + +Status Redis::SetMaxCacheStatisticKeys(size_t max_cache_statistic_keys) { + statistics_store_->SetCapacity(max_cache_statistic_keys); + return Status::OK(); +} + +Status Redis::SetSmallCompactionThreshold(size_t small_compaction_threshold) { + small_compaction_threshold_ = small_compaction_threshold; + return Status::OK(); +} + +Status Redis::UpdateSpecificKeyStatistics(const std::string& key, size_t count) { + if ((statistics_store_->Capacity() != 0U) && (count != 0U)) { + size_t total = 0; + statistics_store_->Lookup(key, &total); + statistics_store_->Insert(key, total + count); + AddCompactKeyTaskIfNeeded(key, total + count); + } + return Status::OK(); +} + +Status Redis::AddCompactKeyTaskIfNeeded(const std::string& key, size_t 
total) { + if (total < small_compaction_threshold_) { + return Status::OK(); + } else { + storage_->AddBGTask({type_, kCompactKey, key}); + statistics_store_->Remove(key); + } + return Status::OK(); +} + +Status Redis::SetOptions(const OptionType& option_type, const std::unordered_map& options) { + if (option_type == OptionType::kDB) { + return db_->SetDBOptions(options); + } + if (handles_.empty()) { + return db_->SetOptions(db_->DefaultColumnFamily(), options); + } + Status s; + for (auto handle : handles_) { + s = db_->SetOptions(handle, options); + if (!s.ok()) { + break; + } + } + return s; +} + +void Redis::GetRocksDBInfo(std::string &info, const char *prefix) { + std::ostringstream string_stream; + string_stream << "#" << prefix << "RocksDB" << "\r\n"; + + auto write_stream_key_value=[&](const Slice& property, const char *metric) { + uint64_t value; + db_->GetAggregatedIntProperty(property, &value); + string_stream << prefix << metric << ':' << value << "\r\n"; + }; + + auto mapToString=[&](const std::map& map_data, const char *prefix) { + for (const auto& kv : map_data) { + std::string str_data; + str_data += kv.first + ": " + kv.second + "\r\n"; + string_stream << prefix << str_data; + } + }; + + // memtables num + write_stream_key_value(rocksdb::DB::Properties::kNumImmutableMemTable, "num_immutable_mem_table"); + write_stream_key_value(rocksdb::DB::Properties::kNumImmutableMemTableFlushed, "num_immutable_mem_table_flushed"); + write_stream_key_value(rocksdb::DB::Properties::kMemTableFlushPending, "mem_table_flush_pending"); + write_stream_key_value(rocksdb::DB::Properties::kNumRunningFlushes, "num_running_flushes"); + + // compaction + write_stream_key_value(rocksdb::DB::Properties::kCompactionPending, "compaction_pending"); + write_stream_key_value(rocksdb::DB::Properties::kNumRunningCompactions, "num_running_compactions"); + + // background errors + write_stream_key_value(rocksdb::DB::Properties::kBackgroundErrors, "background_errors"); + + // memtables 
size + write_stream_key_value(rocksdb::DB::Properties::kCurSizeActiveMemTable, "cur_size_active_mem_table"); + write_stream_key_value(rocksdb::DB::Properties::kCurSizeAllMemTables, "cur_size_all_mem_tables"); + write_stream_key_value(rocksdb::DB::Properties::kSizeAllMemTables, "size_all_mem_tables"); + + // keys + write_stream_key_value(rocksdb::DB::Properties::kEstimateNumKeys, "estimate_num_keys"); + + // table readers mem + write_stream_key_value(rocksdb::DB::Properties::kEstimateTableReadersMem, "estimate_table_readers_mem"); + + // snapshot + write_stream_key_value(rocksdb::DB::Properties::kNumSnapshots, "num_snapshots"); + + // version + write_stream_key_value(rocksdb::DB::Properties::kNumLiveVersions, "num_live_versions"); + write_stream_key_value(rocksdb::DB::Properties::kCurrentSuperVersionNumber, "current_super_version_number"); + + // live data size + write_stream_key_value(rocksdb::DB::Properties::kEstimateLiveDataSize, "estimate_live_data_size"); + + // sst files + write_stream_key_value(rocksdb::DB::Properties::kTotalSstFilesSize, "total_sst_files_size"); + write_stream_key_value(rocksdb::DB::Properties::kLiveSstFilesSize, "live_sst_files_size"); + + // pending compaction bytes + write_stream_key_value(rocksdb::DB::Properties::kEstimatePendingCompactionBytes, "estimate_pending_compaction_bytes"); + + // block cache + write_stream_key_value(rocksdb::DB::Properties::kBlockCacheCapacity, "block_cache_capacity"); + write_stream_key_value(rocksdb::DB::Properties::kBlockCacheUsage, "block_cache_usage"); + write_stream_key_value(rocksdb::DB::Properties::kBlockCachePinnedUsage, "block_cache_pinned_usage"); + + // blob files + write_stream_key_value(rocksdb::DB::Properties::kNumBlobFiles, "num_blob_files"); + write_stream_key_value(rocksdb::DB::Properties::kBlobStats, "blob_stats"); + write_stream_key_value(rocksdb::DB::Properties::kTotalBlobFileSize, "total_blob_file_size"); + write_stream_key_value(rocksdb::DB::Properties::kLiveBlobFileSize, 
"live_blob_file_size"); + + // column family stats + std::map mapvalues; + db_->rocksdb::DB::GetMapProperty(rocksdb::DB::Properties::kCFStats,&mapvalues); + mapToString(mapvalues,prefix); + info.append(string_stream.str()); +} + +void Redis::SetWriteWalOptions(const bool is_wal_disable) { + default_write_options_.disableWAL = is_wal_disable; +} + +} // namespace storage diff --git a/src/storage/src/redis.h b/src/storage/src/redis.h new file mode 100644 index 000000000..fde38a6e9 --- /dev/null +++ b/src/storage/src/redis.h @@ -0,0 +1,84 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include +#include +#include + +#include "rocksdb/db.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" + +#include "src/lock_mgr.h" +#include "src/lru_cache.h" +#include "src/mutex_impl.h" +#include "storage/storage.h" + +namespace storage { +using Status = rocksdb::Status; +using Slice = rocksdb::Slice; + +class Redis { + public: + Redis(Storage* storage, const DataType& type); + virtual ~Redis(); + + rocksdb::DB* GetDB() { return db_; } + + Status SetOptions(const OptionType& option_type, const std::unordered_map& options); + void SetWriteWalOptions(const bool is_wal_disable); + + // Common Commands + virtual Status Open(const StorageOptions& storage_options, const std::string& db_path) = 0; + virtual Status CompactRange(const rocksdb::Slice* begin, const rocksdb::Slice* end, + const ColumnFamilyType& type = kMetaAndData) = 0; + virtual Status GetProperty(const std::string& property, uint64_t* out) = 0; + virtual Status ScanKeyNum(KeyInfo* key_info) = 0; + virtual Status ScanKeys(const std::string& pattern, std::vector* keys) = 0; + virtual Status PKPatternMatchDel(const std::string& pattern, int32_t* ret) = 
0; + + // Keys Commands + virtual Status Expire(const Slice& key, int32_t ttl) = 0; + virtual Status Del(const Slice& key) = 0; + virtual bool Scan(const std::string& start_key, const std::string& pattern, std::vector* keys, + int64_t* count, std::string* next_key) = 0; + virtual bool PKExpireScan(const std::string& start_key, int32_t min_timestamp, int32_t max_timestamp, + std::vector* keys, int64_t* leftover_visits, std::string* next_key) = 0; + virtual Status Expireat(const Slice& key, int32_t timestamp) = 0; + virtual Status Persist(const Slice& key) = 0; + virtual Status TTL(const Slice& key, int64_t* timestamp) = 0; + + Status SetMaxCacheStatisticKeys(size_t max_cache_statistic_keys); + Status SetSmallCompactionThreshold(size_t small_compaction_threshold); + void GetRocksDBInfo(std::string &info, const char *prefix); + + protected: + Storage* const storage_; + DataType type_; + std::shared_ptr lock_mgr_; + rocksdb::DB* db_ = nullptr; + + std::vector handles_; + rocksdb::WriteOptions default_write_options_; + rocksdb::ReadOptions default_read_options_; + rocksdb::CompactRangeOptions default_compact_range_options_; + + // For Scan + std::unique_ptr> scan_cursors_store_; + + Status GetScanStartPoint(const Slice& key, const Slice& pattern, int64_t cursor, std::string* start_point); + Status StoreScanNextPoint(const Slice& key, const Slice& pattern, int64_t cursor, const std::string& next_point); + + // For Statistics + std::atomic small_compaction_threshold_; + std::unique_ptr> statistics_store_; + + Status UpdateSpecificKeyStatistics(const std::string& key, size_t count); + Status AddCompactKeyTaskIfNeeded(const std::string& key, size_t total); +}; + +} // namespace storage diff --git a/src/storage/src/redis_hashes.cc b/src/storage/src/redis_hashes.cc new file mode 100644 index 000000000..549970898 --- /dev/null +++ b/src/storage/src/redis_hashes.cc @@ -0,0 +1,1337 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. 
+// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include "src/redis_hashes.h" + +#include + +#include +#include + +#include "src/base_filter.h" +#include "src/scope_record_lock.h" +#include "src/scope_snapshot.h" +#include "storage/util.h" + +namespace storage { + +RedisHashes::RedisHashes(Storage* const s, const DataType& type) : Redis(s, type) {} + +Status RedisHashes::Open(const StorageOptions& storage_options, const std::string& db_path) { + statistics_store_->SetCapacity(storage_options.statistics_max_size); + small_compaction_threshold_ = storage_options.small_compaction_threshold; + + rocksdb::Options ops(storage_options.options); + Status s = rocksdb::DB::Open(ops, db_path, &db_); + if (s.ok()) { + // create column family + rocksdb::ColumnFamilyHandle* cf; + s = db_->CreateColumnFamily(rocksdb::ColumnFamilyOptions(), "data_cf", &cf); + if (!s.ok()) { + return s; + } + // close DB + delete cf; + delete db_; + } + + // Open + rocksdb::DBOptions db_ops(storage_options.options); + rocksdb::ColumnFamilyOptions meta_cf_ops(storage_options.options); + rocksdb::ColumnFamilyOptions data_cf_ops(storage_options.options); + meta_cf_ops.compaction_filter_factory = std::make_shared(); + data_cf_ops.compaction_filter_factory = std::make_shared(&db_, &handles_); + + // use the bloom filter policy to reduce disk reads + rocksdb::BlockBasedTableOptions table_ops(storage_options.table_options); + table_ops.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true)); + rocksdb::BlockBasedTableOptions meta_cf_table_ops(table_ops); + rocksdb::BlockBasedTableOptions data_cf_table_ops(table_ops); + if (!storage_options.share_block_cache && storage_options.block_cache_size > 0) { + meta_cf_table_ops.block_cache = rocksdb::NewLRUCache(storage_options.block_cache_size); + 
data_cf_table_ops.block_cache = rocksdb::NewLRUCache(storage_options.block_cache_size); + } + meta_cf_ops.table_factory.reset(rocksdb::NewBlockBasedTableFactory(meta_cf_table_ops)); + data_cf_ops.table_factory.reset(rocksdb::NewBlockBasedTableFactory(data_cf_table_ops)); + + std::vector column_families; + // Meta CF + column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, meta_cf_ops); + // Data CF + column_families.emplace_back("data_cf", data_cf_ops); + return rocksdb::DB::Open(db_ops, db_path, column_families, &handles_, &db_); +} + +Status RedisHashes::CompactRange(const rocksdb::Slice* begin, const rocksdb::Slice* end, const ColumnFamilyType& type) { + if (type == kMeta || type == kMetaAndData) { + db_->CompactRange(default_compact_range_options_, handles_[0], begin, end); + } + if (type == kData || type == kMetaAndData) { + db_->CompactRange(default_compact_range_options_, handles_[1], begin, end); + } + return Status::OK(); +} + +Status RedisHashes::GetProperty(const std::string& property, uint64_t* out) { + std::string value; + db_->GetProperty(handles_[0], property, &value); + *out = std::strtoull(value.c_str(), nullptr, 10); + db_->GetProperty(handles_[1], property, &value); + *out += std::strtoull(value.c_str(), nullptr, 10); + return Status::OK(); +} + +Status RedisHashes::ScanKeyNum(KeyInfo* key_info) { + uint64_t keys = 0; + uint64_t expires = 0; + uint64_t ttl_sum = 0; + uint64_t invaild_keys = 0; + + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + int64_t curtime; + rocksdb::Env::Default()->GetCurrentTime(&curtime); + + rocksdb::Iterator* iter = db_->NewIterator(iterator_options, handles_[0]); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedHashesMetaValue parsed_hashes_meta_value(iter->value()); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) 
{ + invaild_keys++; + } else { + keys++; + if (!parsed_hashes_meta_value.IsPermanentSurvival()) { + expires++; + ttl_sum += parsed_hashes_meta_value.timestamp() - curtime; + } + } + } + delete iter; + + key_info->keys = keys; + key_info->expires = expires; + key_info->avg_ttl = (expires != 0) ? ttl_sum / expires : 0; + key_info->invaild_keys = invaild_keys; + return Status::OK(); +} + +Status RedisHashes::ScanKeys(const std::string& pattern, std::vector* keys) { + std::string key; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + rocksdb::Iterator* iter = db_->NewIterator(iterator_options, handles_[0]); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedHashesMetaValue parsed_hashes_meta_value(iter->value()); + if (!parsed_hashes_meta_value.IsStale() && parsed_hashes_meta_value.count() != 0) { + key = iter->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + keys->push_back(key); + } + } + } + delete iter; + return Status::OK(); +} + +Status RedisHashes::PKPatternMatchDel(const std::string& pattern, int32_t* ret) { + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + std::string key; + std::string meta_value; + int32_t total_delete = 0; + Status s; + rocksdb::WriteBatch batch; + rocksdb::Iterator* iter = db_->NewIterator(iterator_options, handles_[0]); + iter->SeekToFirst(); + while (iter->Valid()) { + key = iter->key().ToString(); + meta_value = iter->value().ToString(); + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (!parsed_hashes_meta_value.IsStale() && (parsed_hashes_meta_value.count() != 0) && + (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0)) { + 
parsed_hashes_meta_value.InitialMetaValue(); + batch.Put(handles_[0], key, meta_value); + } + if (static_cast(batch.Count()) >= BATCH_DELETE_LIMIT) { + s = db_->Write(default_write_options_, &batch); + if (s.ok()) { + total_delete += static_cast( batch.Count()); + batch.Clear(); + } else { + *ret = total_delete; + return s; + } + } + iter->Next(); + } + if (batch.Count() != 0U) { + s = db_->Write(default_write_options_, &batch); + if (s.ok()) { + total_delete += static_cast(batch.Count()); + batch.Clear(); + } + } + + *ret = total_delete; + return s; +} + +Status RedisHashes::HDel(const Slice& key, const std::vector& fields, int32_t* ret) { + uint32_t statistic = 0; + std::vector filtered_fields; + std::unordered_set field_set; + for (const auto & iter : fields) { + const std::string& field = iter; + if (field_set.find(field) == field_set.end()) { + field_set.insert(field); + filtered_fields.push_back(iter); + } + } + + rocksdb::WriteBatch batch; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + int32_t del_cnt = 0; + int32_t version = 0; + ScopeRecordLock l(lock_mgr_, key); + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + *ret = 0; + return Status::OK(); + } else { + std::string data_value; + version = parsed_hashes_meta_value.version(); + for (const auto& field : filtered_fields) { + HashesDataKey hashes_data_key(key, version, field); + s = db_->Get(read_options, handles_[1], hashes_data_key.Encode(), &data_value); + if (s.ok()) { + del_cnt++; + statistic++; + batch.Delete(handles_[1], hashes_data_key.Encode()); + } else if (s.IsNotFound()) { + continue; + } else { + return s; + } + } + *ret = del_cnt; + if 
(!parsed_hashes_meta_value.CheckModifyCount(-del_cnt)){ + return Status::InvalidArgument("hash size overflow"); + } + parsed_hashes_meta_value.ModifyCount(-del_cnt); + batch.Put(handles_[0], key, meta_value); + } + } else if (s.IsNotFound()) { + *ret = 0; + return Status::OK(); + } else { + return s; + } + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; +} + +Status RedisHashes::HExists(const Slice& key, const Slice& field) { + std::string value; + return HGet(key, field, &value); +} + +Status RedisHashes::HGet(const Slice& key, const Slice& field, std::string* value) { + std::string meta_value; + int32_t version = 0; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_hashes_meta_value.count() == 0) { + return Status::NotFound(); + } else { + version = parsed_hashes_meta_value.version(); + HashesDataKey data_key(key, version, field); + s = db_->Get(read_options, handles_[1], data_key.Encode(), value); + } + } + return s; +} + +Status RedisHashes::HGetall(const Slice& key, std::vector* fvs) { + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + int32_t version = 0; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_hashes_meta_value.count() == 0) { + return Status::NotFound(); + } else { + version = parsed_hashes_meta_value.version(); + 
HashesDataKey hashes_data_key(key, version, ""); + Slice prefix = hashes_data_key.Encode(); + auto iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); iter->Next()) { + ParsedHashesDataKey parsed_hashes_data_key(iter->key()); + fvs->push_back({parsed_hashes_data_key.field().ToString(), iter->value().ToString()}); + } + delete iter; + } + } + return s; +} + +Status RedisHashes::HIncrby(const Slice& key, const Slice& field, int64_t value, int64_t* ret) { + *ret = 0; + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + + int32_t version = 0; + uint32_t statistic = 0; + std::string old_value; + std::string meta_value; + + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + char value_buf[32] = {0}; + char meta_value_buf[4] = {0}; + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + version = parsed_hashes_meta_value.UpdateVersion(); + parsed_hashes_meta_value.set_count(1); + parsed_hashes_meta_value.set_timestamp(0); + batch.Put(handles_[0], key, meta_value); + HashesDataKey hashes_data_key(key, version, field); + Int64ToStr(value_buf, 32, value); + batch.Put(handles_[1], hashes_data_key.Encode(), value_buf); + *ret = value; + } else { + version = parsed_hashes_meta_value.version(); + HashesDataKey hashes_data_key(key, version, field); + s = db_->Get(default_read_options_, handles_[1], hashes_data_key.Encode(), &old_value); + if (s.ok()) { + int64_t ival = 0; + if (StrToInt64(old_value.data(), old_value.size(), &ival) == 0) { + return Status::Corruption("hash value is not an integer"); + } + if ((value >= 0 && LLONG_MAX - value < ival) || (value < 0 && LLONG_MIN - value > ival)) { + return Status::InvalidArgument("Overflow"); + } + *ret = ival + value; + Int64ToStr(value_buf, 32, *ret); + batch.Put(handles_[1], hashes_data_key.Encode(), 
value_buf); + statistic++; + } else if (s.IsNotFound()) { + Int64ToStr(value_buf, 32, value); + if (!parsed_hashes_meta_value.CheckModifyCount(1)){ + return Status::InvalidArgument("hash size overflow"); + } + parsed_hashes_meta_value.ModifyCount(1); + batch.Put(handles_[0], key, meta_value); + batch.Put(handles_[1], hashes_data_key.Encode(), value_buf); + *ret = value; + } else { + return s; + } + } + } else if (s.IsNotFound()) { + EncodeFixed32(meta_value_buf, 1); + HashesMetaValue hashes_meta_value(Slice(meta_value_buf, sizeof(int32_t))); + version = hashes_meta_value.UpdateVersion(); + batch.Put(handles_[0], key, hashes_meta_value.Encode()); + HashesDataKey hashes_data_key(key, version, field); + + Int64ToStr(value_buf, 32, value); + batch.Put(handles_[1], hashes_data_key.Encode(), value_buf); + *ret = value; + } else { + return s; + } + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; +} + +Status RedisHashes::HIncrbyfloat(const Slice& key, const Slice& field, const Slice& by, std::string* new_value) { + new_value->clear(); + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + + int32_t version = 0; + uint32_t statistic = 0; + std::string meta_value; + std::string old_value_str; + long double long_double_by; + + if (StrToLongDouble(by.data(), by.size(), &long_double_by) == -1) { + return Status::Corruption("value is not a vaild float"); + } + + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + char meta_value_buf[4] = {0}; + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + version = parsed_hashes_meta_value.UpdateVersion(); + parsed_hashes_meta_value.set_count(1); + parsed_hashes_meta_value.set_timestamp(0); + batch.Put(handles_[0], key, meta_value); + HashesDataKey hashes_data_key(key, version, field); + + LongDoubleToStr(long_double_by, 
new_value); + batch.Put(handles_[1], hashes_data_key.Encode(), *new_value); + } else { + version = parsed_hashes_meta_value.version(); + HashesDataKey hashes_data_key(key, version, field); + s = db_->Get(default_read_options_, handles_[1], hashes_data_key.Encode(), &old_value_str); + if (s.ok()) { + long double total; + long double old_value; + if (StrToLongDouble(old_value_str.data(), old_value_str.size(), &old_value) == -1) { + return Status::Corruption("value is not a vaild float"); + } + + total = old_value + long_double_by; + if (LongDoubleToStr(total, new_value) == -1) { + return Status::InvalidArgument("Overflow"); + } + batch.Put(handles_[1], hashes_data_key.Encode(), *new_value); + statistic++; + } else if (s.IsNotFound()) { + LongDoubleToStr(long_double_by, new_value); + if (!parsed_hashes_meta_value.CheckModifyCount(1)){ + return Status::InvalidArgument("hash size overflow"); + } + parsed_hashes_meta_value.ModifyCount(1); + batch.Put(handles_[0], key, meta_value); + batch.Put(handles_[1], hashes_data_key.Encode(), *new_value); + } else { + return s; + } + } + } else if (s.IsNotFound()) { + EncodeFixed32(meta_value_buf, 1); + HashesMetaValue hashes_meta_value(Slice(meta_value_buf, sizeof(int32_t))); + version = hashes_meta_value.UpdateVersion(); + batch.Put(handles_[0], key, hashes_meta_value.Encode()); + + HashesDataKey hashes_data_key(key, version, field); + LongDoubleToStr(long_double_by, new_value); + batch.Put(handles_[1], hashes_data_key.Encode(), *new_value); + } else { + return s; + } + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; +} + +Status RedisHashes::HKeys(const Slice& key, std::vector* fields) { + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + int32_t version = 0; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) 
{ + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_hashes_meta_value.count() == 0) { + return Status::NotFound(); + } else { + version = parsed_hashes_meta_value.version(); + HashesDataKey hashes_data_key(key, version, ""); + Slice prefix = hashes_data_key.Encode(); + auto iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); iter->Next()) { + ParsedHashesDataKey parsed_hashes_data_key(iter->key()); + fields->push_back(parsed_hashes_data_key.field().ToString()); + } + delete iter; + } + } + return s; +} + +Status RedisHashes::HLen(const Slice& key, int32_t* ret) { + *ret = 0; + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale()) { + *ret = 0; + return Status::NotFound("Stale"); + } else if (parsed_hashes_meta_value.count() == 0) { + return Status::NotFound(); + } else { + *ret = parsed_hashes_meta_value.count(); + } + } else if (s.IsNotFound()) { + *ret = 0; + } + return s; +} + +Status RedisHashes::HMGet(const Slice& key, const std::vector& fields, std::vector* vss) { + vss->clear(); + + int32_t version = 0; + bool is_stale = false; + std::string value; + std::string meta_value; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if ((is_stale = parsed_hashes_meta_value.IsStale()) || parsed_hashes_meta_value.count() == 0) { + for (size_t idx = 0; idx < fields.size(); ++idx) { + vss->push_back({std::string(), Status::NotFound()}); + } + return Status::NotFound(is_stale ? 
"Stale" : ""); + } else { + version = parsed_hashes_meta_value.version(); + for (const auto& field : fields) { + HashesDataKey hashes_data_key(key, version, field); + s = db_->Get(read_options, handles_[1], hashes_data_key.Encode(), &value); + if (s.ok()) { + vss->push_back({value, Status::OK()}); + } else if (s.IsNotFound()) { + vss->push_back({std::string(), Status::NotFound()}); + } else { + vss->clear(); + return s; + } + } + } + return Status::OK(); + } else if (s.IsNotFound()) { + for (size_t idx = 0; idx < fields.size(); ++idx) { + vss->push_back({std::string(), Status::NotFound()}); + } + } + return s; +} + +Status RedisHashes::HMSet(const Slice& key, const std::vector& fvs) { + uint32_t statistic = 0; + std::unordered_set fields; + std::vector filtered_fvs; + for (auto iter = fvs.rbegin(); iter != fvs.rend(); ++iter) { + std::string field = iter->field; + if (fields.find(field) == fields.end()) { + fields.insert(field); + filtered_fvs.push_back(*iter); + } + } + + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + + int32_t version = 0; + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + char meta_value_buf[4] = {0}; + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + version = parsed_hashes_meta_value.InitialMetaValue(); + if (!parsed_hashes_meta_value.check_set_count(static_cast(filtered_fvs.size()))) { + return Status::InvalidArgument("hash size overflow"); + } + parsed_hashes_meta_value.set_count(static_cast(filtered_fvs.size())); + batch.Put(handles_[0], key, meta_value); + for (const auto& fv : filtered_fvs) { + HashesDataKey hashes_data_key(key, version, fv.field); + batch.Put(handles_[1], hashes_data_key.Encode(), fv.value); + } + } else { + int32_t count = 0; + std::string data_value; + version = parsed_hashes_meta_value.version(); + for (const auto& fv : filtered_fvs) 
{ + HashesDataKey hashes_data_key(key, version, fv.field); + s = db_->Get(default_read_options_, handles_[1], hashes_data_key.Encode(), &data_value); + if (s.ok()) { + statistic++; + batch.Put(handles_[1], hashes_data_key.Encode(), fv.value); + } else if (s.IsNotFound()) { + count++; + batch.Put(handles_[1], hashes_data_key.Encode(), fv.value); + } else { + return s; + } + } + if (!parsed_hashes_meta_value.CheckModifyCount(count)){ + return Status::InvalidArgument("hash size overflow"); + } + parsed_hashes_meta_value.ModifyCount(count); + batch.Put(handles_[0], key, meta_value); + } + } else if (s.IsNotFound()) { + EncodeFixed32(meta_value_buf, filtered_fvs.size()); + HashesMetaValue hashes_meta_value(Slice(meta_value_buf, sizeof(int32_t))); + version = hashes_meta_value.UpdateVersion(); + batch.Put(handles_[0], key, hashes_meta_value.Encode()); + for (const auto& fv : filtered_fvs) { + HashesDataKey hashes_data_key(key, version, fv.field); + batch.Put(handles_[1], hashes_data_key.Encode(), fv.value); + } + } + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; +} + +Status RedisHashes::HSet(const Slice& key, const Slice& field, const Slice& value, int32_t* res) { + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + + int32_t version = 0; + uint32_t statistic = 0; + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + char meta_value_buf[4] = {0}; + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + version = parsed_hashes_meta_value.InitialMetaValue(); + parsed_hashes_meta_value.set_count(1); + batch.Put(handles_[0], key, meta_value); + HashesDataKey data_key(key, version, field); + batch.Put(handles_[1], data_key.Encode(), value); + *res = 1; + } else { + version = parsed_hashes_meta_value.version(); + std::string 
data_value; + HashesDataKey hashes_data_key(key, version, field); + s = db_->Get(default_read_options_, handles_[1], hashes_data_key.Encode(), &data_value); + if (s.ok()) { + *res = 0; + if (data_value == value.ToString()) { + return Status::OK(); + } else { + batch.Put(handles_[1], hashes_data_key.Encode(), value); + statistic++; + } + } else if (s.IsNotFound()) { + if (!parsed_hashes_meta_value.CheckModifyCount(1)){ + return Status::InvalidArgument("hash size overflow"); + } + parsed_hashes_meta_value.ModifyCount(1); + batch.Put(handles_[0], key, meta_value); + batch.Put(handles_[1], hashes_data_key.Encode(), value); + *res = 1; + } else { + return s; + } + } + } else if (s.IsNotFound()) { + EncodeFixed32(meta_value_buf, 1); + HashesMetaValue meta_value(Slice(meta_value_buf, sizeof(int32_t))); + version = meta_value.UpdateVersion(); + batch.Put(handles_[0], key, meta_value.Encode()); + HashesDataKey data_key(key, version, field); + batch.Put(handles_[1], data_key.Encode(), value); + *res = 1; + } else { + return s; + } + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; +} + +Status RedisHashes::HSetnx(const Slice& key, const Slice& field, const Slice& value, int32_t* ret) { + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + + int32_t version = 0; + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + char meta_value_buf[4] = {0}; + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + version = parsed_hashes_meta_value.InitialMetaValue(); + parsed_hashes_meta_value.set_count(1); + batch.Put(handles_[0], key, meta_value); + HashesDataKey hashes_data_key(key, version, field); + batch.Put(handles_[1], hashes_data_key.Encode(), value); + *ret = 1; + } else { + version = parsed_hashes_meta_value.version(); + HashesDataKey 
hashes_data_key(key, version, field); + std::string data_value; + s = db_->Get(default_read_options_, handles_[1], hashes_data_key.Encode(), &data_value); + if (s.ok()) { + *ret = 0; + } else if (s.IsNotFound()) { + if (!parsed_hashes_meta_value.CheckModifyCount(1)){ + return Status::InvalidArgument("hash size overflow"); + } + parsed_hashes_meta_value.ModifyCount(1); + batch.Put(handles_[0], key, meta_value); + batch.Put(handles_[1], hashes_data_key.Encode(), value); + *ret = 1; + } else { + return s; + } + } + } else if (s.IsNotFound()) { + EncodeFixed32(meta_value_buf, 1); + HashesMetaValue hashes_meta_value(Slice(meta_value_buf, sizeof(int32_t))); + version = hashes_meta_value.UpdateVersion(); + batch.Put(handles_[0], key, hashes_meta_value.Encode()); + HashesDataKey hashes_data_key(key, version, field); + batch.Put(handles_[1], hashes_data_key.Encode(), value); + *ret = 1; + } else { + return s; + } + return db_->Write(default_write_options_, &batch); +} + +Status RedisHashes::HVals(const Slice& key, std::vector* values) { + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + int32_t version = 0; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_hashes_meta_value.count() == 0) { + return Status::NotFound(); + } else { + version = parsed_hashes_meta_value.version(); + HashesDataKey hashes_data_key(key, version, ""); + Slice prefix = hashes_data_key.Encode(); + auto iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); iter->Next()) { + values->push_back(iter->value().ToString()); + } + delete iter; + } + } + return s; +} + +Status RedisHashes::HStrlen(const Slice& key, const 
Slice& field, int32_t* len) { + std::string value; + Status s = HGet(key, field, &value); + if (s.ok()) { + *len = static_cast(value.size()); + } else { + *len = 0; + } + return s; +} + +Status RedisHashes::HScan(const Slice& key, int64_t cursor, const std::string& pattern, int64_t count, + std::vector* field_values, int64_t* next_cursor) { + *next_cursor = 0; + field_values->clear(); + if (cursor < 0) { + *next_cursor = 0; + return Status::OK(); + } + + int64_t rest = count; + int64_t step_length = count; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + *next_cursor = 0; + return Status::NotFound(); + } else { + std::string sub_field; + std::string start_point; + int32_t version = parsed_hashes_meta_value.version(); + s = GetScanStartPoint(key, pattern, cursor, &start_point); + if (s.IsNotFound()) { + cursor = 0; + if (isTailWildcard(pattern)) { + start_point = pattern.substr(0, pattern.size() - 1); + } + } + if (isTailWildcard(pattern)) { + sub_field = pattern.substr(0, pattern.size() - 1); + } + + HashesDataKey hashes_data_prefix(key, version, sub_field); + HashesDataKey hashes_start_data_key(key, version, start_point); + std::string prefix = hashes_data_prefix.Encode().ToString(); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(hashes_start_data_key.Encode()); iter->Valid() && rest > 0 && iter->key().starts_with(prefix); + iter->Next()) { + ParsedHashesDataKey parsed_hashes_data_key(iter->key()); + std::string field = parsed_hashes_data_key.field().ToString(); + if (StringMatch(pattern.data(), pattern.size(), field.data(), field.size(), 0) != 0) { + 
field_values->push_back({field, iter->value().ToString()}); + } + rest--; + } + + if (iter->Valid() && (iter->key().compare(prefix) <= 0 || iter->key().starts_with(prefix))) { + *next_cursor = cursor + step_length; + ParsedHashesDataKey parsed_hashes_data_key(iter->key()); + std::string next_field = parsed_hashes_data_key.field().ToString(); + StoreScanNextPoint(key, pattern, *next_cursor, next_field); + } else { + *next_cursor = 0; + } + delete iter; + } + } else { + *next_cursor = 0; + return s; + } + return Status::OK(); +} + +Status RedisHashes::HScanx(const Slice& key, const std::string& start_field, const std::string& pattern, int64_t count, + std::vector* field_values, std::string* next_field) { + next_field->clear(); + field_values->clear(); + + int64_t rest = count; + std::string meta_value; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + *next_field = ""; + return Status::NotFound(); + } else { + int32_t version = parsed_hashes_meta_value.version(); + HashesDataKey hashes_data_prefix(key, version, Slice()); + HashesDataKey hashes_start_data_key(key, version, start_field); + std::string prefix = hashes_data_prefix.Encode().ToString(); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(hashes_start_data_key.Encode()); iter->Valid() && rest > 0 && iter->key().starts_with(prefix); + iter->Next()) { + ParsedHashesDataKey parsed_hashes_data_key(iter->key()); + std::string field = parsed_hashes_data_key.field().ToString(); + if (StringMatch(pattern.data(), pattern.size(), field.data(), field.size(), 0) != 0) { + field_values->push_back({field, iter->value().ToString()}); + } + rest--; + } + + if 
(iter->Valid() && iter->key().starts_with(prefix)) { + ParsedHashesDataKey parsed_hashes_data_key(iter->key()); + *next_field = parsed_hashes_data_key.field().ToString(); + } else { + *next_field = ""; + } + delete iter; + } + } else { + *next_field = ""; + return s; + } + return Status::OK(); +} + +Status RedisHashes::PKHScanRange(const Slice& key, const Slice& field_start, const std::string& field_end, + const Slice& pattern, int32_t limit, std::vector* field_values, + std::string* next_field) { + next_field->clear(); + field_values->clear(); + + int64_t remain = limit; + std::string meta_value; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + + bool start_no_limit = field_start.compare("") == 0; + bool end_no_limit = field_end.empty(); + + if (!start_no_limit && !end_no_limit && (field_start.compare(field_end) > 0)) { + return Status::InvalidArgument("error in given range"); + } + + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t version = parsed_hashes_meta_value.version(); + HashesDataKey hashes_data_prefix(key, version, Slice()); + HashesDataKey hashes_start_data_key(key, version, field_start); + std::string prefix = hashes_data_prefix.Encode().ToString(); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(start_no_limit ? 
prefix : hashes_start_data_key.Encode()); + iter->Valid() && remain > 0 && iter->key().starts_with(prefix); iter->Next()) { + ParsedHashesDataKey parsed_hashes_data_key(iter->key()); + std::string field = parsed_hashes_data_key.field().ToString(); + if (!end_no_limit && field.compare(field_end) > 0) { + break; + } + if (StringMatch(pattern.data(), pattern.size(), field.data(), field.size(), 0) != 0) { + field_values->push_back({field, iter->value().ToString()}); + } + remain--; + } + + if (iter->Valid() && iter->key().starts_with(prefix)) { + ParsedHashesDataKey parsed_hashes_data_key(iter->key()); + if (end_no_limit || parsed_hashes_data_key.field().compare(field_end) <= 0) { + *next_field = parsed_hashes_data_key.field().ToString(); + } + } + delete iter; + } + } else { + return s; + } + return Status::OK(); +} + +Status RedisHashes::PKHRScanRange(const Slice& key, const Slice& field_start, const std::string& field_end, + const Slice& pattern, int32_t limit, std::vector* field_values, + std::string* next_field) { + next_field->clear(); + field_values->clear(); + + int64_t remain = limit; + std::string meta_value; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + + bool start_no_limit = field_start.compare("") == 0; + bool end_no_limit = field_end.empty(); + + if (!start_no_limit && !end_no_limit && (field_start.compare(field_end) < 0)) { + return Status::InvalidArgument("error in given range"); + } + + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t version = parsed_hashes_meta_value.version(); + int32_t start_key_version = start_no_limit ? version + 1 : version; + std::string start_key_field = start_no_limit ? 
"" : field_start.ToString(); + HashesDataKey hashes_data_prefix(key, version, Slice()); + HashesDataKey hashes_start_data_key(key, start_key_version, start_key_field); + std::string prefix = hashes_data_prefix.Encode().ToString(); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[1]); + for (iter->SeekForPrev(hashes_start_data_key.Encode().ToString()); + iter->Valid() && remain > 0 && iter->key().starts_with(prefix); iter->Prev()) { + ParsedHashesDataKey parsed_hashes_data_key(iter->key()); + std::string field = parsed_hashes_data_key.field().ToString(); + if (!end_no_limit && field.compare(field_end) < 0) { + break; + } + if (StringMatch(pattern.data(), pattern.size(), field.data(), field.size(), 0) != 0) { + field_values->push_back({field, iter->value().ToString()}); + } + remain--; + } + + if (iter->Valid() && iter->key().starts_with(prefix)) { + ParsedHashesDataKey parsed_hashes_data_key(iter->key()); + if (end_no_limit || parsed_hashes_data_key.field().compare(field_end) >= 0) { + *next_field = parsed_hashes_data_key.field().ToString(); + } + } + delete iter; + } + } else { + return s; + } + return Status::OK(); +} + +Status RedisHashes::PKScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key) { + next_key->clear(); + + std::string key; + int32_t remain = limit; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + bool start_no_limit = key_start.compare("") == 0; + bool end_no_limit = key_end.compare("") == 0; + + if (!start_no_limit && !end_no_limit && (key_start.compare(key_end) > 0)) { + return Status::InvalidArgument("error in given range"); + } + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + if (start_no_limit) { + it->SeekToFirst(); + } else { + it->Seek(key_start); + } + + while 
(it->Valid() && remain > 0 && (end_no_limit || it->key().compare(key_end) <= 0)) { + ParsedHashesMetaValue parsed_hashes_meta_value(it->value()); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + it->Next(); + } else { + key = it->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + keys->push_back(key); + } + remain--; + it->Next(); + } + } + + while (it->Valid() && (end_no_limit || it->key().compare(key_end) <= 0)) { + ParsedHashesMetaValue parsed_hashes_meta_value(it->value()); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + it->Next(); + } else { + *next_key = it->key().ToString(); + break; + } + } + delete it; + return Status::OK(); +} + +Status RedisHashes::PKRScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key) { + next_key->clear(); + + std::string key; + int32_t remain = limit; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + bool start_no_limit = key_start.compare("") == 0; + bool end_no_limit = key_end.compare("") == 0; + + if (!start_no_limit && !end_no_limit && (key_start.compare(key_end) < 0)) { + return Status::InvalidArgument("error in given range"); + } + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + if (start_no_limit) { + it->SeekToLast(); + } else { + it->SeekForPrev(key_start); + } + + while (it->Valid() && remain > 0 && (end_no_limit || it->key().compare(key_end) >= 0)) { + ParsedHashesMetaValue parsed_hashes_meta_value(it->value()); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + it->Prev(); + } else { + key = it->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + 
keys->push_back(key); + } + remain--; + it->Prev(); + } + } + + while (it->Valid() && (end_no_limit || it->key().compare(key_end) >= 0)) { + ParsedHashesMetaValue parsed_hashes_meta_value(it->value()); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + it->Prev(); + } else { + *next_key = it->key().ToString(); + break; + } + } + delete it; + return Status::OK(); +} + +Status RedisHashes::Expire(const Slice& key, int32_t ttl) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_hashes_meta_value.count() == 0) { + return Status::NotFound(); + } + + if (ttl > 0) { + parsed_hashes_meta_value.SetRelativeTimestamp(ttl); + s = db_->Put(default_write_options_, handles_[0], key, meta_value); + } else { + parsed_hashes_meta_value.InitialMetaValue(); + s = db_->Put(default_write_options_, handles_[0], key, meta_value); + } + } + return s; +} + +Status RedisHashes::Del(const Slice& key) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_hashes_meta_value.count() == 0) { + return Status::NotFound(); + } else { + uint32_t statistic = parsed_hashes_meta_value.count(); + parsed_hashes_meta_value.InitialMetaValue(); + s = db_->Put(default_write_options_, handles_[0], key, meta_value); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + } + } + return s; +} + +bool RedisHashes::Scan(const std::string& start_key, const std::string& pattern, std::vector* keys, + int64_t* count, std::string* next_key) { + 
std::string meta_key; + bool is_finish = true; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + + it->Seek(start_key); + while (it->Valid() && (*count) > 0) { + ParsedHashesMetaValue parsed_meta_value(it->value()); + if (parsed_meta_value.IsStale() || parsed_meta_value.count() == 0) { + it->Next(); + continue; + } else { + meta_key = it->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), meta_key.data(), meta_key.size(), 0) != 0) { + keys->push_back(meta_key); + } + (*count)--; + it->Next(); + } + } + + std::string prefix = isTailWildcard(pattern) ? pattern.substr(0, pattern.size() - 1) : ""; + if (it->Valid() && (it->key().compare(prefix) <= 0 || it->key().starts_with(prefix))) { + *next_key = it->key().ToString(); + is_finish = false; + } else { + *next_key = ""; + } + delete it; + return is_finish; +} + +bool RedisHashes::PKExpireScan(const std::string& start_key, int32_t min_timestamp, int32_t max_timestamp, + std::vector* keys, int64_t* leftover_visits, std::string* next_key) { + bool is_finish = true; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + it->Seek(start_key); + while (it->Valid() && (*leftover_visits) > 0) { + ParsedHashesMetaValue parsed_hashes_meta_value(it->value()); + if (parsed_hashes_meta_value.IsStale() || parsed_hashes_meta_value.count() == 0) { + it->Next(); + continue; + } else { + if (min_timestamp < parsed_hashes_meta_value.timestamp() && + parsed_hashes_meta_value.timestamp() < max_timestamp) { + keys->push_back(it->key().ToString()); + } + (*leftover_visits)--; + it->Next(); + } + 
} + + if (it->Valid()) { + is_finish = false; + *next_key = it->key().ToString(); + } else { + *next_key = ""; + } + delete it; + return is_finish; +} + +Status RedisHashes::Expireat(const Slice& key, int32_t timestamp) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_hashes_meta_value.count() == 0) { + return Status::NotFound(); + } else { + if (timestamp > 0) { + parsed_hashes_meta_value.set_timestamp(timestamp); + } else { + parsed_hashes_meta_value.InitialMetaValue(); + } + s = db_->Put(default_write_options_, handles_[0], key, meta_value); + } + } + return s; +} + +Status RedisHashes::Persist(const Slice& key) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_hashes_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t timestamp = parsed_hashes_meta_value.timestamp(); + if (timestamp == 0) { + return Status::NotFound("Not have an associated timeout"); + } else { + parsed_hashes_meta_value.set_timestamp(0); + s = db_->Put(default_write_options_, handles_[0], key, meta_value); + } + } + } + return s; +} + +Status RedisHashes::TTL(const Slice& key, int64_t* timestamp) { + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedHashesMetaValue parsed_hashes_meta_value(&meta_value); + if (parsed_hashes_meta_value.IsStale()) { + *timestamp = -2; + return Status::NotFound("Stale"); + } else if (parsed_hashes_meta_value.count() == 0) { + 
*timestamp = -2; + return Status::NotFound(); + } else { + *timestamp = parsed_hashes_meta_value.timestamp(); + if (*timestamp == 0) { + *timestamp = -1; + } else { + int64_t curtime; + rocksdb::Env::Default()->GetCurrentTime(&curtime); + *timestamp = *timestamp - curtime >= 0 ? *timestamp - curtime : -2; + } + } + } else if (s.IsNotFound()) { + *timestamp = -2; + } + return s; +} + +void RedisHashes::ScanDatabase() { + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + auto current_time = static_cast(time(nullptr)); + + LOG(INFO) << "***************Hashes Meta Data***************"; + auto meta_iter = db_->NewIterator(iterator_options, handles_[0]); + for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { + ParsedHashesMetaValue parsed_hashes_meta_value(meta_iter->value()); + int32_t survival_time = 0; + if (parsed_hashes_meta_value.timestamp() != 0) { + survival_time = parsed_hashes_meta_value.timestamp() - current_time > 0 + ? 
parsed_hashes_meta_value.timestamp() - current_time + : -1; + } + + LOG(INFO) << fmt::format("[key : {:<30}] [count : {:<10}] [timestamp : {:<10}] [version : {}] [survival_time : {}]", + meta_iter->key().ToString(), parsed_hashes_meta_value.count(), + parsed_hashes_meta_value.timestamp(), parsed_hashes_meta_value.version(), survival_time); + } + delete meta_iter; + + LOG(INFO) << "***************Hashes Field Data***************"; + auto field_iter = db_->NewIterator(iterator_options, handles_[1]); + for (field_iter->SeekToFirst(); field_iter->Valid(); field_iter->Next()) { + ParsedHashesDataKey parsed_hashes_data_key(field_iter->key()); + + LOG(INFO) << fmt::format("[key : {:<30}] [field : {:<20}] [value : {:<20}] [version : {}]", + parsed_hashes_data_key.key().ToString(), parsed_hashes_data_key.field().ToString(), + field_iter->value().ToString(), parsed_hashes_data_key.version()); + } + delete field_iter; +} + +} // namespace storage diff --git a/src/storage/src/redis_hashes.h b/src/storage/src/redis_hashes.h new file mode 100644 index 000000000..180ec733b --- /dev/null +++ b/src/storage/src/redis_hashes.h @@ -0,0 +1,73 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once + +#include +#include +#include + +#include "src/redis.h" + +namespace storage { + +class RedisHashes : public Redis { + public: + RedisHashes(Storage* s, const DataType& type); + ~RedisHashes() override = default; + + // Common Commands + Status Open(const StorageOptions& storage_options, const std::string& db_path) override; + Status CompactRange(const rocksdb::Slice* begin, const rocksdb::Slice* end, + const ColumnFamilyType& type = kMetaAndData) override; + Status GetProperty(const std::string& property, uint64_t* out) override; + Status ScanKeyNum(KeyInfo* key_info) override; + Status ScanKeys(const std::string& pattern, std::vector* keys) override; + Status PKPatternMatchDel(const std::string& pattern, int32_t* ret) override; + + // Hashes Commands + Status HDel(const Slice& key, const std::vector& fields, int32_t* ret); + Status HExists(const Slice& key, const Slice& field); + Status HGet(const Slice& key, const Slice& field, std::string* value); + Status HGetall(const Slice& key, std::vector* fvs); + Status HIncrby(const Slice& key, const Slice& field, int64_t value, int64_t* ret); + Status HIncrbyfloat(const Slice& key, const Slice& field, const Slice& by, std::string* new_value); + Status HKeys(const Slice& key, std::vector* fields); + Status HLen(const Slice& key, int32_t* ret); + Status HMGet(const Slice& key, const std::vector& fields, std::vector* vss); + Status HMSet(const Slice& key, const std::vector& fvs); + Status HSet(const Slice& key, const Slice& field, const Slice& value, int32_t* res); + Status HSetnx(const Slice& key, const Slice& field, const Slice& value, int32_t* ret); + Status HVals(const Slice& key, std::vector* values); + Status HStrlen(const Slice& key, const Slice& field, int32_t* len); + Status HScan(const Slice& key, int64_t cursor, const std::string& pattern, int64_t count, + std::vector* field_values, int64_t* next_cursor); + Status HScanx(const Slice& key, const std::string& start_field, const std::string& 
pattern, int64_t count, + std::vector* field_values, std::string* next_field); + Status PKHScanRange(const Slice& key, const Slice& field_start, const std::string& field_end, const Slice& pattern, + int32_t limit, std::vector* field_values, std::string* next_field); + Status PKHRScanRange(const Slice& key, const Slice& field_start, const std::string& field_end, const Slice& pattern, + int32_t limit, std::vector* field_values, std::string* next_field); + Status PKScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key); + Status PKRScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key); + + // Keys Commands + Status Expire(const Slice& key, int32_t ttl) override; + Status Del(const Slice& key) override; + bool Scan(const std::string& start_key, const std::string& pattern, std::vector* keys, int64_t* count, + std::string* next_key) override; + bool PKExpireScan(const std::string& start_key, int32_t min_timestamp, int32_t max_timestamp, + std::vector* keys, int64_t* leftover_visits, std::string* next_key) override; + Status Expireat(const Slice& key, int32_t timestamp) override; + Status Persist(const Slice& key) override; + Status TTL(const Slice& key, int64_t* timestamp) override; + + // Iterate all data + void ScanDatabase(); +}; + +} // namespace storage diff --git a/src/storage/src/redis_hyperloglog.cc b/src/storage/src/redis_hyperloglog.cc new file mode 100644 index 000000000..52dae4246 --- /dev/null +++ b/src/storage/src/redis_hyperloglog.cc @@ -0,0 +1,114 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#include "src/redis_hyperloglog.h" +#include +#include +#include +#include "src/storage_murmur3.h" + +namespace storage { + +const int32_t HLL_HASH_SEED = 313; + +HyperLogLog::HyperLogLog(uint8_t precision, std::string origin_register) { + b_ = precision; + m_ = 1 << precision; + alpha_ = Alpha(); + register_ = std::make_unique(m_); + for (uint32_t i = 0; i < m_; ++i) { + register_[i] = 0; + } + if (!origin_register.empty()) { + for (uint32_t i = 0; i < m_; ++i) { + register_[i] = origin_register[i]; + } + } +} + +HyperLogLog::~HyperLogLog() = default; + +std::string HyperLogLog::Add(const char* value, uint32_t len) { + uint32_t hash_value; + MurmurHash3_x86_32(value, static_cast(len), HLL_HASH_SEED, static_cast(&hash_value)); + uint32_t index = hash_value & ((1 << b_) - 1); + uint8_t rank = Nctz((hash_value >> b_), static_cast(32 - b_)); + if (rank > register_[index]) { register_[index] = static_cast(rank); +} + std::string result(m_, 0); + for (uint32_t i = 0; i < m_; ++i) { + result[i] = register_[i]; + } + return result; +} + +double HyperLogLog::Estimate() const { + double estimate = FirstEstimate(); + if (estimate <= 2.5 * m_) { + uint32_t zeros = CountZero(); + if (zeros != 0) { + estimate = m_ * log(static_cast(m_) / zeros); + } + } else if (estimate > pow(2, 32) / 30.0) { + estimate = log1p(estimate * -1 / pow(2, 32)) * pow(2, 32) * -1; + } + return estimate; +} + +double HyperLogLog::FirstEstimate() const { + double estimate; + double sum = 0.0; + for (uint32_t i = 0; i < m_; i++) { + sum += 1.0 / (1 << register_[i]); + } + + estimate = alpha_ * m_ * m_ / sum; + return estimate; +} + +double HyperLogLog::Alpha() const { + switch (m_) { + case 16: + return 0.673; + case 32: + return 0.697; + case 64: + return 0.709; + default: + return 0.7213 / (1 + 1.079 / m_); + } +} + +uint32_t HyperLogLog::CountZero() const { + uint32_t count = 0; + for (uint32_t i = 0; i < m_; i++) { + if (register_[i] == 0) { + count++; + } + } + return count; +} + +std::string 
HyperLogLog::Merge(const HyperLogLog& hll) { + if (m_ != hll.m_) { + // TODO(shq) the number of registers doesn't match + } + for (uint32_t r = 0; r < m_; r++) { + if (register_[r] < hll.register_[r]) { + register_[r] = static_cast(register_[r] | hll.register_[r]); + } + } + + std::string result(m_, 0); + for (uint32_t i = 0; i < m_; ++i) { + result[i] = register_[i]; + } + return result; +} + +// ::__builtin_ctz(x): returns the number of trailing zero bits in x (zeros below the lowest set '1' bit) +uint8_t HyperLogLog::Nctz(uint32_t x, int b) { return static_cast(std::min(b, ::__builtin_ctz(x))) + 1; } + +} // namespace storage diff --git a/src/storage/src/redis_hyperloglog.h b/src/storage/src/redis_hyperloglog.h new file mode 100644 index 000000000..f88976246 --- /dev/null +++ b/src/storage/src/redis_hyperloglog.h @@ -0,0 +1,36 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include +#include +#include +#include + +namespace storage { + +class HyperLogLog { + public: + HyperLogLog(uint8_t precision, std::string origin_register); + ~HyperLogLog(); + + double Estimate() const; + double FirstEstimate() const; + uint32_t CountZero() const; + double Alpha() const; + uint8_t Nctz(uint32_t x, int b); + + std::string Add(const char* value, uint32_t len); + std::string Merge(const HyperLogLog& hll); + + protected: + uint32_t m_ = 0; // number of registers (1 << precision) + uint32_t b_ = 0; // register index bit width (precision) + double alpha_ = 0; + std::unique_ptr register_; +}; + +} // namespace storage diff --git a/src/storage/src/redis_lists.cc b/src/storage/src/redis_lists.cc new file mode 100644 index 000000000..06e182a85 --- /dev/null +++ b/src/storage/src/redis_lists.cc @@ -0,0 +1,1284 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. 
+// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include + +#include +#include + +#include "src/lists_filter.h" +#include "src/redis_lists.h" +#include "src/scope_record_lock.h" +#include "src/scope_snapshot.h" +#include "storage/util.h" + +namespace storage { + +const rocksdb::Comparator* ListsDataKeyComparator() { + static ListsDataKeyComparatorImpl ldkc; + return &ldkc; +} + +RedisLists::RedisLists(Storage* const s, const DataType& type) : Redis(s, type) {} + +Status RedisLists::Open(const StorageOptions& storage_options, const std::string& db_path) { + statistics_store_->SetCapacity(storage_options.statistics_max_size); + small_compaction_threshold_ = storage_options.small_compaction_threshold; + + rocksdb::Options ops(storage_options.options); + Status s = rocksdb::DB::Open(ops, db_path, &db_); + if (s.ok()) { + // Create column family + rocksdb::ColumnFamilyHandle* cf; + rocksdb::ColumnFamilyOptions cfo; + cfo.comparator = ListsDataKeyComparator(); + s = db_->CreateColumnFamily(cfo, "data_cf", &cf); + if (!s.ok()) { + return s; + } + // Close DB + delete cf; + delete db_; + } + + // Open + rocksdb::DBOptions db_ops(storage_options.options); + rocksdb::ColumnFamilyOptions meta_cf_ops(storage_options.options); + rocksdb::ColumnFamilyOptions data_cf_ops(storage_options.options); + meta_cf_ops.compaction_filter_factory = std::make_shared(); + data_cf_ops.compaction_filter_factory = std::make_shared(&db_, &handles_); + data_cf_ops.comparator = ListsDataKeyComparator(); + + // use the bloom filter policy to reduce disk reads + rocksdb::BlockBasedTableOptions table_ops(storage_options.table_options); + table_ops.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true)); + rocksdb::BlockBasedTableOptions meta_cf_table_ops(table_ops); + rocksdb::BlockBasedTableOptions 
data_cf_table_ops(table_ops); + if (!storage_options.share_block_cache && storage_options.block_cache_size > 0) { + meta_cf_table_ops.block_cache = rocksdb::NewLRUCache(storage_options.block_cache_size); + data_cf_table_ops.block_cache = rocksdb::NewLRUCache(storage_options.block_cache_size); + } + meta_cf_ops.table_factory.reset(rocksdb::NewBlockBasedTableFactory(meta_cf_table_ops)); + data_cf_ops.table_factory.reset(rocksdb::NewBlockBasedTableFactory(data_cf_table_ops)); + + std::vector column_families; + // Meta CF + column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, meta_cf_ops); + // Data CF + column_families.emplace_back("data_cf", data_cf_ops); + return rocksdb::DB::Open(db_ops, db_path, column_families, &handles_, &db_); +} + +Status RedisLists::CompactRange(const rocksdb::Slice* begin, const rocksdb::Slice* end, const ColumnFamilyType& type) { + if (type == kMeta || type == kMetaAndData) { + db_->CompactRange(default_compact_range_options_, handles_[0], begin, end); + } + if (type == kData || type == kMetaAndData) { + db_->CompactRange(default_compact_range_options_, handles_[1], begin, end); + } + return Status::OK(); +} + +Status RedisLists::GetProperty(const std::string& property, uint64_t* out) { + std::string value; + db_->GetProperty(handles_[0], property, &value); + *out = std::strtoull(value.c_str(), nullptr, 10); + db_->GetProperty(handles_[1], property, &value); + *out += std::strtoull(value.c_str(), nullptr, 10); + return Status::OK(); +} + +Status RedisLists::ScanKeyNum(KeyInfo* key_info) { + uint64_t keys = 0; + uint64_t expires = 0; + uint64_t ttl_sum = 0; + uint64_t invaild_keys = 0; + + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + int64_t curtime; + rocksdb::Env::Default()->GetCurrentTime(&curtime); + + rocksdb::Iterator* iter = db_->NewIterator(iterator_options, handles_[0]); + 
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedListsMetaValue parsed_lists_meta_value(iter->value()); + if (parsed_lists_meta_value.IsStale() || parsed_lists_meta_value.count() == 0) { + invaild_keys++; + } else { + keys++; + if (!parsed_lists_meta_value.IsPermanentSurvival()) { + expires++; + ttl_sum += parsed_lists_meta_value.timestamp() - curtime; + } + } + } + delete iter; + + key_info->keys = keys; + key_info->expires = expires; + key_info->avg_ttl = (expires != 0) ? ttl_sum / expires : 0; + key_info->invaild_keys = invaild_keys; + return Status::OK(); +} + +Status RedisLists::ScanKeys(const std::string& pattern, std::vector* keys) { + std::string key; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + rocksdb::Iterator* iter = db_->NewIterator(iterator_options, handles_[0]); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedListsMetaValue parsed_lists_meta_value(iter->value()); + if (!parsed_lists_meta_value.IsStale() && parsed_lists_meta_value.count() != 0) { + key = iter->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + keys->push_back(key); + } + } + } + delete iter; + return Status::OK(); +} + +Status RedisLists::PKPatternMatchDel(const std::string& pattern, int32_t* ret) { + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + std::string key; + std::string meta_value; + int32_t total_delete = 0; + Status s; + rocksdb::WriteBatch batch; + rocksdb::Iterator* iter = db_->NewIterator(iterator_options, handles_[0]); + iter->SeekToFirst(); + while (iter->Valid()) { + key = iter->key().ToString(); + meta_value = iter->value().ToString(); + ParsedListsMetaValue 
parsed_lists_meta_value(&meta_value); + if (!parsed_lists_meta_value.IsStale() && (parsed_lists_meta_value.count() != 0U) && + (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0)) { + parsed_lists_meta_value.InitialMetaValue(); + batch.Put(handles_[0], key, meta_value); + } + if (static_cast(batch.Count()) >= BATCH_DELETE_LIMIT) { + s = db_->Write(default_write_options_, &batch); + if (s.ok()) { + total_delete += static_cast(batch.Count()); + batch.Clear(); + } else { + *ret = total_delete; + return s; + } + } + iter->Next(); + } + if (batch.Count() != 0U) { + s = db_->Write(default_write_options_, &batch); + if (s.ok()) { + total_delete += static_cast(batch.Count()); + batch.Clear(); + } + } + + *ret = total_delete; + return s; +} + +Status RedisLists::LIndex(const Slice& key, int64_t index, std::string* element) { + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + std::string meta_value; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + int32_t version = parsed_lists_meta_value.version(); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + std::string tmp_element; + uint64_t target_index = + index >= 0 ? 
parsed_lists_meta_value.left_index() + index + 1 : parsed_lists_meta_value.right_index() + index;
+      if (parsed_lists_meta_value.left_index() < target_index && target_index < parsed_lists_meta_value.right_index()) {
+        ListsDataKey lists_data_key(key, version, target_index);
+        s = db_->Get(read_options, handles_[1], lists_data_key.Encode(), &tmp_element);
+        if (s.ok()) {
+          *element = tmp_element;
+        }
+      } else {
+        return Status::NotFound();
+      }
+    }
+  }
+  return s;
+}
+
+Status RedisLists::LInsert(const Slice& key, const BeforeOrAfter& before_or_after, const std::string& pivot,
+                           const std::string& value, int64_t* ret) {
+  *ret = 0;
+  rocksdb::WriteBatch batch;
+  ScopeRecordLock l(lock_mgr_, key);
+  std::string meta_value;
+  Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value);
+  if (s.ok()) {
+    ParsedListsMetaValue parsed_lists_meta_value(&meta_value);
+    if (parsed_lists_meta_value.IsStale()) {
+      return Status::NotFound("Stale");
+    } else if (parsed_lists_meta_value.count() == 0) {
+      return Status::NotFound();
+    } else {
+      bool find_pivot = false;
+      uint64_t pivot_index = 0;
+      int32_t version = parsed_lists_meta_value.version();
+      uint64_t current_index = parsed_lists_meta_value.left_index() + 1;
+      rocksdb::Iterator* iter = db_->NewIterator(default_read_options_, handles_[1]);
+      ListsDataKey start_data_key(key, version, current_index);
+      for (iter->Seek(start_data_key.Encode()); iter->Valid() && current_index < parsed_lists_meta_value.right_index();
+           iter->Next(), current_index++) {
+        if (iter->value() == Slice(pivot)) {
+          find_pivot = true;
+          pivot_index = current_index;
+          break;
+        }
+      }
+      delete iter;
+      if (!find_pivot) {
+        *ret = -1;
+        return Status::NotFound();
+      } else {
+        uint64_t target_index;
+        std::vector<std::string> list_nodes;
+        uint64_t mid_index = parsed_lists_meta_value.left_index() +
+                             (parsed_lists_meta_value.right_index() - parsed_lists_meta_value.left_index()) / 2;
+        if (pivot_index <= mid_index) {
+          target_index = (before_or_after ==
Before) ? pivot_index - 1 : pivot_index; + current_index = parsed_lists_meta_value.left_index() + 1; + rocksdb::Iterator* first_half_iter = db_->NewIterator(default_read_options_, handles_[1]); + ListsDataKey start_data_key(key, version, current_index); + for (first_half_iter->Seek(start_data_key.Encode()); first_half_iter->Valid() && current_index <= pivot_index; + first_half_iter->Next(), current_index++) { + if (current_index == pivot_index) { + if (before_or_after == After) { + list_nodes.push_back(first_half_iter->value().ToString()); + } + break; + } + list_nodes.push_back(first_half_iter->value().ToString()); + } + delete first_half_iter; + + current_index = parsed_lists_meta_value.left_index(); + for (const auto& node : list_nodes) { + ListsDataKey lists_data_key(key, version, current_index++); + batch.Put(handles_[1], lists_data_key.Encode(), node); + } + parsed_lists_meta_value.ModifyLeftIndex(1); + } else { + target_index = (before_or_after == Before) ? pivot_index : pivot_index + 1; + current_index = pivot_index; + rocksdb::Iterator* after_half_iter = db_->NewIterator(default_read_options_, handles_[1]); + ListsDataKey start_data_key(key, version, current_index); + for (after_half_iter->Seek(start_data_key.Encode()); + after_half_iter->Valid() && current_index < parsed_lists_meta_value.right_index(); + after_half_iter->Next(), current_index++) { + if (current_index == pivot_index && before_or_after == BeforeOrAfter::After) { + continue; + } + list_nodes.push_back(after_half_iter->value().ToString()); + } + delete after_half_iter; + + current_index = target_index + 1; + for (const auto& node : list_nodes) { + ListsDataKey lists_data_key(key, version, current_index++); + batch.Put(handles_[1], lists_data_key.Encode(), node); + } + parsed_lists_meta_value.ModifyRightIndex(1); + } + parsed_lists_meta_value.ModifyCount(1); + batch.Put(handles_[0], key, meta_value); + ListsDataKey lists_target_key(key, version, target_index); + batch.Put(handles_[1], 
lists_target_key.Encode(), value); + *ret = static_cast(parsed_lists_meta_value.count()); + return db_->Write(default_write_options_, &batch); + } + } + } else if (s.IsNotFound()) { + *ret = 0; + } + return s; +} + +Status RedisLists::LLen(const Slice& key, uint64_t* len) { + *len = 0; + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + *len = parsed_lists_meta_value.count(); + return s; + } + } + return s; +} + +Status RedisLists::LPop(const Slice& key, int64_t count, std::vector* elements) { + uint32_t statistic = 0; + elements->clear(); + + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + auto size = static_cast(parsed_lists_meta_value.count()); + int32_t version = parsed_lists_meta_value.version(); + int32_t start_index = 0; + auto stop_index = static_cast(count<=size?count-1:size-1); + int32_t cur_index = 0; + ListsDataKey lists_data_key(key, version, parsed_lists_meta_value.left_index()+1); + rocksdb::Iterator* iter = db_->NewIterator(default_read_options_, handles_[1]); + for (iter->Seek(lists_data_key.Encode()); iter->Valid() && cur_index <= stop_index; iter->Next(), ++cur_index) { + statistic++; + elements->push_back(iter->value().ToString()); + batch.Delete(handles_[1],iter->key()); + + parsed_lists_meta_value.ModifyCount(-1); + parsed_lists_meta_value.ModifyLeftIndex(-1); + } + 
batch.Put(handles_[0], key, meta_value); + delete iter; + } + } + if (batch.Count() != 0U) { + s = db_->Write(default_write_options_, &batch); + if (s.ok()) { + batch.Clear(); + } + UpdateSpecificKeyStatistics(key.ToString(), statistic); + } + return s; +} + +Status RedisLists::LPush(const Slice& key, const std::vector& values, uint64_t* ret) { + *ret = 0; + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + + uint64_t index = 0; + int32_t version = 0; + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale() || parsed_lists_meta_value.count() == 0) { + version = parsed_lists_meta_value.InitialMetaValue(); + } else { + version = parsed_lists_meta_value.version(); + } + for (const auto& value : values) { + index = parsed_lists_meta_value.left_index(); + parsed_lists_meta_value.ModifyLeftIndex(1); + parsed_lists_meta_value.ModifyCount(1); + ListsDataKey lists_data_key(key, version, index); + batch.Put(handles_[1], lists_data_key.Encode(), value); + } + batch.Put(handles_[0], key, meta_value); + *ret = parsed_lists_meta_value.count(); + } else if (s.IsNotFound()) { + char str[8]; + EncodeFixed64(str, values.size()); + ListsMetaValue lists_meta_value(Slice(str, sizeof(uint64_t))); + version = lists_meta_value.UpdateVersion(); + for (const auto& value : values) { + index = lists_meta_value.left_index(); + lists_meta_value.ModifyLeftIndex(1); + ListsDataKey lists_data_key(key, version, index); + batch.Put(handles_[1], lists_data_key.Encode(), value); + } + batch.Put(handles_[0], key, lists_meta_value.Encode()); + *ret = lists_meta_value.right_index() - lists_meta_value.left_index() - 1; + } else { + return s; + } + return db_->Write(default_write_options_, &batch); +} + +Status RedisLists::LPushx(const Slice& key, const std::vector& values, uint64_t* len) { + *len = 0; + rocksdb::WriteBatch batch; 
+ ScopeRecordLock l(lock_mgr_, key); + + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t version = parsed_lists_meta_value.version(); + for (const auto& value : values) { + uint64_t index = parsed_lists_meta_value.left_index(); + parsed_lists_meta_value.ModifyCount(1); + parsed_lists_meta_value.ModifyLeftIndex(1); + ListsDataKey lists_data_key(key, version, index); + batch.Put(handles_[1], lists_data_key.Encode(), value); + } + batch.Put(handles_[0], key, meta_value); + *len = parsed_lists_meta_value.count(); + return db_->Write(default_write_options_, &batch); + } + } + return s; +} + +Status RedisLists::LRange(const Slice& key, int64_t start, int64_t stop, std::vector* ret) { + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + + std::string meta_value; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t version = parsed_lists_meta_value.version(); + uint64_t origin_left_index = parsed_lists_meta_value.left_index() + 1; + uint64_t origin_right_index = parsed_lists_meta_value.right_index() - 1; + uint64_t sublist_left_index = start >= 0 ? origin_left_index + start : origin_right_index + start + 1; + uint64_t sublist_right_index = stop >= 0 ? 
origin_left_index + stop : origin_right_index + stop + 1;
+
+      if (sublist_left_index > sublist_right_index || sublist_left_index > origin_right_index ||
+          sublist_right_index < origin_left_index) {
+        return Status::OK();
+      } else {
+        if (sublist_left_index < origin_left_index) {
+          sublist_left_index = origin_left_index;
+        }
+        if (sublist_right_index > origin_right_index) {
+          sublist_right_index = origin_right_index;
+        }
+        rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[1]);
+        uint64_t current_index = sublist_left_index;
+        ListsDataKey start_data_key(key, version, current_index);
+        for (iter->Seek(start_data_key.Encode()); iter->Valid() && current_index <= sublist_right_index;
+             iter->Next(), current_index++) {
+          ret->push_back(iter->value().ToString());
+        }
+        delete iter;
+        return Status::OK();
+      }
+    }
+  } else {
+    return s;
+  }
+}
+
+Status RedisLists::LRem(const Slice& key, int64_t count, const Slice& value, uint64_t* ret) {
+  *ret = 0;
+  rocksdb::WriteBatch batch;
+  ScopeRecordLock l(lock_mgr_, key);
+  std::string meta_value;
+  Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value);
+  if (s.ok()) {
+    ParsedListsMetaValue parsed_lists_meta_value(&meta_value);
+    if (parsed_lists_meta_value.IsStale()) {
+      return Status::NotFound("Stale");
+    } else if (parsed_lists_meta_value.count() == 0) {
+      return Status::NotFound();
+    } else {
+      uint64_t current_index;
+      std::vector<uint64_t> target_index;
+      std::vector<uint64_t> delete_index;
+      uint64_t rest = (count < 0) ?
-count : count; + int32_t version = parsed_lists_meta_value.version(); + uint64_t start_index = parsed_lists_meta_value.left_index() + 1; + uint64_t stop_index = parsed_lists_meta_value.right_index() - 1; + ListsDataKey start_data_key(key, version, start_index); + ListsDataKey stop_data_key(key, version, stop_index); + if (count >= 0) { + current_index = start_index; + rocksdb::Iterator* iter = db_->NewIterator(default_read_options_, handles_[1]); + for (iter->Seek(start_data_key.Encode()); + iter->Valid() && current_index <= stop_index && ((count == 0) || rest != 0); + iter->Next(), current_index++) { + if (iter->value() == value) { + target_index.push_back(current_index); + if (count != 0) { + rest--; + } + } + } + delete iter; + } else { + current_index = stop_index; + rocksdb::Iterator* iter = db_->NewIterator(default_read_options_, handles_[1]); + for (iter->Seek(stop_data_key.Encode()); + iter->Valid() && current_index >= start_index && ((count == 0) || rest != 0); + iter->Prev(), current_index--) { + if (iter->value() == value) { + target_index.push_back(current_index); + if (count != 0) { + rest--; + } + } + } + delete iter; + } + if (target_index.empty()) { + *ret = 0; + return Status::NotFound(); + } else { + rest = target_index.size(); + uint64_t sublist_left_index = (count >= 0) ? target_index[0] : target_index[target_index.size() - 1]; + uint64_t sublist_right_index = (count >= 0) ? 
target_index[target_index.size() - 1] : target_index[0]; + uint64_t left_part_len = sublist_right_index - start_index; + uint64_t right_part_len = stop_index - sublist_left_index; + if (left_part_len <= right_part_len) { + uint64_t left = sublist_right_index; + current_index = sublist_right_index; + ListsDataKey sublist_right_key(key, version, sublist_right_index); + rocksdb::Iterator* iter = db_->NewIterator(default_read_options_, handles_[1]); + for (iter->Seek(sublist_right_key.Encode()); iter->Valid() && current_index >= start_index; + iter->Prev(), current_index--) { + if ((iter->value() == value) && rest > 0) { + rest--; + } else { + ListsDataKey lists_data_key(key, version, left--); + batch.Put(handles_[1], lists_data_key.Encode(), iter->value()); + } + } + delete iter; + uint64_t left_index = parsed_lists_meta_value.left_index(); + for (uint64_t idx = 0; idx < target_index.size(); ++idx) { + delete_index.push_back(left_index + idx + 1); + } + parsed_lists_meta_value.ModifyLeftIndex(-target_index.size()); + } else { + uint64_t right = sublist_left_index; + current_index = sublist_left_index; + ListsDataKey sublist_left_key(key, version, sublist_left_index); + rocksdb::Iterator* iter = db_->NewIterator(default_read_options_, handles_[1]); + for (iter->Seek(sublist_left_key.Encode()); iter->Valid() && current_index <= stop_index; + iter->Next(), current_index++) { + if ((iter->value() == value) && rest > 0) { + rest--; + } else { + ListsDataKey lists_data_key(key, version, right++); + batch.Put(handles_[1], lists_data_key.Encode(), iter->value()); + } + } + delete iter; + uint64_t right_index = parsed_lists_meta_value.right_index(); + for (uint64_t idx = 0; idx < target_index.size(); ++idx) { + delete_index.push_back(right_index - idx - 1); + } + parsed_lists_meta_value.ModifyRightIndex(-target_index.size()); + } + parsed_lists_meta_value.ModifyCount(-target_index.size()); + batch.Put(handles_[0], key, meta_value); + for (const auto& idx : delete_index) { + 
ListsDataKey lists_data_key(key, version, idx); + batch.Delete(handles_[1], lists_data_key.Encode()); + } + *ret = target_index.size(); + return db_->Write(default_write_options_, &batch); + } + } + } else if (s.IsNotFound()) { + *ret = 0; + } + return s; +} + +Status RedisLists::LSet(const Slice& key, int64_t index, const Slice& value) { + uint32_t statistic = 0; + ScopeRecordLock l(lock_mgr_, key); + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t version = parsed_lists_meta_value.version(); + uint64_t target_index = + index >= 0 ? parsed_lists_meta_value.left_index() + index + 1 : parsed_lists_meta_value.right_index() + index; + if (target_index <= parsed_lists_meta_value.left_index() || + target_index >= parsed_lists_meta_value.right_index()) { + return Status::Corruption("index out of range"); + } + ListsDataKey lists_data_key(key, version, target_index); + s = db_->Put(default_write_options_, handles_[1], lists_data_key.Encode(), value); + statistic++; + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; + } + } + return s; +} + +Status RedisLists::LTrim(const Slice& key, int64_t start, int64_t stop) { + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + + uint32_t statistic = 0; + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + int32_t version = parsed_lists_meta_value.version(); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + uint64_t origin_left_index = 
parsed_lists_meta_value.left_index() + 1; + uint64_t origin_right_index = parsed_lists_meta_value.right_index() - 1; + uint64_t sublist_left_index = start >= 0 ? origin_left_index + start : origin_right_index + start + 1; + uint64_t sublist_right_index = stop >= 0 ? origin_left_index + stop : origin_right_index + stop + 1; + + if (sublist_left_index > sublist_right_index || sublist_left_index > origin_right_index || + sublist_right_index < origin_left_index) { + parsed_lists_meta_value.InitialMetaValue(); + batch.Put(handles_[0], key, meta_value); + } else { + if (sublist_left_index < origin_left_index) { + sublist_left_index = origin_left_index; + } + + if (sublist_right_index > origin_right_index) { + sublist_right_index = origin_right_index; + } + + uint64_t delete_node_num = + (sublist_left_index - origin_left_index) + (origin_right_index - sublist_right_index); + parsed_lists_meta_value.ModifyLeftIndex(-(sublist_left_index - origin_left_index)); + parsed_lists_meta_value.ModifyRightIndex(-(origin_right_index - sublist_right_index)); + parsed_lists_meta_value.ModifyCount(-delete_node_num); + batch.Put(handles_[0], key, meta_value); + for (uint64_t idx = origin_left_index; idx < sublist_left_index; ++idx) { + statistic++; + ListsDataKey lists_data_key(key, version, idx); + batch.Delete(handles_[1], lists_data_key.Encode()); + } + for (uint64_t idx = origin_right_index; idx > sublist_right_index; --idx) { + statistic++; + ListsDataKey lists_data_key(key, version, idx); + batch.Delete(handles_[1], lists_data_key.Encode()); + } + } + } + } else { + return s; + } + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; +} + +Status RedisLists::RPop(const Slice& key, int64_t count, std::vector* elements) { + uint32_t statistic = 0; + elements->clear(); + + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], 
key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + auto size = static_cast(parsed_lists_meta_value.count()); + int32_t version = parsed_lists_meta_value.version(); + int32_t start_index = 0; + auto stop_index = static_cast(count<=size?count-1:size-1); + int32_t cur_index = 0; + ListsDataKey lists_data_key(key, version, parsed_lists_meta_value.right_index()-1); + rocksdb::Iterator* iter = db_->NewIterator(default_read_options_, handles_[1]); + for (iter->SeekForPrev(lists_data_key.Encode()); iter->Valid() && cur_index <= stop_index; iter->Prev(), ++cur_index) { + statistic++; + elements->push_back(iter->value().ToString()); + batch.Delete(handles_[1],iter->key()); + + parsed_lists_meta_value.ModifyCount(-1); + parsed_lists_meta_value.ModifyRightIndex(-1); + } + batch.Put(handles_[0], key, meta_value); + delete iter; + } + } + if (batch.Count() != 0U) { + s = db_->Write(default_write_options_, &batch); + if (s.ok()) { + batch.Clear(); + } + UpdateSpecificKeyStatistics(key.ToString(), statistic); + } + return s; +} + +Status RedisLists::RPoplpush(const Slice& source, const Slice& destination, std::string* element) { + element->clear(); + uint32_t statistic = 0; + Status s; + rocksdb::WriteBatch batch; + MultiScopeRecordLock l(lock_mgr_, {source.ToString(), destination.ToString()}); + if (source.compare(destination) == 0) { + std::string meta_value; + s = db_->Get(default_read_options_, handles_[0], source, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + std::string target; + int32_t version = parsed_lists_meta_value.version(); + uint64_t 
last_node_index = parsed_lists_meta_value.right_index() - 1; + ListsDataKey lists_data_key(source, version, last_node_index); + s = db_->Get(default_read_options_, handles_[1], lists_data_key.Encode(), &target); + if (s.ok()) { + *element = target; + if (parsed_lists_meta_value.count() == 1) { + return Status::OK(); + } else { + uint64_t target_index = parsed_lists_meta_value.left_index(); + ListsDataKey lists_target_key(source, version, target_index); + batch.Delete(handles_[1], lists_data_key.Encode()); + batch.Put(handles_[1], lists_target_key.Encode(), target); + statistic++; + parsed_lists_meta_value.ModifyRightIndex(-1); + parsed_lists_meta_value.ModifyLeftIndex(1); + batch.Put(handles_[0], source, meta_value); + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(source.ToString(), statistic); + return s; + } + } else { + return s; + } + } + } else { + return s; + } + } + + int32_t version; + std::string target; + std::string source_meta_value; + s = db_->Get(default_read_options_, handles_[0], source, &source_meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&source_meta_value); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + version = parsed_lists_meta_value.version(); + uint64_t last_node_index = parsed_lists_meta_value.right_index() - 1; + ListsDataKey lists_data_key(source, version, last_node_index); + s = db_->Get(default_read_options_, handles_[1], lists_data_key.Encode(), &target); + if (s.ok()) { + batch.Delete(handles_[1], lists_data_key.Encode()); + statistic++; + parsed_lists_meta_value.ModifyCount(-1); + parsed_lists_meta_value.ModifyRightIndex(-1); + batch.Put(handles_[0], source, source_meta_value); + } else { + return s; + } + } + } else { + return s; + } + + std::string destination_meta_value; + s = db_->Get(default_read_options_, handles_[0], destination, 
&destination_meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&destination_meta_value); + if (parsed_lists_meta_value.IsStale() || parsed_lists_meta_value.count() == 0) { + version = parsed_lists_meta_value.InitialMetaValue(); + } else { + version = parsed_lists_meta_value.version(); + } + uint64_t target_index = parsed_lists_meta_value.left_index(); + ListsDataKey lists_data_key(destination, version, target_index); + batch.Put(handles_[1], lists_data_key.Encode(), target); + parsed_lists_meta_value.ModifyCount(1); + parsed_lists_meta_value.ModifyLeftIndex(1); + batch.Put(handles_[0], destination, destination_meta_value); + } else if (s.IsNotFound()) { + char str[8]; + EncodeFixed64(str, 1); + ListsMetaValue lists_meta_value(Slice(str, sizeof(uint64_t))); + version = lists_meta_value.UpdateVersion(); + uint64_t target_index = lists_meta_value.left_index(); + ListsDataKey lists_data_key(destination, version, target_index); + batch.Put(handles_[1], lists_data_key.Encode(), target); + lists_meta_value.ModifyLeftIndex(1); + batch.Put(handles_[0], destination, lists_meta_value.Encode()); + } else { + return s; + } + + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(source.ToString(), statistic); + if (s.ok()) { + *element = target; + } + return s; +} + +Status RedisLists::RPush(const Slice& key, const std::vector& values, uint64_t* ret) { + *ret = 0; + rocksdb::WriteBatch batch; + + uint64_t index = 0; + int32_t version = 0; + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale() || parsed_lists_meta_value.count() == 0) { + version = parsed_lists_meta_value.InitialMetaValue(); + } else { + version = parsed_lists_meta_value.version(); + } + for (const auto& value : values) { + index = parsed_lists_meta_value.right_index(); + 
parsed_lists_meta_value.ModifyRightIndex(1); + parsed_lists_meta_value.ModifyCount(1); + ListsDataKey lists_data_key(key, version, index); + batch.Put(handles_[1], lists_data_key.Encode(), value); + } + batch.Put(handles_[0], key, meta_value); + *ret = parsed_lists_meta_value.count(); + } else if (s.IsNotFound()) { + char str[8]; + EncodeFixed64(str, values.size()); + ListsMetaValue lists_meta_value(Slice(str, sizeof(uint64_t))); + version = lists_meta_value.UpdateVersion(); + for (const auto& value : values) { + index = lists_meta_value.right_index(); + lists_meta_value.ModifyRightIndex(1); + ListsDataKey lists_data_key(key, version, index); + batch.Put(handles_[1], lists_data_key.Encode(), value); + } + batch.Put(handles_[0], key, lists_meta_value.Encode()); + *ret = lists_meta_value.right_index() - lists_meta_value.left_index() - 1; + } else { + return s; + } + return db_->Write(default_write_options_, &batch); +} + +Status RedisLists::RPushx(const Slice& key, const std::vector& values, uint64_t* len) { + *len = 0; + rocksdb::WriteBatch batch; + + ScopeRecordLock l(lock_mgr_, key); + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t version = parsed_lists_meta_value.version(); + for (const auto& value : values) { + uint64_t index = parsed_lists_meta_value.right_index(); + parsed_lists_meta_value.ModifyCount(1); + parsed_lists_meta_value.ModifyRightIndex(1); + ListsDataKey lists_data_key(key, version, index); + batch.Put(handles_[1], lists_data_key.Encode(), value); + } + batch.Put(handles_[0], key, meta_value); + *len = parsed_lists_meta_value.count(); + return db_->Write(default_write_options_, &batch); + } + } + return s; +} + +Status 
RedisLists::PKScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key) { + next_key->clear(); + + std::string key; + int32_t remain = limit; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + bool start_no_limit = key_start.compare("") == 0; + bool end_no_limit = key_end.compare("") == 0; + + if (!start_no_limit && !end_no_limit && (key_start.compare(key_end) > 0)) { + return Status::InvalidArgument("error in given range"); + } + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + if (start_no_limit) { + it->SeekToFirst(); + } else { + it->Seek(key_start); + } + + while (it->Valid() && remain > 0 && (end_no_limit || it->key().compare(key_end) <= 0)) { + ParsedListsMetaValue parsed_lists_meta_value(it->value()); + if (parsed_lists_meta_value.IsStale() || parsed_lists_meta_value.count() == 0) { + it->Next(); + } else { + key = it->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + keys->push_back(key); + } + remain--; + it->Next(); + } + } + + while (it->Valid() && (end_no_limit || it->key().compare(key_end) <= 0)) { + ParsedListsMetaValue parsed_lists_meta_value(it->value()); + if (parsed_lists_meta_value.IsStale() || parsed_lists_meta_value.count() == 0) { + it->Next(); + } else { + *next_key = it->key().ToString(); + break; + } + } + delete it; + return Status::OK(); +} + +Status RedisLists::PKRScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key) { + next_key->clear(); + + std::string key; + int32_t remain = limit; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + 
iterator_options.fill_cache = false; + + bool start_no_limit = key_start.compare("") == 0; + bool end_no_limit = key_end.compare("") == 0; + + if (!start_no_limit && !end_no_limit && (key_start.compare(key_end) < 0)) { + return Status::InvalidArgument("error in given range"); + } + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + if (start_no_limit) { + it->SeekToLast(); + } else { + it->SeekForPrev(key_start); + } + + while (it->Valid() && remain > 0 && (end_no_limit || it->key().compare(key_end) >= 0)) { + ParsedListsMetaValue parsed_lists_meta_value(it->value()); + if (parsed_lists_meta_value.IsStale() || parsed_lists_meta_value.count() == 0) { + it->Prev(); + } else { + key = it->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + keys->push_back(key); + } + remain--; + it->Prev(); + } + } + + while (it->Valid() && (end_no_limit || it->key().compare(key_end) >= 0)) { + ParsedListsMetaValue parsed_lists_meta_value(it->value()); + if (parsed_lists_meta_value.IsStale() || parsed_lists_meta_value.count() == 0) { + it->Prev(); + } else { + *next_key = it->key().ToString(); + break; + } + } + delete it; + return Status::OK(); +} + +Status RedisLists::Expire(const Slice& key, int32_t ttl) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } + + if (ttl > 0) { + parsed_lists_meta_value.SetRelativeTimestamp(ttl); + s = db_->Put(default_write_options_, handles_[0], key, meta_value); + } else { + parsed_lists_meta_value.InitialMetaValue(); + s = db_->Put(default_write_options_, handles_[0], key, meta_value); + } + } + return s; +} + +Status RedisLists::Del(const 
Slice& key) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + uint32_t statistic = parsed_lists_meta_value.count(); + parsed_lists_meta_value.InitialMetaValue(); + s = db_->Put(default_write_options_, handles_[0], key, meta_value); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + } + } + return s; +} + +bool RedisLists::Scan(const std::string& start_key, const std::string& pattern, std::vector* keys, + int64_t* count, std::string* next_key) { + std::string meta_key; + bool is_finish = true; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + + it->Seek(start_key); + while (it->Valid() && (*count) > 0) { + ParsedListsMetaValue parsed_lists_meta_value(it->value()); + if (parsed_lists_meta_value.IsStale() || parsed_lists_meta_value.count() == 0) { + it->Next(); + continue; + } else { + meta_key = it->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), meta_key.data(), meta_key.size(), 0) != 0) { + keys->push_back(meta_key); + } + (*count)--; + it->Next(); + } + } + + std::string prefix = isTailWildcard(pattern) ? 
pattern.substr(0, pattern.size() - 1) : ""; + if (it->Valid() && (it->key().compare(prefix) <= 0 || it->key().starts_with(prefix))) { + *next_key = it->key().ToString(); + is_finish = false; + } else { + *next_key = ""; + } + delete it; + return is_finish; +} + +bool RedisLists::PKExpireScan(const std::string& start_key, int32_t min_timestamp, int32_t max_timestamp, + std::vector* keys, int64_t* leftover_visits, std::string* next_key) { + bool is_finish = true; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + it->Seek(start_key); + while (it->Valid() && (*leftover_visits) > 0) { + ParsedListsMetaValue parsed_lists_meta_value(it->value()); + if (parsed_lists_meta_value.IsStale() || parsed_lists_meta_value.count() == 0) { + it->Next(); + continue; + } else { + if (min_timestamp < parsed_lists_meta_value.timestamp() && parsed_lists_meta_value.timestamp() < max_timestamp) { + keys->push_back(it->key().ToString()); + } + (*leftover_visits)--; + it->Next(); + } + } + + if (it->Valid()) { + is_finish = false; + *next_key = it->key().ToString(); + } else { + *next_key = ""; + } + delete it; + return is_finish; +} + +Status RedisLists::Expireat(const Slice& key, int32_t timestamp) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + if (timestamp > 0) { + parsed_lists_meta_value.set_timestamp(timestamp); + } else { + parsed_lists_meta_value.InitialMetaValue(); + } + return db_->Put(default_write_options_, handles_[0], 
key, meta_value); + } + } + return s; +} + +Status RedisLists::Persist(const Slice& key) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t timestamp = parsed_lists_meta_value.timestamp(); + if (timestamp == 0) { + return Status::NotFound("Not have an associated timeout"); + } else { + parsed_lists_meta_value.set_timestamp(0); + return db_->Put(default_write_options_, handles_[0], key, meta_value); + } + } + } + return s; +} + +Status RedisLists::TTL(const Slice& key, int64_t* timestamp) { + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedListsMetaValue parsed_lists_meta_value(&meta_value); + if (parsed_lists_meta_value.IsStale()) { + *timestamp = -2; + return Status::NotFound("Stale"); + } else if (parsed_lists_meta_value.count() == 0) { + *timestamp = -2; + return Status::NotFound(); + } else { + *timestamp = parsed_lists_meta_value.timestamp(); + if (*timestamp == 0) { + *timestamp = -1; + } else { + int64_t curtime; + rocksdb::Env::Default()->GetCurrentTime(&curtime); + *timestamp = *timestamp - curtime >= 0 ? 
*timestamp - curtime : -2; + } + } + } else if (s.IsNotFound()) { + *timestamp = -2; + } + return s; +} + +void RedisLists::ScanDatabase() { + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + auto current_time = static_cast(time(nullptr)); + + LOG(INFO) << "***************List Meta Data***************"; + auto meta_iter = db_->NewIterator(iterator_options, handles_[0]); + for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { + ParsedListsMetaValue parsed_lists_meta_value(meta_iter->value()); + int32_t survival_time = 0; + if (parsed_lists_meta_value.timestamp() != 0) { + survival_time = parsed_lists_meta_value.timestamp() - current_time > 0 + ? parsed_lists_meta_value.timestamp() - current_time + : -1; + } + + LOG(INFO) << fmt::format( + "[key : {:<30}] [count : {:<10}] [left index : {:<10}] [right index : {:<10}] [timestamp : {:<10}] [version : " + "{}] [survival_time : {}]", + meta_iter->key().ToString(), parsed_lists_meta_value.count(), parsed_lists_meta_value.left_index(), + parsed_lists_meta_value.right_index(), parsed_lists_meta_value.timestamp(), parsed_lists_meta_value.version(), + survival_time); + } + delete meta_iter; + + LOG(INFO) << "***************List Node Data***************"; + auto data_iter = db_->NewIterator(iterator_options, handles_[1]); + for (data_iter->SeekToFirst(); data_iter->Valid(); data_iter->Next()) { + ParsedListsDataKey parsed_lists_data_key(data_iter->key()); + + LOG(INFO) << fmt::format("[key : {:<30}] [index : {:<10}] [data : {:<20}] [version : {}]", + parsed_lists_data_key.key().ToString(), parsed_lists_data_key.index(), + data_iter->value().ToString(), parsed_lists_data_key.version()); + } + delete data_iter; +} + +} // namespace storage diff --git a/src/storage/src/redis_lists.h b/src/storage/src/redis_lists.h new file mode 100644 index 000000000..0a73a43bd --- 
/dev/null +++ b/src/storage/src/redis_lists.h @@ -0,0 +1,67 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include +#include +#include + +#include "src/custom_comparator.h" +#include "src/redis.h" + +namespace storage { + +class RedisLists : public Redis { + public: + RedisLists(Storage* s, const DataType& type); + ~RedisLists() override = default; + + // Common commands + Status Open(const StorageOptions& storage_options, const std::string& db_path) override; + Status CompactRange(const rocksdb::Slice* begin, const rocksdb::Slice* end, + const ColumnFamilyType& type = kMetaAndData) override; + Status GetProperty(const std::string& property, uint64_t* out) override; + Status ScanKeyNum(KeyInfo* key_info) override; + Status ScanKeys(const std::string& pattern, std::vector* keys) override; + Status PKPatternMatchDel(const std::string& pattern, int32_t* ret) override; + + // Lists commands; + Status LIndex(const Slice& key, int64_t index, std::string* element); + Status LInsert(const Slice& key, const BeforeOrAfter& before_or_after, const std::string& pivot, + const std::string& value, int64_t* ret); + Status LLen(const Slice& key, uint64_t* len); + Status LPop(const Slice& key, int64_t count, std::vector* elements); + Status LPush(const Slice& key, const std::vector& values, uint64_t* ret); + Status LPushx(const Slice& key, const std::vector& values, uint64_t* len); + Status LRange(const Slice& key, int64_t start, int64_t stop, std::vector* ret); + Status LRem(const Slice& key, int64_t count, const Slice& value, uint64_t* ret); + Status LSet(const Slice& key, int64_t index, const Slice& value); + Status LTrim(const Slice& key, int64_t start, int64_t stop); + Status RPop(const Slice& key, int64_t 
count, std::vector* elements); + Status RPoplpush(const Slice& source, const Slice& destination, std::string* element); + Status RPush(const Slice& key, const std::vector& values, uint64_t* ret); + Status RPushx(const Slice& key, const std::vector& values, uint64_t* len); + Status PKScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key); + Status PKRScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key); + + // Keys Commands + Status Expire(const Slice& key, int32_t ttl) override; + Status Del(const Slice& key) override; + bool Scan(const std::string& start_key, const std::string& pattern, std::vector* keys, int64_t* count, + std::string* next_key) override; + bool PKExpireScan(const std::string& start_key, int32_t min_timestamp, int32_t max_timestamp, + std::vector* keys, int64_t* leftover_visits, std::string* next_key) override; + Status Expireat(const Slice& key, int32_t timestamp) override; + Status Persist(const Slice& key) override; + Status TTL(const Slice& key, int64_t* timestamp) override; + + // Iterate all data + void ScanDatabase(); +}; + +} // namespace storage diff --git a/src/storage/src/redis_sets.cc b/src/storage/src/redis_sets.cc new file mode 100644 index 000000000..308e6278f --- /dev/null +++ b/src/storage/src/redis_sets.cc @@ -0,0 +1,1538 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#include "src/redis_sets.h" + +#include +#include +#include +#include + +#include +#include + +#include "src/base_filter.h" +#include "src/scope_record_lock.h" +#include "src/scope_snapshot.h" +#include "storage/util.h" + +namespace storage { + +RedisSets::RedisSets(Storage* const s, const DataType& type) : Redis(s, type) { + spop_counts_store_ = std::make_unique>(); + spop_counts_store_->SetCapacity(1000); +} + +RedisSets::~RedisSets() = default; + +rocksdb::Status RedisSets::Open(const StorageOptions& storage_options, const std::string& db_path) { + statistics_store_->SetCapacity(storage_options.statistics_max_size); + small_compaction_threshold_ = storage_options.small_compaction_threshold; + + rocksdb::Options ops(storage_options.options); + rocksdb::Status s = rocksdb::DB::Open(ops, db_path, &db_); + if (s.ok()) { + // create column family + rocksdb::ColumnFamilyHandle* cf; + rocksdb::ColumnFamilyOptions cfo; + s = db_->CreateColumnFamily(cfo, "member_cf", &cf); + if (!s.ok()) { + return s; + } + // close DB + delete cf; + delete db_; + } + + // Open + rocksdb::DBOptions db_ops(storage_options.options); + rocksdb::ColumnFamilyOptions meta_cf_ops(storage_options.options); + rocksdb::ColumnFamilyOptions member_cf_ops(storage_options.options); + meta_cf_ops.compaction_filter_factory = std::make_shared(); + member_cf_ops.compaction_filter_factory = std::make_shared(&db_, &handles_); + + // use the bloom filter policy to reduce disk reads + rocksdb::BlockBasedTableOptions table_ops(storage_options.table_options); + table_ops.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true)); + rocksdb::BlockBasedTableOptions meta_cf_table_ops(table_ops); + rocksdb::BlockBasedTableOptions member_cf_table_ops(table_ops); + if (!storage_options.share_block_cache && storage_options.block_cache_size > 0) { + meta_cf_table_ops.block_cache = rocksdb::NewLRUCache(storage_options.block_cache_size); + member_cf_table_ops.block_cache = 
rocksdb::NewLRUCache(storage_options.block_cache_size); + } + meta_cf_ops.table_factory.reset(rocksdb::NewBlockBasedTableFactory(meta_cf_table_ops)); + member_cf_ops.table_factory.reset(rocksdb::NewBlockBasedTableFactory(member_cf_table_ops)); + + std::vector column_families; + // Meta CF + column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, meta_cf_ops); + // Member CF + column_families.emplace_back("member_cf", member_cf_ops); + return rocksdb::DB::Open(db_ops, db_path, column_families, &handles_, &db_); +} + +rocksdb::Status RedisSets::CompactRange(const rocksdb::Slice* begin, const rocksdb::Slice* end, const ColumnFamilyType& type) { + if (type == kMeta || type == kMetaAndData) { + db_->CompactRange(default_compact_range_options_, handles_[0], begin, end); + } + if (type == kData || type == kMetaAndData) { + db_->CompactRange(default_compact_range_options_, handles_[1], begin, end); + } + return rocksdb::Status::OK(); +} + +rocksdb::Status RedisSets::GetProperty(const std::string& property, uint64_t* out) { + std::string value; + db_->GetProperty(handles_[0], property, &value); + *out = std::strtoull(value.c_str(), nullptr, 10); + db_->GetProperty(handles_[1], property, &value); + *out += std::strtoull(value.c_str(), nullptr, 10); + return rocksdb::Status::OK(); +} + +rocksdb::Status RedisSets::ScanKeyNum(KeyInfo* key_info) { + uint64_t keys = 0; + uint64_t expires = 0; + uint64_t ttl_sum = 0; + uint64_t invaild_keys = 0; + + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + int64_t curtime; + rocksdb::Env::Default()->GetCurrentTime(&curtime); + + rocksdb::Iterator* iter = db_->NewIterator(iterator_options, handles_[0]); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedSetsMetaValue parsed_sets_meta_value(iter->value()); + if (parsed_sets_meta_value.IsStale() || 
parsed_sets_meta_value.count() == 0) { + invaild_keys++; + } else { + keys++; + if (!parsed_sets_meta_value.IsPermanentSurvival()) { + expires++; + ttl_sum += parsed_sets_meta_value.timestamp() - curtime; + } + } + } + delete iter; + + key_info->keys = keys; + key_info->expires = expires; + key_info->avg_ttl = (expires != 0) ? ttl_sum / expires : 0; + key_info->invaild_keys = invaild_keys; + return rocksdb::Status::OK(); +} + +rocksdb::Status RedisSets::ScanKeys(const std::string& pattern, std::vector* keys) { + std::string key; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + rocksdb::Iterator* iter = db_->NewIterator(iterator_options, handles_[0]); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedSetsMetaValue parsed_sets_meta_value(iter->value()); + if (!parsed_sets_meta_value.IsStale() && parsed_sets_meta_value.count() != 0) { + key = iter->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + keys->push_back(key); + } + } + } + delete iter; + return rocksdb::Status::OK(); +} + +rocksdb::Status RedisSets::PKPatternMatchDel(const std::string& pattern, int32_t* ret) { + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + std::string key; + std::string meta_value; + int32_t total_delete = 0; + rocksdb::Status s; + rocksdb::WriteBatch batch; + rocksdb::Iterator* iter = db_->NewIterator(iterator_options, handles_[0]); + iter->SeekToFirst(); + while (iter->Valid()) { + key = iter->key().ToString(); + meta_value = iter->value().ToString(); + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (!parsed_sets_meta_value.IsStale() && (parsed_sets_meta_value.count() != 0) && + (StringMatch(pattern.data(), 
pattern.size(), key.data(), key.size(), 0) != 0)) { + parsed_sets_meta_value.InitialMetaValue(); + batch.Put(handles_[0], key, meta_value); + } + if (static_cast(batch.Count()) >= BATCH_DELETE_LIMIT) { + s = db_->Write(default_write_options_, &batch); + if (s.ok()) { + total_delete += static_cast(batch.Count()); + batch.Clear(); + } else { + *ret = total_delete; + return s; + } + } + iter->Next(); + } + if (batch.Count() != 0U) { + s = db_->Write(default_write_options_, &batch); + if (s.ok()) { + total_delete += static_cast(batch.Count()); + batch.Clear(); + } + } + + *ret = total_delete; + return s; +} + +rocksdb::Status RedisSets::SAdd(const Slice& key, const std::vector& members, int32_t* ret) { + std::unordered_set unique; + std::vector filtered_members; + for (const auto& member : members) { + if (unique.find(member) == unique.end()) { + unique.insert(member); + filtered_members.push_back(member); + } + } + + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + int32_t version = 0; + std::string meta_value; + rocksdb::Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale() || parsed_sets_meta_value.count() == 0) { + version = parsed_sets_meta_value.InitialMetaValue(); + if (!parsed_sets_meta_value.check_set_count(static_cast(filtered_members.size()))) { + return Status::InvalidArgument("set size overflow"); + } + parsed_sets_meta_value.set_count(static_cast(filtered_members.size())); + batch.Put(handles_[0], key, meta_value); + for (const auto& member : filtered_members) { + SetsMemberKey sets_member_key(key, version, member); + batch.Put(handles_[1], sets_member_key.Encode(), Slice()); + } + *ret = static_cast(filtered_members.size()); + } else { + int32_t cnt = 0; + std::string member_value; + version = parsed_sets_meta_value.version(); + for (const auto& member : filtered_members) { + SetsMemberKey 
sets_member_key(key, version, member); + s = db_->Get(default_read_options_, handles_[1], sets_member_key.Encode(), &member_value); + if (s.ok()) { + } else if (s.IsNotFound()) { + cnt++; + batch.Put(handles_[1], sets_member_key.Encode(), Slice()); + } else { + return s; + } + } + *ret = cnt; + if (cnt == 0) { + return rocksdb::Status::OK(); + } else { + if (!parsed_sets_meta_value.CheckModifyCount(cnt)){ + return Status::InvalidArgument("set size overflow"); + } + parsed_sets_meta_value.ModifyCount(cnt); + batch.Put(handles_[0], key, meta_value); + } + } + } else if (s.IsNotFound()) { + char str[4]; + EncodeFixed32(str, filtered_members.size()); + SetsMetaValue sets_meta_value(Slice(str, sizeof(int32_t))); + version = sets_meta_value.UpdateVersion(); + batch.Put(handles_[0], key, sets_meta_value.Encode()); + for (const auto& member : filtered_members) { + SetsMemberKey sets_member_key(key, version, member); + batch.Put(handles_[1], sets_member_key.Encode(), Slice()); + } + *ret = static_cast(filtered_members.size()); + } else { + return s; + } + return db_->Write(default_write_options_, &batch); +} + +rocksdb::Status RedisSets::SCard(const Slice& key, int32_t* ret) { + *ret = 0; + std::string meta_value; + rocksdb::Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale()) { + return rocksdb::Status::NotFound("Stale"); + } else { + *ret = parsed_sets_meta_value.count(); + if (*ret == 0) { + return rocksdb::Status::NotFound("Deleted"); + } + } + } + return s; +} + +rocksdb::Status RedisSets::SDiff(const std::vector& keys, std::vector* members) { + if (keys.empty()) { + return rocksdb::Status::Corruption("SDiff invalid parameter, no keys"); + } + + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + int32_t version = 0; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = 
snapshot; + std::vector vaild_sets; + rocksdb::Status s; + + for (uint32_t idx = 1; idx < keys.size(); ++idx) { + s = db_->Get(read_options, handles_[0], keys[idx], &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (!parsed_sets_meta_value.IsStale() && parsed_sets_meta_value.count() != 0) { + vaild_sets.push_back({keys[idx], parsed_sets_meta_value.version()}); + } + } else if (!s.IsNotFound()) { + return s; + } + } + + s = db_->Get(read_options, handles_[0], keys[0], &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (!parsed_sets_meta_value.IsStale() && parsed_sets_meta_value.count() != 0) { + bool found; + Slice prefix; + std::string member_value; + version = parsed_sets_meta_value.version(); + SetsMemberKey sets_member_key(keys[0], version, Slice()); + prefix = sets_member_key.Encode(); + auto iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); iter->Next()) { + ParsedSetsMemberKey parsed_sets_member_key(iter->key()); + Slice member = parsed_sets_member_key.member(); + + found = false; + for (const auto& key_version : vaild_sets) { + SetsMemberKey sets_member_key(key_version.key, key_version.version, member); + s = db_->Get(read_options, handles_[1], sets_member_key.Encode(), &member_value); + if (s.ok()) { + found = true; + break; + } else if (!s.IsNotFound()) { + delete iter; + return s; + } + } + if (!found) { + members->push_back(member.ToString()); + } + } + delete iter; + } + } else if (!s.IsNotFound()) { + return s; + } + return rocksdb::Status::OK(); +} + +rocksdb::Status RedisSets::SDiffstore(const Slice& destination, const std::vector& keys, std::vector& value_to_dest, int32_t* ret) { + if (keys.empty()) { + return rocksdb::Status::Corruption("SDiffsotre invalid parameter, no keys"); + } + + rocksdb::WriteBatch batch; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; 
+ + std::string meta_value; + int32_t version = 0; + ScopeRecordLock l(lock_mgr_, destination); + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + std::vector vaild_sets; + rocksdb::Status s; + + for (uint32_t idx = 1; idx < keys.size(); ++idx) { + s = db_->Get(read_options, handles_[0], keys[idx], &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (!parsed_sets_meta_value.IsStale() && parsed_sets_meta_value.count() != 0) { + vaild_sets.push_back({keys[idx], parsed_sets_meta_value.version()}); + } + } else if (!s.IsNotFound()) { + return s; + } + } + + std::vector members; + s = db_->Get(read_options, handles_[0], keys[0], &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (!parsed_sets_meta_value.IsStale() && parsed_sets_meta_value.count() != 0) { + bool found; + std::string member_value; + version = parsed_sets_meta_value.version(); + SetsMemberKey sets_member_key(keys[0], version, Slice()); + Slice prefix = sets_member_key.Encode(); + auto iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); iter->Next()) { + ParsedSetsMemberKey parsed_sets_member_key(iter->key()); + Slice member = parsed_sets_member_key.member(); + + found = false; + for (const auto& key_version : vaild_sets) { + SetsMemberKey sets_member_key(key_version.key, key_version.version, member); + s = db_->Get(read_options, handles_[1], sets_member_key.Encode(), &member_value); + if (s.ok()) { + found = true; + break; + } else if (!s.IsNotFound()) { + delete iter; + return s; + } + } + if (!found) { + members.push_back(member.ToString()); + } + } + delete iter; + } + } else if (!s.IsNotFound()) { + return s; + } + + uint32_t statistic = 0; + s = db_->Get(read_options, handles_[0], destination, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + statistic = 
parsed_sets_meta_value.count(); + version = parsed_sets_meta_value.InitialMetaValue(); + if (!parsed_sets_meta_value.check_set_count(static_cast(members.size()))) { + return Status::InvalidArgument("set size overflow"); + } + parsed_sets_meta_value.set_count(static_cast(members.size())); + batch.Put(handles_[0], destination, meta_value); + } else if (s.IsNotFound()) { + char str[4]; + EncodeFixed32(str, members.size()); + SetsMetaValue sets_meta_value(Slice(str, sizeof(int32_t))); + version = sets_meta_value.UpdateVersion(); + batch.Put(handles_[0], destination, sets_meta_value.Encode()); + } else { + return s; + } + for (const auto& member : members) { + SetsMemberKey sets_member_key(destination, version, member); + batch.Put(handles_[1], sets_member_key.Encode(), Slice()); + } + *ret = static_cast(members.size()); + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(destination.ToString(), statistic); + value_to_dest = std::move(members); + return s; +} + +rocksdb::Status RedisSets::SInter(const std::vector& keys, std::vector* members) { + if (keys.empty()) { + return rocksdb::Status::Corruption("SInter invalid parameter, no keys"); + } + + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + int32_t version = 0; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + std::vector vaild_sets; + rocksdb::Status s; + + for (uint32_t idx = 1; idx < keys.size(); ++idx) { + s = db_->Get(read_options, handles_[0], keys[idx], &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale() || parsed_sets_meta_value.count() == 0) { + return rocksdb::Status::OK(); + } else { + vaild_sets.push_back({keys[idx], parsed_sets_meta_value.version()}); + } + } else if (s.IsNotFound()) { + return rocksdb::Status::OK(); + } else { + return s; + } + } + + s = db_->Get(read_options, handles_[0], keys[0], &meta_value); + if 
(s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale() || parsed_sets_meta_value.count() == 0) { + return rocksdb::Status::OK(); + } else { + bool reliable; + std::string member_value; + version = parsed_sets_meta_value.version(); + SetsMemberKey sets_member_key(keys[0], version, Slice()); + Slice prefix = sets_member_key.Encode(); + auto iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); iter->Next()) { + ParsedSetsMemberKey parsed_sets_member_key(iter->key()); + Slice member = parsed_sets_member_key.member(); + + reliable = true; + for (const auto& key_version : vaild_sets) { + SetsMemberKey sets_member_key(key_version.key, key_version.version, member); + s = db_->Get(read_options, handles_[1], sets_member_key.Encode(), &member_value); + if (s.ok()) { + continue; + } else if (s.IsNotFound()) { + reliable = false; + break; + } else { + delete iter; + return s; + } + } + if (reliable) { + members->push_back(member.ToString()); + } + } + delete iter; + } + } else if (s.IsNotFound()) { + return rocksdb::Status::OK(); + } else { + return s; + } + return rocksdb::Status::OK(); +} + +rocksdb::Status RedisSets::SInterstore(const Slice& destination, const std::vector& keys, std::vector& value_to_dest, int32_t* ret) { + if (keys.empty()) { + return rocksdb::Status::Corruption("SInterstore invalid parameter, no keys"); + } + + rocksdb::WriteBatch batch; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + int32_t version = 0; + bool have_invalid_sets = false; + ScopeRecordLock l(lock_mgr_, destination); + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + std::vector vaild_sets; + rocksdb::Status s; + + for (uint32_t idx = 1; idx < keys.size(); ++idx) { + s = db_->Get(read_options, handles_[0], keys[idx], &meta_value); + if (s.ok()) { + ParsedSetsMetaValue 
parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale() || parsed_sets_meta_value.count() == 0) { + have_invalid_sets = true; + break; + } else { + vaild_sets.push_back({keys[idx], parsed_sets_meta_value.version()}); + } + } else if (s.IsNotFound()) { + have_invalid_sets = true; + break; + } else { + return s; + } + } + + std::vector members; + if (!have_invalid_sets) { + s = db_->Get(read_options, handles_[0], keys[0], &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale() || parsed_sets_meta_value.count() == 0) { + have_invalid_sets = true; + } else { + bool reliable; + std::string member_value; + version = parsed_sets_meta_value.version(); + SetsMemberKey sets_member_key(keys[0], version, Slice()); + Slice prefix = sets_member_key.Encode(); + auto iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); iter->Next()) { + ParsedSetsMemberKey parsed_sets_member_key(iter->key()); + Slice member = parsed_sets_member_key.member(); + + reliable = true; + for (const auto& key_version : vaild_sets) { + SetsMemberKey sets_member_key(key_version.key, key_version.version, member); + s = db_->Get(read_options, handles_[1], sets_member_key.Encode(), &member_value); + if (s.ok()) { + continue; + } else if (s.IsNotFound()) { + reliable = false; + break; + } else { + delete iter; + return s; + } + } + if (reliable) { + members.push_back(member.ToString()); + } + } + delete iter; + } + } else if (s.IsNotFound()) { + } else { + return s; + } + } + + uint32_t statistic = 0; + s = db_->Get(read_options, handles_[0], destination, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + statistic = parsed_sets_meta_value.count(); + version = parsed_sets_meta_value.InitialMetaValue(); + if (!parsed_sets_meta_value.check_set_count(static_cast(members.size()))) { + return 
Status::InvalidArgument("set size overflow"); + } + parsed_sets_meta_value.set_count(static_cast(members.size())); + batch.Put(handles_[0], destination, meta_value); + } else if (s.IsNotFound()) { + char str[4]; + EncodeFixed32(str, members.size()); + SetsMetaValue sets_meta_value(Slice(str, sizeof(int32_t))); + version = sets_meta_value.UpdateVersion(); + batch.Put(handles_[0], destination, sets_meta_value.Encode()); + } else { + return s; + } + for (const auto& member : members) { + SetsMemberKey sets_member_key(destination, version, member); + batch.Put(handles_[1], sets_member_key.Encode(), Slice()); + } + *ret = static_cast(members.size()); + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(destination.ToString(), statistic); + value_to_dest = std::move(members); + return s; +} + +rocksdb::Status RedisSets::SIsmember(const Slice& key, const Slice& member, int32_t* ret) { + *ret = 0; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + int32_t version = 0; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + rocksdb::Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale()) { + return rocksdb::Status::NotFound("Stale"); + } else if (parsed_sets_meta_value.count() == 0) { + return rocksdb::Status::NotFound(); + } else { + std::string member_value; + version = parsed_sets_meta_value.version(); + SetsMemberKey sets_member_key(key, version, member); + s = db_->Get(read_options, handles_[1], sets_member_key.Encode(), &member_value); + *ret = s.ok() ? 
1 : 0; + } + } else if (s.IsNotFound()) { + *ret = 0; + } + return s; +} + +rocksdb::Status RedisSets::SMembers(const Slice& key, std::vector* members) { + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + int32_t version = 0; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + rocksdb::Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale()) { + return rocksdb::Status::NotFound("Stale"); + } else if (parsed_sets_meta_value.count() == 0) { + return rocksdb::Status::NotFound(); + } else { + version = parsed_sets_meta_value.version(); + SetsMemberKey sets_member_key(key, version, Slice()); + Slice prefix = sets_member_key.Encode(); + auto iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); iter->Next()) { + ParsedSetsMemberKey parsed_sets_member_key(iter->key()); + members->push_back(parsed_sets_member_key.member().ToString()); + } + delete iter; + } + } + return s; +} + +rocksdb::Status RedisSets::SMove(const Slice& source, const Slice& destination, const Slice& member, int32_t* ret) { + *ret = 0; + rocksdb::WriteBatch batch; + rocksdb::ReadOptions read_options; + + int32_t version = 0; + uint32_t statistic = 0; + std::string meta_value; + std::vector keys{source.ToString(), destination.ToString()}; + MultiScopeRecordLock ml(lock_mgr_, keys); + + if (source == destination) { + *ret = 1; + return rocksdb::Status::OK(); + } + + rocksdb::Status s = db_->Get(default_read_options_, handles_[0], source, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale()) { + return rocksdb::Status::NotFound("Stale"); + } else if (parsed_sets_meta_value.count() == 0) { + return rocksdb::Status::NotFound(); + } else { + std::string member_value; 
+ version = parsed_sets_meta_value.version(); + SetsMemberKey sets_member_key(source, version, member); + s = db_->Get(default_read_options_, handles_[1], sets_member_key.Encode(), &member_value); + if (s.ok()) { + *ret = 1; + if (!parsed_sets_meta_value.CheckModifyCount(-1)){ + return Status::InvalidArgument("set size overflow"); + } + parsed_sets_meta_value.ModifyCount(-1); + batch.Put(handles_[0], source, meta_value); + batch.Delete(handles_[1], sets_member_key.Encode()); + statistic++; + } else if (s.IsNotFound()) { + *ret = 0; + return rocksdb::Status::NotFound(); + } else { + return s; + } + } + } else if (s.IsNotFound()) { + *ret = 0; + return rocksdb::Status::NotFound(); + } else { + return s; + } + + s = db_->Get(default_read_options_, handles_[0], destination, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale() || parsed_sets_meta_value.count() == 0) { + version = parsed_sets_meta_value.InitialMetaValue(); + parsed_sets_meta_value.set_count(1); + batch.Put(handles_[0], destination, meta_value); + SetsMemberKey sets_member_key(destination, version, member); + batch.Put(handles_[1], sets_member_key.Encode(), Slice()); + } else { + std::string member_value; + version = parsed_sets_meta_value.version(); + SetsMemberKey sets_member_key(destination, version, member); + s = db_->Get(default_read_options_, handles_[1], sets_member_key.Encode(), &member_value); + if (s.IsNotFound()) { + if (!parsed_sets_meta_value.CheckModifyCount(1)){ + return Status::InvalidArgument("set size overflow"); + } + parsed_sets_meta_value.ModifyCount(1); + batch.Put(handles_[0], destination, meta_value); + batch.Put(handles_[1], sets_member_key.Encode(), Slice()); + } else if (!s.ok()) { + return s; + } + } + } else if (s.IsNotFound()) { + char str[4]; + EncodeFixed32(str, 1); + SetsMetaValue sets_meta_value(Slice(str, sizeof(int32_t))); + version = sets_meta_value.UpdateVersion(); + batch.Put(handles_[0], 
destination, sets_meta_value.Encode()); + SetsMemberKey sets_member_key(destination, version, member); + batch.Put(handles_[1], sets_member_key.Encode(), Slice()); + } else { + return s; + } + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(source.ToString(), 1); + return s; +} + +rocksdb::Status RedisSets::SPop(const Slice& key, std::vector* members, bool* need_compact, int64_t cnt) { + std::default_random_engine engine; + + std::string meta_value; + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + + uint64_t start_us = pstd::NowMicros(); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_sets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t length = parsed_sets_meta_value.count(); + if (length < cnt) { + int32_t size = parsed_sets_meta_value.count(); + int32_t cur_index = 0; + int32_t version = parsed_sets_meta_value.version(); + SetsMemberKey sets_member_key(key, version, Slice()); + auto iter = db_->NewIterator(default_read_options_, handles_[1]); + for (iter->Seek(sets_member_key.Encode()); + iter->Valid() && cur_index < size; + iter->Next(), cur_index++) { + + batch.Delete(handles_[1], iter->key()); + ParsedSetsMemberKey parsed_sets_member_key(iter->key()); + members->push_back(parsed_sets_member_key.member().ToString()); + + } + + //parsed_sets_meta_value.ModifyCount(-cnt); + //batch.Put(handles_[0], key, meta_value); + batch.Delete(handles_[0], key); + delete iter; + + } else { + engine.seed(time(nullptr)); + int32_t cur_index = 0; + int32_t size = parsed_sets_meta_value.count(); + int32_t target_index = -1; + int32_t version = parsed_sets_meta_value.version(); + std::unordered_set sets_index; + int32_t modnum = size; + + for (int64_t cur_round = 0; + cur_round < cnt; + cur_round++) { + 
do { + target_index = static_cast( engine() % modnum); + } while (sets_index.find(target_index) != sets_index.end()); + sets_index.insert(target_index); + } + + SetsMemberKey sets_member_key(key, version, Slice()); + int64_t del_count = 0; + auto iter = db_->NewIterator(default_read_options_, handles_[1]); + for (iter->Seek(sets_member_key.Encode()); + iter->Valid() && cur_index < size; + iter->Next(), cur_index++) { + if (del_count == cnt) { + break; + } + if (sets_index.find(cur_index) != sets_index.end()) { + del_count++; + batch.Delete(handles_[1], iter->key()); + ParsedSetsMemberKey parsed_sets_member_key(iter->key()); + members->push_back(parsed_sets_member_key.member().ToString()); + } + } + + if (!parsed_sets_meta_value.CheckModifyCount(static_cast(-cnt))){ + return Status::InvalidArgument("set size overflow"); + } + parsed_sets_meta_value.ModifyCount(static_cast(-cnt)); + batch.Put(handles_[0], key, meta_value); + delete iter; + + } + + } + } else { + return s; + } + uint64_t count = 0; + uint64_t duration = pstd::NowMicros() - start_us; + AddAndGetSpopCount(key.ToString(), &count); + if (duration >= SPOP_COMPACT_THRESHOLD_DURATION + || count >= SPOP_COMPACT_THRESHOLD_COUNT) { + *need_compact = true; + ResetSpopCount(key.ToString()); + } + return db_->Write(default_write_options_, &batch); +} + +rocksdb::Status RedisSets::ResetSpopCount(const std::string& key) { return spop_counts_store_->Remove(key); } + +rocksdb::Status RedisSets::AddAndGetSpopCount(const std::string& key, uint64_t* count) { + size_t old_count = 0; + spop_counts_store_->Lookup(key, &old_count); + spop_counts_store_->Insert(key, old_count + 1); + *count = old_count + 1; + return rocksdb::Status::OK(); +} + +rocksdb::Status RedisSets::SRandmember(const Slice& key, int32_t count, std::vector* members) { + if (count == 0) { + return rocksdb::Status::OK(); + } + + members->clear(); + auto last_seed = pstd::NowMicros(); + std::default_random_engine engine; + + std::string meta_value; + 
rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + std::vector targets; + std::unordered_set unique; + + rocksdb::Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale()) { + return rocksdb::Status::NotFound("Stale"); + } else if (parsed_sets_meta_value.count() == 0) { + return rocksdb::Status::NotFound(); + } else { + int32_t size = parsed_sets_meta_value.count(); + int32_t version = parsed_sets_meta_value.version(); + if (count > 0) { + count = count <= size ? count : size; + while (targets.size() < static_cast(count)) { + engine.seed(last_seed); + last_seed = static_cast(engine()); + auto pos = static_cast(last_seed % size); + if (unique.find(pos) == unique.end()) { + unique.insert(pos); + targets.push_back(pos); + } + } + } else { + count = -count; + while (targets.size() < static_cast(count)) { + engine.seed(last_seed); + last_seed = static_cast(engine()); + targets.push_back(static_cast(last_seed % size)); + } + } + std::sort(targets.begin(), targets.end()); + + int32_t cur_index = 0; + int32_t idx = 0; + SetsMemberKey sets_member_key(key, version, Slice()); + auto iter = db_->NewIterator(default_read_options_, handles_[1]); + for (iter->Seek(sets_member_key.Encode()); iter->Valid() && cur_index < size; iter->Next(), cur_index++) { + if (static_cast(idx) >= targets.size()) { + break; + } + ParsedSetsMemberKey parsed_sets_member_key(iter->key()); + while (static_cast(idx) < targets.size() && cur_index == targets[idx]) { + idx++; + members->push_back(parsed_sets_member_key.member().ToString()); + } + } + + std::shuffle(members->begin(), members->end(), engine); + delete iter; + } + } + return s; +} + +rocksdb::Status RedisSets::SRem(const Slice& key, const std::vector& members, int32_t* ret) { + *ret = 0; + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + + int32_t version = 0; + uint32_t statistic 
= 0; + std::string meta_value; + rocksdb::Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale()) { + return rocksdb::Status::NotFound("stale"); + } else if (parsed_sets_meta_value.count() == 0) { + return rocksdb::Status::NotFound(); + } else { + int32_t cnt = 0; + std::string member_value; + version = parsed_sets_meta_value.version(); + for (const auto& member : members) { + SetsMemberKey sets_member_key(key, version, member); + s = db_->Get(default_read_options_, handles_[1], sets_member_key.Encode(), &member_value); + if (s.ok()) { + cnt++; + statistic++; + batch.Delete(handles_[1], sets_member_key.Encode()); + } else if (s.IsNotFound()) { + } else { + return s; + } + } + *ret = cnt; + if (!parsed_sets_meta_value.CheckModifyCount(-cnt)){ + return Status::InvalidArgument("set size overflow"); + } + parsed_sets_meta_value.ModifyCount(-cnt); + batch.Put(handles_[0], key, meta_value); + } + } else if (s.IsNotFound()) { + *ret = 0; + return rocksdb::Status::NotFound(); + } else { + return s; + } + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; +} + +rocksdb::Status RedisSets::SUnion(const std::vector& keys, std::vector* members) { + if (keys.empty()) { + return rocksdb::Status::Corruption("SUnion invalid parameter, no keys"); + } + + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + std::vector vaild_sets; + rocksdb::Status s; + + for (const auto & key : keys) { + s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (!parsed_sets_meta_value.IsStale() && parsed_sets_meta_value.count() != 0) { + vaild_sets.push_back({key, parsed_sets_meta_value.version()}); + } 
+ } else if (!s.IsNotFound()) { + return s; + } + } + + Slice prefix; + std::map result_flag; + for (const auto& key_version : vaild_sets) { + SetsMemberKey sets_member_key(key_version.key, key_version.version, Slice()); + prefix = sets_member_key.Encode(); + auto iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix); iter->Next()) { + ParsedSetsMemberKey parsed_sets_member_key(iter->key()); + std::string member = parsed_sets_member_key.member().ToString(); + if (result_flag.find(member) == result_flag.end()) { + members->push_back(member); + result_flag[member] = true; + } + } + delete iter; + } + return rocksdb::Status::OK(); +} + +rocksdb::Status RedisSets::SUnionstore(const Slice& destination, const std::vector& keys, std::vector& value_to_dest, int32_t* ret) { + if (keys.empty()) { + return rocksdb::Status::Corruption("SUnionstore invalid parameter, no keys"); + } + + rocksdb::WriteBatch batch; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + int32_t version = 0; + ScopeRecordLock l(lock_mgr_, destination); + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + std::vector vaild_sets; + rocksdb::Status s; + + for (const auto & key : keys) { + s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (!parsed_sets_meta_value.IsStale() && parsed_sets_meta_value.count() != 0) { + vaild_sets.push_back({key, parsed_sets_meta_value.version()}); + } + } else if (!s.IsNotFound()) { + return s; + } + } + + Slice prefix; + std::vector members; + std::map result_flag; + for (const auto& key_version : vaild_sets) { + SetsMemberKey sets_member_key(key_version.key, key_version.version, Slice()); + prefix = sets_member_key.Encode(); + auto iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(prefix); iter->Valid() && 
iter->key().starts_with(prefix); iter->Next()) { + ParsedSetsMemberKey parsed_sets_member_key(iter->key()); + std::string member = parsed_sets_member_key.member().ToString(); + if (result_flag.find(member) == result_flag.end()) { + members.push_back(member); + result_flag[member] = true; + } + } + delete iter; + } + + uint32_t statistic = 0; + s = db_->Get(read_options, handles_[0], destination, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + statistic = parsed_sets_meta_value.count(); + version = parsed_sets_meta_value.InitialMetaValue(); + if (!parsed_sets_meta_value.check_set_count(static_cast(members.size()))) { + return Status::InvalidArgument("set size overflow"); + } + parsed_sets_meta_value.set_count(static_cast(members.size())); + batch.Put(handles_[0], destination, meta_value); + } else if (s.IsNotFound()) { + char str[4]; + EncodeFixed32(str, members.size()); + SetsMetaValue sets_meta_value(Slice(str, sizeof(int32_t))); + version = sets_meta_value.UpdateVersion(); + batch.Put(handles_[0], destination, sets_meta_value.Encode()); + } else { + return s; + } + for (const auto& member : members) { + SetsMemberKey sets_member_key(destination, version, member); + batch.Put(handles_[1], sets_member_key.Encode(), Slice()); + } + *ret = static_cast(members.size()); + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(destination.ToString(), statistic); + value_to_dest = std::move(members); + return s; +} + +rocksdb::Status RedisSets::SScan(const Slice& key, int64_t cursor, const std::string& pattern, int64_t count, + std::vector* members, int64_t* next_cursor) { + *next_cursor = 0; + members->clear(); + if (cursor < 0) { + *next_cursor = 0; + return rocksdb::Status::OK(); + } + + int64_t rest = count; + int64_t step_length = count; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = 
snapshot; + rocksdb::Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale() || parsed_sets_meta_value.count() == 0) { + *next_cursor = 0; + return rocksdb::Status::NotFound(); + } else { + std::string sub_member; + std::string start_point; + int32_t version = parsed_sets_meta_value.version(); + s = GetScanStartPoint(key, pattern, cursor, &start_point); + if (s.IsNotFound()) { + cursor = 0; + if (isTailWildcard(pattern)) { + start_point = pattern.substr(0, pattern.size() - 1); + } + } + if (isTailWildcard(pattern)) { + sub_member = pattern.substr(0, pattern.size() - 1); + } + + SetsMemberKey sets_member_prefix(key, version, sub_member); + SetsMemberKey sets_member_key(key, version, start_point); + std::string prefix = sets_member_prefix.Encode().ToString(); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(sets_member_key.Encode()); iter->Valid() && rest > 0 && iter->key().starts_with(prefix); + iter->Next()) { + ParsedSetsMemberKey parsed_sets_member_key(iter->key()); + std::string member = parsed_sets_member_key.member().ToString(); + if (StringMatch(pattern.data(), pattern.size(), member.data(), member.size(), 0) != 0) { + members->push_back(member); + } + rest--; + } + + if (iter->Valid() && (iter->key().compare(prefix) <= 0 || iter->key().starts_with(prefix))) { + *next_cursor = cursor + step_length; + ParsedSetsMemberKey parsed_sets_member_key(iter->key()); + std::string next_member = parsed_sets_member_key.member().ToString(); + StoreScanNextPoint(key, pattern, *next_cursor, next_member); + } else { + *next_cursor = 0; + } + delete iter; + } + } else { + *next_cursor = 0; + return s; + } + return rocksdb::Status::OK(); +} + +rocksdb::Status RedisSets::PKScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key) { + 
next_key->clear(); + + std::string key; + int32_t remain = limit; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + bool start_no_limit = key_start.compare("") == 0; + bool end_no_limit = key_end.compare("") == 0; + + if (!start_no_limit && !end_no_limit && (key_start.compare(key_end) > 0)) { + return rocksdb::Status::InvalidArgument("error in given range"); + } + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + if (start_no_limit) { + it->SeekToFirst(); + } else { + it->Seek(key_start); + } + + while (it->Valid() && remain > 0 && (end_no_limit || it->key().compare(key_end) <= 0)) { + ParsedSetsMetaValue parsed_meta_value(it->value()); + if (parsed_meta_value.IsStale() || parsed_meta_value.count() == 0) { + it->Next(); + } else { + key = it->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + keys->push_back(key); + } + remain--; + it->Next(); + } + } + + while (it->Valid() && (end_no_limit || it->key().compare(key_end) <= 0)) { + ParsedSetsMetaValue parsed_sets_meta_value(it->value()); + if (parsed_sets_meta_value.IsStale() || parsed_sets_meta_value.count() == 0) { + it->Next(); + } else { + *next_key = it->key().ToString(); + break; + } + } + delete it; + return rocksdb::Status::OK(); +} + +rocksdb::Status RedisSets::PKRScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key) { + next_key->clear(); + + std::string key; + int32_t remain = limit; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + bool start_no_limit = key_start.compare("") == 0; + bool end_no_limit = key_end.compare("") == 0; + + if (!start_no_limit && 
!end_no_limit && (key_start.compare(key_end) < 0)) { + return rocksdb::Status::InvalidArgument("error in given range"); + } + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + if (start_no_limit) { + it->SeekToLast(); + } else { + it->SeekForPrev(key_start); + } + + while (it->Valid() && remain > 0 && (end_no_limit || it->key().compare(key_end) >= 0)) { + ParsedSetsMetaValue parsed_sets_meta_value(it->value()); + if (parsed_sets_meta_value.IsStale() || parsed_sets_meta_value.count() == 0) { + it->Prev(); + } else { + key = it->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + keys->push_back(key); + } + remain--; + it->Prev(); + } + } + + while (it->Valid() && (end_no_limit || it->key().compare(key_end) >= 0)) { + ParsedSetsMetaValue parsed_sets_meta_value(it->value()); + if (parsed_sets_meta_value.IsStale() || parsed_sets_meta_value.count() == 0) { + it->Prev(); + } else { + *next_key = it->key().ToString(); + break; + } + } + delete it; + return rocksdb::Status::OK(); +} + +rocksdb::Status RedisSets::Expire(const Slice& key, int32_t ttl) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + rocksdb::Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale()) { + return rocksdb::Status::NotFound("Stale"); + } else if (parsed_sets_meta_value.count() == 0) { + return rocksdb::Status::NotFound(); + } + + if (ttl > 0) { + parsed_sets_meta_value.SetRelativeTimestamp(ttl); + s = db_->Put(default_write_options_, handles_[0], key, meta_value); + } else { + parsed_sets_meta_value.InitialMetaValue(); + s = db_->Put(default_write_options_, handles_[0], key, meta_value); + } + } + return s; +} + +rocksdb::Status RedisSets::Del(const Slice& key) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + rocksdb::Status s = 
db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale()) { + return rocksdb::Status::NotFound("Stale"); + } else if (parsed_sets_meta_value.count() == 0) { + return rocksdb::Status::NotFound(); + } else { + uint32_t statistic = parsed_sets_meta_value.count(); + parsed_sets_meta_value.InitialMetaValue(); + s = db_->Put(default_write_options_, handles_[0], key, meta_value); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + } + } + return s; +} + +bool RedisSets::Scan(const std::string& start_key, const std::string& pattern, std::vector* keys, + int64_t* count, std::string* next_key) { + std::string meta_key; + bool is_finish = true; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + + it->Seek(start_key); + while (it->Valid() && (*count) > 0) { + ParsedSetsMetaValue parsed_meta_value(it->value()); + if (parsed_meta_value.IsStale() || parsed_meta_value.count() == 0) { + it->Next(); + continue; + } else { + meta_key = it->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), meta_key.data(), meta_key.size(), 0) != 0) { + keys->push_back(meta_key); + } + (*count)--; + it->Next(); + } + } + + std::string prefix = isTailWildcard(pattern) ? 
pattern.substr(0, pattern.size() - 1) : ""; + if (it->Valid() && (it->key().compare(prefix) <= 0 || it->key().starts_with(prefix))) { + *next_key = it->key().ToString(); + is_finish = false; + } else { + *next_key = ""; + } + delete it; + return is_finish; +} + +bool RedisSets::PKExpireScan(const std::string& start_key, int32_t min_timestamp, int32_t max_timestamp, + std::vector* keys, int64_t* leftover_visits, std::string* next_key) { + bool is_finish = true; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + it->Seek(start_key); + while (it->Valid() && (*leftover_visits) > 0) { + ParsedSetsMetaValue parsed_sets_meta_value(it->value()); + if (parsed_sets_meta_value.IsStale() || parsed_sets_meta_value.count() == 0) { + it->Next(); + continue; + } else { + if (min_timestamp < parsed_sets_meta_value.timestamp() && parsed_sets_meta_value.timestamp() < max_timestamp) { + keys->push_back(it->key().ToString()); + } + (*leftover_visits)--; + it->Next(); + } + } + + if (it->Valid()) { + is_finish = false; + *next_key = it->key().ToString(); + } else { + *next_key = ""; + } + delete it; + return is_finish; +} + +rocksdb::Status RedisSets::Expireat(const Slice& key, int32_t timestamp) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + rocksdb::Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale()) { + return rocksdb::Status::NotFound("Stale"); + } else if (parsed_sets_meta_value.count() == 0) { + return rocksdb::Status::NotFound(); + } else { + if (timestamp > 0) { + parsed_sets_meta_value.set_timestamp(timestamp); + } else { + parsed_sets_meta_value.InitialMetaValue(); + } + return 
db_->Put(default_write_options_, handles_[0], key, meta_value); + } + } + return s; +} + +rocksdb::Status RedisSets::Persist(const Slice& key) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + rocksdb::Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_sets_meta_value(&meta_value); + if (parsed_sets_meta_value.IsStale()) { + return rocksdb::Status::NotFound("Stale"); + } else if (parsed_sets_meta_value.count() == 0) { + return rocksdb::Status::NotFound(); + } else { + int32_t timestamp = parsed_sets_meta_value.timestamp(); + if (timestamp == 0) { + return rocksdb::Status::NotFound("Not have an associated timeout"); + } else { + parsed_sets_meta_value.set_timestamp(0); + return db_->Put(default_write_options_, handles_[0], key, meta_value); + } + } + } + return s; +} + +rocksdb::Status RedisSets::TTL(const Slice& key, int64_t* timestamp) { + std::string meta_value; + rocksdb::Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedSetsMetaValue parsed_setes_meta_value(&meta_value); + if (parsed_setes_meta_value.IsStale()) { + *timestamp = -2; + return rocksdb::Status::NotFound("Stale"); + } else if (parsed_setes_meta_value.count() == 0) { + *timestamp = -2; + return rocksdb::Status::NotFound(); + } else { + *timestamp = parsed_setes_meta_value.timestamp(); + if (*timestamp == 0) { + *timestamp = -1; + } else { + int64_t curtime; + rocksdb::Env::Default()->GetCurrentTime(&curtime); + *timestamp = *timestamp - curtime >= 0 ? 
*timestamp - curtime : -2; + } + } + } else if (s.IsNotFound()) { + *timestamp = -2; + } + return s; +} + +void RedisSets::ScanDatabase() { + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + auto current_time = static_cast(time(nullptr)); + + LOG(INFO) << "***************Sets Meta Data***************"; + auto meta_iter = db_->NewIterator(iterator_options, handles_[0]); + for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { + ParsedSetsMetaValue parsed_sets_meta_value(meta_iter->value()); + int32_t survival_time = 0; + if (parsed_sets_meta_value.timestamp() != 0) { + survival_time = parsed_sets_meta_value.timestamp() - current_time > 0 + ? parsed_sets_meta_value.timestamp() - current_time + : -1; + } + + LOG(INFO) << fmt::format("[key : {:<30}] [count : {:<10}] [timestamp : {:<10}] [version : {}] [survival_time : {}]", + meta_iter->key().ToString(), parsed_sets_meta_value.count(), parsed_sets_meta_value.timestamp(), + parsed_sets_meta_value.version(), survival_time); + } + delete meta_iter; + + LOG(INFO) << "***************Sets Member Data***************"; + auto member_iter = db_->NewIterator(iterator_options, handles_[1]); + for (member_iter->SeekToFirst(); member_iter->Valid(); member_iter->Next()) { + ParsedSetsMemberKey parsed_sets_member_key(member_iter->key()); + + LOG(INFO) << fmt::format("[key : {:<30}] [member : {:<20}] [version : {}]", parsed_sets_member_key.key().ToString(), + parsed_sets_member_key.member().ToString(), parsed_sets_member_key.version()); + } + delete member_iter; +} + +} // namespace storage diff --git a/src/storage/src/redis_sets.h b/src/storage/src/redis_sets.h new file mode 100644 index 000000000..4245abbac --- /dev/null +++ b/src/storage/src/redis_sets.h @@ -0,0 +1,80 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. 
+// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include +#include +#include + +#include "pstd/env.h" + +#include "src/custom_comparator.h" +#include "src/lru_cache.h" +#include "src/redis.h" + +#define SPOP_COMPACT_THRESHOLD_COUNT 500 +#define SPOP_COMPACT_THRESHOLD_DURATION (1000 * 1000) // 1000ms + +namespace storage { + +class RedisSets : public Redis { + public: + RedisSets(Storage* s, const DataType& type); + ~RedisSets() override; + + // Common Commands + Status Open(const StorageOptions& storage_options, const std::string& db_path) override; + Status CompactRange(const rocksdb::Slice* begin, const rocksdb::Slice* end, + const ColumnFamilyType& type = kMetaAndData) override; + Status GetProperty(const std::string& property, uint64_t* out) override; + Status ScanKeyNum(KeyInfo* key_info) override; + Status ScanKeys(const std::string& pattern, std::vector* keys) override; + Status PKPatternMatchDel(const std::string& pattern, int32_t* ret) override; + + // Setes Commands + Status SAdd(const Slice& key, const std::vector& members, int32_t* ret); + Status SCard(const Slice& key, int32_t* ret); + Status SDiff(const std::vector& keys, std::vector* members); + Status SDiffstore(const Slice& destination, const std::vector& keys, std::vector& value_to_dest, int32_t* ret); + Status SInter(const std::vector& keys, std::vector* members); + Status SInterstore(const Slice& destination, const std::vector& keys, std::vector& value_to_dest, int32_t* ret); + Status SIsmember(const Slice& key, const Slice& member, int32_t* ret); + Status SMembers(const Slice& key, std::vector* members); + Status SMove(const Slice& source, const Slice& destination, const Slice& member, int32_t* ret); + Status SPop(const Slice& key, std::vector* members, bool* need_compact, int64_t cnt); + Status 
SRandmember(const Slice& key, int32_t count, std::vector* members); + Status SRem(const Slice& key, const std::vector& members, int32_t* ret); + Status SUnion(const std::vector& keys, std::vector* members); + Status SUnionstore(const Slice& destination, const std::vector& keys, std::vector& value_to_dest, int32_t* ret); + Status SScan(const Slice& key, int64_t cursor, const std::string& pattern, int64_t count, + std::vector* members, int64_t* next_cursor); + Status PKScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key); + Status PKRScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key); + + // Keys Commands + Status Expire(const Slice& key, int32_t ttl) override; + Status Del(const Slice& key) override; + bool Scan(const std::string& start_key, const std::string& pattern, std::vector* keys, int64_t* count, + std::string* next_key) override; + bool PKExpireScan(const std::string& start_key, int32_t min_timestamp, int32_t max_timestamp, + std::vector* keys, int64_t* leftover_visits, std::string* next_key) override; + Status Expireat(const Slice& key, int32_t timestamp) override; + Status Persist(const Slice& key) override; + Status TTL(const Slice& key, int64_t* timestamp) override; + + // Iterate all data + void ScanDatabase(); + + private: + // For compact in time after multiple spop + std::unique_ptr> spop_counts_store_; + Status ResetSpopCount(const std::string& key); + Status AddAndGetSpopCount(const std::string& key, uint64_t* count); +}; + +} // namespace storage diff --git a/src/storage/src/redis_strings.cc b/src/storage/src/redis_strings.cc new file mode 100644 index 000000000..2fa750158 --- /dev/null +++ b/src/storage/src/redis_strings.cc @@ -0,0 +1,1361 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. 
+// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include "src/redis_strings.h" + +#include +#include +#include +#include + +#include +#include +#include + +#include "src/scope_record_lock.h" +#include "src/scope_snapshot.h" +#include "src/strings_filter.h" +#include "storage/util.h" + +namespace storage { + +RedisStrings::RedisStrings(Storage* const s, const DataType& type) : Redis(s, type) {} + +Status RedisStrings::Open(const StorageOptions& storage_options, const std::string& db_path) { + rocksdb::Options ops(storage_options.options); + ops.compaction_filter_factory = std::make_shared(); + + // use the bloom filter policy to reduce disk reads + rocksdb::BlockBasedTableOptions table_ops(storage_options.table_options); + if (!storage_options.share_block_cache && storage_options.block_cache_size > 0) { + table_ops.block_cache = rocksdb::NewLRUCache(storage_options.block_cache_size); + } + table_ops.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true)); + ops.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_ops)); + + return rocksdb::DB::Open(ops, db_path, &db_); +} + +Status RedisStrings::CompactRange(const rocksdb::Slice* begin, const rocksdb::Slice* end, + const ColumnFamilyType& type) { + return db_->CompactRange(default_compact_range_options_, begin, end); +} + +Status RedisStrings::GetProperty(const std::string& property, uint64_t* out) { + std::string value; + db_->GetProperty(property, &value); + *out = std::strtoull(value.c_str(), nullptr, 10); + return Status::OK(); +} + +Status RedisStrings::ScanKeyNum(KeyInfo* key_info) { + uint64_t keys = 0; + uint64_t expires = 0; + uint64_t ttl_sum = 0; + uint64_t invaild_keys = 0; + + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + 
iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + int64_t curtime; + rocksdb::Env::Default()->GetCurrentTime(&curtime); + + // Note: This is a string type and does not need to pass the column family as + // a parameter, use the default column family + rocksdb::Iterator* iter = db_->NewIterator(iterator_options); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedStringsValue parsed_strings_value(iter->value()); + if (parsed_strings_value.IsStale()) { + invaild_keys++; + } else { + keys++; + if (!parsed_strings_value.IsPermanentSurvival()) { + expires++; + ttl_sum += parsed_strings_value.timestamp() - curtime; + } + } + } + delete iter; + + key_info->keys = keys; + key_info->expires = expires; + key_info->avg_ttl = (expires != 0) ? ttl_sum / expires : 0; + key_info->invaild_keys = invaild_keys; + return Status::OK(); +} + +Status RedisStrings::ScanKeys(const std::string& pattern, std::vector* keys) { + std::string key; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + // Note: This is a string type and does not need to pass the column family as + // a parameter, use the default column family + rocksdb::Iterator* iter = db_->NewIterator(iterator_options); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedStringsValue parsed_strings_value(iter->value()); + if (!parsed_strings_value.IsStale()) { + key = iter->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + keys->push_back(key); + } + } + } + delete iter; + return Status::OK(); +} + +Status RedisStrings::PKPatternMatchDel(const std::string& pattern, int32_t* ret) { + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + 
+ std::string key; + std::string value; + int32_t total_delete = 0; + Status s; + rocksdb::WriteBatch batch; + rocksdb::Iterator* iter = db_->NewIterator(iterator_options); + iter->SeekToFirst(); + while (iter->Valid()) { + key = iter->key().ToString(); + value = iter->value().ToString(); + ParsedStringsValue parsed_strings_value(&value); + if (!parsed_strings_value.IsStale() && (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0)) { + batch.Delete(key); + } + // In order to be more efficient, we use batch deletion here + if (static_cast(batch.Count()) >= BATCH_DELETE_LIMIT) { + s = db_->Write(default_write_options_, &batch); + if (s.ok()) { + total_delete += static_cast(batch.Count()); + batch.Clear(); + } else { + *ret = total_delete; + return s; + } + } + iter->Next(); + } + if (batch.Count() != 0U) { + s = db_->Write(default_write_options_, &batch); + if (s.ok()) { + total_delete += static_cast( batch.Count()); + batch.Clear(); + } + } + + *ret = total_delete; + return s; +} + +Status RedisStrings::Append(const Slice& key, const Slice& value, int32_t* ret) { + std::string old_value; + *ret = 0; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &old_value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&old_value); + if (parsed_strings_value.IsStale()) { + *ret = static_cast(value.size()); + StringsValue strings_value(value); + return db_->Put(default_write_options_, key, strings_value.Encode()); + } else { + int32_t timestamp = parsed_strings_value.timestamp(); + std::string old_user_value = parsed_strings_value.value().ToString(); + std::string new_value = old_user_value + value.ToString(); + StringsValue strings_value(new_value); + strings_value.set_timestamp(timestamp); + *ret = static_cast(new_value.size()); + return db_->Put(default_write_options_, key, strings_value.Encode()); + } + } else if (s.IsNotFound()) { + *ret = static_cast(value.size()); + StringsValue strings_value(value); 
+ return db_->Put(default_write_options_, key, strings_value.Encode()); + } + return s; +} + +int GetBitCount(const unsigned char* value, int64_t bytes) { + int bit_num = 0; + static const unsigned char bitsinbyte[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, + 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, + 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, + 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, + 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, + 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, + 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; + for (int i = 0; i < bytes; i++) { + bit_num += bitsinbyte[static_cast(value[i])]; + } + return bit_num; +} + +Status RedisStrings::BitCount(const Slice& key, int64_t start_offset, int64_t end_offset, int32_t* ret, + bool have_range) { + *ret = 0; + std::string value; + Status s = db_->Get(default_read_options_, key, &value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&value); + if (parsed_strings_value.IsStale()) { + return Status::NotFound("Stale"); + } else { + parsed_strings_value.StripSuffix(); + const auto bit_value = reinterpret_cast(value.data()); + auto value_length = static_cast(value.length()); + if (have_range) { + if (start_offset < 0) { + start_offset = start_offset + value_length; + } + if (end_offset < 0) { + end_offset = end_offset + value_length; + } + if (start_offset < 0) { + start_offset = 0; + } + if (end_offset < 0) { + end_offset = 0; + } + + if (end_offset >= value_length) { + end_offset = value_length - 1; + } + if (start_offset > 
end_offset) { + return Status::OK(); + } + } else { + start_offset = 0; + end_offset = std::max(value_length - 1, static_cast(0)); + } + *ret = GetBitCount(bit_value + start_offset, end_offset - start_offset + 1); + } + } else { + return s; + } + return Status::OK(); +} + +std::string BitOpOperate(BitOpType op, const std::vector& src_values, int64_t max_len) { + char byte; + char output; + auto dest_value = std::make_unique(max_len); + for (int64_t j = 0; j < max_len; j++) { + if (j < static_cast(src_values[0].size())) { + output = src_values[0][j]; + } else { + output = 0; + } + if (op == kBitOpNot) { + output = static_cast(~output); + } + for (size_t i = 1; i < src_values.size(); i++) { + if (static_cast(src_values[i].size()) - 1 >= j) { + byte = src_values[i][j]; + } else { + byte = 0; + } + switch (op) { + case kBitOpNot: + break; + case kBitOpAnd: + output = static_cast(output & byte); + break; + case kBitOpOr: + output = static_cast(output | byte); + break; + case kBitOpXor: + output = static_cast(output ^ byte); + break; + case kBitOpDefault: + break; + } + } + dest_value[j] = output; + } + std::string dest_str(dest_value.get(), max_len); + return dest_str; +} + +Status RedisStrings::BitOp(BitOpType op, const std::string& dest_key, const std::vector& src_keys, + std::string &value_to_dest, int64_t* ret) { + Status s; + if (op == kBitOpNot && src_keys.size() != 1) { + return Status::InvalidArgument("the number of source keys is not right"); + } else if (src_keys.empty()) { + return Status::InvalidArgument("the number of source keys is not right"); + } + + int64_t max_len = 0; + int64_t value_len = 0; + std::vector src_values; + for (const auto & src_key : src_keys) { + std::string value; + s = db_->Get(default_read_options_, src_key, &value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&value); + if (parsed_strings_value.IsStale()) { + src_values.emplace_back(""); + value_len = 0; + } else { + parsed_strings_value.StripSuffix(); + 
src_values.push_back(value); + value_len = static_cast(value.size()); + } + } else if (s.IsNotFound()) { + src_values.emplace_back(""); + value_len = 0; + } else { + return s; + } + max_len = std::max(max_len, value_len); + } + + std::string dest_value = BitOpOperate(op, src_values, max_len); + value_to_dest = dest_value; + *ret = static_cast(dest_value.size()); + + StringsValue strings_value(Slice(dest_value.c_str(), max_len)); + ScopeRecordLock l(lock_mgr_, dest_key); + return db_->Put(default_write_options_, dest_key, strings_value.Encode()); +} + +Status RedisStrings::Decrby(const Slice& key, int64_t value, int64_t* ret) { + std::string old_value; + std::string new_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &old_value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&old_value); + if (parsed_strings_value.IsStale()) { + *ret = -value; + new_value = std::to_string(*ret); + StringsValue strings_value(new_value); + return db_->Put(default_write_options_, key, strings_value.Encode()); + } else { + int32_t timestamp = parsed_strings_value.timestamp(); + std::string old_user_value = parsed_strings_value.value().ToString(); + char* end = nullptr; + errno = 0; + int64_t ival = strtoll(old_user_value.c_str(), &end, 10); + if (errno == ERANGE || *end != 0) { + return Status::Corruption("Value is not a integer"); + } + if ((value >= 0 && LLONG_MIN + value > ival) || (value < 0 && LLONG_MAX + value < ival)) { + return Status::InvalidArgument("Overflow"); + } + *ret = ival - value; + new_value = std::to_string(*ret); + StringsValue strings_value(new_value); + strings_value.set_timestamp(timestamp); + return db_->Put(default_write_options_, key, strings_value.Encode()); + } + } else if (s.IsNotFound()) { + *ret = -value; + new_value = std::to_string(*ret); + StringsValue strings_value(new_value); + return db_->Put(default_write_options_, key, strings_value.Encode()); + } else { + return s; + } +} + +Status 
RedisStrings::Get(const Slice& key, std::string* value) { + value->clear(); + Status s = db_->Get(default_read_options_, key, value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(value); + if (parsed_strings_value.IsStale()) { + value->clear(); + return Status::NotFound("Stale"); + } else { + parsed_strings_value.StripSuffix(); + } + } + return s; +} + +Status RedisStrings::GetBit(const Slice& key, int64_t offset, int32_t* ret) { + std::string meta_value; + Status s = db_->Get(default_read_options_, key, &meta_value); + if (s.ok() || s.IsNotFound()) { + std::string data_value; + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&meta_value); + if (parsed_strings_value.IsStale()) { + *ret = 0; + return Status::OK(); + } else { + data_value = parsed_strings_value.value().ToString(); + } + } + size_t byte = offset >> 3; + size_t bit = 7 - (offset & 0x7); + if (byte + 1 > data_value.length()) { + *ret = 0; + } else { + *ret = ((data_value[byte] & (1 << bit)) >> bit); + } + } else { + return s; + } + return Status::OK(); +} + +Status RedisStrings::Getrange(const Slice& key, int64_t start_offset, int64_t end_offset, std::string* ret) { + *ret = ""; + std::string value; + Status s = db_->Get(default_read_options_, key, &value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&value); + if (parsed_strings_value.IsStale()) { + return Status::NotFound("Stale"); + } else { + parsed_strings_value.StripSuffix(); + auto size = static_cast(value.size()); + int64_t start_t = start_offset >= 0 ? start_offset : size + start_offset; + int64_t end_t = end_offset >= 0 ? 
end_offset : size + end_offset; + if (start_t > size - 1 || (start_t != 0 && start_t > end_t) || (start_t != 0 && end_t < 0)) { + return Status::OK(); + } + if (start_t < 0) { + start_t = 0; + } + if (end_t >= size) { + end_t = size - 1; + } + if (start_t == 0 && end_t < 0) { + end_t = 0; + } + *ret = value.substr(start_t, end_t - start_t + 1); + return Status::OK(); + } + } else { + return s; + } +} + +Status RedisStrings::GetSet(const Slice& key, const Slice& value, std::string* old_value) { + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, old_value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(old_value); + if (parsed_strings_value.IsStale()) { + *old_value = ""; + } else { + parsed_strings_value.StripSuffix(); + } + } else if (!s.IsNotFound()) { + return s; + } + StringsValue strings_value(value); + return db_->Put(default_write_options_, key, strings_value.Encode()); +} + +Status RedisStrings::Incrby(const Slice& key, int64_t value, int64_t* ret) { + std::string old_value; + std::string new_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &old_value); + char buf[32] = {0}; + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&old_value); + if (parsed_strings_value.IsStale()) { + *ret = value; + Int64ToStr(buf, 32, value); + StringsValue strings_value(buf); + return db_->Put(default_write_options_, key, strings_value.Encode()); + } else { + int32_t timestamp = parsed_strings_value.timestamp(); + std::string old_user_value = parsed_strings_value.value().ToString(); + char* end = nullptr; + int64_t ival = strtoll(old_user_value.c_str(), &end, 10); + if (*end != 0) { + return Status::Corruption("Value is not a integer"); + } + if ((value >= 0 && LLONG_MAX - value < ival) || (value < 0 && LLONG_MIN - value > ival)) { + return Status::InvalidArgument("Overflow"); + } + *ret = ival + value; + new_value = std::to_string(*ret); + StringsValue strings_value(new_value); 
+ strings_value.set_timestamp(timestamp); + return db_->Put(default_write_options_, key, strings_value.Encode()); + } + } else if (s.IsNotFound()) { + *ret = value; + Int64ToStr(buf, 32, value); + StringsValue strings_value(buf); + return db_->Put(default_write_options_, key, strings_value.Encode()); + } else { + return s; + } +} + +Status RedisStrings::Incrbyfloat(const Slice& key, const Slice& value, std::string* ret) { + std::string old_value; + std::string new_value; + long double long_double_by; + if (StrToLongDouble(value.data(), value.size(), &long_double_by) == -1) { + return Status::Corruption("Value is not a vaild float"); + } + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &old_value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&old_value); + if (parsed_strings_value.IsStale()) { + LongDoubleToStr(long_double_by, &new_value); + *ret = new_value; + StringsValue strings_value(new_value); + return db_->Put(default_write_options_, key, strings_value.Encode()); + } else { + int32_t timestamp = parsed_strings_value.timestamp(); + std::string old_user_value = parsed_strings_value.value().ToString(); + long double total; + long double old_number; + if (StrToLongDouble(old_user_value.data(), old_user_value.size(), &old_number) == -1) { + return Status::Corruption("Value is not a vaild float"); + } + total = old_number + long_double_by; + if (LongDoubleToStr(total, &new_value) == -1) { + return Status::InvalidArgument("Overflow"); + } + *ret = new_value; + StringsValue strings_value(new_value); + strings_value.set_timestamp(timestamp); + return db_->Put(default_write_options_, key, strings_value.Encode()); + } + } else if (s.IsNotFound()) { + LongDoubleToStr(long_double_by, &new_value); + *ret = new_value; + StringsValue strings_value(new_value); + return db_->Put(default_write_options_, key, strings_value.Encode()); + } else { + return s; + } +} + +Status RedisStrings::MGet(const std::vector& keys, 
std::vector* vss) { + vss->clear(); + + Status s; + std::string value; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + for (const auto& key : keys) { + s = db_->Get(read_options, key, &value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&value); + if (parsed_strings_value.IsStale()) { + vss->push_back({std::string(), Status::NotFound("Stale")}); + } else { + vss->push_back({parsed_strings_value.user_value().ToString(), Status::OK()}); + } + } else if (s.IsNotFound()) { + vss->push_back({std::string(), Status::NotFound()}); + } else { + vss->clear(); + return s; + } + } + return Status::OK(); +} + +Status RedisStrings::MSet(const std::vector& kvs) { + std::vector keys; + keys.reserve(kvs.size()); +for (const auto& kv : kvs) { + keys.push_back(kv.key); + } + + MultiScopeRecordLock ml(lock_mgr_, keys); + rocksdb::WriteBatch batch; + for (const auto& kv : kvs) { + StringsValue strings_value(kv.value); + batch.Put(kv.key, strings_value.Encode()); + } + return db_->Write(default_write_options_, &batch); +} + +Status RedisStrings::MSetnx(const std::vector& kvs, int32_t* ret) { + Status s; + bool exists = false; + *ret = 0; + std::string value; + for (const auto & kv : kvs) { + s = db_->Get(default_read_options_, kv.key, &value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&value); + if (!parsed_strings_value.IsStale()) { + exists = true; + break; + } + } + } + if (!exists) { + s = MSet(kvs); + if (s.ok()) { + *ret = 1; + } + } + return s; +} + +Status RedisStrings::Set(const Slice& key, const Slice& value) { + StringsValue strings_value(value); + ScopeRecordLock l(lock_mgr_, key); + return db_->Put(default_write_options_, key, strings_value.Encode()); +} + +Status RedisStrings::Setxx(const Slice& key, const Slice& value, int32_t* ret, const int32_t ttl) { + bool not_found = true; + std::string old_value; + StringsValue strings_value(value); + 
ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &old_value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(old_value); + if (!parsed_strings_value.IsStale()) { + not_found = false; + } + } else if (!s.IsNotFound()) { + return s; + } + + if (not_found) { + *ret = 0; + return s; + } else { + *ret = 1; + if (ttl > 0) { + strings_value.SetRelativeTimestamp(ttl); + } + return db_->Put(default_write_options_, key, strings_value.Encode()); + } +} + +Status RedisStrings::SetBit(const Slice& key, int64_t offset, int32_t on, int32_t* ret) { + std::string meta_value; + if (offset < 0) { + return Status::InvalidArgument("offset < 0"); + } + + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &meta_value); + if (s.ok() || s.IsNotFound()) { + std::string data_value; + int32_t timestamp = 0; + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&meta_value); + if (!parsed_strings_value.IsStale()) { + data_value = parsed_strings_value.value().ToString(); + timestamp = parsed_strings_value.timestamp(); + } + } + size_t byte = offset >> 3; + size_t bit = 7 - (offset & 0x7); + char byte_val; + size_t value_lenth = data_value.length(); + if (byte + 1 > value_lenth) { + *ret = 0; + byte_val = 0; + } else { + *ret = ((data_value[byte] & (1 << bit)) >> bit); + byte_val = data_value[byte]; + } + if (*ret == on) { + return Status::OK(); + } + byte_val = static_cast(byte_val & (~(1 << bit))); + byte_val = static_cast(byte_val | ((on & 0x1) << bit)); + if (byte + 1 <= value_lenth) { + data_value.replace(byte, 1, &byte_val, 1); + } else { + data_value.append(byte + 1 - value_lenth - 1, 0); + data_value.append(1, byte_val); + } + StringsValue strings_value(data_value); + strings_value.set_timestamp(timestamp); + return db_->Put(rocksdb::WriteOptions(), key, strings_value.Encode()); + } else { + return s; + } +} + +Status RedisStrings::Setex(const Slice& key, const Slice& value, int32_t ttl) { + if (ttl <= 
0) { + return Status::InvalidArgument("invalid expire time"); + } + StringsValue strings_value(value); + auto s = strings_value.SetRelativeTimestamp(ttl); + if (s != Status::OK()) { + return s; + } + ScopeRecordLock l(lock_mgr_, key); + return db_->Put(default_write_options_, key, strings_value.Encode()); +} + +Status RedisStrings::Setnx(const Slice& key, const Slice& value, int32_t* ret, const int32_t ttl) { + *ret = 0; + std::string old_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &old_value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&old_value); + if (parsed_strings_value.IsStale()) { + StringsValue strings_value(value); + if (ttl > 0) { + strings_value.SetRelativeTimestamp(ttl); + } + s = db_->Put(default_write_options_, key, strings_value.Encode()); + if (s.ok()) { + *ret = 1; + } + } + } else if (s.IsNotFound()) { + StringsValue strings_value(value); + if (ttl > 0) { + strings_value.SetRelativeTimestamp(ttl); + } + s = db_->Put(default_write_options_, key, strings_value.Encode()); + if (s.ok()) { + *ret = 1; + } + } + return s; +} + +Status RedisStrings::Setvx(const Slice& key, const Slice& value, const Slice& new_value, int32_t* ret, + const int32_t ttl) { + *ret = 0; + std::string old_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &old_value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&old_value); + if (parsed_strings_value.IsStale()) { + *ret = 0; + } else { + if (value.compare(parsed_strings_value.value()) == 0) { + StringsValue strings_value(new_value); + if (ttl > 0) { + strings_value.SetRelativeTimestamp(ttl); + } + s = db_->Put(default_write_options_, key, strings_value.Encode()); + if (!s.ok()) { + return s; + } + *ret = 1; + } else { + *ret = -1; + } + } + } else if (s.IsNotFound()) { + *ret = 0; + } else { + return s; + } + return Status::OK(); +} + +Status RedisStrings::Delvx(const Slice& key, const Slice& value, 
int32_t* ret) { + *ret = 0; + std::string old_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &old_value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&old_value); + if (parsed_strings_value.IsStale()) { + *ret = 0; + return Status::NotFound("Stale"); + } else { + if (value.compare(parsed_strings_value.value()) == 0) { + *ret = 1; + return db_->Delete(default_write_options_, key); + } else { + *ret = -1; + } + } + } else if (s.IsNotFound()) { + *ret = 0; + } + return s; +} + +Status RedisStrings::Setrange(const Slice& key, int64_t start_offset, const Slice& value, int32_t* ret) { + std::string old_value; + std::string new_value; + if (start_offset < 0) { + return Status::InvalidArgument("offset < 0"); + } + + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &old_value); + if (s.ok()) { + int32_t timestamp = 0; + ParsedStringsValue parsed_strings_value(&old_value); + parsed_strings_value.StripSuffix(); + if (parsed_strings_value.IsStale()) { + std::string tmp(start_offset, '\0'); + new_value = tmp.append(value.data()); + *ret = static_cast(new_value.length()); + } else { + timestamp = parsed_strings_value.timestamp(); + if (static_cast(start_offset) > old_value.length()) { + old_value.resize(start_offset); + new_value = old_value.append(value.data()); + } else { + std::string head = old_value.substr(0, start_offset); + std::string tail; + if (start_offset + value.size() - 1 < old_value.length() - 1) { + tail = old_value.substr(start_offset + value.size()); + } + new_value = head + value.data() + tail; + } + } + *ret = static_cast(new_value.length()); + StringsValue strings_value(new_value); + strings_value.set_timestamp(timestamp); + return db_->Put(default_write_options_, key, strings_value.Encode()); + } else if (s.IsNotFound()) { + std::string tmp(start_offset, '\0'); + new_value = tmp.append(value.data()); + *ret = static_cast(new_value.length()); + StringsValue 
strings_value(new_value); + return db_->Put(default_write_options_, key, strings_value.Encode()); + } + return s; +} + +Status RedisStrings::Strlen(const Slice& key, int32_t* len) { + std::string value; + Status s = Get(key, &value); + if (s.ok()) { + *len = static_cast(value.size()); + } else { + *len = 0; + } + return s; +} + +int32_t GetBitPos(const unsigned char* s, unsigned int bytes, int bit) { + uint64_t word = 0; + uint64_t skip_val = 0; + auto value = const_cast(s); + auto l = reinterpret_cast(value); + int pos = 0; + if (bit == 0) { + skip_val = std::numeric_limits::max(); + } else { + skip_val = 0; + } + // skip 8 bytes at one time, find the first int64 that should not be skipped + while (bytes >= sizeof(*l)) { + if (*l != skip_val) { + break; + } + l++; + bytes = bytes - sizeof(*l); + pos += static_cast(8 * sizeof(*l)); + } + auto c = reinterpret_cast(l); + for (size_t j = 0; j < sizeof(*l); j++) { + word = word << 8; + if (bytes != 0U) { + word = word | *c; + c++; + bytes--; + } + } + if (bit == 1 && word == 0) { + return -1; + } + // set each bit of mask to 0 except msb + uint64_t mask = std::numeric_limits::max(); + mask = mask >> 1; + mask = ~(mask); + while (mask != 0U) { + if (static_cast((word & mask) != 0) == bit) { + return pos; + } + pos++; + mask = mask >> 1; + } + return pos; +} + +Status RedisStrings::BitPos(const Slice& key, int32_t bit, int64_t* ret) { + Status s; + std::string value; + s = db_->Get(default_read_options_, key, &value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&value); + if (parsed_strings_value.IsStale()) { + if (bit == 1) { + *ret = -1; + } else if (bit == 0) { + *ret = 0; + } + return Status::NotFound("Stale"); + } else { + parsed_strings_value.StripSuffix(); + const auto bit_value = reinterpret_cast(value.data()); + auto value_length = static_cast(value.length()); + int64_t start_offset = 0; + int64_t end_offset = std::max(value_length - 1, static_cast(0)); + int64_t bytes = end_offset - start_offset + 
1; + int64_t pos = GetBitPos(bit_value + start_offset, bytes, bit); + if (pos == (8 * bytes) && bit == 0) { + pos = -1; + } + if (pos != -1) { + pos = pos + 8 * start_offset; + } + *ret = pos; + } + } else { + return s; + } + return Status::OK(); +} + +Status RedisStrings::BitPos(const Slice& key, int32_t bit, int64_t start_offset, int64_t* ret) { + Status s; + std::string value; + s = db_->Get(default_read_options_, key, &value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&value); + if (parsed_strings_value.IsStale()) { + if (bit == 1) { + *ret = -1; + } else if (bit == 0) { + *ret = 0; + } + return Status::NotFound("Stale"); + } else { + parsed_strings_value.StripSuffix(); + const auto bit_value = reinterpret_cast(value.data()); + auto value_length = static_cast(value.length()); + int64_t end_offset = std::max(value_length - 1, static_cast(0)); + if (start_offset < 0) { + start_offset = start_offset + value_length; + } + if (start_offset < 0) { + start_offset = 0; + } + if (start_offset > end_offset) { + *ret = -1; + return Status::OK(); + } + if (start_offset > value_length - 1) { + *ret = -1; + return Status::OK(); + } + int64_t bytes = end_offset - start_offset + 1; + int64_t pos = GetBitPos(bit_value + start_offset, bytes, bit); + if (pos == (8 * bytes) && bit == 0) { + pos = -1; + } + if (pos != -1) { + pos = pos + 8 * start_offset; + } + *ret = pos; + } + } else { + return s; + } + return Status::OK(); +} + +Status RedisStrings::BitPos(const Slice& key, int32_t bit, int64_t start_offset, int64_t end_offset, int64_t* ret) { + Status s; + std::string value; + s = db_->Get(default_read_options_, key, &value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&value); + if (parsed_strings_value.IsStale()) { + if (bit == 1) { + *ret = -1; + } else if (bit == 0) { + *ret = 0; + } + return Status::NotFound("Stale"); + } else { + parsed_strings_value.StripSuffix(); + const auto bit_value = reinterpret_cast(value.data()); + auto value_length = 
static_cast(value.length()); + if (start_offset < 0) { + start_offset = start_offset + value_length; + } + if (start_offset < 0) { + start_offset = 0; + } + if (end_offset < 0) { + end_offset = end_offset + value_length; + } + // converting to int64_t just avoid warning + if (end_offset > static_cast(value.length()) - 1) { + end_offset = value_length - 1; + } + if (end_offset < 0) { + end_offset = 0; + } + if (start_offset > end_offset) { + *ret = -1; + return Status::OK(); + } + if (start_offset > value_length - 1) { + *ret = -1; + return Status::OK(); + } + int64_t bytes = end_offset - start_offset + 1; + int64_t pos = GetBitPos(bit_value + start_offset, bytes, bit); + if (pos == (8 * bytes) && bit == 0) { + pos = -1; + } + if (pos != -1) { + pos = pos + 8 * start_offset; + } + *ret = pos; + } + } else { + return s; + } + return Status::OK(); +} + +Status RedisStrings::PKSetexAt(const Slice& key, const Slice& value, int32_t timestamp) { + StringsValue strings_value(value); + ScopeRecordLock l(lock_mgr_, key); + strings_value.set_timestamp(timestamp); + return db_->Put(default_write_options_, key, strings_value.Encode()); +} + +Status RedisStrings::PKScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* kvs, std::string* next_key) { + next_key->clear(); + + std::string key; + std::string value; + int32_t remain = limit; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + bool start_no_limit = key_start.compare("") == 0; + bool end_no_limit = key_end.compare("") == 0; + + if (!start_no_limit && !end_no_limit && (key_start.compare(key_end) > 0)) { + return Status::InvalidArgument("error in given range"); + } + + // Note: This is a string type and does not need to pass the column family as + // a parameter, use the default column family + rocksdb::Iterator* it = 
db_->NewIterator(iterator_options); + if (start_no_limit) { + it->SeekToFirst(); + } else { + it->Seek(key_start); + } + + while (it->Valid() && remain > 0 && (end_no_limit || it->key().compare(key_end) <= 0)) { + ParsedStringsValue parsed_strings_value(it->value()); + if (parsed_strings_value.IsStale()) { + it->Next(); + } else { + key = it->key().ToString(); + value = parsed_strings_value.value().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + kvs->push_back({key, value}); + } + remain--; + it->Next(); + } + } + + while (it->Valid() && (end_no_limit || it->key().compare(key_end) <= 0)) { + ParsedStringsValue parsed_strings_value(it->value()); + if (parsed_strings_value.IsStale()) { + it->Next(); + } else { + *next_key = it->key().ToString(); + break; + } + } + delete it; + return Status::OK(); +} + +Status RedisStrings::PKRScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* kvs, std::string* next_key) { + std::string key; + std::string value; + int32_t remain = limit; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + bool start_no_limit = key_start.compare("") == 0; + bool end_no_limit = key_end.compare("") == 0; + + if (!start_no_limit && !end_no_limit && (key_start.compare(key_end) < 0)) { + return Status::InvalidArgument("error in given range"); + } + + // Note: This is a string type and does not need to pass the column family as + // a parameter, use the default column family + rocksdb::Iterator* it = db_->NewIterator(iterator_options); + if (start_no_limit) { + it->SeekToLast(); + } else { + it->SeekForPrev(key_start); + } + + while (it->Valid() && remain > 0 && (end_no_limit || it->key().compare(key_end) >= 0)) { + ParsedStringsValue parsed_strings_value(it->value()); + if (parsed_strings_value.IsStale()) { 
+ it->Prev(); + } else { + key = it->key().ToString(); + value = parsed_strings_value.value().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + kvs->push_back({key, value}); + } + remain--; + it->Prev(); + } + } + + while (it->Valid() && (end_no_limit || it->key().compare(key_end) >= 0)) { + ParsedStringsValue parsed_strings_value(it->value()); + if (parsed_strings_value.IsStale()) { + it->Prev(); + } else { + *next_key = it->key().ToString(); + break; + } + } + delete it; + return Status::OK(); +} + +Status RedisStrings::Expire(const Slice& key, int32_t ttl) { + std::string value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&value); + if (parsed_strings_value.IsStale()) { + return Status::NotFound("Stale"); + } + if (ttl > 0) { + parsed_strings_value.SetRelativeTimestamp(ttl); + return db_->Put(default_write_options_, key, value); + } else { + return db_->Delete(default_write_options_, key); + } + } + return s; +} + +Status RedisStrings::Del(const Slice& key) { + std::string value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&value); + if (parsed_strings_value.IsStale()) { + return Status::NotFound("Stale"); + } + return db_->Delete(default_write_options_, key); + } + return s; +} + +bool RedisStrings::Scan(const std::string& start_key, const std::string& pattern, std::vector* keys, + int64_t* count, std::string* next_key) { + std::string key; + bool is_finish = true; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + // Note: This is a string type and does not need to pass the column family as + // a parameter, use the default column family + 
rocksdb::Iterator* it = db_->NewIterator(iterator_options); + + it->Seek(start_key); + while (it->Valid() && (*count) > 0) { + ParsedStringsValue parsed_strings_value(it->value()); + if (parsed_strings_value.IsStale()) { + it->Next(); + continue; + } else { + key = it->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + keys->push_back(key); + } + (*count)--; + it->Next(); + } + } + + std::string prefix = isTailWildcard(pattern) ? pattern.substr(0, pattern.size() - 1) : ""; + if (it->Valid() && (it->key().compare(prefix) <= 0 || it->key().starts_with(prefix))) { + is_finish = false; + *next_key = it->key().ToString(); + } else { + *next_key = ""; + } + delete it; + return is_finish; +} + +bool RedisStrings::PKExpireScan(const std::string& start_key, int32_t min_timestamp, int32_t max_timestamp, + std::vector* keys, int64_t* leftover_visits, std::string* next_key) { + bool is_finish = true; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + rocksdb::Iterator* it = db_->NewIterator(iterator_options); + + it->Seek(start_key); + while (it->Valid() && (*leftover_visits) > 0) { + ParsedStringsValue parsed_strings_value(it->value()); + if (parsed_strings_value.IsStale()) { + it->Next(); + continue; + } else { + if (min_timestamp < parsed_strings_value.timestamp() && parsed_strings_value.timestamp() < max_timestamp) { + keys->push_back(it->key().ToString()); + } + (*leftover_visits)--; + it->Next(); + } + } + + if (it->Valid()) { + is_finish = false; + *next_key = it->key().ToString(); + } else { + *next_key = ""; + } + delete it; + return is_finish; +} + +Status RedisStrings::Expireat(const Slice& key, int32_t timestamp) { + std::string value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &value); + if (s.ok()) { + ParsedStringsValue 
parsed_strings_value(&value); + if (parsed_strings_value.IsStale()) { + return Status::NotFound("Stale"); + } else { + if (timestamp > 0) { + parsed_strings_value.set_timestamp(timestamp); + return db_->Put(default_write_options_, key, value); + } else { + return db_->Delete(default_write_options_, key); + } + } + } + return s; +} + +Status RedisStrings::Persist(const Slice& key) { + std::string value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&value); + if (parsed_strings_value.IsStale()) { + return Status::NotFound("Stale"); + } else { + int32_t timestamp = parsed_strings_value.timestamp(); + if (timestamp == 0) { + return Status::NotFound("Not have an associated timeout"); + } else { + parsed_strings_value.set_timestamp(0); + return db_->Put(default_write_options_, key, value); + } + } + } + return s; +} + +Status RedisStrings::TTL(const Slice& key, int64_t* timestamp) { + std::string value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &value); + if (s.ok()) { + ParsedStringsValue parsed_strings_value(&value); + if (parsed_strings_value.IsStale()) { + *timestamp = -2; + return Status::NotFound("Stale"); + } else { + *timestamp = parsed_strings_value.timestamp(); + if (*timestamp == 0) { + *timestamp = -1; + } else { + int64_t curtime; + rocksdb::Env::Default()->GetCurrentTime(&curtime); + *timestamp = *timestamp - curtime >= 0 ? 
*timestamp - curtime : -2; + } + } + } else if (s.IsNotFound()) { + *timestamp = -2; + } + return s; +} + +void RedisStrings::ScanDatabase() { + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + auto current_time = static_cast(time(nullptr)); + + LOG(INFO) << "***************String Data***************"; + auto iter = db_->NewIterator(iterator_options); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedStringsValue parsed_strings_value(iter->value()); + int32_t survival_time = 0; + if (parsed_strings_value.timestamp() != 0) { + survival_time = + parsed_strings_value.timestamp() - current_time > 0 ? parsed_strings_value.timestamp() - current_time : -1; + } + LOG(INFO) << fmt::format("[key : {:<30}] [value : {:<30}] [timestamp : {:<10}] [version : {}] [survival_time : {}]", iter->key().ToString(), + parsed_strings_value.value().ToString(), parsed_strings_value.timestamp(), parsed_strings_value.version(), + survival_time); + + } + delete iter; +} + +} // namespace storage diff --git a/src/storage/src/redis_strings.h b/src/storage/src/redis_strings.h new file mode 100644 index 000000000..1680673e6 --- /dev/null +++ b/src/storage/src/redis_strings.h @@ -0,0 +1,78 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once + +#include +#include +#include + +#include "src/redis.h" + +namespace storage { + +class RedisStrings : public Redis { + public: + RedisStrings(Storage* s, const DataType& type); + ~RedisStrings() override = default; + + // Common Commands + Status Open(const StorageOptions& storage_options, const std::string& db_path) override; + Status CompactRange(const rocksdb::Slice* begin, const rocksdb::Slice* end, + const ColumnFamilyType& type = kMetaAndData) override; + Status GetProperty(const std::string& property, uint64_t* out) override; + Status ScanKeyNum(KeyInfo* key_info) override; + Status ScanKeys(const std::string& pattern, std::vector* keys) override; + Status PKPatternMatchDel(const std::string& pattern, int32_t* ret) override; + + // Strings Commands + Status Append(const Slice& key, const Slice& value, int32_t* ret); + Status BitCount(const Slice& key, int64_t start_offset, int64_t end_offset, int32_t* ret, bool have_range); + Status BitOp(BitOpType op, const std::string& dest_key, const std::vector& src_keys, std::string &value_to_dest, int64_t* ret); + Status Decrby(const Slice& key, int64_t value, int64_t* ret); + Status Get(const Slice& key, std::string* value); + Status GetBit(const Slice& key, int64_t offset, int32_t* ret); + Status Getrange(const Slice& key, int64_t start_offset, int64_t end_offset, std::string* ret); + Status GetSet(const Slice& key, const Slice& value, std::string* old_value); + Status Incrby(const Slice& key, int64_t value, int64_t* ret); + Status Incrbyfloat(const Slice& key, const Slice& value, std::string* ret); + Status MGet(const std::vector& keys, std::vector* vss); + Status MSet(const std::vector& kvs); + Status MSetnx(const std::vector& kvs, int32_t* ret); + Status Set(const Slice& key, const Slice& value); + Status Setxx(const Slice& key, const Slice& value, int32_t* ret, int32_t ttl = 0); + Status SetBit(const Slice& key, int64_t offset, int32_t value, int32_t* ret); + Status Setex(const Slice& key, 
const Slice& value, int32_t ttl); + Status Setnx(const Slice& key, const Slice& value, int32_t* ret, int32_t ttl = 0); + Status Setvx(const Slice& key, const Slice& value, const Slice& new_value, int32_t* ret, int32_t ttl = 0); + Status Delvx(const Slice& key, const Slice& value, int32_t* ret); + Status Setrange(const Slice& key, int64_t start_offset, const Slice& value, int32_t* ret); + Status Strlen(const Slice& key, int32_t* len); + + Status BitPos(const Slice& key, int32_t bit, int64_t* ret); + Status BitPos(const Slice& key, int32_t bit, int64_t start_offset, int64_t* ret); + Status BitPos(const Slice& key, int32_t bit, int64_t start_offset, int64_t end_offset, int64_t* ret); + Status PKSetexAt(const Slice& key, const Slice& value, int32_t timestamp); + Status PKScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* kvs, std::string* next_key); + Status PKRScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* kvs, std::string* next_key); + + // Keys Commands + Status Expire(const Slice& key, int32_t ttl) override; + Status Del(const Slice& key) override; + bool Scan(const std::string& start_key, const std::string& pattern, std::vector* keys, int64_t* count, + std::string* next_key) override; + bool PKExpireScan(const std::string& start_key, int32_t min_timestamp, int32_t max_timestamp, + std::vector* keys, int64_t* leftover_visits, std::string* next_key) override; + Status Expireat(const Slice& key, int32_t timestamp) override; + Status Persist(const Slice& key) override; + Status TTL(const Slice& key, int64_t* timestamp) override; + + // Iterate all data + void ScanDatabase(); +}; + +} // namespace storage diff --git a/src/storage/src/redis_zsets.cc b/src/storage/src/redis_zsets.cc new file mode 100644 index 000000000..67d38ebd2 --- /dev/null +++ b/src/storage/src/redis_zsets.cc @@ -0,0 +1,1823 @@ +// Copyright (c) 2017-present, Qihoo, Inc. 
All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include "src/redis_zsets.h" + +#include +#include +#include +#include + +#include +#include + +#include "iostream" +#include "src/scope_record_lock.h" +#include "src/scope_snapshot.h" +#include "src/zsets_filter.h" +#include "storage/util.h" + +namespace storage { + +rocksdb::Comparator* ZSetsScoreKeyComparator() { + static ZSetsScoreKeyComparatorImpl zsets_score_key_compare; + return &zsets_score_key_compare; +} + +RedisZSets::RedisZSets(Storage* const s, const DataType& type) : Redis(s, type) {} + +Status RedisZSets::Open(const StorageOptions& storage_options, const std::string& db_path) { + statistics_store_->SetCapacity(storage_options.statistics_max_size); + small_compaction_threshold_ = storage_options.small_compaction_threshold; + + rocksdb::Options ops(storage_options.options); + Status s = rocksdb::DB::Open(ops, db_path, &db_); + if (s.ok()) { + rocksdb::ColumnFamilyHandle *dcf = nullptr; + rocksdb::ColumnFamilyHandle *scf = nullptr; + s = db_->CreateColumnFamily(rocksdb::ColumnFamilyOptions(), "data_cf", &dcf); + if (!s.ok()) { + return s; + } + rocksdb::ColumnFamilyOptions score_cf_ops; + score_cf_ops.comparator = ZSetsScoreKeyComparator(); + s = db_->CreateColumnFamily(score_cf_ops, "score_cf", &scf); + if (!s.ok()) { + return s; + } + delete scf; + delete dcf; + delete db_; + } + + rocksdb::DBOptions db_ops(storage_options.options); + rocksdb::ColumnFamilyOptions meta_cf_ops(storage_options.options); + rocksdb::ColumnFamilyOptions data_cf_ops(storage_options.options); + rocksdb::ColumnFamilyOptions score_cf_ops(storage_options.options); + meta_cf_ops.compaction_filter_factory = std::make_shared(); + data_cf_ops.compaction_filter_factory = std::make_shared(&db_, &handles_); + 
score_cf_ops.compaction_filter_factory = std::make_shared(&db_, &handles_); + score_cf_ops.comparator = ZSetsScoreKeyComparator(); + + // use the bloom filter policy to reduce disk reads + rocksdb::BlockBasedTableOptions table_ops(storage_options.table_options); + table_ops.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true)); + rocksdb::BlockBasedTableOptions meta_cf_table_ops(table_ops); + rocksdb::BlockBasedTableOptions data_cf_table_ops(table_ops); + rocksdb::BlockBasedTableOptions score_cf_table_ops(table_ops); + if (!storage_options.share_block_cache && storage_options.block_cache_size > 0) { + meta_cf_table_ops.block_cache = rocksdb::NewLRUCache(storage_options.block_cache_size); + data_cf_table_ops.block_cache = rocksdb::NewLRUCache(storage_options.block_cache_size); + score_cf_table_ops.block_cache = rocksdb::NewLRUCache(storage_options.block_cache_size); + } + meta_cf_ops.table_factory.reset(rocksdb::NewBlockBasedTableFactory(meta_cf_table_ops)); + data_cf_ops.table_factory.reset(rocksdb::NewBlockBasedTableFactory(data_cf_table_ops)); + score_cf_ops.table_factory.reset(rocksdb::NewBlockBasedTableFactory(score_cf_table_ops)); + + std::vector column_families; + column_families.emplace_back(rocksdb::kDefaultColumnFamilyName, meta_cf_ops); + column_families.emplace_back("data_cf", data_cf_ops); + column_families.emplace_back("score_cf", score_cf_ops); + return rocksdb::DB::Open(db_ops, db_path, column_families, &handles_, &db_); +} + +Status RedisZSets::CompactRange(const rocksdb::Slice* begin, const rocksdb::Slice* end, const ColumnFamilyType& type) { + if (type == kMeta || type == kMetaAndData) { + db_->CompactRange(default_compact_range_options_, handles_[0], begin, end); + } + if (type == kData || type == kMetaAndData) { + db_->CompactRange(default_compact_range_options_, handles_[1], begin, end); + db_->CompactRange(default_compact_range_options_, handles_[2], begin, end); + } + return Status::OK(); +} + +Status RedisZSets::GetProperty(const 
std::string& property, uint64_t* out) { + std::string value; + db_->GetProperty(handles_[0], property, &value); + *out = std::strtoull(value.c_str(), nullptr, 10); + db_->GetProperty(handles_[1], property, &value); + *out += std::strtoull(value.c_str(), nullptr, 10); + db_->GetProperty(handles_[2], property, &value); + *out += std::strtoull(value.c_str(), nullptr, 10); + return Status::OK(); +} + +Status RedisZSets::ScanKeyNum(KeyInfo* key_info) { + uint64_t keys = 0; + uint64_t expires = 0; + uint64_t ttl_sum = 0; + uint64_t invaild_keys = 0; + + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + int64_t curtime; + rocksdb::Env::Default()->GetCurrentTime(&curtime); + + rocksdb::Iterator* iter = db_->NewIterator(iterator_options, handles_[0]); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(iter->value()); + if (parsed_zsets_meta_value.IsStale() || parsed_zsets_meta_value.count() == 0) { + invaild_keys++; + } else { + keys++; + if (!parsed_zsets_meta_value.IsPermanentSurvival()) { + expires++; + ttl_sum += parsed_zsets_meta_value.timestamp() - curtime; + } + } + } + delete iter; + + key_info->keys = keys; + key_info->expires = expires; + key_info->avg_ttl = (expires != 0) ? 
ttl_sum / expires : 0; + key_info->invaild_keys = invaild_keys; + return Status::OK(); +} + +Status RedisZSets::ScanKeys(const std::string& pattern, std::vector* keys) { + std::string key; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + rocksdb::Iterator* iter = db_->NewIterator(iterator_options, handles_[0]); + for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(iter->value()); + if (!parsed_zsets_meta_value.IsStale() && parsed_zsets_meta_value.count() != 0) { + key = iter->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + keys->push_back(key); + } + } + } + delete iter; + return Status::OK(); +} + +Status RedisZSets::PKPatternMatchDel(const std::string& pattern, int32_t* ret) { + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + std::string key; + std::string meta_value; + int32_t total_delete = 0; + Status s; + rocksdb::WriteBatch batch; + rocksdb::Iterator* iter = db_->NewIterator(iterator_options, handles_[0]); + iter->SeekToFirst(); + while (iter->Valid()) { + key = iter->key().ToString(); + meta_value = iter->value().ToString(); + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (!parsed_zsets_meta_value.IsStale() && (parsed_zsets_meta_value.count() != 0) && + (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0)) { + parsed_zsets_meta_value.InitialMetaValue(); + batch.Put(handles_[0], key, meta_value); + } + if (static_cast(batch.Count()) >= BATCH_DELETE_LIMIT) { + s = db_->Write(default_write_options_, &batch); + if (s.ok()) { + total_delete += static_cast(batch.Count()); + batch.Clear(); + } else { + *ret = total_delete; + 
return s; + } + } + iter->Next(); + } + if (batch.Count() != 0U) { + s = db_->Write(default_write_options_, &batch); + if (s.ok()) { + total_delete += static_cast(batch.Count()); + batch.Clear(); + } + } + + *ret = total_delete; + return s; +} + +Status RedisZSets::ZPopMax(const Slice& key, const int64_t count, std::vector* score_members) { + uint32_t statistic = 0; + score_members->clear(); + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int64_t num = parsed_zsets_meta_value.count(); + num = num <= count ? num : count; + int32_t version = parsed_zsets_meta_value.version(); + ZSetsScoreKey zsets_score_key(key, version, std::numeric_limits::max(), Slice()); + rocksdb::Iterator* iter = db_->NewIterator(default_read_options_, handles_[2]); + int32_t del_cnt = 0; + for (iter->SeekForPrev(zsets_score_key.Encode()); iter->Valid() && del_cnt < num; iter->Prev()) { + ParsedZSetsScoreKey parsed_zsets_score_key(iter->key()); + score_members->emplace_back( + ScoreMember{parsed_zsets_score_key.score(), parsed_zsets_score_key.member().ToString()}); + ZSetsMemberKey zsets_member_key(key, version, parsed_zsets_score_key.member()); + ++statistic; + ++del_cnt; + batch.Delete(handles_[1], zsets_member_key.Encode()); + batch.Delete(handles_[2], iter->key()); + } + delete iter; + if (!parsed_zsets_meta_value.CheckModifyCount(-del_cnt)){ + return Status::InvalidArgument("zset size overflow"); + } + parsed_zsets_meta_value.ModifyCount(-del_cnt); + batch.Put(handles_[0], key, meta_value); + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; + } + } else { + 
return s; + } +} + +Status RedisZSets::ZPopMin(const Slice& key, const int64_t count, std::vector* score_members) { + uint32_t statistic = 0; + score_members->clear(); + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int64_t num = parsed_zsets_meta_value.count(); + num = num <= count ? num : count; + int32_t version = parsed_zsets_meta_value.version(); + ZSetsScoreKey zsets_score_key(key, version, std::numeric_limits::lowest(), Slice()); + rocksdb::Iterator* iter = db_->NewIterator(default_read_options_, handles_[2]); + int32_t del_cnt = 0; + for (iter->Seek(zsets_score_key.Encode()); iter->Valid() && del_cnt < num; iter->Next()) { + ParsedZSetsScoreKey parsed_zsets_score_key(iter->key()); + score_members->emplace_back( + ScoreMember{parsed_zsets_score_key.score(), parsed_zsets_score_key.member().ToString()}); + ZSetsMemberKey zsets_member_key(key, version, parsed_zsets_score_key.member()); + ++statistic; + ++del_cnt; + batch.Delete(handles_[1], zsets_member_key.Encode()); + batch.Delete(handles_[2], iter->key()); + } + delete iter; + if (!parsed_zsets_meta_value.CheckModifyCount(-del_cnt)){ + return Status::InvalidArgument("zset size overflow"); + } + parsed_zsets_meta_value.ModifyCount(-del_cnt); + batch.Put(handles_[0], key, meta_value); + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; + } + } else { + return s; + } +} + +Status RedisZSets::ZAdd(const Slice& key, const std::vector& score_members, int32_t* ret) { + *ret = 0; + uint32_t statistic = 0; + std::unordered_set unique; + std::vector filtered_score_members; + for (const 
auto& sm : score_members) { + if (unique.find(sm.member) == unique.end()) { + unique.insert(sm.member); + filtered_score_members.push_back(sm); + } + } + + char score_buf[8]; + int32_t version = 0; + std::string meta_value; + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + bool vaild = true; + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale() || parsed_zsets_meta_value.count() == 0) { + vaild = false; + version = parsed_zsets_meta_value.InitialMetaValue(); + } else { + vaild = true; + version = parsed_zsets_meta_value.version(); + } + + int32_t cnt = 0; + std::string data_value; + for (const auto& sm : filtered_score_members) { + bool not_found = true; + ZSetsMemberKey zsets_member_key(key, version, sm.member); + if (vaild) { + s = db_->Get(default_read_options_, handles_[1], zsets_member_key.Encode(), &data_value); + if (s.ok()) { + not_found = false; + uint64_t tmp = DecodeFixed64(data_value.data()); + const void* ptr_tmp = reinterpret_cast(&tmp); + double old_score = *reinterpret_cast(ptr_tmp); + if (old_score == sm.score) { + continue; + } else { + ZSetsScoreKey zsets_score_key(key, version, old_score, sm.member); + batch.Delete(handles_[2], zsets_score_key.Encode()); + // delete old zsets_score_key and overwirte zsets_member_key + // but in different column_families so we accumulative 1 + statistic++; + } + } else if (!s.IsNotFound()) { + return s; + } + } + + const void* ptr_score = reinterpret_cast(&sm.score); + EncodeFixed64(score_buf, *reinterpret_cast(ptr_score)); + batch.Put(handles_[1], zsets_member_key.Encode(), Slice(score_buf, sizeof(uint64_t))); + + ZSetsScoreKey zsets_score_key(key, version, sm.score, sm.member); + batch.Put(handles_[2], zsets_score_key.Encode(), Slice()); + if (not_found) { + cnt++; + } + } + if (!parsed_zsets_meta_value.CheckModifyCount(cnt)){ + return 
Status::InvalidArgument("zset size overflow"); + } + parsed_zsets_meta_value.ModifyCount(cnt); + batch.Put(handles_[0], key, meta_value); + *ret = cnt; + } else if (s.IsNotFound()) { + char buf[4]; + EncodeFixed32(buf, filtered_score_members.size()); + ZSetsMetaValue zsets_meta_value(Slice(buf, sizeof(int32_t))); + version = zsets_meta_value.UpdateVersion(); + batch.Put(handles_[0], key, zsets_meta_value.Encode()); + for (const auto& sm : filtered_score_members) { + ZSetsMemberKey zsets_member_key(key, version, sm.member); + const void* ptr_score = reinterpret_cast(&sm.score); + EncodeFixed64(score_buf, *reinterpret_cast(ptr_score)); + batch.Put(handles_[1], zsets_member_key.Encode(), Slice(score_buf, sizeof(uint64_t))); + + ZSetsScoreKey zsets_score_key(key, version, sm.score, sm.member); + batch.Put(handles_[2], zsets_score_key.Encode(), Slice()); + } + *ret = static_cast(filtered_score_members.size()); + } else { + return s; + } + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; +} + +Status RedisZSets::ZCard(const Slice& key, int32_t* card) { + *card = 0; + std::string meta_value; + + Status s = db_->Get(default_read_options_, key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + *card = 0; + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + *card = 0; + return Status::NotFound(); + } else { + *card = parsed_zsets_meta_value.count(); + } + } + return s; +} + +Status RedisZSets::ZCount(const Slice& key, double min, double max, bool left_close, bool right_close, int32_t* ret) { + *ret = 0; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot = nullptr; + + std::string meta_value; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + + Status s = db_->Get(read_options, key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue 
parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t version = parsed_zsets_meta_value.version(); + int32_t cnt = 0; + int32_t cur_index = 0; + int32_t stop_index = parsed_zsets_meta_value.count() - 1; + ScoreMember score_member; + ZSetsScoreKey zsets_score_key(key, version, min, Slice()); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[2]); + for (iter->Seek(zsets_score_key.Encode()); iter->Valid() && cur_index <= stop_index; iter->Next(), ++cur_index) { + bool left_pass = false; + bool right_pass = false; + ParsedZSetsScoreKey parsed_zsets_score_key(iter->key()); + if (parsed_zsets_score_key.key() != key) { + break; + } + if (parsed_zsets_score_key.version() != version) { + break; + } + if ((left_close && min <= parsed_zsets_score_key.score()) || + (!left_close && min < parsed_zsets_score_key.score())) { + left_pass = true; + } + if ((right_close && parsed_zsets_score_key.score() <= max) || + (!right_close && parsed_zsets_score_key.score() < max)) { + right_pass = true; + } + if (left_pass && right_pass) { + cnt++; + } else if (!right_pass) { + break; + } + } + delete iter; + *ret = cnt; + } + } + return s; +} + +Status RedisZSets::ZIncrby(const Slice& key, const Slice& member, double increment, double* ret) { + *ret = 0; + uint32_t statistic = 0; + double score = 0; + char score_buf[8]; + int32_t version = 0; + std::string meta_value; + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale() || parsed_zsets_meta_value.count() == 0) { + version = parsed_zsets_meta_value.InitialMetaValue(); + } else { + version = parsed_zsets_meta_value.version(); + } + std::string data_value; + 
ZSetsMemberKey zsets_member_key(key, version, member); + s = db_->Get(default_read_options_, handles_[1], zsets_member_key.Encode(), &data_value); + if (s.ok()) { + uint64_t tmp = DecodeFixed64(data_value.data()); + const void* ptr_tmp = reinterpret_cast(&tmp); + double old_score = *reinterpret_cast(ptr_tmp); + score = old_score + increment; + ZSetsScoreKey zsets_score_key(key, version, old_score, member); + batch.Delete(handles_[2], zsets_score_key.Encode()); + // delete old zsets_score_key and overwirte zsets_member_key + // but in different column_families so we accumulative 1 + statistic++; + } else if (s.IsNotFound()) { + score = increment; + if (!parsed_zsets_meta_value.CheckModifyCount(1)){ + return Status::InvalidArgument("zset size overflow"); + } + parsed_zsets_meta_value.ModifyCount(1); + batch.Put(handles_[0], key, meta_value); + } else { + return s; + } + } else if (s.IsNotFound()) { + char buf[8]; + EncodeFixed32(buf, 1); + ZSetsMetaValue zsets_meta_value(Slice(buf, sizeof(int32_t))); + version = zsets_meta_value.UpdateVersion(); + batch.Put(handles_[0], key, zsets_meta_value.Encode()); + score = increment; + } else { + return s; + } + ZSetsMemberKey zsets_member_key(key, version, member); + const void* ptr_score = reinterpret_cast(&score); + EncodeFixed64(score_buf, *reinterpret_cast(ptr_score)); + batch.Put(handles_[1], zsets_member_key.Encode(), Slice(score_buf, sizeof(uint64_t))); + + ZSetsScoreKey zsets_score_key(key, version, score, member); + batch.Put(handles_[2], zsets_score_key.Encode(), Slice()); + *ret = score; + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; +} + +Status RedisZSets::ZRange(const Slice& key, int32_t start, int32_t stop, std::vector* score_members) { + score_members->clear(); + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot = nullptr; + + std::string meta_value; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = 
snapshot; + + Status s = db_->Get(read_options, key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t count = parsed_zsets_meta_value.count(); + int32_t version = parsed_zsets_meta_value.version(); + int32_t start_index = start >= 0 ? start : count + start; + int32_t stop_index = stop >= 0 ? stop : count + stop; + start_index = start_index <= 0 ? 0 : start_index; + stop_index = stop_index >= count ? count - 1 : stop_index; + if (start_index > stop_index || start_index >= count || stop_index < 0) { + return s; + } + int32_t cur_index = 0; + ScoreMember score_member; + ZSetsScoreKey zsets_score_key(key, version, std::numeric_limits::lowest(), Slice()); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[2]); + for (iter->Seek(zsets_score_key.Encode()); iter->Valid() && cur_index <= stop_index; iter->Next(), ++cur_index) { + if (cur_index >= start_index) { + ParsedZSetsScoreKey parsed_zsets_score_key(iter->key()); + score_member.score = parsed_zsets_score_key.score(); + score_member.member = parsed_zsets_score_key.member().ToString(); + score_members->push_back(score_member); + } + } + delete iter; + } + } + return s; +} + +Status RedisZSets::ZRangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, + int64_t count, int64_t offset, std::vector* score_members) { + score_members->clear(); + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot = nullptr; + + std::string meta_value; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if 
(parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else if (offset >= 0 && count != 0) { + int32_t version = parsed_zsets_meta_value.version(); + int32_t index = 0; + int32_t stop_index = parsed_zsets_meta_value.count() - 1; + int64_t skipped = 0; + ScoreMember score_member; + ZSetsScoreKey zsets_score_key(key, version, min, Slice()); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[2]); + for (iter->Seek(zsets_score_key.Encode()); iter->Valid() && index <= stop_index; iter->Next(), ++index) { + bool left_pass = false; + bool right_pass = false; + ParsedZSetsScoreKey parsed_zsets_score_key(iter->key()); + if (parsed_zsets_score_key.key() != key) { + break; + } + if (parsed_zsets_score_key.version() != version) { + break; + } + if ((left_close && min <= parsed_zsets_score_key.score()) || + (!left_close && min < parsed_zsets_score_key.score())) { + left_pass = true; + } + if ((right_close && parsed_zsets_score_key.score() <= max) || + (!right_close && parsed_zsets_score_key.score() < max)) { + right_pass = true; + } + if (left_pass && right_pass) { + // skip offset + if (skipped < offset) { + ++skipped; + continue; + } + score_member.score = parsed_zsets_score_key.score(); + score_member.member = parsed_zsets_score_key.member().ToString(); + score_members->push_back(score_member); + if (count > 0 && score_members->size() == static_cast(count)) { + break; + } + } + if (!right_pass) { + break; + } + } + delete iter; + } + } + return s; +} + +Status RedisZSets::ZRank(const Slice& key, const Slice& member, int32_t* rank) { + *rank = -1; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot = nullptr; + + std::string meta_value; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return 
Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + bool found = false; + int32_t version = parsed_zsets_meta_value.version(); + int32_t index = 0; + int32_t stop_index = parsed_zsets_meta_value.count() - 1; + ScoreMember score_member; + ZSetsScoreKey zsets_score_key(key, version, std::numeric_limits::lowest(), Slice()); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[2]); + for (iter->Seek(zsets_score_key.Encode()); iter->Valid() && index <= stop_index; iter->Next(), ++index) { + ParsedZSetsScoreKey parsed_zsets_score_key(iter->key()); + if (parsed_zsets_score_key.member().compare(member) == 0) { + found = true; + break; + } + } + delete iter; + if (found) { + *rank = index; + return Status::OK(); + } else { + return Status::NotFound(); + } + } + } + return s; +} + +Status RedisZSets::ZRem(const Slice& key, const std::vector& members, int32_t* ret) { + *ret = 0; + uint32_t statistic = 0; + std::unordered_set unique; + std::vector filtered_members; + for (const auto& member : members) { + if (unique.find(member) == unique.end()) { + unique.insert(member); + filtered_members.push_back(member); + } + } + + std::string meta_value; + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t del_cnt = 0; + std::string data_value; + int32_t version = parsed_zsets_meta_value.version(); + for (const auto& member : filtered_members) { + ZSetsMemberKey zsets_member_key(key, version, member); + s = db_->Get(default_read_options_, handles_[1], zsets_member_key.Encode(), &data_value); + if (s.ok()) { + del_cnt++; + statistic++; + uint64_t tmp = 
DecodeFixed64(data_value.data()); + const void* ptr_tmp = reinterpret_cast(&tmp); + double score = *reinterpret_cast(ptr_tmp); + batch.Delete(handles_[1], zsets_member_key.Encode()); + + ZSetsScoreKey zsets_score_key(key, version, score, member); + batch.Delete(handles_[2], zsets_score_key.Encode()); + } else if (!s.IsNotFound()) { + return s; + } + } + *ret = del_cnt; + if (!parsed_zsets_meta_value.CheckModifyCount(-del_cnt)){ + return Status::InvalidArgument("zset size overflow"); + } + parsed_zsets_meta_value.ModifyCount(-del_cnt); + batch.Put(handles_[0], key, meta_value); + } + } else { + return s; + } + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; +} + +Status RedisZSets::ZRemrangebyrank(const Slice& key, int32_t start, int32_t stop, int32_t* ret) { + *ret = 0; + uint32_t statistic = 0; + std::string meta_value; + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + std::string member; + int32_t del_cnt = 0; + int32_t cur_index = 0; + int32_t count = parsed_zsets_meta_value.count(); + int32_t version = parsed_zsets_meta_value.version(); + int32_t start_index = start >= 0 ? start : count + start; + int32_t stop_index = stop >= 0 ? stop : count + stop; + start_index = start_index <= 0 ? 0 : start_index; + stop_index = stop_index >= count ? 
count - 1 : stop_index; + if (start_index > stop_index || start_index >= count) { + return s; + } + ZSetsScoreKey zsets_score_key(key, version, std::numeric_limits::lowest(), Slice()); + rocksdb::Iterator* iter = db_->NewIterator(default_read_options_, handles_[2]); + for (iter->Seek(zsets_score_key.Encode()); iter->Valid() && cur_index <= stop_index; iter->Next(), ++cur_index) { + if (cur_index >= start_index) { + ParsedZSetsScoreKey parsed_zsets_score_key(iter->key()); + ZSetsMemberKey zsets_member_key(key, version, parsed_zsets_score_key.member()); + batch.Delete(handles_[1], zsets_member_key.Encode()); + batch.Delete(handles_[2], iter->key()); + del_cnt++; + statistic++; + } + } + delete iter; + *ret = del_cnt; + if (!parsed_zsets_meta_value.CheckModifyCount(-del_cnt)){ + return Status::InvalidArgument("zset size overflow"); + } + parsed_zsets_meta_value.ModifyCount(-del_cnt); + batch.Put(handles_[0], key, meta_value); + } + } else { + return s; + } + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; +} + +Status RedisZSets::ZRemrangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, + int32_t* ret) { + *ret = 0; + uint32_t statistic = 0; + std::string meta_value; + rocksdb::WriteBatch batch; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + std::string member; + int32_t del_cnt = 0; + int32_t cur_index = 0; + int32_t stop_index = parsed_zsets_meta_value.count() - 1; + int32_t version = parsed_zsets_meta_value.version(); + ZSetsScoreKey zsets_score_key(key, version, min, Slice()); + rocksdb::Iterator* iter = db_->NewIterator(default_read_options_, 
handles_[2]); + for (iter->Seek(zsets_score_key.Encode()); iter->Valid() && cur_index <= stop_index; iter->Next(), ++cur_index) { + bool left_pass = false; + bool right_pass = false; + ParsedZSetsScoreKey parsed_zsets_score_key(iter->key()); + if (parsed_zsets_score_key.key() != key) { + break; + } + if (parsed_zsets_score_key.version() != version) { + break; + } + if ((left_close && min <= parsed_zsets_score_key.score()) || + (!left_close && min < parsed_zsets_score_key.score())) { + left_pass = true; + } + if ((right_close && parsed_zsets_score_key.score() <= max) || + (!right_close && parsed_zsets_score_key.score() < max)) { + right_pass = true; + } + if (left_pass && right_pass) { + ZSetsMemberKey zsets_member_key(key, version, parsed_zsets_score_key.member()); + batch.Delete(handles_[1], zsets_member_key.Encode()); + batch.Delete(handles_[2], iter->key()); + del_cnt++; + statistic++; + } + if (!right_pass) { + break; + } + } + delete iter; + *ret = del_cnt; + if (!parsed_zsets_meta_value.CheckModifyCount(-del_cnt)){ + return Status::InvalidArgument("zset size overflow"); + } + parsed_zsets_meta_value.ModifyCount(-del_cnt); + batch.Put(handles_[0], key, meta_value); + } + } else { + return s; + } + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; +} + +Status RedisZSets::ZRevrange(const Slice& key, int32_t start, int32_t stop, std::vector* score_members) { + score_members->clear(); + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot = nullptr; + + std::string meta_value; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + + Status s = db_->Get(read_options, key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t count = 
parsed_zsets_meta_value.count(); + int32_t version = parsed_zsets_meta_value.version(); + int32_t start_index = stop >= 0 ? count - stop - 1 : -stop - 1; + int32_t stop_index = start >= 0 ? count - start - 1 : -start - 1; + start_index = start_index <= 0 ? 0 : start_index; + stop_index = stop_index >= count ? count - 1 : stop_index; + if (start_index > stop_index || start_index >= count || stop_index < 0) { + return s; + } + int32_t cur_index = count - 1; + ScoreMember score_member; + ZSetsScoreKey zsets_score_key(key, version, std::numeric_limits::max(), Slice()); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[2]); + for (iter->SeekForPrev(zsets_score_key.Encode()); iter->Valid() && cur_index >= start_index; + iter->Prev(), --cur_index) { + if (cur_index <= stop_index) { + ParsedZSetsScoreKey parsed_zsets_score_key(iter->key()); + score_member.score = parsed_zsets_score_key.score(); + score_member.member = parsed_zsets_score_key.member().ToString(); + score_members->push_back(score_member); + } + } + delete iter; + } + } + return s; +} + +Status RedisZSets::ZRevrangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, + int64_t count, int64_t offset, std::vector* score_members) { + score_members->clear(); + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot = nullptr; + + std::string meta_value; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else if (offset >= 0 && count != 0) { + int32_t version = parsed_zsets_meta_value.version(); + int32_t left = parsed_zsets_meta_value.count(); + int64_t skipped = 0; + ScoreMember score_member; + ZSetsScoreKey 
zsets_score_key(key, version, std::nextafter(max, std::numeric_limits::max()), Slice()); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[2]); + for (iter->SeekForPrev(zsets_score_key.Encode()); iter->Valid() && left > 0; iter->Prev(), --left) { + bool left_pass = false; + bool right_pass = false; + ParsedZSetsScoreKey parsed_zsets_score_key(iter->key()); + if (parsed_zsets_score_key.key() != key) { + break; + } + if (parsed_zsets_score_key.version() != version) { + break; + } + if ((left_close && min <= parsed_zsets_score_key.score()) || + (!left_close && min < parsed_zsets_score_key.score())) { + left_pass = true; + } + if ((right_close && parsed_zsets_score_key.score() <= max) || + (!right_close && parsed_zsets_score_key.score() < max)) { + right_pass = true; + } + if (left_pass && right_pass) { + // skip offset + if (skipped < offset) { + ++skipped; + continue; + } + score_member.score = parsed_zsets_score_key.score(); + score_member.member = parsed_zsets_score_key.member().ToString(); + score_members->push_back(score_member); + if (count > 0 and score_members->size() == static_cast(count)) { + break; + } + } + if (!left_pass) { + break; + } + } + delete iter; + } + } + return s; +} + +Status RedisZSets::ZRevrank(const Slice& key, const Slice& member, int32_t* rank) { + *rank = -1; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot = nullptr; + + std::string meta_value; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + + Status s = db_->Get(read_options, key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + bool found = false; + int32_t rev_index = 0; + int32_t left = parsed_zsets_meta_value.count(); + int32_t version = parsed_zsets_meta_value.version(); + ZSetsScoreKey 
zsets_score_key(key, version, std::numeric_limits::max(), Slice()); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[2]); + for (iter->SeekForPrev(zsets_score_key.Encode()); iter->Valid() && left >= 0; iter->Prev(), --left, ++rev_index) { + ParsedZSetsScoreKey parsed_zsets_score_key(iter->key()); + if (parsed_zsets_score_key.member().compare(member) == 0) { + found = true; + break; + } + } + delete iter; + if (found) { + *rank = rev_index; + } else { + return Status::NotFound(); + } + } + } + return s; +} + +Status RedisZSets::ZScore(const Slice& key, const Slice& member, double* score) { + *score = 0; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot = nullptr; + + std::string meta_value; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + + Status s = db_->Get(read_options, key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + int32_t version = parsed_zsets_meta_value.version(); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + std::string data_value; + ZSetsMemberKey zsets_member_key(key, version, member); + s = db_->Get(read_options, handles_[1], zsets_member_key.Encode(), &data_value); + if (s.ok()) { + uint64_t tmp = DecodeFixed64(data_value.data()); + const void* ptr_tmp = reinterpret_cast(&tmp); + *score = *reinterpret_cast(ptr_tmp); + } else { + return s; + } + } + } else if (!s.IsNotFound()) { + return s; + } + return s; +} + +Status RedisZSets::ZUnionstore(const Slice& destination, const std::vector& keys, + const std::vector& weights, const AGGREGATE agg, std::map& value_to_dest, int32_t* ret) { + *ret = 0; + uint32_t statistic = 0; + rocksdb::WriteBatch batch; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot = nullptr; + + int32_t version; + std::string meta_value; + ScoreMember sm; + ScopeSnapshot 
ss(db_, &snapshot); + read_options.snapshot = snapshot; + ScopeRecordLock l(lock_mgr_, destination); + std::map member_score_map; + + Status s; + for (size_t idx = 0; idx < keys.size(); ++idx) { + s = db_->Get(read_options, handles_[0], keys[idx], &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (!parsed_zsets_meta_value.IsStale() && parsed_zsets_meta_value.count() != 0) { + int32_t cur_index = 0; + int32_t stop_index = parsed_zsets_meta_value.count() - 1; + double score = 0; + double weight = idx < weights.size() ? weights[idx] : 1; + version = parsed_zsets_meta_value.version(); + ZSetsScoreKey zsets_score_key(keys[idx], version, std::numeric_limits::lowest(), Slice()); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[2]); + for (iter->Seek(zsets_score_key.Encode()); iter->Valid() && cur_index <= stop_index; + iter->Next(), ++cur_index) { + ParsedZSetsScoreKey parsed_zsets_score_key(iter->key()); + sm.score = parsed_zsets_score_key.score(); + sm.member = parsed_zsets_score_key.member().ToString(); + if (member_score_map.find(sm.member) == member_score_map.end()) { + score = weight * sm.score; + member_score_map[sm.member] = (score == -0.0) ? 0 : score; + } else { + score = member_score_map[sm.member]; + switch (agg) { + case SUM: + score += weight * sm.score; + break; + case MIN: + score = std::min(score, weight * sm.score); + break; + case MAX: + score = std::max(score, weight * sm.score); + break; + } + member_score_map[sm.member] = (score == -0.0) ? 
0 : score; + } + } + delete iter; + } + } else if (!s.IsNotFound()) { + return s; + } + } + + s = db_->Get(read_options, handles_[0], destination, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + statistic = parsed_zsets_meta_value.count(); + version = parsed_zsets_meta_value.InitialMetaValue(); + if (!parsed_zsets_meta_value.check_set_count(static_cast(member_score_map.size()))) { + return Status::InvalidArgument("zset size overflow"); + } + parsed_zsets_meta_value.set_count(static_cast(member_score_map.size())); + batch.Put(handles_[0], destination, meta_value); + } else { + char buf[4]; + EncodeFixed32(buf, member_score_map.size()); + ZSetsMetaValue zsets_meta_value(Slice(buf, sizeof(int32_t))); + version = zsets_meta_value.UpdateVersion(); + batch.Put(handles_[0], destination, zsets_meta_value.Encode()); + } + + char score_buf[8]; + for (const auto& sm : member_score_map) { + ZSetsMemberKey zsets_member_key(destination, version, sm.first); + + const void* ptr_score = reinterpret_cast(&sm.second); + EncodeFixed64(score_buf, *reinterpret_cast(ptr_score)); + batch.Put(handles_[1], zsets_member_key.Encode(), Slice(score_buf, sizeof(uint64_t))); + + ZSetsScoreKey zsets_score_key(destination, version, sm.second, sm.first); + batch.Put(handles_[2], zsets_score_key.Encode(), Slice()); + } + *ret = static_cast(member_score_map.size()); + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(destination.ToString(), statistic); + value_to_dest = std::move(member_score_map); + return s; +} + +Status RedisZSets::ZInterstore(const Slice& destination, const std::vector& keys, + const std::vector& weights, const AGGREGATE agg, std::vector& value_to_dest, int32_t* ret) { + if (keys.empty()) { + return Status::Corruption("ZInterstore invalid parameter, no keys"); + } + + *ret = 0; + uint32_t statistic = 0; + rocksdb::WriteBatch batch; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot = 
nullptr; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + ScopeRecordLock l(lock_mgr_, destination); + + std::string meta_value; + int32_t version = 0; + bool have_invalid_zsets = false; + ScoreMember item; + std::vector vaild_zsets; + std::vector score_members; + std::vector final_score_members; + Status s; + + int32_t cur_index = 0; + int32_t stop_index = 0; + for (size_t idx = 0; idx < keys.size(); ++idx) { + s = db_->Get(read_options, handles_[0], keys[idx], &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale() || parsed_zsets_meta_value.count() == 0) { + have_invalid_zsets = true; + } else { + vaild_zsets.push_back({keys[idx], parsed_zsets_meta_value.version()}); + if (idx == 0) { + stop_index = parsed_zsets_meta_value.count() - 1; + } + } + } else if (s.IsNotFound()) { + have_invalid_zsets = true; + } else { + return s; + } + } + + if (!have_invalid_zsets) { + ZSetsScoreKey zsets_score_key(vaild_zsets[0].key, vaild_zsets[0].version, std::numeric_limits::lowest(), + Slice()); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[2]); + for (iter->Seek(zsets_score_key.Encode()); iter->Valid() && cur_index <= stop_index; iter->Next(), ++cur_index) { + ParsedZSetsScoreKey parsed_zsets_score_key(iter->key()); + double score = parsed_zsets_score_key.score(); + std::string member = parsed_zsets_score_key.member().ToString(); + score_members.push_back({score, member}); + } + delete iter; + + std::string data_value; + for (const auto& sm : score_members) { + bool reliable = true; + item.member = sm.member; + item.score = sm.score * (!weights.empty() ? weights[0] : 1); + for (size_t idx = 1; idx < vaild_zsets.size(); ++idx) { + double weight = idx < weights.size() ? 
weights[idx] : 1; + ZSetsMemberKey zsets_member_key(vaild_zsets[idx].key, vaild_zsets[idx].version, item.member); + s = db_->Get(read_options, handles_[1], zsets_member_key.Encode(), &data_value); + if (s.ok()) { + uint64_t tmp = DecodeFixed64(data_value.data()); + const void* ptr_tmp = reinterpret_cast(&tmp); + double score = *reinterpret_cast(ptr_tmp); + switch (agg) { + case SUM: + item.score += weight * score; + break; + case MIN: + item.score = std::min(item.score, weight * score); + break; + case MAX: + item.score = std::max(item.score, weight * score); + break; + } + } else if (s.IsNotFound()) { + reliable = false; + break; + } else { + return s; + } + } + if (reliable) { + final_score_members.push_back(item); + } + } + } + + s = db_->Get(read_options, handles_[0], destination, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + statistic = parsed_zsets_meta_value.count(); + version = parsed_zsets_meta_value.InitialMetaValue(); + if (!parsed_zsets_meta_value.check_set_count(static_cast(final_score_members.size()))) { + return Status::InvalidArgument("zset size overflow"); + } + parsed_zsets_meta_value.set_count(static_cast(final_score_members.size())); + batch.Put(handles_[0], destination, meta_value); + } else { + char buf[4]; + EncodeFixed32(buf, final_score_members.size()); + ZSetsMetaValue zsets_meta_value(Slice(buf, sizeof(int32_t))); + version = zsets_meta_value.UpdateVersion(); + batch.Put(handles_[0], destination, zsets_meta_value.Encode()); + } + char score_buf[8]; + for (const auto& sm : final_score_members) { + ZSetsMemberKey zsets_member_key(destination, version, sm.member); + + const void* ptr_score = reinterpret_cast(&sm.score); + EncodeFixed64(score_buf, *reinterpret_cast(ptr_score)); + batch.Put(handles_[1], zsets_member_key.Encode(), Slice(score_buf, sizeof(uint64_t))); + + ZSetsScoreKey zsets_score_key(destination, version, sm.score, sm.member); + batch.Put(handles_[2], zsets_score_key.Encode(), 
Slice()); + } + *ret = static_cast(final_score_members.size()); + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(destination.ToString(), statistic); + value_to_dest = std::move(final_score_members); + return s; +} + +Status RedisZSets::ZRangebylex(const Slice& key, const Slice& min, const Slice& max, bool left_close, bool right_close, + std::vector* members) { + members->clear(); + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot = nullptr; + + std::string meta_value; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + + bool left_no_limit = min.compare("-") == 0; + bool right_not_limit = max.compare("+") == 0; + + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale() || parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t version = parsed_zsets_meta_value.version(); + int32_t cur_index = 0; + int32_t stop_index = parsed_zsets_meta_value.count() - 1; + ZSetsMemberKey zsets_member_key(key, version, Slice()); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(zsets_member_key.Encode()); iter->Valid() && cur_index <= stop_index; iter->Next(), ++cur_index) { + bool left_pass = false; + bool right_pass = false; + ParsedZSetsMemberKey parsed_zsets_member_key(iter->key()); + Slice member = parsed_zsets_member_key.member(); + if (left_no_limit || (left_close && min.compare(member) <= 0) || (!left_close && min.compare(member) < 0)) { + left_pass = true; + } + if (right_not_limit || (right_close && max.compare(member) >= 0) || (!right_close && max.compare(member) > 0)) { + right_pass = true; + } + if (left_pass && right_pass) { + members->push_back(member.ToString()); + } + if (!right_pass) { + break; + } + } + delete iter; + } + } + return s; +} + +Status RedisZSets::ZLexcount(const Slice& key, 
const Slice& min, const Slice& max, bool left_close, bool right_close, + int32_t* ret) { + std::vector members; + Status s = ZRangebylex(key, min, max, left_close, right_close, &members); + *ret = static_cast(members.size()); + return s; +} + +Status RedisZSets::ZRemrangebylex(const Slice& key, const Slice& min, const Slice& max, bool left_close, + bool right_close, int32_t* ret) { + *ret = 0; + uint32_t statistic = 0; + rocksdb::WriteBatch batch; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot = nullptr; + + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + ScopeRecordLock l(lock_mgr_, key); + + bool left_no_limit = min.compare("-") == 0; + bool right_not_limit = max.compare("+") == 0; + + int32_t del_cnt = 0; + std::string meta_value; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale() || parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t version = parsed_zsets_meta_value.version(); + int32_t cur_index = 0; + int32_t stop_index = parsed_zsets_meta_value.count() - 1; + ZSetsMemberKey zsets_member_key(key, version, Slice()); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(zsets_member_key.Encode()); iter->Valid() && cur_index <= stop_index; iter->Next(), ++cur_index) { + bool left_pass = false; + bool right_pass = false; + ParsedZSetsMemberKey parsed_zsets_member_key(iter->key()); + Slice member = parsed_zsets_member_key.member(); + if (left_no_limit || (left_close && min.compare(member) <= 0) || (!left_close && min.compare(member) < 0)) { + left_pass = true; + } + if (right_not_limit || (right_close && max.compare(member) >= 0) || (!right_close && max.compare(member) > 0)) { + right_pass = true; + } + if (left_pass && right_pass) { + batch.Delete(handles_[1], iter->key()); + + uint64_t tmp = 
DecodeFixed64(iter->value().data()); + const void* ptr_tmp = reinterpret_cast(&tmp); + double score = *reinterpret_cast(ptr_tmp); + ZSetsScoreKey zsets_score_key(key, version, score, member); + batch.Delete(handles_[2], zsets_score_key.Encode()); + del_cnt++; + statistic++; + } + if (!right_pass) { + break; + } + } + delete iter; + } + if (del_cnt > 0) { + if (!parsed_zsets_meta_value.CheckModifyCount(-del_cnt)){ + return Status::InvalidArgument("zset size overflow"); + } + parsed_zsets_meta_value.ModifyCount(-del_cnt); + batch.Put(handles_[0], key, meta_value); + *ret = del_cnt; + } + } else { + return s; + } + s = db_->Write(default_write_options_, &batch); + UpdateSpecificKeyStatistics(key.ToString(), statistic); + return s; +} + +Status RedisZSets::Expire(const Slice& key, int32_t ttl) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } + + if (ttl > 0) { + parsed_zsets_meta_value.SetRelativeTimestamp(ttl); + } else { + parsed_zsets_meta_value.InitialMetaValue(); + } + s = db_->Put(default_write_options_, handles_[0], key, meta_value); + } + return s; +} + +Status RedisZSets::Del(const Slice& key) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + uint32_t statistic = parsed_zsets_meta_value.count(); + parsed_zsets_meta_value.InitialMetaValue(); + s = db_->Put(default_write_options_, handles_[0], key, meta_value); + 
UpdateSpecificKeyStatistics(key.ToString(), statistic); + } + } + return s; +} + +bool RedisZSets::Scan(const std::string& start_key, const std::string& pattern, std::vector* keys, + int64_t* count, std::string* next_key) { + std::string meta_key; + bool is_finish = true; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + + it->Seek(start_key); + while (it->Valid() && (*count) > 0) { + ParsedZSetsMetaValue parsed_zsets_meta_value(it->value()); + if (parsed_zsets_meta_value.IsStale() || parsed_zsets_meta_value.count() == 0) { + it->Next(); + continue; + } else { + meta_key = it->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), meta_key.data(), meta_key.size(), 0) != 0) { + keys->push_back(meta_key); + } + (*count)--; + it->Next(); + } + } + + std::string prefix = isTailWildcard(pattern) ? 
pattern.substr(0, pattern.size() - 1) : ""; + if (it->Valid() && (it->key().compare(prefix) <= 0 || it->key().starts_with(prefix))) { + *next_key = it->key().ToString(); + is_finish = false; + } else { + *next_key = ""; + } + delete it; + return is_finish; +} + +bool RedisZSets::PKExpireScan(const std::string& start_key, int32_t min_timestamp, int32_t max_timestamp, + std::vector* keys, int64_t* leftover_visits, std::string* next_key) { + bool is_finish = true; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + it->Seek(start_key); + while (it->Valid() && (*leftover_visits) > 0) { + ParsedZSetsMetaValue parsed_zsets_meta_value(it->value()); + if (parsed_zsets_meta_value.IsStale() || parsed_zsets_meta_value.count() == 0) { + it->Next(); + continue; + } else { + if (min_timestamp < parsed_zsets_meta_value.timestamp() && parsed_zsets_meta_value.timestamp() < max_timestamp) { + keys->push_back(it->key().ToString()); + } + (*leftover_visits)--; + it->Next(); + } + } + + if (it->Valid()) { + is_finish = false; + *next_key = it->key().ToString(); + } else { + *next_key = ""; + } + delete it; + return is_finish; +} + +Status RedisZSets::Expireat(const Slice& key, int32_t timestamp) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + if (timestamp > 0) { + parsed_zsets_meta_value.set_timestamp(timestamp); + } else { + parsed_zsets_meta_value.InitialMetaValue(); + } + return db_->Put(default_write_options_, handles_[0], 
key, meta_value); + } + } + return s; +} + +Status RedisZSets::ZScan(const Slice& key, int64_t cursor, const std::string& pattern, int64_t count, + std::vector* score_members, int64_t* next_cursor) { + *next_cursor = 0; + score_members->clear(); + if (cursor < 0) { + *next_cursor = 0; + return Status::OK(); + } + + int64_t rest = count; + int64_t step_length = count; + rocksdb::ReadOptions read_options; + const rocksdb::Snapshot* snapshot; + + std::string meta_value; + ScopeSnapshot ss(db_, &snapshot); + read_options.snapshot = snapshot; + Status s = db_->Get(read_options, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale() || parsed_zsets_meta_value.count() == 0) { + *next_cursor = 0; + return Status::NotFound(); + } else { + std::string sub_member; + std::string start_point; + int32_t version = parsed_zsets_meta_value.version(); + s = GetScanStartPoint(key, pattern, cursor, &start_point); + if (s.IsNotFound()) { + cursor = 0; + if (isTailWildcard(pattern)) { + start_point = pattern.substr(0, pattern.size() - 1); + } + } + if (isTailWildcard(pattern)) { + sub_member = pattern.substr(0, pattern.size() - 1); + } + + ZSetsMemberKey zsets_member_prefix(key, version, sub_member); + ZSetsMemberKey zsets_member_key(key, version, start_point); + std::string prefix = zsets_member_prefix.Encode().ToString(); + rocksdb::Iterator* iter = db_->NewIterator(read_options, handles_[1]); + for (iter->Seek(zsets_member_key.Encode()); iter->Valid() && rest > 0 && iter->key().starts_with(prefix); + iter->Next()) { + ParsedZSetsMemberKey parsed_zsets_member_key(iter->key()); + std::string member = parsed_zsets_member_key.member().ToString(); + if (StringMatch(pattern.data(), pattern.size(), member.data(), member.size(), 0) != 0) { + uint64_t tmp = DecodeFixed64(iter->value().data()); + const void* ptr_tmp = reinterpret_cast(&tmp); + double score = *reinterpret_cast(ptr_tmp); + 
score_members->push_back({score, member}); + } + rest--; + } + + if (iter->Valid() && (iter->key().compare(prefix) <= 0 || iter->key().starts_with(prefix))) { + *next_cursor = cursor + step_length; + ParsedZSetsMemberKey parsed_zsets_member_key(iter->key()); + std::string next_member = parsed_zsets_member_key.member().ToString(); + StoreScanNextPoint(key, pattern, *next_cursor, next_member); + } else { + *next_cursor = 0; + } + delete iter; + } + } else { + *next_cursor = 0; + return s; + } + return Status::OK(); +} + +Status RedisZSets::PKScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key) { + next_key->clear(); + + std::string key; + int32_t remain = limit; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + bool start_no_limit = key_start.compare("") == 0; + bool end_no_limit = key_end.compare("") == 0; + + if (!start_no_limit && !end_no_limit && (key_start.compare(key_end) > 0)) { + return Status::InvalidArgument("error in given range"); + } + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + if (start_no_limit) { + it->SeekToFirst(); + } else { + it->Seek(key_start); + } + + while (it->Valid() && remain > 0 && (end_no_limit || it->key().compare(key_end) <= 0)) { + ParsedZSetsMetaValue parsed_zsets_meta_value(it->value()); + if (parsed_zsets_meta_value.IsStale() || parsed_zsets_meta_value.count() == 0) { + it->Next(); + } else { + key = it->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + keys->push_back(key); + } + remain--; + it->Next(); + } + } + + while (it->Valid() && (end_no_limit || it->key().compare(key_end) <= 0)) { + ParsedZSetsMetaValue parsed_zsets_meta_value(it->value()); + if (parsed_zsets_meta_value.IsStale() || 
parsed_zsets_meta_value.count() == 0) { + it->Next(); + } else { + *next_key = it->key().ToString(); + break; + } + } + delete it; + return Status::OK(); +} + +Status RedisZSets::PKRScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key) { + next_key->clear(); + + std::string key; + int32_t remain = limit; + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + + bool start_no_limit = key_start.compare("") == 0; + bool end_no_limit = key_end.compare("") == 0; + + if (!start_no_limit && !end_no_limit && (key_start.compare(key_end) < 0)) { + return Status::InvalidArgument("error in given range"); + } + + rocksdb::Iterator* it = db_->NewIterator(iterator_options, handles_[0]); + if (start_no_limit) { + it->SeekToLast(); + } else { + it->SeekForPrev(key_start); + } + + while (it->Valid() && remain > 0 && (end_no_limit || it->key().compare(key_end) >= 0)) { + ParsedZSetsMetaValue parsed_zsets_meta_value(it->value()); + if (parsed_zsets_meta_value.IsStale() || parsed_zsets_meta_value.count() == 0) { + it->Prev(); + } else { + key = it->key().ToString(); + if (StringMatch(pattern.data(), pattern.size(), key.data(), key.size(), 0) != 0) { + keys->push_back(key); + } + remain--; + it->Prev(); + } + } + + while (it->Valid() && (end_no_limit || it->key().compare(key_end) >= 0)) { + ParsedZSetsMetaValue parsed_zsets_meta_value(it->value()); + if (parsed_zsets_meta_value.IsStale() || parsed_zsets_meta_value.count() == 0) { + it->Prev(); + } else { + *next_key = it->key().ToString(); + break; + } + } + delete it; + return Status::OK(); +} + +Status RedisZSets::Persist(const Slice& key) { + std::string meta_value; + ScopeRecordLock l(lock_mgr_, key); + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue 
parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + return Status::NotFound(); + } else { + int32_t timestamp = parsed_zsets_meta_value.timestamp(); + if (timestamp == 0) { + return Status::NotFound("Not have an associated timeout"); + } else { + parsed_zsets_meta_value.set_timestamp(0); + return db_->Put(default_write_options_, handles_[0], key, meta_value); + } + } + } + return s; +} + +Status RedisZSets::TTL(const Slice& key, int64_t* timestamp) { + std::string meta_value; + Status s = db_->Get(default_read_options_, handles_[0], key, &meta_value); + if (s.ok()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + if (parsed_zsets_meta_value.IsStale()) { + *timestamp = -2; + return Status::NotFound("Stale"); + } else if (parsed_zsets_meta_value.count() == 0) { + *timestamp = -2; + return Status::NotFound(); + } else { + *timestamp = parsed_zsets_meta_value.timestamp(); + if (*timestamp == 0) { + *timestamp = -1; + } else { + int64_t curtime; + rocksdb::Env::Default()->GetCurrentTime(&curtime); + *timestamp = *timestamp - curtime >= 0 ? 
*timestamp - curtime : -2; + } + } + } else if (s.IsNotFound()) { + *timestamp = -2; + } + return s; +} + +void RedisZSets::ScanDatabase() { + rocksdb::ReadOptions iterator_options; + const rocksdb::Snapshot* snapshot; + ScopeSnapshot ss(db_, &snapshot); + iterator_options.snapshot = snapshot; + iterator_options.fill_cache = false; + auto current_time = static_cast(time(nullptr)); + + LOG(INFO) << "***************ZSets Meta Data***************"; + auto meta_iter = db_->NewIterator(iterator_options, handles_[0]); + for (meta_iter->SeekToFirst(); meta_iter->Valid(); meta_iter->Next()) { + ParsedZSetsMetaValue parsed_zsets_meta_value(meta_iter->value()); + int32_t survival_time = 0; + if (parsed_zsets_meta_value.timestamp() != 0) { + survival_time = parsed_zsets_meta_value.timestamp() - current_time > 0 + ? parsed_zsets_meta_value.timestamp() - current_time + : -1; + } + + LOG(INFO) << fmt::format("[key : {:<30}] [count : {:<10}] [timestamp : {:<10}] [version : {}] [survival_time : {}]", + meta_iter->key().ToString(), parsed_zsets_meta_value.count(), parsed_zsets_meta_value.timestamp(), + parsed_zsets_meta_value.version(), survival_time); + } + delete meta_iter; + + LOG(INFO) << "***************ZSets Member To Score Data***************"; + auto member_iter = db_->NewIterator(iterator_options, handles_[1]); + for (member_iter->SeekToFirst(); member_iter->Valid(); member_iter->Next()) { + ParsedZSetsMemberKey parsed_zsets_member_key(member_iter->key()); + + uint64_t tmp = DecodeFixed64(member_iter->value().data()); + const void* ptr_tmp = reinterpret_cast(&tmp); + double score = *reinterpret_cast(ptr_tmp); + + LOG(INFO) << fmt::format("[key : {:<30}] [member : {:<20}] [score : {:<20}] [version : {}]", + parsed_zsets_member_key.key().ToString(), parsed_zsets_member_key.member().ToString(), + score, parsed_zsets_member_key.version()); + } + delete member_iter; + + LOG(INFO) << "***************ZSets Score To Member Data***************"; + auto score_iter = 
db_->NewIterator(iterator_options, handles_[2]); + for (score_iter->SeekToFirst(); score_iter->Valid(); score_iter->Next()) { + ParsedZSetsScoreKey parsed_zsets_score_key(score_iter->key()); + + LOG(INFO) << fmt::format("[key : {:<30}] [score : {:<20}] [member : {:<20}] [version : {}]", + parsed_zsets_score_key.key().ToString(), parsed_zsets_score_key.score(), + parsed_zsets_score_key.member().ToString(), parsed_zsets_score_key.version()); + } + delete score_iter; +} + +} // namespace storage diff --git a/src/storage/src/redis_zsets.h b/src/storage/src/redis_zsets.h new file mode 100644 index 000000000..c3e70b96e --- /dev/null +++ b/src/storage/src/redis_zsets.h @@ -0,0 +1,82 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include +#include +#include + +#include "src/custom_comparator.h" +#include "src/redis.h" + +namespace storage { + +class RedisZSets : public Redis { + public: + RedisZSets(Storage* s, const DataType& type); + ~RedisZSets() override = default; + + // Common Commands + Status Open(const StorageOptions& storage_options, const std::string& db_path) override; + Status CompactRange(const rocksdb::Slice* begin, const rocksdb::Slice* end, + const ColumnFamilyType& type = kMetaAndData) override; + Status GetProperty(const std::string& property, uint64_t* out) override; + Status ScanKeyNum(KeyInfo* key_info) override; + Status ScanKeys(const std::string& pattern, std::vector* keys) override; + Status PKPatternMatchDel(const std::string& pattern, int32_t* ret) override; + + // ZSets Commands + Status ZAdd(const Slice& key, const std::vector& score_members, int32_t* ret); + Status ZCard(const Slice& key, int32_t* card); + Status ZCount(const Slice& key, double min, double max, bool 
left_close, bool right_close, int32_t* ret); + Status ZIncrby(const Slice& key, const Slice& member, double increment, double* ret); + Status ZRange(const Slice& key, int32_t start, int32_t stop, std::vector* score_members); + Status ZRangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, int64_t count, + int64_t offset, std::vector* score_members); + Status ZRank(const Slice& key, const Slice& member, int32_t* rank); + Status ZRem(const Slice& key, const std::vector& members, int32_t* ret); + Status ZRemrangebyrank(const Slice& key, int32_t start, int32_t stop, int32_t* ret); + Status ZRemrangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, int32_t* ret); + Status ZRevrange(const Slice& key, int32_t start, int32_t stop, std::vector* score_members); + Status ZRevrangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, int64_t count, + int64_t offset, std::vector* score_members); + Status ZRevrank(const Slice& key, const Slice& member, int32_t* rank); + Status ZScore(const Slice& key, const Slice& member, double* score); + Status ZUnionstore(const Slice& destination, const std::vector& keys, const std::vector& weights, + AGGREGATE agg, std::map& value_to_dest, int32_t* ret); + Status ZInterstore(const Slice& destination, const std::vector& keys, const std::vector& weights, + AGGREGATE agg, std::vector& value_to_dest, int32_t* ret); + Status ZRangebylex(const Slice& key, const Slice& min, const Slice& max, bool left_close, bool right_close, + std::vector* members); + Status ZLexcount(const Slice& key, const Slice& min, const Slice& max, bool left_close, bool right_close, + int32_t* ret); + Status ZRemrangebylex(const Slice& key, const Slice& min, const Slice& max, bool left_close, bool right_close, + int32_t* ret); + Status ZScan(const Slice& key, int64_t cursor, const std::string& pattern, int64_t count, + std::vector* score_members, int64_t* next_cursor); + 
Status PKScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key); + Status PKRScanRange(const Slice& key_start, const Slice& key_end, const Slice& pattern, int32_t limit, + std::vector* keys, std::string* next_key); + Status ZPopMax(const Slice& key, int64_t count, std::vector* score_members); + Status ZPopMin(const Slice& key, int64_t count, std::vector* score_members); + + // Keys Commands + Status Expire(const Slice& key, int32_t ttl) override; + Status Del(const Slice& key) override; + bool Scan(const std::string& start_key, const std::string& pattern, std::vector* keys, int64_t* count, + std::string* next_key) override; + bool PKExpireScan(const std::string& start_key, int32_t min_timestamp, int32_t max_timestamp, + std::vector* keys, int64_t* leftover_visits, std::string* next_key) override; + Status Expireat(const Slice& key, int32_t timestamp) override; + Status Persist(const Slice& key) override; + Status TTL(const Slice& key, int64_t* timestamp) override; + + // Iterate all data + void ScanDatabase(); +}; + +} // namespace storage diff --git a/src/storage/src/scope_record_lock.h b/src/storage/src/scope_record_lock.h new file mode 100644 index 000000000..571e3440b --- /dev/null +++ b/src/storage/src/scope_record_lock.h @@ -0,0 +1,22 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once + +#include +#include +#include +#include + +#include "pstd/scope_record_lock.h" +#include "src/lock_mgr.h" +#include "storage/storage.h" + +namespace storage { + +using ScopeRecordLock = pstd::lock::ScopeRecordLock; +using MultiScopeRecordLock = pstd::lock::MultiScopeRecordLock; + +} // namespace storage diff --git a/src/storage/src/scope_snapshot.h b/src/storage/src/scope_snapshot.h new file mode 100644 index 000000000..852dfa100 --- /dev/null +++ b/src/storage/src/scope_snapshot.h @@ -0,0 +1,25 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include "rocksdb/db.h" + +#include "pstd/noncopyable.h" + +namespace storage { +class ScopeSnapshot : public pstd::noncopyable { + public: + ScopeSnapshot(rocksdb::DB* db, const rocksdb::Snapshot** snapshot) : db_(db), snapshot_(snapshot) { + *snapshot_ = db_->GetSnapshot(); + } + ~ScopeSnapshot() { db_->ReleaseSnapshot(*snapshot_); } + + private: + rocksdb::DB* const db_; + const rocksdb::Snapshot** snapshot_; +}; + +} // namespace storage diff --git a/src/storage/src/storage.cc b/src/storage/src/storage.cc new file mode 100644 index 000000000..29e7e1336 --- /dev/null +++ b/src/storage/src/storage.cc @@ -0,0 +1,1772 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#include "storage/storage.h" +#include "storage/util.h" + +#include + +#include + +#include "scope_snapshot.h" +#include "src/lru_cache.h" +#include "src/mutex_impl.h" +#include "src/options_helper.h" +#include "src/redis_hashes.h" +#include "src/redis_hyperloglog.h" +#include "src/redis_lists.h" +#include "src/redis_sets.h" +#include "src/redis_strings.h" +#include "src/redis_zsets.h" + +namespace storage { + +Status StorageOptions::ResetOptions(const OptionType& option_type, + const std::unordered_map& options_map) { + std::unordered_map& options_member_type_info = mutable_cf_options_member_type_info; + char* opt = reinterpret_cast(static_cast(&options)); + if (option_type == OptionType::kDB) { + options_member_type_info = mutable_db_options_member_type_info; + opt = reinterpret_cast(static_cast(&options)); + } + for (const auto& option_member : options_map) { + try { + auto iter = options_member_type_info.find(option_member.first); + if (iter == options_member_type_info.end()) { + return Status::InvalidArgument("Unsupport option member: " + option_member.first); + } + const auto& member_info = iter->second; + if (!ParseOptionMember(member_info.type, option_member.second, opt + member_info.offset)) { + return Status::InvalidArgument("Error parsing option member " + option_member.first); + } + } catch (std::exception& e) { + return Status::InvalidArgument("Error parsing option member " + option_member.first + ":" + + std::string(e.what())); + } + } + return Status::OK(); +} + +Storage::Storage() { + cursors_store_ = std::make_unique>(); + cursors_store_->SetCapacity(5000); + + Status s = StartBGThread(); + if (!s.ok()) { + LOG(FATAL) << "start bg thread failed, " << s.ToString(); + } +} + +Storage::~Storage() { + bg_tasks_should_exit_ = true; + bg_tasks_cond_var_.notify_one(); + + if (is_opened_) { + rocksdb::CancelAllBackgroundWork(strings_db_->GetDB(), true); + rocksdb::CancelAllBackgroundWork(hashes_db_->GetDB(), true); + 
rocksdb::CancelAllBackgroundWork(sets_db_->GetDB(), true); + rocksdb::CancelAllBackgroundWork(lists_db_->GetDB(), true); + rocksdb::CancelAllBackgroundWork(zsets_db_->GetDB(), true); + } + + int ret = 0; + if ((ret = pthread_join(bg_tasks_thread_id_, nullptr)) != 0) { + LOG(ERROR) << "pthread_join failed with bgtask thread error " << ret; + } +} + +static std::string AppendSubDirectory(const std::string& db_path, const std::string& sub_db) { + if (db_path.back() == '/') { + return db_path + sub_db; + } else { + return db_path + "/" + sub_db; + } +} + +Status Storage::Open(const StorageOptions& storage_options, const std::string& db_path) { + mkpath(db_path.c_str(), 0755); + + strings_db_ = std::make_unique(this, kStrings); + Status s = strings_db_->Open(storage_options, AppendSubDirectory(db_path, "strings")); + if (!s.ok()) { + LOG(FATAL) << "open kv db failed, " << s.ToString(); + } + + hashes_db_ = std::make_unique(this, kHashes); + s = hashes_db_->Open(storage_options, AppendSubDirectory(db_path, "hashes")); + if (!s.ok()) { + LOG(FATAL) << "open hashes db failed, " << s.ToString(); + } + + sets_db_ = std::make_unique(this, kSets); + s = sets_db_->Open(storage_options, AppendSubDirectory(db_path, "sets")); + if (!s.ok()) { + LOG(FATAL) << "open set db failed, " << s.ToString(); + } + + lists_db_ = std::make_unique(this, kLists); + s = lists_db_->Open(storage_options, AppendSubDirectory(db_path, "lists")); + if (!s.ok()) { + LOG(FATAL) << "open list db failed, " << s.ToString(); + } + + zsets_db_ = std::make_unique(this, kZSets); + s = zsets_db_->Open(storage_options, AppendSubDirectory(db_path, "zsets")); + if (!s.ok()) { + LOG(FATAL) << "open zset db failed, " << s.ToString(); + } + is_opened_.store(true); + return Status::OK(); +} + +Status Storage::GetStartKey(const DataType& dtype, int64_t cursor, std::string* start_key) { + std::string index_key = DataTypeTag[dtype] + std::to_string(cursor); + return cursors_store_->Lookup(index_key, start_key); +} + 
+Status Storage::StoreCursorStartKey(const DataType& dtype, int64_t cursor, const std::string& next_key) { + std::string index_key = DataTypeTag[dtype] + std::to_string(cursor); + return cursors_store_->Insert(index_key, next_key); +} + +// Strings Commands +Status Storage::Set(const Slice& key, const Slice& value) { return strings_db_->Set(key, value); } + +Status Storage::Setxx(const Slice& key, const Slice& value, int32_t* ret, const int32_t ttl) { + return strings_db_->Setxx(key, value, ret, ttl); +} + +Status Storage::Get(const Slice& key, std::string* value) { return strings_db_->Get(key, value); } + +Status Storage::GetSet(const Slice& key, const Slice& value, std::string* old_value) { + return strings_db_->GetSet(key, value, old_value); +} + +Status Storage::SetBit(const Slice& key, int64_t offset, int32_t value, int32_t* ret) { + return strings_db_->SetBit(key, offset, value, ret); +} + +Status Storage::GetBit(const Slice& key, int64_t offset, int32_t* ret) { return strings_db_->GetBit(key, offset, ret); } + +Status Storage::MSet(const std::vector& kvs) { return strings_db_->MSet(kvs); } + +Status Storage::MGet(const std::vector& keys, std::vector* vss) { + return strings_db_->MGet(keys, vss); +} + +Status Storage::Setnx(const Slice& key, const Slice& value, int32_t* ret, const int32_t ttl) { + return strings_db_->Setnx(key, value, ret, ttl); +} + +Status Storage::MSetnx(const std::vector& kvs, int32_t* ret) { return strings_db_->MSetnx(kvs, ret); } + +Status Storage::Setvx(const Slice& key, const Slice& value, const Slice& new_value, int32_t* ret, const int32_t ttl) { + return strings_db_->Setvx(key, value, new_value, ret, ttl); +} + +Status Storage::Delvx(const Slice& key, const Slice& value, int32_t* ret) { + return strings_db_->Delvx(key, value, ret); +} + +Status Storage::Setrange(const Slice& key, int64_t start_offset, const Slice& value, int32_t* ret) { + return strings_db_->Setrange(key, start_offset, value, ret); +} + +Status 
Storage::Getrange(const Slice& key, int64_t start_offset, int64_t end_offset, std::string* ret) { + return strings_db_->Getrange(key, start_offset, end_offset, ret); +} + +Status Storage::Append(const Slice& key, const Slice& value, int32_t* ret) { + return strings_db_->Append(key, value, ret); +} + +Status Storage::BitCount(const Slice& key, int64_t start_offset, int64_t end_offset, int32_t* ret, bool have_range) { + return strings_db_->BitCount(key, start_offset, end_offset, ret, have_range); +} + +Status Storage::BitOp(BitOpType op, const std::string& dest_key, const std::vector& src_keys, + std::string &value_to_dest, int64_t* ret) { + return strings_db_->BitOp(op, dest_key, src_keys, value_to_dest, ret); +} + +Status Storage::BitPos(const Slice& key, int32_t bit, int64_t* ret) { return strings_db_->BitPos(key, bit, ret); } + +Status Storage::BitPos(const Slice& key, int32_t bit, int64_t start_offset, int64_t* ret) { + return strings_db_->BitPos(key, bit, start_offset, ret); +} + +Status Storage::BitPos(const Slice& key, int32_t bit, int64_t start_offset, int64_t end_offset, int64_t* ret) { + return strings_db_->BitPos(key, bit, start_offset, end_offset, ret); +} + +Status Storage::Decrby(const Slice& key, int64_t value, int64_t* ret) { return strings_db_->Decrby(key, value, ret); } + +Status Storage::Incrby(const Slice& key, int64_t value, int64_t* ret) { return strings_db_->Incrby(key, value, ret); } + +Status Storage::Incrbyfloat(const Slice& key, const Slice& value, std::string* ret) { + return strings_db_->Incrbyfloat(key, value, ret); +} + +Status Storage::Setex(const Slice& key, const Slice& value, int32_t ttl) { return strings_db_->Setex(key, value, ttl); } + +Status Storage::Strlen(const Slice& key, int32_t* len) { return strings_db_->Strlen(key, len); } + +Status Storage::PKSetexAt(const Slice& key, const Slice& value, int32_t timestamp) { + return strings_db_->PKSetexAt(key, value, timestamp); +} + +// Hashes Commands +Status Storage::HSet(const 
Slice& key, const Slice& field, const Slice& value, int32_t* res) { + return hashes_db_->HSet(key, field, value, res); +} + +Status Storage::HGet(const Slice& key, const Slice& field, std::string* value) { + return hashes_db_->HGet(key, field, value); +} + +Status Storage::HMSet(const Slice& key, const std::vector& fvs) { return hashes_db_->HMSet(key, fvs); } + +Status Storage::HMGet(const Slice& key, const std::vector& fields, std::vector* vss) { + return hashes_db_->HMGet(key, fields, vss); +} + +Status Storage::HGetall(const Slice& key, std::vector* fvs) { return hashes_db_->HGetall(key, fvs); } + +Status Storage::HKeys(const Slice& key, std::vector* fields) { return hashes_db_->HKeys(key, fields); } + +Status Storage::HVals(const Slice& key, std::vector* values) { return hashes_db_->HVals(key, values); } + +Status Storage::HSetnx(const Slice& key, const Slice& field, const Slice& value, int32_t* ret) { + return hashes_db_->HSetnx(key, field, value, ret); +} + +Status Storage::HLen(const Slice& key, int32_t* ret) { return hashes_db_->HLen(key, ret); } + +Status Storage::HStrlen(const Slice& key, const Slice& field, int32_t* len) { + return hashes_db_->HStrlen(key, field, len); +} + +Status Storage::HExists(const Slice& key, const Slice& field) { return hashes_db_->HExists(key, field); } + +Status Storage::HIncrby(const Slice& key, const Slice& field, int64_t value, int64_t* ret) { + return hashes_db_->HIncrby(key, field, value, ret); +} + +Status Storage::HIncrbyfloat(const Slice& key, const Slice& field, const Slice& by, std::string* new_value) { + return hashes_db_->HIncrbyfloat(key, field, by, new_value); +} + +Status Storage::HDel(const Slice& key, const std::vector& fields, int32_t* ret) { + return hashes_db_->HDel(key, fields, ret); +} + +Status Storage::HScan(const Slice& key, int64_t cursor, const std::string& pattern, int64_t count, + std::vector* field_values, int64_t* next_cursor) { + return hashes_db_->HScan(key, cursor, pattern, count, field_values, 
next_cursor); +} + +Status Storage::HScanx(const Slice& key, const std::string& start_field, const std::string& pattern, int64_t count, + std::vector* field_values, std::string* next_field) { + return hashes_db_->HScanx(key, start_field, pattern, count, field_values, next_field); +} + +Status Storage::PKHScanRange(const Slice& key, const Slice& field_start, const std::string& field_end, + const Slice& pattern, int32_t limit, std::vector* field_values, + std::string* next_field) { + return hashes_db_->PKHScanRange(key, field_start, field_end, pattern, limit, field_values, next_field); +} + +Status Storage::PKHRScanRange(const Slice& key, const Slice& field_start, const std::string& field_end, + const Slice& pattern, int32_t limit, std::vector* field_values, + std::string* next_field) { + return hashes_db_->PKHRScanRange(key, field_start, field_end, pattern, limit, field_values, next_field); +} + +// Sets Commands +Status Storage::SAdd(const Slice& key, const std::vector& members, int32_t* ret) { + return sets_db_->SAdd(key, members, ret); +} + +Status Storage::SCard(const Slice& key, int32_t* ret) { return sets_db_->SCard(key, ret); } + +Status Storage::SDiff(const std::vector& keys, std::vector* members) { + return sets_db_->SDiff(keys, members); +} + +Status Storage::SDiffstore(const Slice& destination, const std::vector& keys, std::vector& value_to_dest, int32_t* ret) { + return sets_db_->SDiffstore(destination, keys, value_to_dest, ret); +} + +Status Storage::SInter(const std::vector& keys, std::vector* members) { + return sets_db_->SInter(keys, members); +} + +Status Storage::SInterstore(const Slice& destination, const std::vector& keys, std::vector& value_to_dest, int32_t* ret) { + return sets_db_->SInterstore(destination, keys, value_to_dest, ret); +} + +Status Storage::SIsmember(const Slice& key, const Slice& member, int32_t* ret) { + return sets_db_->SIsmember(key, member, ret); +} + +Status Storage::SMembers(const Slice& key, std::vector* members) { + 
return sets_db_->SMembers(key, members); +} + +Status Storage::SMove(const Slice& source, const Slice& destination, const Slice& member, int32_t* ret) { + return sets_db_->SMove(source, destination, member, ret); +} + +Status Storage::SPop(const Slice& key, std::vector* members, int64_t count) { + bool need_compact = false; + Status status = sets_db_->SPop(key, members, &need_compact, count); + if (need_compact) { + AddBGTask({kSets, kCompactKey, key.ToString()}); + } + return status; +} + +Status Storage::SRandmember(const Slice& key, int32_t count, std::vector* members) { + return sets_db_->SRandmember(key, count, members); +} + +Status Storage::SRem(const Slice& key, const std::vector& members, int32_t* ret) { + return sets_db_->SRem(key, members, ret); +} + +Status Storage::SUnion(const std::vector& keys, std::vector* members) { + return sets_db_->SUnion(keys, members); +} + +Status Storage::SUnionstore(const Slice& destination, const std::vector& keys, std::vector& value_to_dest, int32_t* ret) { + return sets_db_->SUnionstore(destination, keys, value_to_dest, ret); +} + +Status Storage::SScan(const Slice& key, int64_t cursor, const std::string& pattern, int64_t count, + std::vector* members, int64_t* next_cursor) { + return sets_db_->SScan(key, cursor, pattern, count, members, next_cursor); +} + +Status Storage::LPush(const Slice& key, const std::vector& values, uint64_t* ret) { + return lists_db_->LPush(key, values, ret); +} + +Status Storage::RPush(const Slice& key, const std::vector& values, uint64_t* ret) { + return lists_db_->RPush(key, values, ret); +} + +Status Storage::LRange(const Slice& key, int64_t start, int64_t stop, std::vector* ret) { + return lists_db_->LRange(key, start, stop, ret); +} + +Status Storage::LTrim(const Slice& key, int64_t start, int64_t stop) { return lists_db_->LTrim(key, start, stop); } + +Status Storage::LLen(const Slice& key, uint64_t* len) { return lists_db_->LLen(key, len); } + +Status Storage::LPop(const Slice& key, 
int64_t count, std::vector* elements) { return lists_db_->LPop(key, count, elements); } + +Status Storage::RPop(const Slice& key, int64_t count, std::vector* elements) { return lists_db_->RPop(key, count, elements); } + +Status Storage::LIndex(const Slice& key, int64_t index, std::string* element) { + return lists_db_->LIndex(key, index, element); +} + +Status Storage::LInsert(const Slice& key, const BeforeOrAfter& before_or_after, const std::string& pivot, + const std::string& value, int64_t* ret) { + return lists_db_->LInsert(key, before_or_after, pivot, value, ret); +} + +Status Storage::LPushx(const Slice& key, const std::vector& values, uint64_t* len) { + return lists_db_->LPushx(key, values, len); +} + +Status Storage::RPushx(const Slice& key, const std::vector& values, uint64_t* len) { + return lists_db_->RPushx(key, values, len); +} + +Status Storage::LRem(const Slice& key, int64_t count, const Slice& value, uint64_t* ret) { + return lists_db_->LRem(key, count, value, ret); +} + +Status Storage::LSet(const Slice& key, int64_t index, const Slice& value) { return lists_db_->LSet(key, index, value); } + +Status Storage::RPoplpush(const Slice& source, const Slice& destination, std::string* element) { + return lists_db_->RPoplpush(source, destination, element); +} + +Status Storage::ZPopMax(const Slice& key, const int64_t count, std::vector* score_members) { + return zsets_db_->ZPopMax(key, count, score_members); +} + +Status Storage::ZPopMin(const Slice& key, const int64_t count, std::vector* score_members) { + return zsets_db_->ZPopMin(key, count, score_members); +} + +Status Storage::ZAdd(const Slice& key, const std::vector& score_members, int32_t* ret) { + return zsets_db_->ZAdd(key, score_members, ret); +} + +Status Storage::ZCard(const Slice& key, int32_t* ret) { return zsets_db_->ZCard(key, ret); } + +Status Storage::ZCount(const Slice& key, double min, double max, bool left_close, bool right_close, int32_t* ret) { + return zsets_db_->ZCount(key, min, 
max, left_close, right_close, ret); +} + +Status Storage::ZIncrby(const Slice& key, const Slice& member, double increment, double* ret) { + return zsets_db_->ZIncrby(key, member, increment, ret); +} + +Status Storage::ZRange(const Slice& key, int32_t start, int32_t stop, std::vector* score_members) { + return zsets_db_->ZRange(key, start, stop, score_members); +} + +Status Storage::ZRangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, + std::vector* score_members) { + // maximum number of zset is std::numeric_limits::max() + return zsets_db_->ZRangebyscore(key, min, max, left_close, right_close, std::numeric_limits::max(), 0, + score_members); +} + +Status Storage::ZRangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, + int64_t count, int64_t offset, std::vector* score_members) { + return zsets_db_->ZRangebyscore(key, min, max, left_close, right_close, count, offset, score_members); +} + +Status Storage::ZRank(const Slice& key, const Slice& member, int32_t* rank) { + return zsets_db_->ZRank(key, member, rank); +} + +Status Storage::ZRem(const Slice& key, const std::vector& members, int32_t* ret) { + return zsets_db_->ZRem(key, members, ret); +} + +Status Storage::ZRemrangebyrank(const Slice& key, int32_t start, int32_t stop, int32_t* ret) { + return zsets_db_->ZRemrangebyrank(key, start, stop, ret); +} + +Status Storage::ZRemrangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, + int32_t* ret) { + return zsets_db_->ZRemrangebyscore(key, min, max, left_close, right_close, ret); +} + +Status Storage::ZRevrangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, + int64_t count, int64_t offset, std::vector* score_members) { + return zsets_db_->ZRevrangebyscore(key, min, max, left_close, right_close, count, offset, score_members); +} + +Status Storage::ZRevrange(const Slice& key, int32_t start, int32_t stop, std::vector* 
score_members) { + return zsets_db_->ZRevrange(key, start, stop, score_members); +} + +Status Storage::ZRevrangebyscore(const Slice& key, double min, double max, bool left_close, bool right_close, + std::vector* score_members) { + // maximum number of zset is std::numeric_limits::max() + return zsets_db_->ZRevrangebyscore(key, min, max, left_close, right_close, std::numeric_limits::max(), 0, + score_members); +} + +Status Storage::ZRevrank(const Slice& key, const Slice& member, int32_t* rank) { + return zsets_db_->ZRevrank(key, member, rank); +} + +Status Storage::ZScore(const Slice& key, const Slice& member, double* ret) { + return zsets_db_->ZScore(key, member, ret); +} + +Status Storage::ZUnionstore(const Slice& destination, const std::vector& keys, + const std::vector& weights, const AGGREGATE agg, std::map& value_to_dest, int32_t* ret) { + return zsets_db_->ZUnionstore(destination, keys, weights, agg, value_to_dest, ret); +} + +Status Storage::ZInterstore(const Slice& destination, const std::vector& keys, + const std::vector& weights, const AGGREGATE agg, std::vector& value_to_dest, int32_t* ret) { + return zsets_db_->ZInterstore(destination, keys, weights, agg, value_to_dest, ret); +} + +Status Storage::ZRangebylex(const Slice& key, const Slice& min, const Slice& max, bool left_close, bool right_close, + std::vector* members) { + return zsets_db_->ZRangebylex(key, min, max, left_close, right_close, members); +} + +Status Storage::ZLexcount(const Slice& key, const Slice& min, const Slice& max, bool left_close, bool right_close, + int32_t* ret) { + return zsets_db_->ZLexcount(key, min, max, left_close, right_close, ret); +} + +Status Storage::ZRemrangebylex(const Slice& key, const Slice& min, const Slice& max, bool left_close, bool right_close, + int32_t* ret) { + return zsets_db_->ZRemrangebylex(key, min, max, left_close, right_close, ret); +} + +Status Storage::ZScan(const Slice& key, int64_t cursor, const std::string& pattern, int64_t count, + std::vector* 
score_members, int64_t* next_cursor) { + return zsets_db_->ZScan(key, cursor, pattern, count, score_members, next_cursor); +} + +// Keys Commands +int32_t Storage::Expire(const Slice& key, int32_t ttl, std::map* type_status) { + int32_t ret = 0; + bool is_corruption = false; + + // Strings + Status s = strings_db_->Expire(key, ttl); + if (s.ok()) { + ret++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kStrings] = s; + } + + // Hash + s = hashes_db_->Expire(key, ttl); + if (s.ok()) { + ret++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kHashes] = s; + } + + // Sets + s = sets_db_->Expire(key, ttl); + if (s.ok()) { + ret++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kSets] = s; + } + + // Lists + s = lists_db_->Expire(key, ttl); + if (s.ok()) { + ret++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kLists] = s; + } + + // Zsets + s = zsets_db_->Expire(key, ttl); + if (s.ok()) { + ret++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kZSets] = s; + } + + if (is_corruption) { + return -1; + } else { + return ret; + } +} + +int64_t Storage::Del(const std::vector& keys, std::map* type_status) { + Status s; + int64_t count = 0; + bool is_corruption = false; + + for (const auto& key : keys) { + // Strings + Status s = strings_db_->Del(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kStrings] = s; + } + + // Hashes + s = hashes_db_->Del(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kHashes] = s; + } + + // Sets + s = sets_db_->Del(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kSets] = s; + } + + // Lists + s = lists_db_->Del(key); + if (s.ok()) { + count++; + } else if 
(!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kLists] = s; + } + + // ZSets + s = zsets_db_->Del(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kZSets] = s; + } + } + + if (is_corruption) { + return -1; + } else { + return count; + } +} + +int64_t Storage::DelByType(const std::vector& keys, const DataType& type) { + Status s; + int64_t count = 0; + bool is_corruption = false; + + for (const auto& key : keys) { + switch (type) { + // Strings + case DataType::kStrings: { + s = strings_db_->Del(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + } + break; + } + // Hashes + case DataType::kHashes: { + s = hashes_db_->Del(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + } + break; + } + // Sets + case DataType::kSets: { + s = sets_db_->Del(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + } + break; + } + // Lists + case DataType::kLists: { + s = lists_db_->Del(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + } + break; + } + // ZSets + case DataType::kZSets: { + s = zsets_db_->Del(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + } + break; + } + case DataType::kAll: { + return -1; + } + } + } + + if (is_corruption) { + return -1; + } else { + return count; + } +} + +int64_t Storage::Exists(const std::vector& keys, std::map* type_status) { + int64_t count = 0; + int32_t ret; + uint64_t llen; + std::string value; + Status s; + bool is_corruption = false; + + for (const auto& key : keys) { + s = strings_db_->Get(key, &value); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kStrings] = s; + } + + s = hashes_db_->HLen(key, &ret); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + 
(*type_status)[DataType::kHashes] = s; + } + + s = sets_db_->SCard(key, &ret); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kSets] = s; + } + + s = lists_db_->LLen(key, &llen); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kLists] = s; + } + + s = zsets_db_->ZCard(key, &ret); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kZSets] = s; + } + } + + if (is_corruption) { + return -1; + } else { + return count; + } +} + +int64_t Storage::Scan(const DataType& dtype, int64_t cursor, const std::string& pattern, int64_t count, + std::vector* keys) { + keys->clear(); + bool is_finish; + int64_t leftover_visits = count; + int64_t step_length = count; + int64_t cursor_ret = 0; + std::string start_key; + std::string next_key; + std::string prefix; + + prefix = isTailWildcard(pattern) ? pattern.substr(0, pattern.size() - 1) : ""; + + if (cursor < 0) { + return cursor_ret; + } else { + Status s = GetStartKey(dtype, cursor, &start_key); + if (s.IsNotFound()) { + // If want to scan all the databases, we start with the strings database + start_key = (dtype == DataType::kAll ? 
DataTypeTag[kStrings] : DataTypeTag[dtype]) + prefix; + cursor = 0; + } + } + + char key_type = start_key.at(0); + start_key.erase(start_key.begin()); + switch (key_type) { + case 'k': + is_finish = strings_db_->Scan(start_key, pattern, keys, &leftover_visits, &next_key); + if ((leftover_visits == 0) && !is_finish) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("k") + next_key); + break; + } else if (is_finish) { + if (DataType::kStrings == dtype) { + cursor_ret = 0; + break; + } else if (leftover_visits == 0) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("h") + prefix); + break; + } + } + start_key = prefix; + case 'h': + is_finish = hashes_db_->Scan(start_key, pattern, keys, &leftover_visits, &next_key); + if ((leftover_visits == 0) && !is_finish) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("h") + next_key); + break; + } else if (is_finish) { + if (DataType::kHashes == dtype) { + cursor_ret = 0; + break; + } else if (leftover_visits == 0) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("s") + prefix); + break; + } + } + start_key = prefix; + case 's': + is_finish = sets_db_->Scan(start_key, pattern, keys, &leftover_visits, &next_key); + if ((leftover_visits == 0) && !is_finish) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("s") + next_key); + break; + } else if (is_finish) { + if (DataType::kSets == dtype) { + cursor_ret = 0; + break; + } else if (leftover_visits == 0) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("l") + prefix); + break; + } + } + start_key = prefix; + case 'l': + is_finish = lists_db_->Scan(start_key, pattern, keys, &leftover_visits, &next_key); + if ((leftover_visits == 0) && !is_finish) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, 
std::string("l") + next_key); + break; + } else if (is_finish) { + if (DataType::kLists == dtype) { + cursor_ret = 0; + break; + } else if (leftover_visits == 0) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("z") + prefix); + break; + } + } + start_key = prefix; + case 'z': + is_finish = zsets_db_->Scan(start_key, pattern, keys, &leftover_visits, &next_key); + if ((leftover_visits == 0) && !is_finish) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("z") + next_key); + break; + } else if (is_finish) { + cursor_ret = 0; + break; + } + } + return cursor_ret; +} + +int64_t Storage::PKExpireScan(const DataType& dtype, int64_t cursor, int32_t min_ttl, int32_t max_ttl, int64_t count, + std::vector* keys) { + keys->clear(); + bool is_finish; + int64_t leftover_visits = count; + int64_t step_length = count; + int64_t cursor_ret = 0; + std::string start_key; + std::string next_key; + + int64_t curtime; + rocksdb::Env::Default()->GetCurrentTime(&curtime); + + if (cursor < 0) { + return cursor_ret; + } else { + Status s = GetStartKey(dtype, cursor, &start_key); + if (s.IsNotFound()) { + // If want to scan all the databases, we start with the strings database + start_key = std::string(1, dtype == DataType::kAll ? 
DataTypeTag[kStrings] : DataTypeTag[dtype]); + cursor = 0; + } + } + + char key_type = start_key.at(0); + start_key.erase(start_key.begin()); + switch (key_type) { + case 'k': + is_finish = strings_db_->PKExpireScan(start_key, static_cast(curtime + min_ttl), + static_cast(curtime + max_ttl), keys, &leftover_visits, &next_key); + if ((leftover_visits == 0) && !is_finish) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("k") + next_key); + break; + } else if (is_finish) { + if (DataType::kStrings == dtype) { + cursor_ret = 0; + break; + } else if (leftover_visits == 0) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("h")); + break; + } + } + start_key = ""; + case 'h': + is_finish = hashes_db_->PKExpireScan(start_key, static_cast(curtime + min_ttl), + static_cast(curtime + max_ttl), keys, &leftover_visits, &next_key); + if ((leftover_visits == 0) && !is_finish) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("h") + next_key); + break; + } else if (is_finish) { + if (DataType::kHashes == dtype) { + cursor_ret = 0; + break; + } else if (leftover_visits == 0) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("s")); + break; + } + } + start_key = ""; + case 's': + is_finish = sets_db_->PKExpireScan(start_key, static_cast(curtime + min_ttl), + static_cast(curtime + max_ttl), keys, &leftover_visits, &next_key); + if ((leftover_visits == 0) && !is_finish) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("s") + next_key); + break; + } else if (is_finish) { + if (DataType::kSets == dtype) { + cursor_ret = 0; + break; + } else if (leftover_visits == 0) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("l")); + break; + } + } + start_key = ""; + case 'l': + is_finish = lists_db_->PKExpireScan(start_key, static_cast(curtime + 
min_ttl), + static_cast(curtime + max_ttl), keys, &leftover_visits, &next_key); + if ((leftover_visits == 0) && !is_finish) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("l") + next_key); + break; + } else if (is_finish) { + if (DataType::kLists == dtype) { + cursor_ret = 0; + break; + } else if (leftover_visits == 0) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("z")); + break; + } + } + start_key = ""; + case 'z': + is_finish = zsets_db_->PKExpireScan(start_key, static_cast(curtime + min_ttl), + static_cast(curtime + max_ttl), keys, &leftover_visits, &next_key); + if ((leftover_visits == 0) && !is_finish) { + cursor_ret = cursor + step_length; + StoreCursorStartKey(dtype, cursor_ret, std::string("z") + next_key); + break; + } else if (is_finish) { + cursor_ret = 0; + break; + } + } + return cursor_ret; +} + +Status Storage::PKScanRange(const DataType& data_type, const Slice& key_start, const Slice& key_end, + const Slice& pattern, int32_t limit, std::vector* keys, + std::vector* kvs, std::string* next_key) { + Status s; + keys->clear(); + next_key->clear(); + switch (data_type) { + case DataType::kStrings: + s = strings_db_->PKScanRange(key_start, key_end, pattern, limit, kvs, next_key); + break; + case DataType::kHashes: + s = hashes_db_->PKScanRange(key_start, key_end, pattern, limit, keys, next_key); + break; + case DataType::kLists: + s = lists_db_->PKScanRange(key_start, key_end, pattern, limit, keys, next_key); + break; + case DataType::kZSets: + s = zsets_db_->PKScanRange(key_start, key_end, pattern, limit, keys, next_key); + break; + case DataType::kSets: + s = sets_db_->PKScanRange(key_start, key_end, pattern, limit, keys, next_key); + break; + default: + s = Status::Corruption("Unsupported data types"); + break; + } + return s; +} + +Status Storage::PKRScanRange(const DataType& data_type, const Slice& key_start, const Slice& key_end, + const Slice& pattern, 
int32_t limit, std::vector* keys, + std::vector* kvs, std::string* next_key) { + Status s; + keys->clear(); + next_key->clear(); + switch (data_type) { + case DataType::kStrings: + s = strings_db_->PKRScanRange(key_start, key_end, pattern, limit, kvs, next_key); + break; + case DataType::kHashes: + s = hashes_db_->PKRScanRange(key_start, key_end, pattern, limit, keys, next_key); + break; + case DataType::kLists: + s = lists_db_->PKRScanRange(key_start, key_end, pattern, limit, keys, next_key); + break; + case DataType::kZSets: + s = zsets_db_->PKRScanRange(key_start, key_end, pattern, limit, keys, next_key); + break; + case DataType::kSets: + s = sets_db_->PKRScanRange(key_start, key_end, pattern, limit, keys, next_key); + break; + default: + s = Status::Corruption("Unsupported data types"); + break; + } + return s; +} + +Status Storage::PKPatternMatchDel(const DataType& data_type, const std::string& pattern, int32_t* ret) { + Status s; + switch (data_type) { + case DataType::kStrings: + s = strings_db_->PKPatternMatchDel(pattern, ret); + break; + case DataType::kHashes: + s = hashes_db_->PKPatternMatchDel(pattern, ret); + break; + case DataType::kLists: + s = lists_db_->PKPatternMatchDel(pattern, ret); + break; + case DataType::kZSets: + s = zsets_db_->PKPatternMatchDel(pattern, ret); + break; + case DataType::kSets: + s = sets_db_->PKPatternMatchDel(pattern, ret); + break; + default: + s = Status::Corruption("Unsupported data type"); + break; + } + return s; +} + +Status Storage::Scanx(const DataType& data_type, const std::string& start_key, const std::string& pattern, + int64_t count, std::vector* keys, std::string* next_key) { + Status s; + keys->clear(); + next_key->clear(); + switch (data_type) { + case DataType::kStrings: + strings_db_->Scan(start_key, pattern, keys, &count, next_key); + break; + case DataType::kHashes: + hashes_db_->Scan(start_key, pattern, keys, &count, next_key); + break; + case DataType::kLists: + lists_db_->Scan(start_key, pattern, 
keys, &count, next_key); + break; + case DataType::kZSets: + zsets_db_->Scan(start_key, pattern, keys, &count, next_key); + break; + case DataType::kSets: + sets_db_->Scan(start_key, pattern, keys, &count, next_key); + break; + default: + Status::Corruption("Unsupported data types"); + break; + } + return s; +} + +int32_t Storage::Expireat(const Slice& key, int32_t timestamp, std::map* type_status) { + Status s; + int32_t count = 0; + bool is_corruption = false; + + s = strings_db_->Expireat(key, timestamp); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kStrings] = s; + } + + s = hashes_db_->Expireat(key, timestamp); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kHashes] = s; + } + + s = sets_db_->Expireat(key, timestamp); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kSets] = s; + } + + s = lists_db_->Expireat(key, timestamp); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kLists] = s; + } + + s = zsets_db_->Expireat(key, timestamp); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kLists] = s; + } + + if (is_corruption) { + return -1; + } else { + return count; + } +} + +int32_t Storage::Persist(const Slice& key, std::map* type_status) { + Status s; + int32_t count = 0; + bool is_corruption = false; + + s = strings_db_->Persist(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kStrings] = s; + } + + s = hashes_db_->Persist(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kHashes] = s; + } + + s = sets_db_->Persist(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + 
(*type_status)[DataType::kSets] = s; + } + + s = lists_db_->Persist(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kLists] = s; + } + + s = zsets_db_->Persist(key); + if (s.ok()) { + count++; + } else if (!s.IsNotFound()) { + is_corruption = true; + (*type_status)[DataType::kLists] = s; + } + + if (is_corruption) { + return -1; + } else { + return count; + } +} + +std::map Storage::TTL(const Slice& key, std::map* type_status) { + Status s; + std::map ret; + int64_t timestamp = 0; + + s = strings_db_->TTL(key, ×tamp); + if (s.ok() || s.IsNotFound()) { + ret[DataType::kStrings] = timestamp; + } else if (!s.IsNotFound()) { + ret[DataType::kStrings] = -3; + (*type_status)[DataType::kStrings] = s; + } + + s = hashes_db_->TTL(key, ×tamp); + if (s.ok() || s.IsNotFound()) { + ret[DataType::kHashes] = timestamp; + } else if (!s.IsNotFound()) { + ret[DataType::kHashes] = -3; + (*type_status)[DataType::kHashes] = s; + } + + s = lists_db_->TTL(key, ×tamp); + if (s.ok() || s.IsNotFound()) { + ret[DataType::kLists] = timestamp; + } else if (!s.IsNotFound()) { + ret[DataType::kLists] = -3; + (*type_status)[DataType::kLists] = s; + } + + s = sets_db_->TTL(key, ×tamp); + if (s.ok() || s.IsNotFound()) { + ret[DataType::kSets] = timestamp; + } else if (!s.IsNotFound()) { + ret[DataType::kSets] = -3; + (*type_status)[DataType::kSets] = s; + } + + s = zsets_db_->TTL(key, ×tamp); + if (s.ok() || s.IsNotFound()) { + ret[DataType::kZSets] = timestamp; + } else if (!s.IsNotFound()) { + ret[DataType::kZSets] = -3; + (*type_status)[DataType::kZSets] = s; + } + return ret; +} + +Status Storage::GetType(const std::string& key, bool single, std::vector& types) { + types.clear(); + + Status s; + std::string value; + s = strings_db_->Get(key, &value); + if (s.ok()) { + types.emplace_back("string"); + } else if (!s.IsNotFound()) { + return s; + } + if (single && !types.empty()) { + return s; + } + + int32_t hashes_len = 0; + s = 
hashes_db_->HLen(key, &hashes_len); + if (s.ok() && hashes_len != 0) { + types.emplace_back("hash"); + } else if (!s.IsNotFound()) { + return s; + } + if (single && !types.empty()) { + return s; + } + + uint64_t lists_len = 0; + s = lists_db_->LLen(key, &lists_len); + if (s.ok() && lists_len != 0) { + types.emplace_back("list"); + } else if (!s.IsNotFound()) { + return s; + } + if (single && !types.empty()) { + return s; + } + + int32_t zsets_size = 0; + s = zsets_db_->ZCard(key, &zsets_size); + if (s.ok() && zsets_size != 0) { + types.emplace_back("zset"); + } else if (!s.IsNotFound()) { + return s; + } + if (single && !types.empty()) { + return s; + } + + int32_t sets_size = 0; + s = sets_db_->SCard(key, &sets_size); + if (s.ok() && sets_size != 0) { + types.emplace_back("set"); + } else if (!s.IsNotFound()) { + return s; + } + if (single && types.empty()) { + types.emplace_back("none"); + } + return Status::OK(); +} + +Status Storage::Keys(const DataType& data_type, const std::string& pattern, std::vector* keys) { + Status s; + if (data_type == DataType::kStrings) { + s = strings_db_->ScanKeys(pattern, keys); + if (!s.ok()) { + return s; + } + } else if (data_type == DataType::kHashes) { + s = hashes_db_->ScanKeys(pattern, keys); + if (!s.ok()) { + return s; + } + } else if (data_type == DataType::kZSets) { + s = zsets_db_->ScanKeys(pattern, keys); + if (!s.ok()) { + return s; + } + } else if (data_type == DataType::kSets) { + s = sets_db_->ScanKeys(pattern, keys); + if (!s.ok()) { + return s; + } + } else if (data_type == DataType::kLists) { + s = lists_db_->ScanKeys(pattern, keys); + if (!s.ok()) { + return s; + } + } else { + s = strings_db_->ScanKeys(pattern, keys); + if (!s.ok()) { + return s; + } + s = hashes_db_->ScanKeys(pattern, keys); + if (!s.ok()) { + return s; + } + s = zsets_db_->ScanKeys(pattern, keys); + if (!s.ok()) { + return s; + } + s = sets_db_->ScanKeys(pattern, keys); + if (!s.ok()) { + return s; + } + s = lists_db_->ScanKeys(pattern, 
keys); + if (!s.ok()) { + return s; + } + } + return s; +} + +void Storage::ScanDatabase(const DataType& type) { + switch (type) { + case kStrings: + strings_db_->ScanDatabase(); + break; + case kHashes: + hashes_db_->ScanDatabase(); + break; + case kSets: + sets_db_->ScanDatabase(); + break; + case kZSets: + zsets_db_->ScanDatabase(); + break; + case kLists: + lists_db_->ScanDatabase(); + break; + case kAll: + strings_db_->ScanDatabase(); + hashes_db_->ScanDatabase(); + sets_db_->ScanDatabase(); + zsets_db_->ScanDatabase(); + lists_db_->ScanDatabase(); + break; + } +} + +// HyperLogLog +Status Storage::PfAdd(const Slice& key, const std::vector& values, bool* update) { + *update = false; + if (values.size() >= kMaxKeys) { + return Status::InvalidArgument("Invalid the number of key"); + } + + std::string value; + std::string registers; + std::string result; + Status s = strings_db_->Get(key, &value); + if (s.ok()) { + registers = value; + } else if (s.IsNotFound()) { + registers = ""; + } else { + return s; + } + HyperLogLog log(kPrecision, registers); + auto previous = static_cast(log.Estimate()); + for (const auto& value : values) { + result = log.Add(value.data(), value.size()); + } + HyperLogLog update_log(kPrecision, result); + auto now = static_cast(update_log.Estimate()); + if (previous != now || (s.IsNotFound() && values.empty())) { + *update = true; + } + s = strings_db_->Set(key, result); + return s; +} + +Status Storage::PfCount(const std::vector& keys, int64_t* result) { + if (keys.size() >= kMaxKeys || keys.empty()) { + return Status::InvalidArgument("Invalid the number of key"); + } + + std::string value; + std::string first_registers; + Status s = strings_db_->Get(keys[0], &value); + if (s.ok()) { + first_registers = std::string(value.data(), value.size()); + } else if (s.IsNotFound()) { + first_registers = ""; + } + + HyperLogLog first_log(kPrecision, first_registers); + for (size_t i = 1; i < keys.size(); ++i) { + std::string value; + std::string 
registers; + s = strings_db_->Get(keys[i], &value); + if (s.ok()) { + registers = value; + } else if (s.IsNotFound()) { + continue; + } else { + return s; + } + HyperLogLog log(kPrecision, registers); + first_log.Merge(log); + } + *result = static_cast(first_log.Estimate()); + return Status::OK(); +} + +Status Storage::PfMerge(const std::vector& keys, std::string& value_to_dest) { + if (keys.size() >= kMaxKeys || keys.empty()) { + return Status::InvalidArgument("Invalid the number of key"); + } + + Status s; + std::string value; + std::string first_registers; + std::string result; + s = strings_db_->Get(keys[0], &value); + if (s.ok()) { + first_registers = std::string(value.data(), value.size()); + } else if (s.IsNotFound()) { + first_registers = ""; + } + + result = first_registers; + HyperLogLog first_log(kPrecision, first_registers); + for (size_t i = 1; i < keys.size(); ++i) { + std::string value; + std::string registers; + s = strings_db_->Get(keys[i], &value); + if (s.ok()) { + registers = std::string(value.data(), value.size()); + } else if (s.IsNotFound()) { + continue; + } else { + return s; + } + HyperLogLog log(kPrecision, registers); + result = first_log.Merge(log); + } + s = strings_db_->Set(keys[0], result); + value_to_dest = std::move(result); + return s; +} + +static void* StartBGThreadWrapper(void* arg) { + auto s = reinterpret_cast(arg); + s->RunBGTask(); + return nullptr; +} + +Status Storage::StartBGThread() { + int result = pthread_create(&bg_tasks_thread_id_, nullptr, StartBGThreadWrapper, this); + if (result != 0) { + char msg[128]; + snprintf(msg, sizeof(msg), "pthread create: %s", strerror(result)); + return Status::Corruption(msg); + } + return Status::OK(); +} + +Status Storage::AddBGTask(const BGTask& bg_task) { + bg_tasks_mutex_.lock(); + if (bg_task.type == kAll) { + // if current task it is global compact, + // clear the bg_tasks_queue_; + std::queue empty_queue; + bg_tasks_queue_.swap(empty_queue); + } + 
bg_tasks_queue_.push(bg_task); + bg_tasks_cond_var_.notify_one(); + bg_tasks_mutex_.unlock(); + return Status::OK(); +} + +Status Storage::RunBGTask() { + BGTask task; + while (!bg_tasks_should_exit_) { + std::unique_lock lock(bg_tasks_mutex_); + bg_tasks_cond_var_.wait(lock, [this]() { return !bg_tasks_queue_.empty() || bg_tasks_should_exit_; }); + + if (!bg_tasks_queue_.empty()) { + task = bg_tasks_queue_.front(); + bg_tasks_queue_.pop(); + } + lock.unlock(); + + if (bg_tasks_should_exit_) { + return Status::Incomplete("bgtask return with bg_tasks_should_exit true"); + } + + if (task.operation == kCleanAll) { + DoCompact(task.type); + } else if (task.operation == kCompactKey) { + CompactKey(task.type, task.argv); + } + } + return Status::OK(); +} + +Status Storage::Compact(const DataType& type, bool sync) { + if (sync) { + return DoCompact(type); + } else { + AddBGTask({type, kCleanAll}); + } + return Status::OK(); +} + +Status Storage::DoCompact(const DataType& type) { + if (type != kAll && type != kStrings && type != kHashes && type != kSets && type != kZSets && type != kLists) { + return Status::InvalidArgument(""); + } + + Status s; + if (type == kStrings) { + current_task_type_ = Operation::kCleanStrings; + s = strings_db_->CompactRange(nullptr, nullptr); + } else if (type == kHashes) { + current_task_type_ = Operation::kCleanHashes; + s = hashes_db_->CompactRange(nullptr, nullptr); + } else if (type == kSets) { + current_task_type_ = Operation::kCleanSets; + s = sets_db_->CompactRange(nullptr, nullptr); + } else if (type == kZSets) { + current_task_type_ = Operation::kCleanZSets; + s = zsets_db_->CompactRange(nullptr, nullptr); + } else if (type == kLists) { + current_task_type_ = Operation::kCleanLists; + s = lists_db_->CompactRange(nullptr, nullptr); + } else { + current_task_type_ = Operation::kCleanAll; + s = strings_db_->CompactRange(nullptr, nullptr); + s = hashes_db_->CompactRange(nullptr, nullptr); + s = sets_db_->CompactRange(nullptr, nullptr); + s 
= zsets_db_->CompactRange(nullptr, nullptr); + s = lists_db_->CompactRange(nullptr, nullptr); + } + current_task_type_ = Operation::kNone; + return s; +} + +Status Storage::CompactKey(const DataType& type, const std::string& key) { + std::string meta_start_key; + std::string meta_end_key; + std::string data_start_key; + std::string data_end_key; + CalculateMetaStartAndEndKey(key, &meta_start_key, &meta_end_key); + CalculateDataStartAndEndKey(key, &data_start_key, &data_end_key); + Slice slice_meta_begin(meta_start_key); + Slice slice_meta_end(meta_end_key); + Slice slice_data_begin(data_start_key); + Slice slice_data_end(data_end_key); + if (type == kSets) { + sets_db_->CompactRange(&slice_meta_begin, &slice_meta_end, kMeta); + sets_db_->CompactRange(&slice_data_begin, &slice_data_end, kData); + } else if (type == kZSets) { + zsets_db_->CompactRange(&slice_meta_begin, &slice_meta_end, kMeta); + zsets_db_->CompactRange(&slice_data_begin, &slice_data_end, kData); + } else if (type == kHashes) { + hashes_db_->CompactRange(&slice_meta_begin, &slice_meta_end, kMeta); + hashes_db_->CompactRange(&slice_data_begin, &slice_data_end, kData); + } else if (type == kLists) { + lists_db_->CompactRange(&slice_meta_begin, &slice_meta_end, kMeta); + lists_db_->CompactRange(&slice_data_begin, &slice_data_end, kData); + } + return Status::OK(); +} + +Status Storage::SetMaxCacheStatisticKeys(uint32_t max_cache_statistic_keys) { + std::vector dbs = {sets_db_.get(), zsets_db_.get(), hashes_db_.get(), lists_db_.get()}; + for (const auto& db : dbs) { + db->SetMaxCacheStatisticKeys(max_cache_statistic_keys); + } + return Status::OK(); +} + +Status Storage::SetSmallCompactionThreshold(uint32_t small_compaction_threshold) { + std::vector dbs = {sets_db_.get(), zsets_db_.get(), hashes_db_.get(), lists_db_.get()}; + for (const auto& db : dbs) { + db->SetSmallCompactionThreshold(small_compaction_threshold); + } + return Status::OK(); +} + +std::string Storage::GetCurrentTaskType() { + int type 
= current_task_type_; + switch (type) { + case kCleanAll: + return "All"; + case kCleanStrings: + return "String"; + case kCleanHashes: + return "Hash"; + case kCleanZSets: + return "ZSet"; + case kCleanSets: + return "Set"; + case kCleanLists: + return "List"; + case kNone: + default: + return "No"; + } +} + +Status Storage::GetUsage(const std::string& property, uint64_t* const result) { + *result = GetProperty(ALL_DB, property); + return Status::OK(); +} + +Status Storage::GetUsage(const std::string& property, std::map* const type_result) { + type_result->clear(); + (*type_result)[STRINGS_DB] = GetProperty(STRINGS_DB, property); + (*type_result)[HASHES_DB] = GetProperty(HASHES_DB, property); + (*type_result)[LISTS_DB] = GetProperty(LISTS_DB, property); + (*type_result)[ZSETS_DB] = GetProperty(ZSETS_DB, property); + (*type_result)[SETS_DB] = GetProperty(SETS_DB, property); + return Status::OK(); +} + +uint64_t Storage::GetProperty(const std::string& db_type, const std::string& property) { + uint64_t out = 0; + uint64_t result = 0; + if (db_type == ALL_DB || db_type == STRINGS_DB) { + strings_db_->GetProperty(property, &out); + result += out; + } + if (db_type == ALL_DB || db_type == HASHES_DB) { + hashes_db_->GetProperty(property, &out); + result += out; + } + if (db_type == ALL_DB || db_type == LISTS_DB) { + lists_db_->GetProperty(property, &out); + result += out; + } + if (db_type == ALL_DB || db_type == ZSETS_DB) { + zsets_db_->GetProperty(property, &out); + result += out; + } + if (db_type == ALL_DB || db_type == SETS_DB) { + sets_db_->GetProperty(property, &out); + result += out; + } + return result; +} + +Status Storage::GetKeyNum(std::vector* key_infos) { + KeyInfo key_info; + // NOTE: keep the db order with string, hash, list, zset, set + std::vector dbs = {strings_db_.get(), hashes_db_.get(), lists_db_.get(), zsets_db_.get(), sets_db_.get()}; + for (const auto& db : dbs) { + // check the scanner was stopped or not, before scanning the next db + if 
(scan_keynum_exit_) { + break; + } + db->ScanKeyNum(&key_info); + key_infos->push_back(key_info); + } + if (scan_keynum_exit_) { + scan_keynum_exit_ = false; + return Status::Corruption("exit"); + } + return Status::OK(); +} + +Status Storage::StopScanKeyNum() { + scan_keynum_exit_ = true; + return Status::OK(); +} + +rocksdb::DB* Storage::GetDBByType(const std::string& type) { + if (type == STRINGS_DB) { + return strings_db_->GetDB(); + } else if (type == HASHES_DB) { + return hashes_db_->GetDB(); + } else if (type == LISTS_DB) { + return lists_db_->GetDB(); + } else if (type == SETS_DB) { + return sets_db_->GetDB(); + } else if (type == ZSETS_DB) { + return zsets_db_->GetDB(); + } else { + return nullptr; + } +} + +Status Storage::SetOptions(const OptionType& option_type, const std::string& db_type, + const std::unordered_map& options) { + Status s; + if (db_type == ALL_DB || db_type == STRINGS_DB) { + s = strings_db_->SetOptions(option_type, options); + if (!s.ok()) { + return s; + } + } + if (db_type == ALL_DB || db_type == HASHES_DB) { + s = hashes_db_->SetOptions(option_type, options); + if (!s.ok()) { + return s; + } + } + if (db_type == ALL_DB || db_type == LISTS_DB) { + s = lists_db_->SetOptions(option_type, options); + if (!s.ok()) { + return s; + } + } + if (db_type == ALL_DB || db_type == ZSETS_DB) { + s = zsets_db_->SetOptions(option_type, options); + if (!s.ok()) { + return s; + } + } + if (db_type == ALL_DB || db_type == SETS_DB) { + s = sets_db_->SetOptions(option_type, options); + if (!s.ok()) { + return s; + } + } + return s; +} + +void Storage::GetRocksDBInfo(std::string& info) { + strings_db_->GetRocksDBInfo(info, "strings_"); + hashes_db_->GetRocksDBInfo(info, "hashes_"); + lists_db_->GetRocksDBInfo(info, "lists_"); + sets_db_->GetRocksDBInfo(info, "sets_"); + zsets_db_->GetRocksDBInfo(info, "zsets_"); +} + +void Storage::DisableWal(const bool is_wal_disable) { + strings_db_->SetWriteWalOptions(is_wal_disable); + 
hashes_db_->SetWriteWalOptions(is_wal_disable); + lists_db_->SetWriteWalOptions(is_wal_disable); + sets_db_->SetWriteWalOptions(is_wal_disable); + zsets_db_->SetWriteWalOptions(is_wal_disable); +} + +} // namespace storage diff --git a/src/storage/src/storage_murmur3.h b/src/storage/src/storage_murmur3.h new file mode 100644 index 000000000..19a3a195f --- /dev/null +++ b/src/storage/src/storage_murmur3.h @@ -0,0 +1,148 @@ +#pragma once + +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The autohor hereby disclaims copyright to this source code. + +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +typedef unsigned char uint8_t; +typedef unsigned long uint32_t; +typedef unsigned __int64 uint64_t; + +// Other compilers + +#else // defined(_MSC_VER) + +# include + +#endif // !defined(_MSC_VER) + +namespace storage { + +#define FORCE_INLINE __attribute__((always_inline)) + +inline uint32_t rotl32(uint32_t x, uint8_t r) { return (x << r) | (x >> (32 - r)); } + +#define ROTL32(x, y) rotl32(x, y) + +#define BIG_CONSTANT(x) (x##LLU) + +/* NO-OP for little-endian platforms */ +#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) +# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define BYTESWAP(x) (x) +# endif +/* if __BYTE_ORDER__ is not predefined (like FreeBSD), use arch */ +#elif defined(__i386) || defined(__x86_64) || defined(__alpha) || defined(__vax) + +# define BYTESWAP(x) (x) +/* use __builtin_bswap32 if available */ +#elif 
defined(__GNUC__) || defined(__clang__) +# ifdef __has_builtin +# if __has_builtin(__builtin_bswap32) +# define BYTESWAP(x) __builtin_bswap32(x) +# endif // __has_builtin(__builtin_bswap32) +# endif // __has_builtin +#endif // defined(__GNUC__) || defined(__clang__) +/* last resort (big-endian w/o __builtin_bswap) */ +#ifndef BYTESWAP +# define BYTESWAP(x) ((((x)&0xFF) << 24) | (((x) >> 24) & 0xFF) | (((x)&0x0000FF00) << 8) | (((x)&0x00FF0000) >> 8)) +#endif + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +#define getblock(p, i) BYTESWAP((p)[i]) + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +uint32_t fmix32(uint32_t h) { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +//----------------------------------------------------------------------------- + +#ifdef __cplusplus +extern "C" +#else +extern +#endif + void + MurmurHash3_x86_32(const void* key, int len, uint32_t seed, void* out) { + const auto data = (const uint8_t*)key; + const int nblocks = len / 4; + int i; + + uint32_t h1 = seed; + + uint32_t c1 = 0xcc9e2d51; + uint32_t c2 = 0x1b873593; + + //---------- + // body + + const auto blocks = (const uint32_t*)(data + nblocks * 4); + + for (i = -nblocks; i != 0; i++) { + uint32_t k1 = getblock(blocks, i); + + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + //---------- + // tail + { + const auto tail = (data + nblocks * 4); + + uint32_t k1 = 0; + + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + case 2: + k1 ^= tail[1] << 8; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + }; + } + + //---------- + // finalization + + h1 ^= len; + + h1 
= fmix32(h1); + + *(uint32_t*)out = h1; +} + +} // namespace storage diff --git a/src/storage/src/strings_filter.h b/src/storage/src/strings_filter.h new file mode 100644 index 000000000..25916c984 --- /dev/null +++ b/src/storage/src/strings_filter.h @@ -0,0 +1,52 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include +#include + +#include "rocksdb/compaction_filter.h" +#include "src/debug.h" +#include "src/strings_value_format.h" + +namespace storage { + +class StringsFilter : public rocksdb::CompactionFilter { + public: + StringsFilter() = default; + bool Filter(int level, const rocksdb::Slice& key, const rocksdb::Slice& value, std::string* new_value, + bool* value_changed) const override { + int64_t unix_time; + rocksdb::Env::Default()->GetCurrentTime(&unix_time); + auto cur_time = static_cast(unix_time); + ParsedStringsValue parsed_strings_value(value); + TRACE("==========================START=========================="); + TRACE("[StringsFilter], key: %s, value = %s, timestamp: %d, cur_time: %d", key.ToString().c_str(), + parsed_strings_value.value().ToString().c_str(), parsed_strings_value.timestamp(), cur_time); + + if (parsed_strings_value.timestamp() != 0 && parsed_strings_value.timestamp() < cur_time) { + TRACE("Drop[Stale]"); + return true; + } else { + TRACE("Reserve"); + return false; + } + } + + const char* Name() const override { return "StringsFilter"; } +}; + +class StringsFilterFactory : public rocksdb::CompactionFilterFactory { + public: + StringsFilterFactory() = default; + std::unique_ptr CreateCompactionFilter( + const rocksdb::CompactionFilter::Context& context) override { + return std::unique_ptr(new StringsFilter()); + } + const char* Name() const override { return 
"StringsFilterFactory"; } +}; + +} // namespace storage diff --git a/src/storage/src/strings_value_format.h b/src/storage/src/strings_value_format.h new file mode 100644 index 000000000..d2139b3fa --- /dev/null +++ b/src/storage/src/strings_value_format.h @@ -0,0 +1,66 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once + +#include + +#include "src/base_value_format.h" + +namespace storage { + +class StringsValue : public InternalValue { + public: + explicit StringsValue(const rocksdb::Slice& user_value) : InternalValue(user_value) {} + size_t AppendTimestampAndVersion() override { + size_t usize = user_value_.size(); + char* dst = start_; + memcpy(dst, user_value_.data(), usize); + dst += usize; + EncodeFixed32(dst, timestamp_); + return usize + sizeof(int32_t); + } +}; + +class ParsedStringsValue : public ParsedInternalValue { + public: + // Use this constructor after rocksdb::DB::Get(); + explicit ParsedStringsValue(std::string* internal_value_str) : ParsedInternalValue(internal_value_str) { + if (internal_value_str->size() >= kStringsValueSuffixLength) { + user_value_ = rocksdb::Slice(internal_value_str->data(), internal_value_str->size() - kStringsValueSuffixLength); + timestamp_ = DecodeFixed32(internal_value_str->data() + internal_value_str->size() - kStringsValueSuffixLength); + } + } + + // Use this constructor in rocksdb::CompactionFilter::Filter(); + explicit ParsedStringsValue(const rocksdb::Slice& internal_value_slice) : ParsedInternalValue(internal_value_slice) { + if (internal_value_slice.size() >= kStringsValueSuffixLength) { + user_value_ = rocksdb::Slice(internal_value_slice.data(), internal_value_slice.size() - kStringsValueSuffixLength); + timestamp_ = 
DecodeFixed32(internal_value_slice.data() + internal_value_slice.size() - kStringsValueSuffixLength); + } + } + + void StripSuffix() override { + if (value_) { + value_->erase(value_->size() - kStringsValueSuffixLength, kStringsValueSuffixLength); + } + } + + // Strings type do not have version field; + void SetVersionToValue() override {} + + void SetTimestampToValue() override { + if (value_) { + char* dst = const_cast(value_->data()) + value_->size() - kStringsValueSuffixLength; + EncodeFixed32(dst, timestamp_); + } + } + + rocksdb::Slice value() { return user_value_; } + + static const size_t kStringsValueSuffixLength = sizeof(int32_t); +}; + +} // namespace storage diff --git a/src/storage/src/util.cc b/src/storage/src/util.cc new file mode 100644 index 000000000..9effa0768 --- /dev/null +++ b/src/storage/src/util.cc @@ -0,0 +1,291 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include +#include +#include +#include +#include +#include + +#include "pstd/pstd_string.h" + +#include "src/coding.h" +#include "storage/util.h" + +namespace storage { + +/* Convert a long long into a string. Returns the number of + * characters needed to represent the number. + * If the buffer is not big enough to store the string, 0 is returned. + * + * Based on the following article (that apparently does not provide a + * novel approach but only publicizes an already used technique): + * + * https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920 + * + * Modified in order to handle signed integers since the original code was + * designed for unsigned integers. 
*/ +int Int64ToStr(char* dst, size_t dstlen, int64_t svalue) { + return pstd::Ll2string(dst, dstlen, svalue); +} + +/* Convert a string into a long long. Returns 1 if the string could be parsed + * into a (non-overflowing) long long, 0 otherwise. The value will be set to + * the parsed value when appropriate. */ +int StrToInt64(const char* s, size_t slen, int64_t* value) { + return pstd::String2int(s, slen, value); +} + +/* Glob-style pattern matching. */ +int StringMatch(const char* pattern, uint64_t pattern_len, const char* str, uint64_t string_len, int nocase) { + return pstd::StringMatchLen(pattern, static_cast(pattern_len), str, static_cast(string_len), nocase); +} + +int StrToLongDouble(const char* s, size_t slen, long double* ldval) { + char* pEnd; + std::string t(s, slen); + if (t.find(' ') != std::string::npos) { + return -1; + } + long double d = strtold(s, &pEnd); + if (pEnd != s + slen) { + return -1; + } + + if (ldval) { + *ldval = d; + } + return 0; +} + +int LongDoubleToStr(long double ldval, std::string* value) { + char buf[256]; + int len; + if (std::isnan(ldval)) { + return -1; + } else if (std::isinf(ldval)) { + /* Libc in odd systems (Hi Solaris!) will format infinite in a + * different way, so better to handle it in an explicit way. */ + if (ldval > 0) { + strcpy(buf, "inf"); + len = 3; + } else { + strcpy(buf, "-inf"); + len = 4; + } + return -1; + } else { + /* We use 17 digits precision since with 128 bit floats that precision + * after rounding is able to represent most small decimal numbers in a + * way that is "non surprising" for the user (that is, most small + * decimal numbers will be represented in a way that when converted + * back into a string are exactly the same as what the user typed.) */ + len = snprintf(buf, sizeof(buf), "%.17Lf", ldval); + /* Now remove trailing zeroes after the '.' 
*/ + if (strchr(buf, '.')) { + char* p = buf + len - 1; + while (*p == '0') { + p--; + len--; + } + if (*p == '.') { + len--; + } + } + value->assign(buf, len); + return 0; + } +} + +int do_mkdir(const char* path, mode_t mode) { + struct stat st; + int status = 0; + + if (stat(path, &st) != 0) { + /* Directory does not exist. EEXIST for race + * condition */ + if (mkdir(path, mode) != 0 && errno != EEXIST) { + status = -1; + } + } else if (!S_ISDIR(st.st_mode)) { + errno = ENOTDIR; + status = -1; + } + + return (status); +} + +/** +** mkpath - ensure all directories in path exist +** Algorithm takes the pessimistic view and works top-down to ensure +** each directory in path exists, rather than optimistically creating +** the last element and working backwards. +*/ +int mkpath(const char* path, mode_t mode) { + char* pp; + char* sp; + int status; + char* copypath = strdup(path); + + status = 0; + pp = copypath; + while (status == 0 && (sp = strchr(pp, '/')) != nullptr) { + if (sp != pp) { + /* Neither root nor double slash in path */ + *sp = '\0'; + status = do_mkdir(copypath, mode); + *sp = '/'; + } + pp = sp + 1; + } + if (status == 0) { + status = do_mkdir(path, mode); + } + free(copypath); + return (status); +} + +int delete_dir(const char* dirname) { + char chBuf[256]; + DIR* dir = nullptr; + struct dirent* ptr; + int ret = 0; + dir = opendir(dirname); + if (nullptr == dir) { + return -1; + } + while ((ptr = readdir(dir)) != nullptr) { + ret = strcmp(ptr->d_name, "."); + if (0 == ret) { + continue; + } + ret = strcmp(ptr->d_name, ".."); + if (0 == ret) { + continue; + } + snprintf(chBuf, sizeof(chBuf), "%s/%s", dirname, ptr->d_name); + ret = is_dir(chBuf); + if (0 == ret) { + // is dir + ret = delete_dir(chBuf); + if (0 != ret) { + return -1; + } + } else if (1 == ret) { + // is file + ret = remove(chBuf); + if (0 != ret) { + return -1; + } + } + } + (void)closedir(dir); + ret = remove(dirname); + if (0 != ret) { + return -1; + } + return 0; +} + +int 
is_dir(const char* filename) { + struct stat buf; + int ret = stat(filename, &buf); + if (0 == ret) { + if ((buf.st_mode & S_IFDIR) != 0) { + // folder + return 0; + } else { + // file + return 1; + } + } + return -1; +} + +int CalculateMetaStartAndEndKey(const std::string& key, std::string* meta_start_key, std::string* meta_end_key) { + size_t needed = key.size() + 1; + auto dst = std::make_unique(needed); + const char* start = dst.get(); + std::strncpy(dst.get(), key.data(), key.size()); + char* dst_ptr = dst.get() + key.size(); + meta_start_key->assign(start, key.size()); + *dst_ptr = static_cast(0xff); + meta_end_key->assign(start, key.size() + 1); + return 0; +} + +int CalculateDataStartAndEndKey(const std::string& key, std::string* data_start_key, std::string* data_end_key) { + size_t needed = sizeof(int32_t) + key.size() + 1; + auto dst = std::make_unique(needed); + const char* start = dst.get(); + char* dst_ptr = dst.get(); + + EncodeFixed32(dst_ptr, key.size()); + dst_ptr += sizeof(int32_t); + std::strncpy(dst_ptr, key.data(), key.size()); + dst_ptr += key.size(); + *dst_ptr = static_cast(0xff); + + data_start_key->assign(start, sizeof(int32_t) + key.size()); + data_end_key->assign(start, sizeof(int32_t) + key.size() + 1); + + return 0; +} + +bool isTailWildcard(const std::string& pattern) { + if (pattern.size() < 2) { + return false; + } else { + if (pattern.back() != '*') { + return false; + } else { + for (uint32_t idx = 0; idx < pattern.size() - 1; ++idx) { + if (pattern[idx] == '*' || pattern[idx] == '?' 
|| pattern[idx] == '[' || pattern[idx] == ']') { + return false; + } + } + } + } + return true; +} + +void GetFilepath(const char* path, const char* filename, char* filepath) { + strcpy(filepath, path); // NOLINT + if (filepath[strlen(path) - 1] != '/') { + strcat(filepath, "/"); // NOLINT + } + strcat(filepath, filename); // NOLINT +} + +bool DeleteFiles(const char* path) { + DIR* dir; + struct dirent* dirinfo; + struct stat statbuf; + char filepath[256] = {0}; + lstat(path, &statbuf); + + if (S_ISREG(statbuf.st_mode)) // 判断是否是常规文件 + { + remove(path); + } else if (S_ISDIR(statbuf.st_mode)) // 判断是否是目录 + { + if (!(dir = opendir(path))) { + return true; + } + while ((dirinfo = readdir(dir)) != nullptr) { + GetFilepath(path, dirinfo->d_name, filepath); + if (strcmp(dirinfo->d_name, ".") == 0 || strcmp(dirinfo->d_name, "..") == 0) { // 判断是否是特殊目录 + continue; + } + DeleteFiles(filepath); + rmdir(filepath); + } + closedir(dir); + } + return false; +} + +} // namespace storage diff --git a/src/storage/src/zsets_data_key_format.h b/src/storage/src/zsets_data_key_format.h new file mode 100644 index 000000000..525f54020 --- /dev/null +++ b/src/storage/src/zsets_data_key_format.h @@ -0,0 +1,107 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once + +namespace storage { + +/* + * | | | | | | + * 4 Bytes key size Bytes 4 Bytes 8 Bytes member size Bytes + */ +class ZSetsScoreKey { + public: + ZSetsScoreKey(const Slice& key, int32_t version, double score, const Slice& member) + : key_(key), version_(version), score_(score), member_(member) {} + + ~ZSetsScoreKey() { + if (start_ != space_) { + delete[] start_; + } + } + + Slice Encode() { + size_t needed = key_.size() + member_.size() + sizeof(int32_t) * 2 + sizeof(uint64_t); + char* dst = nullptr; + if (needed <= sizeof(space_)) { + dst = space_; + } else { + dst = new char[needed]; + + // Need to allocate space, delete previous space + if (start_ != space_) { + delete[] start_; + } + } + start_ = dst; + EncodeFixed32(dst, key_.size()); + dst += sizeof(int32_t); + memcpy(dst, key_.data(), key_.size()); + dst += key_.size(); + EncodeFixed32(dst, version_); + dst += sizeof(int32_t); + const void* addr_score = reinterpret_cast(&score_); + EncodeFixed64(dst, *reinterpret_cast(addr_score)); + dst += sizeof(uint64_t); + memcpy(dst, member_.data(), member_.size()); + return Slice(start_, needed); + } + + private: + char space_[200]; + char* start_ = nullptr; + Slice key_; + int32_t version_ = 0; + double score_ = 0.0; + Slice member_; +}; + +class ParsedZSetsScoreKey { + public: + explicit ParsedZSetsScoreKey(const std::string* key) { + const char* ptr = key->data(); + int32_t key_len = DecodeFixed32(ptr); + ptr += sizeof(int32_t); + key_ = Slice(ptr, key_len); + ptr += key_len; + version_ = DecodeFixed32(ptr); + ptr += sizeof(int32_t); + + uint64_t tmp = DecodeFixed64(ptr); + const void* ptr_tmp = reinterpret_cast(&tmp); + score_ = *reinterpret_cast(ptr_tmp); + ptr += sizeof(uint64_t); + member_ = Slice(ptr, key->size() - key_len - 2 * sizeof(int32_t) - sizeof(uint64_t)); + } + + explicit ParsedZSetsScoreKey(const Slice& key) { + const char* ptr = key.data(); + int32_t key_len = DecodeFixed32(ptr); + ptr += sizeof(int32_t); + key_ = Slice(ptr, 
key_len); + ptr += key_len; + version_ = DecodeFixed32(ptr); + ptr += sizeof(int32_t); + + uint64_t tmp = DecodeFixed64(ptr); + const void* ptr_tmp = reinterpret_cast(&tmp); + score_ = *reinterpret_cast(ptr_tmp); + ptr += sizeof(uint64_t); + member_ = Slice(ptr, key.size() - key_len - 2 * sizeof(int32_t) - sizeof(uint64_t)); + } + + Slice key() { return key_; } + int32_t version() const { return version_; } + double score() const { return score_; } + Slice member() { return member_; } + + private: + Slice key_; + int32_t version_ = 0; + double score_ = 0.0; + Slice member_; +}; + +} // namespace storage diff --git a/src/storage/src/zsets_filter.h b/src/storage/src/zsets_filter.h new file mode 100644 index 000000000..b35da29fe --- /dev/null +++ b/src/storage/src/zsets_filter.h @@ -0,0 +1,104 @@ +// Copyright (c) 2017-present, Qihoo, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. 
+ +#pragma once + +#include +#include +#include + +#include "rocksdb/compaction_filter.h" + +#include "base_filter.h" +#include "base_meta_value_format.h" +#include "zsets_data_key_format.h" + +namespace storage { + +class ZSetsScoreFilter : public rocksdb::CompactionFilter { + public: + ZSetsScoreFilter(rocksdb::DB* db, std::vector* handles_ptr) + : db_(db), cf_handles_ptr_(handles_ptr) {} + + bool Filter(int level, const rocksdb::Slice& key, const rocksdb::Slice& value, std::string* new_value, + bool* value_changed) const override { + ParsedZSetsScoreKey parsed_zsets_score_key(key); + TRACE("==========================START=========================="); + TRACE("[ScoreFilter], key: %s, score = %lf, member = %s, version = %d", + parsed_zsets_score_key.key().ToString().c_str(), parsed_zsets_score_key.score(), + parsed_zsets_score_key.member().ToString().c_str(), parsed_zsets_score_key.version()); + + if (parsed_zsets_score_key.key().ToString() != cur_key_) { + cur_key_ = parsed_zsets_score_key.key().ToString(); + std::string meta_value; + // destroyed when close the database, Reserve Current key value + if (cf_handles_ptr_->empty()) { + return false; + } + Status s = db_->Get(default_read_options_, (*cf_handles_ptr_)[0], cur_key_, &meta_value); + if (s.ok()) { + meta_not_found_ = false; + ParsedZSetsMetaValue parsed_zsets_meta_value(&meta_value); + cur_meta_version_ = parsed_zsets_meta_value.version(); + cur_meta_timestamp_ = parsed_zsets_meta_value.timestamp(); + } else if (s.IsNotFound()) { + meta_not_found_ = true; + } else { + cur_key_ = ""; + TRACE("Reserve[Get meta_key faild]"); + return false; + } + } + + if (meta_not_found_) { + TRACE("Drop[Meta key not exist]"); + return true; + } + + int64_t unix_time; + rocksdb::Env::Default()->GetCurrentTime(&unix_time); + if (cur_meta_timestamp_ != 0 && cur_meta_timestamp_ < static_cast(unix_time)) { + TRACE("Drop[Timeout]"); + return true; + } + if (cur_meta_version_ > parsed_zsets_score_key.version()) { + 
TRACE("Drop[score_key_version < cur_meta_version]"); + return true; + } else { + TRACE("Reserve[score_key_version == cur_meta_version]"); + return false; + } + } + + const char* Name() const override { return "ZSetsScoreFilter"; } + + private: + rocksdb::DB* db_ = nullptr; + std::vector* cf_handles_ptr_ = nullptr; + rocksdb::ReadOptions default_read_options_; + mutable std::string cur_key_; + mutable bool meta_not_found_ = false; + mutable int32_t cur_meta_version_ = 0; + mutable int32_t cur_meta_timestamp_ = 0; +}; + +class ZSetsScoreFilterFactory : public rocksdb::CompactionFilterFactory { + public: + ZSetsScoreFilterFactory(rocksdb::DB** db_ptr, std::vector* handles_ptr) + : db_ptr_(db_ptr), cf_handles_ptr_(handles_ptr) {} + + std::unique_ptr CreateCompactionFilter( + const rocksdb::CompactionFilter::Context& context) override { + return std::make_unique(*db_ptr_, cf_handles_ptr_); + } + + const char* Name() const override { return "ZSetsScoreFilterFactory"; } + + private: + rocksdb::DB** db_ptr_ = nullptr; + std::vector* cf_handles_ptr_ = nullptr; +}; + +} // namespace storage diff --git a/src/store.cc b/src/store.cc index 7379aa902..a7edb78c1 100644 --- a/src/store.cc +++ b/src/store.cc @@ -9,12 +9,12 @@ #include #include #include "client.h" +#include "common.h" #include "config.h" #include "event_loop.h" #include "leveldb.h" #include "log.h" #include "multi.h" - namespace pikiwidb { uint32_t PObject::lruclock = static_cast(::time(nullptr)); @@ -589,6 +589,50 @@ PError PStore::Incrby(const PString& key, int64_t value, int64_t* ret) { return PError_ok; } +PError PStore::Incrbyfloat(const PString& key, std::string value, std::string* ret) { + PObject* old_value = nullptr; + long double old_number = 0.00f; + long double long_double_by = 0.00f; + auto db = &dbs_[dbno_]; + + if (StrToLongDouble(value.data(), value.size(), &long_double_by)) { + return PError_type; + } + + // shared when reading + std::unique_lock lock(mutex_); + PError err = getValueByType(key, 
old_value, PType_string); + if (err != PError_ok) { + return err; + } + + auto old_number_str = pikiwidb::GetDecodedString(old_value); + // old number to long double + if (StrToLongDouble(old_number_str->c_str(), old_number_str->size(), &old_number)) { + return PError_type; + } + + std::string total_string; + long double total = old_number + long_double_by; + if (LongDoubleToStr(total, &total_string)) { + return PError_overflow; + } + + *ret = total_string; + PObject new_value; + new_value = PObject::CreateString(total_string); + new_value.lru = PObject::lruclock; + auto [realObj, status] = db->insert_or_assign(key, std::move(new_value)); + const PObject& obj = realObj->second; + + // put this key to sync list + if (!waitSyncKeys_.empty()) { + waitSyncKeys_[dbno_].insert_or_assign(key, &obj); + } + + return PError_ok; +} + void PStore::SetExpire(const PString& key, uint64_t when) const { expiredDBs_[dbno_].SetExpire(key, when); } int64_t PStore::TTL(const PString& key, uint64_t now) { return expiredDBs_[dbno_].TTL(key, now); } diff --git a/src/store.h b/src/store.h index b16cb4cc3..fc2c8f9d4 100644 --- a/src/store.h +++ b/src/store.h @@ -121,6 +121,7 @@ class PStore { PObject* SetValue(const PString& key, PObject&& value); // incr PError Incrby(const PString& key, int64_t value, int64_t* ret); + PError Incrbyfloat(const PString& key, std::string value, std::string* ret); // for expire key enum ExpireResult : std::int8_t {