diff --git a/BUILD.bazel b/BUILD.bazel index ad73528fb7cbc2aa801203a9cfb8bdca5658c48b..ad9d6fa8a0612c42f40af3bab8572cdc84ba6bf8 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -94,6 +94,7 @@ cc_library( "@msgpack", "@nlohmann_json", "@securec", + "@com_googlesource_code_re2//:re2", ], alwayslink = True, ) diff --git a/api/cpp/src/local_state_store.cpp b/api/cpp/src/local_state_store.cpp index 905c25503b2f3784e052f8c961e371f72118ba46..2f77bbe46cdef808280b90ff5fef8db146c5aecd 100644 --- a/api/cpp/src/local_state_store.cpp +++ b/api/cpp/src/local_state_store.cpp @@ -16,7 +16,7 @@ */ #include "yr/api/local_state_store.h" #include -#include +#include "re2/re2.h" #include "parallel_for/complier.h" #include "src/libruntime/err_type.h" #include "src/libruntime/statestore/state_store.h" @@ -33,7 +33,7 @@ const int MIN_CHECK_INTERVAL_MS = 200; const int MAX_CHECK_INTERVAL_MS = 1000; const int GET_RETRY_MAX_TIME = 5; const int MAX_MSET_SIZE = 8; -const std::regex KEY_REGEX("^[a-zA-Z0-9\\~\\.\\-\\/_!@#%\\^\\&\\*\\(\\)\\+\\=\\:;]*$"); +const re2::RE2 KEY_REGEX("^[a-zA-Z0-9\\~\\.\\-\\/_!@#%\\^\\&\\*\\(\\)\\+\\=\\:;]*$"); LocalStateStore::LocalStateStore() {} @@ -51,10 +51,9 @@ void LocalStateStore::Write(const std::string &key, std::shared_ptr #endif #include "src/dto/config.h" - +#include "src/utility/string_utility.h" namespace YR { namespace Libruntime { @@ -65,18 +65,16 @@ void ClusterAccessInfo::ParseFromMasterInfo(const std::string &masterInfoPath) std::map kvMap; std::map> kvsMap; - std::regex regex("[,:]"); - std::sregex_token_iterator iter(masterInfo.begin(), masterInfo.end(), regex, -1); - std::sregex_token_iterator end; - - while (iter != end) { - std::string key = *iter; - iter++; - if (iter == end) { + std::string pattern = "[,:]"; + auto result = YR::utility::SplitToStr(masterInfo, pattern); + for (size_t i = 0; i < result.size();) { + std::string key = result[i]; + ++i; + if (i >= result.size()) { break; } - std::string value = *iter; - iter++; + std::string value = result[i]; + ++i; if (kvMap.find(key) != kvMap.end()) { if (kvsMap.find(key) == kvsMap.end()) { kvsMap[key].push_back(kvMap[key]); @@ -187,13 +185,10 @@ void ClusterAccessInfo::ParseDsAddr() std::pair ClusterAccessInfo::ParseURLWithProtocol(const std::string &url) { - std::regex urlPattern(R"((^[a-zA-Z]+://)?(.*))"); - std::smatch matches; - - if (std::regex_match(url, matches, urlPattern)) { - std::string protocol = matches[1].str(); - std::string remainder = matches[2].str(); - + re2::RE2 urlPattern(R"((^[a-zA-Z]+://)?(.*))"); + std::string protocol; + std::string remainder; + if (RE2::PartialMatch(url, urlPattern, &protocol, &remainder)) { if (protocol.empty()) { return {"", remainder}; } else { @@ -315,14 +310,14 @@ ClusterAccessInfo AutoCreateYuanRongCluster(std::vector &args) bool IsValidIPPort(const std::string &input) { - std::regex pattern(R"((\d{1,3}\.){3}\d{1,3}:\d{1,5})"); - return std::regex_match(input, pattern); + re2::RE2 pattern(R"((\d{1,3}\.){3}\d{1,3}:\d{1,5})"); + return RE2::FullMatch(input, pattern); } bool IsURLHasProtocalPrefix(const std::string &input) { - std::regex pattern(R"((http|https|grpc)://([a-zA-Z0-9.-]+|\d{1,3}(\.\d{1,3}){3}):\d{1,5})"); - return std::regex_match(input, pattern); + re2::RE2 pattern(R"((http|https|grpc)://([a-zA-Z0-9.-]+|\d{1,3}(\.\d{1,3}){3}):\d{1,5})"); + return RE2::FullMatch(input, pattern); } bool NeedToBeParsed(ClusterAccessInfo info) diff --git a/src/libruntime/auto_init.h b/src/libruntime/auto_init.h index 96a6e6c3f5aa1baeed8254bf49872c976bbe163e..4cf8e3cc4c6815c8de80d8d92890070dd0d72d79 100644 --- a/src/libruntime/auto_init.h +++ b/src/libruntime/auto_init.h @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/src/libruntime/clientsmanager/clients_manager.cpp b/src/libruntime/clientsmanager/clients_manager.cpp index ff7a48ad693cd7a61019ecf395fafd6ec44c8617..abd39a263956ac6023124a582b709223d92ffb68 100644 --- a/src/libruntime/clientsmanager/clients_manager.cpp +++ b/src/libruntime/clientsmanager/clients_manager.cpp @@ -23,7 +23,7 @@ std::pair, ErrorInfo> ClientsManager::GetFsConn(c { auto addr = GetIpAddr(ip, port); YRLOG_DEBUG("grpc client target is {}", addr); - if (!std::regex_match(addr, std::regex(IP_PORT_REGEX))) { + if (!RE2::FullMatch(addr, re2::RE2(IP_PORT_REGEX))) { YRLOG_ERROR("failed to get valid runtime-rpc server address({})", addr); return std::make_pair(nullptr, ErrorInfo(ErrorCode::ERR_CONNECTION_FAILED, "The server address is invalid.")); } diff --git a/src/libruntime/clientsmanager/clients_manager.h b/src/libruntime/clientsmanager/clients_manager.h index 43e25cba743939640e3aa7bbc09c26d02c79237e..53707fdc16afe6b50a1afe4ce16506351aa6ac2d 100644 --- a/src/libruntime/clientsmanager/clients_manager.h +++ b/src/libruntime/clientsmanager/clients_manager.h @@ -17,7 +17,6 @@ #pragma once #include -#include #include "src/libruntime/err_type.h" #include "src/libruntime/gwclient/http/client_manager.h" diff --git a/src/libruntime/invoke_spec.cpp b/src/libruntime/invoke_spec.cpp index ef472eb3a67ae5993c528b583c4557e6bf35f887..b407c20cb6c6fa6fe5342933a4a2a592ab9d5dd1 100644 --- a/src/libruntime/invoke_spec.cpp +++ b/src/libruntime/invoke_spec.cpp @@ -15,7 +15,6 @@ */ #include "src/libruntime/invoke_spec.h" -#include namespace YR { namespace Libruntime { const std::string LOW_RELIABILITY_TYPE = "low"; diff --git a/src/libruntime/libruntime.cpp b/src/libruntime/libruntime.cpp index 6826920511fda4a4059e851cced574c6e5c771bf..600191f6528cbd190e01c58dbeb5475214a0a93b 100755 --- a/src/libruntime/libruntime.cpp +++ b/src/libruntime/libruntime.cpp @@ -15,7 +15,7 @@ */ #include - +#include "re2/re2.h" #include "invoke_order_manager.h" #include "src/dto/config.h" #include "src/dto/data_object.h" @@ -42,8 +42,8 @@ const std::string ACTOR_INSTANCE_TYPE = "actor"; const char *DEFAULT_DELEGATE_DIRECTORY_QUOTA = "512"; // 512MB const int MAX_DELEGATE_DIRECTORY_QUOTA = 1024 * 1024; // 1TB const std::string QUOTA_NO_LIMIT = "-1"; -const std::regex POD_LABELS_KEY_REGEX("^[a-zA-Z0-9]([-a-zA-Z0-9]{0,61}[a-zA-Z0-9])?$"); -const std::regex POD_LABELS_VALUE_REGEX("^[a-zA-Z0-9]([-a-zA-Z0-9]{0,61}[a-zA-Z0-9])?$|^$"); +const re2::RE2 POD_LABELS_KEY_REGEX("^[a-zA-Z0-9]([-a-zA-Z0-9]{0,61}[a-zA-Z0-9])?$"); +const re2::RE2 POD_LABELS_VALUE_REGEX("^[a-zA-Z0-9]([-a-zA-Z0-9]{0,61}[a-zA-Z0-9])?$|^$"); const std::string DISPATCHER = "dis"; const size_t NUM_DISPATCHER = 2; @@ -126,12 +126,12 @@ ErrorInfo Libruntime::CheckSpec(std::shared_ptr spec) "The number of pod labels is invalid, please set the pod labels less than and equal to 5"); } for (auto &iter : spec->opts.podLabels) { - if (!std::regex_match(iter.first, POD_LABELS_KEY_REGEX)) { + if (!RE2::FullMatch(iter.first, POD_LABELS_KEY_REGEX)) { return ErrorInfo(YR::Libruntime::ErrorCode::ERR_PARAM_INVALID, YR::Libruntime::ModuleCode::RUNTIME, "The pod label key is invalid, please set the pod label key with letters, digits and '-' " "which cannot start or end with '-' and cannot exceed 63 characters."); } - if (!std::regex_match(iter.second, POD_LABELS_VALUE_REGEX)) { + if (!RE2::FullMatch(iter.second, POD_LABELS_VALUE_REGEX)) { return ErrorInfo( YR::Libruntime::ErrorCode::ERR_PARAM_INVALID, YR::Libruntime::ModuleCode::RUNTIME, "The pod label value is invalid, please set the pod label value with letters, digits and '-' which " @@ -141,8 +141,8 @@ ErrorInfo Libruntime::CheckSpec(std::shared_ptr spec) } if (spec->opts.customExtensions.find(DELEGATE_DIRECTORY_QUOTA) != spec->opts.customExtensions.end()) { auto quota = spec->opts.customExtensions[DELEGATE_DIRECTORY_QUOTA]; - std::regex pattern(R"(^[0-9]+$)"); - if (quota != QUOTA_NO_LIMIT && !std::regex_match(quota, pattern)) { + re2::RE2 pattern(R"(^[0-9]+$)"); + if (quota != QUOTA_NO_LIMIT && !RE2::FullMatch(quota, pattern)) { return ErrorInfo(YR::Libruntime::ErrorCode::ERR_PARAM_INVALID, YR::Libruntime::ModuleCode::RUNTIME, "The DELEGATE_DIRECTORY_QUOTA value: {" + quota + "} is invalid, not composed of numbers"); } diff --git a/src/libruntime/libruntime.h b/src/libruntime/libruntime.h index 9d38e541029f75b726138ec1a8f02bd2223d9b8e..f942e8de2a4fd1a6af99c6677ec35d772fda1baf 100644 --- a/src/libruntime/libruntime.h +++ b/src/libruntime/libruntime.h @@ -17,7 +17,6 @@ #pragma once #include -#include #include #include diff --git a/src/utility/BUILD.bazel b/src/utility/BUILD.bazel index f8f9b99dc98d1f18344a759bb2aa7d40659e5b72..a1ffec5361b8aee8de60694f5a6ded3c30ed0c43 100644 --- a/src/utility/BUILD.bazel +++ b/src/utility/BUILD.bazel @@ -25,6 +25,7 @@ cc_library( "@com_google_absl//absl/random:random", "@com_google_absl//absl/synchronization:synchronization", "@securec", + "@com_googlesource_code_re2//:re2", ], ) @@ -45,5 +46,6 @@ cc_library( "@com_google_absl//absl/random:random", "@com_google_absl//absl/synchronization:synchronization", "@securec", + "@com_googlesource_code_re2//:re2", ], ) diff --git a/src/utility/string_utility.h b/src/utility/string_utility.h index e0faf8d2be33484673a1ad8da40900b9d7023620..1d467a7adb8437e05144e086079b0265447c002f 100644 --- a/src/utility/string_utility.h +++ b/src/utility/string_utility.h @@ -21,7 +21,7 @@ #include #include - +#include "re2/re2.h" namespace YR { namespace utility { inline void Split(const std::string &source, std::vector &result, const char sep) @@ -81,5 +81,30 @@ inline std::string DecodedToString(const std::string &inStr) boost::beast::detail::base64::decode(data, inStr.data(), inStr.size()); return reinterpret_cast(data); } + +inline std::vector SplitToStr(const std::string &info, const std::string &pattern) +{ + if (info.empty()) { + return {}; + } + re2::StringPiece text(info); + re2::RE2 re2Pattern(pattern); + std::vector result; + size_t lastPos = 0; + + re2::StringPiece match; + while (re2Pattern.Match(text, lastPos, text.size(), RE2::UNANCHORED, &match, 1)) { + uint64_t splitIndex = match.data() - text.data(); + if (match.data() - (text.data() + lastPos) > 0) { + result.push_back(std::string(text.data() + lastPos, match.data() - (text.data() + lastPos))); + } + lastPos = splitIndex + match.size(); + } + + if (lastPos < text.size()) { + result.push_back(std::string(text.data() + lastPos)); + } + return result; +} } // namespace utility } // namespace YR \ No newline at end of file diff --git a/test/libruntime/auto_init_test.cpp b/test/libruntime/auto_init_test.cpp index 52fcd6e3a7cbf5bf28d7aa5a01ef9e3d48177e2a..cb983dcdb226597bc5e29dcb867ac5b24609e51a 100644 --- a/test/libruntime/auto_init_test.cpp +++ b/test/libruntime/auto_init_test.cpp @@ -84,3 +84,13 @@ TEST_F(AutoInitTest, AutoInitWithClusterAccessInfo) ASSERT_EQ(info2.dsAddr, "127.0.0.1:31499"); ASSERT_EQ(info2.inCluster, true); } + +TEST_F(AutoInitTest, ParseFromMasterInfo) +{ + MakeMasterInfoFile(YR::Libruntime::kDefaultDeployPathCurrMasterInfo, masterInfoString); + YR::Libruntime::ClusterAccessInfo info; + info.ParseFromMasterInfo(); + ASSERT_EQ(info.serverAddr, "10.90.42.75:34834"); + ASSERT_EQ(info.dsAddr, "10.90.42.75:31499"); + ASSERT_EQ(info.inCluster, true); +} \ No newline at end of file diff --git a/tools/download_dependency.sh b/tools/download_dependency.sh index 71044b36b96ea83e6ed335f93634e1d6b14999c8..0daff57025b767d2e449bd10326124d1885b8f59 100644 --- a/tools/download_dependency.sh +++ b/tools/download_dependency.sh @@ -37,7 +37,9 @@ YR_METRICS_BIN_DIR="${RUNTIME_SRC_DIR}/../metrics" THIRD_PARTY_DIR="${RUNTIME_SRC_DIR}/../thirdparty/" MODULES="runtime" bash -x ${BASE_DIR}/download_opensource.sh -M $MODULES -T $THIRD_PARTY_DIR - +RUNTIME_THIRD_PARTY_CACHE=${RUNTIME_THIRD_PARTY_CACHE:-"https://build-logs.openeuler.openatom.cn:38080/temp-archived/openeuler/openYuanrong/runtime_deps/"} +DATA_SYSTEM_CACHE=${DATA_SYSTEM_CACHE:-"https://build-logs.openeuler.openatom.cn:38080/temp-archived/openeuler/openYuanrong/yr_cache/$(uname -m)/yr-datasystem-v0.5.0.tar.gz"} +FUNCTION_SYSTEM_CACHE=${FUNCTION_SYSTEM_CACHE:-"https://build-logs.openeuler.openatom.cn:38080/temp-archived/openeuler/openYuanrong/yr_cache/$(uname -m)/yr-functionsystem-v0.5.0.tar.gz"} function check_datasystem() { # check whether datasystem exist if [ ! -d "${YR_DATASYSTEM_BIN_DIR}"/datasystem/output/sdk/cpp/include ]; then diff --git a/tools/download_opensource.sh b/tools/download_opensource.sh index a8cb80db6a9b02938847467b826ed0dc20bac9a8..44a03e24845e7e3e8298facb9c5a6a391fe599bc 100644 --- a/tools/download_opensource.sh +++ b/tools/download_opensource.sh @@ -22,7 +22,7 @@ MODULES="all" DOWNLOAD_TEST_THIRDPARTY="ON" LOCAL_OS=$(head -1 /etc/os-release | tail -1 | awk -F "\"" '{print $2}')_$(uname -m) - +THIRD_PARTY_CACHE=${THIRD_PARTY_CACHE:-"https://build-logs.openeuler.openatom.cn:38080/temp-archived/openeuler/openYuanrong/deps/"} echo -e "local os is $LOCAL_OS"