diff --git a/static_core/irtoc/backend/compiler/codegen_fastpath.cpp b/static_core/irtoc/backend/compiler/codegen_fastpath.cpp index 8a5cbc5432ba0156a9053612e44b041de91e7b0d..ef9ea20523143e75d7eeec8f0ddcbd7297b8f0a7 100644 --- a/static_core/irtoc/backend/compiler/codegen_fastpath.cpp +++ b/static_core/irtoc/backend/compiler/codegen_fastpath.cpp @@ -322,10 +322,6 @@ void CodegenFastPath::EmitWriteTlabStatsSafeIntrinsic([[maybe_unused]] Intrinsic auto src1 = src[FIRST_OPERAND]; auto src2 = src[SECOND_OPERAND]; - auto tmp = src[THIRD_OPERAND]; - - ASSERT(tmp.IsValid()); - ASSERT(tmp != GetRegfile()->GetZeroReg()); auto regs = GetCallerRegsMask(GetArch(), false) | GetCalleeRegsMask(GetArch(), false); auto vregs = GetCallerRegsMask(GetArch(), true); @@ -335,6 +331,9 @@ void CodegenFastPath::EmitWriteTlabStatsSafeIntrinsic([[maybe_unused]] Intrinsic auto id = RuntimeInterface::EntrypointId::WRITE_TLAB_STATS_NO_BRIDGE; MemRef entry(ThreadReg(), GetRuntime()->GetEntrypointTlsOffset(GetArch(), id)); + constexpr size_t NUM_OF_ARGS = 2; + // Temp registers are not available because they are busy in irtoc tlab allocation. So it is faked with param reg. + auto tmp = GetEncoder()->GetTarget().GetParamReg(NUM_OF_ARGS); GetEncoder()->EncodeLdr(tmp, false, entry); GetEncoder()->MakeCall(tmp); diff --git a/static_core/irtoc/scripts/common.irt b/static_core/irtoc/scripts/common.irt index 3fc4376ef978302e5034efec78b1e4243f04f5a2..afc4655a8cc6671a3bb8c09a7f8f7f56670be246 100644 --- a/static_core/irtoc/scripts/common.irt +++ b/static_core/irtoc/scripts/common.irt @@ -127,6 +127,7 @@ module Constants THREAD_FRAME_OFFSET = "cross_values::GetManagedThreadFrameOffset(GetArch())" THREAD_EXCEPTION_OFFSET = "cross_values::GetManagedThreadExceptionOffset(GetArch())" THREAD_INTERPRETER_CACHE_OFFSET = "cross_values::GetManagedThreadInterpreterCacheOffset(GetArch())" + THREAD_FLATTENED_STRING_CACHE_OFFSET = "cross_values::GetManagedThreadFlattenedStringCacheOffset(GetArch())" THREAD_FLAG_OFFSET = "cross_values::GetManagedThreadFlagOffset(GetArch())" THREAD_VM_OFFSET = "cross_values::GetThreadVmOffset(GetArch())" MARK_WORD_OFFSET = "cross_values::GetObjectHeaderMarkWordOffset(GetArch())" @@ -224,8 +225,10 @@ macro(:call_runtime) { |e, *args| } macro(:call_runtime_save_all) { |e, *args| + # Load entry before save registers because stack SpillFill is possible + entry := LoadI(%tr).Imm(e).ptr Intrinsic(:SAVE_REGISTERS_EP).void - ret = call_runtime(e, *args) + ret := CallIndirect(entry, *args) Intrinsic(:RESTORE_REGISTERS_EP).void ret } diff --git a/static_core/irtoc/scripts/string_helpers.irt b/static_core/irtoc/scripts/string_helpers.irt index c3663a76637db848a462797cbaff5d440865ef73..ce75448a5752a7eef1156a2c6825e44fd27b20e8 100644 --- a/static_core/irtoc/scripts/string_helpers.irt +++ b/static_core/irtoc/scripts/string_helpers.irt @@ -39,6 +39,41 @@ macro(:check_string_type) do |str| } end +module StringFlatteningCacheConstants + ADDRESS_SHIFT = "cross_values::GetStringFlatteningCacheAddressShift(GetArch())" + ADDRESS_MASK = "cross_values::GetStringFlatteningCacheAddressMask(GetArch())" + ENTRY_SIZE = "cross_values::GetStringFlatteningCacheEntrySize(GetArch())" + VALUE_OFFSET = "cross_values::GetStringFlatteningCacheValueOffset(GetArch())" +end + +macro(:try_use_cached_flat_str) do |str| + baseClass := LoadI(str).ref + stringType := LoadI(baseClass).Imm(Constants::STRING_TYPE_OFFSET).u64 + If(stringType, Constants::STRING_TYPE_SLICE).EQ { + Goto(:SlowPathEntrypoint) + } + If(stringType, Constants::STRING_TYPE_TREE).EQ { + 
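+    # Tree string: consult the per-thread flattened string cache before giving up.
+    # The cache is a flat ref array of (tree string, flat string) pairs; the entry
+    # index is ((address >> ADDRESS_SHIFT) & ADDRESS_MASK) * ENTRY_SIZE, mirroring
+    # StringFlatteningCache::GetIndex() on the C++ side of this change.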
stringCache := LoadI(%tr).Imm(Constants::THREAD_FLATTENED_STRING_CACHE_OFFSET).ref + If(stringCache, 0).EQ { + Goto(:SlowPathEntrypoint) + } + + # cannot cast directly from ref to uint + strPtr := Cast(str).ptr + strUint := Bitcast(strPtr).word + strShifted := ShrI(strUint).Imm(StringFlatteningCacheConstants::ADDRESS_SHIFT).word + strMasked := AndI(strShifted).Imm(StringFlatteningCacheConstants::ADDRESS_MASK).word + strIndex := Mul(strMasked, StringFlatteningCacheConstants::ENTRY_SIZE).word + key := LoadArray(stringCache, strIndex).ref + If(key, str).NE { + Goto(:SlowPathEntrypoint) + } + strIndex := AddI(strIndex).Imm(StringFlatteningCacheConstants::VALUE_OFFSET).ref_uint + strCached := LoadArray(stringCache, strIndex).ref + } + result := Phi(str, strCached).ref +end + def GenerateStringEquals(lang, dynamic, compression, cgmode = :FastPath) suffix = (compression ? 'Compressed' : '') + (cgmode == :NativePlus ? 'NativePlus': '') length_shift = Constants::STRING_LENGTH_SHIFT @@ -111,7 +146,7 @@ def GenerateStringEquals(lang, dynamic, compression, cgmode = :FastPath) RegMask.new($full_regmap, :arg0, :arg1, :tmp0, :tmp1, :callee0, :caller1) function("#{lang}StringEquals#{suffix}".to_sym, - params: {str1: 'ref', str2: 'ref'}, + params: {str1_orig: 'ref', str2_orig: 'ref'}, regmap: $full_regmap, regalloc_set: reg_mask, mode: mode, @@ -122,15 +157,15 @@ def GenerateStringEquals(lang, dynamic, compression, cgmode = :FastPath) next end unless dynamic - If(str2, 0).EQ.Unlikely.b { + If(str2_orig, 0).EQ.Unlikely.b { Goto(:NotEqual) } end - If(str1, str2).EQ.Unlikely.b { + If(str1_orig, str2_orig).EQ.Unlikely.b { Return(1).b } - check_string_type(str1) - check_string_type(str2) + str1 := try_use_cached_flat_str(str1_orig) + str2 := try_use_cached_flat_str(str2_orig) if dynamic length1 := LoadI(str1).Imm(Constants::STRING_LENGTH_OFFSET).u32 length2 := LoadI(str2).Imm(Constants::STRING_LENGTH_OFFSET).u32 @@ -210,10 +245,10 @@ def GenerateStringEquals(lang, dynamic, compression, cgmode = :FastPath) Return(0).b Label(:SlowPathEntrypoint) if cgmode == :NativePlus - Return(Call(str1, str2).Method("CoreStringEquals").b).b + Return(Call(str1_orig, str2_orig).Method("CoreStringEquals").b).b else entrypoint = get_entrypoint_offset("STRING_EQUALS_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str1, str2).AddImm(entrypoint).MethodAsImm("StringEqualsUsualBridge").Terminator.b + Intrinsic(:SLOW_PATH_ENTRY, str1_orig, str2_orig).AddImm(entrypoint).MethodAsImm("StringEqualsUsualBridge").Terminator.b Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG end } @@ -880,7 +915,7 @@ def GenerateSubstringFromStringTlab(string_compression_enabled) suffix = (string_compression_enabled ? 
"Compressed" : "") available_regs = $panda_mask function("SubStringFromStringTlab#{suffix}".to_sym, - params: {str: 'ref', begin_index: 'i32', end_index: 'i32'}, + params: {str_orig: 'ref', begin_index: 'i32', end_index: 'i32'}, regmap: $full_regmap, regalloc_set: available_regs, mode: [:FastPath]) { @@ -891,7 +926,7 @@ def GenerateSubstringFromStringTlab(string_compression_enabled) next end - check_string_type(str) + str := try_use_cached_flat_str(str_orig) # Note, 'str' is checked against nullptr in the InstBuilder (see AddArgNullcheckIfNeeded) length_packed := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 @@ -993,7 +1028,7 @@ def GenerateSubstringFromStringTlab(string_compression_enabled) Label(:SlowPathEntrypoint) entrypoint = get_entrypoint_offset("SUB_STRING_FROM_STRING_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str, begin_index, end_index).AddImm(entrypoint).MethodAsImm("SubStringFromStringOddSavedBridge").Terminator.ptr + Intrinsic(:SLOW_PATH_ENTRY, str_orig, begin_index, end_index).AddImm(entrypoint).MethodAsImm("SubStringFromStringOddSavedBridge").Terminator.ptr Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG } end # def GenerateSubstringFromStringTlab @@ -1003,7 +1038,7 @@ def GenerateStringGetCharsTlab(string_compression_enabled) suffix = (string_compression_enabled ? "Compressed" : "") available_regs = $panda_mask function("StringGetCharsTlab#{suffix}".to_sym, - params: {str: 'ref', begin_index: 'i32', end_index: 'i32', array_klass: 'ref'}, + params: {str_orig: 'ref', begin_index: 'i32', end_index: 'i32', array_klass: 'ref'}, regmap: $full_regmap, regalloc_set: available_regs, mode: [:FastPath]) { @@ -1014,7 +1049,7 @@ def GenerateStringGetCharsTlab(string_compression_enabled) next end - check_string_type(str) + str := try_use_cached_flat_str(str_orig) If(begin_index, end_index).GT.Unlikely.b { Goto(:SlowPathEntrypoint) # Out of range @@ -1058,7 +1093,7 @@ def GenerateStringGetCharsTlab(string_compression_enabled) Label(:SlowPathEntrypoint) entrypoint = get_entrypoint_offset("STRING_GET_CHARS_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str, begin_index, end_index).AddImm(entrypoint).MethodAsImm("StringGetChars4ArgBridge").Terminator.ptr + Intrinsic(:SLOW_PATH_ENTRY, str_orig, begin_index, end_index).AddImm(entrypoint).MethodAsImm("StringGetChars4ArgBridge").Terminator.ptr Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG } end # def GenerateStringGetCharsTlab diff --git a/static_core/irtoc/scripts/strings.irt b/static_core/irtoc/scripts/strings.irt index c0c53eb8ee63515ec0436dbf0f0c2b0839df6a86..0d7313187ed45c40859f9015d086ca040ae11a3f 100644 --- a/static_core/irtoc/scripts/strings.irt +++ b/static_core/irtoc/scripts/strings.irt @@ -35,7 +35,7 @@ GenerateStringHashCode(string_compression_enabled=false, :FastPath) available_regs = $panda_mask function(:StringConcat2Tlab, - params: {str1: 'ref', str2: 'ref'}, + params: {str1_orig: 'ref', str2_orig: 'ref'}, regmap: $full_regmap, regalloc_set: available_regs, mode: [:FastPath]) { @@ -45,8 +45,8 @@ function(:StringConcat2Tlab, next end - check_string_type(str1) - check_string_type(str2) + str1 := try_use_cached_flat_str(str1_orig) + str2 := try_use_cached_flat_str(str2_orig) klass := load_class(str1) length1 := LoadI(str1).Imm(Constants::STRING_LENGTH_OFFSET).u32 @@ -105,13 +105,13 @@ function(:StringConcat2Tlab, Label(:SlowPathEntrypoint) ep_offset = get_entrypoint_offset("STRING_CONCAT2_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str1, str2).AddImm(ep_offset).MethodAsImm("StringConcat2UsualBridge").Terminator.ptr 
+ Intrinsic(:SLOW_PATH_ENTRY, str1_orig, str2_orig).AddImm(ep_offset).MethodAsImm("StringConcat2UsualBridge").Terminator.ptr Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG } available_regs = $panda_mask function(:StringConcat3Tlab, - params: {str1: 'ref', str2: 'ref', str3: 'ref'}, + params: {str1_orig: 'ref', str2_orig: 'ref', str3_orig: 'ref'}, regmap: $full_regmap, regalloc_set: available_regs, mode: [:FastPath]) { @@ -121,9 +121,9 @@ function(:StringConcat3Tlab, next end - check_string_type(str1) - check_string_type(str2) - check_string_type(str3) + str1 := try_use_cached_flat_str(str1_orig) + str2 := try_use_cached_flat_str(str2_orig) + str3 := try_use_cached_flat_str(str3_orig) klass := load_class(str1) length1 := LoadI(str1).Imm(Constants::STRING_LENGTH_OFFSET).u32 @@ -199,13 +199,13 @@ Label(:EndCopy) Label(:SlowPathEntrypoint) ep_offset = get_entrypoint_offset("STRING_CONCAT3_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str1, str2, str3).AddImm(ep_offset).MethodAsImm("StringConcat3OddSavedBridge").Terminator.ptr + Intrinsic(:SLOW_PATH_ENTRY, str1_orig, str2_orig, str3_orig).AddImm(ep_offset).MethodAsImm("StringConcat3OddSavedBridge").Terminator.ptr Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG } available_regs = $panda_mask function(:StringConcat4Tlab, - params: {str1: 'ref', str2: 'ref', str3: 'ref', str4: 'ref'}, + params: {str1_orig: 'ref', str2_orig: 'ref', str3_orig: 'ref', str4_orig: 'ref'}, regmap: $full_regmap, regalloc_set: available_regs, mode: [:FastPath]) { @@ -214,10 +214,10 @@ function(:StringConcat4Tlab, next end - check_string_type(str1) - check_string_type(str2) - check_string_type(str3) - check_string_type(str4) + str1 := try_use_cached_flat_str(str1_orig) + str2 := try_use_cached_flat_str(str2_orig) + str3 := try_use_cached_flat_str(str3_orig) + str4 := try_use_cached_flat_str(str4_orig) klass := load_class(str1) length1 := LoadI(str1).Imm(Constants::STRING_LENGTH_OFFSET).u32 @@ -312,14 +312,14 @@ Label(:EndCopy) Label(:SlowPathEntrypoint) ep_offset = get_entrypoint_offset("STRING_CONCAT4_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str1, str2, str3, str4).AddImm(ep_offset).MethodAsImm("StringConcat4UsualBridge").Terminator.ptr + Intrinsic(:SLOW_PATH_ENTRY, str1_orig, str2_orig, str3_orig, str4_orig).AddImm(ep_offset).MethodAsImm("StringConcat4UsualBridge").Terminator.ptr Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG } def GenerateStringCompareTo(cgmode) suffix = (cgmode == :NativePlus ? 
'NativePlus': '') function("StringCompareTo#{suffix}", - params: {str1: 'ref', str2: 'ref'}, + params: {str1_orig: 'ref', str2_orig: 'ref'}, regmap: $full_regmap, regalloc_set: $panda_mask, mode: [cgmode]) { @@ -330,17 +330,17 @@ def GenerateStringCompareTo(cgmode) next end - check_string_type(str1) - check_string_type(str2) + str1 := try_use_cached_flat_str(str1_orig) + str2 := try_use_cached_flat_str(str2_orig) Return(macroStringCompareTo(str1, str2)).i32 Label(:SlowPathEntrypoint) if cgmode == :NativePlus - Return(Call(str1, str2).Method('CoreStringCompareTo').i32).i32 + Return(Call(str1_orig, str2_orig).Method('CoreStringCompareTo').i32).i32 else entrypoint = get_entrypoint_offset("STRING_COMPARE_TO_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str1, str2).AddImm(entrypoint).MethodAsImm("StringCompareToUsualBridge").Terminator.i32 + Intrinsic(:SLOW_PATH_ENTRY, str1_orig, str2_orig).AddImm(entrypoint).MethodAsImm("StringCompareToUsualBridge").Terminator.i32 Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG end } diff --git a/static_core/plugins/ets/irtoc_scripts/string.irt b/static_core/plugins/ets/irtoc_scripts/string.irt index 1b6e316c5208562322f94389f84da6b741111b80..94c8ce0e8a10352c7f0404f19ee8bd13b5514e27 100644 --- a/static_core/plugins/ets/irtoc_scripts/string.irt +++ b/static_core/plugins/ets/irtoc_scripts/string.irt @@ -247,15 +247,6 @@ Label(:SlowPathEntrypoint) } -if Options.arch == :arm64 - trim_left_regs = $temps_mask + :callee0 + :callee1 - trim_right_regs = $temps_mask + :callee0 + :callee1 -else - trim_left_regs = $temps_mask + :callee0 + :caller0 + :caller1 - trim_right_regs = $temps_mask + :callee0 + :caller0 + :caller1 -end - - function(:StringTrimLeftBase, params: {str: 'ref', unused1: 'i32', unused2: 'i32'}, regmap: $full_regmap, @@ -310,9 +301,9 @@ Label(:SlowPathEntrypoint) function(:StringTrimLeft, - params: {str: 'ref', unused1: 'i32', unused2: 'i32'}, + params: {str_orig: 'ref', unused1: 'i32', unused2: 'i32'}, regmap: $full_regmap, - regalloc_set: $trim_left_regs, + regalloc_set: $panda_mask, mode: [:FastPath]) { if Options.arch == :arm32 @@ -321,7 +312,7 @@ function(:StringTrimLeft, next end - check_string_type(str) + str := try_use_cached_flat_str(str_orig) length_packed := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 If(length_packed, 1).LE.Unlikely.b { @@ -351,7 +342,7 @@ Label(:L1) Intrinsic(:TAIL_CALL).AddImm(entrypoint2).MethodAsImm("StringTrimLeftBase").Terminator.ptr Label(:SlowPathEntrypoint) entrypoint = get_entrypoint_offset("STRING_TRIM_LEFT_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str).AddImm(entrypoint).MethodAsImm("StringTrimLeft3ArgBridge").Terminator.ptr + Intrinsic(:SLOW_PATH_ENTRY, str_orig).AddImm(entrypoint).MethodAsImm("StringTrimLeft3ArgBridge").Terminator.ptr Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG } @@ -411,9 +402,9 @@ Label(:SlowPathEntrypoint) function(:StringTrimRight, - params: {str: 'ref', unused1: 'i32', unused2: 'i32'}, + params: {str_orig: 'ref', unused1: 'i32', unused2: 'i32'}, regmap: $full_regmap, - regalloc_set: $trim_right_regs, + regalloc_set: $panda_mask, mode: [:FastPath]) { if Options.arch == :arm32 @@ -422,7 +413,7 @@ function(:StringTrimRight, next end - check_string_type(str) + str := try_use_cached_flat_str(str_orig) length_packed := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 If(length_packed, 1).LE.Unlikely.b { @@ -454,7 +445,7 @@ Label(:L1) Intrinsic(:TAIL_CALL).AddImm(entrypoint2).MethodAsImm("StringTrimRightBase").Terminator.ptr Label(:SlowPathEntrypoint) entrypoint = 
get_entrypoint_offset("STRING_TRIM_RIGHT_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str).AddImm(entrypoint).MethodAsImm("StringTrimRight3ArgBridge").Terminator.ptr + Intrinsic(:SLOW_PATH_ENTRY, str_orig).AddImm(entrypoint).MethodAsImm("StringTrimRight3ArgBridge").Terminator.ptr Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG } @@ -531,7 +522,7 @@ Label(:SlowPathEntrypoint) function(:StringTrim, - params: {str: 'ref', unused1: 'i32', unused2: 'i32'}, + params: {str_orig: 'ref', unused1: 'i32', unused2: 'i32'}, regmap: $full_regmap, regalloc_set: $panda_mask, mode: [:FastPath]) { @@ -542,7 +533,7 @@ function(:StringTrim, next end - check_string_type(str) + str := try_use_cached_flat_str(str_orig) length_packed := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 # length == 0 @@ -606,7 +597,7 @@ Label(:FirstCharWhitespace) Intrinsic(:TAIL_CALL).AddImm(entrypoint3).MethodAsImm("StringTrimLeftBase").Terminator.ptr Label(:SlowPathEntrypoint) entrypoint = get_entrypoint_offset("STRING_TRIM_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str).AddImm(entrypoint).MethodAsImm("StringTrim3ArgBridge").Terminator.ptr + Intrinsic(:SLOW_PATH_ENTRY, str_orig).AddImm(entrypoint).MethodAsImm("StringTrim3ArgBridge").Terminator.ptr Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG } @@ -667,7 +658,7 @@ def GenerateStringStartsWith(cgmode) suffix = (cgmode == :NativePlus ? 'NativePlus': '') upsuffix = (cgmode == :NativePlus ? '_NATIVE_PLUS': '') function("StringStartsWith#{suffix}".to_sym, - params: {str: 'ref', pfx: 'ref', from_index: 'i32'}, + params: {str_orig: 'ref', pfx_orig: 'ref', from_index: 'i32'}, regmap: $full_regmap, regalloc_set: $panda_mask, mode: [cgmode]) { @@ -678,8 +669,8 @@ def GenerateStringStartsWith(cgmode) next end - check_string_type(str) - check_string_type(pfx) + str := try_use_cached_flat_str(str_orig) + pfx := try_use_cached_flat_str(pfx_orig) pfx_len_packed := LoadI(pfx).Imm(Constants::STRING_LENGTH_OFFSET).u32 # Return 'true' if prefix is empty @@ -724,10 +715,10 @@ def GenerateStringStartsWith(cgmode) Label(:SlowPathEntrypoint) if cgmode == :NativePlus - Return(Call(str, pfx, from_index).Method("StdCoreStringStartsWith").b).b + Return(Call(str_orig, pfx_orig, from_index).Method("StdCoreStringStartsWith").b).b else entrypoint = get_entrypoint_offset("STRING_STARTS_WITH_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str, pfx, from_index).AddImm(entrypoint).MethodAsImm("StringStartsWithOddSavedBridge").Terminator.b + Intrinsic(:SLOW_PATH_ENTRY, str_orig, pfx_orig, from_index).AddImm(entrypoint).MethodAsImm("StringStartsWithOddSavedBridge").Terminator.b Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG end } @@ -782,7 +773,7 @@ def GenerateStringEndsWith(cgmode) suffix = (cgmode == :NativePlus ? 'NativePlus': '') upsuffix = (cgmode == :NativePlus ? 
'_NATIVE_PLUS': '') function("StringEndsWith#{suffix}".to_sym, - params: {str: 'ref', sfx: 'ref', end_index: 'i32'}, + params: {str_orig: 'ref', sfx_orig: 'ref', end_index: 'i32'}, regmap: $full_regmap, regalloc_set: $panda_mask, mode: [cgmode]) { @@ -793,8 +784,8 @@ def GenerateStringEndsWith(cgmode) next end - check_string_type(str) - check_string_type(sfx) + str := try_use_cached_flat_str(str_orig) + sfx := try_use_cached_flat_str(sfx_orig) sfx_len_packed := LoadI(sfx).Imm(Constants::STRING_LENGTH_OFFSET).u32 # Return 'true' if suffix is empty @@ -841,10 +832,10 @@ def GenerateStringEndsWith(cgmode) Intrinsic(:TAIL_CALL).AddImm(entrypoint).MethodAsImm("StringEndsWithBase#{suffix}").Terminator.b Label(:SlowPathEntrypoint) if cgmode == :NativePlus - Return(Call(str, sfx, end_index).Method("StdCoreStringEndsWith").b).b + Return(Call(str_orig, sfx_orig, end_index).Method("StdCoreStringEndsWith").b).b else entrypoint = get_entrypoint_offset("STRING_ENDS_WITH_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str, sfx, end_index).AddImm(entrypoint).MethodAsImm("StringEndsWithOddSavedBridge").Terminator.b + Intrinsic(:SLOW_PATH_ENTRY, str_orig, sfx_orig, end_index).AddImm(entrypoint).MethodAsImm("StringEndsWithOddSavedBridge").Terminator.b Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG end } @@ -855,7 +846,7 @@ GenerateStringEndsWith(:NativePlus) function(:StringGetBytesTlab, - params: {str: 'ref', begin_index: 'i32', end_index: 'i32', array_klass: 'ref'}, + params: {str_orig: 'ref', begin_index: 'i32', end_index: 'i32', array_klass: 'ref'}, regmap: $full_regmap, regalloc_set: $panda_mask, mode: [:FastPath]) { @@ -866,7 +857,7 @@ function(:StringGetBytesTlab, next end - check_string_type(str) + str := try_use_cached_flat_str(str_orig) If(begin_index, end_index).GT.Unlikely.b { Goto(:SlowPathEntrypoint) # Out of range @@ -876,8 +867,6 @@ function(:StringGetBytesTlab, Goto(:SlowPathEntrypoint) # Out of range } - check_string_type(str) - # Note, 'str' is checked against nullptr in the InstBuilder (see AddArgNullcheckIfNeeded) length := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32; uncompressed := AndI(length).Imm(1).u32; @@ -906,7 +895,7 @@ function(:StringGetBytesTlab, Label(:SlowPathEntrypoint) entrypoint = get_entrypoint_offset("STRING_GET_BYTES_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str, begin_index, end_index).AddImm(entrypoint).MethodAsImm("StringGetBytes4ArgBridge").Terminator.ptr + Intrinsic(:SLOW_PATH_ENTRY, str_orig, begin_index, end_index).AddImm(entrypoint).MethodAsImm("StringGetBytes4ArgBridge").Terminator.ptr Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG } @@ -1480,7 +1469,7 @@ def GenerateStringIndexOf(cgmode) suffix = (cgmode == :NativePlus ? 'NativePlus': '') upsuffix = (cgmode == :NativePlus ? 
'_NATIVE_PLUS': '') function("StringIndexOf#{suffix}".to_sym, - params: {str: 'ref', ch: 'u16', fake: 'i32'}, + params: {str_orig: 'ref', ch: 'u16', fake: 'i32'}, regmap: $full_regmap, regalloc_set: $panda_mask, mode: [cgmode]) { @@ -1491,7 +1480,7 @@ def GenerateStringIndexOf(cgmode) next end - check_string_type(str) + str := try_use_cached_flat_str(str_orig) str_len_packed := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 @@ -1600,10 +1589,10 @@ def GenerateStringIndexOf(cgmode) end Label(:SlowPathEntrypoint) if cgmode == :NativePlus - Return(Call(str, ch).Method("StdCoreStringIndexOf").i32).i32 + Return(Call(str_orig, ch).Method("StdCoreStringIndexOf").i32).i32 else entrypoint = get_entrypoint_offset("STRING_INDEX_OF_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str, ch, fake).AddImm(entrypoint).MethodAsImm("StringIndexOfOddSavedBridge").Terminator.i32 + Intrinsic(:SLOW_PATH_ENTRY, str_orig, ch, fake).AddImm(entrypoint).MethodAsImm("StringIndexOfOddSavedBridge").Terminator.i32 Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG end } @@ -1622,7 +1611,7 @@ def GenerateStringIndexOfAfter(cgmode) suffix = (cgmode == :NativePlus ? 'NativePlus': '') upsuffix = (cgmode == :NativePlus ? '_NATIVE_PLUS': '') function("StringIndexOfAfter#{suffix}".to_sym, - params: {str: 'ref', ch: 'u16', start_index: 'i32'}, + params: {str_orig: 'ref', ch: 'u16', start_index: 'i32'}, regmap: $full_regmap, regalloc_set: $panda_mask, mode: [cgmode]) { @@ -1643,7 +1632,7 @@ def GenerateStringIndexOfAfter(cgmode) start_index := Phi(start_index0, start_index1).i32 end - check_string_type(str) + str := try_use_cached_flat_str(str_orig) str_len_packed := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 # Return '-1' if 'str' is empty. @@ -1791,10 +1780,10 @@ def GenerateStringIndexOfAfter(cgmode) end Label(:SlowPathEntrypoint) if cgmode == :NativePlus - Return(Call(str, ch, start_index).Method("StdCoreStringIndexOfAfter").i32).i32 + Return(Call(str_orig, ch, start_index).Method("StdCoreStringIndexOfAfter").i32).i32 else entrypoint = get_entrypoint_offset("STRING_INDEX_OF_AFTER_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str, ch, start_index).AddImm(entrypoint).MethodAsImm("StringIndexOfAfterOddSavedBridge").Terminator.i32 + Intrinsic(:SLOW_PATH_ENTRY, str_orig, ch, start_index).AddImm(entrypoint).MethodAsImm("StringIndexOfAfterOddSavedBridge").Terminator.i32 Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG end } @@ -1805,7 +1794,7 @@ GenerateStringIndexOfAfter(:NativePlus) function(:StringRepeatTlab, - params: {str: 'ref', cnt: 'i32'}, + params: {str_orig: 'ref', cnt: 'i32'}, regmap: $full_regmap, regalloc_set: $panda_mask, mode: [:FastPath]) { @@ -1820,7 +1809,7 @@ function(:StringRepeatTlab, Goto(:SlowPathEntrypoint) } - check_string_type(str) + str := try_use_cached_flat_str(str_orig) IfImm(Compare(count, 0).EQ.b).Imm(0).NE { klass := load_class(str) @@ -1878,12 +1867,12 @@ Label(:End) Label(:SlowPathEntrypoint) entrypoint = get_entrypoint_offset("STRING_REPEAT_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, str, count).AddImm(entrypoint).MethodAsImm("StringRepeatUsualBridge").Terminator.ptr + Intrinsic(:SLOW_PATH_ENTRY, str_orig, count).AddImm(entrypoint).MethodAsImm("StringRepeatUsualBridge").Terminator.ptr Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG } function(:WriteStringToMem, - params: {mem: 'i64', str: 'ref'}, + params: {mem: 'i64', str_orig: 'ref'}, regmap: $full_regmap, regalloc_set: $panda_mask, mode: [:FastPath]) { @@ -1893,7 +1882,7 @@ function(:WriteStringToMem, next end - 
check_string_type(str) + str := try_use_cached_flat_str(str_orig) buf := Bitcast(mem).ptr len := LoadI(str).Imm(Constants::STRING_LENGTH_OFFSET).u32 @@ -1926,7 +1915,7 @@ Label(:End) Return(len).u32 Label(:SlowPathEntrypoint) entrypoint = get_entrypoint_offset("WRITE_STRING_TO_MEM_SLOW_PATH") - Intrinsic(:SLOW_PATH_ENTRY, mem, str).AddImm(entrypoint).MethodAsImm("WriteStringToMemUsualBridge").Terminator.u32 + Intrinsic(:SLOW_PATH_ENTRY, mem, str_orig).AddImm(entrypoint).MethodAsImm("WriteStringToMemUsualBridge").Terminator.u32 Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG } diff --git a/static_core/plugins/ets/irtoc_scripts/string_builder.irt b/static_core/plugins/ets/irtoc_scripts/string_builder.irt index 37759df9c7674bba04d9b6915061aea2520d23a5..9b02395eee69cd36cb92d8919612287d80b58f34 100644 --- a/static_core/plugins/ets/irtoc_scripts/string_builder.irt +++ b/static_core/plugins/ets/irtoc_scripts/string_builder.irt @@ -528,9 +528,9 @@ Label(:ForEachBufferSlot) # ------------------- # Object is a string # ------------------- - check_string_type(obj) - str_len := LoadI(obj).Imm(Constants::STRING_LENGTH_OFFSET).i32 - src_data := AddI(Cast(obj).SrcType(Constants::COMPILER_REFERENCE).ptr).Imm(Constants::STRING_DATA_OFFSET).ptr + flat_str := try_use_cached_flat_str(obj) + str_len := LoadI(flat_str).Imm(Constants::STRING_LENGTH_OFFSET).i32 + src_data := AddI(Cast(flat_str).SrcType(Constants::COMPILER_REFERENCE).ptr).Imm(Constants::STRING_DATA_OFFSET).ptr src_len := ShrI(str_len).Imm(Constants::STRING_LENGTH_SHIFT).i32 If(sb_compress, 0).EQ.Unlikely.b { Goto(:DoNotCompressString) diff --git a/static_core/plugins/ets/runtime/ets_coroutine.cpp b/static_core/plugins/ets/runtime/ets_coroutine.cpp index ad8b3bd08779dec8ab4805e3f5c6ebf78721815c..1e8b293b5da52b9533003292c00d901df35bf4ea 100644 --- a/static_core/plugins/ets/runtime/ets_coroutine.cpp +++ b/static_core/plugins/ets/runtime/ets_coroutine.cpp @@ -14,6 +14,7 @@ */ #include "plugins/ets/runtime/ets_coroutine.h" +#include "mem/refstorage/global_object_storage.h" #include "runtime/include/value.h" #include "macros.h" #include "mem/refstorage/reference.h" @@ -254,11 +255,37 @@ ExternalIfaceTable *EtsCoroutine::GetExternalIfaceTable() } void EtsCoroutine::OnHostWorkerChanged() +{ + UpdateCachedObjects(); +} + +void EtsCoroutine::UpdateCachedObjects() { // update the interop context pointer auto *worker = GetWorker(); auto *ptr = worker->GetLocalStorage().Get(); GetLocalStorage().Set(ptr); + + if (GetType() == Coroutine::Type::MUTATOR) { + // update the string cache pointer + auto *curCoro = EtsCoroutine::GetCurrent(); + ASSERT(curCoro != nullptr); + auto setStringCachePtr = [this, worker]() { + auto *cacheRef = + worker->GetLocalStorage().Get(); + auto *cache = GetVM()->GetGlobalObjectStorage()->Get(cacheRef); + SetFlattenedStringCache(cache); + }; + // We need to put the current coro into the managed state to be GC-safe, because we manipulate a raw + // ObjectHeader* + if (ManagedThread::IsManagedScope()) { + setStringCachePtr(); + } else { + // maybe we will find a more performant solution in future... 
+ ScopedManagedCodeThread s(curCoro); + setStringCachePtr(); + } + } } void EtsCoroutine::OnContextSwitchedTo() diff --git a/static_core/plugins/ets/runtime/ets_coroutine.h b/static_core/plugins/ets/runtime/ets_coroutine.h index 8ad028b3f99d8749879a285f73b9e8e10505b8a5..34a014099524703c6cb0dfae0e3c1e18d839f446 100644 --- a/static_core/plugins/ets/runtime/ets_coroutine.h +++ b/static_core/plugins/ets/runtime/ets_coroutine.h @@ -152,6 +152,8 @@ public: static constexpr CoroutinePriority TIMER_CALLBACK = CoroutinePriority::MEDIUM_PRIORITY; static constexpr CoroutinePriority LAUNCH = CoroutinePriority::MEDIUM_PRIORITY; + void UpdateCachedObjects() override; + protected: // we would like everyone to use the factory to create a EtsCoroutine explicit EtsCoroutine(ThreadId id, mem::InternalAllocatorPtr allocator, PandaVM *vm, PandaString name, diff --git a/static_core/plugins/ets/runtime/ets_vm.cpp b/static_core/plugins/ets/runtime/ets_vm.cpp index ab1dff4cbff7ba3a0a8bea2be9f7f9cbd0de8a52..f6d1493b7d84cfe6bc09e497d2242ecdf47af9be 100644 --- a/static_core/plugins/ets/runtime/ets_vm.cpp +++ b/static_core/plugins/ets/runtime/ets_vm.cpp @@ -326,6 +326,7 @@ bool PandaEtsVM::Initialize() } referenceProcessor_->Initialize(); + coroutineManager_->OnRuntimeInitialization(); } [[maybe_unused]] bool cachesCreated = (doubleToStringCache_ != nullptr && floatToStringCache_ != nullptr && longToStringCache_ != nullptr); diff --git a/static_core/runtime/arch/asm_support.cpp b/static_core/runtime/arch/asm_support.cpp index b4d23b18abe95cbc0620a4a4a62018c84bd88913..71953ad8e54b279b90f72595aa7fe81de4255f10 100644 --- a/static_core/runtime/arch/asm_support.cpp +++ b/static_core/runtime/arch/asm_support.cpp @@ -21,6 +21,7 @@ #include "runtime/include/method.h" #include "runtime/include/mtmanaged_thread.h" #include "runtime/include/thread.h" +#include "runtime/include/string_flattening_cache.h" #include "plugins_defines.h" namespace ark { diff --git a/static_core/runtime/asm_defines/asm_defines.def b/static_core/runtime/asm_defines/asm_defines.def index 92e6a6f76311f13dc9958f1b18536310b4d6412e..5c4f831d5c77226b13d8914c3cd2a33d446bc038 100644 --- a/static_core/runtime/asm_defines/asm_defines.def +++ b/static_core/runtime/asm_defines/asm_defines.def @@ -90,6 +90,7 @@ DEFINE_VALUE(MANAGED_THREAD_LANGUAGE_EXTENSION_DATA_OFFSET, ManagedThread::GetLa DEFINE_VALUE(MANAGED_THREAD_INTERNAL_ID_OFFSET, ManagedThread::GetInternalIdOffset()) DEFINE_VALUE(MANAGED_THREAD_RUNTIME_CALL_ENABLED_OFFSET, ManagedThread::GetRuntimeCallEnabledOffset()) DEFINE_VALUE(MANAGED_THREAD_INTERPRETER_CACHE_OFFSET, ManagedThread::GetInterpreterCacheOffset()) +DEFINE_VALUE(MANAGED_THREAD_FLATTENED_STRING_CACHE_OFFSET, ManagedThread::GetFlattenedStringCacheOffset()) DEFINE_VALUE(MT_MANAGED_THREAD_LOCKED_OBJECT_CAPACITY_OFFSET, MTManagedThread::GetLockedObjectCapacityOffset()) DEFINE_VALUE(MT_MANAGED_THREAD_LOCKED_OBJECT_SIZE_OFFSET, MTManagedThread::GetLockedObjectSizeOffset()) @@ -218,5 +219,10 @@ DEFINE_VALUE(RUNTIME_MODE_ODD_SAVED, 3) DEFINE_VALUE(MIN_PREFIX_OPCODE_INDEX, BytecodeInstruction::GetMinPrefixOpcodeIndex()) DEFINE_VALUE(TAGGED_VALUE_UNDEFINED, coretypes::TaggedValue::VALUE_UNDEFINED) + +DEFINE_VALUE(STRING_FLATTENING_CACHE_ADDRESS_SHIFT, ark::StringFlatteningCache::GetAddressShift()) +DEFINE_VALUE(STRING_FLATTENING_CACHE_ADDRESS_MASK, ark::StringFlatteningCache::GetAddressMask()) +DEFINE_VALUE(STRING_FLATTENING_CACHE_ENTRY_SIZE, ark::StringFlatteningCache::GetEntrySize()) +DEFINE_VALUE(STRING_FLATTENING_CACHE_VALUE_OFFSET, 
ark::StringFlatteningCache::GetValueOffset()) // NOLINTEND(readability-identifier-naming) #include "plugins_asm_defines.def" diff --git a/static_core/runtime/asm_defines/defines.cpp b/static_core/runtime/asm_defines/defines.cpp index dbef986831b1a6ee055c843bbdfe1e75c8f56973..c8bc146b37fa4410970301a8ae68ac43137643d5 100644 --- a/static_core/runtime/asm_defines/defines.cpp +++ b/static_core/runtime/asm_defines/defines.cpp @@ -25,6 +25,7 @@ #include "runtime/include/mtmanaged_thread.h" #include "runtime/mem/tlab.h" #include "utils/cframe_layout.h" +#include "runtime/include/string_flattening_cache.h" #include "plugins_defines.h" diff --git a/static_core/runtime/coretypes/string.cpp b/static_core/runtime/coretypes/string.cpp index 7a997e464d798dd5665cb638a9073233067b00ff..6871b10a8e680ed06752721abf188393fc6e0e01 100644 --- a/static_core/runtime/coretypes/string.cpp +++ b/static_core/runtime/coretypes/string.cpp @@ -34,6 +34,7 @@ #include "runtime/include/panda_vm.h" #include "runtime/include/coretypes/string.h" #include "runtime/include/coretypes/line_string.h" +#include "runtime/include/string_flattening_cache.h" namespace ark::coretypes { @@ -888,7 +889,15 @@ FlatStringInfo FlatStringInfo::FlattenTreeString(VMHandle &treeStr, cons return FlatStringInfo(String::Cast(first), 0, treeString->GetLength()); } + StringFlatteningCache cache(ManagedThread::GetCurrent()); + auto *cachedFlatStr = cache.Get(treeStr.GetPtr()); + + if (cachedFlatStr != nullptr) { + return FlatStringInfo(cachedFlatStr, 0, cachedFlatStr->GetLength()); + } + String *s = SlowFlatten(treeStr, ctx); + cache.Update(treeStr.GetPtr(), s); return FlatStringInfo(s, 0, treeString->GetLength()); } diff --git a/static_core/runtime/coroutines/coroutine.h b/static_core/runtime/coroutines/coroutine.h index 27111a2a035ff50c1c2b796a98fe00320cb52fe5..d9e1dc8bdd274b606850691da03d104f76988166 100644 --- a/static_core/runtime/coroutines/coroutine.h +++ b/static_core/runtime/coroutines/coroutine.h @@ -344,6 +344,8 @@ public: return abortFlag_; } + virtual void UpdateCachedObjects() {}; + protected: // We would like everyone to use the factory to create a Coroutine, thus ctor is protected explicit Coroutine(ThreadId id, mem::InternalAllocatorPtr allocator, PandaVM *vm, diff --git a/static_core/runtime/coroutines/coroutine_manager.cpp b/static_core/runtime/coroutines/coroutine_manager.cpp index fa224958ea6e80e5648577a47cf660eddd022ce8..4fc031cc93b726e6d1f11759704347b0f8d3ae60 100644 --- a/static_core/runtime/coroutines/coroutine_manager.cpp +++ b/static_core/runtime/coroutines/coroutine_manager.cpp @@ -169,4 +169,14 @@ CoroutineSchedulingPolicy CoroutineManager::GetSchedulingPolicy() const return schedulingPolicy_; } +void CoroutineManager::OnRuntimeInitialization() +{ + ASSERT(Coroutine::GetCurrent() == GetMainThread()); + EnumerateWorkers([](CoroutineWorker *worker) { + worker->OnRuntimeInitialization(); + return true; + }); + Coroutine::GetCurrent()->UpdateCachedObjects(); +} + } // namespace ark diff --git a/static_core/runtime/coroutines/coroutine_manager.h b/static_core/runtime/coroutines/coroutine_manager.h index fa83e207967ad4bb2101a0a5fa035b2fed48f40e..f67b4ab6c4f0127060220d926e96e1e337163db2 100644 --- a/static_core/runtime/coroutines/coroutine_manager.h +++ b/static_core/runtime/coroutines/coroutine_manager.h @@ -111,6 +111,7 @@ public: std::optional &&epInfo, Coroutine::Type type, CoroutinePriority priority); using NativeEntrypointFunc = Coroutine::NativeEntrypointInfo::NativeEntrypointFunc; + using EnumerateWorkerCallback = 
std::function; NO_COPY_SEMANTIC(CoroutineManager); NO_MOVE_SEMANTIC(CoroutineManager); @@ -248,6 +249,15 @@ public: { } + /** + * @brief enumerate workers and apply @param cb to them + * @return true if @param cb call was successful (returned true) for every worekr and false otherwise + */ + bool EnumerateWorkers(const EnumerateWorkerCallback &cb) const + { + return EnumerateWorkersImpl(cb); + } + virtual bool IsExclusiveWorkersLimitReached() const { return false; @@ -346,6 +356,9 @@ public: return config_; } + /// should be called during runtime initialization + void OnRuntimeInitialization(); + protected: using EntrypointInfo = Coroutine::EntrypointInfo; /// Create native coroutine context instance (implementation dependent) @@ -372,6 +385,9 @@ protected: /// Can be used in descendants to create custom coroutines manually CoroutineFactory GetCoroutineFactory(); + /// Worker enumerator, returns true iff cb call succeeds for every worker + virtual bool EnumerateWorkersImpl(const EnumerateWorkerCallback &cb) const = 0; + /// limit the number of IDs for performance reasons static constexpr size_t MAX_COROUTINE_ID = std::min(0xffffU, Coroutine::MAX_COROUTINE_ID); static constexpr size_t UNINITIALIZED_COROUTINE_ID = 0x0U; diff --git a/static_core/runtime/coroutines/coroutine_worker.cpp b/static_core/runtime/coroutines/coroutine_worker.cpp index 4c5eeafe13584421adb6c8b84672d481e34acc81..086728a135e3575a4bb400044d8c623928e444ea 100644 --- a/static_core/runtime/coroutines/coroutine_worker.cpp +++ b/static_core/runtime/coroutines/coroutine_worker.cpp @@ -13,8 +13,12 @@ * limitations under the License. */ -#include "coroutines/coroutine_manager.h" -#include "coroutines/coroutine_worker.h" +#include "runtime/coroutines/coroutine_manager.h" +#include "runtime/coroutines/coroutine_worker.h" +#include "runtime/include/thread_scopes.h" +#include "runtime/include/string_flattening_cache.h" +#include "runtime/include/thread-inl.h" +#include "mem/refstorage/global_object_storage.h" namespace ark { @@ -33,4 +37,44 @@ void CoroutineWorker::OnCoroBecameActive(Coroutine *co) TriggerSchedulerExternally(co); } +void CoroutineWorker::OnRuntimeInitialization() +{ + ASSERT(!GetRuntime()->IsInitialized()); + CreateWorkerLocalObjects(); + CacheLocalObjectsInCoroutines(); +} + +void CoroutineWorker::OnWorkerStartup() +{ + ASSERT(Coroutine::GetCurrent()->GetWorker() == this); + if (GetRuntime()->IsInitialized()) { + CreateWorkerLocalObjects(); + CacheLocalObjectsInCoroutines(); + Coroutine::GetCurrent()->UpdateCachedObjects(); + } +} + +void CoroutineWorker::CreateWorkerLocalObjects() +{ + ASSERT((GetLocalStorage().Get() == nullptr)); + auto *coro = Coroutine::GetCurrent(); + ASSERT(coro != nullptr); + auto setFlattenedStringCache = [this, coro] { + auto *stringFlatteningCache = StringFlatteningCache::Create(GetPandaVM()); + ASSERT(stringFlatteningCache != nullptr); + auto *refStorage = coro->GetVM()->GetGlobalObjectStorage(); + auto *cacheRef = refStorage->Add(stringFlatteningCache, mem::Reference::ObjectType::GLOBAL); + GetLocalStorage().Set( + cacheRef, [refStorage](void *ref) { refStorage->Remove(static_cast(ref)); }); + }; + // We need to put the current coro into the managed state to be GC-safe, because we manipulate a raw + // ObjectHeader* + if (coro->IsInNativeCode()) { + ScopedManagedCodeThread s(coro); + setFlattenedStringCache(); + } else { + setFlattenedStringCache(); + } +} + } // namespace ark diff --git a/static_core/runtime/coroutines/coroutine_worker.h 
b/static_core/runtime/coroutines/coroutine_worker.h index 9cfae6cf4443d5086017e13312b3afcc674d963a..881d7dd82dc1a0fe2cedb44e450e39d3a68eb886 100644 --- a/static_core/runtime/coroutines/coroutine_worker.h +++ b/static_core/runtime/coroutines/coroutine_worker.h @@ -39,7 +39,7 @@ enum class CoroutinePriority { /// Represents a coroutine worker, which can host multiple coroutines and schedule them. class CoroutineWorker { public: - enum class DataIdx { INTEROP_CTX_PTR, EXTERNAL_IFACES, LAST_ID }; + enum class DataIdx { INTEROP_CTX_PTR, EXTERNAL_IFACES, STRING_FLATTENING_CACHE, LAST_ID }; using LocalStorage = StaticLocalStorage; using Id = int32_t; @@ -111,6 +111,16 @@ public: void TriggerSchedulerExternally(Coroutine *requester); + /// should be called during runtime initialization + void OnRuntimeInitialization(); + + /// should be called from CoroutineManager after worker creation from the coroutine assigned to the worker + void OnWorkerStartup(); + +private: + void CreateWorkerLocalObjects(); + virtual void CacheLocalObjectsInCoroutines() {} + private: Runtime *runtime_ = nullptr; PandaVM *vm_ = nullptr; diff --git a/static_core/runtime/coroutines/local_storage.h b/static_core/runtime/coroutines/local_storage.h index cddec614363fa54b056f30749ba56acec9b4c197..1e7bf6c896f49781610d12653cb3b65307e48f8f 100644 --- a/static_core/runtime/coroutines/local_storage.h +++ b/static_core/runtime/coroutines/local_storage.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2024 Huawei Device Co., Ltd. + * Copyright (c) 2024-2025 Huawei Device Co., Ltd. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -72,13 +72,30 @@ public: } template - T Get() + T Get() const { static_assert((ToIndex(IDX) < NUM_ENTRIES), "idx should be correct"); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) return reinterpret_cast(entries_[ToIndex(IDX)].data.ptr); } + template + T *GetPtr() + { + static_assert((ToIndex(IDX) < NUM_ENTRIES), "idx should be correct"); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) + return reinterpret_cast(&(entries_[ToIndex(IDX)].data.ptr)); + } + + template + constexpr size_t GetOffset() const + { + static_assert((ToIndex(IDX) < NUM_ENTRIES), "idx should be correct"); + // NOTE(konstanting): TO BE IMPLEMENTED! 
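+        // Placeholder: the byte offset of the requested entry is not computed yet,
+        // so this accessor must never be reached at runtime.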
+ UNREACHABLE(); + return 0; + } + private: std::array entries_; }; diff --git a/static_core/runtime/coroutines/stackful_coroutine_manager.cpp b/static_core/runtime/coroutines/stackful_coroutine_manager.cpp index 78f5df0241ea05f6a44fb6e616d3b08e6cbdf74a..6575041291496f13a66c069cf2af106e4e4c0753 100644 --- a/static_core/runtime/coroutines/stackful_coroutine_manager.cpp +++ b/static_core/runtime/coroutines/stackful_coroutine_manager.cpp @@ -192,6 +192,7 @@ void StackfulCoroutineManager::OnWorkerStartup(StackfulCoroutineWorker *worker) void StackfulCoroutineManager::OnWorkerStartupImpl(StackfulCoroutineWorker *worker) { + worker->OnWorkerStartup(); workers_.push_back(worker); ++activeWorkersCount_; workersCv_.Signal(); @@ -539,6 +540,17 @@ bool StackfulCoroutineManager::EnumerateThreadsImpl(const ThreadManager::Callbac return true; } +bool StackfulCoroutineManager::EnumerateWorkersImpl(const EnumerateWorkerCallback &cb) const +{ + os::memory::LockHolder lock(workersLock_); + for (auto *w : workers_) { + if (!cb(w)) { + return false; + } + } + return true; +} + void StackfulCoroutineManager::SuspendAllThreads() { os::memory::LockHolder lock(coroListLock_); diff --git a/static_core/runtime/coroutines/stackful_coroutine_manager.h b/static_core/runtime/coroutines/stackful_coroutine_manager.h index 97370c47e279671e7571803a7ee161dceca4b770..db8af86b2dbe7b785e16a855c519a1706ec762d2 100644 --- a/static_core/runtime/coroutines/stackful_coroutine_manager.h +++ b/static_core/runtime/coroutines/stackful_coroutine_manager.h @@ -145,6 +145,8 @@ protected: bool EnumerateThreadsImpl(const ThreadManager::Callback &cb, unsigned int incMask, unsigned int xorMask) const override; + bool EnumerateWorkersImpl(const EnumerateWorkerCallback &cb) const override; + CoroutineContext *CreateCoroutineContext(bool coroHasEntrypoint) override; void DeleteCoroutineContext(CoroutineContext *ctx) override; diff --git a/static_core/runtime/coroutines/stackful_coroutine_worker.cpp b/static_core/runtime/coroutines/stackful_coroutine_worker.cpp index bcc7671d1f546162def5ab69471c59d9c1a2381e..a126c8733b035d022eec42a0137f49df71519b3e 100644 --- a/static_core/runtime/coroutines/stackful_coroutine_worker.cpp +++ b/static_core/runtime/coroutines/stackful_coroutine_worker.cpp @@ -64,17 +64,17 @@ void StackfulCoroutineWorker::AddCreatedCoroutineAndSwitchToIt(Coroutine *newCor { // precondition: called within the current worker, no cross-worker calls allowed ASSERT(GetCurrentContext()->GetWorker() == this); + RegisterIncomingActiveCoroutine(newCoro); + // suspend current coro... 
auto *coro = Coroutine::GetCurrent(); ScopedNativeCodeThread n(coro); coro->RequestSuspend(false); - - newCoro->LinkToExternalHolder(IsMainWorker() || InExclusiveMode()); + // ..and resume the new one auto *currentCtx = GetCurrentContext(); auto *nextCtx = newCoro->GetContext(); nextCtx->RequestResume(); Coroutine::SetCurrent(newCoro); - RegisterIncomingActiveCoroutine(newCoro); SwitchCoroutineContext(currentCtx, nextCtx); @@ -282,6 +282,7 @@ void StackfulCoroutineWorker::ScheduleLoop() void StackfulCoroutineWorker::ScheduleLoopBody() { + // run the loop while (IsActive()) { RequestScheduleImpl(); os::memory::LockHolder lkRunnables(runnablesLock_); @@ -610,6 +611,18 @@ void StackfulCoroutineWorker::OnAfterContextSwitch(StackfulCoroutineContext *to) coroTo->OnContextSwitchedTo(); } +void StackfulCoroutineWorker::CacheLocalObjectsInCoroutines() +{ + os::memory::LockHolder lock(runnablesLock_); + runnables_.IterateOverCoroutines([](Coroutine *co) { co->UpdateCachedObjects(); }); + { + os::memory::LockHolder lh(waitersLock_); + for (auto &[_, co] : waiters_) { + co->UpdateCachedObjects(); + } + } +} + void StackfulCoroutineWorker::GetFullWorkerStateInfo(StackfulCoroutineWorkerStateInfo *info) const { os::memory::LockHolder lock(runnablesLock_); diff --git a/static_core/runtime/coroutines/stackful_coroutine_worker.h b/static_core/runtime/coroutines/stackful_coroutine_worker.h index 4e66c4fa9720665e9e045f5fa5333a24a5b3cb96..01a68ca59c685042c032ce508cfe7e1cd9f1ff57 100644 --- a/static_core/runtime/coroutines/stackful_coroutine_worker.h +++ b/static_core/runtime/coroutines/stackful_coroutine_worker.h @@ -255,11 +255,15 @@ private: bool IsPotentiallyBlocked(); void MigrateCoroutinesImpl(StackfulCoroutineWorker *to, size_t migrateCount) REQUIRES(runnablesLock_); + /* events */ /// called right before the coroutineContext is switched void OnBeforeContextSwitch(StackfulCoroutineContext *from, StackfulCoroutineContext *to); /// called right after the coroutineContext is switched (in case if no migration happened) void OnAfterContextSwitch(StackfulCoroutineContext *to); + /// worker local storage + void CacheLocalObjectsInCoroutines() override; + private: // data members StackfulCoroutineManager *coroManager_; Coroutine *scheduleLoopCtx_ = nullptr; diff --git a/static_core/runtime/coroutines/threaded_coroutine_manager.cpp b/static_core/runtime/coroutines/threaded_coroutine_manager.cpp index c3da01a1186c43cfac3952ed78cadf3e83576a98..8c1eb7334f7ea709f40698c70a8624bf0a9b3e1e 100644 --- a/static_core/runtime/coroutines/threaded_coroutine_manager.cpp +++ b/static_core/runtime/coroutines/threaded_coroutine_manager.cpp @@ -314,6 +314,17 @@ bool ThreadedCoroutineManager::EnumerateThreadsImpl(const ThreadManager::Callbac return true; } +bool ThreadedCoroutineManager::EnumerateWorkersImpl(const EnumerateWorkerCallback &cb) const +{ + os::memory::LockHolder lock(workersLock_); + for (auto *w : workers_) { + if (!cb(w)) { + return false; + } + } + return true; +} + void ThreadedCoroutineManager::SuspendAllThreads() { os::memory::LockHolder lList(coroListLock_); diff --git a/static_core/runtime/coroutines/threaded_coroutine_manager.h b/static_core/runtime/coroutines/threaded_coroutine_manager.h index 7dd1f1830d44a8cddad6ea370c35095912e0cbc7..065aec82838b07526cddeab16a9f7a66ef6ba107 100644 --- a/static_core/runtime/coroutines/threaded_coroutine_manager.h +++ b/static_core/runtime/coroutines/threaded_coroutine_manager.h @@ -81,6 +81,8 @@ public: protected: bool EnumerateThreadsImpl(const ThreadManager::Callback &cb, 
unsigned int incMask, unsigned int xorMask) const override; + bool EnumerateWorkersImpl(const EnumerateWorkerCallback &cb) const override; + CoroutineContext *CreateCoroutineContext(bool coroHasEntrypoint) override; void DeleteCoroutineContext(CoroutineContext *ctx) override; diff --git a/static_core/runtime/include/managed_thread.h b/static_core/runtime/include/managed_thread.h index 2278a385abd79c3b2c5b997e74be4db7dfeb65d0..ca855594ff1bbb58c926356adc3553a7d4a85ce6 100644 --- a/static_core/runtime/include/managed_thread.h +++ b/static_core/runtime/include/managed_thread.h @@ -376,6 +376,11 @@ public: return MEMBER_OFFSET(ManagedThread, interpreterCache_); } + static constexpr uint32_t GetFlattenedStringCacheOffset() + { + return MEMBER_OFFSET(ManagedThread, flattenedStringCache_); + } + void *GetLanguageExtensionsData() const { return languageExtensionData_; @@ -437,6 +442,9 @@ public: PANDA_PUBLIC_API void SetCustomTLSData(const char *key, CustomTLSData *data); PANDA_PUBLIC_API bool EraseCustomTLSData(const char *key); + void SetFlattenedStringCache(ObjectHeader *cacheInstance); + ObjectHeader *GetFlattenedStringCache() const; + #if EVENT_METHOD_ENTER_ENABLED || EVENT_METHOD_EXIT_ENABLED uint32_t RecordMethodEnter() { @@ -705,6 +713,9 @@ private: PandaMap> customTlsCache_ GUARDED_BY(Locks::customTlsLock_); + // NOTE(konstanting): this is to be moved once we decouple Thread from ManagedThread + ObjectHeader *flattenedStringCache_ {nullptr}; + mem::GCG1BarrierSet::G1PostBarrierRingBufferType *g1PostBarrierRingBuffer_ {nullptr}; // Keep these here to speed up interpreter mem::BarrierType preBarrierType_ {mem::BarrierType::PRE_WRB_NONE}; diff --git a/static_core/runtime/include/string_flattening_cache.h b/static_core/runtime/include/string_flattening_cache.h new file mode 100644 index 0000000000000000000000000000000000000000..94a20a3200a31b7a151e1fa4dc0b0f16e0ecc2b0 --- /dev/null +++ b/static_core/runtime/include/string_flattening_cache.h @@ -0,0 +1,119 @@ +/** + * Copyright (c) 2025 Huawei Device Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef PANDA_RUNTIME_STRING_FLATTENING_CACHE_H +#define PANDA_RUNTIME_STRING_FLATTENING_CACHE_H + +#include "runtime/include/coretypes/string.h" +#include "runtime/include/runtime.h" +#include "runtime/include/panda_vm.h" + +namespace ark { +class StringFlatteningCache { +public: + explicit StringFlatteningCache(ManagedThread *thread) : thread_(thread) {} + + static coretypes::Array *Create(PandaVM *vm) + { + auto *linker = Runtime::GetCurrent()->GetClassLinker(); + auto *ext = linker->GetExtension(vm->GetLanguageContext()); + auto *klass = ext->GetClassRoot(ClassRoot::ARRAY_STRING); + return coretypes::Array::Create(klass, ENTRY_SIZE * SIZE); + } + + coretypes::String *Get(coretypes::String *treeStr) + { + auto *cache = GetCache(); + ASSERT(cache != nullptr); + auto index = GetIndex(treeStr); + auto *key = GetKey(cache, index); + if (key != treeStr) { + return nullptr; + } + return GetValue(cache, index); + } + + void Update(coretypes::String *treeStr, coretypes::String *flatStr) + { + auto *cache = GetCache(); + ASSERT(cache != nullptr); + ASSERT(treeStr != nullptr); + ASSERT(flatStr != nullptr); + auto index = GetIndex(treeStr); + SetKey(cache, index, treeStr); + SetValue(cache, index, flatStr); + } + + static constexpr size_t GetAddressShift() + { + return SHIFT; + } + + static constexpr size_t GetAddressMask() + { + return MASK; + } + + static constexpr size_t GetEntrySize() + { + return ENTRY_SIZE; + } + + static constexpr size_t GetValueOffset() + { + return VALUE_OFFSET; + } + +private: + static constexpr size_t SHIFT = 3; + static constexpr size_t MASK = 0x3f; + static constexpr size_t SIZE = MASK + 1; + static constexpr size_t ENTRY_SIZE = 2; + static constexpr size_t VALUE_OFFSET = 1; + + static size_t GetIndex(coretypes::String *treeStr) + { + return ((ToUintPtr(treeStr) >> SHIFT) & MASK) * ENTRY_SIZE; + } + + static coretypes::String *GetKey(coretypes::Array *cache, size_t index) + { + return cache->Get(index); + } + + static void SetKey(coretypes::Array *cache, size_t index, coretypes::String *key) + { + cache->Set(index, key); + } + + static coretypes::String *GetValue(coretypes::Array *cache, size_t index) + { + return cache->Get(index + VALUE_OFFSET); + } + + static void SetValue(coretypes::Array *cache, size_t index, coretypes::String *value) + { + cache->Set(index + VALUE_OFFSET, value); + } + + coretypes::Array *GetCache() + { + return coretypes::Array::Cast(thread_->GetFlattenedStringCache()); + } + + ManagedThread *thread_; +}; +} // namespace ark + +#endif // PANDA_RUNTIME_STRING_FLATTENING_CACHE_H diff --git a/static_core/runtime/thread.cpp b/static_core/runtime/thread.cpp index d144747093f67024ac1d8cd3125bc0a43ef36c6c..32e7fb2150ad2390f9efd74404e229795db6ecc5 100644 --- a/static_core/runtime/thread.cpp +++ b/static_core/runtime/thread.cpp @@ -678,9 +678,13 @@ void MTManagedThread::ProcessCreatedThread() void ManagedThread::UpdateGCRoots(const GCRootUpdater &gcRootUpdater) { - if ((exception_ != nullptr)) { + if (exception_ != nullptr) { gcRootUpdater(&exception_); } + if (flattenedStringCache_ != nullptr) { + // This is the cached pointer. We need to update it; visiting it as root is nor required. 
+ gcRootUpdater(&flattenedStringCache_); + } for (auto **localObject : localObjects_) { gcRootUpdater(localObject); } @@ -849,6 +853,16 @@ bool ManagedThread::EraseCustomTLSData(const char *key) return customTlsCache_.erase(key) != 0; } +void ManagedThread::SetFlattenedStringCache(ObjectHeader *cacheInstance) +{ + flattenedStringCache_ = cacheInstance; +} + +ObjectHeader *ManagedThread::GetFlattenedStringCache() const +{ + return flattenedStringCache_; +} + LanguageContext ManagedThread::GetLanguageContext() { return Runtime::GetCurrent()->GetLanguageContext(threadLang_);