diff --git a/code/AI/MindSporeLiteCDemoASR/entry/oh-package.json5 b/code/AI/MindSporeLiteCDemoASR/entry/oh-package.json5 index f71a220dfa5ce47016a0b61722ecfda864faf117..9c75ea3643ed82d16f84edf78024be9f08c43904 100644 --- a/code/AI/MindSporeLiteCDemoASR/entry/oh-package.json5 +++ b/code/AI/MindSporeLiteCDemoASR/entry/oh-package.json5 @@ -16,7 +16,7 @@ { "name": "entry", "version": "1.0.0", - "description": "Please describe the basic information.", + "description": "MindSpore Lite inference module", "main": "", "author": "", "license": "", diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/mslite_napi.cpp b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/mslite_napi.cpp index d4f581b6be22c55febd9b0a9481a8049fd1f46b4..75aab4ac20f7bc8ed143fb46f6c968a8bfcdda04 100644 --- a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/mslite_napi.cpp +++ b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/mslite_napi.cpp @@ -13,25 +13,25 @@ * limitations under the License. */ +#include "AudioFile.h" +#include "base64.h" #include "napi/native_api.h" +#include "utils.h" +#include +#include +#include #include -#include #include -#include -#include -#include -#include +#include #include +#include #include #include +#include +#include +#include #include -#include #include -#include -#include -#include "utils.h" -#include "AudioFile.h" -#include "base64.h" #define LOGI(...) ((void)OH_LOG_Print(LOG_APP, LOG_INFO, LOG_DOMAIN, "[MSLiteNapi]", __VA_ARGS__)) #define LOGD(...) ((void)OH_LOG_Print(LOG_APP, LOG_DEBUG, LOG_DOMAIN, "[MSLiteNapi]", __VA_ARGS__)) @@ -39,16 +39,16 @@ #define LOGE(...) 
((void)OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_DOMAIN, "[MSLiteNapi]", __VA_ARGS__)) const float NEG_INF = -std::numeric_limits::infinity(); -const int WHISPER_SOT = 50258; -const int WHISPER_TRANSCRIBE = 50359; -const int WHISPER_TRANSLATE = 50358; +const int WHISPER_SOT = 50258; +const int WHISPER_TRANSCRIBE = 50359; +const int WHISPER_TRANSLATE = 50358; const int WHISPER_NO_TIMESTAMPS = 50363; -const int WHISPER_EOT = 50257; -const int WHISPER_BLANK = 220; -const int WHISPER_NO_SPEECH = 50362; -const int WHISPER_N_TEXT_CTX = 448; +const int WHISPER_EOT = 50257; +const int WHISPER_BLANK = 220; +const int WHISPER_NO_SPEECH = 50362; +const int WHISPER_N_TEXT_CTX = 448; const int WHISPER_N_TEXT_STATE = 384; // for tiny -constexpr int WHISPER_SAMPLE_RATE = 16000; +constexpr int WHISPER_SAMPLE_RATE = 16000; constexpr int K_NUM_PRINT_OF_OUT_DATA = 20; using BinBuffer = std::pair; @@ -63,7 +63,7 @@ int FillInputTensor(OH_AI_TensorHandle input, const BinBuffer &bin) return OH_AI_STATUS_SUCCESS; } -BinBuffer ReadTokens(NativeResourceManager *nativeResourceManager, const std::string &modelName){ +BinBuffer ReadTokens(NativeResourceManager *nativeResourceManager, const std::string &modelName) { auto rawFile = OH_ResourceManager_OpenRawFile(nativeResourceManager, modelName.c_str()); if (rawFile == nullptr) { LOGE("MS_LITE_ERR: Open model file failed"); @@ -156,27 +156,25 @@ OH_AI_ModelHandle CreateMSLiteModel(BinBuffer &bin) } template -void PrintBinAs(const BinBuffer &bin, const std::string& name = "Vector", const size_t n_visible = 10) { +void PrintBinAs(const BinBuffer &bin, const std::string &name = "Vector", const size_t n_visible = 10) { size_t n_elem = bin.second / sizeof(T); std::stringstream ss; const T *data = reinterpret_cast(bin.first); for (size_t i = 0; i < bin.second / sizeof(T) && i < n_visible; i++) { ss << data[i] << " "; } - LOGD("MS_LITE_LOG: bin name: %{public}s, n_elem: %{public}zu, data: [%{public}s]", name.c_str(), n_elem, ss.str().c_str()); -} + 
LOGD("MS_LITE_LOG: bin name: %{public}s, n_elem: %{public}zu, data: [%{public}s]", name.c_str(), n_elem, + ss.str().c_str()); + } -void save_to_binary_file(const std::vector& data, const std::string& filename) { +void SaveToBinaryFile(const std::vector& data, const std::string& filename) { // 打开文件,以二进制格式写入 std::ofstream outFile(filename, std::ios::binary); - if (!outFile) { throw std::runtime_error("无法打开文件进行写入!"); } - // 写入数据到文件 outFile.write(reinterpret_cast(data.data()), data.size() * sizeof(float)); - // 关闭文件 outFile.close(); std::cout << "数据已成功保存为二进制文件: " << filename << std::endl; @@ -226,15 +224,12 @@ int RunMSLiteModel(OH_AI_ModelHandle model, std::vector inputBins) return OH_AI_STATUS_SUCCESS; } -std::vector convertIntVectorToFloat(const std::vector& vec) { +std::vector ConvertIntVectorToFloat(const std::vector& vec) { std::vector floatVec(vec.size()); - float* floatPtr = reinterpret_cast(const_cast(vec.data())); - for (size_t i = 0; i < vec.size(); ++i) { floatVec[i] = *(floatPtr + i); } - return floatVec; } @@ -244,14 +239,14 @@ BinBuffer GetMSOutput(OH_AI_TensorHandle output) { return {outputData, size}; } -std::vector getVecOutput(OH_AI_TensorHandle output){ +std::vector GetVecOutput(OH_AI_TensorHandle output){ float *outputData = reinterpret_cast(OH_AI_TensorGetMutableData(output)); size_t len = OH_AI_TensorGetElementNum(output); std::vector res(outputData, outputData + len); return res; } -void supress_tokens(BinBuffer& logits, bool is_initial) { +void SupressTokens(BinBuffer &logits, bool is_initial) { auto logits_data = static_cast(logits.first); if (is_initial) { // 假设这两个值在 logits 中的索引位置 @@ -267,7 +262,8 @@ void supress_tokens(BinBuffer& logits, bool is_initial) { } template -void CompareVectorHelper(const T *data_a, const T *data_b, const std::string &label, size_t n, float rtol = 1e-3, float atol = 5e-3) { +void CompareVectorHelper(const T *data_a, const T *data_b, const std::string &label, size_t n, float rtol = 1e-3, + float atol = 5e-3) { 
LOGD("MS_LITE_LOG: ==== 精度校验 ===="); LOGD("MS_LITE_LOG: 比较 %{public}s", label.c_str()); @@ -295,9 +291,9 @@ void CompareVectorHelper(const T *data_a, const T *data_b, const std::string &la int show_count = 0; for (size_t i = 0; i < n && show_count < MAX_SHOW; ++i) { float diff = data_a[i] - data_b[i]; - if (std::abs(diff) > (atol + rtol * std::abs(data_b[i])) && data_a[i] != 0.0f ) { - LOGD("MS_LITE_LOG: 索引[%{public}zu]: %{public}.6f vs %{public}.6f (Δ=%{public}.6f)", - i, data_a[i], data_b[i], diff); + if (std::abs(diff) > (atol + rtol * std::abs(data_b[i])) && data_a[i] != 0.0f) { + LOGD("MS_LITE_LOG: 索引[%{public}zu]: %{public}.6f vs %{public}.6f (Δ=%{public}.6f)", i, data_a[i], + data_b[i], diff); ++show_count; } } @@ -306,7 +302,8 @@ void CompareVectorHelper(const T *data_a, const T *data_b, const std::string &la return; } -void CompareFloatVector(const BinBuffer &a, const BinBuffer &b, const std::string &label, float rtol = 1e-3, float atol = 5e-3) { +void CompareFloatVector(const BinBuffer &a, const BinBuffer &b, const std::string &label, float rtol = 1e-3, + float atol = 5e-3) { // 检查数据尺寸 assert(a.second == b.second); const float *data_a = (const float *)a.first; @@ -314,7 +311,8 @@ void CompareFloatVector(const BinBuffer &a, const BinBuffer &b, const std::strin CompareVectorHelper(data_a, data_b, label, b.second / sizeof(float), rtol, atol); } -void CompareFloatVector(const std::vector &fp_a, const BinBuffer &b, const std::string &label, float rtol = 1e-3, float atol = 5e-3) { +void CompareFloatVector(const std::vector &fp_a, const BinBuffer &b, const std::string &label, float rtol = 1e-3, + float atol = 5e-3) { // 检查数据尺寸 assert(fp_a.size() * sizeof(float) == b.second); @@ -324,7 +322,8 @@ void CompareFloatVector(const std::vector &fp_a, const BinBuffer &b, cons CompareVectorHelper(data_a, data_b, label, b.second / sizeof(float), rtol, atol); } -void CompareIntVector(const BinBuffer &a, const BinBuffer &b, const std::string &label, float rtol = 1e-3, float 
atol = 5e-3) { +void CompareIntVector(const BinBuffer &a, const BinBuffer &b, const std::string &label, float rtol = 1e-3, + float atol = 5e-3) { // 检查数据尺寸 assert(a.second == b.second); @@ -334,24 +333,25 @@ void CompareIntVector(const BinBuffer &a, const BinBuffer &b, const std::string CompareVectorHelper(data_a, data_b, label, b.second / sizeof(float), rtol, atol); } -std::vector loop_predict(const OH_AI_ModelHandle model, const BinBuffer &n_layer_cross_k, const BinBuffer &n_layer_cross_v, - const BinBuffer &logits_init, BinBuffer &out_n_layer_self_k_cache, - BinBuffer &out_n_layer_self_v_cache, const BinBuffer &data_embedding, const int loop, const int offset_init) { +std::vector LoopPredict(const OH_AI_ModelHandle model, const BinBuffer &n_layer_cross_k, + const BinBuffer &n_layer_cross_v, const BinBuffer &logits_init, + BinBuffer &out_n_layer_self_k_cache, BinBuffer &out_n_layer_self_v_cache, + const BinBuffer &data_embedding, const int loop, const int offset_init) { // logits BinBuffer logits{nullptr, 51865 * sizeof(float)}; logits.first = malloc(logits.second); if (!logits.first) { LOGE("MS_LITE_LOG: Fail to malloc!\n"); } - void* logits_init_src = static_cast(logits_init.first) + 51865 * 3 * sizeof(float); + void *logits_init_src = static_cast(logits_init.first) + 51865 * 3 * sizeof(float); memcpy(logits.first, logits_init_src, logits.second); - supress_tokens(logits, true); + SupressTokens(logits, true); std::vector output_token; - float* logits_data = static_cast(logits.first); + float *logits_data = static_cast(logits.first); int max_token_id = 0; float max_token = logits_data[0]; - for (int i=0; i< logits.second / sizeof(float); i++) { + for (int i = 0; i < logits.second / sizeof(float); i++) { if (logits_data[i] > max_token) { max_token_id = i; max_token = logits_data[i]; @@ -380,7 +380,8 @@ std::vector loop_predict(const OH_AI_ModelHandle model, const BinBuffer &n_ } BinBuffer tokens{&max_token_id, sizeof(int)}; - void* data_embedding_src = 
static_cast(data_embedding.first) + offset * WHISPER_N_TEXT_STATE * sizeof(float); + void *data_embedding_src = + static_cast(data_embedding.first) + offset * WHISPER_N_TEXT_STATE * sizeof(float); memcpy(slice.first, data_embedding_src, slice.second); // out_n_layer_self_k_cache // out_n_layer_self_v_cache @@ -388,21 +389,20 @@ std::vector loop_predict(const OH_AI_ModelHandle model, const BinBuffer &n_ // n_layer_cross_v // slice // token - BinBuffer mask_bin(mask.data(),mask.size()*sizeof(float)); - int ret = RunMSLiteModel(model, {tokens, out_n_layer_self_k_cache_new, - out_n_layer_self_v_cache_new, n_layer_cross_k, - n_layer_cross_v, slice, mask_bin}); - + BinBuffer mask_bin(mask.data(), mask.size() * sizeof(float)); + int ret = RunMSLiteModel(model, {tokens, out_n_layer_self_k_cache_new, out_n_layer_self_v_cache_new, + n_layer_cross_k, n_layer_cross_v, slice, mask_bin}); + auto outputs = OH_AI_ModelGetOutputs(model); logits = GetMSOutput(outputs.handle_list[0]); out_n_layer_self_k_cache_new = GetMSOutput(outputs.handle_list[1]); out_n_layer_self_v_cache_new = GetMSOutput(outputs.handle_list[2]); offset++; - supress_tokens(logits, false); - logits_data = static_cast(logits.first); + SupressTokens(logits, false); + logits_data = static_cast(logits.first); max_token = logits_data[0]; - for (int j=0; j< logits.second / sizeof(float); j++) { + for (int j = 0; j < logits.second / sizeof(float); j++) { if (logits_data[j] > max_token) { max_token_id = j; max_token = logits_data[j]; @@ -414,12 +414,12 @@ std::vector loop_predict(const OH_AI_ModelHandle model, const BinBuffer &n_ return output_token; } -std::vector processDataLines(const BinBuffer token_txt) { - void* data_ptr = token_txt.first; +std::vector ProcessDataLines(const BinBuffer token_txt) { + void *data_ptr = token_txt.first; size_t data_size = token_txt.second; std::vector tokens; - - const char* char_data = static_cast(data_ptr); + + const char *char_data = static_cast(data_ptr); std::stringstream 
ss(std::string(char_data, char_data + data_size)); std::string line; while (std::getline(ss, line)) { @@ -449,11 +449,11 @@ static napi_value RunDemo(napi_env env, napi_callback_info info) size_t dataSize = audioBin.second; uint8_t *dataBuffer = (uint8_t *)audioBin.first; bool ok = audioFile.loadFromMemory(std::vector(dataBuffer, dataBuffer + dataSize)); - if(!ok ){ + if (!ok) { LOGI("MS_LITE_LOG: Fail to read %{public}s!", filePath.c_str()); } std::vector data(audioFile.samples[0]); - resample_audio(data, audioFile.getSampleRate(), WHISPER_SAMPLE_RATE, 1, SRC_SINC_BEST_QUALITY); + ResampleAudio(data, audioFile.getSampleRate(), WHISPER_SAMPLE_RATE, 1, SRC_SINC_BEST_QUALITY); std::vector audio(data); int padding = 480000; @@ -462,20 +462,23 @@ static napi_value RunDemo(napi_env env, napi_callback_info info) int n_hop = 160; int n_mel = 80; int fmin = 0; // Minimum frequency, default value is 0.0 Hz - int fmax = sr / 2.0; // Maximum frequency, default value is half of the sampling rate (sr / 2.0), i.e., the Nyquist frequency. 
- audio.insert(audio.end(),padding,0.0f); - std::vector> mels_T = librosa::Feature::melspectrogram(audio, sr, n_fft, n_hop, "hann", true, "reflect", 2.f, n_mel, fmin, fmax); - std::cout << "mels: "<> mels_T = + librosa::Feature::melspectrogram(audio, sr, n_fft, n_hop, "hann", true, "reflect", 2.f, n_mel, fmin, fmax); + std::cout << "mels: " << std::endl; - std::vector> mels = transpose_mel(mels_T); - processMelSpectrogram(mels); + std::vector> mels = TransposeMel(mels_T); + ProcessMelSpectrogram(mels); std::vector inputMels(mels.size() * mels[0].size(), 0); for (int i = 0; i < mels.size(); i++) { std::copy(mels[i].begin(), mels[i].end(), inputMels.begin() + i * mels[0].size()); } - BinBuffer inputMelsBin(inputMels.data(), inputMels.size() * sizeof(float) ); + BinBuffer inputMelsBin(inputMels.data(), inputMels.size() * sizeof(float)); // --- encoder --- auto encoderBin = ReadBinFile(resourcesManager, "tiny-encoder.ms"); @@ -497,12 +500,9 @@ static napi_value RunDemo(napi_env env, napi_callback_info info) auto n_layer_cross_v = GetMSOutput(outputs.handle_list[1]); // --- decoder_main --- - std::vector SOT_SEQUENCE = { - WHISPER_SOT, - WHISPER_SOT + 1 + 1, // wait to modify - WHISPER_TRANSCRIBE, - WHISPER_NO_TIMESTAMPS - }; + std::vector SOT_SEQUENCE = {WHISPER_SOT, + WHISPER_SOT + 1 + 1, // wait to modify + WHISPER_TRANSCRIBE, WHISPER_NO_TIMESTAMPS}; BinBuffer sotSequence(SOT_SEQUENCE.data(), SOT_SEQUENCE.size() * sizeof(int)); const std::string decoder_main_path = "tiny-decoder-main.ms"; @@ -511,7 +511,7 @@ static napi_value RunDemo(napi_env env, napi_callback_info info) return error_ret; } auto decoder_main = CreateMSLiteModel(decoderMainBin); - int ret2 = RunMSLiteModel(decoder_main, {sotSequence,n_layer_cross_k, n_layer_cross_v}); + int ret2 = RunMSLiteModel(decoder_main, {sotSequence, n_layer_cross_k, n_layer_cross_v}); if (ret2 != OH_AI_STATUS_SUCCESS) { OH_AI_ModelDestroy(&decoder_main); @@ -520,7 +520,7 @@ static napi_value RunDemo(napi_env env, 
napi_callback_info info) LOGI("run decoder_main ok!\n"); auto decoderMainOut = OH_AI_ModelGetOutputs(decoder_main); - auto logitsBin= GetMSOutput(decoderMainOut.handle_list[0]); + auto logitsBin = GetMSOutput(decoderMainOut.handle_list[0]); auto out_n_layer_self_k_cache_Bin = GetMSOutput(decoderMainOut.handle_list[1]); auto out_n_layer_self_v_cache_Bin = GetMSOutput(decoderMainOut.handle_list[2]); @@ -537,17 +537,16 @@ static napi_value RunDemo(napi_env env, napi_callback_info info) int loop_times = WHISPER_N_TEXT_CTX - SOT_SEQUENCE.size(); int offset_init = SOT_SEQUENCE.size(); - auto output_tokens = loop_predict(decoder_loop, n_layer_cross_k, - n_layer_cross_v, logitsBin, out_n_layer_self_k_cache_Bin, - out_n_layer_self_v_cache_Bin, data_embedding, loop_times, offset_init); + auto output_tokens = + LoopPredict(decoder_loop, n_layer_cross_k, n_layer_cross_v, logitsBin, out_n_layer_self_k_cache_Bin, + out_n_layer_self_v_cache_Bin, data_embedding, loop_times, offset_init); - std::vector token_tables = processDataLines(ReadTokens(resourcesManager, "tiny-tokens.txt")); + std::vector token_tables = ProcessDataLines(ReadTokens(resourcesManager, "tiny-tokens.txt")); std::string result; for (const auto i : output_tokens) { char str[1024]; - base64_decode((const uint8*)token_tables[i].c_str(), (uint32)token_tables[i].size(), str); + base64_decode((const uint8 *)token_tables[i].c_str(), (uint32)token_tables[i].size(), str); result += str; - } LOGI("MS_LITE_LOG: result is -> %{public}s", result.c_str()); diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/utils.cc b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/utils.cc index a2e462193eb6974163b43b3111b9ae88ead39c15..3768f72033626b102da86e70fb95abf659203956 100644 --- a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/utils.cc +++ b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/utils.cc @@ -17,7 +17,7 @@ #include #include -std::vector> transpose_mel(const std::vector>& mels) { +std::vector> 
TransposeMel(const std::vector>& mels) { if (mels.empty()) return {}; size_t rows = mels.size(); @@ -33,7 +33,7 @@ std::vector> transpose_mel(const std::vector resample_audio( +std::vector ResampleAudio( const std::vector& input_data, int input_sample_rate, int output_sample_rate, @@ -65,7 +65,7 @@ std::vector resample_audio( return output_data; } -void processMelSpectrogram(std::vector>& mels) { +void ProcessMelSpectrogram(std::vector>& mels) { // log_spec = np.log10(np.maximum(mel, 1e-10)) for (auto& row : mels) { for (auto& val : row) { diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/utils.h b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/utils.h index 25d8d803817d155258180ba6a2790ead88ea331d..eda54d17225958fcb207f4cdbfe9b02d5d273e88 100644 --- a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/utils.h +++ b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/utils.h @@ -18,14 +18,14 @@ #include "AudioFile.h" #include "samplerate.h" -std::vector> transpose_mel(const std::vector>& mels); -std::vector resample_audio( +std::vector> TransposeMel(const std::vector>& mels); +std::vector ResampleAudio( const std::vector& input_data, int input_sample_rate, int output_sample_rate, int channels, int converter ); -void processMelSpectrogram(std::vector>& mels); +void ProcessMelSpectrogram(std::vector>& mels); diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/wavreader.c b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/wavreader.c deleted file mode 100644 index b9dae130d14bcfd8d5fd6781df711babfc23fd2c..0000000000000000000000000000000000000000 --- a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/wavreader.c +++ /dev/null @@ -1,204 +0,0 @@ -/* ------------------------------------------------------------------ -* Copyright (C) 2025 Martin Storsjo -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -* express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* ------------------------------------------------------------------- -*/ - -#include "wavreader.h" -#include -#include -#include -#include - -#define TAG(a, b, c, d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d)) - -struct wav_reader { - FILE *wav; - long data_pos; - uint32_t data_length; - uint32_t data_left; - - int format; - int sample_rate; - int bits_per_sample; - int channels; - int byte_rate; - int block_align; - - int streamed; -}; - -static uint32_t read_tag(struct wav_reader* wr) { - uint32_t tag = 0; - tag = (tag << 8) | fgetc(wr->wav); - tag = (tag << 8) | fgetc(wr->wav); - tag = (tag << 8) | fgetc(wr->wav); - tag = (tag << 8) | fgetc(wr->wav); - return tag; -} - -static uint32_t read_int32(struct wav_reader* wr) { - uint32_t value = 0; - value |= fgetc(wr->wav) << 0; - value |= fgetc(wr->wav) << 8; - value |= fgetc(wr->wav) << 16; - value |= fgetc(wr->wav) << 24; - return value; -} - -static uint16_t read_int16(struct wav_reader* wr) { - uint16_t value = 0; - value |= fgetc(wr->wav) << 0; - value |= fgetc(wr->wav) << 8; - return value; -} - -static void skip(FILE *f, int n) { - int i; - for (i = 0; i < n; i++) - fgetc(f); -} - -void* wav_read_open(const char *filename) { - struct wav_reader* wr = (struct wav_reader*) malloc(sizeof(*wr)); - memset(wr, 0, sizeof(*wr)); - - if (!strcmp(filename, "-")) - wr->wav = stdin; - else - wr->wav = fopen(filename, "rb"); - if (wr->wav == NULL) { - free(wr); - return NULL; - } - - while (1) { - uint32_t tag, tag2, length; - tag = read_tag(wr); - if (feof(wr->wav)) - break; - length = read_int32(wr); - 
if (!length || length >= 0x7fff0000) { - wr->streamed = 1; - length = ~0; - } - if (tag != TAG('R', 'I', 'F', 'F') || length < 4) { - fseek(wr->wav, length, SEEK_CUR); - continue; - } - tag2 = read_tag(wr); - length -= 4; - if (tag2 != TAG('W', 'A', 'V', 'E')) { - fseek(wr->wav, length, SEEK_CUR); - continue; - } - // RIFF chunk found, iterate through it - while (length >= 8) { - uint32_t subtag, sublength; - subtag = read_tag(wr); - if (feof(wr->wav)) - break; - sublength = read_int32(wr); - length -= 8; - if (length < sublength) - break; - if (subtag == TAG('f', 'm', 't', ' ')) { - if (sublength < 16) { - // Insufficient data for 'fmt ' - break; - } - wr->format = read_int16(wr); - wr->channels = read_int16(wr); - wr->sample_rate = read_int32(wr); - wr->byte_rate = read_int32(wr); - wr->block_align = read_int16(wr); - wr->bits_per_sample = read_int16(wr); - if (wr->format == 0xfffe) { - if (sublength < 28) { - // Insufficient data for waveformatex - break; - } - skip(wr->wav, 8); - wr->format = read_int32(wr); - skip(wr->wav, sublength - 28); - } - else { - skip(wr->wav, sublength - 16); - } - } - else if (subtag == TAG('d', 'a', 't', 'a')) { - wr->data_pos = ftell(wr->wav); - wr->data_length = sublength; - wr->data_left = wr->data_length; - if (!wr->data_length || wr->streamed) { - wr->streamed = 1; - return wr; - } - fseek(wr->wav, sublength, SEEK_CUR); - } - else { - skip(wr->wav, sublength); - } - length -= sublength; - } - if (length > 0) { - // Bad chunk? 
- fseek(wr->wav, length, SEEK_CUR); - } - } - fseek(wr->wav, wr->data_pos, SEEK_SET); - return wr; -} - -void wav_read_close(void* obj) { - struct wav_reader* wr = (struct wav_reader*) obj; - if (wr->wav != stdin) - fclose(wr->wav); - free(wr); -} - -int wav_get_header(void* obj, int* format, int* channels, int* sample_rate, int* bits_per_sample, unsigned int* data_length) { - struct wav_reader* wr = (struct wav_reader*) obj; - if (format) - *format = wr->format; - if (channels) - *channels = wr->channels; - if (sample_rate) - *sample_rate = wr->sample_rate; - if (bits_per_sample) - *bits_per_sample = wr->bits_per_sample; - if (data_length) - *data_length = wr->data_length; - return wr->format && wr->sample_rate; -} - -int wav_read_data(void* obj, unsigned char* data, unsigned int length) { - struct wav_reader* wr = (struct wav_reader*) obj; - int n; - if (wr->wav == NULL) - return -1; - if (length > wr->data_left && !wr->streamed) { - int loop = 1; - if (loop) { - fseek(wr->wav, wr->data_pos, SEEK_SET); - wr->data_left = wr->data_length; - } - length = wr->data_left; - } - n = fread(data, 1, length, wr->wav); - wr->data_left -= length; - return n; -} - diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/wavreader.h b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/wavreader.h deleted file mode 100644 index 5f0a65986858772faae1aeae8623652283559051..0000000000000000000000000000000000000000 --- a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/src/wavreader.h +++ /dev/null @@ -1,37 +0,0 @@ -/* ------------------------------------------------------------------ -* Copyright (C) 2025 Martin Storsjo -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -* express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* ------------------------------------------------------------------- -*/ - -#ifndef WAVREADER_H_ -#define WAVREADER_H_ - - -#ifdef __cplusplus -extern "C" { -#endif - -void* wav_read_open(const char *filename); -void wav_read_close(void* obj); - -int wav_get_header(void* obj, int* format, int* channels, int* sample_rate, int* bits_per_sample, unsigned int* data_length); -int wav_read_data(void* obj, unsigned char* data, unsigned int length); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/librosa/eigen3/Eigen/src/misc/lapacke_mangling.h b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/librosa/eigen3/Eigen/src/misc/lapacke_mangling.h index c4ad16ee282606d1c23dedb754200f2639a649c3..6211fd144d361701309e3d39ace6b5b9e14d188e 100644 Binary files a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/librosa/eigen3/Eigen/src/misc/lapacke_mangling.h and b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/librosa/eigen3/Eigen/src/misc/lapacke_mangling.h differ diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/docs/win32.md b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/docs/win32.md index 9ff2b861d827527a8ac3d042dad588df58611071..6f93347d9edd1cca113c0e0f12efa35684b28988 100644 Binary files a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/docs/win32.md and b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/docs/win32.md differ diff --git 
a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/gitattribute_backup b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/gitattribute_backup deleted file mode 100644 index 9e57d28ecd03acb9d44535f7192fba67a1bffc80..0000000000000000000000000000000000000000 --- a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/gitattribute_backup +++ /dev/null @@ -1,15 +0,0 @@ -# enforce correct file-extensions -*.ac text eol=lf -*.am text eol=lf -*.m4 text eol=lf -*.pc text eol=lf -*.spec text eol=lf -*.sh text eol=lf - -*.bat text eol=crlf -*.def text eol=crlf -*.msvc text eol=crlf - -# repository configuration excluded from the archive -.git* export-ignore -.travis.yml export-ignore diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/gitignore_backup b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/gitignore_backup deleted file mode 100644 index c7e5e81a60e098e89847ea10d023fef5e7f46f34..0000000000000000000000000000000000000000 --- a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/gitignore_backup +++ /dev/null @@ -1,73 +0,0 @@ -# *.cm* # This line is broken it matches all files. 
-*.dat -*.gz -*.la -*.lo -*.mat -*.o -*.s -*.wav -.depend -.deps -.libs -build-aux/ -Hack -Images -m4/libtool.m4 -m4/ltoptions.m4 -m4/ltsugar.m4 -m4/ltversion.m4 -m4/lt~obsolete.m4 -Makefile -Makefile.in -Octave/*.h -Octave/make_quad_filter -Octave/make_src_filter -Win32/Makefile.mingw -aclocal.m4 -autom4te.cache -libsamplerate-*.tar.xz -compile -config.* -!/config.h.in -configure -depcomp -doc/ChangeLog -doc/NEWS -examples/timewarp-file -examples/varispeed-play -install-sh -libsamplerate.spec -libtool -localinstall.py -ltmain.sh -missing -samplerate.pc -src/Version_script -src/config.h -src/config.h.in -stamp-h1 -.dirstamp -tests/*_test -tests/benchmark -tests/src-evaluate -*.log -*.trs - -CMakeCache.txt -CMakeFiles -CMakeScripts -Testing -Makefile -cmake_install.cmake -install_manifest.txt -compile_commands.json -CTestTestfile.cmake -CMakeSettings.json - -/*[Bb]uild*/ - -/.vscode/ -/.vs/ -/out/ - diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/src/high_qual_coeffs.h b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/src/high_qual_coeffs.h index e06a3f46333b8a0782201874ad34b06adcd8aea1..dc3b8840d425dc610c4ff734835bd073b6fb895b 100644 Binary files a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/src/high_qual_coeffs.h and b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/src/high_qual_coeffs.h differ diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/src/mid_qual_coeffs.h b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/src/mid_qual_coeffs.h index 161001c64048858d0d26cc72a1ba56637d41a53b..6fc4d377e55813c39f043640b8b330c3e219534f 100644 Binary files a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/src/mid_qual_coeffs.h and b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/src/mid_qual_coeffs.h differ diff --git 
a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/src/samplerate.c b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/src/samplerate.c index f19f40848f9b703ee8fef826e1c9f69f91add9b0..6a51f2794517d36d1fafe3152a2fb1fbc005535e 100644 Binary files a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/src/samplerate.c and b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/third_party/libsamplerate/src/samplerate.c differ diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/types/libentry/oh-package.json5 b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/types/libentry/oh-package.json5 index ea410725a8826704d061021d98cf02aa76cd8016..301287bcbacbb7aeaa04002127b56cc61f56cc7d 100644 --- a/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/types/libentry/oh-package.json5 +++ b/code/AI/MindSporeLiteCDemoASR/entry/src/main/cpp/types/libentry/oh-package.json5 @@ -2,5 +2,5 @@ "name": "libentry.so", "types": "./Index.d.ts", "version": "1.0.0", - "description": "Please describe the basic information." + "description": "MindSpore Lite inference module." 
} \ No newline at end of file diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/ets/pages/Index.ets b/code/AI/MindSporeLiteCDemoASR/entry/src/main/ets/pages/Index.ets index 8076f4351b827e6589627a7d65cd57b91124c67a..45d9b923dbfa67cd447943d3fe546dc4d65e9304 100644 --- a/code/AI/MindSporeLiteCDemoASR/entry/src/main/ets/pages/Index.ets +++ b/code/AI/MindSporeLiteCDemoASR/entry/src/main/ets/pages/Index.ets @@ -24,12 +24,7 @@ const TAG = 'MindSporeLite'; struct Index { @State message: string = 'MSLite Whisper Demo'; @State wavName: string = 'zh.wav'; - @State uris: Array = []; @State content: string = ''; - @State max: number = 0; - @State maxIndex: number = 0; - @State maxArray: Array = []; - @State maxIndexArray: Array = []; build() { Row() { @@ -37,11 +32,8 @@ struct Index { Text(this.message) .fontSize(30) .fontWeight(FontWeight.Bold); - //图片显示 - // Image(this.uris[0]).width(320).height(40).margin(15) - Button() { - Text('播放示例音频') + Text($r('app.string.play')) .fontSize(20) .fontWeight(FontWeight.Medium) } @@ -53,19 +45,13 @@ struct Index { .width('40%') .height('5%') .onClick(async () =>{ - // 通过实例调用类中的函数 - console.warn('MS_LITE_LOG: begin to play wav.'); + console.info('MS_LITE_LOG: begin to play wav.'); let myClass = new AVPlayerDemo(); myClass.avPlayerFdSrcDemo(); - - // 创建avPlayer实例对象。 - // let avPlayer: media.AVPlayer = await media.createAVPlayer(); - }) - Button() { - Text('识别示例音频') + Text($r('app.string.asr')) .fontSize(20) .fontWeight(FontWeight.Medium) } @@ -81,13 +67,9 @@ struct Index { // 调用c++的runDemo console.info('MS_LITE_LOG: *** Start MSLite Demo ***'); - // let output: Array = msliteNapi.runDemo(Array.from(float32View), resMgr); let output = msliteNapi.runDemo(resMgr); - // let output = '我认为跑步最重要的就是给我带来了身体健康。'; - // let output = '我認為跑步最重要的就是給我帶來了身體健康。'; - console.warn('MS_LITE_WARN: output length = ', output.length, ';value = ', output.slice(0, 20)); + console.info('MS_LITE_LOG: output length = ', output.length, ';value = ', output.slice(0, 
20)); this.content = output; - console.info('MS_LITE_LOG: *** Finished MSLite Demo ***'); }) @@ -99,7 +81,6 @@ struct Index { language: TransverterLanguage.ZH_CN }) + '\n').focusable(true).fontSize(20).height('20%') } - }.width('100%') } .height('100%') diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/ets/pages/player.ets b/code/AI/MindSporeLiteCDemoASR/entry/src/main/ets/pages/player.ets index 68555638bfcb625347095a299e1283829f397e27..522021a01e250ee47b0e6d7c3ed314b09361c2d5 100644 --- a/code/AI/MindSporeLiteCDemoASR/entry/src/main/ets/pages/player.ets +++ b/code/AI/MindSporeLiteCDemoASR/entry/src/main/ets/pages/player.ets @@ -14,24 +14,20 @@ */ import { media } from '@kit.MediaKit'; -import { fileIo as fs } from '@kit.CoreFileKit'; import { common } from '@kit.AbilityKit'; import { BusinessError } from '@kit.BasicServicesKit'; import { audio } from '@kit.AudioKit'; +import { UIContext } from '@kit.ArkUI'; -// export default async function modelPredict( export default class AVPlayerDemo { - private count: number = 10; - private isSeek: boolean = true; // 用于区分模式是否支持seek操作。 - private fileSize: number = -1; - private fd: number = 0; + private isSeek: boolean = false; // 用于区分模式是否支持seek操作。 // 注册avplayer回调函数。 setAVPlayerCallback(avPlayer: media.AVPlayer) { // seek操作结果回调函数。 avPlayer.on('seekDone', (seekDoneTime: number) => { console.info(`MS_LITE_LOG: AVPlayer seek succeeded, seek time is ${seekDoneTime}`); }); - // error回调监听函数,当avPlayer在操作过程中出现错误时调用 reset接口触发重置流程。 + // error回调监听函数,当avPlayer在操作过程中出现错误时调用reset接口触发重置流程。 avPlayer.on('error', (err: BusinessError) => { console.error(`MS_LITE_LOG: Invoke avPlayer failed, code is ${err.code}, message is ${err.message}`); avPlayer.reset(); // 调用reset重置资源,触发idle状态。 @@ -46,7 +42,7 @@ export default class AVPlayerDemo { case 'initialized': // avplayer 设置播放源后触发该状态上报。 console.info('MS_LITE_LOG: AVPlayer state initialized called.'); avPlayer.audioRendererInfo = { - usage: audio.StreamUsage.STREAM_USAGE_MUSIC, // 
音频流使用类型:音乐。根据业务场景配置,参考StreamUsage。 + usage: audio.StreamUsage.STREAM_USAGE_MUSIC, // 音频流使用类型:音乐。根据业务场景配置。 rendererFlags: 0 // 音频渲染器标志。 }; avPlayer.prepare(); @@ -57,21 +53,13 @@ export default class AVPlayerDemo { break; case 'playing': // play成功调用后触发该状态机上报。 console.info('MS_LITE_LOG: AVPlayer state playing called.'); - if (this.count !== 0) { - if (this.isSeek) { - console.info('MS_LITE_LOG: AVPlayer start to seek.'); - avPlayer.seek(0); //seek到音频末尾。 - } else { - // 当播放模式不支持seek操作时继续播放到结尾。 - console.info('MS_LITE_LOG: AVPlayer wait to play end.'); - } + if (this.isSeek) { + console.info('MS_LITE_LOG: AVPlayer start to seek.'); + avPlayer.seek(0); // seek到音频末尾。 } else { - setTimeout(() => { - console.info('MS_LITE_LOG: AVPlayer playing wait to pause'); - avPlayer.pause(); // 播放3s后调用暂停接口暂停播放。 - }, 3000); + // 当播放模式不支持seek操作时继续播放到结尾。 + console.info('MS_LITE_LOG: AVPlayer wait to play end.'); } - this.count++; break; case 'paused': // pause成功调用后触发该状态机上报。 console.info('MS_LITE_LOG: AVPlayer state paused called.'); @@ -82,7 +70,7 @@ export default class AVPlayerDemo { break; case 'completed': // 播放结束后触发该状态机上报。 console.info('MS_LITE_LOG: AVPlayer state completed called.'); - avPlayer.stop(); //调用播放结束接口。 + avPlayer.stop(); // 调用播放结束接口。 break; case 'stopped': // stop接口成功调用后触发该状态机上报。 console.info('MS_LITE_LOG: AVPlayer state stopped called.'); @@ -98,29 +86,20 @@ export default class AVPlayerDemo { }); } - // 以下demo为使用资源管理接口获取打包在HAP内的媒体资源文件并通过fdSrc属性进行播放示例。 + // 使用资源管理接口获取音频文件并通过fdSrc属性进行播放。 async avPlayerFdSrcDemo() { - console.warn('MS_LITE_LOG: begin to run avPlayerFdSrcDemo.'); // 创建avPlayer实例对象。 let avPlayer: media.AVPlayer = await media.createAVPlayer(); // 创建状态机变化回调函数。 this.setAVPlayerCallback(avPlayer); - console.warn('MS_LITE_LOG: begin to run setAVPlayerCallback.'); // 通过UIAbilityContext的resourceManager成员的getRawFd接口获取媒体资源播放地址。 // 返回类型为{fd,offset,length},fd为HAP包fd地址,offset为媒体资源偏移量,length为播放长度。 - let context = getContext(this) as common.UIAbilityContext; + let 
context = new UIContext().getHostContext() as common.UIAbilityContext; let fileDescriptor = await context.resourceManager.getRawFd('zh.wav'); - console.warn('MS_LITE_LOG: begin to run fileDescriptor.fd: ' + fileDescriptor.fd); - console.warn('MS_LITE_LOG: begin to run fileDescriptor.offset: ' + fileDescriptor.offset); - console.warn('MS_LITE_LOG: begin to run fileDescriptor.length: ' + fileDescriptor.length); - let avFileDescriptor: media.AVFileDescriptor = { fd: fileDescriptor.fd, offset: fileDescriptor.offset, length: fileDescriptor.length }; this.isSeek = true; // 支持seek操作。 // 为fdSrc赋值触发initialized状态机上报。 avPlayer.fdSrc = avFileDescriptor; - console.warn('MS_LITE_LOG: run end.'); } - } - diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/module.json5 b/code/AI/MindSporeLiteCDemoASR/entry/src/main/module.json5 index 14d3cd0bf9c6c9081751ec34a56516d0b501a47d..abfc5127c52dc8b23a8b2df94ef6a00c25873d6d 100644 --- a/code/AI/MindSporeLiteCDemoASR/entry/src/main/module.json5 +++ b/code/AI/MindSporeLiteCDemoASR/entry/src/main/module.json5 @@ -60,7 +60,7 @@ "name": "ohos.extension.backup", "resource": "$profile:backup_config" } - ], + ] } ] } diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/resources/base/element/string.json b/code/AI/MindSporeLiteCDemoASR/entry/src/main/resources/base/element/string.json index 74a646790bfd03311329727ab77f74e55752427f..9f22b4fdd310dcf589331d017795141b7114d107 100644 --- a/code/AI/MindSporeLiteCDemoASR/entry/src/main/resources/base/element/string.json +++ b/code/AI/MindSporeLiteCDemoASR/entry/src/main/resources/base/element/string.json @@ -11,6 +11,14 @@ { "name": "EntryAbility_label", "value": "mindsporeliteASR" + }, + { + "name": "play", + "value": "播放示例音频" + }, + { + "name": "asr", + "value": "识别示例音频" } ] } diff --git a/code/AI/MindSporeLiteCDemoASR/entry/src/main/resources/rawfile/tiny-tokens.txt b/code/AI/MindSporeLiteCDemoASR/entry/src/main/resources/rawfile/tiny-tokens.txt index 
41f2927d7e8d532c82e3a1bd0322cf5691e040d2..a4edf0b719c10d28cd83f8c294449f99ff5d5dc0 100644 Binary files a/code/AI/MindSporeLiteCDemoASR/entry/src/main/resources/rawfile/tiny-tokens.txt and b/code/AI/MindSporeLiteCDemoASR/entry/src/main/resources/rawfile/tiny-tokens.txt differ