diff --git a/vibration_convert/core/algorithm/onset/include/onset.h b/vibration_convert/core/algorithm/onset/include/onset.h
new file mode 100644
index 0000000000000000000000000000000000000000..e4ad6ad1213836997b4db7cf6ec9e61139a3b650
--- /dev/null
+++ b/vibration_convert/core/algorithm/onset/include/onset.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2023 Huawei Device Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ONSET_H
+#define ONSET_H
+
+// NOTE(review): the system header names were lost in this copy of the patch. The four below are what the
+// declarations in this file need; confirm the original list against the repository.
+#include <cstddef>
+#include <cstdint>
+#include <optional>
+#include <vector>
+
+#include "peak_finder.h"
+
+namespace OHOS {
+namespace Sensors {
+/** Result of one Onset::CheckOnset run. */
+struct OnsetInfo {
+    /**
+     * If true, detected onset events are backtracked to the nearest preceding minimum of energy.
+     * This is primarily useful when using onsets as slice points for segmentation.
+     */
+    bool backTrackFlag { false };
+    /** Spectral flux onset strength envelope. */
+    std::vector<double> envelopes;
+    /**
+     * Peaks that PeakFinder::DetectPeak reports for envelopes.
+     * NOTE(review): element type inferred from how callers use it - confirm against peak_finder.h.
+     */
+    std::vector<int32_t> idxs;
+    /** One entry per element of idxs: idxs[i] * hopLength / SAMPLE_RATE. */
+    std::vector<double> times;
+
+    /** Reset every member to its default state. */
+    void Clear()
+    {
+        backTrackFlag = false;
+        envelopes.clear();
+        idxs.clear();
+        times.clear();
+    }
+};
+
+/** Finds note onsets in an audio signal from the peaks of its onset strength envelope. */
+class Onset {
+public:
+    Onset() = default;
+    ~Onset() = default;
+
+    /**
+     * @brief Locate note onset events by picking peaks in an onset strength envelope.
+     *
+     * @param data Audio time-series.
+     * @param nFft Length of the windowed signal after padding with zeros. Must not be 0.
+     *     NOTE(review): the implementation configures the STFT with the NFFT constant and uses nFft only
+     *     for the mel filter bank, so callers presumably pass NFFT here - confirm.
+     * @param hopLength Number of samples between successive frames. Must not be 0.
+     * @param onsetInfo Receives the onset strength envelope, the detected peaks and their times. It is only
+     *     written when the call succeeds.
+     *
+     * @return Returns 0 if the operation is successful; returns a negative value otherwise.
+     */
+    int32_t CheckOnset(const std::vector<double> &data, int32_t nFft, int32_t hopLength, OnsetInfo &onsetInfo);
+
+private:
+    int32_t Sfft(const std::vector<double> &data, int32_t hopLength, int32_t &frmCount,
+        std::vector<float> &magnitudes, int32_t &numBins);
+    int32_t GetMelBias(int32_t numBins, int32_t nFft, size_t &frmCount, std::vector<double> &melBias);
+    std::vector<double> MatrixDot(size_t matrixAcols, const std::vector<double> &matrixA,
+        size_t matrixBcols, const std::vector<double> &matrixB);
+    std::vector<double> MatrixDiff(size_t valueCols, const std::vector<double> &values);
+    std::vector<double> PowerDB(const std::vector<double> &values);
+    std::optional<double> Median(const std::vector<double> &values);
+    std::optional<double> Mean(const std::vector<double> &values);
+
+private:
+    /** Selects which mel filter bank GetMelBias builds. */
+    bool htkFlag_ { false };
+    /** Working copy of the result, handed to the caller at the end of CheckOnset. */
+    OnsetInfo onsetInfo_;
+};
+} // namespace Sensors
+} // namespace OHOS
+#endif // ONSET_H
diff --git a/vibration_convert/core/algorithm/onset/src/onset.cpp b/vibration_convert/core/algorithm/onset/src/onset.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5f0d9a3a26b0f5324aefbbe6448fc41cb1f38665
--- /dev/null
+++ b/vibration_convert/core/algorithm/onset/src/onset.cpp
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2023 Huawei Device Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "onset.h"
+
+// NOTE(review): the three system header names were lost in this copy of the patch; these are the ones
+// the code below uses (sort/max/nth_element, log, accumulate) - confirm against the repository.
+#include <algorithm>
+#include <cmath>
+#include <numeric>
+
+#include "conversion_fft.h"
+#include "conversion_mfcc.h"
+#include "sensor_log.h"
+#include "sensors_errors.h"
+#include "utils.h"
+
+namespace OHOS {
+namespace Sensors {
+namespace {
+constexpr OHOS::HiviewDFX::HiLogLabel LABEL = { LOG_CORE, SENSOR_LOG_DOMAIN, "Onset" };
+// Effective threshold of note envelope
+constexpr double C_ONSET_ENV_VALIDE_THRESHOLD = 0.0001;
+constexpr double POWER_DB_COEF = 10.0;
+constexpr size_t N_MELS_OR_FILTERS = 128;
+/** 12 + 1 semitones */
+constexpr uint32_t SEMITONE_NUM_COEFFS = 13;
+constexpr double ONSET_PEAK_THRESHOLD_RATIO = 0.4;
+constexpr double MIN_FREQ = 0.0;
+constexpr double MAX_FREQ = SAMPLE_RATE / 2.0;
+} // namespace
+
+/**
+ * Multiply matrixA by matrixB. Both operands and the result are flat vectors stored column by column:
+ * element (row, col) of a matrix with R rows lives at index col * R + row.
+ *
+ * @param matrixAcols Number of columns of matrixA; must equal the number of rows of matrixB.
+ * @param matrixA Left operand.
+ * @param matrixBcols Number of columns of matrixB.
+ * @param matrixB Right operand.
+ * @return The product, or an empty vector if an argument is invalid.
+ */
+std::vector<double> Onset::MatrixDot(size_t matrixAcols, const std::vector<double> &matrixA,
+    size_t matrixBcols, const std::vector<double> &matrixB)
+{
+    if ((matrixAcols == 0) || (matrixBcols == 0)) {
+        SEN_HILOGE("Invalid parameter");
+        return {};
+    }
+    if ((matrixA.empty()) || (matrixB.empty())) {
+        SEN_HILOGE("matrixA or matrixB is empty");
+        return {};
+    }
+    const size_t aRows = matrixA.size() / matrixAcols;
+    const size_t bRows = matrixB.size() / matrixBcols;
+    // The inner dimensions have to agree, otherwise the product is undefined; with bRows > matrixAcols
+    // the loop below would even read past the end of matrixA.
+    if (bRows != matrixAcols) {
+        SEN_HILOGE("Invalid parameter, matrixAcols:%{public}zu, bRows:%{public}zu", matrixAcols, bRows);
+        return {};
+    }
+    std::vector<double> result(aRows * matrixBcols);
+    for (size_t j = 0; j < matrixBcols; ++j) {
+        for (size_t i = 0; i < aRows; ++i) {
+            // Multiply row i of matrixA by column j of matrixB and sum it up.
+            double sum = 0.0;
+            for (size_t k = 0; k < bRows; ++k) {
+                sum += matrixA[k * aRows + i] * matrixB[j * bRows + k];
+            }
+            result[j * aRows + i] = sum;
+        }
+    }
+    return result;
+}
+
+/**
+ * Difference between neighbouring columns of a matrix stored column by column.
+ *
+ * @param valueCols Number of columns of values.
+ * @param values Flat matrix data.
+ * @return (valueCols - 1) columns, column i holding column (i + 1) minus column i of values. The result is
+ *     empty if an argument is invalid or if there is only one column.
+ */
+std::vector<double> Onset::MatrixDiff(size_t valueCols, const std::vector<double> &values)
+{
+    if ((valueCols == 0) || (values.empty())) {
+        SEN_HILOGE("Invalid parameter");
+        return {};
+    }
+    const size_t valueRows = values.size() / valueCols;
+    const size_t diffCount = (valueCols - 1) * valueRows;
+    std::vector<double> result;
+    result.reserve(diffCount);
+    for (size_t i = 0; i < diffCount; ++i) {
+        // Elements that are valueRows apart sit in the same row of neighbouring columns.
+        result.push_back(values[i + valueRows] - values[i]);
+    }
+    return result;
+}
+
+/**
+ * @return The element at position size / 2 of the sorted values (the upper one of the two middle elements
+ *     when the count is even), or std::nullopt if values is empty.
+ */
+std::optional<double> Onset::Median(const std::vector<double> &values)
+{
+    if (values.empty()) {
+        SEN_HILOGE("values is empty");
+        return std::nullopt;
+    }
+    std::vector<double> result = values;
+    // Only the middle element is needed, so a partial ordering is enough.
+    auto middle = result.begin() + result.size() / 2;
+    std::nth_element(result.begin(), middle, result.end());
+    return *middle;
+}
+
+/**
+ * @return The arithmetic mean of values, or std::nullopt if values is empty.
+ */
+std::optional<double> Onset::Mean(const std::vector<double> &values)
+{
+    if (values.empty()) {
+        SEN_HILOGE("values is empty");
+        return std::nullopt;
+    }
+    // The initial value has to be a double: with an int the running sum is truncated after every addition.
+    double sumValue = std::accumulate(values.begin(), values.end(), 0.0);
+    return sumValue / values.size();
+}
+
+// Need to subtract an offset value.
+/**
+ * Convert power values to a logarithmic scale: POWER_DB_COEF * log(max(EPS_MIN, value)).
+ * NOTE(review): log is the natural logarithm, a decibel conversion would normally use log10. Confirm that
+ * this is intended before changing it, the thresholds applied in CheckOnset depend on the scale.
+ */
+std::vector<double> Onset::PowerDB(const std::vector<double> &values)
+{
+    std::vector<double> logSpectrum;
+    logSpectrum.reserve(values.size());
+    for (const double value : values) {
+        // Values below EPS_MIN are clamped before the logarithm is taken.
+        logSpectrum.push_back(POWER_DB_COEF * std::log(std::max(EPS_MIN, value)));
+    }
+    return logSpectrum;
+}
+
+/**
+ * Run the short-time Fourier transform over data.
+ *
+ * @param data Audio time-series.
+ * @param hopLength Hop size handed to the FFT.
+ * @param frmCount Receives the frame count reported by ConversionFFT::Process.
+ * @param magnitudes Receives the magnitudes reported by ConversionFFT::Process.
+ * @param numBins Receives ConversionFFT::GetNumBins().
+ * @return Sensors::SUCCESS, or Sensors::ERROR if data is empty or the FFT fails.
+ */
+int32_t Onset::Sfft(const std::vector<double> &data, int32_t hopLength, int32_t &frmCount,
+    std::vector<float> &magnitudes, int32_t &numBins)
+{
+    if (data.empty()) {
+        SEN_HILOGE("data is empty");
+        return Sensors::ERROR;
+    }
+    ConversionFFT convFft;
+    FFTInputPara fftPara;
+    fftPara.sampleRate = SAMPLE_RATE;
+    // FFT size and window size are fixed to NFFT.
+    fftPara.fftSize = NFFT;
+    fftPara.hopSize = hopLength;
+    fftPara.windowSize = NFFT;
+    if (convFft.Init(fftPara) != Sensors::SUCCESS) {
+        SEN_HILOGE("Init failed");
+        return Sensors::ERROR;
+    }
+    numBins = convFft.GetNumBins();
+    if (convFft.Process(data, frmCount, magnitudes) != Sensors::SUCCESS) {
+        SEN_HILOGE("Process failed");
+        return Sensors::ERROR;
+    }
+    return Sensors::SUCCESS;
+}
+
+/**
+ * Build the mel filter bank that CheckOnset multiplies with the power spectrum.
+ *
+ * @param numBins Bin count reported by Sfft; used only when htkFlag_ is true.
+ * @param nFft FFT length; used only when htkFlag_ is false.
+ * @param frmCount Receives the column count that CheckOnset passes to MatrixDot for melBias.
+ * @param melBias Receives the filter bank.
+ * @return Sensors::SUCCESS, or Sensors::ERROR if the filter bank cannot be built.
+ */
+int32_t Onset::GetMelBias(int32_t numBins, int32_t nFft, size_t &frmCount, std::vector<double> &melBias)
+{
+    MfccInputPara para;
+    para.sampleRate = SAMPLE_RATE;
+    // NOTE(review): the cast target was lost in this copy of the patch; int32_t is assumed - confirm
+    // against the declaration of MfccInputPara::nMels.
+    para.nMels = static_cast<int32_t>(N_MELS_OR_FILTERS);
+    para.minFreq = MIN_FREQ;
+    para.maxFreq = MAX_FREQ;
+    ConversionMfcc mfcc;
+    if (!htkFlag_) {
+        if (mfcc.FiltersMel(nFft, para, frmCount, melBias) != Sensors::SUCCESS) {
+            SEN_HILOGE("FiltersMel failed");
+            return Sensors::ERROR;
+        }
+        return Sensors::SUCCESS;
+    }
+    // Use slaneyBias if htkFlag_ is true.
+    uint32_t numCoeffs = SEMITONE_NUM_COEFFS;
+    if (mfcc.Init(numBins, numCoeffs, para) != Sensors::SUCCESS) {
+        SEN_HILOGE("Init failed");
+        return Sensors::ERROR;
+    }
+    melBias = mfcc.GetMelFilterBank();
+    frmCount = melBias.size() / N_MELS_OR_FILTERS;
+    return Sensors::SUCCESS;
+}
+
+/**
+ * Detect onsets in data. The steps are: STFT power spectrum, mel filtering, logarithmic scaling,
+ * frame-to-frame difference with negative values set to zero, one envelope value per frame (median, or
+ * mean when every median is below C_ONSET_ENV_VALIDE_THRESHOLD), peak picking on that envelope.
+ *
+ * @param data Audio time-series; must hold at least ONSET_HOP_LEN samples.
+ * @param nFft FFT length handed to GetMelBias; must be positive.
+ * @param hopLength Number of samples between successive frames; must be positive.
+ * @param onsetInfo Receives the result; it is left untouched when the call fails.
+ * @return Sensors::SUCCESS, Sensors::PARAMETER_ERROR for invalid arguments, Sensors::ERROR otherwise.
+ */
+int32_t Onset::CheckOnset(const std::vector<double> &data, int32_t nFft, int32_t hopLength, OnsetInfo &onsetInfo)
+{
+    CALL_LOG_ENTER;
+    // Negative sizes are as meaningless as zero, so both are rejected.
+    if ((data.size() < ONSET_HOP_LEN) || (nFft <= 0) || (hopLength <= 0)) {
+        SEN_HILOGE("Invalid parameter, data:%{public}zu, nFft:%{public}d, hopLength:%{public}d",
+            data.size(), nFft, hopLength);
+        return Sensors::PARAMETER_ERROR;
+    }
+    std::vector<float> magnitudes;
+    int32_t sfftFrmCount = 0;
+    int32_t numBins = 0;
+    if (Sfft(data, hopLength, sfftFrmCount, magnitudes, numBins) != Sensors::SUCCESS) {
+        SEN_HILOGE("Sfft failed");
+        return Sensors::ERROR;
+    }
+    if (magnitudes.empty()) {
+        SEN_HILOGE("magnitudes is empty");
+        return Sensors::ERROR;
+    }
+    // At least two frames are needed to form a frame-to-frame difference. The check comes before the count
+    // is converted to size_t, so a negative count cannot turn into a huge matrix dimension.
+    if (sfftFrmCount <= 1) {
+        SEN_HILOGE("sfftFrmCount is less than or equal to 1");
+        return Sensors::ERROR;
+    }
+    // Power spectrum: square of every magnitude.
+    std::vector<double> frmMagnitudes;
+    frmMagnitudes.reserve(magnitudes.size());
+    for (const auto magnitude : magnitudes) {
+        const double value = static_cast<double>(magnitude);
+        frmMagnitudes.push_back(value * value);
+    }
+    size_t biasFrmCount = 0;
+    std::vector<double> melBias;
+    if (GetMelBias(numBins, nFft, biasFrmCount, melBias) != Sensors::SUCCESS) {
+        SEN_HILOGE("GetMelBias failed");
+        return Sensors::ERROR;
+    }
+    std::vector<double> onsetEnvelope = MatrixDot(biasFrmCount, melBias, sfftFrmCount, frmMagnitudes);
+    if (onsetEnvelope.empty()) {
+        SEN_HILOGE("onsetEnvelope is empty");
+        return Sensors::ERROR;
+    }
+    std::vector<double> dbEnvelope = PowerDB(onsetEnvelope);
+    if (dbEnvelope.empty()) {
+        SEN_HILOGE("dbEnvelope is empty");
+        return Sensors::ERROR;
+    }
+    std::vector<double> dbEnvelopeDiff = MatrixDiff(sfftFrmCount, dbEnvelope);
+    if (dbEnvelopeDiff.empty()) {
+        SEN_HILOGE("dbEnvelopeDiff is empty");
+        return Sensors::ERROR;
+    }
+    // Only increases count towards an onset, everything else becomes zero.
+    for (double &diff : dbEnvelopeDiff) {
+        if (!IsGreatNotEqual(diff, 0.0)) {
+            diff = 0.0;
+        }
+    }
+    const size_t cols = static_cast<size_t>(sfftFrmCount) - 1;
+    const size_t rows = dbEnvelopeDiff.size() / cols;
+    onsetInfo_.Clear();
+    std::vector<double> oneFrmValues;
+    // Collapse every frame (one column of dbEnvelopeDiff) into a single envelope value.
+    auto buildEnvelopes = [&](bool useMedian) {
+        onsetInfo_.envelopes.clear();
+        onsetInfo_.envelopes.reserve(cols);
+        for (size_t i = 0; i < cols; ++i) {
+            oneFrmValues.assign(dbEnvelopeDiff.begin() + i * rows, dbEnvelopeDiff.begin() + ((i + 1) * rows));
+            std::optional<double> value = useMedian ? Median(oneFrmValues) : Mean(oneFrmValues);
+            if (!value) {
+                return false;
+            }
+            onsetInfo_.envelopes.push_back(value.value());
+        }
+        return true;
+    };
+    if (!buildEnvelopes(true)) {
+        SEN_HILOGE("Median failed");
+        return Sensors::ERROR;
+    }
+    double envelopeMax = *std::max_element(onsetInfo_.envelopes.begin(), onsetInfo_.envelopes.end());
+    if (envelopeMax < C_ONSET_ENV_VALIDE_THRESHOLD) {
+        // Every median is below the threshold, rebuild the envelope from the means instead.
+        if (!buildEnvelopes(false)) {
+            SEN_HILOGE("Mean failed");
+            return Sensors::ERROR;
+        }
+    }
+    const double tPerFrame = static_cast<double>(hopLength) / SAMPLE_RATE;
+    PeakFinder peakFinder;
+    onsetInfo_.idxs = peakFinder.DetectPeak(onsetInfo_.envelopes, ONSET_PEAK_THRESHOLD_RATIO);
+    onsetInfo_.times.reserve(onsetInfo_.idxs.size());
+    for (const auto idx : onsetInfo_.idxs) {
+        onsetInfo_.times.push_back(idx * tPerFrame);
+    }
+    onsetInfo = onsetInfo_;
+    return Sensors::SUCCESS;
+}
+} // namespace Sensors
+} // namespace OHOS
diff --git a/vibration_convert/core/native/src/vibration_convert_core.cpp b/vibration_convert/core/native/src/vibration_convert_core.cpp
index 13d16e22b7e256c496afa0f0e41d483cb87b51e4..f491b1a36617308355b29a56fb664c5ab74ccbc0 100644
---
a/vibration_convert/core/native/src/vibration_convert_core.cpp +++ b/vibration_convert/core/native/src/vibration_convert_core.cpp @@ -202,7 +202,7 @@ int32_t VibrationConvertCore::PreprocessParameter(const std::vector &dat SEN_HILOGE("CheckOnset Failed"); return Sensors::ERROR; } - std::vector newDrwIdx = MapOnsetHop(onsetInfo.idx, onsetHopLength); + std::vector newDrwIdx = MapOnsetHop(onsetInfo.idxs, onsetHopLength); lowerDelta = CalcRmsLowerData(data.size(), rmse, newDrwIdx); double rmseMax = *std::max_element(rmse.begin(),rmse.end()); size_t newDrwIdxLen = newDrwIdx.size(); @@ -581,10 +581,10 @@ std::vector VibrationConvertCore::DetectOnset(const std::ve SEN_HILOGE("CheckOnset Failed"); return {}; } - onsetInfo.idx = MapOnsetHop(onsetInfo.idx, onsetHopLength); + onsetInfo.idxs = MapOnsetHop(onsetInfo.idxs, onsetHopLength); std::vector newIdx; std::vector newTime; - UniqueIdx(onsetInfo.idx, onsetInfo.time, newIdx, newTime); + UniqueIdx(onsetInfo.idxs, onsetInfo.times, newIdx, newTime); int32_t minSkip = ONSET_MINSKIP_MAX; if (newIdx.size() > 1) { std::vector idxDiff;