diff --git a/sonic/BUILD.gn b/sonic/BUILD.gn index 672e76dd3dea6c4dfc6ce1f93cc3ad373b0c2c9b..c7b6b07b212d5fb96f8ab0c88b33d7c52a79e2ad 100644 --- a/sonic/BUILD.gn +++ b/sonic/BUILD.gn @@ -35,12 +35,17 @@ config("sonic_include_config") { ohos_shared_library("sonic") { branch_protector_ret = "pac_ret" - sources = [ "./sonic.c" ] + include_dirs = [ "../sonic/" ] - configs = [ ":sonic_config" ] + sources = [ + "./sonic.c", + "./wave.c", + ] public_configs = [ ":sonic_include_config" ] + configs = [ ":sonic_config" ] + innerapi_tags = [ "platformsdk" ] subsystem_name = "thirdparty" part_name = "pulseaudio" diff --git a/sonic/LICENSE b/sonic/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..d645695673349e3947e8e5ae42332d0ac3164cd7 --- /dev/null +++ b/sonic/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/sonic/Main.java b/sonic/Main.java index 31a68d4893a504a1c7a88f6af633b7c5d0e91360..437857f56d688fa6c078a27cd6b19835f3b8e0a8 100644 --- a/sonic/Main.java +++ b/sonic/Main.java @@ -5,7 +5,6 @@ package sonic; import java.io.File; import java.io.IOException; - import javax.sound.sampled.AudioFormat; import javax.sound.sampled.AudioInputStream; import javax.sound.sampled.AudioSystem; @@ -60,14 +59,14 @@ public class Main { public static void main( String[] argv) throws UnsupportedAudioFileException, IOException, LineUnavailableException { - float speed = 2.0f; + float speed = 1.0f; float pitch = 1.0f; - float rate = 1.0f; + float rate = 1.5f; float volume = 1.0f; boolean emulateChordPitch = false; int quality = 0; - AudioInputStream stream = AudioSystem.getAudioInputStream(new File("talking.wav")); + AudioInputStream stream = AudioSystem.getAudioInputStream(new File("stereo_test.wav")); AudioFormat format = stream.getFormat(); int sampleRate = (int)format.getSampleRate(); int numChannels = format.getChannels(); diff --git a/sonic/Makefile b/sonic/Makefile index 696e21103d7e16c6460a0675a65c0946d94c5b58..4f4b8b58f7a31b7d6ffb0b855401ef0933608b51 100644 --- a/sonic/Makefile +++ b/sonic/Makefile @@ -5,56 +5,149 @@ # safe. We call malloc, and older Linux versions only linked in the thread-safe # malloc if -pthread is specified. 
-SONAME=soname +# Uncomment this if you want to link in spectrogram generation. It is not +# needed to adjust speech speed or pitch. It is included primarily to provide +# high-quality spectrograms with low CPU overhead, for applications such as +# speech recognition. +#USE_SPECTROGRAM=1 + +PREFIX=/usr + UNAME := $(shell uname) ifeq ($(UNAME), Darwin) - SONAME=install_name + PREFIX=/usr/local endif -#CFLAGS=-Wall -g -ansi -fPIC -pthread -CFLAGS=-Wall -O3 -ansi -fPIC -pthread -LIB_TAG=0.2.0 -CC=gcc -PREFIX=/usr + +BINDIR=$(PREFIX)/bin LIBDIR=$(PREFIX)/lib +INCDIR=$(PREFIX)/include + +SONAME=-soname, +SHARED_OPT=-shared +LIB_NAME=libsonic.so +LIB_INTERNAL_NAME=libsonic_internal.so +LIB_TAG=.0.3.0 + +ifeq ($(UNAME), Darwin) + SONAME=-install_name,$(LIBDIR)/ + SHARED_OPT=-dynamiclib + LIB_NAME=libsonic.dylib + LIB_TAG= +endif + +CFLAGS=-Wall -Wno-unused-function -g -ansi -fPIC -pthread +#CFLAGS ?= -O3 +#CFLAGS += -Wall -Wno-unused-function -ansi -fPIC -pthread + +CC=gcc + +# Set NO_MALLOC=1 as a parameter to make to compile Sonic with static buffers +# instead of calling malloc. This is useful primarily on microcontrollers. +ifeq ($(NO_MALLOC), 1) + CFLAGS+= -DSONIC_NO_MALLOC + # Set MAX_MEMORY= if you need to increase the static memory buffer + ifdef MAX_MEMORY + CFLAGS+= -DSONIC_MAX_MEMORY=$(MAX_MEMORY) + else + CFLAGS+= -DSONIC_MAX_MEMORY=4096 + endif +endif + +ifdef MIN_PITCH + CFLAGS+= -DSONIC_MIN_PITCH=$(MIN_PITCH) +endif -all: sonic libsonic.so.$(LIB_TAG) libsonic.a +EXTRA_SRC= +# Set this to empty if not using spectrograms. 
+FFTLIB= +ifeq ($(USE_SPECTROGRAM), 1) + CFLAGS+= -DSONIC_SPECTROGRAM + EXTRA_SRC+= spectrogram.c + FFTLIB= -L$(LIBDIR) -lfftw3 +endif +EXTRA_OBJ=$(EXTRA_SRC:.c=.o) + +all: sonic sonic_lite sonic_experimental $(LIB_NAME)$(LIB_TAG) libsonic.a libsonic_internal.a $(LIB_INTERNAL_NAME)$(LIB_TAG) + +sonic: main.o libsonic.a + $(CC) $(CFLAGS) $(LDFLAGS) -o sonic main.o libsonic.a -lm $(FFTLIB) -sonic: wave.o main.o libsonic.so.$(LIB_TAG) - $(CC) $(CFLAGS) -o sonic wave.o main.o libsonic.so.$(LIB_TAG) + +sonic_lite: wave.c wave.h main_lite.c sonic_lite.c sonic_lite.h + $(CC) $(CFLAGS) $(LDFLAGS) -o sonic_lite sonic_lite.c wave.c main_lite.c + +sonic_experimental: wave.c wave.h main_experimental.c sonic_experimental.c sonic_experimental.h + $(CC) $(CFLAGS) $(LDFLAGS) -o sonic_experimental sonic_experimental.c wave.c main_experimental.c -lm sonic.o: sonic.c sonic.h - $(CC) $(CFLAGS) -c sonic.c + $(CC) $(CPPFLAGS) $(CFLAGS) -c sonic.c + +# Define a version of sonic with the internal names defined so others (i.e. Speedy) +# can build new APIs that supersede the default API. 
+sonic_internal.o: sonic.c sonic.h + $(CC) $(CPPFLAGS) $(CFLAGS) -DSONIC_INTERNAL -c sonic.c -o sonic_internal.o wave.o: wave.c wave.h - $(CC) $(CFLAGS) -c wave.c + $(CC) $(CPPFLAGS) $(CFLAGS) -c wave.c main.o: main.c sonic.h wave.h - $(CC) $(CFLAGS) -c main.c + $(CC) $(CPPFLAGS) $(CFLAGS) -c main.c -libsonic.so.$(LIB_TAG): sonic.o - $(CC) $(CFLAGS) -shared -Wl,-$(SONAME),libsonic.so.0 sonic.o -o libsonic.so.$(LIB_TAG) - ln -sf libsonic.so.$(LIB_TAG) libsonic.so - ln -sf libsonic.so.$(LIB_TAG) libsonic.so.0 +spectrogram.o: spectrogram.c sonic.h + $(CC) $(CPPFLAGS) $(CFLAGS) -DSONIC_SPECTROGRAM -c spectrogram.c + +$(LIB_NAME)$(LIB_TAG): $(EXTRA_OBJ) sonic.o wave.o + $(CC) $(CFLAGS) $(LDFLAGS) $(SHARED_OPT) -Wl,$(SONAME)$(LIB_NAME) $(EXTRA_OBJ) sonic.o -o $(LIB_NAME)$(LIB_TAG) $(FFTLIB) wave.o +ifneq ($(UNAME), Darwin) + ln -sf $(LIB_NAME)$(LIB_TAG) $(LIB_NAME) + ln -sf $(LIB_NAME)$(LIB_TAG) $(LIB_NAME).0 +endif + +$(LIB_INTERNAL_NAME)$(LIB_TAG): $(EXTRA_OBJ) sonic_internal.o wave.o # No spectrogram needed here. + $(CC) $(CFLAGS) $(LDFLAGS) $(SHARED_OPT) -Wl,$(SONAME)$(LIB_INTERNAL_NAME) $(EXTRA_OBJ) sonic_internal.o -o $(LIB_INTERNAL_NAME)$(LIB_TAG) $(FFTLIB) wave.o +ifneq ($(UNAME), Darwin) + ln -sf $(LIB_INTERNAL_NAME)$(LIB_TAG) $(LIB_INTERNAL_NAME) + ln -sf $(LIB_INTERNAL_NAME)$(LIB_TAG) $(LIB_INTERNAL_NAME).0 +endif -libsonic.a: sonic.o - $(AR) cqs libsonic.a sonic.o +libsonic.a: $(EXTRA_OBJ) sonic.o wave.o + $(AR) cqs libsonic.a $(EXTRA_OBJ) sonic.o wave.o -install: sonic libsonic.so.$(LIB_TAG) sonic.h - install -d $(DESTDIR)$(PREFIX)/bin $(DESTDIR)$(PREFIX)/include $(DESTDIR)$(PREFIX)/lib - install sonic $(DESTDIR)$(PREFIX)/bin - install sonic.h $(DESTDIR)$(PREFIX)/include - install libsonic.so.$(LIB_TAG) $(DESTDIR)$(PREFIX)/lib +# Define a version of sonic with the internal names defined so others (i.e. Speedy) +# can build new APIs that supersede the default API. 
+libsonic_internal.a: $(EXTRA_OBJ) sonic_internal.o wave.o + $(AR) cqs libsonic_internal.a $(EXTRA_OBJ) sonic_internal.o wave.o + +install: sonic $(LIB_NAME)$(LIB_TAG) sonic.h + install -d $(DESTDIR)$(BINDIR) $(DESTDIR)$(INCDIR) $(DESTDIR)$(LIBDIR) + install sonic $(DESTDIR)$(BINDIR) + install sonic.h $(DESTDIR)$(INCDIR) install libsonic.a $(DESTDIR)$(LIBDIR) - ln -sf libsonic.so.$(LIB_TAG) $(DESTDIR)$(PREFIX)/lib/libsonic.so - ln -sf libsonic.so.$(LIB_TAG) $(DESTDIR)$(PREFIX)/lib/libsonic.so.0 - -uninstall: - rm -f $(DESTDIR)$(PREFIX)/bin/sonic - rm -f $(DESTDIR)$(PREFIX)/include/sonic.h - rm -f $(DESTDIR)$(PREFIX)/lib/libsonic.so.$(LIB_TAG) - rm -f $(DESTDIR)$(PREFIX)/lib/libsonic.so - rm -f $(DESTDIR)$(PREFIX)/lib/libsonic.so.0 + install $(LIB_NAME)$(LIB_TAG) $(DESTDIR)$(LIBDIR) +ifneq ($(UNAME), Darwin) + ln -sf $(LIB_NAME)$(LIB_TAG) $(DESTDIR)$(LIBDIR)/$(LIB_NAME) + ln -sf $(LIB_NAME)$(LIB_TAG) $(DESTDIR)$(LIBDIR)/$(LIB_NAME).0 +endif + +uninstall: + rm -f $(DESTDIR)$(BINDIR)/sonic + rm -f $(DESTDIR)$(INCDIR)/sonic.h rm -f $(DESTDIR)$(LIBDIR)/libsonic.a + rm -f $(DESTDIR)$(LIBDIR)/$(LIB_NAME)$(LIB_TAG) + rm -f $(DESTDIR)$(LIBDIR)/$(LIB_NAME).0 + rm -f $(DESTDIR)$(LIBDIR)/$(LIB_NAME) clean: - rm -f *.o sonic libsonic.so* libsonic.a + rm -f *.o sonic sonic_lite sonic_experimental $(LIB_NAME)* libsonic.a libsonic_internal.a test.wav + +check: + ./sonic -s 2.0 ./samples/talking.wav ./test.wav + + +libspeedy.so: + cd speedy; make libspeedy.so SONIC_DIR=.. FFTW_DIR=../../fftw + +speedy_wave: libsonic_internal.so + cd speedy; make speedy_wave SONIC_DIR=.. FFTW_DIR=../../fftw + # You will probably also need to set the LDPATH. For example + # export LD_LIBRARY_PATH=/usr/local/lib:../kissfft:speedy:. 
+ diff --git a/sonic/README b/sonic/README index 9a0b9fe9ffb3640403ba2fe26df33dfbd79ace85..243bdc9389bd1a3cda598c20f2156596022e7caf 100644 --- a/sonic/README +++ b/sonic/README @@ -32,5 +32,14 @@ real 0m52.043s user 0m51.190s sys 0m0.310s +Update, May 7, 2017 +------------------- +I upgraded the pitch change algorithm to use a 12-point sinc FIR filter for +interpolation, rather than linearly interpolating between points. This +significantly reduces noise introduced by the pitch change algorithm. It is +most noticeable in low-sample-rate streams, such as the 11,025 Hz output of the +Eloquence TTS engine. The upgrade is in both the C and Java versions. + + Author: Bill Cox email: waywardgeek@gmail.com diff --git a/sonic/Sonic.java b/sonic/Sonic.java index a3394d71b4cc59bf11e8d1dda1d9d53e2fb985a1..3a2594009f45f9c432ab83bfb674cffb8f2f9d87 100644 --- a/sonic/Sonic.java +++ b/sonic/Sonic.java @@ -10,10 +10,66 @@ package sonic; public class Sonic { - private static final int SONIC_MIN_PITCH = 65; - private static final int SONIC_MAX_PITCH = 400; - /* This is used to down-sample some inputs to improve speed */ - private static final int SONIC_AMDF_FREQ = 4000; + private static final int SONIC_MIN_PITCH = 65; + private static final int SONIC_MAX_PITCH = 400; + // This is used to down-sample some inputs to improve speed + private static final int SONIC_AMDF_FREQ = 4000; + // The number of points to use in the sinc FIR filter for resampling. + private static final int SINC_FILTER_POINTS = 12; + private static final int SINC_TABLE_SIZE = 601; + + // Lookup table for windowed sinc function of SINC_FILTER_POINTS points. + // The code to generate this is in the header comment of sonic.c. 
+ private static final short sincTable[] = { + 0, 0, 0, 0, 0, 0, 0, -1, -1, -2, -2, -3, -4, -6, -7, -9, -10, -12, -14, + -17, -19, -21, -24, -26, -29, -32, -34, -37, -40, -42, -44, -47, -48, -50, + -51, -52, -53, -53, -53, -52, -50, -48, -46, -43, -39, -34, -29, -22, -16, + -8, 0, 9, 19, 29, 41, 53, 65, 79, 92, 107, 121, 137, 152, 168, 184, 200, + 215, 231, 247, 262, 276, 291, 304, 317, 328, 339, 348, 357, 363, 369, 372, + 374, 375, 373, 369, 363, 355, 345, 332, 318, 300, 281, 259, 234, 208, 178, + 147, 113, 77, 39, 0, -41, -85, -130, -177, -225, -274, -324, -375, -426, + -478, -530, -581, -632, -682, -731, -779, -825, -870, -912, -951, -989, + -1023, -1053, -1080, -1104, -1123, -1138, -1149, -1154, -1155, -1151, + -1141, -1125, -1105, -1078, -1046, -1007, -963, -913, -857, -796, -728, + -655, -576, -492, -403, -309, -210, -107, 0, 111, 225, 342, 462, 584, 708, + 833, 958, 1084, 1209, 1333, 1455, 1575, 1693, 1807, 1916, 2022, 2122, 2216, + 2304, 2384, 2457, 2522, 2579, 2625, 2663, 2689, 2706, 2711, 2705, 2687, + 2657, 2614, 2559, 2491, 2411, 2317, 2211, 2092, 1960, 1815, 1658, 1489, + 1308, 1115, 912, 698, 474, 241, 0, -249, -506, -769, -1037, -1310, -1586, + -1864, -2144, -2424, -2703, -2980, -3254, -3523, -3787, -4043, -4291, + -4529, -4757, -4972, -5174, -5360, -5531, -5685, -5819, -5935, -6029, + -6101, -6150, -6175, -6175, -6149, -6096, -6015, -5905, -5767, -5599, + -5401, -5172, -4912, -4621, -4298, -3944, -3558, -3141, -2693, -2214, + -1705, -1166, -597, 0, 625, 1277, 1955, 2658, 3386, 4135, 4906, 5697, 6506, + 7332, 8173, 9027, 9893, 10769, 11654, 12544, 13439, 14335, 15232, 16128, + 17019, 17904, 18782, 19649, 20504, 21345, 22170, 22977, 23763, 24527, + 25268, 25982, 26669, 27327, 27953, 28547, 29107, 29632, 30119, 30569, + 30979, 31349, 31678, 31964, 32208, 32408, 32565, 32677, 32744, 32767, + 32744, 32677, 32565, 32408, 32208, 31964, 31678, 31349, 30979, 30569, + 30119, 29632, 29107, 28547, 27953, 27327, 26669, 25982, 25268, 24527, + 23763, 22977, 22170, 
21345, 20504, 19649, 18782, 17904, 17019, 16128, + 15232, 14335, 13439, 12544, 11654, 10769, 9893, 9027, 8173, 7332, 6506, + 5697, 4906, 4135, 3386, 2658, 1955, 1277, 625, 0, -597, -1166, -1705, + -2214, -2693, -3141, -3558, -3944, -4298, -4621, -4912, -5172, -5401, + -5599, -5767, -5905, -6015, -6096, -6149, -6175, -6175, -6150, -6101, + -6029, -5935, -5819, -5685, -5531, -5360, -5174, -4972, -4757, -4529, + -4291, -4043, -3787, -3523, -3254, -2980, -2703, -2424, -2144, -1864, + -1586, -1310, -1037, -769, -506, -249, 0, 241, 474, 698, 912, 1115, 1308, + 1489, 1658, 1815, 1960, 2092, 2211, 2317, 2411, 2491, 2559, 2614, 2657, + 2687, 2705, 2711, 2706, 2689, 2663, 2625, 2579, 2522, 2457, 2384, 2304, + 2216, 2122, 2022, 1916, 1807, 1693, 1575, 1455, 1333, 1209, 1084, 958, 833, + 708, 584, 462, 342, 225, 111, 0, -107, -210, -309, -403, -492, -576, -655, + -728, -796, -857, -913, -963, -1007, -1046, -1078, -1105, -1125, -1141, + -1151, -1155, -1154, -1149, -1138, -1123, -1104, -1080, -1053, -1023, -989, + -951, -912, -870, -825, -779, -731, -682, -632, -581, -530, -478, -426, + -375, -324, -274, -225, -177, -130, -85, -41, 0, 39, 77, 113, 147, 178, + 208, 234, 259, 281, 300, 318, 332, 345, 355, 363, 369, 373, 375, 374, 372, + 369, 363, 357, 348, 339, 328, 317, 304, 291, 276, 262, 247, 231, 215, 200, + 184, 168, 152, 137, 121, 107, 92, 79, 65, 53, 41, 29, 19, 9, 0, -8, -16, + -22, -29, -34, -39, -43, -46, -48, -50, -52, -53, -53, -53, -52, -51, -50, + -48, -47, -44, -42, -40, -37, -34, -32, -29, -26, -24, -21, -19, -17, -14, + -12, -10, -9, -7, -6, -4, -3, -2, -2, -1, -1, 0, 0, 0, 0, 0, 0, 0 + }; private short inputBuffer[]; private short outputBuffer[]; @@ -41,34 +97,31 @@ public class Sonic { private int sampleRate; private int prevPeriod; private int prevMinDiff; + private int minDiff; + private int maxDiff; // Resize the array. 
private short[] resize( - short[] oldArray, - int newLength) + short[] oldArray, + int newLength) { - newLength *= numChannels; - short[] newArray = new short[newLength]; + newLength *= numChannels; + short[] newArray = new short[newLength]; int length = oldArray.length <= newLength? oldArray.length : newLength; - - - for(int x = 0; x < length; x++) { - newArray[x] = oldArray[x]; - } + + System.arraycopy(oldArray, 0, newArray, 0, length); return newArray; } // Move samples from one array to another. May move samples down within an array, but not up. private void move( - short dest[], - int destPos, - short source[], - int sourcePos, - int numSamples) + short dest[], + int destPos, + short source[], + int sourcePos, + int numSamples) { - for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { - dest[destPos*numChannels + xSample] = source[sourcePos*numChannels + xSample]; - } + System.arraycopy(source, sourcePos*numChannels, dest, destPos*numChannels, numSamples*numChannels); } // Scale the samples by the factor. @@ -78,11 +131,13 @@ public class Sonic { int numSamples, float volume) { + // Convert volume to fixed-point, with a 12 bit fraction. int fixedPointVolume = (int)(volume*4096.0f); int start = position*numChannels; int stop = start + numSamples*numChannels; for(int xSample = start; xSample < stop; xSample++) { + // Convert back from fixed point to 16-bit integer. 
int value = (samples[xSample]*fixedPointVolume) >> 12; if(value > 32767) { value = 32767; @@ -296,7 +351,7 @@ public class Sonic { enlargeInputBufferIfNeeded(numSamples); int xBuffer = numInputSamples*numChannels; for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { - sample = (short)((samples[xSample] & 0xff) - 128); // Convert from unsigned to signed + sample = (short)((samples[xSample] & 0xff) - 128); // Convert from unsigned to signed inputBuffer[xBuffer++] = (short) (sample << 8); } numInputSamples += numSamples; @@ -307,13 +362,13 @@ public class Sonic { byte inBuffer[], int numBytes) { - int numSamples = numBytes/(2*numChannels); + int numSamples = numBytes/(2*numChannels); short sample; enlargeInputBufferIfNeeded(numSamples); int xBuffer = numInputSamples*numChannels; for(int xByte = 0; xByte + 1 < numBytes; xByte += 2) { - sample = (short)((inBuffer[xByte] & 0xff) | (inBuffer[xByte + 1] << 8)); + sample = (short)((inBuffer[xByte] & 0xff) | (inBuffer[xByte + 1] << 8)); inputBuffer[xBuffer++] = sample; } numInputSamples += numSamples; @@ -371,7 +426,7 @@ public class Sonic { numSamples = maxSamples; } for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { - samples[xSample++] = (outputBuffer[xSample])/32767.0f; + samples[xSample] = (outputBuffer[xSample])/32767.0f; } move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples); numOutputSamples = remainingSamples; @@ -417,7 +472,7 @@ public class Sonic { numSamples = maxSamples; } for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { - samples[xSample] = (byte)((outputBuffer[xSample] >> 8) + 128); + samples[xSample] = (byte)((outputBuffer[xSample] >> 8) + 128); } move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples); numOutputSamples = remainingSamples; @@ -430,7 +485,7 @@ public class Sonic { byte outBuffer[], int maxBytes) { - int maxSamples = maxBytes/(2*numChannels); + int maxSamples = maxBytes/(2*numChannels); int numSamples = numOutputSamples; 
int remainingSamples = 0; @@ -442,9 +497,9 @@ public class Sonic { numSamples = maxSamples; } for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { - short sample = outputBuffer[xSample]; - outBuffer[xSample << 1] = (byte)(sample & 0xff); - outBuffer[(xSample << 1) + 1] = (byte)(sample >> 8); + short sample = outputBuffer[xSample]; + outBuffer[xSample << 1] = (byte)(sample & 0xff); + outBuffer[(xSample << 1) + 1] = (byte)(sample >> 8); } move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples); numOutputSamples = remainingSamples; @@ -508,15 +563,12 @@ public class Sonic { } // Find the best frequency match in the range, and given a sample skip multiple. - // For now, just find the pitch of the first channel. Note that retMinDiff and - // retMaxDiff are Int objects, which the caller will need to create with new. + // For now, just find the pitch of the first channel. private int findPitchPeriodInRange( short samples[], int position, int minPeriod, - int maxPeriod, - Integer retMinDiff, - Integer retMaxDiff) + int maxPeriod) { int bestPeriod = 0, worstPeriod = 255; int minDiff = 1, maxDiff = 0; @@ -541,15 +593,15 @@ public class Sonic { worstPeriod = period; } } - retMinDiff = minDiff/bestPeriod; - retMaxDiff = maxDiff/worstPeriod; + this.minDiff = minDiff/bestPeriod; + this.maxDiff = maxDiff/worstPeriod; + return bestPeriod; } // At abrupt ends of voiced words, we can have pitch periods that are better // approximated by the previous pitch period estimate. Try to detect this case. 
private boolean prevPeriodBetter( - int period, int minDiff, int maxDiff, boolean preferNewPeriod) @@ -583,8 +635,6 @@ public class Sonic { int position, boolean preferNewPeriod) { - Integer minDiff = new Integer(0); - Integer maxDiff = new Integer(0); int period, retPeriod; int skip = 1; @@ -592,11 +642,11 @@ public class Sonic { skip = sampleRate/SONIC_AMDF_FREQ; } if(numChannels == 1 && skip == 1) { - period = findPitchPeriodInRange(samples, position, minPeriod, maxPeriod, minDiff, maxDiff); + period = findPitchPeriodInRange(samples, position, minPeriod, maxPeriod); } else { downSampleInput(samples, position, skip); period = findPitchPeriodInRange(downSampleBuffer, 0, minPeriod/skip, - maxPeriod/skip, minDiff, maxDiff); + maxPeriod/skip); if(skip != 1) { period *= skip; int minP = period - (skip << 2); @@ -608,14 +658,14 @@ public class Sonic { maxP = maxPeriod; } if(numChannels == 1) { - period = findPitchPeriodInRange(samples, position, minP, maxP, minDiff, maxDiff); + period = findPitchPeriodInRange(samples, position, minP, maxP); } else { downSampleInput(samples, position, 1); - period = findPitchPeriodInRange(downSampleBuffer, 0, minP, maxP, minDiff, maxDiff); + period = findPitchPeriodInRange(downSampleBuffer, 0, minP, maxP); } } } - if(prevPeriodBetter(period, minDiff, maxDiff, preferNewPeriod)) { + if(prevPeriodBetter(minDiff, maxDiff, preferNewPeriod)) { retPeriod = prevPeriod; } else { retPeriod = period; @@ -728,11 +778,11 @@ public class Sonic { enlargeOutputBufferIfNeeded(newPeriod); if(pitch >= 1.0f) { overlapAdd(newPeriod, numChannels, outputBuffer, numOutputSamples, pitchBuffer, - position, pitchBuffer, position + period - newPeriod); + position, pitchBuffer, position + period - newPeriod); } else { separation = newPeriod - period; overlapAddWithSeparation(period, numChannels, separation, outputBuffer, numOutputSamples, - pitchBuffer, position, pitchBuffer, position); + pitchBuffer, position, pitchBuffer, position); } numOutputSamples += 
newPeriod; position += period; @@ -740,22 +790,60 @@ public class Sonic { removePitchSamples(position); } + // Approximate the sinc function times a Hann window from the sinc table. + private int findSincCoefficient(int i, int ratio, int width) { + int lobePoints = (SINC_TABLE_SIZE-1)/SINC_FILTER_POINTS; + int left = i*lobePoints + (ratio*lobePoints)/width; + int right = left + 1; + int position = i*lobePoints*width + ratio*lobePoints - left*width; + int leftVal = sincTable[left]; + int rightVal = sincTable[right]; + + return ((leftVal*(width - position) + rightVal*position) << 1)/width; + } + + // Return 1 if value >= 0, else -1. This represents the sign of value. + private int getSign(int value) { + return value >= 0? 1 : -1; + } + // Interpolate the new output sample. private short interpolate( short in[], - int inPos, + int inPos, // Index to first sample which already includes channel offset. int oldSampleRate, int newSampleRate) { - short left = in[inPos*numChannels]; - short right = in[inPos*numChannels + numChannels]; + // Compute N-point sinc FIR-filter here. Clip rather than overflow. + int i; + int total = 0; int position = newRatePosition*oldSampleRate; int leftPosition = oldRatePosition*newSampleRate; int rightPosition = (oldRatePosition + 1)*newSampleRate; - int ratio = rightPosition - position; + int ratio = rightPosition - position - 1; int width = rightPosition - leftPosition; - - return (short)((ratio*left + (width - ratio)*right)/width); + int weight, value; + int oldSign; + int overflowCount = 0; + + for (i = 0; i < SINC_FILTER_POINTS; i++) { + weight = findSincCoefficient(i, ratio, width); + /* printf("%u %f\n", i, weight); */ + value = in[inPos + i*numChannels]*weight; + oldSign = getSign(total); + total += value; + if (oldSign != getSign(total) && getSign(value) == oldSign) { + /* We must have overflowed. This can happen with a sinc filter. */ + overflowCount += oldSign; + } + } + /* It is better to clip than to wrap if there was a overflow. 
*/ + if (overflowCount > 0) { + return Short.MAX_VALUE; + } else if (overflowCount < 0) { + return Short.MIN_VALUE; + } + return (short)(total >> 16); } // Change the rate. @@ -766,6 +854,7 @@ public class Sonic { int newSampleRate = (int)(sampleRate/rate); int oldSampleRate = sampleRate; int position; + int N = SINC_FILTER_POINTS; // Set these values to help with the integer math while(newSampleRate > (1 << 14) || oldSampleRate > (1 << 14)) { @@ -776,13 +865,13 @@ public class Sonic { return; } moveNewSamplesToPitchBuffer(originalNumOutputSamples); - // Leave at least one pitch sample in the buffer - for(position = 0; position < numPitchSamples - 1; position++) { + // Leave at least N pitch samples in the buffer + for(position = 0; position < numPitchSamples - N; position++) { while((oldRatePosition + 1)*newSampleRate > newRatePosition*oldSampleRate) { enlargeOutputBufferIfNeeded(1); for(int i = 0; i < numChannels; i++) { - outputBuffer[numOutputSamples*numChannels + i] = interpolate(pitchBuffer, position + i, - oldSampleRate, newSampleRate); + outputBuffer[numOutputSamples*numChannels + i] = interpolate(pitchBuffer, + position*numChannels + i, oldSampleRate, newSampleRate); } newRatePosition++; numOutputSamples++; @@ -818,7 +907,7 @@ public class Sonic { } enlargeOutputBufferIfNeeded(newSamples); overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples, samples, position, - samples, position + period); + samples, position + period); numOutputSamples += newSamples; return newSamples; } @@ -841,7 +930,7 @@ public class Sonic { enlargeOutputBufferIfNeeded(period + newSamples); move(outputBuffer, numOutputSamples, samples, position, period); overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples + period, samples, - position + period, samples, position); + position + period, samples, position); numOutputSamples += period + newSamples; return newSamples; } diff --git a/sonic/TODO b/sonic/TODO new file mode 100644 index 
0000000000000000000000000000000000000000..bacb3e59fa622479e64472824938d0e18af794f3 --- /dev/null +++ b/sonic/TODO @@ -0,0 +1,13 @@ +This project still needs some enhancements: + +Security hardening +------------------ +This project should be fuzzed, not just for sound inputs, but for settings +outside normal ranges, such as setting the speed to 1.0e10. Sonic should be +secure against attacker-controlled audio signals . However libsonic on some +systems may run at elevated privileges, and may be controlled through APIs +available in user space. + +Unit tests +---------- +Sonic is now widely used, and should be properly covered by unit tests. diff --git a/sonic/debian/control b/sonic/debian/control index caeba9590fd5d25cb56a44584861c1d9427b0ab2..f756ce7c60d066850d05ee5bc737540457127b8f 100644 --- a/sonic/debian/control +++ b/sonic/debian/control @@ -25,7 +25,7 @@ Description: Simple library to speed up or slow down speech This package contains just the actual library. libsonic is a very simple library for speeding up or slowing down speech. It has only basic dependencies, and is meant to - work on both Linux destop machines and embedded systems. + work on both Linux desktop machines and embedded systems. The key new feature in Sonic versus other libraries is very high quality at speed up factors well over 2X. diff --git a/sonic/doc/index.md b/sonic/doc/index.md index 1e02141508f9ee0820e8eace1874a755eb5af97e..855abe06d654287f4f819917572b92bb17d86485 100644 --- a/sonic/doc/index.md +++ b/sonic/doc/index.md @@ -39,7 +39,7 @@ Sonic is Copyright 2010, 2011, Bill Cox, all rights reserved. It is released as under the Apache 2.0 license. Feel free to contact me at . One user was concerned about patents. 
I believe the sonic algorithms do not violate any patents, as most of it is very old, based -on [PICOLA](http://keizai.yokkaichi-u.ac.jp/~ikeda/research/picola.html), and +on [PICOLA](https://web.archive.org/web/20120731100136/http://keizai.yokkaichi-u.ac.jp/~ikeda/research/picola.html), and the new part, for greater than 2X speed up, is clearly a capability most developers ignore, and would not bother to patent. @@ -62,7 +62,7 @@ implementation of PICOLA is available in the spandsp library. I find the one in RockBox quite good, though it's limited to 2X speed up. So far as I know, only sonic is optimized for speed factors needed by the blind, up to 6X. -Sonic does all of it's CPU intensive work with integer math, and works well on +Sonic does all of its CPU intensive work with integer math, and works well on ARM CPUs without FPUs. It supports multiple channels (stereo), and is also able to change the pitch of a voice. It works well in streaming audio applications, and can deal with sound streams in 16-bit signed integer, 32-bit floating point, diff --git a/sonic/genwave.c b/sonic/genwave.c new file mode 100644 index 0000000000000000000000000000000000000000..f927586d8d2ef4b7c10fca0c554a74b880eca3fd --- /dev/null +++ b/sonic/genwave.c @@ -0,0 +1,35 @@ +#include +#include + +#include "wave.h" + +#ifndef M_PI +# define M_PI 3.1415926535897932384 +#endif + +/* Write a sine wave to outFile. 
*/ +static void genSineWave(waveFile outFile, int sampleRate, int period, int amplitude, int numPeriods) { + int i, j; + short value; + double x; + + for (i = 0; i < numPeriods; i++) { + for (j = 0; j < period; j++) { + x = (double)j * (2.0 * M_PI) / period; + value = (short)(amplitude * sin(x)); + writeToWaveFile(outFile, &value, 1); + } + } +} + +int main(int argc, char** argv) { + int sampleRate = 96000; + int freq = 200; + int period = sampleRate / freq; + int amplitude = 6000; + waveFile outFile = openOutputWaveFile("out.wav", sampleRate, 1); + + genSineWave(outFile, sampleRate, period, amplitude, 500); + closeWaveFile(outFile); + return 0; +} diff --git a/sonic/main.c b/sonic/main.c index 77829c2f3ae437ae10c0c3418f1ec0a9a98f32a0..cc6371df6099b03358a0040d245929ce5d7af3a0 100644 --- a/sonic/main.c +++ b/sonic/main.c @@ -2,7 +2,7 @@ 2.0 license. This file is meant as a simple example for how to use libsonic. It is also a - useful utility on it's own, which can speed up or slow down wav files, change + useful utility on its own, which can speed up or slow down wav files, change pitch, and scale volume. */ #include @@ -14,126 +14,175 @@ #define BUFFER_SIZE 2048 /* Run sonic. 
*/ -static void runSonic( - waveFile inFile, - waveFile outFile, - float speed, - float pitch, - float rate, - float volume, - int emulateChordPitch, - int quality, - int sampleRate, - int numChannels) -{ - sonicStream stream = sonicCreateStream(sampleRate, numChannels); - short inBuffer[BUFFER_SIZE], outBuffer[BUFFER_SIZE]; - int samplesRead, samplesWritten; +static void runSonic(char* inFileName, char* outFileName, float speed, + float pitch, float rate, float volume, int outputSampleRate, + int emulateChordPitch, int quality, int computeSpectrogram, + int numRows, int numCols) { + waveFile inFile, outFile = NULL; + sonicStream stream; + short inBuffer[BUFFER_SIZE], outBuffer[BUFFER_SIZE]; + int sampleRate, numChannels, samplesRead, samplesWritten; - sonicSetSpeed(stream, speed); - sonicSetPitch(stream, pitch); - sonicSetRate(stream, rate); - sonicSetVolume(stream, volume); - sonicSetChordPitch(stream, emulateChordPitch); - sonicSetQuality(stream, quality); - do { - samplesRead = readFromWaveFile(inFile, inBuffer, BUFFER_SIZE/numChannels); - if(samplesRead == 0) { - sonicFlushStream(stream); - } else { - sonicWriteShortToStream(stream, inBuffer, samplesRead); - } - do { - samplesWritten = sonicReadShortFromStream(stream, outBuffer, - BUFFER_SIZE/numChannels); - if(samplesWritten > 0) { - writeToWaveFile(outFile, outBuffer, samplesWritten); - } - } while(samplesWritten > 0); - } while(samplesRead > 0); - sonicDestroyStream(stream); + inFile = openInputWaveFile(inFileName, &sampleRate, &numChannels); + if (outputSampleRate != 0) { + sampleRate = outputSampleRate; + } + if (inFile == NULL) { + fprintf(stderr, "Unable to read wave file %s\n", inFileName); + exit(1); + } + if (!computeSpectrogram) { + outFile = openOutputWaveFile(outFileName, sampleRate, numChannels); + if (outFile == NULL) { + closeWaveFile(inFile); + fprintf(stderr, "Unable to open wave file %s for writing\n", outFileName); + exit(1); + } + } + stream = sonicCreateStream(sampleRate, numChannels); + 
sonicSetSpeed(stream, speed); + sonicSetPitch(stream, pitch); + sonicSetRate(stream, rate); + sonicSetVolume(stream, volume); + sonicSetChordPitch(stream, emulateChordPitch); + sonicSetQuality(stream, quality); +#ifdef SONIC_SPECTROGRAM + if (computeSpectrogram) { + sonicComputeSpectrogram(stream); + } +#endif /* SONIC_SPECTROGRAM */ + do { + samplesRead = readFromWaveFile(inFile, inBuffer, BUFFER_SIZE / numChannels); + if (samplesRead == 0) { + sonicFlushStream(stream); + } else { + sonicWriteShortToStream(stream, inBuffer, samplesRead); + } + if (!computeSpectrogram) { + do { + samplesWritten = sonicReadShortFromStream(stream, outBuffer, + BUFFER_SIZE / numChannels); + if (samplesWritten > 0 && !computeSpectrogram) { + writeToWaveFile(outFile, outBuffer, samplesWritten); + } + } while (samplesWritten > 0); + } + } while (samplesRead > 0); +#ifdef SONIC_SPECTROGRAM + if (computeSpectrogram) { + sonicSpectrogram spectrogram = sonicGetSpectrogram(stream); + sonicBitmap bitmap = + sonicConvertSpectrogramToBitmap(spectrogram, numRows, numCols); + sonicWritePGM(bitmap, outFileName); + sonicDestroyBitmap(bitmap); + } +#endif /* SONIC_SPECTROGRAM */ + sonicDestroyStream(stream); + closeWaveFile(inFile); + if (!computeSpectrogram) { + closeWaveFile(outFile); + } } /* Print the usage. */ -static void usage(void) -{ - fprintf(stderr, "Usage: sonic [OPTION]... infile outfile\n" - " -c -- Modify pitch by emulating vocal chords vibrating\n" - " faster or slower.\n" - " -p pitch -- Set pitch scaling factor. 1.3 means 30%% higher.\n" - " -q -- Disable speed-up heuristics. May increase quality.\n" - " -r rate -- Set playback rate. 2.0 means 2X faster, and 2X pitch.\n" - " -s speed -- Set speed up factor. 2.0 means 2X faster.\n" - " -v volume -- Scale volume by a constant factor.\n"); - exit(1); +static void usage(void) { + fprintf( + stderr, + "Usage: sonic [OPTION]... 
infile outfile\n" + " -c -- Modify pitch by emulating vocal chords vibrating\n" + " faster or slower.\n" + " -o -- Override the sample rate of the output. -o 44200\n" + " on an input file at 22100 KHz will play twice as fast\n" + " and have twice the pitch.\n" + " -p pitch -- Set pitch scaling factor. 1.3 means 30%% higher.\n" + " -q -- Disable speed-up heuristics. May increase quality.\n" + " -r rate -- Set playback rate. 2.0 means 2X faster, and 2X " + "pitch.\n" + " -s speed -- Set speed up factor. 2.0 means 2X faster.\n" +#ifdef SONIC_SPECTROGRAM + " -S width height -- Write a spectrogram in outfile in PGM format.\n" +#endif /* SONIC_SPECTROGRAM */ + " -v volume -- Scale volume by a constant factor.\n"); + exit(1); } -int main( - int argc, - char **argv) -{ - waveFile inFile, outFile; - char *inFileName, *outFileName; - float speed = 1.0f; - float pitch = 1.0f; - float rate = 1.0f; - float volume = 1.0f; - int emulateChordPitch = 0; - int quality = 0; - int sampleRate, numChannels; - int xArg = 1; +int main(int argc, char** argv) { + char* inFileName; + char* outFileName; + float speed = 1.0f; + float pitch = 1.0f; + float rate = 1.0f; + float volume = 1.0f; + int outputSampleRate = 0; /* Means use the input file sample rate. 
*/ + int emulateChordPitch = 0; + int quality = 0; + int xArg = 1; + int computeSpectrogram = 0; + int numRows = 0, numCols = 0; - while(xArg < argc && *(argv[xArg]) == '-') { - if(!strcmp(argv[xArg], "-c")) { - emulateChordPitch = 1; - printf("Scaling pitch linearly.\n"); - } else if(!strcmp(argv[xArg], "-p")) { - xArg++; - if(xArg < argc) { - pitch = atof(argv[xArg]); - printf("Setting pitch to %0.2fX\n", pitch); - } - } else if(!strcmp(argv[xArg], "-q")) { - quality = 1; - printf("Disabling speed-up heuristics\n"); - } else if(!strcmp(argv[xArg], "-r")) { - xArg++; - if(xArg < argc) { - rate = atof(argv[xArg]); - printf("Setting rate to %0.2fX\n", rate); - } - } else if(!strcmp(argv[xArg], "-s")) { - xArg++; - if(xArg < argc) { - speed = atof(argv[xArg]); - printf("Setting speed to %0.2fX\n", speed); - } - } else if(!strcmp(argv[xArg], "-v")) { - xArg++; - if(xArg < argc) { - volume = atof(argv[xArg]); - printf("Setting volume to %0.2f\n", volume); - } - } - xArg++; - } - if(argc - xArg != 2) { - usage(); - } - inFileName = argv[xArg]; - outFileName = argv[xArg + 1]; - inFile = openInputWaveFile(inFileName, &sampleRate, &numChannels); - if(inFile == NULL) { - return 1; + while (xArg < argc && *(argv[xArg]) == '-') { + if (!strcmp(argv[xArg], "-c")) { + emulateChordPitch = 1; + printf("Scaling pitch linearly.\n"); + } else if (!strcmp(argv[xArg], "-o")) { + xArg++; + if (xArg < argc) { + outputSampleRate = atoi(argv[xArg]); + printf("Setting output sample rate to %d\n", outputSampleRate); + } + } else if (!strcmp(argv[xArg], "-p")) { + xArg++; + if (xArg < argc) { + pitch = atof(argv[xArg]); + printf("Setting pitch to %0.2fX\n", pitch); + } + } else if (!strcmp(argv[xArg], "-q")) { + quality = 1; + printf("Disabling speed-up heuristics\n"); + } else if (!strcmp(argv[xArg], "-r")) { + xArg++; + if (xArg < argc) { + rate = atof(argv[xArg]); + if (rate == 0.0f) { + usage(); + } + printf("Setting rate to %0.2fX\n", rate); + } + } else if (!strcmp(argv[xArg], "-s")) { 
+ xArg++; + if (xArg < argc) { + speed = atof(argv[xArg]); + printf("Setting speed to %0.2fX\n", speed); + } +#ifdef SONIC_SPECTROGRAM + } else if (!strcmp(argv[xArg], "-S")) { + xArg++; + if (xArg < argc) { + numCols = atof(argv[xArg]); + } + xArg++; + if (xArg < argc) { + numRows = atof(argv[xArg]); + computeSpectrogram = 1; + printf("Computing spectrogram %d wide and %d tall\n", numCols, numRows); + } +#endif /* SONIC_SPECTROGRAM */ + } else if (!strcmp(argv[xArg], "-v")) { + xArg++; + if (xArg < argc) { + volume = atof(argv[xArg]); + printf("Setting volume to %0.2f\n", volume); + } } - outFile = openOutputWaveFile(outFileName, sampleRate, numChannels); - if(outFile == NULL) { - closeWaveFile(inFile); - return 1; - } - runSonic(inFile, outFile, speed, pitch, rate, volume, emulateChordPitch, quality, - sampleRate, numChannels); - closeWaveFile(inFile); - closeWaveFile(outFile); - return 0; + xArg++; + } + if (argc - xArg != 2) { + usage(); + } + inFileName = argv[xArg]; + outFileName = argv[xArg + 1]; + runSonic(inFileName, outFileName, speed, pitch, rate, volume, + outputSampleRate, emulateChordPitch, quality, + computeSpectrogram, numRows, numCols); + return 0; } diff --git a/sonic/main_experimental.c b/sonic/main_experimental.c new file mode 100644 index 0000000000000000000000000000000000000000..022ebe1961b67b3fff50dc9ee1864e4ef4221df6 --- /dev/null +++ b/sonic/main_experimental.c @@ -0,0 +1,93 @@ +/* This file was written by Bill Cox in 2010, and is licensed under the Apache + 2.0 license. + + This file is meant as a simple example for how to use libsonic. It is also a + useful utility on its own, which can speed up or slow down wav files, change + pitch, and scale volume. */ + +#include +#include +#include +#include "sonic_experimental.h" +#include "wave.h" + +/* temp */ +static int counter = 0; + +/* Run the experimental version of sonic. 
*/ +static void runSonic(char* inFileName, char* outFileName, float speed) { + waveFile inFile, outFile = NULL; + short inBuffer[SONIC_INPUT_SAMPLES], outBuffer[SONIC_INPUT_SAMPLES]; + int samplesRead, samplesWritten, sampleRate, numChannels; + + inFile = openInputWaveFile(inFileName, &sampleRate, &numChannels); + if (numChannels != 1) { + fprintf(stderr, "sonic_experimental only processes mono wave files. This file has %d channels.\n", + numChannels); + exit(1); + } + if (inFile == NULL) { + fprintf(stderr, "Unable to read wave file %s\n", inFileName); + exit(1); + } + printf("Sample rate %d\n", sampleRate); + outFile = openOutputWaveFile(outFileName, sampleRate, 1); + if (outFile == NULL) { + closeWaveFile(inFile); + fprintf(stderr, "Unable to open wave file %s for writing\n", outFileName); + exit(1); + } + sonicInit(speed, sampleRate); + do { + samplesRead = readFromWaveFile(inFile, inBuffer, SONIC_INPUT_SAMPLES); + if (samplesRead == 0) { + sonicFlushStream(); + } else { + sonicWriteShortToStream(inBuffer, samplesRead); + } + do { + samplesWritten = sonicReadShortFromStream(outBuffer, SONIC_INPUT_SAMPLES); + if (samplesWritten > 0) { + writeToWaveFile(outFile, outBuffer, samplesWritten); + } + /* temp */ + counter++; + } while (samplesWritten > 0); + } while (samplesRead > 0); + closeWaveFile(inFile); + closeWaveFile(outFile); +} + +/* Print the usage. */ +static void usage(void) { + fprintf( + stderr, + "Usage: sonic_experimental [OPTION]... infile outfile\n" + " -s speed -- Set speed up factor. 
2.0 means 2X faster.\n"); + exit(1); +} + +int main(int argc, char** argv) { + char* inFileName; + char* outFileName; + float speed = 1.0f; + int xArg = 1; + + while (xArg < argc && *(argv[xArg]) == '-') { + if (!strcmp(argv[xArg], "-s")) { + xArg++; + if (xArg < argc) { + speed = atof(argv[xArg]); + printf("Setting speed to %0.2fX\n", speed); + } + } + xArg++; + } + if (argc - xArg != 2) { + usage(); + } + inFileName = argv[xArg]; + outFileName = argv[xArg + 1]; + runSonic(inFileName, outFileName, speed); + return 0; +} diff --git a/sonic/main_lite.c b/sonic/main_lite.c new file mode 100644 index 0000000000000000000000000000000000000000..b8c972c2bd1edaa3c175737b8c7f884c43fdc1da --- /dev/null +++ b/sonic/main_lite.c @@ -0,0 +1,103 @@ +/* This file was written by Bill Cox in 2010, and is licensed under the Apache + 2.0 license. + + This file is meant as a simple example for how to use libsonic. It is also a + useful utility on its own, which can speed up or slow down wav files, change + pitch, and scale volume. */ + +#include +#include +#include +#include "sonic_lite.h" +#include "wave.h" + +/* Run sonic_lite. */ +static void runSonic(char* inFileName, char* outFileName, float speed, float volume) { + waveFile inFile, outFile = NULL; + short inBuffer[SONIC_INPUT_SAMPLES], outBuffer[SONIC_INPUT_SAMPLES]; + int samplesRead, samplesWritten, sampleRate, numChannels; + + inFile = openInputWaveFile(inFileName, &sampleRate, &numChannels); + if (numChannels != 1) { + fprintf(stderr, "sonic_lite only processes mono wave files. This file has %d channels.\n", + numChannels); + exit(1); + } + if (sampleRate != SONIC_SAMPLE_RATE) { + fprintf(stderr, + "sonic_lite only processes wave files with a sample rate of %d Hz. 
This file uses %d\n", + SONIC_SAMPLE_RATE, sampleRate); + exit(1); + } + if (inFile == NULL) { + fprintf(stderr, "Unable to read wave file %s\n", inFileName); + exit(1); + } + outFile = openOutputWaveFile(outFileName, sampleRate, 1); + if (outFile == NULL) { + closeWaveFile(inFile); + fprintf(stderr, "Unable to open wave file %s for writing\n", outFileName); + exit(1); + } + sonicInit(); + sonicSetSpeed(speed); + sonicSetVolume(volume); + do { + samplesRead = readFromWaveFile(inFile, inBuffer, SONIC_INPUT_SAMPLES); + if (samplesRead == 0) { + sonicFlushStream(); + } else { + sonicWriteShortToStream(inBuffer, samplesRead); + } + do { + samplesWritten = sonicReadShortFromStream(outBuffer, SONIC_INPUT_SAMPLES); + if (samplesWritten > 0) { + writeToWaveFile(outFile, outBuffer, samplesWritten); + } + } while (samplesWritten > 0); + } while (samplesRead > 0); + closeWaveFile(inFile); + closeWaveFile(outFile); +} + +/* Print the usage. */ +static void usage(void) { + fprintf( + stderr, + "Usage: sonic_lite [OPTION]... infile outfile\n" + " -s speed -- Set speed up factor. 
2.0 means 2X faster.\n" + " -v volume -- Scale volume by a constant factor.\n"); + exit(1); +} + +int main(int argc, char** argv) { + char* inFileName; + char* outFileName; + float speed = 1.0f; + float volume = 1.0f; + int xArg = 1; + + while (xArg < argc && *(argv[xArg]) == '-') { + if (!strcmp(argv[xArg], "-s")) { + xArg++; + if (xArg < argc) { + speed = atof(argv[xArg]); + printf("Setting speed to %0.2fX\n", speed); + } + } else if (!strcmp(argv[xArg], "-v")) { + xArg++; + if (xArg < argc) { + volume = atof(argv[xArg]); + printf("Setting volume to %0.2f\n", volume); + } + } + xArg++; + } + if (argc - xArg != 2) { + usage(); + } + inFileName = argv[xArg]; + outFileName = argv[xArg + 1]; + runSonic(inFileName, outFileName, speed, volume); + return 0; +} diff --git a/sonic/samples/espeak_s450.wav b/sonic/samples/espeak_s450.wav new file mode 100644 index 0000000000000000000000000000000000000000..4239477d13fb2191a4705689cb5086f340000b57 Binary files /dev/null and b/sonic/samples/espeak_s450.wav differ diff --git a/sonic/samples/espeak_sonic.wav b/sonic/samples/espeak_sonic.wav new file mode 100644 index 0000000000000000000000000000000000000000..cffdb960e85d6eae6226aa66ca174d998d774243 Binary files /dev/null and b/sonic/samples/espeak_sonic.wav differ diff --git a/sonic/samples/sonic.wav b/sonic/samples/sonic.wav new file mode 100644 index 0000000000000000000000000000000000000000..bc5510949e2e3a1fca51749bfeb15a70fc128d7f Binary files /dev/null and b/sonic/samples/sonic.wav differ diff --git a/sonic/samples/soundstretch.wav b/sonic/samples/soundstretch.wav new file mode 100644 index 0000000000000000000000000000000000000000..e0226d56e0c6cd71247d8c940eea2f36e2ae0682 Binary files /dev/null and b/sonic/samples/soundstretch.wav differ diff --git a/sonic/samples/stereo_test.wav b/sonic/samples/stereo_test.wav new file mode 100644 index 0000000000000000000000000000000000000000..6dfc809a6334a6a8d0f6b61700cea4ce024f230d Binary files /dev/null and 
b/sonic/samples/stereo_test.wav differ diff --git a/sonic/samples/talking.wav b/sonic/samples/talking.wav new file mode 100644 index 0000000000000000000000000000000000000000..1df692e8c9b8169ae2cab733f09612b0c743cd28 Binary files /dev/null and b/sonic/samples/talking.wav differ diff --git a/sonic/samples/talking_2x.wav b/sonic/samples/talking_2x.wav new file mode 100644 index 0000000000000000000000000000000000000000..e20f71f580517d2c02a65a027becb3c6da23063d Binary files /dev/null and b/sonic/samples/talking_2x.wav differ diff --git a/sonic/samples/twosineperiods.wav b/sonic/samples/twosineperiods.wav new file mode 100644 index 0000000000000000000000000000000000000000..c71dffffaa397b303056ea7a2872751361805377 Binary files /dev/null and b/sonic/samples/twosineperiods.wav differ diff --git a/sonic/sonic.c b/sonic/sonic.c index 8b04c647a668cd5be01ecef5bd59f005ecb05d2f..8e2f696b252b7b3bee489823bc60bcc72943ba28 100644 --- a/sonic/sonic.c +++ b/sonic/sonic.c @@ -6,1190 +6,1243 @@ This file is licensed under the Apache 2.0 license. */ -#include +#include "sonic.h" + +#include +#include #include #include -#include -#ifdef SONIC_USE_SIN -#include -#ifndef M_PI -#define M_PI 3.14159265358979323846 + +/* + The following code was used to generate the following sinc lookup table. + + #include + #include + #include + + double findHannWeight(int N, double x) { + return 0.5*(1.0 - cos(2*M_PI*x/N)); + } + + double findSincCoefficient(int N, double x) { + double hannWindowWeight = findHannWeight(N, x); + double sincWeight; + + x -= N/2.0; + if (x > 1e-9 || x < -1e-9) { + sincWeight = sin(M_PI*x)/(M_PI*x); + } else { + sincWeight = 1.0; + } + return hannWindowWeight*sincWeight; + } + + int main() { + double x; + int i; + int N = 12; + + for (i = 0, x = 0.0; x <= N; x += 0.02, i++) { + printf("%u %d\n", i, (int)(SHRT_MAX*findSincCoefficient(N, x))); + } + return 0; + } +*/ + +#define CLAMP(val, min, max) \ + ((val) < (min) ? (min) : (val) > (max) ? 
(max) : (val)) + +/* The number of points to use in the sinc FIR filter for resampling. */ +#define SINC_FILTER_POINTS \ + 12 /* I am not able to hear improvement with higher N. */ +#define SINC_TABLE_SIZE 601 + +/* Lookup table for windowed sinc function of SINC_FILTER_POINTS points. */ +static short sincTable[SINC_TABLE_SIZE] = { + 0, 0, 0, 0, 0, 0, 0, -1, -1, -2, -2, + -3, -4, -6, -7, -9, -10, -12, -14, -17, -19, -21, + -24, -26, -29, -32, -34, -37, -40, -42, -44, -47, -48, + -50, -51, -52, -53, -53, -53, -52, -50, -48, -46, -43, + -39, -34, -29, -22, -16, -8, 0, 9, 19, 29, 41, + 53, 65, 79, 92, 107, 121, 137, 152, 168, 184, 200, + 215, 231, 247, 262, 276, 291, 304, 317, 328, 339, 348, + 357, 363, 369, 372, 374, 375, 373, 369, 363, 355, 345, + 332, 318, 300, 281, 259, 234, 208, 178, 147, 113, 77, + 39, 0, -41, -85, -130, -177, -225, -274, -324, -375, -426, + -478, -530, -581, -632, -682, -731, -779, -825, -870, -912, -951, + -989, -1023, -1053, -1080, -1104, -1123, -1138, -1149, -1154, -1155, -1151, + -1141, -1125, -1105, -1078, -1046, -1007, -963, -913, -857, -796, -728, + -655, -576, -492, -403, -309, -210, -107, 0, 111, 225, 342, + 462, 584, 708, 833, 958, 1084, 1209, 1333, 1455, 1575, 1693, + 1807, 1916, 2022, 2122, 2216, 2304, 2384, 2457, 2522, 2579, 2625, + 2663, 2689, 2706, 2711, 2705, 2687, 2657, 2614, 2559, 2491, 2411, + 2317, 2211, 2092, 1960, 1815, 1658, 1489, 1308, 1115, 912, 698, + 474, 241, 0, -249, -506, -769, -1037, -1310, -1586, -1864, -2144, + -2424, -2703, -2980, -3254, -3523, -3787, -4043, -4291, -4529, -4757, -4972, + -5174, -5360, -5531, -5685, -5819, -5935, -6029, -6101, -6150, -6175, -6175, + -6149, -6096, -6015, -5905, -5767, -5599, -5401, -5172, -4912, -4621, -4298, + -3944, -3558, -3141, -2693, -2214, -1705, -1166, -597, 0, 625, 1277, + 1955, 2658, 3386, 4135, 4906, 5697, 6506, 7332, 8173, 9027, 9893, + 10769, 11654, 12544, 13439, 14335, 15232, 16128, 17019, 17904, 18782, 19649, + 20504, 21345, 22170, 22977, 23763, 24527, 25268, 25982, 
26669, 27327, 27953, + 28547, 29107, 29632, 30119, 30569, 30979, 31349, 31678, 31964, 32208, 32408, + 32565, 32677, 32744, 32767, 32744, 32677, 32565, 32408, 32208, 31964, 31678, + 31349, 30979, 30569, 30119, 29632, 29107, 28547, 27953, 27327, 26669, 25982, + 25268, 24527, 23763, 22977, 22170, 21345, 20504, 19649, 18782, 17904, 17019, + 16128, 15232, 14335, 13439, 12544, 11654, 10769, 9893, 9027, 8173, 7332, + 6506, 5697, 4906, 4135, 3386, 2658, 1955, 1277, 625, 0, -597, + -1166, -1705, -2214, -2693, -3141, -3558, -3944, -4298, -4621, -4912, -5172, + -5401, -5599, -5767, -5905, -6015, -6096, -6149, -6175, -6175, -6150, -6101, + -6029, -5935, -5819, -5685, -5531, -5360, -5174, -4972, -4757, -4529, -4291, + -4043, -3787, -3523, -3254, -2980, -2703, -2424, -2144, -1864, -1586, -1310, + -1037, -769, -506, -249, 0, 241, 474, 698, 912, 1115, 1308, + 1489, 1658, 1815, 1960, 2092, 2211, 2317, 2411, 2491, 2559, 2614, + 2657, 2687, 2705, 2711, 2706, 2689, 2663, 2625, 2579, 2522, 2457, + 2384, 2304, 2216, 2122, 2022, 1916, 1807, 1693, 1575, 1455, 1333, + 1209, 1084, 958, 833, 708, 584, 462, 342, 225, 111, 0, + -107, -210, -309, -403, -492, -576, -655, -728, -796, -857, -913, + -963, -1007, -1046, -1078, -1105, -1125, -1141, -1151, -1155, -1154, -1149, + -1138, -1123, -1104, -1080, -1053, -1023, -989, -951, -912, -870, -825, + -779, -731, -682, -632, -581, -530, -478, -426, -375, -324, -274, + -225, -177, -130, -85, -41, 0, 39, 77, 113, 147, 178, + 208, 234, 259, 281, 300, 318, 332, 345, 355, 363, 369, + 373, 375, 374, 372, 369, 363, 357, 348, 339, 328, 317, + 304, 291, 276, 262, 247, 231, 215, 200, 184, 168, 152, + 137, 121, 107, 92, 79, 65, 53, 41, 29, 19, 9, + 0, -8, -16, -22, -29, -34, -39, -43, -46, -48, -50, + -52, -53, -53, -53, -52, -51, -50, -48, -47, -44, -42, + -40, -37, -34, -32, -29, -26, -24, -21, -19, -17, -14, + -12, -10, -9, -7, -6, -4, -3, -2, -2, -1, -1, + 0, 0, 0, 0, 0, 0, 0}; + +/* These functions allocate out of a static array rather than calling + 
calloc/realloc/free if the NO_MALLOC flag is defined. Otherwise, call + calloc/realloc/free as usual. This is useful for running on small + microcontrollers. */ +#ifndef SONIC_NO_MALLOC + +/* Just call calloc. */ +static void* sonicCalloc(int num, int size) { return calloc(num, size); } + +/* Just call realloc */ +static void* sonicRealloc(void* p, int oldNum, int newNum, int size) { + return realloc(p, newNum * size); +} + +/* Just call free. */ +static void sonicFree(void* p) { free(p); } + +#else + +#ifndef SONIC_MAX_MEMORY +/* Large enough for speedup/slowdown at 8KHz, 16-bit mono samples/second. */ +#define SONIC_MAX_MEMORY (16 * 1024) #endif + +/* This static buffer is used to hold data allocated for the sonicStream struct + and its buffers. There should never be more than one sonicStream in use at a + time when using SONIC_NO_MALLOC mode. Calls to realloc move the data to the + end of memoryBuffer. Calls to free reset the memory buffer to empty. */ +static void* + memoryBufferAligned[(SONIC_MAX_MEMORY + sizeof(void) - 1) / sizeof(void*)]; +static unsigned char* memoryBuffer = (unsigned char*)memoryBufferAligned; +static int memoryBufferPos = 0; + +/* Allocate elements from a static memory buffer. */ +static void* sonicCalloc(int num, int size) { + int len = num * size; + + if (memoryBufferPos + len > SONIC_MAX_MEMORY) { + return 0; + } + unsigned char* p = memoryBuffer + memoryBufferPos; + memoryBufferPos += len; + memset(p, 0, len); + return p; +} + +/* Preferably, SONIC_MAX_MEMORY has been set large enough that this is never + * called. */ +static void* sonicRealloc(void* p, int oldNum, int newNum, int size) { + if (newNum <= oldNum) { + return p; + } + void* newBuffer = sonicCalloc(newNum, size); + if (newBuffer == NULL) { + return NULL; + } + memcpy(newBuffer, p, oldNum * size); + return newBuffer; +} + +/* Reset memoryBufferPos to 0. We asssume all data is freed at the same time. 
+ */ +static void sonicFree(void* p) { memoryBufferPos = 0; } + #endif -#include "sonic.h" struct sonicStreamStruct { - short *inputBuffer; - short *outputBuffer; - short *pitchBuffer; - short *downSampleBuffer; - float speed; - float volume; - float pitch; - float rate; - int oldRatePosition; - int newRatePosition; - int useChordPitch; - int quality; - int numChannels; - int inputBufferSize; - int pitchBufferSize; - int outputBufferSize; - int numInputSamples; - int numOutputSamples; - int numPitchSamples; - int minPeriod; - int maxPeriod; - int maxRequired; - int remainingInputToCopy; - int sampleRate; - int prevPeriod; - int prevMinDiff; - float remainingSamplesForSkip; - float remainingSamplesForInsert; +#ifdef SONIC_SPECTROGRAM + sonicSpectrogram spectrogram; +#endif /* SONIC_SPECTROGRAM */ + short* inputBuffer; + short* outputBuffer; + short* pitchBuffer; + short* downSampleBuffer; + void* userData; + float speed; + float volume; + float pitch; + float rate; + /* The point of the following 3 new variables is to gracefully handle rapidly + changing input speed. + + samplePeriod is just 1.0/sampleRate. It is used in accumulating + inputPlayTime, which is how long we expect the total time should be to play + the current input samples in the input buffer. timeError keeps track of + the error in play time created when playing < 2.0X speed, where we either + insert or delete a whole pitch period. This can cause the output generated + from the input to be off in play time by up to a pitch period. timeError + replaces PICOLA's concept of the number of samples to play unmodified after + a pitch period insertion or deletion. If speeding up, and the error is >= + 0.0, then remove a pitch period, and play samples unmodified until + timeError is >= 0 again. If slowing down, and the error is <= 0.0, + then add a pitch period, and play samples unmodified until timeError is <= + 0 again. */ + float samplePeriod; /* How long each output sample takes to play. 
*/ + /* How long we expect the entire input buffer to take to play. */ + float inputPlayTime; + /* The difference in when the latest output sample was played vs when we + * wanted. */ + float timeError; + int oldRatePosition; + int newRatePosition; + int quality; + int numChannels; + int inputBufferSize; + int pitchBufferSize; + int outputBufferSize; + int numInputSamples; + int numOutputSamples; + int numPitchSamples; + int minPeriod; + int maxPeriod; + int maxRequired; + int remainingInputToCopy; + int sampleRate; + int prevPeriod; + int prevMinDiff; }; -/* Just used for debugging */ -/* -void sonicMSG(char *format, ...) -{ - char buffer[4096]; - va_list ap; - FILE *file; - - va_start(ap, format); - vsprintf((char *)buffer, (char *)format, ap); - va_end(ap); - file=fopen("/tmp/sonic.log", "a"); - fprintf(file, "%s", buffer); - fclose(file); +/* Attach user data to the stream. */ +void sonicSetUserData(sonicStream stream, void* userData) { + stream->userData = userData; +} + +/* Retrieve user data attached to the stream. */ +void* sonicGetUserData(sonicStream stream) { return stream->userData; } + +#ifdef SONIC_SPECTROGRAM + +/* Compute a spectrogram on the fly. */ +void sonicComputeSpectrogram(sonicStream stream) { + stream->spectrogram = sonicCreateSpectrogram(stream->sampleRate); + /* Force changeSpeed to be called to compute the spectrogram. */ + sonicSetSpeed(stream, 2.0); } -*/ + +/* Get the spectrogram. */ +sonicSpectrogram sonicGetSpectrogram(sonicStream stream) { + return stream->spectrogram; +} + +#endif /* Scale the samples by the factor. 
*/ -static void scaleSamples( - short *samples, - int numSamples, - float volume) -{ - int fixedPointVolume = volume*4096.0f; - int value; - - while(numSamples--) { - value = (*samples*fixedPointVolume) >> 12; - if(value > 32767) { - value = 32767; - } else if(value < -32767) { - value = -32767; - } - *samples++ = value; - } +static void scaleSamples(short* samples, int numSamples, float volume) { + /* This is 24-bit integer and 8-bit fraction fixed-point representation. */ + int fixedPointVolume = volume * 256.0f; + int value; + + while (numSamples--) { + value = (*samples * fixedPointVolume) >> 8; + if (value > 32767) { + value = 32767; + } else if (value < -32767) { + value = -32767; + } + *samples++ = value; + } } /* Get the speed of the stream. */ -float sonicGetSpeed( - sonicStream stream) -{ - return stream->speed; -} +float sonicGetSpeed(sonicStream stream) { return stream->speed; } /* Set the speed of the stream. */ -void sonicSetSpeed( - sonicStream stream, - float speed) -{ - stream->speed = speed; +void sonicSetSpeed(sonicStream stream, float speed) { + stream->speed = CLAMP(speed, SONIC_MIN_SPEED, SONIC_MAX_SPEED); } /* Get the pitch of the stream. */ -float sonicGetPitch( - sonicStream stream) -{ - return stream->pitch; -} +float sonicGetPitch(sonicStream stream) { return stream->pitch; } /* Set the pitch of the stream. */ -void sonicSetPitch( - sonicStream stream, - float pitch) -{ - stream->pitch = pitch; +void sonicSetPitch(sonicStream stream, float pitch) { + stream->pitch = CLAMP(pitch, SONIC_MIN_PITCH_SETTING, SONIC_MAX_PITCH_SETTING); } /* Get the rate of the stream. */ -float sonicGetRate( - sonicStream stream) -{ - return stream->rate; -} +float sonicGetRate(sonicStream stream) { return stream->rate; } -/* Set the playback rate of the stream. This scales pitch and speed at the same time. */ -void sonicSetRate( - sonicStream stream, - float rate) -{ - stream->rate = rate; +/* Set the playback rate of the stream. 
This scales pitch and speed at the same + time. */ +void sonicSetRate(sonicStream stream, float rate) { + stream->rate = CLAMP(rate, SONIC_MIN_RATE, SONIC_MAX_RATE); - stream->oldRatePosition = 0; - stream->newRatePosition = 0; + stream->oldRatePosition = 0; + stream->newRatePosition = 0; } -/* Get the vocal chord pitch setting. */ -int sonicGetChordPitch( - sonicStream stream) -{ - return stream->useChordPitch; -} +/* DEPRECATED. Get the vocal chord pitch setting. */ +int sonicGetChordPitch(sonicStream stream) { return 0; } -/* Set the vocal chord mode for pitch computation. Default is off. */ -void sonicSetChordPitch( - sonicStream stream, - int useChordPitch) -{ - stream->useChordPitch = useChordPitch; -} +/* DEPRECATED. Set the vocal chord mode for pitch computation. Default is off. + */ +void sonicSetChordPitch(sonicStream stream, int useChordPitch) {} /* Get the quality setting. */ -int sonicGetQuality( - sonicStream stream) -{ - return stream->quality; -} +int sonicGetQuality(sonicStream stream) { return stream->quality; } -/* Set the "quality". Default 0 is virtually as good as 1, but very much faster. */ -void sonicSetQuality( - sonicStream stream, - int quality) -{ - stream->quality = quality; +/* Set the "quality". Default 0 is virtually as good as 1, but very much + faster. */ +void sonicSetQuality(sonicStream stream, int quality) { + stream->quality = quality != 0? 1 : 0; } /* Get the scaling factor of the stream. */ -float sonicGetVolume( - sonicStream stream) -{ - return stream->volume; -} +float sonicGetVolume(sonicStream stream) { return stream->volume; } /* Set the scaling factor of the stream. */ -void sonicSetVolume( - sonicStream stream, - float volume) -{ - stream->volume = volume; +void sonicSetVolume(sonicStream stream, float volume) { + stream->volume = CLAMP(volume, SONIC_MIN_VOLUME, SONIC_MAX_VOLUME); } /* Free stream buffers. 
*/
-static void freeStreamBuffers(
-    sonicStream stream)
-{
-    if(stream->inputBuffer != NULL) {
-        free(stream->inputBuffer);
-    }
-    if(stream->outputBuffer != NULL) {
-        free(stream->outputBuffer);
-    }
-    if(stream->pitchBuffer != NULL) {
-        free(stream->pitchBuffer);
-    }
-    if(stream->downSampleBuffer != NULL) {
-        free(stream->downSampleBuffer);
-    }
+static void freeStreamBuffers(sonicStream stream) {
+  /* Every pointer is cleared once it has been released. The sample-rate and
+     channel setters free the buffers and then call allocateStreamBuffers(),
+     which destroys the stream (and so runs this function again) when an
+     allocation fails; stale pointers would then be freed a second time. */
+  if (stream->inputBuffer != NULL) {
+    sonicFree(stream->inputBuffer);
+    stream->inputBuffer = NULL;
+  }
+  if (stream->outputBuffer != NULL) {
+    sonicFree(stream->outputBuffer);
+    stream->outputBuffer = NULL;
+  }
+  if (stream->pitchBuffer != NULL) {
+    sonicFree(stream->pitchBuffer);
+    stream->pitchBuffer = NULL;
+  }
+  if (stream->downSampleBuffer != NULL) {
+    sonicFree(stream->downSampleBuffer);
+    stream->downSampleBuffer = NULL;
+  }
 }

 /* Destroy the sonic stream. */
-void sonicDestroyStream(
-    sonicStream stream)
-{
-    freeStreamBuffers(stream);
-    free(stream);
+void sonicDestroyStream(sonicStream stream) {
+  /* Like free(), destroying a NULL stream is a no-op. */
+  if (stream == NULL) {
+    return;
+  }
+#ifdef SONIC_SPECTROGRAM
+  if (stream->spectrogram != NULL) {
+    sonicDestroySpectrogram(stream->spectrogram);
+  }
+#endif /* SONIC_SPECTROGRAM */
+  freeStreamBuffers(stream);
+  sonicFree(stream);
+}
+
+/* Compute the number of samples to skip to down-sample the input. Returns 1
+   (no down-sampling) unless quality is 0 and sampleRate exceeds
+   SONIC_AMDF_FREQ, in which case it returns sampleRate / SONIC_AMDF_FREQ. */
+static int computeSkip(sonicStream stream, int sampleRate) {
+  int skip = 1;
+  if (sampleRate > SONIC_AMDF_FREQ && stream->quality == 0) {
+    skip = sampleRate / SONIC_AMDF_FREQ;
+  }
+  return skip;
 }

 /* Allocate stream buffers.
*/
-static int allocateStreamBuffers(
-    sonicStream stream,
-    int sampleRate,
-    int numChannels)
-{
-    int minPeriod = sampleRate/SONIC_MAX_PITCH;
-    int maxPeriod = sampleRate/SONIC_MIN_PITCH;
-    int maxRequired = 2*maxPeriod;
-
-    stream->inputBufferSize = maxRequired;
-    stream->inputBuffer = (short *)calloc(maxRequired, sizeof(short)*numChannels);
-    if(stream->inputBuffer == NULL) {
-        sonicDestroyStream(stream);
-        return 0;
-    }
-    stream->outputBufferSize = maxRequired;
-    stream->outputBuffer = (short *)calloc(maxRequired, sizeof(short)*numChannels);
-    if(stream->outputBuffer == NULL) {
-        sonicDestroyStream(stream);
-        return 0;
-    }
-    stream->pitchBufferSize = maxRequired;
-    stream->pitchBuffer = (short *)calloc(maxRequired, sizeof(short)*numChannels);
-    if(stream->pitchBuffer == NULL) {
-        sonicDestroyStream(stream);
-        return 0;
-    }
-    stream->downSampleBuffer = (short *)calloc(maxRequired, sizeof(short));
-    if(stream->downSampleBuffer == NULL) {
-        sonicDestroyStream(stream);
-        return 0;
-    }
-    stream->sampleRate = sampleRate;
-    stream->numChannels = numChannels;
-    stream->oldRatePosition = 0;
-    stream->newRatePosition = 0;
-    stream->minPeriod = minPeriod;
-    stream->maxPeriod = maxPeriod;
-    stream->maxRequired = maxRequired;
-    stream->prevPeriod = 0;
-    return 1;
+/* Allocates the input, output, pitch and down-sample buffers for the given
+   sample rate and channel count and records the derived period limits on the
+   stream. Returns 1 on success. On any allocation failure the stream itself
+   is destroyed and 0 is returned, so the caller must not touch it again. */
+static int allocateStreamBuffers(sonicStream stream, int sampleRate,
+                                 int numChannels) {
+  int minPeriod = sampleRate / SONIC_MAX_PITCH;
+  int maxPeriod = sampleRate / SONIC_MIN_PITCH;
+  int maxRequired = 2 * maxPeriod;
+
+  /* Allocate 25% more than needed so we hopefully won't grow. */
+  stream->inputBufferSize = maxRequired + (maxRequired >> 2);
+
+  stream->inputBuffer =
+      (short*)sonicCalloc(stream->inputBufferSize, sizeof(short) * numChannels);
+  if (stream->inputBuffer == NULL) {
+    sonicDestroyStream(stream);
+    return 0;
+  }
+  /* Allocate 25% more than needed so we hopefully won't grow. */
+  stream->outputBufferSize = maxRequired + (maxRequired >> 2);
+  stream->outputBuffer = (short*)sonicCalloc(stream->outputBufferSize,
+                                             sizeof(short) * numChannels);
+  if (stream->outputBuffer == NULL) {
+    sonicDestroyStream(stream);
+    return 0;
+  }
+  /* Allocate 25% more than needed so we hopefully won't grow. */
+  stream->pitchBufferSize = maxRequired + (maxRequired >> 2);
+  stream->pitchBuffer =
+      (short*)sonicCalloc(stream->pitchBufferSize, sizeof(short) * numChannels);
+  if (stream->pitchBuffer == NULL) {
+    sonicDestroyStream(stream);
+    return 0;
+  }
+  /* downSampleInput() writes maxRequired / skip shorts, and skip depends on
+     the quality setting, which sonicSetQuality() can change after this
+     allocation. Size the buffer for the worst case (skip == 1) so a later
+     quality change cannot make the write run past the end.
+     NOTE(review): assumes the pitch finder recomputes skip from the current
+     quality before calling downSampleInput() -- confirm against the caller. */
+  stream->downSampleBuffer = (short*)sonicCalloc(maxRequired, sizeof(short));
+  if (stream->downSampleBuffer == NULL) {
+    sonicDestroyStream(stream);
+    return 0;
+  }
+  stream->sampleRate = sampleRate;
+  stream->samplePeriod = 1.0 / sampleRate;
+  stream->numChannels = numChannels;
+  stream->oldRatePosition = 0;
+  stream->newRatePosition = 0;
+  stream->minPeriod = minPeriod;
+  stream->maxPeriod = maxPeriod;
+  stream->maxRequired = maxRequired;
+  stream->prevPeriod = 0;
+  return 1;
 }

 /* Create a sonic stream.  Return NULL only if we are out of memory and cannot
    allocate the stream.
*/ -sonicStream sonicCreateStream( - int sampleRate, - int numChannels) -{ - sonicStream stream = (sonicStream)calloc(1, sizeof(struct sonicStreamStruct)); - - if(stream == NULL) { - return NULL; - } - if(!allocateStreamBuffers(stream, sampleRate, numChannels)) { - return NULL; - } - stream->speed = 1.0f; - stream->pitch = 1.0f; - stream->volume = 1.0f; - stream->rate = 1.0f; - stream->oldRatePosition = 0; - stream->newRatePosition = 0; - stream->useChordPitch = 0; - stream->quality = 0; - stream->remainingSamplesForSkip = 0.0f; - stream->remainingSamplesForInsert = 0.0f; - return stream; +sonicStream sonicCreateStream(int sampleRate, int numChannels) { + sonicStream stream = + (sonicStream)sonicCalloc(1, sizeof(struct sonicStreamStruct)); + + sampleRate = CLAMP(sampleRate, SONIC_MIN_SAMPLE_RATE, SONIC_MAX_SAMPLE_RATE); + numChannels = CLAMP(numChannels, SONIC_MIN_CHANNELS, SONIC_MAX_CHANNELS); + if (stream == NULL) { + return NULL; + } + if (!allocateStreamBuffers(stream, sampleRate, numChannels)) { + return NULL; + } + stream->speed = 1.0f; + stream->pitch = 1.0f; + stream->volume = 1.0f; + stream->rate = 1.0f; + stream->oldRatePosition = 0; + stream->newRatePosition = 0; + stream->quality = 0; + return stream; } /* Get the sample rate of the stream. */ -int sonicGetSampleRate( - sonicStream stream) -{ - return stream->sampleRate; -} - -/* Set the sample rate of the stream. This will cause samples buffered in the stream to - be lost. */ -void sonicSetSampleRate( - sonicStream stream, - int sampleRate) -{ - freeStreamBuffers(stream); - allocateStreamBuffers(stream, sampleRate, stream->numChannels); +int sonicGetSampleRate(sonicStream stream) { return stream->sampleRate; } + +/* Set the sample rate of the stream. This will cause samples buffered in the + stream to be lost. 
*/
+void sonicSetSampleRate(sonicStream stream, int sampleRate) {
+  sampleRate = CLAMP(sampleRate, SONIC_MIN_SAMPLE_RATE, SONIC_MAX_SAMPLE_RATE);
+  freeStreamBuffers(stream);
+  /* The buffered samples went away with the old buffers, so clear the
+     counters that describe them. Otherwise later reads and writes would index
+     the new (possibly smaller) buffers with stale counts. */
+  stream->numInputSamples = 0;
+  stream->numOutputSamples = 0;
+  stream->numPitchSamples = 0;
+  stream->remainingInputToCopy = 0;
+  stream->inputPlayTime = 0.0f;
+  stream->timeError = 0.0f;
+  /* NOTE(review): if this allocation fails, allocateStreamBuffers() destroys
+     the stream and this function has no way to report it to the caller. */
+  allocateStreamBuffers(stream, sampleRate, stream->numChannels);
 }

 /* Get the number of channels. */
-int sonicGetNumChannels(
-    sonicStream stream)
-{
-    return stream->numChannels;
-}
-
-/* Set the num channels of the stream.  This will cause samples buffered in the stream to
-   be lost. */
-void sonicSetNumChannels(
-    sonicStream stream,
-    int numChannels)
-{
-    freeStreamBuffers(stream);
-    allocateStreamBuffers(stream, stream->sampleRate, numChannels);
+int sonicGetNumChannels(sonicStream stream) { return stream->numChannels; }
+
+/* Set the num channels of the stream. This will cause samples buffered in the
+   stream to be lost. The value is clamped to
+   [SONIC_MIN_CHANNELS, SONIC_MAX_CHANNELS]. */
+void sonicSetNumChannels(sonicStream stream, int numChannels) {
+  numChannels = CLAMP(numChannels, SONIC_MIN_CHANNELS, SONIC_MAX_CHANNELS);
+  freeStreamBuffers(stream);
+  /* Clear the counters that describe the discarded samples so the new buffers
+     are not indexed with stale counts. */
+  stream->numInputSamples = 0;
+  stream->numOutputSamples = 0;
+  stream->numPitchSamples = 0;
+  stream->remainingInputToCopy = 0;
+  stream->inputPlayTime = 0.0f;
+  stream->timeError = 0.0f;
+  /* NOTE(review): if this allocation fails, allocateStreamBuffers() destroys
+     the stream and this function has no way to report it to the caller. */
+  allocateStreamBuffers(stream, stream->sampleRate, numChannels);
 }

 /* Enlarge the output buffer if needed.
*/
-static int enlargeOutputBufferIfNeeded(
-    sonicStream stream,
-    int numSamples)
-{
-    if(stream->numOutputSamples + numSamples > stream->outputBufferSize) {
-        stream->outputBufferSize += (stream->outputBufferSize >> 1) + numSamples;
-        stream->outputBuffer = (short *)realloc(stream->outputBuffer,
-            stream->outputBufferSize*sizeof(short)*stream->numChannels);
-        if(stream->outputBuffer == NULL) {
-            return 0;
-        }
-    }
-    return 1;
+static int enlargeOutputBufferIfNeeded(sonicStream stream, int numSamples) {
+  int oldSize = stream->outputBufferSize;
+
+  if (stream->numOutputSamples + numSamples > oldSize) {
+    /* Grow by 50% plus the request. */
+    int newSize = oldSize + (oldSize >> 1) + numSamples;
+    /* Keep the result in a temporary: on failure the old buffer and its
+       recorded size stay intact instead of being leaked and left mismatched. */
+    short* newBuffer =
+        (short*)sonicRealloc(stream->outputBuffer, oldSize, newSize,
+                             sizeof(short) * stream->numChannels);
+    if (newBuffer == NULL) {
+      return 0;
+    }
+    stream->outputBuffer = newBuffer;
+    stream->outputBufferSize = newSize;
+  }
+  return 1;
 }

 /* Enlarge the input buffer if needed. */
-static int enlargeInputBufferIfNeeded(
-    sonicStream stream,
-    int numSamples)
-{
-    if(stream->numInputSamples + numSamples > stream->inputBufferSize) {
-        stream->inputBufferSize += (stream->inputBufferSize >> 1) + numSamples;
-        stream->inputBuffer = (short *)realloc(stream->inputBuffer,
-            stream->inputBufferSize*sizeof(short)*stream->numChannels);
-        if(stream->inputBuffer == NULL) {
-            return 0;
-        }
-    }
-    return 1;
+static int enlargeInputBufferIfNeeded(sonicStream stream, int numSamples) {
+  int oldSize = stream->inputBufferSize;
+
+  if (stream->numInputSamples + numSamples > oldSize) {
+    /* Grow by 50% plus the request. */
+    int newSize = oldSize + (oldSize >> 1) + numSamples;
+    /* Keep the result in a temporary: on failure the old buffer and its
+       recorded size stay intact instead of being leaked and left mismatched. */
+    short* newBuffer =
+        (short*)sonicRealloc(stream->inputBuffer, oldSize, newSize,
+                             sizeof(short) * stream->numChannels);
+    if (newBuffer == NULL) {
+      return 0;
+    }
+    stream->inputBuffer = newBuffer;
+    stream->inputBufferSize = newSize;
+  }
+  return 1;
+}
+
+/* Update stream->numInputSamples, and update stream->inputPlayTime.
Call this
+   whenever adding samples to the input buffer, to keep track of total expected
+   input play time accounting. */
+static void updateNumInputSamples(sonicStream stream, int numSamples) {
+  float speed = stream->speed / stream->pitch;
+
+  stream->numInputSamples += numSamples;
+  stream->inputPlayTime += numSamples * stream->samplePeriod / speed;
 }

 /* Add the input samples to the input buffer. */
-static int addFloatSamplesToInputBuffer(
-    sonicStream stream,
-    float *samples,
-    int numSamples)
-{
-    short *buffer;
-    int count = numSamples*stream->numChannels;
-
-    if(numSamples == 0) {
-        return 1;
-    }
-    if(!enlargeInputBufferIfNeeded(stream, numSamples)) {
-        return 0;
-    }
-    buffer = stream->inputBuffer + stream->numInputSamples*stream->numChannels;
-    while(count--) {
-        *buffer++ = (*samples++)*32767.0f;
-    }
-    stream->numInputSamples += numSamples;
+/* samples holds numSamples frames of numChannels floats each. Each value is
+   scaled by 32767 and stored as a 16-bit sample. Returns 1 on success (also
+   when there is nothing to add) and 0 if the input buffer cannot be grown. */
+static int addFloatSamplesToInputBuffer(sonicStream stream,
+                                        const float* samples, int numSamples) {
+  short* buffer;
+  int count = numSamples * stream->numChannels;
+
+  /* A non-positive count has nothing to copy; a negative one would otherwise
+     make the loop below run away. */
+  if (numSamples <= 0) {
     return 1;
+  }
+  if (!enlargeInputBufferIfNeeded(stream, numSamples)) {
+    return 0;
+  }
+  buffer = stream->inputBuffer + stream->numInputSamples * stream->numChannels;
+  while (count--) {
+    float scaled = (*samples++) * 32767.0f;
+
+    /* Converting a float outside the range of short (or NaN) to an integer
+       is undefined behavior, so saturate first. NaN becomes silence. */
+    if (scaled != scaled) {
+      scaled = 0.0f;
+    } else if (scaled > 32767.0f) {
+      scaled = 32767.0f;
+    } else if (scaled < -32767.0f) {
+      scaled = -32767.0f;
+    }
+    *buffer++ = (short)scaled;
+  }
+  updateNumInputSamples(stream, numSamples);
+  return 1;
 }

 /* Add the input samples to the input buffer.
*/
-static int addShortSamplesToInputBuffer(
-    sonicStream stream,
-    short *samples,
-    int numSamples)
-{
-    if(numSamples == 0) {
-        return 1;
-    }
-    if(!enlargeInputBufferIfNeeded(stream, numSamples)) {
-        return 0;
-    }
-    memcpy(stream->inputBuffer + stream->numInputSamples*stream->numChannels, samples,
-        numSamples*sizeof(short)*stream->numChannels);
-    stream->numInputSamples += numSamples;
+/* samples holds numSamples frames of numChannels shorts each; they are
+   appended unchanged. Returns 1 on success (also when there is nothing to
+   add) and 0 if the input buffer cannot be grown. */
+static int addShortSamplesToInputBuffer(sonicStream stream,
+                                        const short* samples, int numSamples) {
+  /* A negative count would turn into a huge size_t in the memcpy below. */
+  if (numSamples <= 0) {
     return 1;
+  }
+  if (!enlargeInputBufferIfNeeded(stream, numSamples)) {
+    return 0;
+  }
+  memcpy(stream->inputBuffer + stream->numInputSamples * stream->numChannels,
+         samples, numSamples * sizeof(short) * stream->numChannels);
+  updateNumInputSamples(stream, numSamples);
+  return 1;
 }

 /* Add the input samples to the input buffer. */
-static int addUnsignedCharSamplesToInputBuffer(
-    sonicStream stream,
-    unsigned char *samples,
-    int numSamples)
-{
-    short *buffer;
-    int count = numSamples*stream->numChannels;
-
-    if(numSamples == 0) {
-        return 1;
-    }
-    if(!enlargeInputBufferIfNeeded(stream, numSamples)) {
-        return 0;
-    }
-    buffer = stream->inputBuffer + stream->numInputSamples*stream->numChannels;
-    while(count--) {
-        *buffer++ = (*samples++ - 128) << 8;
-    }
-    stream->numInputSamples += numSamples;
+/* samples holds numSamples frames of numChannels unsigned 8-bit values each.
+   Each value is re-centred around zero (128 maps to 0) and scaled by 256 to
+   fill the 16-bit range. Returns 1 on success (also when there is nothing to
+   add) and 0 if the input buffer cannot be grown. */
+static int addUnsignedCharSamplesToInputBuffer(sonicStream stream,
+                                               const unsigned char* samples,
+                                               int numSamples) {
+  short* buffer;
+  int count = numSamples * stream->numChannels;
+
+  /* A negative count would make the loop below run away. */
+  if (numSamples <= 0) {
     return 1;
+  }
+  if (!enlargeInputBufferIfNeeded(stream, numSamples)) {
+    return 0;
+  }
+  buffer = stream->inputBuffer + stream->numInputSamples * stream->numChannels;
+  while (count--) {
+    /* Multiply instead of shifting: the operand is negative for inputs below
+       128, and left-shifting a negative value is undefined behavior. */
+    *buffer++ = (*samples++ - 128) * 256;
+  }
+  updateNumInputSamples(stream, numSamples);
+  return 1;
 }

 /* Remove input samples that we have already processed.
*/
-static void removeInputSamples(
-    sonicStream stream,
-    int position)
-{
-    int remainingSamples = stream->numInputSamples - position;
-
-    if(remainingSamples > 0) {
-        memmove(stream->inputBuffer, stream->inputBuffer + position*stream->numChannels,
-            remainingSamples*sizeof(short)*stream->numChannels);
-    }
-    stream->numInputSamples = remainingSamples;
+/* Drops the first position frames from the input buffer, moves the rest to
+   the front, and scales inputPlayTime to match what is left. */
+static void removeInputSamples(sonicStream stream, int position) {
+  int remainingSamples = stream->numInputSamples - position;
+
+  if (remainingSamples > 0) {
+    memmove(stream->inputBuffer,
+            stream->inputBuffer + position * stream->numChannels,
+            remainingSamples * sizeof(short) * stream->numChannels);
+    /* If we play 3/4ths of the samples, then the expected play time of the
+       remaining samples is 1/4th of the original expected play time. */
+    stream->inputPlayTime =
+        (stream->inputPlayTime * remainingSamples) / stream->numInputSamples;
+  } else {
+    /* Nothing is left. Set the values directly: with an empty input buffer
+       the proportional formula above is 0/0, and the resulting NaN would
+       stick to inputPlayTime on every later update. Also keep the count from
+       going negative if position overshoots. */
+    remainingSamples = 0;
+    stream->inputPlayTime = 0.0f;
+  }
+  stream->numInputSamples = remainingSamples;
 }

-/* Just copy from the array to the output buffer */
-static int copyToOutput(
-    sonicStream stream,
-    short *samples,
-    int numSamples)
-{
-    if(!enlargeOutputBufferIfNeeded(stream, numSamples)) {
-        return 0;
-    }
-    memcpy(stream->outputBuffer + stream->numOutputSamples*stream->numChannels,
-        samples, numSamples*sizeof(short)*stream->numChannels);
-    stream->numOutputSamples += numSamples;
-    return 1;
+/* Copy from the input buffer to the output buffer, and remove the samples from
+   the input buffer. Returns 0 if the output buffer cannot be grown, 1
+   otherwise. */
+static int copyInputToOutput(sonicStream stream, int numSamples) {
+  if (!enlargeOutputBufferIfNeeded(stream, numSamples)) {
+    return 0;
+  }
+  memcpy(stream->outputBuffer + stream->numOutputSamples * stream->numChannels,
+         stream->inputBuffer, numSamples * sizeof(short) * stream->numChannels);
+  stream->numOutputSamples += numSamples;
+  removeInputSamples(stream, numSamples);
+  return 1;
 }

-/* Just copy from the input buffer to the output buffer.  Return 0 if we fail to
-   resize the output buffer.
Otherwise, return numSamples */ -static int copyInputToOutput( - sonicStream stream, - int position) -{ - int numSamples = stream->remainingInputToCopy; - - if(numSamples > stream->maxRequired) { - numSamples = stream->maxRequired; - } - if(!copyToOutput(stream, stream->inputBuffer + position*stream->numChannels, - numSamples)) { - return 0; - } - stream->remainingInputToCopy -= numSamples; - return numSamples; +/* Copy from samples to the output buffer */ +static int copyToOutput(sonicStream stream, short* samples, int numSamples) { + if (!enlargeOutputBufferIfNeeded(stream, numSamples)) { + return 0; + } + memcpy(stream->outputBuffer + stream->numOutputSamples * stream->numChannels, + samples, numSamples * sizeof(short) * stream->numChannels); + stream->numOutputSamples += numSamples; + return 1; } /* Read data out of the stream. Sometimes no data will be available, and zero is returned, which is not an error condition. */ -int sonicReadFloatFromStream( - sonicStream stream, - float *samples, - int maxSamples) -{ - int numSamples = stream->numOutputSamples; - int remainingSamples = 0; - short *buffer; - int count; - - if(numSamples == 0) { - return 0; - } - if(numSamples > maxSamples) { - remainingSamples = numSamples - maxSamples; - numSamples = maxSamples; - } - buffer = stream->outputBuffer; - count = numSamples*stream->numChannels; - while(count--) { - *samples++ = (*buffer++)/32767.0f; - } - if(remainingSamples > 0) { - memmove(stream->outputBuffer, stream->outputBuffer + numSamples*stream->numChannels, - remainingSamples*sizeof(short)*stream->numChannels); - } - stream->numOutputSamples = remainingSamples; - return numSamples; +int sonicReadFloatFromStream(sonicStream stream, float* samples, + int maxSamples) { + int numSamples = stream->numOutputSamples; + int remainingSamples = 0; + short* buffer; + int count; + + if (numSamples == 0) { + return 0; + } + if (numSamples > maxSamples) { + remainingSamples = numSamples - maxSamples; + numSamples = 
maxSamples; + } + buffer = stream->outputBuffer; + count = numSamples * stream->numChannels; + while (count--) { + *samples++ = (*buffer++) / 32767.0f; + } + if (remainingSamples > 0) { + memmove(stream->outputBuffer, + stream->outputBuffer + numSamples * stream->numChannels, + remainingSamples * sizeof(short) * stream->numChannels); + } + stream->numOutputSamples = remainingSamples; + return numSamples; } -/* Read short data out of the stream. Sometimes no data will be available, and zero - is returned, which is not an error condition. */ -int sonicReadShortFromStream( - sonicStream stream, - short *samples, - int maxSamples) -{ - int numSamples = stream->numOutputSamples; - int remainingSamples = 0; - - if(numSamples == 0) { - return 0; - } - if(numSamples > maxSamples) { - remainingSamples = numSamples - maxSamples; - numSamples = maxSamples; - } - memcpy(samples, stream->outputBuffer, numSamples*sizeof(short)*stream->numChannels); - if(remainingSamples > 0) { - memmove(stream->outputBuffer, stream->outputBuffer + numSamples*stream->numChannels, - remainingSamples*sizeof(short)*stream->numChannels); - } - stream->numOutputSamples = remainingSamples; - return numSamples; +/* Read short data out of the stream. Sometimes no data will be available, and + zero is returned, which is not an error condition. 
*/ +int sonicReadShortFromStream(sonicStream stream, short* samples, + int maxSamples) { + int numSamples = stream->numOutputSamples; + int remainingSamples = 0; + + if (numSamples == 0) { + return 0; + } + if (numSamples > maxSamples) { + remainingSamples = numSamples - maxSamples; + numSamples = maxSamples; + } + memcpy(samples, stream->outputBuffer, + numSamples * sizeof(short) * stream->numChannels); + if (remainingSamples > 0) { + memmove(stream->outputBuffer, + stream->outputBuffer + numSamples * stream->numChannels, + remainingSamples * sizeof(short) * stream->numChannels); + } + stream->numOutputSamples = remainingSamples; + return numSamples; } -/* Read unsigned char data out of the stream. Sometimes no data will be available, and zero - is returned, which is not an error condition. */ -int sonicReadUnsignedCharFromStream( - sonicStream stream, - unsigned char *samples, - int maxSamples) -{ - int numSamples = stream->numOutputSamples; - int remainingSamples = 0; - short *buffer; - int count; - - if(numSamples == 0) { - return 0; - } - if(numSamples > maxSamples) { - remainingSamples = numSamples - maxSamples; - numSamples = maxSamples; - } - buffer = stream->outputBuffer; - count = numSamples*stream->numChannels; - while(count--) { - *samples++ = (char)((*buffer++) >> 8) + 128; - } - if(remainingSamples > 0) { - memmove(stream->outputBuffer, stream->outputBuffer + numSamples*stream->numChannels, - remainingSamples*sizeof(short)*stream->numChannels); - } - stream->numOutputSamples = remainingSamples; - return numSamples; +/* Read unsigned char data out of the stream. Sometimes no data will be + available, and zero is returned, which is not an error condition. 
*/ +int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples, + int maxSamples) { + int numSamples = stream->numOutputSamples; + int remainingSamples = 0; + short* buffer; + int count; + + if (numSamples == 0) { + return 0; + } + if (numSamples > maxSamples) { + remainingSamples = numSamples - maxSamples; + numSamples = maxSamples; + } + buffer = stream->outputBuffer; + count = numSamples * stream->numChannels; + while (count--) { + *samples++ = (char)((*buffer++) >> 8) + 128; + } + if (remainingSamples > 0) { + memmove(stream->outputBuffer, + stream->outputBuffer + numSamples * stream->numChannels, + remainingSamples * sizeof(short) * stream->numChannels); + } + stream->numOutputSamples = remainingSamples; + return numSamples; } /* Force the sonic stream to generate output using whatever data it currently - has. No extra delay will be added to the output, but flushing in the middle of - words could introduce distortion. */ -int sonicFlushStream( - sonicStream stream) -{ - int maxRequired = stream->maxRequired; - int remainingSamples = stream->numInputSamples; - float speed = stream->speed/stream->pitch; - float rate = stream->rate*stream->pitch; - int expectedOutputSamples = stream->numOutputSamples + - (int)((remainingSamples/speed + stream->numPitchSamples)/rate + 0.5f); - - /* Add enough silence to flush both input and pitch buffers. 
*/ - if(!enlargeInputBufferIfNeeded(stream, remainingSamples + 2*maxRequired)) { - return 0; - } - memset(stream->inputBuffer + remainingSamples*stream->numChannels, 0, - 2*maxRequired*sizeof(short)*stream->numChannels); - stream->numInputSamples += 2*maxRequired; - if(!sonicWriteShortToStream(stream, NULL, 0)) { - return 0; - } - /* Throw away any extra samples we generated due to the silence we added */ - if(stream->numOutputSamples > expectedOutputSamples) { - stream->numOutputSamples = expectedOutputSamples; - } - /* Empty input and pitch buffers */ - stream->numInputSamples = 0; - stream->remainingInputToCopy = 0; - stream->numPitchSamples = 0; - return 1; + has. No extra delay will be added to the output, but flushing in the middle + of words could introduce distortion. */ +int sonicFlushStream(sonicStream stream) { + int maxRequired = stream->maxRequired; + int remainingSamples = stream->numInputSamples; + float speed = stream->speed / stream->pitch; + float rate = stream->rate * stream->pitch; + int expectedOutputSamples = + stream->numOutputSamples + + (int)((remainingSamples / speed + stream->numPitchSamples) / rate + 0.5f); + + /* Add enough silence to flush both input and pitch buffers. 
*/ + if (!enlargeInputBufferIfNeeded(stream, remainingSamples + 2 * maxRequired)) { + return 0; + } + memset(stream->inputBuffer + remainingSamples * stream->numChannels, 0, + 2 * maxRequired * sizeof(short) * stream->numChannels); + stream->numInputSamples += 2 * maxRequired; + if (!sonicWriteShortToStream(stream, NULL, 0)) { + return 0; + } + /* Throw away any extra samples we generated due to the silence we added */ + if (stream->numOutputSamples > expectedOutputSamples) { + stream->numOutputSamples = expectedOutputSamples; + } + /* Empty input and pitch buffers */ + stream->numInputSamples = 0; + stream->inputPlayTime = 0.0f; + stream->timeError = 0.0f; + stream->numPitchSamples = 0; + return 1; } /* Return the number of samples in the output buffer */ -int sonicSamplesAvailable( - sonicStream stream) -{ - return stream->numOutputSamples; +int sonicSamplesAvailable(sonicStream stream) { + return stream->numOutputSamples; } /* If skip is greater than one, average skip samples together and write them to the down-sample buffer. If numChannels is greater than one, mix the channels together as we down sample. 
*/ -static void downSampleInput( - sonicStream stream, - short *samples, - int skip) -{ - int numSamples = stream->maxRequired/skip; - int samplesPerValue = stream->numChannels*skip; - int i, j; - int value; - short *downSamples = stream->downSampleBuffer; - - for(i = 0; i < numSamples; i++) { - value = 0; - for(j = 0; j < samplesPerValue; j++) { - value += *samples++; - } - value /= samplesPerValue; - *downSamples++ = value; - } +static void downSampleInput(sonicStream stream, short* samples, int skip) { + int numSamples = stream->maxRequired / skip; + int samplesPerValue = stream->numChannels * skip; + int i, j; + int value; + short* downSamples = stream->downSampleBuffer; + + for (i = 0; i < numSamples; i++) { + value = 0; + for (j = 0; j < samplesPerValue; j++) { + value += *samples++; + } + value /= samplesPerValue; + *downSamples++ = value; + } } /* Find the best frequency match in the range, and given a sample skip multiple. For now, just find the pitch of the first channel. */ -static int findPitchPeriodInRange( - short *samples, - int minPeriod, - int maxPeriod, - int *retMinDiff, - int *retMaxDiff) -{ - int period, bestPeriod = 0, worstPeriod = 255; - short *s, *p, sVal, pVal; - unsigned long diff, minDiff = 1, maxDiff = 0; - int i; - - for(period = minPeriod; period <= maxPeriod; period++) { - diff = 0; - s = samples; - p = samples + period; - for(i = 0; i < period; i++) { - sVal = *s++; - pVal = *p++; - diff += sVal >= pVal? (unsigned short)(sVal - pVal) : - (unsigned short)(pVal - sVal); - } - /* Note that the highest number of samples we add into diff will be less - than 256, since we skip samples. 
Thus, diff is a 24 bit number, and - we can safely multiply by numSamples without overflow */ - if(diff*bestPeriod < minDiff*period) { - minDiff = diff; - bestPeriod = period; - } - if(diff*worstPeriod > maxDiff*period) { - maxDiff = diff; - worstPeriod = period; - } - } - *retMinDiff = minDiff/bestPeriod; - *retMaxDiff = maxDiff/worstPeriod; - return bestPeriod; +static int findPitchPeriodInRange(short* samples, int minPeriod, int maxPeriod, + int* retMinDiff, int* retMaxDiff) { + int period, bestPeriod = 0, worstPeriod = 255; + short* s; + short* p; + short sVal, pVal; + unsigned long diff, minDiff = 1, maxDiff = 0; + int i; + + for (period = minPeriod; period <= maxPeriod; period++) { + diff = 0; + s = samples; + p = samples + period; + for (i = 0; i < period; i++) { + sVal = *s++; + pVal = *p++; + diff += sVal >= pVal ? (unsigned short)(sVal - pVal) + : (unsigned short)(pVal - sVal); + } + /* Note that the highest number of samples we add into diff will be less + than 256, since we skip samples. Thus, diff is a 24 bit number, and + we can safely multiply by numSamples without overflow */ + if (bestPeriod == 0 || diff * bestPeriod < minDiff * period) { + minDiff = diff; + bestPeriod = period; + } + if (diff * worstPeriod > maxDiff * period) { + maxDiff = diff; + worstPeriod = period; + } + } + *retMinDiff = minDiff / bestPeriod; + *retMaxDiff = maxDiff / worstPeriod; + return bestPeriod; } /* At abrupt ends of voiced words, we can have pitch periods that are better - approximated by the previous pitch period estimate. Try to detect this case. 
*/ -static int prevPeriodBetter( - sonicStream stream, - int period, - int minDiff, - int maxDiff, - int preferNewPeriod) -{ - if(minDiff == 0 || stream->prevPeriod == 0) { - return 0; - } - if(preferNewPeriod) { - if(maxDiff > minDiff*3) { - /* Got a reasonable match this period */ - return 0; - } - if(minDiff*2 <= stream->prevMinDiff*3) { - /* Mismatch is not that much greater this period */ - return 0; - } - } else { - if(minDiff <= stream->prevMinDiff) { - return 0; - } - } - return 1; + approximated by the previous pitch period estimate. Try to detect this case. + */ +static int prevPeriodBetter(sonicStream stream, int minDiff, int maxDiff, + int preferNewPeriod) { + if (minDiff == 0 || stream->prevPeriod == 0) { + return 0; + } + if (preferNewPeriod) { + if (maxDiff > minDiff * 3) { + /* Got a reasonable match this period */ + return 0; + } + if (minDiff * 2 <= stream->prevMinDiff * 3) { + /* Mismatch is not that much greater this period */ + return 0; + } + } else { + if (minDiff <= stream->prevMinDiff) { + return 0; + } + } + return 1; } /* Find the pitch period. This is a critical step, and we may have to try - multiple ways to get a good answer. This version uses AMDF. 
To improve - speed, we down sample by an integer factor get in the 11KHz range, and then - do it again with a narrower frequency range without down sampling */ -static int findPitchPeriod( - sonicStream stream, - short *samples, - int preferNewPeriod) -{ - int minPeriod = stream->minPeriod; - int maxPeriod = stream->maxPeriod; - int sampleRate = stream->sampleRate; - int minDiff, maxDiff, retPeriod; - int skip = 1; - int period; - - if(sampleRate > SONIC_AMDF_FREQ && stream->quality == 0) { - skip = sampleRate/SONIC_AMDF_FREQ; - } - if(stream->numChannels == 1 && skip == 1) { - period = findPitchPeriodInRange(samples, minPeriod, maxPeriod, &minDiff, &maxDiff); - } else { - downSampleInput(stream, samples, skip); - period = findPitchPeriodInRange(stream->downSampleBuffer, minPeriod/skip, - maxPeriod/skip, &minDiff, &maxDiff); - if(skip != 1) { - period *= skip; - minPeriod = period - (skip << 2); - maxPeriod = period + (skip << 2); - if(minPeriod < stream->minPeriod) { - minPeriod = stream->minPeriod; - } - if(maxPeriod > stream->maxPeriod) { - maxPeriod = stream->maxPeriod; - } - if(stream->numChannels == 1) { - period = findPitchPeriodInRange(samples, minPeriod, maxPeriod, - &minDiff, &maxDiff); - } else { - downSampleInput(stream, samples, 1); - period = findPitchPeriodInRange(stream->downSampleBuffer, minPeriod, - maxPeriod, &minDiff, &maxDiff); - } - } - } - if(prevPeriodBetter(stream, period, minDiff, maxDiff, preferNewPeriod)) { - retPeriod = stream->prevPeriod; - } else { - retPeriod = period; - } - stream->prevMinDiff = minDiff; - stream->prevPeriod = period; - return retPeriod; + multiple ways to get a good answer. This version uses Average Magnitude + Difference Function (AMDF). 
To improve speed, we down sample by an integer + factor get in the 11KHz range, and then do it again with a narrower + frequency range without down sampling */ +static int findPitchPeriod(sonicStream stream, short* samples, + int preferNewPeriod) { + int minPeriod = stream->minPeriod; + int maxPeriod = stream->maxPeriod; + int minDiff, maxDiff, retPeriod; + int skip = computeSkip(stream, stream->sampleRate); + int period; + + if (stream->numChannels == 1 && skip == 1) { + period = findPitchPeriodInRange(samples, minPeriod, maxPeriod, &minDiff, + &maxDiff); + } else { + downSampleInput(stream, samples, skip); + period = findPitchPeriodInRange(stream->downSampleBuffer, minPeriod / skip, + maxPeriod / skip, &minDiff, &maxDiff); + if (skip != 1) { + period *= skip; + minPeriod = period - (skip << 2); + maxPeriod = period + (skip << 2); + if (minPeriod < stream->minPeriod) { + minPeriod = stream->minPeriod; + } + if (maxPeriod > stream->maxPeriod) { + maxPeriod = stream->maxPeriod; + } + if (stream->numChannels == 1) { + period = findPitchPeriodInRange(samples, minPeriod, maxPeriod, &minDiff, + &maxDiff); + } else { + downSampleInput(stream, samples, 1); + period = findPitchPeriodInRange(stream->downSampleBuffer, minPeriod, + maxPeriod, &minDiff, &maxDiff); + } + } + } + if (prevPeriodBetter(stream, minDiff, maxDiff, preferNewPeriod)) { + retPeriod = stream->prevPeriod; + } else { + retPeriod = period; + } + stream->prevMinDiff = minDiff; + stream->prevPeriod = period; + return retPeriod; } /* Overlap two sound segments, ramp the volume of one down, while ramping the other one from zero up, and add them, storing the result at the output. 
*/ -static void overlapAdd( - int numSamples, - int numChannels, - short *out, - short *rampDown, - short *rampUp) -{ - short *o, *u, *d; - int i, t; - - for(i = 0; i < numChannels; i++) { - o = out + i; - u = rampUp + i; - d = rampDown + i; - for(t = 0; t < numSamples; t++) { +static void overlapAdd(int numSamples, int numChannels, short* out, + short* rampDown, short* rampUp) { + short* o; + short* u; + short* d; + int i, t; + + for (i = 0; i < numChannels; i++) { + o = out + i; + u = rampUp + i; + d = rampDown + i; + for (t = 0; t < numSamples; t++) { #ifdef SONIC_USE_SIN - float ratio = sin(t*M_PI/(2*numSamples)); - *o = *d*(1.0f - ratio) + *u*ratio; + float ratio = sin(t * M_PI / (2 * numSamples)); + *o = *d * (1.0f - ratio) + *u * ratio; #else - *o = (*d*(numSamples - t) + *u*t)/numSamples; + *o = (*d * (numSamples - t) + *u * t) / numSamples; #endif - o += numChannels; - d += numChannels; - u += numChannels; - } - } -} - -/* Overlap two sound segments, ramp the volume of one down, while ramping the - other one from zero up, and add them, storing the result at the output. 
*/ -static void overlapAddWithSeparation( - int numSamples, - int numChannels, - int separation, - short *out, - short *rampDown, - short *rampUp) -{ - short *o, *u, *d; - int i, t; - - for(i = 0; i < numChannels; i++) { - o = out + i; - u = rampUp + i; - d = rampDown + i; - for(t = 0; t < numSamples + separation; t++) { - if(t < separation) { - *o = *d*(numSamples - t)/numSamples; - d += numChannels; - } else if(t < numSamples) { - *o = (*d*(numSamples - t) + *u*(t - separation))/numSamples; - d += numChannels; - u += numChannels; - } else { - *o = *u*(t - separation)/numSamples; - u += numChannels; - } - o += numChannels; - } + o += numChannels; + d += numChannels; + u += numChannels; } + } } /* Just move the new samples in the output buffer to the pitch buffer */ -static int moveNewSamplesToPitchBuffer( - sonicStream stream, - int originalNumOutputSamples) -{ - int numSamples = stream->numOutputSamples - originalNumOutputSamples; - int numChannels = stream->numChannels; - - if(stream->numPitchSamples + numSamples > stream->pitchBufferSize) { - stream->pitchBufferSize += (stream->pitchBufferSize >> 1) + numSamples; - stream->pitchBuffer = (short *)realloc(stream->pitchBuffer, - stream->pitchBufferSize*sizeof(short)*numChannels); - if(stream->pitchBuffer == NULL) { - return 0; - } - } - memcpy(stream->pitchBuffer + stream->numPitchSamples*numChannels, - stream->outputBuffer + originalNumOutputSamples*numChannels, - numSamples*sizeof(short)*numChannels); - stream->numOutputSamples = originalNumOutputSamples; - stream->numPitchSamples += numSamples; - return 1; +static int moveNewSamplesToPitchBuffer(sonicStream stream, + int originalNumOutputSamples) { + int numSamples = stream->numOutputSamples - originalNumOutputSamples; + int numChannels = stream->numChannels; + int pitchBufferSize = stream->pitchBufferSize; + + if (stream->numPitchSamples + numSamples > pitchBufferSize) { + stream->pitchBufferSize += (pitchBufferSize >> 1) + numSamples; + stream->pitchBuffer = 
(short*)sonicRealloc( + stream->pitchBuffer, pitchBufferSize, stream->pitchBufferSize, + sizeof(short) * numChannels); + } + memcpy(stream->pitchBuffer + stream->numPitchSamples * numChannels, + stream->outputBuffer + originalNumOutputSamples * numChannels, + numSamples * sizeof(short) * numChannels); + stream->numOutputSamples = originalNumOutputSamples; + stream->numPitchSamples += numSamples; + return 1; } /* Remove processed samples from the pitch buffer. */ -static void removePitchSamples( - sonicStream stream, - int numSamples) -{ - int numChannels = stream->numChannels; - short *source = stream->pitchBuffer + numSamples*numChannels; - - if(numSamples == 0) { - return; - } - if(numSamples != stream->numPitchSamples) { - memmove(stream->pitchBuffer, source, (stream->numPitchSamples - - numSamples)*sizeof(short)*numChannels); - } - stream->numPitchSamples -= numSamples; +static void removePitchSamples(sonicStream stream, int numSamples) { + int numChannels = stream->numChannels; + short* source = stream->pitchBuffer + numSamples * numChannels; + + if (numSamples == 0) { + return; + } + if (numSamples != stream->numPitchSamples) { + memmove( + stream->pitchBuffer, source, + (stream->numPitchSamples - numSamples) * sizeof(short) * numChannels); + } + stream->numPitchSamples -= numSamples; } -/* Change the pitch. The latency this introduces could be reduced by looking at - past samples to determine pitch, rather than future. 
*/ -static int adjustPitch( - sonicStream stream, - int originalNumOutputSamples) -{ - float pitch = stream->pitch; - int numChannels = stream->numChannels; - int period, newPeriod, separation; - int position = 0; - short *out, *rampDown, *rampUp; - - if(stream->numOutputSamples == originalNumOutputSamples) { - return 1; - } - if(!moveNewSamplesToPitchBuffer(stream, originalNumOutputSamples)) { - return 0; - } - while(stream->numPitchSamples - position >= stream->maxRequired) { - period = findPitchPeriod(stream, stream->pitchBuffer + position*numChannels, 0); - newPeriod = period/pitch; - if(!enlargeOutputBufferIfNeeded(stream, newPeriod)) { - return 0; - } - out = stream->outputBuffer + stream->numOutputSamples*numChannels; - if(pitch >= 1.0f) { - rampDown = stream->pitchBuffer + position*numChannels; - rampUp = stream->pitchBuffer + (position + period - newPeriod)*numChannels; - overlapAdd(newPeriod, numChannels, out, rampDown, rampUp); - } else { - rampDown = stream->pitchBuffer + position*numChannels; - rampUp = stream->pitchBuffer + position*numChannels; - separation = newPeriod - period; - overlapAddWithSeparation(period, numChannels, separation, out, rampDown, rampUp); - } - stream->numOutputSamples += newPeriod; - position += period; - } - removePitchSamples(stream, position); - return 1; +/* Approximate the sinc function times a Hann window from the sinc table. */ +static int findSincCoefficient(int i, int ratio, int width) { + int lobePoints = (SINC_TABLE_SIZE - 1) / SINC_FILTER_POINTS; + int left = i * lobePoints + (ratio * lobePoints) / width; + int right = left + 1; + int position = i * lobePoints * width + ratio * lobePoints - left * width; + int leftVal = sincTable[left]; + int rightVal = sincTable[right]; + + return ((leftVal * (width - position) + rightVal * position) << 1) / width; } +/* Return 1 if value >= 0, else -1. This represents the sign of value. */ +static int getSign(int value) { return value >= 0 ? 
1 : -1; } + /* Interpolate the new output sample. */ -static short interpolate( - sonicStream stream, - short *in, - int oldSampleRate, - int newSampleRate) -{ - short left = *in; - short right = in[stream->numChannels]; - int position = stream->newRatePosition*oldSampleRate; - int leftPosition = stream->oldRatePosition*newSampleRate; - int rightPosition = (stream->oldRatePosition + 1)*newSampleRate; - int ratio = rightPosition - position; - int width = rightPosition - leftPosition; - - return (ratio*left + (width - ratio)*right)/width; +static short interpolate(sonicStream stream, short* in, int oldSampleRate, + int newSampleRate) { + /* Compute N-point sinc FIR-filter here. Clip rather than overflow. */ + int i; + int total = 0; + int position = stream->newRatePosition * oldSampleRate; + int leftPosition = stream->oldRatePosition * newSampleRate; + int rightPosition = (stream->oldRatePosition + 1) * newSampleRate; + int ratio = rightPosition - position - 1; + int width = rightPosition - leftPosition; + int weight, value; + int oldSign; + int overflowCount = 0; + + for (i = 0; i < SINC_FILTER_POINTS; i++) { + weight = findSincCoefficient(i, ratio, width); + value = in[i * stream->numChannels] * weight; + oldSign = getSign(total); + total += value; + if (oldSign != getSign(total) && getSign(value) == oldSign) { + /* We must have overflowed. This can happen with a sinc filter. */ + overflowCount += oldSign; + } + } + /* It is better to clip than to wrap if there was a overflow. */ + if (overflowCount > 0) { + return SHRT_MAX; + } else if (overflowCount < 0) { + return SHRT_MIN; + } + return total >> 16; } -/* Change the rate. 
*/ -static int adjustRate( - sonicStream stream, - float rate, - int originalNumOutputSamples) -{ - int newSampleRate = stream->sampleRate/rate; - int oldSampleRate = stream->sampleRate; - int numChannels = stream->numChannels; - int position = 0; - short *in, *out; - int i; - - /* Set these values to help with the integer math */ - while(newSampleRate > (1 << 14) || oldSampleRate > (1 << 14)) { - newSampleRate >>= 1; - oldSampleRate >>= 1; - } - if(stream->numOutputSamples == originalNumOutputSamples) { - return 1; - } - if(!moveNewSamplesToPitchBuffer(stream, originalNumOutputSamples)) { - return 0; - } - /* Leave at least one pitch sample in the buffer */ - for(position = 0; position < stream->numPitchSamples - 1; position++) { - while((stream->oldRatePosition + 1)*newSampleRate > - stream->newRatePosition*oldSampleRate) { - if(!enlargeOutputBufferIfNeeded(stream, 1)) { - return 0; - } - out = stream->outputBuffer + stream->numOutputSamples*numChannels; - in = stream->pitchBuffer + position; - for(i = 0; i < numChannels; i++) { - *out++ = interpolate(stream, in, oldSampleRate, newSampleRate); - in++; - } - stream->newRatePosition++; - stream->numOutputSamples++; - } - stream->oldRatePosition++; - if(stream->oldRatePosition == oldSampleRate) { - stream->oldRatePosition = 0; - if(stream->newRatePosition != newSampleRate) { - fprintf(stderr, - "Assertion failed: stream->newRatePosition != newSampleRate\n"); - exit(1); - } - stream->newRatePosition = 0; - } - } - removePitchSamples(stream, position); +/* Change the rate. Interpolate with a sinc FIR filter using a Hann window. 
*/ +static int adjustRate(sonicStream stream, float rate, + int originalNumOutputSamples) { + int newSampleRate = stream->sampleRate / rate; + int oldSampleRate = stream->sampleRate; + int numChannels = stream->numChannels; + int position; + short *in, *out; + int i; + int N = SINC_FILTER_POINTS; + + /* Set these values to help with the integer math */ + while (newSampleRate > (1 << 14) || oldSampleRate > (1 << 14)) { + newSampleRate >>= 1; + oldSampleRate >>= 1; + } + if (stream->numOutputSamples == originalNumOutputSamples) { return 1; + } + if (!moveNewSamplesToPitchBuffer(stream, originalNumOutputSamples)) { + return 0; + } + /* Leave at least N pitch sample in the buffer */ + for (position = 0; position < stream->numPitchSamples - N; position++) { + while ((stream->oldRatePosition + 1) * newSampleRate > + stream->newRatePosition * oldSampleRate) { + if (!enlargeOutputBufferIfNeeded(stream, 1)) { + return 0; + } + out = stream->outputBuffer + stream->numOutputSamples * numChannels; + in = stream->pitchBuffer + position * numChannels; + for (i = 0; i < numChannels; i++) { + *out++ = interpolate(stream, in, oldSampleRate, newSampleRate); + in++; + } + stream->newRatePosition++; + stream->numOutputSamples++; + } + stream->oldRatePosition++; + if (stream->oldRatePosition == oldSampleRate) { + stream->oldRatePosition = 0; + stream->newRatePosition = 0; + } + } + removePitchSamples(stream, position); + return 1; } - -/* Skip over a pitch period, and copy period/speed samples to the output */ -static int skipPitchPeriod( - sonicStream stream, - short *samples, - float speed, - int period) -{ - long newSamples; - int numChannels = stream->numChannels; - - if(speed >= 2.0f) { - int upNewSamples = ceil((float)period / (speed - 1.0f)); - int downNewSamples = floor((float)period / (speed - 1.0f)); - if (stream->remainingSamplesForSkip < 1) { - newSamples = downNewSamples; - stream->remainingSamplesForSkip += (float)period / (speed - 1.0f) - downNewSamples; - } else { - 
newSamples = upNewSamples; - stream->remainingSamplesForSkip += (float)period / (speed - 1.0f) - upNewSamples; - } - } else { - newSamples = period; - stream->remainingInputToCopy = period*(2.0f - speed)/(speed - 1.0f); - } - if(!enlargeOutputBufferIfNeeded(stream, newSamples)) { - return 0; - } - overlapAdd(newSamples, numChannels, stream->outputBuffer + - stream->numOutputSamples*numChannels, samples, samples + period*numChannels); - stream->numOutputSamples += newSamples; - return newSamples; +/* Skip over a pitch period. Return the number of output samples. */ +static int skipPitchPeriod(sonicStream stream, short* samples, float speed, + int period) { + long newSamples; + int numChannels = stream->numChannels; + + if (speed >= 2.0f) { + /* For speeds >= 2.0, we skip over a portion of each pitch period rather + than dropping whole pitch periods. */ + newSamples = period / (speed - 1.0f); + } else { + newSamples = period; + } + if (!enlargeOutputBufferIfNeeded(stream, newSamples)) { + return 0; + } + overlapAdd(newSamples, numChannels, + stream->outputBuffer + stream->numOutputSamples * numChannels, + samples, samples + period * numChannels); + stream->numOutputSamples += newSamples; + return newSamples; } /* Insert a pitch period, and determine how much input to copy directly. 
*/ -static int insertPitchPeriod( - sonicStream stream, - short *samples, - float speed, - int period) -{ - long newSamples; - short *out; - int numChannels = stream->numChannels; - - if(speed < 0.5f) { - int upNewSamples = ceil((float)period * speed / (1.0f - speed)); - int downNewSamples = floor((float)period * speed / (1.0f - speed)); - if (stream->remainingSamplesForInsert < 1) { - newSamples = downNewSamples; - stream->remainingSamplesForInsert += (float)period * speed / (1.0f - speed) - downNewSamples; - } else { - newSamples = upNewSamples; - stream->remainingSamplesForInsert += (float)period * speed / (1.0f - speed) - upNewSamples; - } - } else { - newSamples = period; - stream->remainingInputToCopy = period*(2.0f*speed - 1.0f)/(1.0f - speed); - } - if(!enlargeOutputBufferIfNeeded(stream, period + newSamples)) { - return 0; - } - out = stream->outputBuffer + stream->numOutputSamples*numChannels; - memcpy(out, samples, period*sizeof(short)*numChannels); - out = stream->outputBuffer + (stream->numOutputSamples + period)*numChannels; - overlapAdd(newSamples, numChannels, out, samples + period*numChannels, samples); - stream->numOutputSamples += period + newSamples; - return newSamples; +static int insertPitchPeriod(sonicStream stream, short* samples, float speed, + int period) { + long newSamples; + short* out; + int numChannels = stream->numChannels; + + if (speed <= 0.5f) { + newSamples = period * speed / (1.0f - speed); + } else { + newSamples = period; + } + if (!enlargeOutputBufferIfNeeded(stream, period + newSamples)) { + return 0; + } + out = stream->outputBuffer + stream->numOutputSamples * numChannels; + memcpy(out, samples, period * sizeof(short) * numChannels); + out = + stream->outputBuffer + (stream->numOutputSamples + period) * numChannels; + overlapAdd(newSamples, numChannels, out, samples + period * numChannels, + samples); + stream->numOutputSamples += period + newSamples; + return newSamples; } -/* Resample as many pitch periods as we have 
buffered on the input. Return 0 if - we fail to resize an input or output buffer. Also scale the output by the volume. */ -static int changeSpeed( - sonicStream stream, - float speed) -{ - short *samples; - int numSamples = stream->numInputSamples; - int position = 0, period, newSamples; - int maxRequired = stream->maxRequired; - - if(stream->numInputSamples < maxRequired) { - return 1; - } - do { - if(stream->remainingInputToCopy > 0) { - newSamples = copyInputToOutput(stream, position); - position += newSamples; - } else { - samples = stream->inputBuffer + position*stream->numChannels; - period = findPitchPeriod(stream, samples, 1); - if(speed > 1.0) { - newSamples = skipPitchPeriod(stream, samples, speed, period); - position += period + newSamples; - } else { - newSamples = insertPitchPeriod(stream, samples, speed, period); - position += newSamples; - } - } - if(newSamples == 0) { - return 0; /* Failed to resize output buffer */ - } - } while(position + maxRequired <= numSamples); - removeInputSamples(stream, position); - return 1; +/* PICOLA copies input to output until the total output samples == consumed + input samples * speed. */ +static int copyUnmodifiedSamples(sonicStream stream, short* samples, + float speed, int position, int* newSamples) { + int availableSamples = stream->numInputSamples - position; + float inputToCopyFloat = + 1 - stream->timeError * speed / (stream->samplePeriod * (speed - 1.0)); + + *newSamples = inputToCopyFloat > availableSamples ? availableSamples + : (int)inputToCopyFloat; + if (!copyToOutput(stream, samples, *newSamples)) { + return 0; + } + stream->timeError += + *newSamples * stream->samplePeriod * (speed - 1.0) / speed; + return 1; } /* Resample as many pitch periods as we have buffered on the input. Return 0 if - we fail to resize an input or output buffer. Also scale the output by the volume. 
*/ -static int processStreamInput( - sonicStream stream) -{ - int originalNumOutputSamples = stream->numOutputSamples; - float speed = stream->speed/stream->pitch; - float rate = stream->rate; - - if(!stream->useChordPitch) { - rate *= stream->pitch; - } - if(speed > 1.00001 || speed < 0.99999) { - changeSpeed(stream, speed); + we fail to resize an input or output buffer. */ +static int changeSpeed(sonicStream stream, float speed) { + short* samples; + int numSamples = stream->numInputSamples; + int position = 0, period, newSamples; + int maxRequired = stream->maxRequired; + + if (stream->numInputSamples < maxRequired) { + return 1; + } + do { + samples = stream->inputBuffer + position * stream->numChannels; + if ((speed > 1.0f && speed < 2.0f && stream->timeError < 0.0f) || + (speed < 1.0f && speed > 0.5f && stream->timeError > 0.0f)) { + /* Deal with the case where PICOLA is still copying input samples to + output unmodified, */ + if (!copyUnmodifiedSamples(stream, samples, speed, position, + &newSamples)) { + return 0; + } + position += newSamples; } else { - if(!copyToOutput(stream, stream->inputBuffer, stream->numInputSamples)) { - return 0; - } - stream->numInputSamples = 0; - } - if(stream->useChordPitch) { - if(stream->pitch != 1.0f) { - if(!adjustPitch(stream, originalNumOutputSamples)) { - return 0; - } - } - } else if(rate != 1.0f) { - if(!adjustRate(stream, rate, originalNumOutputSamples)) { - return 0; - } - } - if(stream->volume != 1.0f) { - /* Adjust output volume. */ - scaleSamples(stream->outputBuffer + originalNumOutputSamples*stream->numChannels, - (stream->numOutputSamples - originalNumOutputSamples)*stream->numChannels, - stream->volume); + /* We are in the remaining cases, either inserting/removing a pitch period + for speed < 2.0X, or a portion of one for speed >= 2.0X. 
*/ + period = findPitchPeriod(stream, samples, 1); +#ifdef SONIC_SPECTROGRAM + if (stream->spectrogram != NULL) { + sonicAddPitchPeriodToSpectrogram(stream->spectrogram, samples, period, + stream->numChannels); + newSamples = period; + position += period; + } else +#endif /* SONIC_SPECTROGRAM */ + if (speed > 1.0) { + newSamples = skipPitchPeriod(stream, samples, speed, period); + position += period + newSamples; + if (speed < 2.0) { + stream->timeError += newSamples * stream->samplePeriod - + (period + newSamples) * stream->inputPlayTime / + stream->numInputSamples; + } + } else { + newSamples = insertPitchPeriod(stream, samples, speed, period); + position += newSamples; + if (speed > 0.5) { + stream->timeError += + (period + newSamples) * stream->samplePeriod - + newSamples * stream->inputPlayTime / stream->numInputSamples; + } + } + if (newSamples == 0) { + return 0; /* Failed to resize output buffer */ + } } + } while (position + maxRequired <= numSamples); + removeInputSamples(stream, position); + return 1; +} + +/* Resample as many pitch periods as we have buffered on the input. Return 0 if + we fail to resize an input or output buffer. Also scale the output by the + volume. */ +static int processStreamInput(sonicStream stream) { + int originalNumOutputSamples = stream->numOutputSamples; + float rate = stream->rate * stream->pitch; + float localSpeed; + + if (stream->numInputSamples == 0) { return 1; + } + localSpeed = + stream->numInputSamples * stream->samplePeriod / stream->inputPlayTime; + if (localSpeed > 1.00001 || localSpeed < 0.99999) { + changeSpeed(stream, localSpeed); + } else { + if (!copyInputToOutput(stream, stream->numInputSamples)) { + return 0; + } + } + if (rate != 1.0f) { + if (!adjustRate(stream, rate, originalNumOutputSamples)) { + return 0; + } + } + if (stream->volume != 1.0f) { + /* Adjust output volume. 
*/ + scaleSamples( + stream->outputBuffer + originalNumOutputSamples * stream->numChannels, + (stream->numOutputSamples - originalNumOutputSamples) * + stream->numChannels, + stream->volume); + } + return 1; } /* Write floating point data to the input buffer and process it. */ -int sonicWriteFloatToStream( - sonicStream stream, - float *samples, - int numSamples) -{ - if(!addFloatSamplesToInputBuffer(stream, samples, numSamples)) { - return 0; - } - return processStreamInput(stream); +int sonicWriteFloatToStream(sonicStream stream, const float* samples, + int numSamples) { + if (!addFloatSamplesToInputBuffer(stream, samples, numSamples)) { + return 0; + } + return processStreamInput(stream); } /* Simple wrapper around sonicWriteFloatToStream that does the short to float conversion for you. */ -int sonicWriteShortToStream( - sonicStream stream, - short *samples, - int numSamples) -{ - if(!addShortSamplesToInputBuffer(stream, samples, numSamples)) { - return 0; - } - return processStreamInput(stream); +int sonicWriteShortToStream(sonicStream stream, const short* samples, + int numSamples) { + if (!addShortSamplesToInputBuffer(stream, samples, numSamples)) { + return 0; + } + return processStreamInput(stream); } -/* Simple wrapper around sonicWriteFloatToStream that does the unsigned char to float - conversion for you. */ -int sonicWriteUnsignedCharToStream( - sonicStream stream, - unsigned char *samples, - int numSamples) -{ - if(!addUnsignedCharSamplesToInputBuffer(stream, samples, numSamples)) { - return 0; - } - return processStreamInput(stream); +/* Simple wrapper around sonicWriteFloatToStream that does the unsigned char to + float conversion for you. 
*/ +int sonicWriteUnsignedCharToStream(sonicStream stream, + const unsigned char* samples, + int numSamples) { + if (!addUnsignedCharSamplesToInputBuffer(stream, samples, numSamples)) { + return 0; + } + return processStreamInput(stream); } -/* This is a non-stream oriented interface to just change the speed of a sound sample */ -int sonicChangeFloatSpeed( - float *samples, - int numSamples, - float speed, - float pitch, - float rate, - float volume, - int useChordPitch, - int sampleRate, - int numChannels) -{ - sonicStream stream = sonicCreateStream(sampleRate, numChannels); - - sonicSetSpeed(stream, speed); - sonicSetPitch(stream, pitch); - sonicSetRate(stream, rate); - sonicSetVolume(stream, volume); - sonicSetChordPitch(stream, useChordPitch); - sonicWriteFloatToStream(stream, samples, numSamples); - sonicFlushStream(stream); - numSamples = sonicSamplesAvailable(stream); - sonicReadFloatFromStream(stream, samples, numSamples); - sonicDestroyStream(stream); - return numSamples; +/* This is a non-stream oriented interface to just change the speed of a sound + * sample */ +int sonicChangeFloatSpeed(float* samples, int numSamples, float speed, + float pitch, float rate, float volume, + int useChordPitch, int sampleRate, int numChannels) { + sonicStream stream = sonicCreateStream(sampleRate, numChannels); + + sonicSetSpeed(stream, speed); + sonicSetPitch(stream, pitch); + sonicSetRate(stream, rate); + sonicSetVolume(stream, volume); + sonicWriteFloatToStream(stream, samples, numSamples); + sonicFlushStream(stream); + numSamples = sonicSamplesAvailable(stream); + sonicReadFloatFromStream(stream, samples, numSamples); + sonicDestroyStream(stream); + return numSamples; } -/* This is a non-stream oriented interface to just change the speed of a sound sample */ -int sonicChangeShortSpeed( - short *samples, - int numSamples, - float speed, - float pitch, - float rate, - float volume, - int useChordPitch, - int sampleRate, - int numChannels) -{ - sonicStream stream = 
sonicCreateStream(sampleRate, numChannels); - - sonicSetSpeed(stream, speed); - sonicSetPitch(stream, pitch); - sonicSetRate(stream, rate); - sonicSetVolume(stream, volume); - sonicSetChordPitch(stream, useChordPitch); - sonicWriteShortToStream(stream, samples, numSamples); - sonicFlushStream(stream); - numSamples = sonicSamplesAvailable(stream); - sonicReadShortFromStream(stream, samples, numSamples); - sonicDestroyStream(stream); - return numSamples; +/* This is a non-stream oriented interface to just change the speed of a sound + * sample */ +int sonicChangeShortSpeed(short* samples, int numSamples, float speed, + float pitch, float rate, float volume, + int useChordPitch, int sampleRate, int numChannels) { + sonicStream stream = sonicCreateStream(sampleRate, numChannels); + + sonicSetSpeed(stream, speed); + sonicSetPitch(stream, pitch); + sonicSetRate(stream, rate); + sonicSetVolume(stream, volume); + sonicWriteShortToStream(stream, samples, numSamples); + sonicFlushStream(stream); + numSamples = sonicSamplesAvailable(stream); + sonicReadShortFromStream(stream, samples, numSamples); + sonicDestroyStream(stream); + return numSamples; } diff --git a/sonic/sonic.h b/sonic/sonic.h index 9b44e680930f269ec0bdfff4196acbcc6c150251..a8b5fb5f1f0ce22a14b49ad2654644f67609f7bc 100644 --- a/sonic/sonic.h +++ b/sonic/sonic.h @@ -1,3 +1,6 @@ +#ifndef SONIC_H_ +#define SONIC_H_ + /* Sonic library Copyright 2010 Bill Cox @@ -30,7 +33,7 @@ where t = 0 to newSamples - 1. For speed factors < 2X, the PICOLA algorithm is used. The above algorithm is first used to double the speed of one pitch period. Then, enough input is directly copied from the input to the output to achieve the desired -speed up facter, where 1.0 < speed < 2.0. The amount of data copied is derived: +speed up factor, where 1.0 < speed < 2.0. 
The amount of data copied is derived: speed = (2*period + length)/(period + length) speed*length + speed*period = 2*period + length @@ -47,24 +50,92 @@ For slow down factors below 0.5, no data is copied, and an algorithm similar to high speed factors is used. */ -#ifdef __cplusplus -extern "C" { -#endif - /* Uncomment this to use sin-wav based overlap add which in theory can improve sound quality slightly, at the expense of lots of floating point math. */ /* #define SONIC_USE_SIN */ +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef SONIC_INTERNAL +/* The following #define's are used to change the names of the routines defined + * here so that a new library (i.e. speedy) can reuse these names, and then call + * the original names. We do this for two reasons: 1) we don't want to change + * the original API, and 2) we want to add a shim, using the original names and + * still call these routines. + * + * Original users of this API and the libsonic library need to do nothing. The + * original behavior remains. + * + * A new user that add some additional functionality above this library (a shim) + * should #define SONIC_INTERNAL before including this file, undefine all these + * symbols and call the sonicIntXXX functions directly. 
+ */ +#define sonicCreateStream sonicIntCreateStream +#define sonicDestroyStream sonicIntDestroyStream +#define sonicWriteFloatToStream sonicIntWriteFloatToStream +#define sonicWriteShortToStream sonicIntWriteShortToStream +#define sonicWriteUnsignedCharToStream sonicIntWriteUnsignedCharToStream +#define sonicReadFloatFromStream sonicIntReadFloatFromStream +#define sonicReadShortFromStream sonicIntReadShortFromStream +#define sonicReadUnsignedCharFromStream sonicIntReadUnsignedCharFromStream +#define sonicFlushStream sonicIntFlushStream +#define sonicSamplesAvailable sonicIntSamplesAvailable +#define sonicGetSpeed sonicIntGetSpeed +#define sonicSetSpeed sonicIntSetSpeed +#define sonicGetPitch sonicIntGetPitch +#define sonicSetPitch sonicIntSetPitch +#define sonicGetRate sonicIntGetRate +#define sonicSetRate sonicIntSetRate +#define sonicGetVolume sonicIntGetVolume +#define sonicSetVolume sonicIntSetVolume +#define sonicGetQuality sonicIntGetQuality +#define sonicSetQuality sonicIntSetQuality +#define sonicGetSampleRate sonicIntGetSampleRate +#define sonicSetSampleRate sonicIntSetSampleRate +#define sonicGetNumChannels sonicIntGetNumChannels +#define sonicGetUserData sonicIntGetUserData +#define sonicSetUserData sonicIntSetUserData +#define sonicSetNumChannels sonicIntSetNumChannels +#define sonicChangeFloatSpeed sonicIntChangeFloatSpeed +#define sonicChangeShortSpeed sonicIntChangeShortSpeed +#define sonicEnableNonlinearSpeedup sonicIntEnableNonlinearSpeedup +#define sonicSetDurationFeedbackStrength sonicIntSetDurationFeedbackStrength +#define sonicComputeSpectrogram sonicIntComputeSpectrogram +#define sonicGetSpectrogram sonicIntGetSpectrogram + +#endif /* SONIC_INTERNAL */ + /* This specifies the range of voice pitches we try to match. 
Note that if we go lower than 65, we could overflow in findPitchInRange */ +#ifndef SONIC_MIN_PITCH #define SONIC_MIN_PITCH 65 +#endif /* SONIC_MIN_PITCH */ +#ifndef SONIC_MAX_PITCH #define SONIC_MAX_PITCH 400 +#endif /* SONIC_MAX_PITCH */ + +/* The following values are used to clamp inputs such as speed to sane values. + */ +#define SONIC_MIN_VOLUME 0.01f +#define SONIC_MAX_VOLUME 100.0f +#define SONIC_MIN_SPEED 0.05f +#define SONIC_MAX_SPEED 20.0f +#define SONIC_MIN_PITCH_SETTING 0.05f +#define SONIC_MAX_PITCH_SETTING 20.0f +#define SONIC_MIN_RATE 0.05f +#define SONIC_MAX_RATE 20.0f +#define SONIC_MIN_SAMPLE_RATE 1000 +#define SONIC_MAX_SAMPLE_RATE 500000 +#define SONIC_MIN_CHANNELS 1 +#define SONIC_MAX_CHANNELS 32 /* These are used to down-sample some inputs to improve speed */ #define SONIC_AMDF_FREQ 4000 struct sonicStreamStruct; -typedef struct sonicStreamStruct *sonicStream; +typedef struct sonicStreamStruct* sonicStream; /* For all of the following functions, numChannels is multiplied by numSamples to determine the actual number of values read or returned. */ @@ -74,27 +145,36 @@ typedef struct sonicStreamStruct *sonicStream; sonicStream sonicCreateStream(int sampleRate, int numChannels); /* Destroy the sonic stream. */ void sonicDestroyStream(sonicStream stream); +/* Attach user data to the stream. */ +void sonicSetUserData(sonicStream stream, void *userData); +/* Retrieve user data attached to the stream. */ +void *sonicGetUserData(sonicStream stream); /* Use this to write floating point data to be speed up or down into the stream. - Values must be between -1 and 1. Return 0 if memory realloc failed, otherwise 1 */ -int sonicWriteFloatToStream(sonicStream stream, float *samples, int numSamples); + Values must be between -1 and 1. Return 0 if memory realloc failed, + otherwise 1 */ +int sonicWriteFloatToStream(sonicStream stream, const float* samples, int numSamples); /* Use this to write 16-bit data to be speed up or down into the stream. 
Return 0 if memory realloc failed, otherwise 1 */ -int sonicWriteShortToStream(sonicStream stream, short *samples, int numSamples); +int sonicWriteShortToStream(sonicStream stream, const short* samples, int numSamples); /* Use this to write 8-bit unsigned data to be speed up or down into the stream. Return 0 if memory realloc failed, otherwise 1 */ -int sonicWriteUnsignedCharToStream(sonicStream stream, unsigned char *samples, int numSamples); +int sonicWriteUnsignedCharToStream(sonicStream stream, const unsigned char* samples, + int numSamples); /* Use this to read floating point data out of the stream. Sometimes no data will be available, and zero is returned, which is not an error condition. */ -int sonicReadFloatFromStream(sonicStream stream, float *samples, int maxSamples); +int sonicReadFloatFromStream(sonicStream stream, float* samples, + int maxSamples); /* Use this to read 16-bit data out of the stream. Sometimes no data will be available, and zero is returned, which is not an error condition. */ -int sonicReadShortFromStream(sonicStream stream, short *samples, int maxSamples); -/* Use this to read 8-bit unsigned data out of the stream. Sometimes no data will - be available, and zero is returned, which is not an error condition. */ -int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char *samples, int maxSamples); +int sonicReadShortFromStream(sonicStream stream, short* samples, + int maxSamples); +/* Use this to read 8-bit unsigned data out of the stream. Sometimes no data + will be available, and zero is returned, which is not an error condition. */ +int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples, + int maxSamples); /* Force the sonic stream to generate output using whatever data it currently - has. No extra delay will be added to the output, but flushing in the middle of - words could introduce distortion. */ + has. 
No extra delay will be added to the output, but flushing in the middle + of words could introduce distortion. */ int sonicFlushStream(sonicStream stream); /* Return the number of samples in the output buffer */ int sonicSamplesAvailable(sonicStream stream); @@ -114,6 +194,8 @@ void sonicSetRate(sonicStream stream, float rate); float sonicGetVolume(sonicStream stream); /* Set the scaling factor of the stream. */ void sonicSetVolume(sonicStream stream, float volume); +/* Chord pitch is DEPRECATED. AFAIK, it was never used by anyone. These + functions still exist to avoid breaking existing code. */ /* Get the chord pitch setting. */ int sonicGetChordPitch(sonicStream stream); /* Set chord pitch mode on or off. Default is off. See the documentation @@ -121,27 +203,102 @@ int sonicGetChordPitch(sonicStream stream); void sonicSetChordPitch(sonicStream stream, int useChordPitch); /* Get the quality setting. */ int sonicGetQuality(sonicStream stream); -/* Set the "quality". Default 0 is virtually as good as 1, but very much faster. */ +/* Set the "quality". Default 0 is virtually as good as 1, but very much + * faster. */ void sonicSetQuality(sonicStream stream, int quality); /* Get the sample rate of the stream. */ int sonicGetSampleRate(sonicStream stream); -/* Set the sample rate of the stream. This will drop any samples that have not been read. */ +/* Set the sample rate of the stream. This will drop any samples that have not + * been read. */ void sonicSetSampleRate(sonicStream stream, int sampleRate); /* Get the number of channels. */ int sonicGetNumChannels(sonicStream stream); -/* Set the number of channels. This will drop any samples that have not been read. */ +/* Set the number of channels. This will drop any samples that have not been + * read. */ void sonicSetNumChannels(sonicStream stream, int numChannels); /* This is a non-stream oriented interface to just change the speed of a sound sample. 
It works in-place on the sample array, so there must be at least - speed*numSamples available space in the array. Returns the new number of samples. */ -int sonicChangeFloatSpeed(float *samples, int numSamples, float speed, float pitch, - float rate, float volume, int useChordPitch, int sampleRate, int numChannels); + speed*numSamples available space in the array. Returns the new number of + samples. */ +int sonicChangeFloatSpeed(float* samples, int numSamples, float speed, + float pitch, float rate, float volume, + int useChordPitch, int sampleRate, int numChannels); /* This is a non-stream oriented interface to just change the speed of a sound sample. It works in-place on the sample array, so there must be at least - speed*numSamples available space in the array. Returns the new number of samples. */ -int sonicChangeShortSpeed(short *samples, int numSamples, float speed, float pitch, - float rate, float volume, int useChordPitch, int sampleRate, int numChannels); + speed*numSamples available space in the array. Returns the new number of + samples. */ +int sonicChangeShortSpeed(short* samples, int numSamples, float speed, + float pitch, float rate, float volume, + int useChordPitch, int sampleRate, int numChannels); + +#ifdef SONIC_SPECTROGRAM +/* +This code generates high quality spectrograms from sound samples, using +Time-Aliased-FFTs as described at: + + https://github.com/waywardgeek/spectrogram -#ifdef __cplusplus +Basically, two adjacent pitch periods are overlap-added to create a sound +sample that accurately represents the speech sound at that moment in time. +This set of samples is converted to a spectral line using an FFT, and the result +is saved as a single spectral line at that moment in time. The resulting +spectral lines vary in resolution (it is equal to the number of samples in the +pitch period), and the spacing of spectral lines also varies (proportional to +the number of samples in the pitch period). 
+ +To generate a bitmap, linear interpolation is used to render the grayscale +value at any particular point in time and frequency. +*/ + +#define SONIC_MAX_SPECTRUM_FREQ 5000 + +struct sonicSpectrogramStruct; +struct sonicBitmapStruct; +typedef struct sonicSpectrogramStruct* sonicSpectrogram; +typedef struct sonicBitmapStruct* sonicBitmap; + +/* sonicBitmap objects represent spectrograms as grayscale bitmaps where each + pixel is from 0 (black) to 255 (white). Bitmaps are rows*cols in size. + Rows are indexed top to bottom and columns are indexed left to right */ +struct sonicBitmapStruct { + unsigned char* data; + int numRows; + int numCols; +}; + +/* Enable computation of a spectrogram on the fly. */ +void sonicComputeSpectrogram(sonicStream stream); + +/* Get the spectrogram. */ +sonicSpectrogram sonicGetSpectrogram(sonicStream stream); + +/* Create an empty spectrogram. Called automatically if sonicComputeSpectrogram + has been called. */ +sonicSpectrogram sonicCreateSpectrogram(int sampleRate); + +/* Destroy the spectrogram. This is called automatically when calling + sonicDestroyStream. */ +void sonicDestroySpectrogram(sonicSpectrogram spectrogram); + +/* Convert the spectrogram to a bitmap. Caller must destroy bitmap when done. */ +sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram, + int numRows, int numCols); + +/* Destroy a bitmap returned by sonicConvertSpectrogramToBitmap. */ +void sonicDestroyBitmap(sonicBitmap bitmap); + +int sonicWritePGM(sonicBitmap bitmap, char* fileName); + +/* Add two pitch periods worth of samples to the spectrogram. There must be + 2*period samples. Time should advance one pitch period for each call to + this function. 
+ */ +void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram, + short* samples, int numSamples, + int numChannels); +#endif /* SONIC_SPECTROGRAM */ + +#ifdef __cplusplus } #endif + +#endif /* SONIC_H_ */ diff --git a/sonic/sonic_experimental.c b/sonic/sonic_experimental.c new file mode 100644 index 0000000000000000000000000000000000000000..c908c89a9d13cc83cfd4a33144bd68ede28b0815 --- /dev/null +++ b/sonic/sonic_experimental.c @@ -0,0 +1,440 @@ +/* Sonic library + Copyright 2010 + Bill Cox + This file is part of the Sonic Library. + + This file is licensed under the Apache 2.0 license. +*/ + +/* This file is designed for low-powered microcontrollers, minimizing memory + compared to the fuller sonic.c implementation. */ + +#include "sonic_experimental.h" + +#include <string.h> + +#define SONIC_INPUT_BUFFER_SIZE (3 * (SONIC_MAX_SAMPLE_RATE / SONIC_MIN_PITCH) + SONIC_INPUT_SAMPLES) +static int sonicMinPeriod, sonicMaxPeriod; + +struct sonicStruct { + /* The input buffer will have at least 3 pitch periods. The sample at + sonicMaxPeriod is the first new unprocessed sample. We keep the prior + samples up to sonicMaxPeriod so we can find the snippet at this point for + any pitch period. This is used when transitioning from the current + snippet to the next. */ + short inputBuffer[1000000]; + short outputBuffer[1000000]; + short downSampleBuffer[1000000]; + float speed; + int sampleRate; + int numInputSamples; + int snippetPeriod; + int snippetOffset; + int numOutputSamples; + int prevPeriod; + int prevMinDiff; +}; + +static struct sonicStruct sonicStream; + +/* A snippet is computed around an input in the input buffer by first applying + a 2-period Hann window, and then overlap-adding the left half to the right. + It should essentially sound like the input at that point in time. */ +struct sonicSnippetStruct { + short samples[100000]; + int inputPos; /* Index into input buffer. 
*/ + int offset; + int period; +}; + +typedef struct sonicSnippetStruct* sonicSnippet; + +/* Set the speed of the stream. */ +void sonicSetSpeed(float speed) { sonicStream.speed = speed; } + +/* Set the sample rate of the stream. */ +void sonicSetSampleRate(int sampleRate) { +} + +/* Create a sonic stream. Return NULL only if we are out of memory and cannot + allocate the stream. */ +void sonicInit(float speed, int sampleRate) { + sonicStream.speed = speed; + sonicStream.sampleRate = sampleRate; + sonicMinPeriod = sampleRate / SONIC_MAX_PITCH; + sonicMaxPeriod = sampleRate / SONIC_MIN_PITCH; + memset(&sonicStream, 0, sizeof(struct sonicStruct)); + sonicStream.speed = speed; + sonicStream.sampleRate = sampleRate; + sonicStream.numInputSamples = 0; + sonicStream.numOutputSamples = 0; + sonicStream.prevPeriod = 0; + sonicStream.prevMinDiff = 0; + sonicStream.numInputSamples = sonicMinPeriod; + sonicStream.snippetPeriod = sonicMinPeriod; + sonicStream.snippetOffset = 0; +} + +/* Add the input samples to the input buffer. */ +static int addShortSamplesToInputBuffer(short *samples, + int numSamples) { + if (numSamples == 0) { + return 1; + } + if (sonicStream.numInputSamples + numSamples > SONIC_INPUT_BUFFER_SIZE) { + return 0; + } + memcpy(sonicStream.inputBuffer + sonicStream.numInputSamples, + samples, numSamples * sizeof(short)); + sonicStream.numInputSamples += numSamples; + return 1; +} + +/* Remove input samples that we have already processed. */ +static void removeInputSamples(int position) { + int remainingSamples = sonicStream.numInputSamples - position; + + if (remainingSamples > 0) { + memmove(sonicStream.inputBuffer, + sonicStream.inputBuffer + position - sonicMaxPeriod, + (remainingSamples + sonicMaxPeriod) * sizeof(short)); + } + sonicStream.numInputSamples = sonicMaxPeriod + remainingSamples; +} + +/* Read short data out of the stream. Sometimes no data will be available, and + zero is returned, which is not an error condition. 
*/ +int sonicReadShortFromStream(short *samples, int maxSamples) { + int numSamples = sonicStream.numOutputSamples; + int remainingSamples = 0; + + if (numSamples == 0) { + return 0; + } + if (numSamples > maxSamples) { + remainingSamples = numSamples - maxSamples; + numSamples = maxSamples; + } + memcpy(samples, sonicStream.outputBuffer, numSamples * sizeof(short)); + if (remainingSamples > 0) { + memmove(sonicStream.outputBuffer, sonicStream.outputBuffer + numSamples, + remainingSamples * sizeof(short)); + } + sonicStream.numOutputSamples = remainingSamples; + return numSamples; +} + +/* Force the sonic stream to generate output using whatever data it currently + has. No extra delay will be added to the output, but flushing in the middle + of words could introduce distortion. */ +void sonicFlushStream(void) { + int remainingSamples = sonicStream.numInputSamples - sonicMaxPeriod; + float speed = sonicStream.speed; + int expectedOutputSamples = sonicStream.numOutputSamples + (int)((remainingSamples / speed) + 0.5f); + + memset(sonicStream.inputBuffer + sonicMaxPeriod + remainingSamples, 0, + sizeof(short) * (SONIC_INPUT_BUFFER_SIZE - (sonicMaxPeriod + remainingSamples))); + sonicStream.numInputSamples = SONIC_INPUT_BUFFER_SIZE; + sonicWriteShortToStream(NULL, 0); + /* Throw away any extra samples we generated due to the silence we added */ + if (sonicStream.numOutputSamples > expectedOutputSamples) { + sonicStream.numOutputSamples = expectedOutputSamples; + } + /* Empty input buffer */ + sonicStream.numInputSamples = sonicMinPeriod; + memset(sonicStream.inputBuffer, 0, SONIC_INPUT_BUFFER_SIZE * sizeof(short)); +} + +/* Return the number of samples in the output buffer */ +int sonicSamplesAvailable(void) { + return sonicStream.numOutputSamples; +} + +/* If skip is greater than one, average skip samples together and write them to + the down-sample buffer. 
*/
/* Reads from samples, writes to sonicStream.downSampleBuffer.  Each output
   value is the integer mean of skip consecutive inputs, where skip is
   sampleRate / SONIC_AMDF_FREQ. */
static void downSampleInput(short *samples) {
  short *downSamples = sonicStream.downSampleBuffer;
  int skip = sonicStream.sampleRate / SONIC_AMDF_FREQ;
  int numSamples;
  int i, j;
  int value;

  /* A sample rate below SONIC_AMDF_FREQ makes the quotient 0; fall back to
     groups of one sample instead of dividing by zero. */
  if (skip < 1) {
    skip = 1;
  }
  /* The pitch search reads at most two periods of down-sampled data, i.e.
     2 * (sonicMaxPeriod / skip) values, so 2 * sonicMaxPeriod / skip outputs
     are enough. */
  numSamples = 2 * sonicMaxPeriod / skip;
  for (i = 0; i < numSamples; i++) {
    value = 0;
    for (j = 0; j < skip; j++) {
      value += *samples++;
    }
    value /= skip;
    *downSamples++ = value;
  }
}

/* Find the best frequency match in the range, and given a sample skip multiple.
   For now, just find the pitch of the first channel.

   For every period in [minPeriod, maxPeriod] the absolute differences between
   samples[0..period) and samples[period..2*period) are summed.  The period
   with the smallest sum per sample is returned, and the per-sample minimum and
   maximum are stored in *retMinDiff and *retMaxDiff.  If no period is
   selected (for example an empty range), 0 is returned and both outputs are
   set to 0. */
static int findPitchPeriodInRange(short *samples, int minPeriod, int maxPeriod,
                                  int* retMinDiff, int* retMaxDiff) {
  int period, bestPeriod = 0, worstPeriod = 255;
  short *s;
  short *p;
  short sVal, pVal;
  unsigned long diff, minDiff = 1, maxDiff = 0;
  int i;

  for (period = minPeriod; period <= maxPeriod; period++) {
    diff = 0;
    s = samples;
    p = samples + period;
    for (i = 0; i < period; i++) {
      sVal = *s++;
      pVal = *p++;
      diff += sVal >= pVal ? (unsigned short)(sVal - pVal)
                           : (unsigned short)(pVal - sVal);
    }
    /* Note that the highest number of samples we add into diff will be less
       than 256, since we skip samples.  Thus, diff is a 24 bit number, and
       we can safely multiply by numSamples without overflow */
    /* Cross-multiplying compares diff/period with minDiff/bestPeriod without
       a division. */
    if (bestPeriod == 0 || diff * bestPeriod < minDiff * period) {
      minDiff = diff;
      bestPeriod = period;
    }
    if (diff * worstPeriod > maxDiff * period) {
      maxDiff = diff;
      worstPeriod = period;
    }
  }
  /* Nothing was selected: report "no match" rather than dividing by zero. */
  if (bestPeriod == 0) {
    *retMinDiff = 0;
    *retMaxDiff = 0;
    return 0;
  }
  *retMinDiff = minDiff / bestPeriod;
  *retMaxDiff = maxDiff / worstPeriod;
  return bestPeriod;
}

/* At abrupt ends of voiced words, we can have pitch periods that are better
   approximated by the previous pitch period estimate.  Try to detect this case.
*/ +static int prevPeriodBetter(int minDiff, int maxDiff, int preferNewPeriod) { + if (minDiff == 0 || sonicStream.prevPeriod == 0) { + return 0; + } + if (preferNewPeriod) { + if (maxDiff > minDiff * 3) { + /* Got a reasonable match this period */ + return 0; + } + if (minDiff * 2 <= sonicStream.prevMinDiff * 3) { + /* Mismatch is not that much greater this period */ + return 0; + } + } else { + if (minDiff <= sonicStream.prevMinDiff) { + return 0; + } + } + return 1; +} + +/* Find the pitch period. This is a critical step, and we may have to try + multiple ways to get a good answer. This version uses Average Magnitude + Difference Function (AMDF). To improve speed, we down sample by an integer + factor get in the 11KHz range, and then do it again with a narrower + frequency range without down sampling */ +static int findPitchPeriod(short *samples, int preferNewPeriod) { + int minPeriod = sonicMinPeriod; + int maxPeriod = sonicMaxPeriod; + int minDiff, maxDiff, retPeriod; + int period; + int skip = sonicStream.sampleRate / SONIC_AMDF_FREQ; + + if (skip == 1) { + period = findPitchPeriodInRange(samples, minPeriod, maxPeriod, &minDiff, &maxDiff); + } else { + downSampleInput(samples); + period = findPitchPeriodInRange(sonicStream.downSampleBuffer, minPeriod / skip, + maxPeriod / skip, &minDiff, &maxDiff); + period *= skip; + minPeriod = period - (skip << 2); + maxPeriod = period + (skip << 2); + if (minPeriod < sonicMinPeriod) { + minPeriod = sonicMinPeriod; + } + if (maxPeriod > sonicMaxPeriod) { + maxPeriod = sonicMaxPeriod; + } + period = findPitchPeriodInRange(samples, minPeriod, maxPeriod, &minDiff, &maxDiff); + } + if (prevPeriodBetter(minDiff, maxDiff, preferNewPeriod)) { + retPeriod = sonicStream.prevPeriod; + } else { + retPeriod = period; + } + sonicStream.prevMinDiff = minDiff; + sonicStream.prevPeriod = period; + return retPeriod; +} + +/* Overlap two sound segments, ramp the volume of one down, while ramping the + other one from zero up, and add them, 
storing the result at the output. */ +static void overlapAdd(int numSamples, short *out, short *rampDown, short *rampUp) { + short *o; + short *u; + short *d; + int t; + + o = out; + u = rampUp; + d = rampDown; + for (t = 0; t < numSamples; t++) { + *o = (*d * (numSamples - t) + *u * t) / numSamples; + o++; + d++; + u++; + } +} + +/* temp: comput on the fly */ +#include +#ifndef M_PI +# define M_PI 3.1415926535897932384 +#endif + +/* Compute the sound snippet from the current input, and the prior input if + needed. */ +static void setPeriod(sonicSnippet snippet, int period) { + int pos = snippet->inputPos; + short* p = sonicStream.inputBuffer + pos; + float fade; + int i; + + snippet->period = period; + for (i = 0; i < period; i++) { + /* TODO: Make this a Hann window. */ + fade = 0.5*(1.0 - cos(M_PI*i/period)); + snippet->samples[i] = (1.0f - fade) * p[i] + fade * p[i - period]; + } +} + +/* Write the output sample. */ +static void outputSample(short value) { + sonicStream.outputBuffer[sonicStream.numOutputSamples++] = value; +} + +/* Increment the offset into the snippent. Set to 0 if we reach the period. */ +static void incOffset(sonicSnippet snippet) { + snippet->offset++; + if (snippet->offset == snippet->period) { + snippet->offset = 0; + } +} + +/* Fade from snippet A to snippet B smoothly. */ +static void fadeFromAToB(sonicSnippet A, sonicSnippet B) { + int numOutputSamples = B->period / sonicStream.speed; + /* Initially snippet A and snippet B have different periods. */ + int periodB = B->period; + int changedPeriod = 0; + int i; + float fadeA, fadeB; + + /* A’s offset may be non-zero from playing it in the prior iteration. */ + if (numOutputSamples <= A->period - A->offset) { + /* We will fade out A before finishing it. Just use B’s period + for B. Don’t use it for A as that would cause a discontinuity. */ + changedPeriod = 1; + } else { + /* Play B using A’s period until we reset the offset to 0. 
*/ + setPeriod(B, A->period); + B->offset = A->offset; + } + for (i = 0; i < numOutputSamples; i++) { + if (!changedPeriod && A->offset == 0) { + setPeriod(A, periodB); + setPeriod(B, periodB); + changedPeriod = 1; + } + fadeB = (float) i / numOutputSamples; + fadeA = 1.0 - fadeB; + outputSample(fadeA * A->samples[A->offset] + + fadeB * B->samples[B->offset]); + incOffset(A); /* Sets offset to 0 if offset == period. */ + incOffset(B); + } +} + +/* Set the offset of B to be in phase with A's offset. */ +static void setBOffset(sonicSnippet A, sonicSnippet B) { + int offset = A->offset; + + /* When pitch is increasing, offset can be > B->period. */ + while (offset >= B->period) { + offset -= B->period; + } + B->offset = offset; +} + +/* Determine if two snippets are identical other than for inputPos. */ +static int snippetsEqual(sonicSnippet A, sonicSnippet B) { + int i; + + if (A->period != B->period || A->offset != B->offset) { + return 0; + } + for (i = 0; i < A->period; i++) { + if (A->samples[i] != B->samples[i]) { + return 0; + } + } + return 1; +} + +/* Process as many pitch periods as we have buffered on the input. */ +static void changeSpeed(float speed) { + struct sonicSnippetStruct A, B; + int period; + + if (sonicStream.numInputSamples < 3 * sonicMaxPeriod) { + return; + } + while (sonicStream.numInputSamples >= 3 * sonicMaxPeriod) { + /* TODO: Don't recompute the snippet for A. 
*/ + A.inputPos = sonicMaxPeriod; + A.offset = sonicStream.snippetOffset; + setPeriod(&A, sonicStream.snippetPeriod); + period = findPitchPeriod(sonicStream.inputBuffer + sonicMaxPeriod, 1); + B.inputPos = sonicMaxPeriod + period; + setPeriod(&B, period); + setBOffset(&A, &B); + fadeFromAToB(&A, &B); + removeInputSamples(B.inputPos); + sonicStream.snippetPeriod = B.period; + sonicStream.snippetOffset = B.offset; + } +} + +/* Just copy from the array to the output buffer */ +static void copyToOutput(short *samples, int numSamples) { + memcpy(sonicStream.outputBuffer + sonicStream.numOutputSamples, + samples, numSamples * sizeof(short)); + sonicStream.numOutputSamples += numSamples; +} + +/* Resample as many pitch periods as we have buffered on the input. Also scale + the output by the volume. */ +static void processStreamInput(void) { + float speed = sonicStream.speed; + + if (speed > 1.00001 || speed < 0.99999) { + changeSpeed(speed); + } else { + copyToOutput(sonicStream.inputBuffer + sonicMaxPeriod, + sonicStream.numInputSamples - sonicMaxPeriod); + sonicStream.numInputSamples = sonicMaxPeriod; + } +} + +/* Simple wrapper around sonicWriteFloatToStream that does the short to float + conversion for you. */ +void sonicWriteShortToStream(short *samples, int numSamples) { + addShortSamplesToInputBuffer(samples, numSamples); + processStreamInput(); +} + +/* This is ignored. */ +void sonicSetVolume(float volume) { +} diff --git a/sonic/sonic_experimental.h b/sonic/sonic_experimental.h new file mode 100644 index 0000000000000000000000000000000000000000..bdb302c479a1f1b9b45106d9586d4ddb767e1736 --- /dev/null +++ b/sonic/sonic_experimental.h @@ -0,0 +1,48 @@ +/* Sonic library + Copyright 2010 + Bill Cox + This file is part of the Sonic Library. + + This file is licensed under the Apache 2.0 license. +*/ + +/* + This is a stripped down version of sonic, to help it fit in micro-controllers. + Only mono speedup remains. All buffers are allocated statically. 
+*/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Use a minimum pitch of 80 to reduce buffer sizes. Set it back to 65 if you + have the room in memory and find it sounds better. */ +#define SONIC_MIN_PITCH 65 +#define SONIC_MAX_PITCH 400 +#define SONIC_MIN_SAMPLE_RATE 8000 +#define SONIC_MAX_SAMPLE_RATE 96000 + +/* These are used to down-sample some inputs to improve speed */ +#define SONIC_AMDF_FREQ 4000 + +/* This is the number of samples in the buffer size passed to Sonic. */ +#define SONIC_INPUT_SAMPLES 80 + +/* Initialize Sonic. */ +void sonicInit(float speed, int sampleRate); +/* Write input samples to the stream. numSamples must be <= SONIC_INPUT_SAMPLES + */ +void sonicWriteShortToStream(short *samples, int numSamples); +/* Use this to read 16-bit data out of the stream. Sometimes no data will + be available, and zero is returned, which is not an error condition. */ +int sonicReadShortFromStream(short *samples, int maxSamples); +/* Force the sonic stream to generate output using whatever data it currently + has. No extra delay will be added to the output, but flushing in the middle + of words could introduce distortion. */ +void sonicFlushStream(void); +/* Return the number of samples in the output buffer */ +int sonicSamplesAvailable(void); + +#ifdef __cplusplus +} +#endif diff --git a/sonic/sonic_lite.c b/sonic/sonic_lite.c new file mode 100644 index 0000000000000000000000000000000000000000..a3397b4435aad4a6919b29d6261986ff9aa503a6 --- /dev/null +++ b/sonic/sonic_lite.c @@ -0,0 +1,371 @@ +/* Sonic library + Copyright 2010 + Bill Cox + This file is part of the Sonic Library. + + This file is licensed under the Apache 2.0 license. +*/ + +/* This file is designed for low-powered microcontrollers, minimizing memory + compared to the fuller sonic.c implementation. 
*/ + +#include "sonic_lite.h" + +#include + +#define SONIC_MAX_PERIOD (SONIC_SAMPLE_RATE / SONIC_MIN_PITCH) +#define SONIC_MIN_PERIOD (SONIC_SAMPLE_RATE / SONIC_MAX_PITCH) +#define SONIC_SKIP (SONIC_SAMPLE_RATE / SONIC_AMDF_FREQ) +#define SONIC_INPUT_BUFFER_SIZE (2 * SONIC_MAX_PERIOD + SONIC_INPUT_SAMPLES) + +struct sonicStruct { + short inputBuffer[SONIC_INPUT_BUFFER_SIZE]; + short outputBuffer [2 * SONIC_MAX_PERIOD]; + short downSampleBuffer[(2 * SONIC_MAX_PERIOD) / SONIC_SKIP]; + float speed; + float volume; + int numInputSamples; + int numOutputSamples; + int remainingInputToCopy; + int prevPeriod; + int prevMinDiff; +}; + +static struct sonicStruct sonicStream; + +/* Scale the samples by the factor. Volume should be no greater than 127X, or + it is possible to overflow the fixed-point mathi. */ +static void scaleSamples(short *samples, int numSamples, float volume) { + /* This is 24-bit integer and 8-bit fraction fixed-point representation. */ + int fixedPointVolume; + int value; + + if (volume > 127.0) { + volume = 127.0; + } + fixedPointVolume = volume * 256.0f; + while (numSamples--) { + value = (*samples * fixedPointVolume) >> 8; + if (value > 32767) { + value = 32767; + } else if (value < -32767) { + value = -32767; + } + *samples++ = value; + } +} + +/* Set the speed of the stream. */ +void sonicSetSpeed(float speed) { sonicStream.speed = speed; } + +/* Set the scaling factor of the stream. */ +void sonicSetVolume(float volume) { + sonicStream.volume = volume; +} + +/* Create a sonic stream. Return NULL only if we are out of memory and cannot + allocate the stream. */ +void sonicInit(void) { + sonicStream.speed = 1.0; + sonicStream.volume = 1.0f; + sonicStream.numInputSamples = 0;; + sonicStream.numOutputSamples = 0; + sonicStream.remainingInputToCopy = 0; + sonicStream.prevPeriod = 0; + sonicStream.prevMinDiff = 0; +} + +/* Add the input samples to the input buffer. 
*/ +static int addShortSamplesToInputBuffer(short *samples, + int numSamples) { + if (numSamples == 0) { + return 1; + } + memcpy(sonicStream.inputBuffer + sonicStream.numInputSamples, + samples, numSamples * sizeof(short)); + sonicStream.numInputSamples += numSamples; + return 1; +} + +/* Remove input samples that we have already processed. */ +static void removeInputSamples(int position) { + int remainingSamples = sonicStream.numInputSamples - position; + + if (remainingSamples > 0) { + memmove(sonicStream.inputBuffer, + sonicStream.inputBuffer + position, + remainingSamples * sizeof(short)); + } + sonicStream.numInputSamples = remainingSamples; +} + +/* Just copy from the array to the output buffer */ +static void copyToOutput(short *samples, int numSamples) { + memcpy(sonicStream.outputBuffer + sonicStream.numOutputSamples, + samples, numSamples * sizeof(short)); + sonicStream.numOutputSamples += numSamples; +} + +/* Just copy from the input buffer to the output buffer. */ +static int copyInputToOutput(int position) { + int numSamples = sonicStream.remainingInputToCopy; + + if (numSamples > 2 * SONIC_MAX_PERIOD) { + numSamples = 2 * SONIC_MAX_PERIOD; + } + copyToOutput(sonicStream.inputBuffer + position, numSamples); + sonicStream.remainingInputToCopy -= numSamples; + return numSamples; +} + +/* Read short data out of the stream. Sometimes no data will be available, and + zero is returned, which is not an error condition. 
*/ +int sonicReadShortFromStream(short *samples, int maxSamples) { + int numSamples = sonicStream.numOutputSamples; + int remainingSamples = 0; + + if (numSamples == 0) { + return 0; + } + /* Hand out at most maxSamples; the rest stays queued for the next call. */ + if (numSamples > maxSamples) { + remainingSamples = numSamples - maxSamples; + numSamples = maxSamples; + } + memcpy(samples, sonicStream.outputBuffer, numSamples * sizeof(short)); + if (remainingSamples > 0) { + /* Move the unread samples to the front of the output buffer. */ + memmove(sonicStream.outputBuffer, sonicStream.outputBuffer + numSamples, + remainingSamples * sizeof(short)); + } + sonicStream.numOutputSamples = remainingSamples; + return numSamples; +} + +/* Force the sonic stream to generate output using whatever data it currently + has. No extra delay will be added to the output, but flushing in the middle + of words could introduce distortion. The unused tail of the input buffer is + zeroed, the input count is raised by 2 * maxRequired so that the padding is + processed, and output beyond the expected count is discarded. + NOTE(review): after the increment numInputSamples can exceed + SONIC_INPUT_BUFFER_SIZE (2 * SONIC_MAX_PERIOD + SONIC_INPUT_SAMPLES), so the + processing call looks able to touch memory past inputBuffer - confirm. */ +void sonicFlushStream(void) { + int maxRequired = 2 * SONIC_MAX_PERIOD; + int remainingSamples = sonicStream.numInputSamples; + float speed = sonicStream.speed; + int expectedOutputSamples = sonicStream.numOutputSamples + (int)((remainingSamples / speed) + 0.5f); + + /* Pad the unused tail of the input buffer with silence. */ + memset(sonicStream.inputBuffer + remainingSamples, 0, + sizeof(short) * (SONIC_INPUT_BUFFER_SIZE - remainingSamples)); + sonicStream.numInputSamples += 2 * maxRequired; + sonicWriteShortToStream(NULL, 0); + /* Throw away any extra samples we generated due to the silence we added */ + if (sonicStream.numOutputSamples > expectedOutputSamples) { + sonicStream.numOutputSamples = expectedOutputSamples; + } + /* Empty input buffer */ + sonicStream.numInputSamples = 0; + sonicStream.remainingInputToCopy = 0; +} + +/* Return the number of samples in the output buffer */ +int sonicSamplesAvailable(void) { + return sonicStream.numOutputSamples; +} + +/* If skip is greater than one, average skip samples together and write them to + the down-sample buffer.
*/ +/* Writes 2 * SONIC_MAX_PERIOD / SONIC_SKIP averages, each taken over SONIC_SKIP + consecutive input samples. */ +static void downSampleInput(short *samples) { + int numSamples = 2 * SONIC_MAX_PERIOD / SONIC_SKIP; + int i, j; + int value; + short *downSamples = sonicStream.downSampleBuffer; + + for (i = 0; i < numSamples; i++) { + value = 0; + for (j = 0; j < SONIC_SKIP; j++) { + value += *samples++; + } + value /= SONIC_SKIP; + *downSamples++ = value; + } +} + +/* Search the periods minPeriod..maxPeriod for the one whose first period of + samples differs least, on average per sample, from the period that follows + it. Returns that period. The lowest and highest average differences seen are + stored in *retMinDiff and *retMaxDiff. Reads up to 2 * maxPeriod samples. + NOTE(review): assumes minPeriod <= maxPeriod; otherwise bestPeriod stays 0 + and the final division divides by zero. */ +static int findPitchPeriodInRange(short *samples, int minPeriod, int maxPeriod, + int* retMinDiff, int* retMaxDiff) { + int period, bestPeriod = 0, worstPeriod = 255; + short *s; + short *p; + short sVal, pVal; + unsigned long diff, minDiff = 1, maxDiff = 0; + int i; + + for (period = minPeriod; period <= maxPeriod; period++) { + diff = 0; + s = samples; + p = samples + period; + /* Sum of absolute differences between the two adjacent periods. */ + for (i = 0; i < period; i++) { + sVal = *s++; + pVal = *p++; + diff += sVal >= pVal ? (unsigned short)(sVal - pVal) + : (unsigned short)(pVal - sVal); + } + /* Note that the highest number of samples we add into diff will be less + than 256, since we skip samples. Thus, diff is a 24 bit number, and + we can safely multiply by numSamples without overflow */ + /* Cross-multiplied comparison of diff / period against the best so far. */ + if (bestPeriod == 0 || diff * bestPeriod < minDiff * period) { + minDiff = diff; + bestPeriod = period; + } + if (diff * worstPeriod > maxDiff * period) { + maxDiff = diff; + worstPeriod = period; + } + } + *retMinDiff = minDiff / bestPeriod; + *retMaxDiff = maxDiff / worstPeriod; + return bestPeriod; +} + +/* At abrupt ends of voiced words, we can have pitch periods that are better + approximated by the previous pitch period estimate. Try to detect this case.
*/ +/* Returns 1 when the previous period estimate should be used instead of the + new one, and 0 otherwise. */ +static int prevPeriodBetter(int minDiff, int maxDiff, int preferNewPeriod) { + /* A perfect match, or no previous estimate, always keeps the new period. */ + if (minDiff == 0 || sonicStream.prevPeriod == 0) { + return 0; + } + if (preferNewPeriod) { + if (maxDiff > minDiff * 3) { + /* Got a reasonable match this period */ + return 0; + } + if (minDiff * 2 <= sonicStream.prevMinDiff * 3) { + /* Mismatch is not that much greater this period */ + return 0; + } + } else { + if (minDiff <= sonicStream.prevMinDiff) { + return 0; + } + } + return 1; +} + +/* Find the pitch period. This is a critical step, and we may have to try + multiple ways to get a good answer. This version uses the Average Magnitude + Difference Function (AMDF). To improve speed, when SONIC_SKIP is not 1 we + first search a copy of the input down-sampled by SONIC_SKIP, and then search + again, without down-sampling, in a narrow range around that first estimate. + The period and minimum difference found are remembered for the next call. */ +static int findPitchPeriod(short *samples, int preferNewPeriod) { + int minPeriod = SONIC_MIN_PERIOD; + int maxPeriod = SONIC_MAX_PERIOD; + int minDiff, maxDiff, retPeriod; + int period; + + if (SONIC_SKIP == 1) { + period = findPitchPeriodInRange(samples, minPeriod, maxPeriod, &minDiff, &maxDiff); + } else { + downSampleInput(samples); + period = findPitchPeriodInRange(sonicStream.downSampleBuffer, minPeriod / SONIC_SKIP, + maxPeriod / SONIC_SKIP, &minDiff, &maxDiff); + period *= SONIC_SKIP; + /* Refine within 4 * SONIC_SKIP samples of the coarse estimate. */ + minPeriod = period - (SONIC_SKIP << 2); + maxPeriod = period + (SONIC_SKIP << 2); + if (minPeriod < SONIC_MIN_PERIOD) { + minPeriod = SONIC_MIN_PERIOD; + } + if (maxPeriod > SONIC_MAX_PERIOD) { + maxPeriod = SONIC_MAX_PERIOD; + } + period = findPitchPeriodInRange(samples, minPeriod, maxPeriod, &minDiff, &maxDiff); + } + if (prevPeriodBetter(minDiff, maxDiff, preferNewPeriod)) { + retPeriod = sonicStream.prevPeriod; + } else { + retPeriod = period; + } + sonicStream.prevMinDiff = minDiff; + sonicStream.prevPeriod = period; + return retPeriod; +} + +/* Overlap two sound segments, ramp the volume of one down, while ramping the + other one from zero up, and add them, storing
the result at the output. */ +static void overlapAdd(int numSamples, short *out, short *rampDown, short *rampUp) { + short *o; + short *u; + short *d; + int t; + + o = out; + u = rampUp; + d = rampDown; + for (t = 0; t < numSamples; t++) { + /* Linear cross-fade: rampDown weighs numSamples - t, rampUp weighs t. */ + *o = (*d * (numSamples - t) + *u * t) / numSamples; + o++; + d++; + u++; + } +} + +/* Overlap-add the pitch period at samples with the period that follows it and + append the result to the output buffer. For speed >= 2 only + period / (speed - 1) samples are produced. Otherwise a full period is + produced and remainingInputToCopy is set to the number of input samples to + copy unchanged afterwards. Returns the number of samples appended. */ +static int skipPitchPeriod(short *samples, float speed, int period) { + long newSamples; + + if (speed >= 2.0f) { + newSamples = period / (speed - 1.0f); + } else { + newSamples = period; + sonicStream.remainingInputToCopy = period * (2.0f - speed) / (speed - 1.0f); + } + overlapAdd(newSamples, sonicStream.outputBuffer + sonicStream.numOutputSamples, + samples, samples + period); + sonicStream.numOutputSamples += newSamples; + return newSamples; +} + +/* Resample as many pitch periods as we have buffered on the input. Nothing is + done until at least 2 * SONIC_MAX_PERIOD input samples are buffered. The + consumed input is removed from the input buffer before returning. */ +static void changeSpeed(float speed) { + short *samples; + int numSamples = sonicStream.numInputSamples; + int position = 0, period, newSamples; + int maxRequired = 2 * SONIC_MAX_PERIOD; + + /* printf("Changing speed to %f\n", speed); */ + if (sonicStream.numInputSamples < maxRequired) { + return; + } + do { + if (sonicStream.remainingInputToCopy > 0) { + /* Finish copying the unchanged input scheduled by skipPitchPeriod. */ + newSamples = copyInputToOutput(position); + position += newSamples; + } else { + samples = sonicStream.inputBuffer + position; + period = findPitchPeriod(samples, 1); + newSamples = skipPitchPeriod(samples, speed, period); + position += period + newSamples; + } + } while (position + maxRequired <= numSamples); + removeInputSamples(position); +} + +/* Resample as many pitch periods as we have buffered on the input. Also scale + the output by the volume.
*/ +static void processStreamInput(void) { + int originalNumOutputSamples = sonicStream.numOutputSamples; + float speed = sonicStream.speed; + + /* Speeds at or below 1.00001 pass the input through unchanged. */ + if (speed > 1.00001) { + changeSpeed(speed); + } else { + copyToOutput(sonicStream.inputBuffer, sonicStream.numInputSamples); + sonicStream.numInputSamples = 0; + } + if (sonicStream.volume != 1.0f) { + /* Adjust output volume. */ + scaleSamples( sonicStream.outputBuffer + originalNumOutputSamples, + (sonicStream.numOutputSamples - originalNumOutputSamples), sonicStream.volume); + } +} + +/* Append numSamples samples to the input buffer, then process the buffered + input into the output buffer. */ +void sonicWriteShortToStream(short *samples, int numSamples) { + addShortSamplesToInputBuffer(samples, numSamples); + processStreamInput(); +} diff --git a/sonic/sonic_lite.h b/sonic/sonic_lite.h new file mode 100644 index 0000000000000000000000000000000000000000..09319c40ddd357a57625e8e4ebd658d3c8087b7c --- /dev/null +++ b/sonic/sonic_lite.h @@ -0,0 +1,52 @@ +/* Sonic library + Copyright 2010 + Bill Cox + This file is part of the Sonic Library. + + This file is licensed under the Apache 2.0 license. +*/ + +/* + This is a stripped down version of sonic, to help it fit in micro-controllers. + Only mono speedup remains. All buffers are allocated statically. +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* The minimum pitch is currently 65. Use a minimum pitch of 80 to reduce buffer + sizes if memory is tight; 65 needs more room but may sound better. */ +#define SONIC_MIN_PITCH 65 +#define SONIC_MAX_PITCH 400 + +/* These are used to down-sample some inputs to improve speed */ +#define SONIC_AMDF_FREQ 4000 + +/* This is the sample frequency. You must hard-code it rather than passing it in. */ +#define SONIC_SAMPLE_RATE 8000 + +/* This is the number of samples in the buffer size passed to Sonic. */ +#define SONIC_INPUT_SAMPLES 80 + +/* Initialize Sonic. */ +void sonicInit(void); +/* Write input samples to the stream.
numSamples must be <= SONIC_INPUT_SAMPLES */ +void sonicWriteShortToStream(short *samples, int numSamples); +/* Use this to read 16-bit data out of the stream. Sometimes no data will + be available, and zero is returned, which is not an error condition. */ +int sonicReadShortFromStream(short *samples, int maxSamples); +/* Force the sonic stream to generate output using whatever data it currently + has. No extra delay will be added to the output, but flushing in the middle + of words could introduce distortion. */ +void sonicFlushStream(void); +/* Return the number of samples in the output buffer */ +int sonicSamplesAvailable(void); +/* Set the speed of the stream. */ +void sonicSetSpeed(float speed); +/* Set the scaling factor of the stream. */ +void sonicSetVolume(float volume); + +#ifdef __cplusplus +} +#endif diff --git a/sonic/spectrogram.c b/sonic/spectrogram.c new file mode 100644 index 0000000000000000000000000000000000000000..e24a898e01a3b24226531139776b96f687b9a199 --- /dev/null +++ b/sonic/spectrogram.c @@ -0,0 +1,377 @@ +/* Sonic library + Copyright 2016 + Bill Cox + This file is part of the Sonic Library. + + This file is licensed under the Apache 2.0 license. +*/ + +#ifdef KISS_FFT +#include /* kiss_fft.h fails to load this */ +#include +#include +#else +#include +#endif +#include +#include +#include +#include +#include "sonic.h" +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif +#ifndef M_E +#define M_E 2.7182818284590452354 +#endif + +struct sonicSpectrumStruct; +typedef struct sonicSpectrumStruct* sonicSpectrum; + +struct sonicSpectrogramStruct { + sonicSpectrum* spectrums; + double minPower, maxPower; + int numSpectrums; + int allocatedSpectrums; + int sampleRate; + int totalSamples; +}; + +struct sonicSpectrumStruct { + sonicSpectrogram spectrogram; + double* power; + int numFreqs; /* Number of frequencies */ + int numSamples; + int startingSample; +}; + +/* Print out spectrum data for debugging. 
*/ +static void dumpSpectrum(sonicSpectrum spectrum) { + printf("spectrum numFreqs:%d numSamples:%d startingSample:%d\n", + spectrum->numFreqs, spectrum->numSamples, spectrum->startingSample); + printf(" "); + int i; + for (i = 0; i < spectrum->numFreqs; i++) { + printf(" %.1f", spectrum->power[i]); + } + printf("\n"); +} + +/* Print out spectrogram data for debugging. */ +void dumpSpectrogram(sonicSpectrogram spectrogram) { + printf( + "spectrogram minPower:%f maxPower:%f numSpectrums:%d totalSamples:%d\n", + spectrogram->minPower, spectrogram->maxPower, spectrogram->numSpectrums, + spectrogram->totalSamples); + int i; + for (i = 0; i < spectrogram->numSpectrums; i++) { + dumpSpectrum(spectrogram->spectrums[i]); + } +} + +/* Create a new spectrum and append it to the spectrogram, doubling the + spectrum array when it is full. Returns NULL, leaving the spectrogram + unchanged, if memory cannot be allocated. */ +static sonicSpectrum sonicCreateSpectrum(sonicSpectrogram spectrogram) { + sonicSpectrum spectrum = + (sonicSpectrum)calloc(1, sizeof(struct sonicSpectrumStruct)); + if (spectrum == NULL) { + return NULL; + } + if (spectrogram->numSpectrums == spectrogram->allocatedSpectrums) { + /* Grow through temporaries so that a failed realloc neither loses the + existing array nor records a capacity that was never allocated. */ + int newAllocated = spectrogram->allocatedSpectrums << 1; + sonicSpectrum* newSpectrums = (sonicSpectrum*)realloc( + spectrogram->spectrums, newAllocated * sizeof(sonicSpectrum)); + if (newSpectrums == NULL) { + free(spectrum); + return NULL; + } + spectrogram->spectrums = newSpectrums; + spectrogram->allocatedSpectrums = newAllocated; + } + spectrogram->spectrums[spectrogram->numSpectrums++] = spectrum; + spectrum->spectrogram = spectrogram; + return spectrum; +} + +/* Free the spectrum and its power array. A NULL spectrum is ignored. */ +static void sonicDestroySpectrum(sonicSpectrum spectrum) { + if (spectrum == NULL) { + return; + } + if (spectrum->power != NULL) { + free(spectrum->power); + } + free(spectrum); +} + +/* Create an empty spectrogram.
*/ +sonicSpectrogram sonicCreateSpectrogram(int sampleRate) { + sonicSpectrogram spectrogram = + (sonicSpectrogram)calloc(1, sizeof(struct sonicSpectrogramStruct)); + if (spectrogram == NULL) { + return NULL; + } + spectrogram->allocatedSpectrums = 32; + spectrogram->spectrums = (sonicSpectrum*)calloc( + spectrogram->allocatedSpectrums, sizeof(sonicSpectrum)); + if (spectrogram->spectrums == NULL) { + sonicDestroySpectrogram(spectrogram); + return NULL; + } + spectrogram->sampleRate = sampleRate; + spectrogram->minPower = DBL_MAX; + spectrogram->maxPower = DBL_MIN; + return spectrogram; +} + +/* Destroy the spectrotram. */ +void sonicDestroySpectrogram(sonicSpectrogram spectrogram) { + if (spectrogram != NULL) { + if (spectrogram->spectrums != NULL) { + int i; + for (i = 0; i < spectrogram->numSpectrums; i++) { + sonicSpectrum spectrum = spectrogram->spectrums[i]; + sonicDestroySpectrum(spectrum); + } + free(spectrogram->spectrums); + } + free(spectrogram); + } +} + +/* Create a new bitmap. This takes ownership of data. */ +sonicBitmap sonicCreateBitmap(unsigned char* data, int numRows, int numCols) { + sonicBitmap bitmap = (sonicBitmap)calloc(1, sizeof(struct sonicBitmapStruct)); + if (bitmap == NULL) { + return NULL; + } + bitmap->data = data; + bitmap->numRows = numRows; + bitmap->numCols = numCols; + return bitmap; +} + +/* Destroy the bitmap. */ +void sonicDestroyBitmap(sonicBitmap bitmap) { + if (bitmap == NULL) { + return; + } + if (bitmap->data != NULL) { + free(bitmap->data); + } + free(bitmap); +} + +/* Overlap-add the two pitch periods using a Hann window. Caller must free the + * result. 
*/ +static void computeOverlapAdd(short* samples, int period, int numChannels, + double* ola_samples) { + int i; + for (i = 0; i < period; i++) { + double weight = (1.0 - cos(M_PI * i / period)) / 2.0; + short sample1, sample2; + if (numChannels == 1) { + sample1 = samples[i]; + sample2 = samples[i + period]; + } else { + /* Average the samples */ + int total1 = 0; + int total2 = 0; + int j; + for (j = 0; j < numChannels; j++) { + total1 += samples[i * numChannels + j]; + total2 += samples[(i + period) * numChannels + j]; + } + sample1 = (total1 + (numChannels >> 1)) / numChannels; + sample2 = (total2 + (numChannels >> 1)) / numChannels; + } + ola_samples[i] = weight * sample1 + (1.0 - weight) * sample2; + } +} + +#ifdef KISS_FFT +/* Compute the amplitude of the kiss_complex number. */ +static double magnitude(kiss_fft_cpx c) { + return sqrt(c.r * c.r + c.i * c.i); +} +#else +/* Compute the amplitude of the fftw_complex number. */ +static double magnitude(fftw_complex c) { + return sqrt(c[0] * c[0] + c[1] * c[1]); +} +#endif + +/* Add two pitch periods worth of samples to the spectrogram. There must be + 2*period samples. Time should advance one pitch period for each call to + this function. 
*/ +void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram, + short* samples, int numSamples, + int numChannels) { + int i; + sonicSpectrum spectrum = sonicCreateSpectrum(spectrogram); + spectrum->startingSample = spectrogram->totalSamples; + spectrogram->totalSamples += numSamples; + /* TODO: convert to fixed-point */ + double* in = calloc(numSamples, sizeof(double)); + int numFreqs = numSamples / 2 + 1; + spectrum->numFreqs = numFreqs; + spectrum->numSamples = numSamples; + spectrum->power = (double*)calloc(spectrum->numFreqs, sizeof(double)); + computeOverlapAdd(samples, numSamples, numChannels, in); +#ifdef KISS_FFT + kiss_fft_cpx* cin = calloc(numFreqs, sizeof(kiss_fft_cpx)); + for (i=0; ipower[0] = 0.0; + for (i = 1; i < numFreqs; ++i) { + double power = magnitude(out[i]) / numSamples; + spectrum->power[i] = power; + if (power > spectrogram->maxPower) { + spectrogram->maxPower = power; + } + if (power < spectrogram->minPower) { + spectrogram->minPower = power; + } + } + free(in); + free(out); +} + +/* Linearly interpolate the power at a given position in the spectrogram. */ +static double interpolateSpectrum(sonicSpectrum spectrum, int row, + int numRows) { + /* Flip the row so that we show lowest frequency on the bottom. */ + row = numRows - row - 1; + /* We want the max row to be 1/2 the Niquist frequency, or 4 samples worth. */ + double spectrumFreqSpacing = + (double)spectrum->spectrogram->sampleRate / spectrum->numSamples; + double rowFreqSpacing = SONIC_MAX_SPECTRUM_FREQ / (numRows - 1); + double targetFreq = row * rowFreqSpacing; + int bottomIndex = targetFreq / spectrumFreqSpacing; + double bottomPower = spectrum->power[bottomIndex]; + double topPower = spectrum->power[bottomIndex + 1]; + double position = + (targetFreq - bottomIndex * spectrumFreqSpacing) / spectrumFreqSpacing; + return (1.0 - position) * bottomPower + position * topPower; +} + +/* Linearly interpolate the power at a given position in the spectrogram. 
*/ +static double interpolateSpectrogram(sonicSpectrum leftSpectrum, + sonicSpectrum rightSpectrum, int row, + int numRows, int colTime) { + double leftPower = interpolateSpectrum(leftSpectrum, row, numRows); + double rightPower = interpolateSpectrum(rightSpectrum, row, numRows); + if (rightSpectrum->startingSample != + leftSpectrum->startingSample + leftSpectrum->numSamples) { + fprintf(stderr, "Invalid sample spacing\n"); + exit(1); + } + int remainder = colTime - leftSpectrum->startingSample; + double position = (double)remainder / leftSpectrum->numSamples; + return (1.0 - position) * leftPower + position * rightPower; +} + +/* Add one column of data to the output bitmap data. */ +static void addBitmapCol(unsigned char* data, int col, int numCols, int numRows, + sonicSpectrogram spectrogram, sonicSpectrum spectrum, + sonicSpectrum nextSpectrum, int colTime) { + double minPower = spectrogram->minPower; + double maxPower = spectrogram->maxPower; + int row; + for (row = 0; row < numRows; row++) { + double power = + interpolateSpectrogram(spectrum, nextSpectrum, row, numRows, colTime); + if (power < minPower && power > maxPower) { + fprintf(stderr, "Power outside min/max range\n"); + exit(1); + } + double range = maxPower - minPower; + /* Use log scale such that log(min) = 0, and log(max) = 255. */ + int value = + 256.0 * sqrt(sqrt(log((M_E - 1.0) * (power - minPower) / range + 1.0))); + /* int value = (unsigned char)(((power - minPower)/range)*256); */ + if (value >= 256) { + value = 255; + } + data[row * numCols + col] = 255 - value; + } +} + +/* Convert the spectrogram to a bitmap. The returned array must be freed by + the caller. It will be rows*cols in size. The pixels are written top row + to bottom, and each row is left to right. So, the pixel in the 5th row from + the top, in the 18th column from the left in a 32x128 array would be in + position 128*4 + 18. NULL is returned if calloc fails to allocate the + memory. 
*/ +sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram, + int numRows, int numCols) { + /* dumpSpectrogram(spectrogram); */ + unsigned char* data = + (unsigned char*)calloc(numRows * numCols, sizeof(unsigned char)); + if (data == NULL) { + return NULL; + } + int xSpectrum = 0; /* xSpectrum is index of nextSpectrum */ + sonicSpectrum spectrum = spectrogram->spectrums[xSpectrum++]; + sonicSpectrum nextSpectrum = spectrogram->spectrums[xSpectrum]; + int totalTime = + spectrogram->spectrums[spectrogram->numSpectrums - 1]->startingSample; + int col; + for (col = 0; col < numCols; col++) { + /* There must be at least two spectrums for this to work right. */ + double colTime = (double)totalTime * col / (numCols - 1); + while (xSpectrum + 1 < spectrogram->numSpectrums && + colTime >= nextSpectrum->startingSample) { + spectrum = nextSpectrum; + nextSpectrum = spectrogram->spectrums[++xSpectrum]; + } + addBitmapCol(data, col, numCols, numRows, spectrogram, spectrum, + nextSpectrum, colTime); + } + return sonicCreateBitmap(data, numRows, numCols); +} + +/* Write a PGM image file, which is 8-bit grayscale and looks like: + P2 + # CREATOR: libsonic + 640 400 + 255 + ... 
+*/ +int sonicWritePGM(sonicBitmap bitmap, char* fileName) { + printf("Writing PGM to %s\n", fileName); + FILE* file = fopen(fileName, "w"); + if (file == NULL) { + return 0; + } + if (fprintf(file, "P2\n# CREATOR: libsonic\n%d %d\n255\n", bitmap->numCols, + bitmap->numRows) < 0) { + fclose(file); + return 0; + } + int i; + int numPixels = bitmap->numRows * bitmap->numCols; + unsigned char* p = bitmap->data; + for (i = 0; i < numPixels; i++) { + if (fprintf(file, "%d\n", 255 - *p++) < 0) { + fclose(file); + return 0; + } + } + fclose(file); + return 1; +} + +#ifdef MAIN +main(){ +} +#endif diff --git a/sonic/tests/BUILD b/sonic/tests/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..1399556508ddf16d22023bce978e4a669f18607f --- /dev/null +++ b/sonic/tests/BUILD @@ -0,0 +1,34 @@ +# Sonic is a library for speeding up or slowing down speech without changing +# the pitch. +package( + default_applicable_licenses = ["//third_party/sonic:license"], +) + +licenses(["notice"]) + +cc_library( + name = "genwave_lib", + srcs = ["genwave.c"], + hdrs = ["genwave.h"], + copts = [ + "-D GOOGLE_BUILD", + ], + deps = ["//third_party/sonic:sonic_internal_lib_fftw"], +) + +cc_test( + name = "sonic_unit_test", + srcs = [ + "input_clamping_test.c", + "sonic_unit_test.cc", + "tests.h", + ], + copts = [ + "-D GOOGLE_BUILD", + ], + deps = [ + ":genwave_lib", + "//testing/base/public:gunit_main", + "//third_party/sonic:sonic_classic_lib_fftw", + ], +) diff --git a/sonic/tests/Makefile b/sonic/tests/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..959719059058b1dc5842bbb79995c7e49b3876d3 --- /dev/null +++ b/sonic/tests/Makefile @@ -0,0 +1,32 @@ +# This file was written by Bill Cox in 2010, and is licensed under the Apache +# 2.0 license. +# +# Note that -pthread is only included so that older Linux builds will be thread +# safe. We call malloc, and older Linux versions only linked in the thread-safe +# malloc if -pthread is specified. 
+ +# Uncomment this if you want to link in spectrogram generation. It is not +# needed to adjust speech speed or pitch. It is included primarily to provide +# high-quality spectrograms with low CPU overhead, for applications such a +# speech recognition. +#USE_SPECTROGRAM=1 + +CFLAGS=-Wall -Wno-unused-function -g -ansi -fPIC -pthread -I .. +#CFLAGS ?= -O3 +#CFLAGS += -Wall -Wno-unused-function -ansi -fPIC -pthread -I .. + +TEST_SRC = \ +input_clamping_test.c + +CC=gcc + +all: genwave runtests + +genwave: ../wave.c ../wave.h genwave.c genwave.h genwave_main.c + $(CC) $(CFLAGS) -o genwave genwave.c genwave_main.c ../wave.c -lm + +runtests: runtests.c genwave.c ../sonic.c ../sonic.h tests.h $(TEST_SRC) + $(CC) $(CFLAGS) -o runtests runtests.c genwave.c ../sonic.c $(TEST_SRC) -lm + +clean: + rm -f *.o genwave runtests diff --git a/sonic/tests/genwave.c b/sonic/tests/genwave.c new file mode 100644 index 0000000000000000000000000000000000000000..035b6e063df2a26b3291b045eff9643be5135cdf --- /dev/null +++ b/sonic/tests/genwave.c @@ -0,0 +1,35 @@ +#include +#include + +/* Unfortunate Google compatibility cruft. */ +#ifdef GOOGLE_BUILD +#include "third_party/sonic/wave.h" +#else +#include "wave.h" +#endif + +#ifndef M_PI +#define M_PI 3.1415926535897932384 +#endif + +/* Write a sine wave to an output buffer. Return the number of samples written. 
+ */ +int genSineWave(short* output, int outputLen, int sampleRate, int period, + int amplitude, int numPeriods) { + int i, j; + short value; + double x; + int numSamples = 0; + + for (i = 0; i < numPeriods; i++) { + for (j = 0; j < period; j++) { + if (numSamples == outputLen) { + return numSamples; + } + x = (double)j * (2.0 * M_PI) / period; + value = (short)(amplitude * sin(x)); + output[numSamples++] = value; + } + } + return numSamples; +} diff --git a/sonic/tests/genwave.h b/sonic/tests/genwave.h new file mode 100644 index 0000000000000000000000000000000000000000..7109b180124b635bac81fd7b3906b23826cb17b5 --- /dev/null +++ b/sonic/tests/genwave.h @@ -0,0 +1,4 @@ +/* Write a sine wave to an output buffer. Return the number of samples written. + */ +int genSineWave(short* output, int outputLen, int sampleRate, int period, + int amplitude, int numPeriods); diff --git a/sonic/tests/genwave_main.c b/sonic/tests/genwave_main.c new file mode 100644 index 0000000000000000000000000000000000000000..2fb5b847590892e54b3bdb6f9f0b1d813d54e118 --- /dev/null +++ b/sonic/tests/genwave_main.c @@ -0,0 +1,35 @@ +#include +#include + +#include "wave.h" +#include "genwave.h" + +#include +#include + +#ifndef M_PI +# define M_PI 3.1415926535897932384 +#endif + +/* Write samples to the outFile. 
*/ +static void writeSamplesToFile(char* fileName, int sampleRate, short* samples, int numSamples) { + waveFile outFile = openOutputWaveFile(fileName, sampleRate, 1); + + assert(writeToWaveFile(outFile, samples, numSamples)); + closeWaveFile(outFile); +} + +int main(int argc, char** argv) { + int sampleRate = 96000; + int freq = 200; + int period = sampleRate / freq; + int amplitude = 6000; + int numPeriods = 500; + int numSamples = period * numPeriods; + short* samples = malloc(numSamples * sizeof(short)); + + numSamples = genSineWave(samples, numSamples, sampleRate, period, amplitude, numPeriods); + writeSamplesToFile("out.wav", sampleRate, samples, numSamples); + free(samples); + return 0; +} diff --git a/sonic/tests/input_clamping_test.c b/sonic/tests/input_clamping_test.c new file mode 100644 index 0000000000000000000000000000000000000000..f9fb1b7cbc51c10f37e4415a7d509bb249944dcd --- /dev/null +++ b/sonic/tests/input_clamping_test.c @@ -0,0 +1,145 @@ +/* Sonic library + Copyright 2025 + Bill Cox + This file is part of the Sonic Library. + + This file is licensed under the Apache 2.0 license. +*/ + +/* Unfortunate Google compatibility cruft. */ +#ifdef GOOGLE_BUILD +#include "third_party/sonic/sonic.h" +#else +#include "sonic.h" +#endif + +#include "genwave.h" +#include "tests.h" + +#include + +/* Just verify that we clamp to expected input values. 
*/ +int sonicTestInputClamping(void) { + sonicStream stream = sonicCreateStream(44100, 1); + sonicSetVolume(stream, SONIC_MIN_VOLUME * 0.9f); + if (sonicGetVolume(stream) != SONIC_MIN_VOLUME) { + return 0; + } + sonicSetVolume(stream, SONIC_MAX_VOLUME * 1.1f); + if (sonicGetVolume(stream) != SONIC_MAX_VOLUME) { + return 0; + } + sonicSetSpeed(stream, SONIC_MIN_SPEED * 0.9f); + if (sonicGetSpeed(stream) != SONIC_MIN_SPEED) { + return 0; + } + sonicSetSpeed(stream, SONIC_MAX_SPEED * 1.1f); + if (sonicGetSpeed(stream) != SONIC_MAX_SPEED) { + return 0; + } + sonicSetPitch(stream, SONIC_MIN_PITCH_SETTING * 0.9f); + if (sonicGetPitch(stream) != SONIC_MIN_PITCH_SETTING) { + return 0; + } + sonicSetPitch(stream, SONIC_MAX_PITCH_SETTING * 1.1f); + if (sonicGetPitch(stream) != SONIC_MAX_PITCH_SETTING) { + return 0; + } + sonicSetSpeed(stream, SONIC_MAX_SPEED * 1.1f); + if (sonicGetSpeed(stream) != SONIC_MAX_SPEED) { + return 0; + } + sonicSetRate(stream, SONIC_MIN_RATE * 0.9f); + if (sonicGetRate(stream) != SONIC_MIN_RATE) { + return 0; + } + sonicSetRate(stream, SONIC_MAX_RATE * 1.1f); + if (sonicGetRate(stream) != SONIC_MAX_RATE) { + return 0; + } + sonicSetSpeed(stream, SONIC_MAX_SPEED * 1.1f); + if (sonicGetSpeed(stream) != SONIC_MAX_SPEED) { + return 0; + } + sonicSetSampleRate(stream, (int)(SONIC_MIN_SAMPLE_RATE * 0.9f)); + if (sonicGetSampleRate(stream) != SONIC_MIN_SAMPLE_RATE) { + return 0; + } + sonicSetSampleRate(stream, (int)(SONIC_MAX_SAMPLE_RATE * 1.1f)); + if (sonicGetSampleRate(stream) != SONIC_MAX_SAMPLE_RATE) { + return 0; + } + sonicSetNumChannels(stream, 0); + if (sonicGetNumChannels(stream) != SONIC_MIN_CHANNELS) { + return 0; + } + sonicSetNumChannels(stream, SONIC_MAX_CHANNELS * 2); + if (sonicGetNumChannels(stream) != SONIC_MAX_CHANNELS) { + return 0; + } + sonicDestroyStream(stream); + return 1; +} + +/* Used as the read buffer length when processing audio. */ +#define READ_BUF_LEN 1000 + +/* Process a few pitch periods of a sine wave. 
*/ +static void processSomeSamples(sonicStream stream, const short* samples, int numSamples) { + short readBuf[READ_BUF_LEN]; + int samplesRead; + + /* Write all at once. */ + assert(sonicWriteShortToStream(stream, samples, numSamples)); + while ((samplesRead = sonicReadShortFromStream(stream, readBuf, READ_BUF_LEN)) != 0); +} + +/* Constants defining a sine wave for tests. */ +#define SAMPLE_RATE 44100 +#define FREQ 200 +#define PERIOD (SAMPLE_RATE / FREQ) +#define AMPLITUDE 6000 +#define NUM_PERIODS 500 +#define NUM_SAMPLES (NUM_PERIODS * PERIOD) + +/* Test that the min and max values do not crash. This does not test all + combinations of min/max values. */ +int sonicTestInputsDontCrash(void) { + short samples[NUM_SAMPLES]; + int numSamples = genSineWave(samples, NUM_SAMPLES, SAMPLE_RATE, PERIOD, + AMPLITUDE, NUM_PERIODS); + sonicStream stream; + + assert(numSamples == NUM_SAMPLES); + stream = sonicCreateStream(SAMPLE_RATE, 1); + sonicSetVolume(stream, SONIC_MIN_VOLUME); + processSomeSamples(stream, samples, numSamples); + sonicSetVolume(stream, SONIC_MAX_VOLUME); + processSomeSamples(stream, samples, numSamples); + sonicSetSpeed(stream, SONIC_MIN_SPEED); + processSomeSamples(stream, samples, numSamples); + sonicSetSpeed(stream, SONIC_MAX_SPEED); + processSomeSamples(stream, samples, numSamples); + sonicSetPitch(stream, SONIC_MIN_PITCH_SETTING); + processSomeSamples(stream, samples, numSamples); + sonicSetPitch(stream, SONIC_MAX_PITCH_SETTING); + processSomeSamples(stream, samples, numSamples); + sonicSetSpeed(stream, SONIC_MAX_SPEED); + processSomeSamples(stream, samples, numSamples); + sonicSetRate(stream, SONIC_MIN_RATE); + processSomeSamples(stream, samples, numSamples); + sonicSetRate(stream, SONIC_MAX_RATE); + processSomeSamples(stream, samples, numSamples); + sonicSetSpeed(stream, SONIC_MAX_SPEED); + processSomeSamples(stream, samples, numSamples); + sonicSetSampleRate(stream, SONIC_MIN_SAMPLE_RATE); + processSomeSamples(stream, samples, numSamples); + 
sonicSetSampleRate(stream, SONIC_MAX_SAMPLE_RATE); + processSomeSamples(stream, samples, numSamples); + sonicSetNumChannels(stream, SONIC_MIN_CHANNELS); + processSomeSamples(stream, samples, numSamples / SONIC_MIN_CHANNELS); + sonicSetNumChannels(stream, SONIC_MAX_CHANNELS); + processSomeSamples(stream, samples, numSamples / SONIC_MAX_CHANNELS); + sonicDestroyStream(stream); + return 1; +} diff --git a/sonic/tests/runtests.c b/sonic/tests/runtests.c new file mode 100644 index 0000000000000000000000000000000000000000..a7ef2e95ace17eaeae784cdcc40f65d251ff503a --- /dev/null +++ b/sonic/tests/runtests.c @@ -0,0 +1,21 @@ +/* Sonic library + Copyright 2020 + Bill Cox + This file is part of the Sonic Library. + + This file is licensed under the Apache 2.0 license. +*/ + +/* Run unit tests. */ + +#include "tests.h" + +#include +#include + +int main(int argc, char** argv) { + assert(sonicTestInputClamping()); + assert(sonicTestInputsDontCrash()); + printf("All tests passed.\n"); + return 0; +} diff --git a/sonic/tests/sonic_unit_test.cc b/sonic/tests/sonic_unit_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..70e9c7855410679e721b948d7439fb18fed6457e --- /dev/null +++ b/sonic/tests/sonic_unit_test.cc @@ -0,0 +1,11 @@ +#include "testing/base/public/gunit.h" +#include "third_party/sonic/tests/tests.h" + +namespace { + +TEST(SonicUnitTests, ClampTest) { + EXPECT_TRUE(sonicTestInputClamping()); + EXPECT_TRUE(sonicTestInputsDontCrash()); +} + +} // namespace diff --git a/sonic/tests/tests.h b/sonic/tests/tests.h new file mode 100644 index 0000000000000000000000000000000000000000..9f08d06c49bf17d3ede807ebe7815fb8aba14bf8 --- /dev/null +++ b/sonic/tests/tests.h @@ -0,0 +1,18 @@ +/* Sonic library + Copyright 2025 + Bill Cox + This file is part of the Sonic Library. + + This file is licensed under the Apache 2.0 license. 
+*/ + +#ifdef __cplusplus +extern "C" { +#endif + +int sonicTestInputClamping(void); +int sonicTestInputsDontCrash(void); + +#ifdef __cplusplus +} +#endif diff --git a/sonic/wave.c b/sonic/wave.c index 05bbf1f12db5d5f4e6c56599dda874157e10a097..94827795ce8c5e737dc3e7ce2aea2cf03badf03e 100644 --- a/sonic/wave.c +++ b/sonic/wave.c @@ -9,365 +9,338 @@ /* This file supports read/write wave files. */ +#include "wave.h" #include #include #include -#include "wave.h" #define WAVE_BUF_LEN 4096 struct waveFileStruct { - int numChannels; - int sampleRate; - FILE *soundFile; - int bytesWritten; /* The number of bytes written so far, including header */ - int failed; - int isInput; + int numChannels; + int sampleRate; + FILE* soundFile; + int bytesWritten; /* The number of bytes written so far, including header */ + int failed; + int isInput; }; /* Write a string to a file. */ -static void writeBytes( - waveFile file, - void *bytes, - int length) -{ - size_t bytesWritten; - - if(file->failed) { - return; - } - bytesWritten = fwrite(bytes, sizeof(char), length, file->soundFile); - if(bytesWritten != length) { - fprintf(stderr, "Unable to write to output file"); - file->failed = 1; - } - file->bytesWritten += bytesWritten; +static void writeBytes(waveFile file, void* bytes, int length) { + size_t bytesWritten; + + if (file->failed) { + return; + } + bytesWritten = fwrite(bytes, sizeof(char), length, file->soundFile); + if (bytesWritten != length) { + fprintf(stderr, "Unable to write to output file"); + file->failed = 1; + } + file->bytesWritten += bytesWritten; } /* Write a string to a file. */ -static void writeString( - waveFile file, - char *string) -{ - writeBytes(file, string, strlen(string)); +static void writeString(waveFile file, char* string) { + writeBytes(file, string, strlen(string)); } /* Write an integer to a file in little endian order. 
*/ -static void writeInt( - waveFile file, - int value) -{ - char bytes[4]; - int i; - - for(i = 0; i < 4; i++) { - bytes[i] = value; - value >>= 8; - } - writeBytes(file, bytes, 4); +static void writeInt(waveFile file, int value) { + char bytes[4]; + int i; + + for (i = 0; i < 4; i++) { + bytes[i] = value; + value >>= 8; + } + writeBytes(file, bytes, 4); } /* Write a short integer to a file in little endian order. */ -static void writeShort( - waveFile file, - short value) -{ - char bytes[2]; - int i; - - for(i = 0; i < 2; i++) { - bytes[i] = value; - value >>= 8; - } - writeBytes(file, bytes, 2); +static void writeShort(waveFile file, short value) { + char bytes[2]; + int i; + + for (i = 0; i < 2; i++) { + bytes[i] = value; + value >>= 8; + } + writeBytes(file, bytes, 2); } /* Read bytes from the input file. Return the number of bytes actually read. */ -static int readBytes( - waveFile file, - void *bytes, - int length) -{ - if(file->failed) { - return 0; - } - return fread(bytes, sizeof(char), length, file->soundFile); +static int readBytes(waveFile file, void* bytes, int length) { + if (file->failed) { + return 0; + } + return fread(bytes, sizeof(char), length, file->soundFile); } /* Read an exact number of bytes from the input file. 
*/ -static void readExactBytes( - waveFile file, - void *bytes, - int length) -{ - int numRead; - - if(file->failed) { - return; - } - numRead = fread(bytes, sizeof(char), length, file->soundFile); - if(numRead != length) { - fprintf(stderr, "Failed to read requested bytes from input file\n"); - file->failed = 1; - } +static void readExactBytes(waveFile file, void* bytes, int length) { + int numRead; + + if (file->failed) { + return; + } + numRead = fread(bytes, sizeof(char), length, file->soundFile); + if (numRead != length) { + fprintf(stderr, "Failed to read requested bytes from input file\n"); + file->failed = 1; + } } /* Read an integer from the input file */ -static int readInt( - waveFile file) -{ - unsigned char bytes[4]; - int value = 0, i; - - readExactBytes(file, bytes, 4); - for(i = 3; i >= 0; i--) { - value <<= 8; - value |= bytes[i]; - } - return value; +static int readInt(waveFile file) { + unsigned char bytes[4]; + int value = 0, i; + + readExactBytes(file, bytes, 4); + for (i = 3; i >= 0; i--) { + value <<= 8; + value |= bytes[i]; + } + return value; } /* Read a short from the input file */ -static int readShort( - waveFile file) -{ - unsigned char bytes[2]; - int value = 0, i; - - readExactBytes(file, bytes, 2); - for(i = 1; i >= 0; i--) { - value <<= 8; - value |= bytes[i]; - } - return value; +static int readShort(waveFile file) { + unsigned char bytes[2]; + int value = 0, i; + + readExactBytes(file, bytes, 2); + for (i = 1; i >= 0; i--) { + value <<= 8; + value |= bytes[i]; + } + return value; } /* Read a string from the input and compare it to an expected string. 
*/ -static void expectString( - waveFile file, - char *expectedString) -{ - char buf[11]; /* Be sure that we never call with a longer string */ - int length = strlen(expectedString); - - if(length > 10) { - fprintf(stderr, "Internal error: expected string too long\n"); - file->failed = 1; - } else { - readExactBytes(file, buf, length); - buf[length] = '\0'; - if(strcmp(expectedString, buf)) { - fprintf(stderr, "Unsupported wave file format\n"); - file->failed = 1; - } +static void expectString(waveFile file, char* expectedString) { + char buf[11]; /* Be sure that we never call with a longer string */ + int length = strlen(expectedString); + + if (length > 10) { + fprintf(stderr, "Internal error: expected string too long\n"); + file->failed = 1; + } else { + readExactBytes(file, buf, length); + buf[length] = '\0'; + if (strcmp(expectedString, buf)) { + fprintf(stderr, "Unsupported wave file format: Expected '%s', got '%s;\n", + expectedString, buf); + file->failed = 1; } + } } /* Write the header of the wave file. */ -static void writeHeader( - waveFile file, - int sampleRate) -{ - /* write the wav file per the wav file format */ - writeString(file, "RIFF"); /* 00 - RIFF */ - /* We have to fseek and overwrite this later when we close the file because */ - /* we don't know how big it is until then. */ - writeInt(file, 36 /* + dataLength */); /* 04 - how big is the rest of this file? */ - writeString(file, "WAVE"); /* 08 - WAVE */ - writeString(file, "fmt "); /* 12 - fmt */ - writeInt(file, 16); /* 16 - size of this chunk */ - writeShort(file, 1); /* 20 - what is the audio format? 1 for PCM = Pulse Code Modulation */ - writeShort(file, 1); /* 22 - mono or stereo? 1 or 2? (or 5 or ???) 
*/ - writeInt(file, sampleRate); /* 24 - samples per second (numbers per second) */ - writeInt(file, sampleRate * 2); /* 28 - bytes per second */ - writeShort(file, 2); /* 32 - # of bytes in one sample, for all channels */ - writeShort(file, 16); /* 34 - how many bits in a sample(number)? usually 16 or 24 */ - writeString(file, "data"); /* 36 - data */ - writeInt(file, 0); /* 40 - how big is this data chunk */ +static void writeHeader(waveFile file, int sampleRate, int numChannels) { + /* write the wav file per the wav file format */ + writeString(file, "RIFF"); /* 00 - RIFF */ + /* We have to fseek and overwrite this later when we close the file because */ + /* we don't know how big it is until then. */ + writeInt(file, + 36 /* + dataLength */); /* 04 - how big is the rest of this file? */ + writeString(file, "WAVE"); /* 08 - WAVE */ + writeString(file, "fmt "); /* 12 - fmt */ + writeInt(file, 16); /* 16 - size of this chunk */ + writeShort( + file, + 1); /* 20 - what is the audio format? 1 for PCM = Pulse Code Modulation */ + writeShort(file, + numChannels); /* 22 - mono or stereo? 1 or 2? (or 5 or ???) */ + writeInt(file, sampleRate); /* 24 - samples per second (numbers per second) */ + writeInt(file, sampleRate * 2); /* 28 - bytes per second */ + writeShort(file, 2); /* 32 - # of bytes in one sample, for all channels */ + writeShort( + file, 16); /* 34 - how many bits in a sample(number)? usually 16 or 24 */ + writeString(file, "data"); /* 36 - data */ + writeInt(file, 0); /* 40 - how big is this data chunk */ } /* Read the header of the wave file. */ -static int readHeader( - waveFile file) -{ - int data; - - expectString(file, "RIFF"); - data = readInt(file); /* 04 - how big is the rest of this file? 
*/ - expectString(file, "WAVE"); /* 08 - WAVE */ - expectString(file, "fmt "); /* 12 - fmt */ - int chunkSize = readInt(file); /* 16 or 18 - size of this chunk */ - if(chunkSize != 16 && chunkSize != 18) { - fprintf(stderr, "Only basic wave files are supported\n"); - return 0; - } - data = readShort(file); /* 20 - what is the audio format? 1 for PCM = Pulse Code Modulation */ - if(data != 1) { - fprintf(stderr, "Only PCM wave files are supported\n"); - return 0; +static int readHeader(waveFile file) { + int data; + + expectString(file, "RIFF"); + data = readInt(file); /* 04 - how big is the rest of this file? */ + expectString(file, "WAVE"); /* 08 - WAVE */ + expectString(file, "fmt "); /* 12 - fmt */ + int chunkSize = readInt(file); /* 16 or 18 - size of this chunk */ + if (chunkSize != 16 && chunkSize != 18) { + fprintf(stderr, "Only basic wave files are supported\n"); + return 0; + } + data = readShort(file); /* 20 - what is the audio format? 1 for PCM = Pulse + Code Modulation */ + if (data != 1) { + fprintf(stderr, "Only PCM wave files are supported (not %d)\n", data); + return 0; + } + file->numChannels = + readShort(file); /* 22 - mono or stereo? 1 or 2? (or 5 or ???) */ + file->sampleRate = + readInt(file); /* 24 - samples per second (numbers per second) */ + readInt(file); /* 28 - bytes per second */ + readShort(file); /* 32 - # of bytes in one sample, for all channels */ + data = readShort( + file); /* 34 - how many bits in a sample(number)? 
usually 16 or 24 */ + if (data != 16) { + fprintf(stderr, "Only 16 bit PCM wave files are supported\n"); + return 0; + } + if (chunkSize == 18) { /* ffmpeg writes 18, and so has 2 extra bytes here */ + data = readShort(file); + } + + /* Read and discard chunks until we find the "data" chunk or fail */ + char chunk[5]; + chunk[4] = 0; + + while (1) { + readExactBytes(file, chunk, 4); /* chunk id */ + int size = readInt(file); /* how big is this data chunk */ + if (strcmp(chunk, "data") == 0) { + return 1; } - file->numChannels = readShort(file); /* 22 - mono or stereo? 1 or 2? (or 5 or ???) */ - file->sampleRate = readInt(file); /* 24 - samples per second (numbers per second) */ - readInt(file); /* 28 - bytes per second */ - readShort(file); /* 32 - # of bytes in one sample, for all channels */ - data = readShort(file); /* 34 - how many bits in a sample(number)? usually 16 or 24 */ - if(data != 16) { - fprintf(stderr, "Only 16 bit PCM wave files are supported\n"); - return 0; + if (fseek(file->soundFile, size, SEEK_CUR) != 0) { + fprintf(stderr, "Failed to seek on input file.\n"); + return 0; } - if (chunkSize == 18) { /* ffmpeg writes 18, and so has 2 extra bytes here */ - data = readShort(file); - } - expectString(file, "data"); /* 36 - data */ - readInt(file); /* 40 - how big is this data chunk */ - return 1; + } } /* Close the input or output file and free the waveFile. */ -static void closeFile( - waveFile file) -{ - FILE *soundFile = file->soundFile; - - if(soundFile != NULL) { - fclose(soundFile); - file->soundFile = NULL; - } - free(file); +static void closeFile(waveFile file) { + FILE* soundFile = file->soundFile; + + if (soundFile != NULL) { + fclose(soundFile); + file->soundFile = NULL; + } + free(file); } -/* Open a 16-bit little-endian wav file for reading. It may be mono or stereo. 
*/ -waveFile openInputWaveFile( - char *fileName, - int *sampleRate, - int *numChannels) -{ - waveFile file; - FILE *soundFile = fopen(fileName, "rb"); - - if(soundFile == NULL) { - fprintf(stderr, "Unable to open wave file %s for reading\n", fileName); - return NULL; - } - file = (waveFile)calloc(1, sizeof(struct waveFileStruct)); - file->soundFile = soundFile; - file->isInput = 1; - if(!readHeader(file)) { - closeFile(file); - return NULL; - } - *sampleRate = file->sampleRate; - *numChannels = file->numChannels; - return file; +/* Open a 16-bit little-endian wav file for reading. It may be mono or stereo. + */ +waveFile openInputWaveFile(const char* fileName, int* sampleRate, int* numChannels) { + waveFile file; + FILE* soundFile = fopen(fileName, "rb"); + + if (soundFile == NULL) { + fprintf(stderr, "Unable to open wave file %s for reading\n", fileName); + return NULL; + } + file = (waveFile)calloc(1, sizeof(struct waveFileStruct)); + file->soundFile = soundFile; + file->isInput = 1; + if (!readHeader(file)) { + closeFile(file); + return NULL; + } + *sampleRate = file->sampleRate; + *numChannels = file->numChannels; + return file; } -/* Open a 16-bit little-endian wav file for writing. It may be mono or stereo. */ -waveFile openOutputWaveFile( - char *fileName, - int sampleRate, - int numChannels) -{ - waveFile file; - FILE *soundFile = fopen(fileName, "wb"); - - if(soundFile == NULL) { - fprintf(stderr, "Unable to open wave file %s for writing\n", fileName); - return NULL; - } - file = (waveFile)calloc(1, sizeof(struct waveFileStruct)); - file->soundFile = soundFile; - file->sampleRate = sampleRate; - file->numChannels = numChannels; - writeHeader(file, sampleRate); - if(file->failed) { - closeFile(file); - return NULL; - } - return file; +/* Open a 16-bit little-endian wav file for writing. It may be mono or stereo. 
+ */ +waveFile openOutputWaveFile(const char* fileName, int sampleRate, int numChannels) { + waveFile file; + FILE* soundFile = fopen(fileName, "wb"); + + if (soundFile == NULL) { + fprintf(stderr, "Unable to open wave file %s for writing\n", fileName); + return NULL; + } + file = (waveFile)calloc(1, sizeof(struct waveFileStruct)); + file->soundFile = soundFile; + file->sampleRate = sampleRate; + file->numChannels = numChannels; + writeHeader(file, sampleRate, numChannels); + if (file->failed) { + closeFile(file); + return NULL; + } + return file; } /* Close the sound file. */ -int closeWaveFile( - waveFile file) -{ - FILE *soundFile = file->soundFile; - int passed = 1; - - if(!file->isInput) { - if(fseek(soundFile, 4, SEEK_SET) != 0) { - fprintf(stderr, "Failed to seek on input file.\n"); - passed = 0; - } else { - /* Now update the file to have the correct size. */ - writeInt(file, file->bytesWritten - 8); - if(file->failed) { - fprintf(stderr, "Failed to write wave file size.\n"); - passed = 0; - } - if(fseek(soundFile, 40, SEEK_SET) != 0) { - fprintf(stderr, "Failed to seek on input file.\n"); - passed = 0; - } else { - /* Now update the file to have the correct size. */ - writeInt(file, file->bytesWritten - 48); - if(file->failed) { - fprintf(stderr, "Failed to write wave file size.\n"); - passed = 0; - } - } +int closeWaveFile(waveFile file) { + FILE* soundFile = file->soundFile; + int passed = 1; + + if (!file->isInput) { + if (fseek(soundFile, 4, SEEK_SET) != 0) { + fprintf(stderr, "Failed to seek on input file.\n"); + passed = 0; + } else { + /* Now update the file to have the correct size. */ + writeInt(file, file->bytesWritten - 8); + if (file->failed) { + fprintf(stderr, "Failed to write wave file size.\n"); + passed = 0; + } + if (fseek(soundFile, 40, SEEK_SET) != 0) { + fprintf(stderr, "Failed to seek on input file.\n"); + passed = 0; + } else { + /* Now update the file to have the correct size. 
*/ + writeInt(file, file->bytesWritten - 48); + if (file->failed) { + fprintf(stderr, "Failed to write wave file size.\n"); + passed = 0; } + } } - closeFile(file); - return passed; + } + closeFile(file); + return passed; } -/* Read from the wave file. Return the number of samples read. */ -int readFromWaveFile( - waveFile file, - short *buffer, - int maxSamples) -{ - int i, bytesRead, samplesRead; - int bytePos = 0; - unsigned char bytes[WAVE_BUF_LEN]; - short sample; - - if(maxSamples*file->numChannels*2 > WAVE_BUF_LEN) { - maxSamples = WAVE_BUF_LEN/(file->numChannels*2); - } - bytesRead = readBytes(file, bytes, maxSamples*file->numChannels*2); - samplesRead = bytesRead/(file->numChannels*2); - for(i = 0; i < samplesRead*file->numChannels; i++) { - sample = bytes[bytePos++]; - sample |= (unsigned int)bytes[bytePos++] << 8; - *buffer++ = sample; - } - return samplesRead; +/* Read from the wave file. Return the number of samples read. + numSamples and maxSamples are the number of **multi-channel** samples */ +int readFromWaveFile(waveFile file, short* buffer, int maxSamples) { + int i, bytesRead, samplesRead; + int bytePos = 0; + unsigned char bytes[WAVE_BUF_LEN]; + short sample; + + if (maxSamples * file->numChannels * 2 > WAVE_BUF_LEN) { + maxSamples = WAVE_BUF_LEN / (file->numChannels * 2); + } + bytesRead = readBytes(file, bytes, maxSamples * file->numChannels * 2); + samplesRead = bytesRead / (file->numChannels * 2); + for (i = 0; i < samplesRead * file->numChannels; i++) { + sample = bytes[bytePos++]; + sample |= (unsigned int)bytes[bytePos++] << 8; + *buffer++ = sample; + } + return samplesRead; } /* Write to the wave file. 
*/ -int writeToWaveFile( - waveFile file, - short *buffer, - int numSamples) -{ - int i; - int bytePos = 0; - unsigned char bytes[WAVE_BUF_LEN]; - short sample; - int total = numSamples*file->numChannels; - - for(i = 0; i < total; i++) { - if(bytePos == WAVE_BUF_LEN) { - writeBytes(file, bytes, bytePos); - bytePos = 0; - } - sample = buffer[i]; - bytes[bytePos++] = sample; - bytes[bytePos++] = sample >> 8; - } - if(bytePos != 0) { - writeBytes(file, bytes, bytePos); +int writeToWaveFile(waveFile file, short* buffer, int numSamples) { + int i; + int bytePos = 0; + unsigned char bytes[WAVE_BUF_LEN]; + short sample; + int total = numSamples * file->numChannels; + + for (i = 0; i < total; i++) { + if (bytePos == WAVE_BUF_LEN) { + writeBytes(file, bytes, bytePos); + bytePos = 0; } - return file->failed; + sample = buffer[i]; + bytes[bytePos++] = sample; + bytes[bytePos++] = sample >> 8; + } + if (bytePos != 0) { + writeBytes(file, bytes, bytePos); + } + return !file->failed; } diff --git a/sonic/wave.h b/sonic/wave.h index aad45c538ca42d3a48a666d847e92efb7b34e6f7..d2de54785857187478dff101ab89816daf8ddf42 100644 --- a/sonic/wave.h +++ b/sonic/wave.h @@ -7,10 +7,10 @@ /* Support for reading and writing wave files. */ -typedef struct waveFileStruct *waveFile; +typedef struct waveFileStruct* waveFile; -waveFile openInputWaveFile(char *fileName, int *sampleRate, int *numChannels); -waveFile openOutputWaveFile(char *fileName, int sampleRate, int numChannels); +waveFile openInputWaveFile(const char* fileName, int* sampleRate, int* numChannels); +waveFile openOutputWaveFile(const char* fileName, int sampleRate, int numChannels); int closeWaveFile(waveFile file); -int readFromWaveFile(waveFile file, short *buffer, int maxSamples); -int writeToWaveFile(waveFile file, short *buffer, int numSamples); +int readFromWaveFile(waveFile file, short* buffer, int maxSamples); +int writeToWaveFile(waveFile file, short* buffer, int numSamples);