diff --git a/components/component_conf.mk b/components/component_conf.mk index 1af9e46..2eb953e 100644 --- a/components/component_conf.mk +++ b/components/component_conf.mk @@ -4,4 +4,7 @@ EXTRA_COMPONENT_DIRS += $(SOLUTION_PATH)/components/esp-face/image_util EXTRA_COMPONENT_DIRS += $(SOLUTION_PATH)/components/esp-face/face_detection EXTRA_COMPONENT_DIRS += $(SOLUTION_PATH)/components/esp-face/face_recognition EXTRA_COMPONENT_DIRS += $(SOLUTION_PATH)/components/esp-face/object_detection +EXTRA_COMPONENT_DIRS += $(SOLUTION_PATH)/components/esp-sr/lib +EXTRA_COMPONENT_DIRS += $(SOLUTION_PATH)/components/esp-sr/wake_word_engine +EXTRA_COMPONENT_DIRS += $(SOLUTION_PATH)/components/esp-sr/acoustic_algorithm diff --git a/components/esp-sr/CMakeLists.txt b/components/esp-sr/CMakeLists.txt new file mode 100644 index 0000000..6317ec3 --- /dev/null +++ b/components/esp-sr/CMakeLists.txt @@ -0,0 +1,20 @@ +set(COMPONENT_SRCS dummy.c) +set(COMPONENT_ADD_INCLUDEDIRS + lib/include + wake_word_engine/include + acoustic_algorithm/include + ) + + +register_component() + +target_link_libraries(${COMPONENT_TARGET} "-L ${CMAKE_CURRENT_SOURCE_DIR}/lib") +target_link_libraries(${COMPONENT_TARGET} "-L ${CMAKE_CURRENT_SOURCE_DIR}/wake_word_engine") +target_link_libraries(${COMPONENT_TARGET} "-L ${CMAKE_CURRENT_SOURCE_DIR}/acoustic_algorithm") +target_link_libraries(${COMPONENT_TARGET} + dl_lib_sr + c_speech_features + wakenet + hilexin_wn5 + esp_audio_processor + ) \ No newline at end of file diff --git a/components/esp-sr/Makefile b/components/esp-sr/Makefile new file mode 100644 index 0000000..65dd745 --- /dev/null +++ b/components/esp-sr/Makefile @@ -0,0 +1,10 @@ +PROJECT_NAME := esp_sr + +MODULE_PATH := $(abspath $(shell pwd)) + +EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/lib +EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/wake_word_engine +EXTRA_COMPONENT_DIRS += $(MODULE_PATH)/acoustic_algorithm + +include $(IDF_PATH)/make/project.mk + diff --git a/components/esp-sr/acoustic_algorithm/component.mk b/components/esp-sr/acoustic_algorithm/component.mk new file mode 100644 index 0000000..ef00b03 --- /dev/null +++ b/components/esp-sr/acoustic_algorithm/component.mk @@ -0,0 +1,11 @@ +COMPONENT_ADD_INCLUDEDIRS := include + +COMPONENT_SRCDIRS := . + +LIB_FILES := $(shell ls $(COMPONENT_PATH)/lib*.a) + +LIBS := $(patsubst lib%.a,-l%,$(notdir $(LIB_FILES))) + +COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/ $(LIBS) + +ALL_LIB_FILES += $(LIB_FILES) diff --git a/components/esp-sr/acoustic_algorithm/include/esp_aec.h b/components/esp-sr/acoustic_algorithm/include/esp_aec.h new file mode 100644 index 0000000..891f780 --- /dev/null +++ b/components/esp-sr/acoustic_algorithm/include/esp_aec.h @@ -0,0 +1,76 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License +#ifndef _ESP_AEC_H_ +#define _ESP_AEC_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define USE_AEC_FFT // Not kiss_fft +#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz +#define AEC_FRAME_LENGTH_MS 16 // Only support 16ms +#define AEC_FILTER_LENGTH 1200 // Number of samples of echo to cancel + +typedef void* aec_handle_t; + +/** + * @brief Creates an instance to the AEC structure. + * + * @param sample_rate The Sampling frequency (Hz) can be 8000, 16000. + * + * @param frame_length The length of the audio processing can be 10ms, 20ms, 30ms, default: 30. + * + * @param filter_length Number of samples of echo to cancel. + * + * @return + * - NULL: Create failed + * - Others: The instance of AEC + */ +aec_handle_t aec_create(int sample_rate, int frame_length, int filter_length); + + +/** + * @brief Performs echo cancellation a frame, based on the audio sent to the speaker and frame from mic. + * + * @param inst The instance of AEC. + * + * @param indata An array of 16-bit signed audio samples from mic. + * + * @param refdata An array of 16-bit signed audio samples sent to the speaker. + * + * @param outdata Returns near-end signal with echo removed. + * + * @return None + * + */ +void aec_process(aec_handle_t inst, int16_t *indata, int16_t *refdata, int16_t *outdata); + +/** + * @brief Free the AEC instance + * + * @param inst The instance of AEC. + * + * @return None + * + */ +void aec_destroy(aec_handle_t inst); + +#ifdef __cplusplus +extern "C" { +#endif + +#endif //_ESP_AEC_H_ diff --git a/components/esp-sr/acoustic_algorithm/include/esp_agc.h b/components/esp-sr/acoustic_algorithm/include/esp_agc.h new file mode 100644 index 0000000..3a34849 --- /dev/null +++ b/components/esp-sr/acoustic_algorithm/include/esp_agc.h @@ -0,0 +1,31 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License +#ifndef _ESP_AGC_H_ +#define _ESP_AGC_H_ + +////all positive value is valid, negective is error +typedef enum { + ESP_AGC_SUCCESS = 0, ////success + ESP_AGC_FAIL = -1, ////agc fail + ESP_AGC_SAMPLE_RATE_ERROR = -2, ///sample rate can be only 8khz, 16khz, 32khz + ESP_AGC_FRAME_SIZE_ERROR = -3, ////the input frame size should be only 10ms, so should together with sample-rate to get the frame size +} ESP_AGE_ERR; + + +void *esp_agc_open(int agc_mode, int sample_rate); +void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs); +int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate); +void esp_agc_clse(void *agc_handle); + +#endif // _ESP_AGC_H_ diff --git a/components/esp-sr/acoustic_algorithm/include/esp_ns.h b/components/esp-sr/acoustic_algorithm/include/esp_ns.h new file mode 100644 index 0000000..402e924 --- /dev/null +++ b/components/esp-sr/acoustic_algorithm/include/esp_ns.h @@ -0,0 +1,70 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License +#ifndef _ESP_NS_H_ +#define _ESP_NS_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define NS_FRAME_LENGTH_MS 30 //Supports 10ms, 20ms, 30ms + +/** +* The Sampling frequency (Hz) must be 16000Hz +*/ + +typedef void* ns_handle_t; + +/** + * @brief Creates an instance to the NS structure. + * + * @param frame_length_ms The length of the audio processing can be 10ms, 20ms, 30ms. + * + * @return + * - NULL: Create failed + * - Others: The instance of NS + */ +ns_handle_t ns_create(int frame_length_ms); + +/** + * @brief Feed samples of an audio stream to the NS and get the audio stream after Noise suppression. + * + * @param inst The instance of NS. + * + * @param indata An array of 16-bit signed audio samples. + * + * @param outdata An array of 16-bit signed audio samples after noise suppression. + * + * @return None + * + */ +void ns_process(ns_handle_t inst, int16_t *indata, int16_t *outdata); + +/** + * @brief Free the NS instance + * + * @param inst The instance of NS. + * + * @return None + * + */ +void ns_destroy(ns_handle_t inst); + +#ifdef __cplusplus +extern "C" { +#endif + +#endif //_ESP_NS_H_ diff --git a/components/esp-sr/acoustic_algorithm/include/esp_vad.h b/components/esp-sr/acoustic_algorithm/include/esp_vad.h new file mode 100644 index 0000000..c4126d2 --- /dev/null +++ b/components/esp-sr/acoustic_algorithm/include/esp_vad.h @@ -0,0 +1,104 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License +#ifndef _ESP_VAD_H_ +#define _ESP_VAD_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define SAMPLE_RATE_HZ 16000 //Supports 32000, 16000, 8000 +#define VAD_FRAME_LENGTH_MS 30 //Supports 10ms, 20ms, 30ms + +/** + * @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more + * restrictive in reporting speech. + */ +typedef enum { + VAD_MODE_0 = 0, + VAD_MODE_1, + VAD_MODE_2, + VAD_MODE_3, + VAD_MODE_4 +} vad_mode_t; + +typedef enum { + VAD_SILENCE = 0, + VAD_SPEECH +} vad_state_t; + +typedef void* vad_handle_t; + +/** + * @brief Creates an instance to the VAD structure. + * + * @param vad_mode Sets the VAD operating mode. + * + * @param sample_rate_hz The Sampling frequency (Hz) can be 32000, 16000, 8000, default: 16000. + * + * @param one_frame_ms The length of the audio processing can be 10ms, 20ms, 30ms, default: 30. + * + * @return + * - NULL: Create failed + * - Others: The instance of VAD + */ +vad_handle_t vad_create(vad_mode_t vad_mode, int sample_rate_hz, int one_frame_ms); + +/** + * @brief Feed samples of an audio stream to the VAD and check if there is someone speaking. + * + * @param inst The instance of VAD. + * + * @param data An array of 16-bit signed audio samples. + * + * @return + * - VAD_SILENCE if no voice + * - VAD_SPEECH if voice is detected + * + */ +vad_state_t vad_process(vad_handle_t inst, int16_t *data); + +/** + * @brief Free the VAD instance + * + * @param inst The instance of VAD. + * + * @return None + * + */ +void vad_destroy(vad_handle_t inst); + +/* +* Programming Guide: +* +* @code{c} +* vad_handle_t vad_inst = vad_create(VAD_MODE_3, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS); // Creates an instance to the VAD structure. +* +* while (1) { +* //Use buffer to receive the audio data from MIC. +* vad_state_t vad_state = vad_process(vad_inst, buffer); // Feed samples to the VAD process and get the result. +* } +* +* vad_destroy(vad_inst); // Free the VAD instance at the end of whole VAD process +* +* @endcode +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +#endif //_ESP_VAD_H_ diff --git a/components/esp-sr/acoustic_algorithm/libesp_audio_processor.a b/components/esp-sr/acoustic_algorithm/libesp_audio_processor.a new file mode 100644 index 0000000..7df0c39 Binary files /dev/null and b/components/esp-sr/acoustic_algorithm/libesp_audio_processor.a differ diff --git a/components/esp-sr/component.mk b/components/esp-sr/component.mk new file mode 100644 index 0000000..d20f8d1 --- /dev/null +++ b/components/esp-sr/component.mk @@ -0,0 +1,16 @@ + +COMPONENT_ADD_INCLUDEDIRS := lib/include \ + wake_word_engine/include \ + acoustic_algorithm/include \ + + +LIB_FILES := $(shell ls $(COMPONENT_PATH)/wake_word_engine/lib*.a) \ + $(shell ls $(COMPONENT_PATH)/lib/lib*.a) \ + $(shell ls $(COMPONENT_PATH)/acoustic_algorithm/lib*.a) \ + +LIBS := $(patsubst lib%.a,-l%,$(LIB_FILES)) + +COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/lib \ + -L$(COMPONENT_PATH)/wake_word_engine \ + -L$(COMPONENT_PATH)/acoustic_algorithm \ + $(LIBS) diff --git a/components/recorder_engine/a.c b/components/esp-sr/dummy.c similarity index 100% rename from components/recorder_engine/a.c rename to components/esp-sr/dummy.c diff --git a/components/esp-sr/lib/component.mk b/components/esp-sr/lib/component.mk new file mode 100644 index 0000000..ef00b03 --- /dev/null +++ b/components/esp-sr/lib/component.mk @@ -0,0 +1,11 @@ +COMPONENT_ADD_INCLUDEDIRS := include + +COMPONENT_SRCDIRS := . + +LIB_FILES := $(shell ls $(COMPONENT_PATH)/lib*.a) + +LIBS := $(patsubst lib%.a,-l%,$(notdir $(LIB_FILES))) + +COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/ $(LIBS) + +ALL_LIB_FILES += $(LIB_FILES) diff --git a/components/esp-sr/lib/include/dl_lib.h b/components/esp-sr/lib/include/dl_lib.h new file mode 100644 index 0000000..f18beea --- /dev/null +++ b/components/esp-sr/lib/include/dl_lib.h @@ -0,0 +1,328 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_H +#define DL_LIB_H + +#include "dl_lib_matrix.h" +#include "dl_lib_matrixq.h" + + +typedef int padding_state; +/** + * @brief Does a fast version of the exp() operation on a floating point number. + * + * As described in https://codingforspeed.com/using-faster-exponential-approximation/ + * Should be good til an input of 5 or so with a steps factor of 8. + * + * @param in Floating point input + * @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes. + * @return Exp()'ed output + */ +fptp_t fast_exp(double x, int steps); + + + +/** + * @brief Does a softmax operation on a matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out); + + +/** + * @brief Does a softmax operation on a quantized matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +/** + * @brief Does a sigmoid operation on a floating point number + * + * @param in Floating point input + * @return Sigmoid output + */ + +fptp_t dl_sigmoid_op(fptp_t in); + + +/** + * @brief Does a sigmoid operation on a matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out); + +/** + * @brief Does a tanh operation on a floating point number + * + * @param in Floating point input number + * @return Tanh value + */ +fptp_t dl_tanh_op(fptp_t v); + +/** + * @brief Does a tanh operation on a matrix. + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out); + + +/** + * @brief Does a relu (Rectifier Linear Unit) operation on a floating point number + * + * @param in Floating point input + * @param clip If value is higher than this, it will be clipped to this value + * @return Relu output + */ +fptp_t dl_relu_op(fptp_t in, fptp_t clip); + +/** + * @brief Does a ReLu operation on a matrix. + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out); + +/** + * @brief Fully connected layer operation + * + * @param in Input vector + * @param weight Weights of the neurons + * @param bias Biases for the neurons. Can be NULL if a bias of 0 is required. + * @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix. + */ +void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out); + +/** + * @brief Pre-calculate the sqrtvari variable for the batch_normalize function. + * The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence, + * this matrix only needs to be calculated once. This function does that. + * + * @param + * @return + */ +void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out); + +/** + * @brief Batch-normalize a matrix + * + * @param m The matrix to normalize + * @param offset Offset matrix + * @param scale Scale matrix + * @param mean Mean matrix + * @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar + * @return + */ +void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale, + const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari); + +/** + * @brief Do a basic LSTM layer pass. + * + * @warning Returns state_h pointer, so do not free result. + + * @param in Input vector + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param weights Weights for the neurons + * @param bias Bias for the neurons. Can be NULL if no bias is required + * @return Output values of the neurons + */ +dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h, + const dl_matrix2d_t *weight, const dl_matrix2d_t *bias); + +/** + * @brief Do a basic LSTM layer pass, partial quantized version. + * This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias. + * + * @warning Returns state_h pointer, so do not free result. + + * @param in Input vector + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param weights Weights for the neurons, need to be quantised + * @param bias Bias for the neurons. Can be NULL if no bias is required + * @return Output values of the neurons + */ +dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h, + const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias); + +/** + * @brief Do a fully-connected layer pass, fully-quantized version. + * + * @param in Input vector + * @param weight Weights of the neurons + * @param bias Bias values of the neurons. Can be NULL if no bias is needed. + * @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info + * @return Output values of the neurons + */ +void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift); + +/** + * @brief Do a basic LSTM layer pass, fully-quantized version + * + * @warning Returns state_h pointer, so do not free result. + + * @param in Input vector + * @param state_c Internal state of the LSTM network + * @param state_h Internal state (previous output values) of the LSTM network + * @param weights Weights for the neurons + * @param bias Bias for the neurons. Can be NULL if no bias is required + * @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info + * @return Output values of the neurons + */ +dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h, + const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift); + +/** + * @brief Batch-normalize a matrix, fully-quantized version + * + * @param m The matrix to normalize + * @param offset Offset matrix + * @param scale Scale matrix + * @param mean Mean matrix + * @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar + * @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info + * @return + */ +void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale, + const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift); + +/** + * @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number + * This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal + * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.) + * + * @param in Fixed-point input + * @param clip If value is higher than this, it will be clipped to this value + * @return Relu output + */ +qtp_t dl_relu_q_op(qtp_t in, qtp_t clip); + +/** + * @brief Does a ReLu operation on a matrix, quantized version + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out); + +/** + * @brief Does a sigmoid operation on a fixed-point number. + * This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal + * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.) + * + * @param in Fixed-point input + * @return Sigmoid output + */ +int dl_sigmoid_op_q(const int in); + +/** + * @brief Does a sigmoid operation on a matrix, quantized version + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +/** + * @brief Does a tanh operation on a matrix, quantized version + * + * @param in Input matrix + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out); + +/** + * @brief Does a tanh operation on a fixed-point number. + * This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal + * point. (Equivalent to a mantissa in a quantized matrix with exponent -15.) + * + * @param in Fixed-point input + * @return tanh output + */ +int dl_tanh_op_q(int v); + +/** + * @brief Filter out the number greater than clip in the matrix, quantized version + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out); + +/** + * @brief Filter out the number greater than clip in the matrix, float version + * + * @param in Input matrix + * @param clip If values are higher than this, they will be clipped to this value + * @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input. + */ +void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out); +/** + * @brief Do a basic CNN layer pass. + * + * @Warning This just supports the single channel input image, and the output is single row matrix. + That is to say, the height of output is 1, and the weight of output is out_channels*out_image_width*out_image_height + * + * @param in Input single channel image + * @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height + * @param bias Bias for the CNN layer. + * @param filter_height The height of convolution kernel + * @param filter_width The width of convolution kernel + * @param out_channels The number of output channels of convolution kernel + * @param stride_x The step length of the convolution window in x(width) direction + * @param stride_y The step length of the convolution window in y(height) direction + * @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME" + * @param out The result of CNN layer, out->h=1. + * @return The result of CNN layer. + */ +dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height, + const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out); + + +/** + * @brief Do a basic CNN layer pass, quantised wersion. + * + * @Warning This just supports the single channel input image, and the output is single row matrix. + That is to say, the height of output is 1, and the weight of output is out_channels*out_image_width*out_image_height + * + * @param in Input single channel image + * @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height, + * @param bias Bias of the neurons. + * @param filter_height The height of convolution kernel + * @param filter_width The width of convolution kernel + * @param out_channels The number of output channels of convolution kernel + * @param stride_x The step length of the convolution window in x(width) direction + * @param stride_y The step length of the convolution window in y(height) direction + * @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME" + * @param out The result of CNN layer, out->h=1 + * @return The result of CNN layer + */ +dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height, + const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out); + +#endif + diff --git a/components/esp-sr/lib/include/dl_lib_coefgetter_if.h b/components/esp-sr/lib/include/dl_lib_coefgetter_if.h new file mode 100644 index 0000000..53210a5 --- /dev/null +++ b/components/esp-sr/lib/include/dl_lib_coefgetter_if.h @@ -0,0 +1,67 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_COEFGETTER_IF_H +#define DL_LIB_COEFGETTER_IF_H + +#include "dl_lib_matrix.h" +#include "dl_lib_matrixq.h" + +//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by +//dl_batch_normalize_get_sqrtvar first. +#define COEF_GETTER_HINT_BNVAR (1<<0) + +/* +This struct describes the basic information of model data: +word_num: the number of wake words or speech commands +word_list: the name list of wake words or speech commands +thres_list: the threshold list of wake words or speech commands +info_str: the string used to reflect the version and information of model data + which consist of the architecture of network, the version of model data, wake words and their threshold +*/ +typedef struct { + int word_num; + char **word_list; + int *win_list; + float *thresh_list; + char *info_str; +} model_info_t; + +/* +Alphabet struct describes the basic grapheme or phoneme. +item_num: the number of baisc item(grapheme or phonemr) +items: the list of basic item +*/ +typedef struct { + int item_num; + char **items; +}alphabet_t; + +/* +This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network. +For the two getters, the name describes the name of the coefficient matrix, usually the same as the Numpy filename the +coefficient was originally stored in. The arg argument can be used to optionally pass an additional user-defined argument +to the getter (e.g. the directory to look for files in the case of the Numpy file loader getter). The hint argument +is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the free_f/free_q functions to release the +memory for the returned matrices, when applicable. +*/ +typedef struct { + const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint); + const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint); + void (*free_f)(const dl_matrix2d_t *m); + void (*free_q)(const dl_matrix2dq_t *m); + const model_info_t* (*getter_info)(void *arg); + const alphabet_t* (*getter_alphabet)(void *arg); +} model_coeff_getter_t; + +#endif \ No newline at end of file diff --git a/components/esp-sr/lib/include/dl_lib_conv_queue.h b/components/esp-sr/lib/include/dl_lib_conv_queue.h new file mode 100644 index 0000000..e0ca0a1 --- /dev/null +++ b/components/esp-sr/lib/include/dl_lib_conv_queue.h @@ -0,0 +1,164 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_CONV_QUEUE_H +#define DL_LIB_CONV_QUEUE_H + + +#include "dl_lib_matrix.h" +typedef float fptp_t; + + +//Flags for matrices +#define DL_MF_FOREIGNDATA (1<<0) /*< Matrix *item data actually points to another matrix and should not be freed */ + +//Float convolution FIFO queue. +typedef struct { + int n; /*< the length of queue */ + int c; /*< the channel number of queue element*/ + int front; /*< the front(top) position of queue */ + int flag; /*< not used*/ + fptp_t *item; /*< Pointer to item array */ +} dl_conv_queue_t; + +/** + * @brief Allocate a convolution queue + * + * @param n The length of queue + * @param c The channel number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_conv_queue_t *dl_conv_queue_alloc(int n, int c); + +/** + * @brief Free a convolution queue + * + * @param cq The convolution queue to free + */ +void dl_conv_queue_free(dl_conv_queue_t *cq); + +void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out); + +/** + * @brief Move the front pointer of queue forward, + the First(oldest) element become the last(newest) element, + * + * @param cq Input convolution queue + * @return Pointer of oldest element + */ +fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq); + +/** + * @brief Remove the oldest element, then insert the input element at the end of queue + * + * @param cq Input convolution queue + * @param item The new element + */ +void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item); + + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset); + +/** + * @brief Does a sigmoid operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a sigmoid operation + * by this pointer, then return the pointer + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset); + +/** + * @brief Does a tanh operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a tanh operation + * by this pointer, then return the pointer + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset); + +/** + * @brief Does a softmax operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a softmax operation + * by this pointer, then return the pointer + * + * @param cq Input convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset); + +fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset); +fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset); +dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b); +dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h, + const dl_matrix2d_t *weight, const dl_matrix2d_t *bias); +/** + * @brief Fast implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is first element of output queue and should not be freed separately. + * + * @param in Input convolution queue + * @param out Output convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @return The result of atrous convolution + */ +fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size, + dl_matrix2d_t* kernel, dl_matrix2d_t* bias); +fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size, + dl_matrix2d_t* kernel, dl_matrix2d_t* bias); + +/** + * @brief Fast implement of dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is first element of output queue and should not be freed separately. + * + * @param in Input convolution queue + * @param out Output convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @return The result of dilation layer + */ +fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size, + dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias, + dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias); + + +void test_atrous_conv(int size, int rate, int in_channel, int out_channel); + +#endif \ No newline at end of file diff --git a/components/esp-sr/lib/include/dl_lib_convq_queue.h b/components/esp-sr/lib/include/dl_lib_convq_queue.h new file mode 100644 index 0000000..0e92676 --- /dev/null +++ b/components/esp-sr/lib/include/dl_lib_convq_queue.h @@ -0,0 +1,174 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_CONVQ_QUEUE_H +#define DL_LIB_CONVQ_QUEUE_H + + +#include "dl_lib_matrixq.h" +#include "dl_lib_conv_queue.h" + +//fixed-point convolution FIFO queue. +typedef struct { + int n; /*< the length of queue */ + int c; /*< the channel number of queue element*/ + int front; /*< the front(top) position of queue */ + int flag; /*< not used */ + int exponent; /*< The values in items should be multiplied by pow(2,exponent) + to get the real values */ + qtp_t *itemq; /*< Pointer to item array */ +} dl_convq_queue_t; + +/** + * @brief Allocate a fixed-point convolution queue + * + * @param n The length of queue + * @param c The channel number of elements in the queue + * @return The convolution queue, or NULL if out of memory + */ +dl_convq_queue_t *dl_convq_queue_alloc(int n, int c); + +void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row); + +/** + * @brief Free a fixed-point convolution queue + * + * @param cq The fixed-point convolution queue to free + */ +void dl_convq_queue_free(dl_convq_queue_t *cq); + +/** + * @brief Move the front pointer of queue forward, + the First(oldest) element become the last(newest) element, + * + * @param cq Input fixed-point convolution queue + * @return Pointer of oldest element + */ +qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq); + +/** + * @brief Remove the oldest element, then insert the input element at the end of queue + * + * @param cq Input fixed-point convolution queue + * @param item The new element + */ +void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift); + +/** + * @brief Insert the float-point element at the end of queue. + * The precision of fixed-point numbers is described by the Qm.f notation, + * + * @param cq Input fixed-point convolution queue + * @param item The float-point element + * @param m_bit The number of integer bits including the sign bits + * @param f_bit The number of fractional bits + */ +void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit); + +/** + * @brief Get the pointer of element in the queue by offset + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int offset); + +/** + * @brief Does a tanh operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a + * tanh operation by this pointer, then return the pointer + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +void dl_tanh_convq(dl_convq_queue_t *cq, int last_num); + +/** + * @brief Does a relu operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, and does a + * relu operation by this pointer, then return the pointer + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @return Pointer of the element + */ +void dl_relu_convq(dl_convq_queue_t *cq, fptp_t clip, int last_num); + +/** + * @brief Does a softmax operation on the one of element in the convolution queue. + * Gets the pointer of element in the convolution queue by offset, input data + stay as it is. Results are saved into the *out* array. + * + * @param cq Input fixed-point convolution queue + * @param offset Offset from the front of the queue + * @param out Old array to re-use. Passing NULL will allocate a new matrix. + * @return softmax results + */ +fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out); + +/** + * @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution) + * based on convolution queue. + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param kernel The kernel matrix of filter + * @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param shift Shift ratio used in dot operation between two 16-bit fixed point vector + * @return The result of atrous convolution + */ +qtp_t *dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift); +qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset); +/** + * @brief Fast implement of dilation layer as follows + * + * |-> [gate(sigmoid)] -| + * input - | |-> (*) - output + * |-> [filter(tanh)] -| + * + * @Warning All input and output convolution queue and matrix should be allocated. The return pointer + * is last element of output queue and should not be freed separately. + * + * @param in Input fixed-point convolution queue + * @param out Output fixed-point convolution queue + * @param rate A positive int, the stride with which we sample input value + * @param size A positive int, the size of 1D-filter + * @param filter_kernel The kernel matrix of filter + * @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required. + * @param gate_kernel The kernel matrix of gate + * @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required. + * @filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector + * @gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector + * @return The result of dilation layer + */ +qtp_t *dl_dilation_layerq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size, + dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias, + dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, + int filter_shift, int gate_shift); + +dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h, + const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift); +void test_atrous_convq(int size, int rate, int in_channel, int out_channel); + +dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2); + +#endif \ No newline at end of file diff --git a/components/esp-sr/lib/include/dl_lib_matrix.h b/components/esp-sr/lib/include/dl_lib_matrix.h new file mode 100644 index 0000000..5cb4dcb --- /dev/null +++ b/components/esp-sr/lib/include/dl_lib_matrix.h @@ -0,0 +1,236 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_MATRIX_H +#define DL_LIB_MATRIX_H + +#if CONFIG_BT_SHARE_MEM_REUSE +#include "freertos/FreeRTOS.h" +#endif + +typedef float fptp_t; + +#if CONFIG_BT_SHARE_MEM_REUSE +extern multi_heap_handle_t gst_heap; +#endif + +//Flags for matrices +#define DL_MF_FOREIGNDATA (1<<0) /*< Matrix *item data actually points to another matrix and should not be freed */ + +//'Normal' float matrix +typedef struct { + int w; /*< Width */ + int h; /*< Height */ + int stride; /*< Row stride, essentially how many items to skip to get to the same position in the next row */ + int flags; /*< Flags. OR of DL_MF_* values */ + fptp_t *item; /*< Pointer to item array */ +} dl_matrix2d_t; + +//Macro to quickly access the raw items in a matrix +#define DL_ITM(m, x, y) m->item[(x)+(y)*m->stride] + + +/** + * @brief Allocate a matrix + * + * @param w Width of the matrix + * @param h Height of the matrix + * @return The matrix, or NULL if out of memory + */ +dl_matrix2d_t *dl_matrix_alloc(int w, int h); + + +/** + * @brief Free a matrix + * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well. + * + * @param m Matrix to free + */ +void dl_matrix_free(dl_matrix2d_t *m); + +/** + * @brief Zero out the matrix + * Sets all entries in the matrix to 0. + * + * @param m Matrix to zero + */ +void dl_matrix_zero(dl_matrix2d_t *m); + +/** + * @brief Generate a new matrix using a range of items from an existing matrix. + * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer + * to the existing data. Changing the data in the resulting matrix, as a result, will also change + * the data in the existing matrix that has been sliced. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix. + * @return The resulting slice matrix, or NULL if out of memory + */ +dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in); + +/** + * @brief select a range of items from an existing matrix and flatten them into one dimension. + * + * @Warning The results are flattened in row-major order. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix to re-use. Passing NULL will allocate a new matrix. + * @return The resulting flatten matrix, or NULL if out of memory + */ +dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in); + +/** + * @brief Generate a matrix from existing floating-point data + * + * @param w Width of resulting matrix + * @param h Height of resulting matrix + * @param data Data to populate matrix with + * @return A newaly allocated matrix populated with the given input data, or NULL if out of memory. + */ +dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data); + + +/** + * @brief Multiply a pair of matrices item-by-item: res=a*b + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Multiplicated data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res); + +/** + * @brief Do a dotproduct of two matrices : res=a.b + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + */ +void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res); + +/** + * @brief Add a pair of matrices item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Added data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out); + + +/** + * @brief Divide a pair of matrices item-by-item: res=a/b + * + * @param a First matrix + * @param b Second matrix + * @param res Divided data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out); + +/** + * @brief Subtract a matrix from another, item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Subtracted data. Can be equal to a or b to overwrite that. + */ +void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out); + +/** + * @brief Add a constant to every item of the matrix + * + * @param subj Matrix to add the constant to + * @param add The constant + */ +void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add); + + +/** + * @brief Concatenate the rows of two matrices into a new matrix + * + * @param a First matrix + * @param b Second matrix + * @return A newly allocated array with as avlues a|b + */ +dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b); + + +/** + * @brief Print the contents of a matrix to stdout. Used for debugging. + * + * @param a The matrix to print. + */ +void dl_printmatrix(const dl_matrix2d_t *a); + +/** + * @brief Return the average square error given a correct and a test matrix. + * + * ...Well, more or less. If anything, it gives an indication of the error between + * the two. Check the code for the exact implementation. + * + * @param a First of the two matrices to compare + * @param b Second of the two matrices to compare + * @return value indicating the relative difference between matrices + */ +float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b); + + + +/** + * @brief Check if two matrices have the same shape, that is, the same amount of rows and columns + * + * @param a First of the two matrices to compare + * @param b Second of the two matrices to compare + * @return true if the two matrices are shaped the same, false otherwise. + */ +int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b); + + +/** + * @brief Get a specific item from the matrix + * + * Please use these for external matrix access instead of DL_ITM + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @return Value in that position + */ +inline static fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) { + return DL_ITM(m, x, y); +} + +/** + * @brief Set a specific item in the matrix to the given value + * + * Please use these for external matrix access instead of DL_ITM + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @param val Value to write to that position + */ +inline static void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val) { + DL_ITM(m, x, y)=val; +} + + + +#endif + diff --git a/components/esp-sr/lib/include/dl_lib_matrixq.h b/components/esp-sr/lib/include/dl_lib_matrixq.h new file mode 100644 index 0000000..fcd449a --- /dev/null +++ b/components/esp-sr/lib/include/dl_lib_matrixq.h @@ -0,0 +1,372 @@ +// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#ifndef DL_LIB_MATRIXQ_H +#define DL_LIB_MATRIXQ_H + +#include +#include "dl_lib_matrix.h" + +typedef int16_t qtp_t; + +//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted +//for easy use as a multiplicand without stressing out the flash cache too much. +typedef struct { + int w; + int h; + int stride; //Normally equals h, not w! + int flags; + int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values. + qtp_t *itemq; +} dl_matrix2dq_t; + +#define DL_QTP_SHIFT 15 +#define DL_QTP_RANGE ((1<itemq[(y)+(x)*m->stride] +#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null + +#define DL_SHIFT_AUTO 32 + +/** + * @info About quantized matrices and shift values + * + * Grab a coffee (or tea, or hot water) and sit down when you read this for the first + * time. Quantized matrices can speed up your operations, but come with some quirks, and + * it's good to understand how they work before using them. + * + * The data in the quantized matrix type is stored similarily to floating-point types: + * when storing a real value, the value is stored as a mantissa (base number) and an + * exponent. The 'real' value that can be re-derived from those two numbers is something + * similar to mantissa*2^exponent. Up to this point, there's not that much difference from + * the standard floating point implementations like e.g. IEEE-754. + * + * The difference with respect to quantized matrices is that for a quantized matrix, it is + * assumed all values stored have more-or-less the same order of magnitude. This allows the + * matrix to only store all the mantissas, while the exponents are shared; there is only one + * exponent for the entire matrix. This makes it quicker to handle matrix operations - the + * logic to fix the exponents only needs to happen once, while the rest can be done in simple + * integer arithmetic. It also nets us some memory savings - while normally a floating point + * number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the + * memory requirements. + * + * While most of the details of handling the intricacies of the quantized matrixes are done + * transparently by the code in dl_lib_matrixq.c, some implementation details leak out, + * specifically in places where addition/subtraction/division happens. + * + * The problem is that the routines do not know what the size of the resulting operation is. For + * instance, when adding two matrices of numbers, the resulting numbers *could* be large enough + * to overflow the mantissa of the result if the exponent is the same. However, if by default we + * assume the mantissas needs to be scaled back, we may lose precision. + * + * In order to counter this, all operations that have this issue have a ``shift`` argument. If + * the argument is zero, the routine will be conservative, that is, increase the exponent of + * the result to such an extent it's mathematically impossible a value in the result will exceed + * the maximum value that can be stored. However, when this argument is larger than zero, the + * algorithm will hold back on this scaling by the indicated amount of bits, preserving precision + * but increasing the chance of some of the calculated values not fitting in the mantissa anymore. + * If this happens, the value will be clipped to the largest (or, for negative values, smallest) + * value possible. (Neural networks usually are okay with this happening for a limited amount + * of matrix indices). + * + * For deciding on these shift values, it is recommended to start with a shift value of one, then + * use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value. + * If it indicates bits are under-used, increase it. Note that for adding and subtraction, only + * shift values of 0 or 1 make sense; these routines will error out if you try to do something + * else. + * + * For neural networks and other noise-tolerant applications, note that even when + * dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead + * to slightly improved precision. Feel free to experiment. + **/ + + +/** + * @brief Allocate a matrix + * + * @param w Width of the matrix + * @param h Height of the matrix + * @return The matrix, or NULL if out of memory + */ +dl_matrix2dq_t *dl_matrixq_alloc(int w, int h); + +/** + * @brief Convert a floating-point matrix to a quantized matrix + * + * @param m Floating-point matrix to convert + * @param out Quantized matrix to re-use. If NULL, allocate a new one. + * @Return The quantized version of the floating-point matrix + */ +dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out); + + +/** + * TODO: DESCRIBE THIS FUNCTION + */ +dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit); + + +/** + * @brief Convert a quantized matrix to a floating-point one. + * + * @param m Floating-point matrix to convert + * @param out Quantized matrix to re-use. If NULL, allocate a new one. + * @Return The quantized version of the floating-point matrix + **/ +dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out); + + +/** + * @brief Free a quantized matrix + * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well. + * + * @param m Matrix to free + */ +void dl_matrixq_free(dl_matrix2dq_t *m); + +/** + * @brief Zero out the matrix + * Sets all entries in the matrix to 0. + * + * @param m Matrix to zero + */ +void dl_matrixq_zero(dl_matrix2dq_t *m); + + +/** + * @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix. + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + * @param shift Shift ratio + */ +void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix. + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + */ +void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); + +/** + * @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product. + * + * Result is a fixed-point matrix. + * + * Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be + * much slower than dl_matrixq_dot . + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + * @param shift Shift ratio + */ +void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product. + * + * Result is a floating-point matrix. + * + * Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be + * much slower than dl_matrixq_dot_matrix_out. + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Dotproduct data. *Must* be a *different* matrix from a or b! + */ +void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); + +/** + * @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix. + * + * @param a First multiplicand; float matrix + * @param b Second multiplicand; quantized matrix + * @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b! + */ +void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); + + +/** + * @brief Print the contents of a quantized matrix to stdout. Used for debugging. + * + * @param a The matrix to print. + */ +void dl_printmatrixq(const dl_matrix2dq_t *a); + + +/** + * @brief Add a pair of quantizedmatrices item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Added data. Can be equal to a or b to overwrite that. + * @param shift Shift value. Only 0 or 1 makes sense here. + */ +void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Generate a new matrix using a range of items from an existing matrix. + * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer + * to the existing data. Changing the data in the resulting matrix, as a result, will also change + * the data in the existing matrix that has been sliced. + * + * @Warning In contrast to the floating point equivalent of this function, the fixed-point version + * of this has the issue that as soon as the output exponent of one of the slices changes, the data + * in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you + * use this function, either treat the slices as read-only, or assume the sliced matrix contains + * garbage after modifying the data in one of the slices. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix. + * @return The resulting slice matrix, or NULL if out of memory + */ +dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in); + +/** + * @brief select a range of items from an existing matrix and flatten them into one dimension. + * + * @Warning The results are flattened in row-major order. + * + * @param x X-offset of the origin of the returned matrix within the sliced matrix + * @param y Y-offset of the origin of the returned matrix within the sliced matrix + * @param w Width of the resulting matrix + * @param h Height of the resulting matrix + * @param in Old matrix to re-use. Passing NULL will allocate a new matrix. + * @return The resulting flatten matrix, or NULL if out of memory + */ +dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in); + +/** + * @brief Subtract a quantized matrix from another, item-by-item: res=a-b + * + * @param a First matrix + * @param b Second matrix + * @param res Subtracted data. Can be equal to a or b to overwrite that. + * @param shift Shift value. Only 0 or 1 makes sense here. + */ +void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); + +/** + * @brief Multiply a pair of quantized matrices item-by-item: res=a*b + * + * @param a First multiplicand + * @param b Second multiplicand + * @param res Multiplicated data. Can be equal to a or b to overwrite that matrix. + */ +void dl_matrixq_mul(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res); + +/** + * @brief Divide a pair of quantized matrices item-by-item: res=a/b + * + * @param a First matrix + * @param b Second matrix + * @param res Divided data. Can be equal to a or b to overwrite that. + */ +void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift); + +/** + * @brief Check if two quantized matrices have the same shape, that is, the same amount of + * rows and columns + * + * @param a First of the two matrices to compare + * @param b Second of the two matrices to compare + * @return true if the two matrices are shaped the same, false otherwise. + */ +int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b); + +/** + * @brief Concatenate the rows of two quantized matrices into a new matrix + * + * @param a First matrix + * @param b Second matrix + * @return A newly allocated quantized matrix with as values a|b + */ +dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b); + +/** + * @brief Add a constant to every item of the quantized matrix + * + * @param subj Matrix to add the constant to + * @param add The constant + */ +void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift); + +/** + * @brief Check the sanity of a quantized matrix + * + * Due to the nature of quantized matrices, depending on the calculations a quantized + * matrix is the result of and the shift values chosen in those calculations, a quantized + * matrix may have an exponent and mantissas that lead to a loss of precision, either because + * most significant mantissa bits are unused, or because a fair amount of mantissas are + * clipped. This function checks if this is the case and will report a message to stdout + * if significant loss of precision is detected. + * + * @param m The quantized matrix to check + * @param name A string to be displayed in the message if the sanity check fails + * @return True if matrix is sane, false otherwise + **/ + +int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name); + +/** + * @brief re-adjust the exponent of the matrix to fit the mantissa better + * + * This function will shift up all the data in the mantissas so there are no + * most-significant bits that are unused in all mantissas. It will also adjust + * the exponent to keep the actua values in the matrix the same. + * + * Some operations done on a matrix, especially operations that re-use the + * result of earlier operations done in the same way, can lead to the loss of + * data because the exponent of the quantized matrix is never re-adjusted. You + * can do that implicitely by calling this function. + * + * @param m The matrix to re-adjust +**/ +void dl_matrixq_readjust_exp(dl_matrix2dq_t *m); + + + +/** + * @brief Get the floating-point value of a specific item from the quantized matrix + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @return Value in that position + */ +fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y); + +/** + * @brief Set a specific item in the quantized matrix to the given + * floating-point value + * + * @warning If the given value is more than the exponent in the quantized matrix + * allows for, all mantissas in the matrix will be shifted down to make the value + * 'fit'. If, however, the exponent is such that the value would result in a + * quantized mantissa of 0, nothing is done. + * + * @param m Matrix to access + * @param x Column address + * @param y Row address + * @param val Value to write to that position + */ +void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val); + +#endif diff --git a/components/esp-sr/lib/libc_speech_features.a b/components/esp-sr/lib/libc_speech_features.a new file mode 100644 index 0000000..4e665a5 Binary files /dev/null and b/components/esp-sr/lib/libc_speech_features.a differ diff --git a/components/esp-sr/lib/libdl_lib_sr.a b/components/esp-sr/lib/libdl_lib_sr.a new file mode 100644 index 0000000..65614cd Binary files /dev/null and b/components/esp-sr/lib/libdl_lib_sr.a differ diff --git a/components/esp-sr/lib/libwakenet.a b/components/esp-sr/lib/libwakenet.a new file mode 100644 index 0000000..9eb1f91 Binary files /dev/null and b/components/esp-sr/lib/libwakenet.a differ diff --git a/components/esp-sr/wake_word_engine/component.mk b/components/esp-sr/wake_word_engine/component.mk new file mode 100644 index 0000000..ef00b03 --- /dev/null +++ b/components/esp-sr/wake_word_engine/component.mk @@ -0,0 +1,11 @@ +COMPONENT_ADD_INCLUDEDIRS := include + +COMPONENT_SRCDIRS := . + +LIB_FILES := $(shell ls $(COMPONENT_PATH)/lib*.a) + +LIBS := $(patsubst lib%.a,-l%,$(notdir $(LIB_FILES))) + +COMPONENT_ADD_LDFLAGS += -L$(COMPONENT_PATH)/ $(LIBS) + +ALL_LIB_FILES += $(LIB_FILES) diff --git a/components/recorder_engine/include/esp_sr_iface.h b/components/esp-sr/wake_word_engine/include/esp_wn_iface.h similarity index 60% rename from components/recorder_engine/include/esp_sr_iface.h rename to components/esp-sr/wake_word_engine/include/esp_wn_iface.h index b4001b3..8082681 100644 --- a/components/recorder_engine/include/esp_sr_iface.h +++ b/components/esp-sr/wake_word_engine/include/esp_wn_iface.h @@ -1,6 +1,6 @@ #pragma once #include "stdint.h" -#include "esp_err.h" +#include "dl_lib_coefgetter_if.h" //Opaque model data container typedef struct model_iface_data_t model_iface_data_t; @@ -19,12 +19,13 @@ typedef struct { } wake_word_info_t; /** - * @brief Easy function type to initialze a model instance with a detection mode + * @brief Easy function type to initialze a model instance with a detection mode and specified wake word coefficient * - * @param det_mode The wake words detection mode to trigger wake words, the range of det_threshold is 0.5~0.9999 + * @param det_mode The wake words detection mode to trigger wake words, DET_MODE_90 or DET_MODE_95 + * @param model_coeff The specified wake word model coefficient * @returns Handle to the model data */ -typedef model_iface_data_t* (*esp_sr_iface_op_create_t)(det_mode_t det_mode); +typedef model_iface_data_t* (*esp_wn_iface_op_create_t)(const model_coeff_getter_t *model_coeff, det_mode_t det_mode); /** @@ -36,7 +37,7 @@ typedef model_iface_data_t* (*esp_sr_iface_op_create_t)(det_mode_t det_mode); * @param model The model object to query * @return The amount of samples to feed the detect function */ -typedef int (*esp_sr_iface_op_get_samp_chunksize_t)(model_iface_data_t *model); +typedef int (*esp_wn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model); /** @@ -45,7 +46,7 @@ typedef int (*esp_sr_iface_op_get_samp_chunksize_t)(model_iface_data_t *model); * @param model The model object to query * @return The sample rate, in hz */ -typedef int (*esp_sr_iface_op_get_samp_rate_t)(model_iface_data_t *model); +typedef int (*esp_wn_iface_op_get_samp_rate_t)(model_iface_data_t *model); /** * @brief Get the number of wake words @@ -53,7 +54,7 @@ typedef int (*esp_sr_iface_op_get_samp_rate_t)(model_iface_data_t *model); * @param model The model object to query * @returns the number of wake words */ -typedef int (*esp_sr_iface_op_get_word_num_t)(model_iface_data_t *model); +typedef int (*esp_wn_iface_op_get_word_num_t)(model_iface_data_t *model); /** * @brief Get the name of wake word by index @@ -64,18 +65,7 @@ typedef int (*esp_sr_iface_op_get_word_num_t)(model_iface_data_t *model); * @param word_index The index of wake word * @returns the detection threshold */ -typedef char* (*esp_sr_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index); - -/** - * @brief Get the structure which contains the information about wake words - * - * @param model The model object to query - * @param word_list The structure which contains the number and name of wake words - * @return - * - ESP_OK Success - * - ESP_FAIL The word_list is NULL. - */ -typedef esp_err_t (*esp_sr_iface_op_get_word_list_t)(model_iface_data_t *model, wake_word_info_t* word_list); +typedef char* (*esp_wn_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index); /** * @brief Set the detection threshold to manually abjust the probability @@ -85,20 +75,19 @@ typedef esp_err_t (*esp_sr_iface_op_get_word_list_t)(model_iface_data_t *model, * @param word_index The index of wake word * @return 0: setting failed, 1: setting success */ -typedef int (*esp_sr_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index); +typedef int (*esp_wn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index); /** * @brief Get the wake word detection threshold of different modes * * @param model The model object to query - * @param det_mode The wake words recognition operating mode * @param word_index The index of wake word * @returns the detection threshold */ -typedef float (*esp_sr_iface_op_get_det_threshold_t)(model_iface_data_t *model, det_mode_t det_mode, int word_index); +typedef float (*esp_wn_iface_op_get_det_threshold_t)(model_iface_data_t *model, int word_index); /** - * @brief Feed samples of an audio stream to the speech recognition model and detect if there is a keyword found. + * @brief Feed samples of an audio stream to the keyword detection model and detect if there is a keyword found. * * @Warning The index of wake word start with 1, 0 means no wake words is detected. * @@ -107,28 +96,27 @@ typedef float (*esp_sr_iface_op_get_det_threshold_t)(model_iface_data_t *model, * get_samp_chunksize function. * @return The index of wake words, return 0 if no wake word is detected, else the index of the wake words. */ -typedef int (*esp_sr_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples); +typedef int (*esp_wn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples); /** * @brief Destroy a speech recognition model * * @param model Model object to destroy */ -typedef void (*esp_sr_iface_op_destroy_t)(model_iface_data_t *model); +typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model); /** - * This structure contains the functions used to do operations on a speech recognition model. + * This structure contains the functions used to do operations on a wake word detection model. */ typedef struct { - esp_sr_iface_op_create_t create; - esp_sr_iface_op_get_samp_chunksize_t get_samp_chunksize; - esp_sr_iface_op_get_samp_rate_t get_samp_rate; - esp_sr_iface_op_get_word_num_t get_word_num; - esp_sr_iface_op_get_word_name_t get_word_name; - esp_sr_iface_op_get_word_list_t get_word_list; - esp_sr_iface_op_set_det_threshold_t set_det_threshold; - esp_sr_iface_op_get_det_threshold_t get_det_threshold_by_mode; - esp_sr_iface_op_detect_t detect; - esp_sr_iface_op_destroy_t destroy; -} esp_sr_iface_t; \ No newline at end of file + esp_wn_iface_op_create_t create; + esp_wn_iface_op_get_samp_chunksize_t get_samp_chunksize; + esp_wn_iface_op_get_samp_rate_t get_samp_rate; + esp_wn_iface_op_get_word_num_t get_word_num; + esp_wn_iface_op_get_word_name_t get_word_name; + esp_wn_iface_op_set_det_threshold_t set_det_threshold; + esp_wn_iface_op_get_det_threshold_t get_det_threshold; + esp_wn_iface_op_detect_t detect; + esp_wn_iface_op_destroy_t destroy; +} esp_wn_iface_t; diff --git a/components/recorder_engine/include/esp_sr_models.h b/components/esp-sr/wake_word_engine/include/esp_wn_models.h similarity index 61% rename from components/recorder_engine/include/esp_sr_models.h rename to components/esp-sr/wake_word_engine/include/esp_wn_models.h index 6f3d828..4761395 100644 --- a/components/recorder_engine/include/esp_sr_models.h +++ b/components/esp-sr/wake_word_engine/include/esp_wn_models.h @@ -1,19 +1,29 @@ #pragma once -#include "esp_sr_iface.h" +#include "esp_wn_iface.h" //Contains declarations of all available speech recognion models. Pair this up with the right coefficients and you have a model that can recognize //a specific phrase or word. -extern const esp_sr_iface_t esp_sr_wakenet2_float; -extern const esp_sr_iface_t sr_model_wakenet1_float; -extern const esp_sr_iface_t sr_model_wakenet1_quantized; -extern const esp_sr_iface_t esp_sr_wakenet2_quantized; -extern const esp_sr_iface_t esp_sr_wakenet3_quantized; -extern const esp_sr_iface_t esp_sr_wakenet4_quantized; + +extern const esp_wn_iface_t esp_sr_wakenet5_quantized; + + +/* + Configure network to use based on what's selected in menuconfig. +*/ +#define WAKENET_MODEL esp_sr_wakenet5_quantized + +/* + Configure wake word to use based on what's selected in menuconfig. +*/ + +#include "hilexin_wn5.h" +#define WAKENET_COEFF get_coeff_hilexin_wn5 + /* example -static const sr_model_iface_t *model = &sr_model_wakenet3_quantized; +static const sr_model_iface_t *model = &WAKENET_MODEL; //Initialize wakeNet model data static model_iface_data_t *model_data=model->create(DET_MODE_90); diff --git a/components/esp-sr/wake_word_engine/include/hilexin_wn5.h b/components/esp-sr/wake_word_engine/include/hilexin_wn5.h new file mode 100644 index 0000000..d922a6a --- /dev/null +++ b/components/esp-sr/wake_word_engine/include/hilexin_wn5.h @@ -0,0 +1,8 @@ +//Generated by mkmodel +#pragma once +#include +#include "dl_lib_coefgetter_if.h" +#include "dl_lib_matrix.h" +#include "dl_lib_matrixq.h" + +extern const model_coeff_getter_t get_coeff_hilexin_wn5; diff --git a/components/esp-sr/wake_word_engine/libhilexin_wn5.a b/components/esp-sr/wake_word_engine/libhilexin_wn5.a new file mode 100644 index 0000000..94ce15d Binary files /dev/null and b/components/esp-sr/wake_word_engine/libhilexin_wn5.a differ diff --git a/components/recorder_engine/CMakeLists.txt b/components/recorder_engine/CMakeLists.txt deleted file mode 100644 index c205db3..0000000 --- a/components/recorder_engine/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -set(COMPONENT_SRCS a.c) -set(COMPONENT_ADD_INCLUDEDIRS include) - -register_component() - -target_link_libraries(${COMPONENT_TARGET} "-L ${CMAKE_CURRENT_SOURCE_DIR}") -target_link_libraries(${COMPONENT_TARGET} - esp_wakenet - nn_model - ) diff --git a/components/recorder_engine/component.mk b/components/recorder_engine/component.mk deleted file mode 100644 index 7ae9250..0000000 --- a/components/recorder_engine/component.mk +++ /dev/null @@ -1,12 +0,0 @@ -COMPONENT_ADD_INCLUDEDIRS := . \ - ./include - -COMPONENT_SRCDIRS := . \ - ./include - -LIBS := esp_wakenet nn_model - -COMPONENT_ADD_LDFLAGS := -L$(COMPONENT_PATH)/ $(addprefix -l,$(LIBS)) - -ALL_LIB_FILES += $(patsubst %,$(COMPONENT_PATH)/lib%.a,$(LIBS)) - diff --git a/components/recorder_engine/libesp_wakenet.a b/components/recorder_engine/libesp_wakenet.a deleted file mode 100644 index 5709346..0000000 Binary files a/components/recorder_engine/libesp_wakenet.a and /dev/null differ diff --git a/components/recorder_engine/libnn_model.a b/components/recorder_engine/libnn_model.a deleted file mode 100644 index 30ebc36..0000000 Binary files a/components/recorder_engine/libnn_model.a and /dev/null differ diff --git a/examples/single_chip/recognition_solution/main/CMakeLists.txt b/examples/single_chip/recognition_solution/main/CMakeLists.txt index 73a4851..8727ea5 100644 --- a/examples/single_chip/recognition_solution/main/CMakeLists.txt +++ b/examples/single_chip/recognition_solution/main/CMakeLists.txt @@ -15,8 +15,9 @@ set(COMPONENT_REQUIRES nvs_flash esp_http_server fb_gfx - recorder_engine + esp-sr spiffs ) register_component() + diff --git a/examples/single_chip/recognition_solution/main/app_httpserver.c b/examples/single_chip/recognition_solution/main/app_httpserver.c index 2c64408..aad7d2d 100644 --- a/examples/single_chip/recognition_solution/main/app_httpserver.c +++ b/examples/single_chip/recognition_solution/main/app_httpserver.c @@ -37,7 +37,7 @@ static const char *TAG = "app_httpserver"; #define FACE_COLOR_CYAN (FACE_COLOR_BLUE | FACE_COLOR_GREEN) #define FACE_COLOR_PURPLE (FACE_COLOR_BLUE | FACE_COLOR_RED) -#define ENROLL_CONFIRM_TIMES 3 +#define ENROLL_CONFIRM_TIMES 1 #define FACE_ID_SAVE_NUMBER 10 #define PART_BOUNDARY "123456789000000000000987654321" diff --git a/examples/single_chip/recognition_solution/main/app_speech_recsrc.c b/examples/single_chip/recognition_solution/main/app_speech_recsrc.c index 5ad9466..13a4d03 100644 --- a/examples/single_chip/recognition_solution/main/app_speech_recsrc.c +++ b/examples/single_chip/recognition_solution/main/app_speech_recsrc.c @@ -69,3 +69,4 @@ void recsrcTask(void *arg) vTaskDelete(NULL); } + diff --git a/examples/single_chip/recognition_solution/main/app_speech_wakeup.c b/examples/single_chip/recognition_solution/main/app_speech_wakeup.c index 5eade03..fe5c751 100644 --- a/examples/single_chip/recognition_solution/main/app_speech_wakeup.c +++ b/examples/single_chip/recognition_solution/main/app_speech_wakeup.c @@ -12,14 +12,15 @@ #include "esp_partition.h" #include "app_speech_srcif.h" #include "sdkconfig.h" -#include "esp_sr_iface.h" -#include "esp_sr_models.h" +#include "esp_wn_iface.h" +#include "esp_wn_models.h" +#include "dl_lib_coefgetter_if.h" #include "app_main.h" -#define SR_MODEL esp_sr_wakenet3_quantized +static const esp_wn_iface_t *wakenet = &WAKENET_MODEL; +static const model_coeff_getter_t *model_coeff_getter = &WAKENET_COEFF; static src_cfg_t srcif; -static const esp_sr_iface_t *model = &SR_MODEL; static model_iface_data_t *model_data; QueueHandle_t sndQueue; @@ -27,20 +28,20 @@ QueueHandle_t sndQueue; static void event_wakeup_detected(int r) { assert(g_state == WAIT_FOR_WAKEUP); - printf("%s DETECTED.\n", model->get_word_name(model_data, r)); + printf("%s DETECTED.\n", wakenet->get_word_name(model_data, r)); g_state = WAIT_FOR_CONNECT; } void nnTask(void *arg) { - int audio_chunksize = model->get_samp_chunksize(model_data); + int audio_chunksize = wakenet->get_samp_chunksize(model_data); int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t)); assert(buffer); while(1) { xQueueReceive(sndQueue, buffer, portMAX_DELAY); - int r=model->detect(model_data, buffer); + int r=wakenet->detect(model_data, buffer); if (r) { event_wakeup_detected(r); @@ -54,15 +55,14 @@ void nnTask(void *arg) void app_speech_wakeup_init() { //Initialize NN model - model_data=model->create(DET_MODE_95); + model_data=wakenet->create(model_coeff_getter,DET_MODE_95); - wake_word_info_t* word_list = malloc(sizeof(wake_word_info_t)); - esp_err_t ret = model->get_word_list(model_data, word_list); - if (ret == ESP_OK) printf("wake word number = %d, word1 name = %s\n", - word_list->wake_word_num, word_list->wake_word_list[0]); - free(word_list); + int wake_word_num = wakenet->get_word_num(model_data); + char *wake_word_list = wakenet->get_word_name(model_data, 1); + if (wake_word_num) printf("wake word number = %d, word1 name = %s\n", + wake_word_num, wake_word_list); - int audio_chunksize=model->get_samp_chunksize(model_data); + int audio_chunksize=wakenet->get_samp_chunksize(model_data); //Initialize sound source sndQueue=xQueueCreate(2, (audio_chunksize*sizeof(int16_t))); @@ -73,3 +73,4 @@ void app_speech_wakeup_init() xTaskCreatePinnedToCore(&nnTask, "nn", 2*1024, NULL, 5, NULL, 1); } + diff --git a/examples/single_chip/recognition_solution/main/include/app_main.h b/examples/single_chip/recognition_solution/main/include/app_main.h index 48ee92e..b7aa573 100644 --- a/examples/single_chip/recognition_solution/main/include/app_main.h +++ b/examples/single_chip/recognition_solution/main/include/app_main.h @@ -3,7 +3,7 @@ #include "app_wifi.h" #include "app_speech_srcif.h" -#define VERSION "0.9.0" +#define VERSION "1.0.0" #define GPIO_LED_RED 21 #define GPIO_LED_WHITE 22