// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef DL_LIB_MATRIXQ_H #define DL_LIB_MATRIXQ_H #include #include "dl_lib_matrix.h" typedef int16_t qtp_t; //Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted //for easy use as a multiplicand without stressing out the flash cache too much. typedef struct { int w; int h; int stride; //Normally equals h, not w! int flags; int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values. qtp_t *itemq; } dl_matrix2dq_t; #define DL_QTP_SHIFT 15 #define DL_QTP_RANGE ((1<itemq[(y)+(x)*m->stride] #define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null #define DL_SHIFT_AUTO 32 /** * @info About quantized matrices and shift values * * Grab a coffee (or tea, or hot water) and sit down when you read this for the first * time. Quantized matrices can speed up your operations, but come with some quirks, and * it's good to understand how they work before using them. * * The data in the quantized matrix type is stored similarily to floating-point types: * when storing a real value, the value is stored as a mantissa (base number) and an * exponent. The 'real' value that can be re-derived from those two numbers is something * similar to mantissa*2^exponent. Up to this point, there's not that much difference from * the standard floating point implementations like e.g. IEEE-754. * * The difference with respect to quantized matrices is that for a quantized matrix, it is * assumed all values stored have more-or-less the same order of magnitude. This allows the * matrix to only store all the mantissas, while the exponents are shared; there is only one * exponent for the entire matrix. This makes it quicker to handle matrix operations - the * logic to fix the exponents only needs to happen once, while the rest can be done in simple * integer arithmetic. It also nets us some memory savings - while normally a floating point * number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the * memory requirements. * * While most of the details of handling the intricacies of the quantized matrixes are done * transparently by the code in dl_lib_matrixq.c, some implementation details leak out, * specifically in places where addition/subtraction/division happens. * * The problem is that the routines do not know what the size of the resulting operation is. For * instance, when adding two matrices of numbers, the resulting numbers *could* be large enough * to overflow the mantissa of the result if the exponent is the same. However, if by default we * assume the mantissas needs to be scaled back, we may lose precision. * * In order to counter this, all operations that have this issue have a ``shift`` argument. If * the argument is zero, the routine will be conservative, that is, increase the exponent of * the result to such an extent it's mathematically impossible a value in the result will exceed * the maximum value that can be stored. However, when this argument is larger than zero, the * algorithm will hold back on this scaling by the indicated amount of bits, preserving precision * but increasing the chance of some of the calculated values not fitting in the mantissa anymore. * If this happens, the value will be clipped to the largest (or, for negative values, smallest) * value possible. (Neural networks usually are okay with this happening for a limited amount * of matrix indices). * * For deciding on these shift values, it is recommended to start with a shift value of one, then * use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value. * If it indicates bits are under-used, increase it. Note that for adding and subtraction, only * shift values of 0 or 1 make sense; these routines will error out if you try to do something * else. * * For neural networks and other noise-tolerant applications, note that even when * dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead * to slightly improved precision. Feel free to experiment. **/ /** * @brief Allocate a matrix * * @param w Width of the matrix * @param h Height of the matrix * @return The matrix, or NULL if out of memory */ dl_matrix2dq_t *dl_matrixq_alloc(int w, int h); /** * @brief Convert a floating-point matrix to a quantized matrix * * @param m Floating-point matrix to convert * @param out Quantized matrix to re-use. If NULL, allocate a new one. * @Return The quantized version of the floating-point matrix */ dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out); /** * TODO: DESCRIBE THIS FUNCTION */ dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit); /** * @brief Convert a quantized matrix to a floating-point one. * * @param m Floating-point matrix to convert * @param out Quantized matrix to re-use. If NULL, allocate a new one. * @Return The quantized version of the floating-point matrix **/ dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out); /** * @brief Free a quantized matrix * Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well. * * @param m Matrix to free */ void dl_matrixq_free(dl_matrix2dq_t *m); /** * @brief Zero out the matrix * Sets all entries in the matrix to 0. * * @param m Matrix to zero */ void dl_matrixq_zero(dl_matrix2dq_t *m); /** * @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix. * * @param a First multiplicand * @param b Second multiplicand * @param res Dotproduct data. *Must* be a *different* matrix from a or b! * @param shift Shift ratio */ void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); /** * @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix. * * @param a First multiplicand * @param b Second multiplicand * @param res Dotproduct data. *Must* be a *different* matrix from a or b! */ void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); /** * @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product. * * Result is a fixed-point matrix. * * Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be * much slower than dl_matrixq_dot . * * @param a First multiplicand * @param b Second multiplicand * @param res Dotproduct data. *Must* be a *different* matrix from a or b! * @param shift Shift ratio */ void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); /** * @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product. * * Result is a floating-point matrix. * * Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be * much slower than dl_matrixq_dot_matrix_out. * * @param a First multiplicand * @param b Second multiplicand * @param res Dotproduct data. *Must* be a *different* matrix from a or b! */ void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); /** * @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix. * * @param a First multiplicand; float matrix * @param b Second multiplicand; quantized matrix * @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b! */ void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res); /** * @brief Print the contents of a quantized matrix to stdout. Used for debugging. * * @param a The matrix to print. */ void dl_printmatrixq(const dl_matrix2dq_t *a); /** * @brief Add a pair of quantizedmatrices item-by-item: res=a-b * * @param a First matrix * @param b Second matrix * @param res Added data. Can be equal to a or b to overwrite that. * @param shift Shift value. Only 0 or 1 makes sense here. */ void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); /** * @brief Generate a new matrix using a range of items from an existing matrix. * When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer * to the existing data. Changing the data in the resulting matrix, as a result, will also change * the data in the existing matrix that has been sliced. * * @Warning In contrast to the floating point equivalent of this function, the fixed-point version * of this has the issue that as soon as the output exponent of one of the slices changes, the data * in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you * use this function, either treat the slices as read-only, or assume the sliced matrix contains * garbage after modifying the data in one of the slices. * * @param x X-offset of the origin of the returned matrix within the sliced matrix * @param y Y-offset of the origin of the returned matrix within the sliced matrix * @param w Width of the resulting matrix * @param h Height of the resulting matrix * @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix. * @return The resulting slice matrix, or NULL if out of memory */ dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in); /** * @brief select a range of items from an existing matrix and flatten them into one dimension. * * @Warning The results are flattened in row-major order. * * @param x X-offset of the origin of the returned matrix within the sliced matrix * @param y Y-offset of the origin of the returned matrix within the sliced matrix * @param w Width of the resulting matrix * @param h Height of the resulting matrix * @param in Old matrix to re-use. Passing NULL will allocate a new matrix. * @return The resulting flatten matrix, or NULL if out of memory */ dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in); /** * @brief Subtract a quantized matrix from another, item-by-item: res=a-b * * @param a First matrix * @param b Second matrix * @param res Subtracted data. Can be equal to a or b to overwrite that. * @param shift Shift value. Only 0 or 1 makes sense here. */ void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift); /** * @brief Multiply a pair of quantized matrices item-by-item: res=a*b * * @param a First multiplicand * @param b Second multiplicand * @param res Multiplicated data. Can be equal to a or b to overwrite that matrix. */ void dl_matrixq_mul(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res); /** * @brief Divide a pair of quantized matrices item-by-item: res=a/b * * @param a First matrix * @param b Second matrix * @param res Divided data. Can be equal to a or b to overwrite that. */ void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift); /** * @brief Check if two quantized matrices have the same shape, that is, the same amount of * rows and columns * * @param a First of the two matrices to compare * @param b Second of the two matrices to compare * @return true if the two matrices are shaped the same, false otherwise. */ int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b); /** * @brief Concatenate the rows of two quantized matrices into a new matrix * * @param a First matrix * @param b Second matrix * @return A newly allocated quantized matrix with as values a|b */ dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b); /** * @brief Add a constant to every item of the quantized matrix * * @param subj Matrix to add the constant to * @param add The constant */ void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift); /** * @brief Check the sanity of a quantized matrix * * Due to the nature of quantized matrices, depending on the calculations a quantized * matrix is the result of and the shift values chosen in those calculations, a quantized * matrix may have an exponent and mantissas that lead to a loss of precision, either because * most significant mantissa bits are unused, or because a fair amount of mantissas are * clipped. This function checks if this is the case and will report a message to stdout * if significant loss of precision is detected. * * @param m The quantized matrix to check * @param name A string to be displayed in the message if the sanity check fails * @return True if matrix is sane, false otherwise **/ int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name); /** * @brief re-adjust the exponent of the matrix to fit the mantissa better * * This function will shift up all the data in the mantissas so there are no * most-significant bits that are unused in all mantissas. It will also adjust * the exponent to keep the actua values in the matrix the same. * * Some operations done on a matrix, especially operations that re-use the * result of earlier operations done in the same way, can lead to the loss of * data because the exponent of the quantized matrix is never re-adjusted. You * can do that implicitely by calling this function. * * @param m The matrix to re-adjust **/ void dl_matrixq_readjust_exp(dl_matrix2dq_t *m); /** * @brief Get the floating-point value of a specific item from the quantized matrix * * @param m Matrix to access * @param x Column address * @param y Row address * @return Value in that position */ fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y); /** * @brief Set a specific item in the quantized matrix to the given * floating-point value * * @warning If the given value is more than the exponent in the quantized matrix * allows for, all mantissas in the matrix will be shifted down to make the value * 'fit'. If, however, the exponent is such that the value would result in a * quantized mantissa of 0, nothing is done. * * @param m Matrix to access * @param x Column address * @param y Row address * @param val Value to write to that position */ void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val); #endif