From 5ffeeec59aeb7454f41874019897c4aaa17bdbb0 Mon Sep 17 00:00:00 2001
From: Ye Hang Yang <yehangyang@espressif.com>
Date: Tue, 3 Aug 2021 16:11:34 +0800
Subject: [PATCH] :sparkles: add moving target detection

---
 components/common/Kconfig                     |   5 +
 components/esp-dl                             |   2 +-
 examples/camera_lcd/README_cn.rst             |   2 +
 examples/camera_lcd/main/app_dl.cpp           |  52 +++++++--
 examples/camera_terminal/README_cn.rst        |   1 +
 examples/camera_terminal/main/app_dl.cpp      |  57 +++++++---
 examples/camera_web_server/README_cn.rst      |   1 +
 examples/camera_web_server/main/app_httpd.cpp | 106 +++++++++++-------
 8 files changed, 161 insertions(+), 65 deletions(-)

diff --git a/components/common/Kconfig b/components/common/Kconfig
index 2686392..675f6ae 100644
--- a/components/common/Kconfig
+++ b/components/common/Kconfig
@@ -306,5 +306,10 @@ menu "ESP-WHO General Configuration"
 
         # Human Hand Series
 
+
+        config DL_MOVING_TARGET_DETECTION_ENABLED
+            bool "Enable Moving Target Detection"
+            default y
+
     endmenu
 endmenu
\ No newline at end of file
diff --git a/components/esp-dl b/components/esp-dl
index cc615c2..ec9703e 160000
--- a/components/esp-dl
+++ b/components/esp-dl
@@ -1 +1 @@
-Subproject commit cc615c23793efffb5380b32b2c3d3eae54df7e98
+Subproject commit ec9703e70b6e5e738cdc9ac3b7aad0cd7a4f0797
diff --git a/examples/camera_lcd/README_cn.rst b/examples/camera_lcd/README_cn.rst
index ce40478..938baad 100755
--- a/examples/camera_lcd/README_cn.rst
+++ b/examples/camera_lcd/README_cn.rst
@@ -10,6 +10,8 @@ Camera with LCD `[English] <./README.rst>`_
 
 + `CatFaceDetectMN03 <https://github.com/espressif/esp-dl/blob/master/include/model_zoo/cat_face_detect_mn03.hpp>`_
 
++ `移动侦测 <https://github.com/espressif/esp-dl/blob/master/include/image/dl_image.hpp#L322>`_
+
 
 支持的开发套件
 ************************************************************************************************
diff --git a/examples/camera_lcd/main/app_dl.cpp b/examples/camera_lcd/main/app_dl.cpp
index 77c8181..f4d2444 100644
--- a/examples/camera_lcd/main/app_dl.cpp
+++ b/examples/camera_lcd/main/app_dl.cpp
@@ -34,12 +34,12 @@ static const char *TAG = "app_dl";
 
 void task_dl(void *arg)
 {
-    camera_fb_t *fb = NULL;
     dl::tool::Latency latency_total(24);
     dl::tool::Latency latency_fetch;
     dl::tool::Latency latency_decode;
     dl::tool::Latency latency_detect;
     dl::tool::Latency latency_recognize;
+    dl::tool::Latency latency_moving;
 
 #if CONFIG_DL_HUMAN_FACE
 #if CONFIG_DL_HUMAN_FACE_DETECTION_S1_MSR01
@@ -67,32 +67,53 @@ void task_dl(void *arg)
         latency_decode.clear_period();
         latency_detect.clear_period();
         latency_recognize.clear_period();
+        latency_moving.clear_period();
 
         latency_total.start();
 
         latency_fetch.start();
-        fb = esp_camera_fb_get();
-        if (!fb)
+        camera_fb_t *frame = esp_camera_fb_get();
+        if (!frame)
         {
             ESP_LOGE(TAG, "Camera capture failed");
             continue;
         }
         latency_fetch.end();
 
+#if CONFIG_DL_MOVING_TARGET_DETECTION_ENABLED
+        camera_fb_t *frame2 = esp_camera_fb_get();
+        if (!frame2)
+        {
+            ESP_LOGE(TAG, "Camera capture failed");
+            continue;
+        }
+        latency_fetch.end();
+
+        latency_moving.start();
+        uint32_t moving_point_number = dl::image::get_moving_point_number((uint16_t *)frame->buf, (uint16_t *)frame2->buf, frame->height, frame->width, 8, 15);
+        latency_moving.end();
+        if (moving_point_number > 50)
+        {
+            ESP_LOGI("Moving Target", "Detected.");
+            dl::image::draw_filled_rectangle((uint16_t *)frame->buf, frame->height, frame->width, 0, 0, 10, 10);
+        }
+        esp_camera_fb_return(frame2);
+#endif
+
 #if CONFIG_DL_ENABLED
 #if CONFIG_DL_HUMAN_FACE
         latency_detect.start();
 #if CONFIG_DL_HUMAN_FACE_DETECTION_S2_ENABLED
-        std::list<dl::detect::result_t> &detect_candidates = detector.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
-        std::list<dl::detect::result_t> &detect_results = detector2.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3}, detect_candidates);
+        std::list<dl::detect::result_t> &detect_candidates = detector.infer((uint16_t *)frame->buf, {(int)frame->height, (int)frame->width, 3});
+        std::list<dl::detect::result_t> &detect_results = detector2.infer((uint16_t *)frame->buf, {(int)frame->height, (int)frame->width, 3}, detect_candidates);
 #else
-        std::list<dl::detect::result_t> &detect_results = detector.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
+        std::list<dl::detect::result_t> &detect_results = detector.infer((uint16_t *)frame->buf, {(int)frame->height, (int)frame->width, 3});
 #endif
         latency_detect.end();
 
         if (detect_results.size() > 0)
         {
-            draw_detection_result((uint16_t *)fb->buf, fb->height, fb->width, detect_results);
+            draw_detection_result((uint16_t *)frame->buf, frame->height, frame->width, detect_results);
 
 #if CONFIG_DL_HUMAN_FACE_RECOGNITION_ENABLED
             latency_recognize.start();
@@ -104,11 +125,11 @@ void task_dl(void *arg)
 
 #if CONFIG_DL_CAT_FACE
         latency_detect.start();
-        std::list<dl::detect::result_t> &detect_results = detector.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3});
+        std::list<dl::detect::result_t> &detect_results = detector.infer((uint16_t *)frame->buf, {(int)frame->height, (int)frame->width, 3});
         latency_detect.end();
         if (detect_results.size() > 0)
         {
-            draw_detection_result((uint16_t *)fb->buf, fb->height, fb->width, detect_results);
+            draw_detection_result((uint16_t *)frame->buf, frame->height, frame->width, detect_results);
         }
 #endif // CONFIG_DL_CAT_FACE
 
@@ -119,16 +140,23 @@ void task_dl(void *arg)
 #endif // CONFIG_DL_HUMAN_HAND
 #endif // CONFIG_DL_ENABLED
 
-        app_lcd_draw_bitmap((uint16_t *)fb->buf, fb->height, fb->width);
+        app_lcd_draw_bitmap((uint16_t *)frame->buf, frame->height, frame->width);
 
-        esp_camera_fb_return(fb);
+        esp_camera_fb_return(frame);
 
         latency_total.end();
         uint32_t frame_latency = latency_total.get_period() / 1000;
         uint32_t average_frame_latency = latency_total.get_average_period() / 1000;
-        ESP_LOGI("Frame Latency", "%4ums (%2.1ffps), Average: %4ums (%2.1ffps) | fetch: %4ums, decode: %4ums, detect: %4ums, recognize: %5ums",
+        ESP_LOGI("Frame Latency", "%4ums (%2.1ffps), Average: %4ums (%2.1ffps) | fetch: %4ums, "
+#if CONFIG_DL_MOVING_TARGET_DETECTION_ENABLED
+                                  "moving: %4uus, "
+#endif
+                                  "decode: %4ums, detect: %4ums, recognize: %5ums",
                  frame_latency, 1000.0 / frame_latency, average_frame_latency, 1000.0 / average_frame_latency,
                  latency_fetch.get_period() / 1000,
+#if CONFIG_DL_MOVING_TARGET_DETECTION_ENABLED
+                 latency_moving.get_period(),
+#endif
                  latency_decode.get_period() / 1000,
                  latency_detect.get_period() / 1000,
                  latency_recognize.get_period() / 1000);
diff --git a/examples/camera_terminal/README_cn.rst b/examples/camera_terminal/README_cn.rst
index 9a278c3..2df4e19 100644
--- a/examples/camera_terminal/README_cn.rst
+++ b/examples/camera_terminal/README_cn.rst
@@ -10,6 +10,7 @@ Camera with Terminal `[English] <./README.rst>`_
 
 + `CatFaceDetectMN03 <https://github.com/espressif/esp-dl/blob/master/include/model_zoo/cat_face_detect_mn03.hpp>`_
 
++ `移动侦测 <https://github.com/espressif/esp-dl/blob/master/include/image/dl_image.hpp#L322>`_
 
 
 运行示例
diff --git a/examples/camera_terminal/main/app_dl.cpp b/examples/camera_terminal/main/app_dl.cpp
index 179f553..e1b3506 100644
--- a/examples/camera_terminal/main/app_dl.cpp
+++ b/examples/camera_terminal/main/app_dl.cpp
@@ -38,6 +38,7 @@ void task_dl(void *arg)
     dl::tool::Latency latency_decode;
     dl::tool::Latency latency_detect;
     dl::tool::Latency latency_recognize;
+    dl::tool::Latency latency_moving;
 
     /* 1. Load configuration for detection */
 #if CONFIG_DL_HUMAN_FACE
@@ -66,32 +67,53 @@ void task_dl(void *arg)
         latency_decode.clear_period();
         latency_detect.clear_period();
         latency_recognize.clear_period();
+        latency_moving.clear_period();
 
         latency_total.start();
 
         latency_fetch.start();
-        camera_fb_t *fb = esp_camera_fb_get();
-        if (!fb)
+        camera_fb_t *frame = esp_camera_fb_get();
+        if (!frame)
         {
             ESP_LOGE(TAG, "Camera capture failed");
             continue;
         }
         latency_fetch.end();
 
-#if CONFIG_DL_ENABLED
-        latency_decode.start();
-        IMAGE_T *image_ptr = (IMAGE_T *)app_camera_decode(fb);
-        if (!image_ptr)
+#if CONFIG_DL_MOVING_TARGET_DETECTION_ENABLED && CONFIG_CAMERA_PIXEL_FORMAT_RGB565
+        camera_fb_t *frame2 = esp_camera_fb_get();
+        if (!frame2)
         {
-            esp_camera_fb_return(fb);
+            ESP_LOGE(TAG, "Camera capture failed");
             continue;
         }
-        int image_height = fb->height;
-        int image_width = fb->width;
-        pixformat_t image_format = fb->format;
+        latency_fetch.end();
+
+        latency_moving.start();
+        uint32_t moving_point_number = dl::image::get_moving_point_number((uint16_t *)frame->buf, (uint16_t *)frame2->buf, frame->height, frame->width, 8, 15);
+        latency_moving.end();
+        if (moving_point_number > 50)
+        {
+            ESP_LOGI("Moving Target", "Detected.");
+            dl::image::draw_filled_rectangle((uint16_t *)frame->buf, frame->height, frame->width, 0, 0, 10, 10);
+        }
+        esp_camera_fb_return(frame2);
+#endif
+
+#if CONFIG_DL_ENABLED
+        latency_decode.start();
+        IMAGE_T *image_ptr = (IMAGE_T *)app_camera_decode(frame);
+        if (!image_ptr)
+        {
+            esp_camera_fb_return(frame);
+            continue;
+        }
+        int image_height = frame->height;
+        int image_width = frame->width;
+        pixformat_t image_format = frame->format;
 
         if (image_format != PIXFORMAT_RGB565)
-            esp_camera_fb_return(fb);
+            esp_camera_fb_return(frame);
         latency_decode.end();
 
 #if CONFIG_DL_HUMAN_FACE
@@ -133,20 +155,27 @@ void task_dl(void *arg)
 #endif // CONFIG_DL_HUMAN_HAND
 
         if (image_format == PIXFORMAT_RGB565)
-            esp_camera_fb_return(fb);
+            esp_camera_fb_return(frame);
         else
             free(image_ptr);
 
 #else
-        esp_camera_fb_return(fb);
+        esp_camera_fb_return(frame);
 #endif // CONFIG_DL_ENABLED
 
         latency_total.end();
         uint32_t frame_latency = latency_total.get_period() / 1000;
         uint32_t average_frame_latency = latency_total.get_average_period() / 1000;
-        ESP_LOGI("Frame Latency", "%4ums (%2.1ffps), Average: %4ums (%2.1ffps) | fetch: %4ums, decode: %4ums, detect: %4ums, recognize: %5ums",
+        ESP_LOGI("Frame Latency", "%4ums (%2.1ffps), Average: %4ums (%2.1ffps) | fetch: %4ums, "
+#if CONFIG_DL_MOVING_TARGET_DETECTION_ENABLED && CONFIG_CAMERA_PIXEL_FORMAT_RGB565
+                                  "moving: %4uus, "
+#endif
+                                  "decode: %4ums, detect: %4ums, recognize: %5ums",
                  frame_latency, 1000.0 / frame_latency, average_frame_latency, 1000.0 / average_frame_latency,
                  latency_fetch.get_period() / 1000,
+#if CONFIG_DL_MOVING_TARGET_DETECTION_ENABLED && CONFIG_CAMERA_PIXEL_FORMAT_RGB565
+                 latency_moving.get_period(),
+#endif
                  latency_decode.get_period() / 1000,
                  latency_detect.get_period() / 1000,
                  latency_recognize.get_period() / 1000);
diff --git a/examples/camera_web_server/README_cn.rst b/examples/camera_web_server/README_cn.rst
index 0b046af..6cdf27f 100644
--- a/examples/camera_web_server/README_cn.rst
+++ b/examples/camera_web_server/README_cn.rst
@@ -10,6 +10,7 @@ Camera with Web Server `[English] <./README.rst>`_
 
 + `CatFaceDetectMN03 <https://github.com/espressif/esp-dl/blob/master/include/model_zoo/cat_face_detect_mn03.hpp>`_
 
++ `移动侦测 <https://github.com/espressif/esp-dl/blob/master/include/image/dl_image.hpp#L322>`_
 
 
 运行示例
diff --git a/examples/camera_web_server/main/app_httpd.cpp b/examples/camera_web_server/main/app_httpd.cpp
index 597aed6..80c84af 100644
--- a/examples/camera_web_server/main/app_httpd.cpp
+++ b/examples/camera_web_server/main/app_httpd.cpp
@@ -24,6 +24,7 @@
 #include "app_common.hpp"
 #include "sdkconfig.h"
 #include "dl_tool.hpp"
+#include "dl_image.hpp"
 
 #if defined(ARDUINO_ARCH_ESP32) && defined(CONFIG_ARDUHAL_ESP_LOG)
 #include "esp32-hal-log.h"
@@ -92,19 +93,19 @@ static esp_err_t capture_handler(httpd_req_t *req)
     dl::tool::Latency latency_total;
     latency_total.start();
 
-    camera_fb_t *fb = NULL;
+    camera_fb_t *frame = NULL;
     esp_err_t res = ESP_OK;
 
 #ifdef CONFIG_LED_ILLUMINATOR_ENABLED
     app_led_duty(led_duty);
     vTaskDelay(150 / portTICK_PERIOD_MS); // The LED needs to be turned on ~150ms before the call to esp_camera_fb_get()
-    fb = esp_camera_fb_get();             // or it won't be visible in the frame. A better way to do this is needed.
+    frame = esp_camera_fb_get();             // or it won't be visible in the frame. A better way to do this is needed.
     app_led_duty(0);
 #else
-    fb = esp_camera_fb_get();
+    frame = esp_camera_fb_get();
 #endif
 
-    if (!fb)
+    if (!frame)
     {
         ESP_LOGE(TAG, "Camera capture failed");
         httpd_resp_send_500(req);
@@ -116,23 +117,23 @@ static esp_err_t capture_handler(httpd_req_t *req)
     httpd_resp_set_hdr(req, "Access-Control-Allow-Origin", "*");
 
     char ts[32];
-    snprintf(ts, 32, "%ld.%06ld", fb->timestamp.tv_sec, fb->timestamp.tv_usec);
+    snprintf(ts, 32, "%ld.%06ld", frame->timestamp.tv_sec, frame->timestamp.tv_usec);
     httpd_resp_set_hdr(req, "X-Timestamp", (const char *)ts);
 
     size_t fb_len = 0;
-    if (fb->format == PIXFORMAT_JPEG)
+    if (frame->format == PIXFORMAT_JPEG)
     {
-        fb_len = fb->len;
-        res = httpd_resp_send(req, (const char *)fb->buf, fb->len);
+        fb_len = frame->len;
+        res = httpd_resp_send(req, (const char *)frame->buf, frame->len);
     }
     else
     {
         jpg_chunking_t jchunk = {req, 0};
-        res = frame2jpg_cb(fb, 80, jpg_encode_stream, &jchunk) ? ESP_OK : ESP_FAIL;
+        res = frame2jpg_cb(frame, 80, jpg_encode_stream, &jchunk) ? ESP_OK : ESP_FAIL;
         httpd_resp_send_chunk(req, NULL, 0);
         fb_len = jchunk.len;
     }
-    esp_camera_fb_return(fb);
+    esp_camera_fb_return(frame);
     latency_total.end();
 
     ESP_LOGI(TAG, "JPG: %uB %ums", (uint32_t)(fb_len), latency_total.get_period() / 1000);
@@ -141,7 +142,7 @@ static esp_err_t capture_handler(httpd_req_t *req)
 
 static esp_err_t stream_handler(httpd_req_t *req)
 {
-    camera_fb_t *fb = NULL;
+    camera_fb_t *frame = NULL;
     struct timeval _timestamp;
     esp_err_t res = ESP_OK;
     size_t _jpg_buf_len = 0;
@@ -157,6 +158,7 @@ static esp_err_t stream_handler(httpd_req_t *req)
     dl::tool::Latency latency_encode;
     dl::tool::Latency latency_detect;
     dl::tool::Latency latency_recognize;
+    dl::tool::Latency latency_moving;
 
 #if CONFIG_DL_HUMAN_FACE
 #if CONFIG_DL_HUMAN_FACE_DETECTION_S1_MSR01
@@ -198,6 +200,7 @@ static esp_err_t stream_handler(httpd_req_t *req)
 
         latency_detect.clear_period();
         latency_recognize.clear_period();
+        latency_moving.clear_period();
 
         image_ptr = NULL;
         is_detected = false;
@@ -207,22 +210,42 @@ static esp_err_t stream_handler(httpd_req_t *req)
             latency_total.start();
 
             latency_fetch.start();
-            fb = esp_camera_fb_get();
-            if (!fb)
+            frame = esp_camera_fb_get();
+            if (!frame)
             {
                 ESP_LOGE(TAG, "Camera capture failed");
                 res = ESP_FAIL;
                 break;
             }
-            _timestamp.tv_sec = fb->timestamp.tv_sec;
-            _timestamp.tv_usec = fb->timestamp.tv_usec;
+            _timestamp.tv_sec = frame->timestamp.tv_sec;
+            _timestamp.tv_usec = frame->timestamp.tv_usec;
             latency_fetch.end();
 
+#if CONFIG_DL_MOVING_TARGET_DETECTION_ENABLED && CONFIG_CAMERA_PIXEL_FORMAT_RGB565
+            camera_fb_t *frame2 = esp_camera_fb_get();
+            if (!frame2)
+            {
+                ESP_LOGE(TAG, "Camera capture failed");
+                continue;
+            }
+            latency_fetch.end();
+
+            latency_moving.start();
+            uint32_t moving_point_number = dl::image::get_moving_point_number((uint16_t *)frame->buf, (uint16_t *)frame2->buf, frame->height, frame->width, 8, 15);
+            latency_moving.end();
+            if (moving_point_number > 50)
+            {
+                ESP_LOGI("Moving Target", "Detected.");
+                dl::image::draw_filled_rectangle((uint16_t *)frame->buf, frame->height, frame->width, 0, 0, 10, 10);
+            }
+            esp_camera_fb_return(frame2);
+#endif
+
 #if CONFIG_DL_ENABLED
-            if (detection_enabled && fb->width < 400)
+            if (detection_enabled && frame->width < 400)
             {
                 latency_decode.start();
-                image_ptr = (IMAGE_T *)app_camera_decode(fb);
+                image_ptr = (IMAGE_T *)app_camera_decode(frame);
                 if (!image_ptr)
                 {
                     ESP_LOGE(TAG, "Frame decode failed");
@@ -234,17 +257,17 @@ static esp_err_t stream_handler(httpd_req_t *req)
 #if CONFIG_DL_HUMAN_FACE
                 latency_detect.start();
 #if CONFIG_DL_HUMAN_FACE_DETECTION_S2_ENABLED
-                std::list<dl::detect::result_t> &detect_candidates = detector.infer(image_ptr, {(int)fb->height, (int)fb->width, 3});
-                std::list<dl::detect::result_t> &detect_results = detector2.infer(image_ptr, {(int)fb->height, (int)fb->width, 3}, detect_candidates);
+                std::list<dl::detect::result_t> &detect_candidates = detector.infer(image_ptr, {(int)frame->height, (int)frame->width, 3});
+                std::list<dl::detect::result_t> &detect_results = detector2.infer(image_ptr, {(int)frame->height, (int)frame->width, 3}, detect_candidates);
 #else
-                std::list<dl::detect::result_t> &detect_results = detector.infer(image_ptr, {(int)fb->height, (int)fb->width, 3});
+                std::list<dl::detect::result_t> &detect_results = detector.infer(image_ptr, {(int)frame->height, (int)frame->width, 3});
 #endif
                 latency_detect.end();
 
                 if (detect_results.size() > 0)
                 {
                     is_detected = true;
-                    draw_detection_result(image_ptr, fb->height, fb->width, detect_results);
+                    draw_detection_result(image_ptr, frame->height, frame->width, detect_results);
 
                     latency_recognize.start();
                     if (recognition_enabled)
@@ -259,12 +282,12 @@ static esp_err_t stream_handler(httpd_req_t *req)
 
 #if CONFIG_DL_CAT_FACE
                 latency_detect.start();
-                std::list<dl::detect::result_t> &detect_results = detector.infer(image_ptr, {(int)fb->height, (int)fb->width, 3});
+                std::list<dl::detect::result_t> &detect_results = detector.infer(image_ptr, {(int)frame->height, (int)frame->width, 3});
                 latency_detect.end();
                 if (detect_results.size() > 0)
                 {
                     is_detected = true;
-                    draw_detection_result(image_ptr, fb->height, fb->width, detect_results);
+                    draw_detection_result(image_ptr, frame->height, frame->width, detect_results);
                 }
 #endif // CONFIG_DL_CAT_FACE
 
@@ -279,38 +302,38 @@ static esp_err_t stream_handler(httpd_req_t *req)
             latency_encode.start();
             if (is_detected)
             {
-                if (!fmt2jpg((uint8_t *)image_ptr, fb->width * fb->height * 3, fb->width, fb->height, CAMERA_PIXEL_FORMAT, 90, &_jpg_buf, &_jpg_buf_len))
+                if (!fmt2jpg((uint8_t *)image_ptr, frame->width * frame->height * 3, frame->width, frame->height, CAMERA_PIXEL_FORMAT, 90, &_jpg_buf, &_jpg_buf_len))
                 {
                     ESP_LOGE(TAG, "fmt2jpg failed");
                     res = ESP_FAIL;
                     break;
                 }
 
-                if (fb->format == PIXFORMAT_RGB565)
+                if (frame->format == PIXFORMAT_RGB565)
                     image_ptr = NULL;
 
-                esp_camera_fb_return(fb);
-                fb = NULL;
+                esp_camera_fb_return(frame);
+                frame = NULL;
             }
-            else if (fb->format == PIXFORMAT_JPEG)
+            else if (frame->format == PIXFORMAT_JPEG)
             {
-                _jpg_buf = fb->buf;
-                _jpg_buf_len = fb->len;
+                _jpg_buf = frame->buf;
+                _jpg_buf_len = frame->len;
             }
             else
             {
-                if (!frame2jpg(fb, 80, &_jpg_buf, &_jpg_buf_len))
+                if (!frame2jpg(frame, 80, &_jpg_buf, &_jpg_buf_len))
                 {
                     ESP_LOGE(TAG, "JPEG compression failed");
                     res = ESP_FAIL;
                     break;
                 }
 
-                if (fb->format == PIXFORMAT_RGB565)
+                if (frame->format == PIXFORMAT_RGB565)
                     image_ptr = NULL;
 
-                esp_camera_fb_return(fb);
-                fb = NULL;
+                esp_camera_fb_return(frame);
+                frame = NULL;
             }
             if (image_ptr)
                 free(image_ptr);
@@ -334,10 +357,10 @@ static esp_err_t stream_handler(httpd_req_t *req)
             res = httpd_resp_send_chunk(req, (const char *)_jpg_buf, _jpg_buf_len);
         }
 
-        if (fb)
+        if (frame)
         {
-            esp_camera_fb_return(fb);
-            fb = NULL;
+            esp_camera_fb_return(frame);
+            frame = NULL;
             _jpg_buf = NULL;
         }
         else if (_jpg_buf)
@@ -355,9 +378,16 @@ static esp_err_t stream_handler(httpd_req_t *req)
         uint32_t frame_latency = latency_total.get_period() / 1000;
         uint32_t average_frame_latency = latency_total.get_average_period() / 1000;
 
-        ESP_LOGI("Frame Latency", "%4ums (%2.1ffps), Average: %4ums (%2.1ffps) | fetch %4ums, decode: %4ums, detect: %4ums, recognize: %5ums, encode: %4ums",
+        ESP_LOGI("Frame Latency", "%4ums (%2.1ffps), Average: %4ums (%2.1ffps) | fetch %4ums, "
+#if CONFIG_DL_MOVING_TARGET_DETECTION_ENABLED && CONFIG_CAMERA_PIXEL_FORMAT_RGB565
+                                  "moving: %4uus, "
+#endif
+                                  "decode: %4ums, detect: %4ums, recognize: %5ums, encode: %4ums",
                  frame_latency, 1000.0 / frame_latency, average_frame_latency, 1000.0 / average_frame_latency,
                  latency_fetch.get_period() / 1000,
+#if CONFIG_DL_MOVING_TARGET_DETECTION_ENABLED && CONFIG_CAMERA_PIXEL_FORMAT_RGB565
+                 latency_moving.get_period(),
+#endif
                  latency_decode.get_period() / 1000,
                  latency_detect.get_period() / 1000,
                  latency_recognize.get_period() / 1000,