From 32a217eda7c1452e724cc23905eb5f2adc5850d6 Mon Sep 17 00:00:00 2001 From: Brenno Date: Fri, 22 Jan 2021 19:28:16 -0300 Subject: [PATCH] Added support to attach clip to detected object (ObjectDetection) effect This feature lets the user attach a clip to an object detected by the Object Detection effect, in the same way it is done with the Tracker Effect. --- src/CVObjectDetection.cpp | 21 ++++-- src/CVObjectDetection.h | 10 ++- src/Timeline.cpp | 38 +++++++++++ src/Timeline.h | 2 + src/TrackedObjectBBox.cpp | 2 +- src/TrackedObjectBBox.h | 2 +- src/TrackedObjectBase.h | 5 +- src/effects/ObjectDetection.cpp | 80 ++++++++++++++++++----- src/effects/ObjectDetection.h | 12 +++- src/protobuf_messages/objdetectdata.proto | 1 + src/sort_filter/sort.hpp | 2 +- 11 files changed, 148 insertions(+), 27 deletions(-) diff --git a/src/CVObjectDetection.cpp b/src/CVObjectDetection.cpp index c391197a..b262f8c5 100644 --- a/src/CVObjectDetection.cpp +++ b/src/CVObjectDetection.cpp @@ -101,7 +101,6 @@ void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, // Update progress processingController->SetProgress(uint(100*(frame_number-start)/(end-start))); - // std::cout<<"Frame: "< classIds; std::vector confidences; std::vector boxes; + std::vector objectIds; for (size_t i = 0; i < outs.size(); ++i) { @@ -176,13 +176,14 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds); // Clear data vectors - boxes.clear(); confidences.clear(); classIds.clear(); + boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear(); // Get SORT predicted boxes for(auto TBox : sort.frameTrackingResult){ if(TBox.frame == frameId){ boxes.push_back(TBox.box); confidences.push_back(TBox.confidence); classIds.push_back(TBox.classId); + objectIds.push_back(TBox.id); } } @@ -198,12 +199,14 @@ void CVObjectDetection::postprocess(const 
cv::Size &frameDims, const std::vector boxes.erase(boxes.begin() + j); classIds.erase(classIds.begin() + j); confidences.erase(confidences.begin() + j); + objectIds.erase(objectIds.begin() + j); break; } else{ boxes.erase(boxes.begin() + i); classIds.erase(classIds.begin() + i); confidences.erase(confidences.begin() + i); + objectIds.erase(objectIds.begin() + i); i = 0; break; } @@ -222,12 +225,14 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector boxes.erase(boxes.begin() + j); classIds.erase(classIds.begin() + j); confidences.erase(confidences.begin() + j); + objectIds.erase(objectIds.begin() + j); break; } else{ boxes.erase(boxes.begin() + i); classIds.erase(classIds.begin() + i); confidences.erase(confidences.begin() + i); + objectIds.erase(objectIds.begin() + i); i = 0; break; } @@ -247,7 +252,7 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector normalized_boxes.push_back(normalized_box); } - detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId); + detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds); } // Compute IOU between 2 boxes @@ -355,6 +360,7 @@ void CVObjectDetection::AddFrameDataToProto(pb_objdetect::Frame* pbFrameData, CV box->set_h(dData.boxes.at(i).height); box->set_classid(dData.classIds.at(i)); box->set_confidence(dData.confidences.at(i)); + box->set_objectid(dData.objectIds.at(i)); } } @@ -457,7 +463,10 @@ bool CVObjectDetection::_LoadObjDetectdData(){ const google::protobuf::RepeatedPtrField &pBox = pbFrameData.bounding_box(); // Construct data vectors related to detections in the current frame - std::vector classIds; std::vector confidences; std::vector> boxes; + std::vector classIds; + std::vector confidences; + std::vector> boxes; + std::vector objectIds; for(int i = 0; i < pbFrameData.bounding_box_size(); i++){ // Get bounding box coordinates @@ -468,13 +477,15 @@ bool 
CVObjectDetection::_LoadObjDetectdData(){ // Get class Id (which will be assign to a class name) and prediction confidence int classId = pBox.Get(i).classid(); float confidence = pBox.Get(i).confidence(); + // Get object Id + int objectId = pBox.Get(i).objectid(); // Push back data into vectors boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence); objectIds.push_back(objectId); } // Assign data to object detector map - detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id); + detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds); } // Show the time stamp from the last update in object detector data file diff --git a/src/CVObjectDetection.h b/src/CVObjectDetection.h index ae3dbf7d..3ec80217 100644 --- a/src/CVObjectDetection.h +++ b/src/CVObjectDetection.h @@ -49,16 +49,24 @@ namespace openshot // Stores the detected object bounding boxes and its properties. struct CVDetectionData{ CVDetectionData(){} - CVDetectionData(std::vector _classIds, std::vector _confidences, std::vector> _boxes, size_t _frameId){ + CVDetectionData( + std::vector _classIds, + std::vector _confidences, + std::vector> _boxes, + size_t _frameId, + std::vector _objectIds) + { classIds = _classIds; confidences = _confidences; boxes = _boxes; frameId = _frameId; + objectIds = _objectIds; } size_t frameId; std::vector classIds; std::vector confidences; std::vector> boxes; + std::vector objectIds; }; /** diff --git a/src/Timeline.cpp b/src/Timeline.cpp index de69bcce..05b9bead 100644 --- a/src/Timeline.cpp +++ b/src/Timeline.cpp @@ -290,6 +290,44 @@ std::list Timeline::GetTrackedObjectsIds() const{ return trackedObjects_ids; } +std::string Timeline::GetTrackedObjectValues(std::string id) const { + + // Initialize the JSON object + Json::Value trackedObjectJson; + + // Search for the tracked object on the map + auto iterator = tracked_objects.find(id); + + if (iterator != tracked_objects.end()) + { + // Id found, Get the object pointer and cast it as a 
TrackedObjectBBox + std::shared_ptr trackedObject = std::static_pointer_cast(iterator->second); + + // Get the trackedObject values for its first frame + auto boxes = trackedObject->BoxVec; + auto firstBox = boxes.begin()->second; + float x1 = firstBox.cx - (firstBox.width/2); + float y1 = firstBox.cy - (firstBox.height/2); + float x2 = firstBox.cx + (firstBox.width/2); + float y2 = firstBox.cy + (firstBox.height/2); + + trackedObjectJson["x1"] = x1; + trackedObjectJson["y1"] = y1; + trackedObjectJson["x2"] = x2; + trackedObjectJson["y2"] = y2; + + } + else { + // Id not found, return all 0 values + trackedObjectJson["x1"] = 0; + trackedObjectJson["y1"] = 0; + trackedObjectJson["x2"] = 0; + trackedObjectJson["y2"] = 0; + } + + return trackedObjectJson.toStyledString(); +} + // Add an openshot::Clip to the timeline void Timeline::AddClip(Clip* clip) { diff --git a/src/Timeline.h b/src/Timeline.h index 69cebc8a..e9b201b0 100644 --- a/src/Timeline.h +++ b/src/Timeline.h @@ -250,6 +250,8 @@ namespace openshot { std::shared_ptr GetTrackedObject(std::string id) const; /// Return the ID's of the tracked objects as a list of strings std::list GetTrackedObjectsIds() const; + /// Return the first trackedObject's properties as a JSON string + std::string GetTrackedObjectValues(std::string id) const; /// @brief Add an openshot::Clip to the timeline /// @param clip Add an openshot::Clip to the timeline. A clip can contain any type of Reader. diff --git a/src/TrackedObjectBBox.cpp b/src/TrackedObjectBBox.cpp index 40ba5e21..75f96094 100644 --- a/src/TrackedObjectBBox.cpp +++ b/src/TrackedObjectBBox.cpp @@ -230,7 +230,7 @@ bool TrackedObjectBBox::LoadBoxData(std::string inputFilePath) // Read the existing tracker message. fstream input(inputFilePath, ios::in | ios::binary); - //Check if it was able to read the protobuf data + // Check if it was able to read the protobuf data if (!bboxMessage.ParseFromIstream(&input)) { cerr << "Failed to parse protobuf message." 
<< endl; diff --git a/src/TrackedObjectBBox.h b/src/TrackedObjectBBox.h index 4a5a34ad..a8deadf6 100644 --- a/src/TrackedObjectBBox.h +++ b/src/TrackedObjectBBox.h @@ -174,7 +174,7 @@ namespace openshot TrackedObjectBBox(); /// Add a BBox to the BoxVec map - void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle); + void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) override; /// Update object's BaseFps void SetBaseFPS(Fraction fps); diff --git a/src/TrackedObjectBase.h b/src/TrackedObjectBase.h index 683b0b3f..0a336cf2 100644 --- a/src/TrackedObjectBase.h +++ b/src/TrackedObjectBase.h @@ -83,7 +83,10 @@ namespace openshot { virtual std::map GetBoxValues(int64_t frame_number) const { std::map ret; return ret; }; /// Return the main properties of the tracked object's parent clip virtual std::map GetParentClipProperties(int64_t frame_number) const { std::map ret; return ret; } - + /// Add a bounding box to the tracked object's BoxVec map + virtual void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) { return; }; + + /// Get and Set JSON methods virtual std::string Json() const = 0; ///< Generate JSON string of this object virtual Json::Value JsonValue() const = 0; ///< Generate Json::Value for this object diff --git a/src/effects/ObjectDetection.cpp b/src/effects/ObjectDetection.cpp index 929c9039..8036e9fc 100644 --- a/src/effects/ObjectDetection.cpp +++ b/src/effects/ObjectDetection.cpp @@ -28,12 +28,15 @@ * along with OpenShot Library. If not, see . 
*/ +#include + #include "effects/ObjectDetection.h" #include "effects/Tracker.h" using namespace std; using namespace openshot; + /// Blank constructor, useful when using Json to load the effect properties ObjectDetection::ObjectDetection(std::string clipObDetectDataPath) { @@ -64,7 +67,7 @@ void ObjectDetection::init_effect_details() info.description = "Detect objects through the video."; info.has_audio = false; info.has_video = true; - info.has_tracked_object = false; + info.has_tracked_object = true; } // This method is required for all derived classes of EffectBase, and returns a @@ -92,7 +95,7 @@ std::shared_ptr ObjectDetection::GetFrame(std::shared_ptr frame, i (int)(bb_nrml.width*fw), (int)(bb_nrml.height*fh)); drawPred(detections.classIds.at(i), detections.confidences.at(i), - box, cv_image); + box, cv_image, detections.objectIds.at(i)); } } @@ -103,7 +106,7 @@ std::shared_ptr ObjectDetection::GetFrame(std::shared_ptr frame, i return frame; } -void ObjectDetection::drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame) +void ObjectDetection::drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame, int objectNumber) { //Draw a rectangle displaying the bounding box @@ -133,29 +136,32 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){ // Create tracker message pb_objdetect::ObjDetect objMessage; - { - // Read the existing tracker message. - fstream input(inputFilePath, ios::in | ios::binary); - if (!objMessage.ParseFromIstream(&input)) { - cerr << "Failed to parse protobuf message." << endl; - return false; - } + + // Read the existing tracker message. + fstream input(inputFilePath, ios::in | ios::binary); + if (!objMessage.ParseFromIstream(&input)) { + cerr << "Failed to parse protobuf message." 
<< endl; + return false; } + - // Make sure classNames and detectionsData are empty + // Make sure classNames, detectionsData and trackedObjects are empty classNames.clear(); detectionsData.clear(); + trackedObjects.clear(); // Seed to generate same random numbers std::srand(1); // Get all classes names and assign a color to them - for(int i = 0; i < objMessage.classnames_size(); i++){ + for(int i = 0; i < objMessage.classnames_size(); i++) + { classNames.push_back(objMessage.classnames(i)); classesColor.push_back(cv::Scalar(std::rand()%205 + 50, std::rand()%205 + 50, std::rand()%205 + 50)); } // Iterate over all frames of the saved message - for (size_t i = 0; i < objMessage.frame_size(); i++) { + for (size_t i = 0; i < objMessage.frame_size(); i++) + { // Create protobuf message reader const pb_objdetect::Frame& pbFrameData = objMessage.frame(i); @@ -169,8 +175,11 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){ std::vector classIds; std::vector confidences; std::vector> boxes; + std::vector objectIds; - for(int i = 0; i < pbFrameData.bounding_box_size(); i++){ + // Iterate through the detected objects + for(int i = 0; i < pbFrameData.bounding_box_size(); i++) + { // Get bounding box coordinates float x = pBox.Get(i).x(); float y = pBox.Get(i).y(); @@ -180,6 +189,26 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){ int classId = pBox.Get(i).classid(); // Get prediction confidence float confidence = pBox.Get(i).confidence(); + + // Get the object Id + int objectId = pBox.Get(i).objectid(); + + // Search for the object id on trackedObjects map + auto trackedObject = trackedObjects.find(objectId); + // Check if object already exists on the map + if (trackedObject != trackedObjects.end()) + { + // Add a new BBox to it + trackedObject->second->AddBox(id, x+(w/2), y+(h/2), w, h, 0.0); + } + else + { + // There is no tracked object with that id, so insert a new one + TrackedObjectBBox trackedObj; + trackedObj.AddBox(id, 
x+(w/2), y+(h/2), w, h, 0.0); + std::shared_ptr trackedObjPtr = std::make_shared(trackedObj); + trackedObjects.insert({objectId, trackedObjPtr}); + } // Create OpenCV rectangle with the bouding box info cv::Rect_ box(x, y, w, h); @@ -188,10 +217,11 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){ boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence); + objectIds.push_back(objectId); } // Assign data to object detector map - detectionsData[id] = DetectionData(classIds, confidences, boxes, id); + detectionsData[id] = DetectionData(classIds, confidences, boxes, id, objectIds); } // Delete all global objects allocated by libprotobuf. @@ -226,6 +256,12 @@ Json::Value ObjectDetection::JsonValue() const { Json::Value root = EffectBase::JsonValue(); // get parent properties root["type"] = info.class_name; root["protobuf_data_path"] = protobuf_data_path; + + // Add trackedObjects IDs to JSON + for (auto const& trackedObject : trackedObjects){ + // Save the trackedObject Id on root + root["box_id"+to_string(trackedObject.first)] = trackedObject.second->Id(); + } // return JsonValue return root; @@ -262,6 +298,12 @@ void ObjectDetection::SetJsonValue(const Json::Value root) { protobuf_data_path = ""; } } + + for (auto const& trackedObject : trackedObjects){ + Json::Value trackedObjectJSON; + trackedObjectJSON["box_id"] = root["box_id"+to_string(trackedObject.first)]; + trackedObject.second->SetJsonValue(trackedObjectJSON); + } } // Get all properties for a specific frame @@ -269,6 +311,14 @@ std::string ObjectDetection::PropertiesJSON(int64_t requested_frame) const { // Generate JSON properties list Json::Value root; + + // Add trackedObjects IDs to JSON + for (auto const& trackedObject : trackedObjects){ + // Save the trackedObject Id on root + Json::Value trackedObjectJSON = trackedObject.second->PropertiesJSON(requested_frame); + root["box_id"+to_string(trackedObject.first)] = trackedObjectJSON["box_id"]; + } + 
root["id"] = add_property_json("ID", 0.0, "string", Id(), NULL, -1, -1, true, requested_frame); root["position"] = add_property_json("Position", Position(), "float", "", NULL, 0, 1000 * 60 * 30, false, requested_frame); root["layer"] = add_property_json("Track", Layer(), "int", "", NULL, 0, 20, false, requested_frame); diff --git a/src/effects/ObjectDetection.h b/src/effects/ObjectDetection.h index 31518c86..64df91c3 100644 --- a/src/effects/ObjectDetection.h +++ b/src/effects/ObjectDetection.h @@ -45,16 +45,24 @@ // Struct that stores the detected bounding boxes for all the clip frames struct DetectionData{ DetectionData(){} - DetectionData(std::vector _classIds, std::vector _confidences, std::vector> _boxes, size_t _frameId){ + DetectionData( + std::vector _classIds, + std::vector _confidences, + std::vector> _boxes, + size_t _frameId, + std::vector _objectIds) + { classIds = _classIds; confidences = _confidences; boxes = _boxes; frameId = _frameId; + objectIds = _objectIds; } size_t frameId; std::vector classIds; std::vector confidences; std::vector> boxes; + std::vector objectIds; }; namespace openshot @@ -74,7 +82,7 @@ namespace openshot /// Init effect settings void init_effect_details(); - void drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame); + void drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame, int objectNumber); public: diff --git a/src/protobuf_messages/objdetectdata.proto b/src/protobuf_messages/objdetectdata.proto index 49ad94af..272ffb4f 100644 --- a/src/protobuf_messages/objdetectdata.proto +++ b/src/protobuf_messages/objdetectdata.proto @@ -17,6 +17,7 @@ message Frame { float h = 4; int32 classId = 5; float confidence = 6; + int32 objectId = 7; } repeated Box bounding_box = 2; diff --git a/src/sort_filter/sort.hpp b/src/sort_filter/sort.hpp index 295b9d56..086af1f1 100644 --- a/src/sort_filter/sort.hpp +++ b/src/sort_filter/sort.hpp @@ -39,7 +39,7 @@ public: double GetCentroidsDistance(cv::Rect_ bb_test, cv::Rect_ 
bb_gt); std::vector trackers; - double max_centroid_dist_norm = 0.15; + double max_centroid_dist_norm = 0.05; std::vector> predictedBoxes; std::vector> centroid_dist_matrix;