From 32a217eda7c1452e724cc23905eb5f2adc5850d6 Mon Sep 17 00:00:00 2001 From: Brenno Date: Fri, 22 Jan 2021 19:28:16 -0300 Subject: [PATCH] Added support to attach clip to detected object (ObjectDetection) effect This feature lets the user attach a clip to an object detected by the Object Detection effect, in the same way it is done with the Tracker Effect. --- src/CVObjectDetection.cpp | 21 ++++-- src/CVObjectDetection.h | 10 ++- src/Timeline.cpp | 38 +++++++++++ src/Timeline.h | 2 + src/TrackedObjectBBox.cpp | 2 +- src/TrackedObjectBBox.h | 2 +- src/TrackedObjectBase.h | 5 +- src/effects/ObjectDetection.cpp | 80 ++++++++++++++++++----- src/effects/ObjectDetection.h | 12 +++- src/protobuf_messages/objdetectdata.proto | 1 + src/sort_filter/sort.hpp | 2 +- 11 files changed, 148 insertions(+), 27 deletions(-) diff --git a/src/CVObjectDetection.cpp b/src/CVObjectDetection.cpp index c391197a..b262f8c5 100644 --- a/src/CVObjectDetection.cpp +++ b/src/CVObjectDetection.cpp @@ -101,7 +101,6 @@ void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, // Update progress processingController->SetProgress(uint(100*(frame_number-start)/(end-start))); - // std::cout<<"Frame: "< classIds; std::vector confidences; std::vector boxes; + std::vector objectIds; for (size_t i = 0; i < outs.size(); ++i) { @@ -176,13 +176,14 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds); // Clear data vectors - boxes.clear(); confidences.clear(); classIds.clear(); + boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear(); // Get SORT predicted boxes for(auto TBox : sort.frameTrackingResult){ if(TBox.frame == frameId){ boxes.push_back(TBox.box); confidences.push_back(TBox.confidence); classIds.push_back(TBox.classId); + objectIds.push_back(TBox.id); } } @@ -198,12 +199,14 @@ void CVObjectDetection::postprocess(const 
cv::Size &frameDims, const std::vector boxes.erase(boxes.begin() + j); classIds.erase(classIds.begin() + j); confidences.erase(confidences.begin() + j); + objectIds.erase(objectIds.begin() + j); break; } else{ boxes.erase(boxes.begin() + i); classIds.erase(classIds.begin() + i); confidences.erase(confidences.begin() + i); + objectIds.erase(objectIds.begin() + i); i = 0; break; } @@ -222,12 +225,14 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector boxes.erase(boxes.begin() + j); classIds.erase(classIds.begin() + j); confidences.erase(confidences.begin() + j); + objectIds.erase(objectIds.begin() + j); break; } else{ boxes.erase(boxes.begin() + i); classIds.erase(classIds.begin() + i); confidences.erase(confidences.begin() + i); + objectIds.erase(objectIds.begin() + i); i = 0; break; } @@ -247,7 +252,7 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector normalized_boxes.push_back(normalized_box); } - detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId); + detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds); } // Compute IOU between 2 boxes @@ -355,6 +360,7 @@ void CVObjectDetection::AddFrameDataToProto(pb_objdetect::Frame* pbFrameData, CV box->set_h(dData.boxes.at(i).height); box->set_classid(dData.classIds.at(i)); box->set_confidence(dData.confidences.at(i)); + box->set_objectid(dData.objectIds.at(i)); } } @@ -457,7 +463,10 @@ bool CVObjectDetection::_LoadObjDetectdData(){ const google::protobuf::RepeatedPtrField &pBox = pbFrameData.bounding_box(); // Construct data vectors related to detections in the current frame - std::vector classIds; std::vector confidences; std::vector> boxes; + std::vector classIds; + std::vector confidences; + std::vector> boxes; + std::vector objectIds; for(int i = 0; i < pbFrameData.bounding_box_size(); i++){ // Get bounding box coordinates @@ -468,13 +477,15 @@ bool 
CVObjectDetection::_LoadObjDetectdData(){ // Get class Id (which will be assign to a class name) and prediction confidence int classId = pBox.Get(i).classid(); float confidence = pBox.Get(i).confidence(); + // Get object Id + int objectId = pBox.Get(i).objectid(); // Push back data into vectors boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence); objectIds.push_back(objectId); } // Assign data to object detector map - detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id); + detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds); } // Show the time stamp from the last update in object detector data file diff --git a/src/CVObjectDetection.h b/src/CVObjectDetection.h index ae3dbf7d..3ec80217 100644 --- a/src/CVObjectDetection.h +++ b/src/CVObjectDetection.h @@ -49,16 +49,24 @@ namespace openshot // Stores the detected object bounding boxes and its properties. struct CVDetectionData{ CVDetectionData(){} - CVDetectionData(std::vector _classIds, std::vector _confidences, std::vector> _boxes, size_t _frameId){ + CVDetectionData( + std::vector _classIds, + std::vector _confidences, + std::vector> _boxes, + size_t _frameId, + std::vector _objectIds) + { classIds = _classIds; confidences = _confidences; boxes = _boxes; frameId = _frameId; + objectIds = _objectIds; } size_t frameId; std::vector classIds; std::vector confidences; std::vector> boxes; + std::vector objectIds; }; /** diff --git a/src/Timeline.cpp b/src/Timeline.cpp index de69bcce..05b9bead 100644 --- a/src/Timeline.cpp +++ b/src/Timeline.cpp @@ -290,6 +290,44 @@ std::list Timeline::GetTrackedObjectsIds() const{ return trackedObjects_ids; } +std::string Timeline::GetTrackedObjectValues(std::string id) const { + + // Initialize the JSON object + Json::Value trackedObjectJson; + + // Search for the tracked object on the map + auto iterator = tracked_objects.find(id); + + if (iterator != tracked_objects.end()) + { + // Id found, Get the object pointer and cast it as a 
TrackedObjectBBox + std::shared_ptr trackedObject = std::static_pointer_cast(iterator->second); + + // Get the trackedObject values for its first frame + auto boxes = trackedObject->BoxVec; + auto firstBox = boxes.begin()->second; + float x1 = firstBox.cx - (firstBox.width/2); + float y1 = firstBox.cy - (firstBox.height/2); + float x2 = firstBox.cx + (firstBox.width/2); + float y2 = firstBox.cy + (firstBox.height/2); + + trackedObjectJson["x1"] = x1; + trackedObjectJson["y1"] = y1; + trackedObjectJson["x2"] = x2; + trackedObjectJson["y2"] = y2; + + } + else { + // Id not found, return all 0 values + trackedObjectJson["x1"] = 0; + trackedObjectJson["y1"] = 0; + trackedObjectJson["x2"] = 0; + trackedObjectJson["y2"] = 0; + } + + return trackedObjectJson.toStyledString(); +} + // Add an openshot::Clip to the timeline void Timeline::AddClip(Clip* clip) { diff --git a/src/Timeline.h b/src/Timeline.h index 69cebc8a..e9b201b0 100644 --- a/src/Timeline.h +++ b/src/Timeline.h @@ -250,6 +250,8 @@ namespace openshot { std::shared_ptr GetTrackedObject(std::string id) const; /// Return the ID's of the tracked objects as a list of strings std::list GetTrackedObjectsIds() const; + /// Return the first trackedObject's properties as a JSON string + std::string GetTrackedObjectValues(std::string id) const; /// @brief Add an openshot::Clip to the timeline /// @param clip Add an openshot::Clip to the timeline. A clip can contain any type of Reader. diff --git a/src/TrackedObjectBBox.cpp b/src/TrackedObjectBBox.cpp index 40ba5e21..75f96094 100644 --- a/src/TrackedObjectBBox.cpp +++ b/src/TrackedObjectBBox.cpp @@ -230,7 +230,7 @@ bool TrackedObjectBBox::LoadBoxData(std::string inputFilePath) // Read the existing tracker message. fstream input(inputFilePath, ios::in | ios::binary); - //Check if it was able to read the protobuf data + // Check if it was able to read the protobuf data if (!bboxMessage.ParseFromIstream(&input)) { cerr << "Failed to parse protobuf message." 
<< endl; diff --git a/src/TrackedObjectBBox.h b/src/TrackedObjectBBox.h index 4a5a34ad..a8deadf6 100644 --- a/src/TrackedObjectBBox.h +++ b/src/TrackedObjectBBox.h @@ -174,7 +174,7 @@ namespace openshot TrackedObjectBBox(); /// Add a BBox to the BoxVec map - void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle); + void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) override; /// Update object's BaseFps void SetBaseFPS(Fraction fps); diff --git a/src/TrackedObjectBase.h b/src/TrackedObjectBase.h index 683b0b3f..0a336cf2 100644 --- a/src/TrackedObjectBase.h +++ b/src/TrackedObjectBase.h @@ -83,7 +83,10 @@ namespace openshot { virtual std::map GetBoxValues(int64_t frame_number) const { std::map ret; return ret; }; /// Return the main properties of the tracked object's parent clip virtual std::map GetParentClipProperties(int64_t frame_number) const { std::map ret; return ret; } - + /// Add a bounding box to the tracked object's BoxVec map + virtual void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) { return; }; + + /// Get and Set JSON methods virtual std::string Json() const = 0; ///< Generate JSON string of this object virtual Json::Value JsonValue() const = 0; ///< Generate Json::Value for this object diff --git a/src/effects/ObjectDetection.cpp b/src/effects/ObjectDetection.cpp index 929c9039..8036e9fc 100644 --- a/src/effects/ObjectDetection.cpp +++ b/src/effects/ObjectDetection.cpp @@ -28,12 +28,15 @@ * along with OpenShot Library. If not, see . 
*/ +#include + #include "effects/ObjectDetection.h" #include "effects/Tracker.h" using namespace std; using namespace openshot; + /// Blank constructor, useful when using Json to load the effect properties ObjectDetection::ObjectDetection(std::string clipObDetectDataPath) { @@ -64,7 +67,7 @@ void ObjectDetection::init_effect_details() info.description = "Detect objects through the video."; info.has_audio = false; info.has_video = true; - info.has_tracked_object = false; + info.has_tracked_object = true; } // This method is required for all derived classes of EffectBase, and returns a @@ -92,7 +95,7 @@ std::shared_ptr ObjectDetection::GetFrame(std::shared_ptr frame, i (int)(bb_nrml.width*fw), (int)(bb_nrml.height*fh)); drawPred(detections.classIds.at(i), detections.confidences.at(i), - box, cv_image); + box, cv_image, detections.objectIds.at(i)); } } @@ -103,7 +106,7 @@ std::shared_ptr ObjectDetection::GetFrame(std::shared_ptr frame, i return frame; } -void ObjectDetection::drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame) +void ObjectDetection::drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame, int objectNumber) { //Draw a rectangle displaying the bounding box @@ -133,29 +136,32 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){ // Create tracker message pb_objdetect::ObjDetect objMessage; - { - // Read the existing tracker message. - fstream input(inputFilePath, ios::in | ios::binary); - if (!objMessage.ParseFromIstream(&input)) { - cerr << "Failed to parse protobuf message." << endl; - return false; - } + + // Read the existing tracker message. + fstream input(inputFilePath, ios::in | ios::binary); + if (!objMessage.ParseFromIstream(&input)) { + cerr << "Failed to parse protobuf message." 
<< endl; + return false; } + - // Make sure classNames and detectionsData are empty + // Make sure classNames, detectionsData and trackedObjects are empty classNames.clear(); detectionsData.clear(); + trackedObjects.clear(); // Seed to generate same random numbers std::srand(1); // Get all classes names and assign a color to them - for(int i = 0; i < objMessage.classnames_size(); i++){ + for(int i = 0; i < objMessage.classnames_size(); i++) + { classNames.push_back(objMessage.classnames(i)); classesColor.push_back(cv::Scalar(std::rand()%205 + 50, std::rand()%205 + 50, std::rand()%205 + 50)); } // Iterate over all frames of the saved message - for (size_t i = 0; i < objMessage.frame_size(); i++) { + for (size_t i = 0; i < objMessage.frame_size(); i++) + { // Create protobuf message reader const pb_objdetect::Frame& pbFrameData = objMessage.frame(i); @@ -169,8 +175,11 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){ std::vector classIds; std::vector confidences; std::vector> boxes; + std::vector objectIds; - for(int i = 0; i < pbFrameData.bounding_box_size(); i++){ + // Iterate through the detected objects + for(int i = 0; i < pbFrameData.bounding_box_size(); i++) + { // Get bounding box coordinates float x = pBox.Get(i).x(); float y = pBox.Get(i).y(); @@ -180,6 +189,26 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){ int classId = pBox.Get(i).classid(); // Get prediction confidence float confidence = pBox.Get(i).confidence(); + + // Get the object Id + int objectId = pBox.Get(i).objectid(); + + // Search for the object id on trackedObjects map + auto trackedObject = trackedObjects.find(objectId); + // Check if object already exists on the map + if (trackedObject != trackedObjects.end()) + { + // Add a new BBox to it + trackedObject->second->AddBox(id, x+(w/2), y+(h/2), w, h, 0.0); + } + else + { + // There is no tracked object with that id, so insert a new one + TrackedObjectBBox trackedObj; + trackedObj.AddBox(id, 
x+(w/2), y+(h/2), w, h, 0.0); + std::shared_ptr trackedObjPtr = std::make_shared(trackedObj); + trackedObjects.insert({objectId, trackedObjPtr}); + } // Create OpenCV rectangle with the bouding box info cv::Rect_ box(x, y, w, h); @@ -188,10 +217,11 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){ boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence); + objectIds.push_back(objectId); } // Assign data to object detector map - detectionsData[id] = DetectionData(classIds, confidences, boxes, id); + detectionsData[id] = DetectionData(classIds, confidences, boxes, id, objectIds); } // Delete all global objects allocated by libprotobuf. @@ -226,6 +256,12 @@ Json::Value ObjectDetection::JsonValue() const { Json::Value root = EffectBase::JsonValue(); // get parent properties root["type"] = info.class_name; root["protobuf_data_path"] = protobuf_data_path; + + // Add trackedObjects IDs to JSON + for (auto const& trackedObject : trackedObjects){ + // Save the trackedObject Id on root + root["box_id"+to_string(trackedObject.first)] = trackedObject.second->Id(); + } // return JsonValue return root; @@ -262,6 +298,12 @@ void ObjectDetection::SetJsonValue(const Json::Value root) { protobuf_data_path = ""; } } + + for (auto const& trackedObject : trackedObjects){ + Json::Value trackedObjectJSON; + trackedObjectJSON["box_id"] = root["box_id"+to_string(trackedObject.first)]; + trackedObject.second->SetJsonValue(trackedObjectJSON); + } } // Get all properties for a specific frame @@ -269,6 +311,14 @@ std::string ObjectDetection::PropertiesJSON(int64_t requested_frame) const { // Generate JSON properties list Json::Value root; + + // Add trackedObjects IDs to JSON + for (auto const& trackedObject : trackedObjects){ + // Save the trackedObject Id on root + Json::Value trackedObjectJSON = trackedObject.second->PropertiesJSON(requested_frame); + root["box_id"+to_string(trackedObject.first)] = trackedObjectJSON["box_id"]; + } + 
root["id"] = add_property_json("ID", 0.0, "string", Id(), NULL, -1, -1, true, requested_frame); root["position"] = add_property_json("Position", Position(), "float", "", NULL, 0, 1000 * 60 * 30, false, requested_frame); root["layer"] = add_property_json("Track", Layer(), "int", "", NULL, 0, 20, false, requested_frame); diff --git a/src/effects/ObjectDetection.h b/src/effects/ObjectDetection.h index 31518c86..64df91c3 100644 --- a/src/effects/ObjectDetection.h +++ b/src/effects/ObjectDetection.h @@ -45,16 +45,24 @@ // Struct that stores the detected bounding boxes for all the clip frames struct DetectionData{ DetectionData(){} - DetectionData(std::vector _classIds, std::vector _confidences, std::vector> _boxes, size_t _frameId){ + DetectionData( + std::vector _classIds, + std::vector _confidences, + std::vector> _boxes, + size_t _frameId, + std::vector _objectIds) + { classIds = _classIds; confidences = _confidences; boxes = _boxes; frameId = _frameId; + objectIds = _objectIds; } size_t frameId; std::vector classIds; std::vector confidences; std::vector> boxes; + std::vector objectIds; }; namespace openshot @@ -74,7 +82,7 @@ namespace openshot /// Init effect settings void init_effect_details(); - void drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame); + void drawPred(int classId, float conf, cv::Rect2d box, cv::Mat& frame, int objectNumber); public: diff --git a/src/protobuf_messages/objdetectdata.proto b/src/protobuf_messages/objdetectdata.proto index 49ad94af..272ffb4f 100644 --- a/src/protobuf_messages/objdetectdata.proto +++ b/src/protobuf_messages/objdetectdata.proto @@ -17,6 +17,7 @@ message Frame { float h = 4; int32 classId = 5; float confidence = 6; + int32 objectId = 7; } repeated Box bounding_box = 2; diff --git a/src/sort_filter/sort.hpp b/src/sort_filter/sort.hpp index 295b9d56..086af1f1 100644 --- a/src/sort_filter/sort.hpp +++ b/src/sort_filter/sort.hpp @@ -39,7 +39,7 @@ public: double GetCentroidsDistance(cv::Rect_ bb_test, cv::Rect_ 
bb_gt); std::vector trackers; - double max_centroid_dist_norm = 0.15; + double max_centroid_dist_norm = 0.05; std::vector> predictedBoxes; std::vector> centroid_dist_matrix;