/** * @file * @brief Source file for CVObjectDetection class * @author Jonathan Thomas * @author Brenno Caldato * * @ref License */ // Copyright (c) 2008-2019 OpenShot Studios, LLC // // SPDX-License-Identifier: LGPL-3.0-or-later #include #include #include #include "CVObjectDetection.h" #include "objdetectdata.pb.h" #include using namespace std; using namespace openshot; using google::protobuf::util::TimeUtil; CVObjectDetection::CVObjectDetection(std::string processInfoJson, ProcessingController &processingController) : processingController(&processingController), processingDevice("CPU"){ SetJson(processInfoJson); confThreshold = 0.5; nmsThreshold = 0.1; } void CVObjectDetection::setProcessingDevice(){ if(processingDevice == "GPU"){ net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); } else if(processingDevice == "CPU"){ net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); } } void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, size_t _end, bool process_interval) { start = _start; end = _end; video.Open(); if(error){ return; } processingController->SetError(false, ""); // Load names of classes std::ifstream ifs(classesFile.c_str()); std::string line; while (std::getline(ifs, line)) classNames.push_back(line); // Load the network if(classesFile == "" || modelConfiguration == "" || modelWeights == "") return; net = cv::dnn::readNetFromDarknet(modelConfiguration, modelWeights); setProcessingDevice(); size_t frame_number; if(!process_interval || end <= 1 || end-start == 0){ // Get total number of frames in video start = (int)(video.Start() * video.Reader()->info.fps.ToFloat()); end = (int)(video.End() * video.Reader()->info.fps.ToFloat()); } for (frame_number = start; frame_number <= end; frame_number++) { // Stop the feature tracker process if(processingController->ShouldStop()){ return; } std::shared_ptr f = video.GetFrame(frame_number); // Grab OpenCV Mat image cv::Mat cvimage = f->GetImageCV(); DetectObjects(cvimage, frame_number); // Update progress processingController->SetProgress(uint(100*(frame_number-start)/(end-start))); } } void CVObjectDetection::DetectObjects(const cv::Mat &frame, size_t frameId){ // Get frame as OpenCV Mat cv::Mat blob; // Create a 4D blob from the frame. int inpWidth, inpHeight; inpWidth = inpHeight = 416; cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0,0,0), true, false); //Sets the input to the network net.setInput(blob); // Runs the forward pass to get output of the output layers std::vector outs; net.forward(outs, getOutputsNames(net)); // Remove the bounding boxes with low confidence postprocess(frame.size(), outs, frameId); } // Remove the bounding boxes with low confidence using non-maxima suppression void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector& outs, size_t frameId) { std::vector classIds; std::vector confidences; std::vector boxes; std::vector objectIds; for (size_t i = 0; i < outs.size(); ++i) { // Scan through all the bounding boxes output from the network and keep only the // ones with high confidence scores. Assign the box's class label as the class // with the highest score for the box. float* data = (float*)outs[i].data; for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols) { cv::Mat scores = outs[i].row(j).colRange(5, outs[i].cols); cv::Point classIdPoint; double confidence; // Get the value and location of the maximum score cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint); if (confidence > confThreshold) { int centerX = (int)(data[0] * frameDims.width); int centerY = (int)(data[1] * frameDims.height); int width = (int)(data[2] * frameDims.width); int height = (int)(data[3] * frameDims.height); int left = centerX - width / 2; int top = centerY - height / 2; classIds.push_back(classIdPoint.x); confidences.push_back((float)confidence); boxes.push_back(cv::Rect(left, top, width, height)); } } } // Perform non maximum suppression to eliminate redundant overlapping boxes with // lower confidences std::vector indices; cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices); // Pass boxes to SORT algorithm std::vector sortBoxes; for(auto box : boxes) sortBoxes.push_back(box); sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds); // Clear data vectors boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear(); // Get SORT predicted boxes for(auto TBox : sort.frameTrackingResult){ if(TBox.frame == frameId){ boxes.push_back(TBox.box); confidences.push_back(TBox.confidence); classIds.push_back(TBox.classId); objectIds.push_back(TBox.id); } } // Remove boxes based on controids distance for(uint i = 0; i= confidences[j]){ boxes.erase(boxes.begin() + j); classIds.erase(classIds.begin() + j); confidences.erase(confidences.begin() + j); objectIds.erase(objectIds.begin() + j); break; } else{ boxes.erase(boxes.begin() + i); classIds.erase(classIds.begin() + i); confidences.erase(confidences.begin() + i); objectIds.erase(objectIds.begin() + i); i = 0; break; } } } } } // Remove boxes based in IOU score for(uint i = 0; i= confidences[j]){ boxes.erase(boxes.begin() + j); classIds.erase(classIds.begin() + j); confidences.erase(confidences.begin() + j); objectIds.erase(objectIds.begin() + j); break; } else{ boxes.erase(boxes.begin() + i); classIds.erase(classIds.begin() + i); confidences.erase(confidences.begin() + i); objectIds.erase(objectIds.begin() + i); i = 0; break; } } } } } // Normalize boxes coordinates std::vector> normalized_boxes; for(auto box : boxes){ cv::Rect_ normalized_box; normalized_box.x = (box.x)/(float)frameDims.width; normalized_box.y = (box.y)/(float)frameDims.height; normalized_box.width = (box.width)/(float)frameDims.width; normalized_box.height = (box.height)/(float)frameDims.height; normalized_boxes.push_back(normalized_box); } detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds); } // Compute IOU between 2 boxes bool CVObjectDetection::iou(cv::Rect pred_box, cv::Rect sort_box){ // Determine the (x, y)-coordinates of the intersection rectangle int xA = std::max(pred_box.x, sort_box.x); int yA = std::max(pred_box.y, sort_box.y); int xB = std::min(pred_box.x + pred_box.width, sort_box.x + sort_box.width); int yB = std::min(pred_box.y + pred_box.height, sort_box.y + sort_box.height); // Compute the area of intersection rectangle int interArea = std::max(0, xB - xA + 1) * std::max(0, yB - yA + 1); // Compute the area of both the prediction and ground-truth rectangles int boxAArea = (pred_box.width + 1) * (pred_box.height + 1); int boxBArea = (sort_box.width + 1) * (sort_box.height + 1); // Compute the intersection over union by taking the intersection float iou = interArea / (float)(boxAArea + boxBArea - interArea); // If IOU is above this value the boxes are very close (probably a variation of the same bounding box) if(iou > 0.5) return true; return false; } // Get the names of the output layers std::vector CVObjectDetection::getOutputsNames(const cv::dnn::Net& net) { static std::vector names; //Get the indices of the output layers, i.e. the layers with unconnected outputs std::vector outLayers = net.getUnconnectedOutLayers(); //get the names of all the layers in the network std::vector layersNames = net.getLayerNames(); // Get the names of the output layers in names names.resize(outLayers.size()); for (size_t i = 0; i < outLayers.size(); ++i) names[i] = layersNames[outLayers[i] - 1]; return names; } CVDetectionData CVObjectDetection::GetDetectionData(size_t frameId){ // Check if the stabilizer info for the requested frame exists if ( detectionsData.find(frameId) == detectionsData.end() ) { return CVDetectionData(); } else { return detectionsData[frameId]; } } bool CVObjectDetection::SaveObjDetectedData(){ // Create tracker message pb_objdetect::ObjDetect objMessage; //Save class names in protobuf message for(int i = 0; iassign(classNames.at(i)); } // Iterate over all frames data and save in protobuf message for(std::map::iterator it=detectionsData.begin(); it!=detectionsData.end(); ++it){ CVDetectionData dData = it->second; pb_objdetect::Frame* pbFrameData; AddFrameDataToProto(objMessage.add_frame(), dData); } // Add timestamp *objMessage.mutable_last_updated() = TimeUtil::SecondsToTimestamp(time(NULL)); { // Write the new message to disk. std::fstream output(protobuf_data_path, ios::out | ios::trunc | ios::binary); if (!objMessage.SerializeToOstream(&output)) { cerr << "Failed to write protobuf message." << endl; return false; } } // Delete all global objects allocated by libprotobuf. google::protobuf::ShutdownProtobufLibrary(); return true; } // Add frame object detection into protobuf message. void CVObjectDetection::AddFrameDataToProto(pb_objdetect::Frame* pbFrameData, CVDetectionData& dData) { // Save frame number and rotation pbFrameData->set_id(dData.frameId); for(size_t i = 0; i < dData.boxes.size(); i++){ pb_objdetect::Frame_Box* box = pbFrameData->add_bounding_box(); // Save bounding box data box->set_x(dData.boxes.at(i).x); box->set_y(dData.boxes.at(i).y); box->set_w(dData.boxes.at(i).width); box->set_h(dData.boxes.at(i).height); box->set_classid(dData.classIds.at(i)); box->set_confidence(dData.confidences.at(i)); box->set_objectid(dData.objectIds.at(i)); } } // Load JSON string into this object void CVObjectDetection::SetJson(const std::string value) { // Parse JSON string into JSON objects try { const Json::Value root = openshot::stringToJson(value); // Set all values that match SetJsonValue(root); } catch (const std::exception& e) { // Error parsing JSON (or missing keys) // throw InvalidJSON("JSON is invalid (missing keys or invalid data types)"); std::cout<<"JSON is invalid (missing keys or invalid data types)"<SetError(true, "Incorrect path to model config file"); error = true; } } if (!root["model-weights"].isNull()){ modelWeights= (root["model-weights"].asString()); std::ifstream infile(modelWeights); if(!infile.good()){ processingController->SetError(true, "Incorrect path to model weight file"); error = true; } } if (!root["class-names"].isNull()){ classesFile = (root["class-names"].asString()); std::ifstream infile(classesFile); if(!infile.good()){ processingController->SetError(true, "Incorrect path to class name file"); error = true; } } } /* |||||||||||||||||||||||||||||||||||||||||||||||||| ONLY FOR MAKE TEST |||||||||||||||||||||||||||||||||||||||||||||||||| */ // Load protobuf data file bool CVObjectDetection::_LoadObjDetectdData(){ // Create tracker message pb_objdetect::ObjDetect objMessage; { // Read the existing tracker message. fstream input(protobuf_data_path, ios::in | ios::binary); if (!objMessage.ParseFromIstream(&input)) { cerr << "Failed to parse protobuf message." << endl; return false; } } // Make sure classNames and detectionsData are empty classNames.clear(); detectionsData.clear(); // Get all classes names and assign a color to them for(int i = 0; i < objMessage.classnames_size(); i++){ classNames.push_back(objMessage.classnames(i)); } // Iterate over all frames of the saved message for (size_t i = 0; i < objMessage.frame_size(); i++) { // Create protobuf message reader const pb_objdetect::Frame& pbFrameData = objMessage.frame(i); // Get frame Id size_t id = pbFrameData.id(); // Load bounding box data const google::protobuf::RepeatedPtrField &pBox = pbFrameData.bounding_box(); // Construct data vectors related to detections in the current frame std::vector classIds; std::vector confidences; std::vector> boxes; std::vector objectIds; for(int i = 0; i < pbFrameData.bounding_box_size(); i++){ // Get bounding box coordinates float x = pBox.Get(i).x(); float y = pBox.Get(i).y(); float w = pBox.Get(i).w(); float h = pBox.Get(i).h(); // Create OpenCV rectangle with the bouding box info cv::Rect_ box(x, y, w, h); // Get class Id (which will be assign to a class name) and prediction confidence int classId = pBox.Get(i).classid(); float confidence = pBox.Get(i).confidence(); // Get object Id int objectId = pBox.Get(i).objectid(); // Push back data into vectors boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence); } // Assign data to object detector map detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds); } // Delete all global objects allocated by libprotobuf. google::protobuf::ShutdownProtobufLibrary(); return true; }