34#ifndef VP_DETECTOR_DNN_OPENCV_H
35#define VP_DETECTOR_DNN_OPENCV_H
37#include <visp3/core/vpConfig.h>
41#if defined(VISP_HAVE_OPENCV) && (VISP_HAVE_OPENCV_VERSION >= 0x030403) && defined(HAVE_OPENCV_DNN) && \
42 ((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
48#include <opencv2/dnn.hpp>
50#include <visp3/core/vpColor.h>
51#include <visp3/core/vpDisplay.h>
52#include <visp3/core/vpImage.h>
53#include <visp3/core/vpRect.h>
57#ifdef VISP_HAVE_NLOHMANN_JSON
58#include VISP_NLOHMANN_JSON(json.hpp)
113 } DNNResultsParsingType;
120 } DetectionCandidates;
147 ,
double v_min,
double v_max
148 ,
unsigned int cls,
double score
149 ,
const std::optional<std::string> &classname
180 template <
typename Type >
184 } DetectedFeatures2D;
193 float m_confThreshold;
194 float m_nmsThreshold;
195 std::vector<std::string> m_classNames;
196 cv::Size m_inputSize;
197 double m_filterSizeRatio;
200 double m_scaleFactor;
203 std::string m_modelFilename;
204 std::string m_modelConfigFilename;
205 std::string m_framework;
207#ifdef VISP_HAVE_NLOHMANN_JSON
217 config.m_confThreshold = j.value(
"confidenceThreshold", config.m_confThreshold);
218 if (config.m_confThreshold <= 0) {
222 config.m_nmsThreshold = j.value(
"nmsThreshold", config.m_nmsThreshold);
223 if (config.m_nmsThreshold <= 0) {
227 config.m_filterSizeRatio = j.value(
"filterSizeRatio", config.m_filterSizeRatio);
229 config.m_classNames = j.value(
"classNames", config.m_classNames);
231 std::pair<unsigned int, unsigned int> resolution = j.value(
"resolution", std::pair<unsigned int, unsigned int>(config.m_inputSize.width, config.m_inputSize.height));
232 config.m_inputSize.width = resolution.first;
233 config.m_inputSize.height = resolution.second;
235 std::vector<double> v_mean = j.value(
"mean", std::vector<double>({ config.m_mean[0], config.m_mean[1], config.m_mean[2] }));
236 if (v_mean.size() != 3) {
239 config.m_mean = cv::Scalar(v_mean[0], v_mean[1], v_mean[2]);
241 config.m_scaleFactor = j.value(
"scale", config.m_scaleFactor);
242 config.m_swapRB = j.value(
"swapRB", config.m_swapRB);
244 config.m_modelFilename = j.value(
"modelFile", config.m_modelFilename);
245 config.m_modelConfigFilename = j.value(
"configurationFile", config.m_modelConfigFilename);
246 config.m_framework = j.value(
"framework", config.m_framework);
257 std::pair<unsigned int, unsigned int> resolution = { config.m_inputSize.width, config.m_inputSize.height };
258 std::vector<double> v_mean = { config.m_mean[0], config.m_mean[1], config.m_mean[2] };
260 {
"confidenceThreshold", config.m_confThreshold } ,
261 {
"nmsThreshold" , config.m_nmsThreshold } ,
262 {
"filterSizeRatio" , config.m_filterSizeRatio} ,
263 {
"classNames" , config.m_classNames } ,
264 {
"resolution" , resolution } ,
266 {
"scale" , config.m_scaleFactor } ,
267 {
"swapRB" , config.m_swapRB } ,
269 {
"modelFile" , config.m_modelFilename } ,
270 {
"configurationFile" , config.m_modelConfigFilename } ,
271 {
"framework" , config.m_framework }
301 std::vector<std::string> classNames;
302 std::ifstream ifs(filename);
304 while (getline(ifs, line)) {
305 if (line.find(
"[") == std::string::npos) {
306 classNames.push_back(line);
309 std::string lineWithoutBracket;
310 if (line.find(
"[") != std::string::npos) {
311 lineWithoutBracket = line.substr(line.find(
"[") + 1, line.size() - 2);
314 while (!lineWithoutBracket.empty()) {
315 std::string className;
316 auto start_pos = lineWithoutBracket.find(
"\"");
317 auto end_pos = lineWithoutBracket.find(
"\"", start_pos + 1);
318 className = lineWithoutBracket.substr(start_pos + 1, end_pos - (start_pos + 1));
319 if (!className.empty()) {
320 classNames.push_back(className);
321 lineWithoutBracket = lineWithoutBracket.substr(end_pos + 1);
333 : m_confThreshold(0.5f)
334 , m_nmsThreshold(0.4f)
336 , m_inputSize(300, 300)
337 , m_filterSizeRatio(0.)
338 , m_mean(127.5, 127.5, 127.5)
339 , m_scaleFactor(2.0 / 255.0)
343 , m_modelConfigFilename()
350 : m_confThreshold(config.m_confThreshold)
351 , m_nmsThreshold(config.m_nmsThreshold)
352 , m_classNames(config.m_classNames)
353 , m_inputSize(config.m_inputSize.width, config.m_inputSize.height)
354 , m_filterSizeRatio(config.m_filterSizeRatio)
355 , m_mean(cv::Scalar(config.m_mean[0], config.m_mean[1], config.m_mean[2]))
356 , m_scaleFactor(config.m_scaleFactor)
357 , m_swapRB(config.m_swapRB)
358 , m_parsingMethodType(config.m_parsingMethodType)
359 , m_modelFilename(config.m_modelFilename)
360 , m_modelConfigFilename(config.m_modelConfigFilename)
361 , m_framework(config.m_framework)
383 inline NetConfig(
float confThresh,
const float &nmsThresh,
const std::vector<std::string> &classNames,
const cv::Size &dnnInputSize,
const double &filterSizeRatio = 0.
384 ,
const cv::Scalar &mean = cv::Scalar(127.5, 127.5, 127.5),
const double &scaleFactor = 2. / 255.,
const bool &swapRB =
true
386 : m_confThreshold(confThresh)
387 , m_nmsThreshold(nmsThresh)
388 , m_classNames(classNames)
389 , m_inputSize(dnnInputSize)
390 , m_filterSizeRatio(filterSizeRatio)
392 , m_scaleFactor(scaleFactor)
394 , m_parsingMethodType(parsingType)
395 , m_modelFilename(modelFilename)
396 , m_modelConfigFilename(configFilename)
397 , m_framework(framework)
417 inline NetConfig(
const float &confThresh,
const float &nmsThresh,
const std::string &classNamesFile,
const cv::Size &dnnInputSize,
const double &filterSizeRatio = 0.
418 ,
const cv::Scalar &mean = cv::Scalar(127.5, 127.5, 127.5),
const double &scaleFactor = 2. / 255.,
const bool &swapRB =
true
420 : m_confThreshold(confThresh)
421 , m_nmsThreshold(nmsThresh)
422 , m_inputSize(dnnInputSize)
423 , m_filterSizeRatio(filterSizeRatio)
425 , m_scaleFactor(scaleFactor)
427 , m_parsingMethodType(parsingType)
428 , m_modelFilename(modelFilename)
429 , m_modelConfigFilename(configFilename)
430 , m_framework(framework)
438 text +=
"Model : " + m_modelFilename +
"\n";
440 text +=
"Config (optional): " + (m_modelConfigFilename.empty() ?
"\"None\"" : m_modelConfigFilename) +
"\n";
441 text +=
"Framework (optional): " + (m_framework.empty() ?
"\"None\"" : m_framework) +
"\n";
442 text +=
"Width x Height : " + std::to_string(m_inputSize.width) +
" x " + std::to_string(m_inputSize.height) +
"\n";
443 text +=
"Mean RGB : " + std::to_string(m_mean[0]) +
" " + std::to_string(m_mean[1]) +
" " + std::to_string(m_mean[2]) +
"\n";
444 text +=
"Scale : " + std::to_string(m_scaleFactor) +
"\n";
445 text +=
"Swap RB? : " + (m_swapRB ? std::string(
"true") : std::string(
"false")) +
"\n";
446 text +=
"Confidence threshold : " + std::to_string(m_confThreshold) +
"\n";
447 text +=
"NMS threshold : " + std::to_string(m_nmsThreshold) +
"\n";
448 text +=
"Filter threshold : " +
449 (m_filterSizeRatio > std::numeric_limits<double>::epsilon() ? std::to_string(m_filterSizeRatio)
450 :
"disabled") +
"\n";
456 os << config.toString();
462 m_confThreshold = config.m_confThreshold;
463 m_nmsThreshold = config.m_nmsThreshold;
464 m_classNames = config.m_classNames;
465 m_inputSize = cv::Size(config.m_inputSize.width, config.m_inputSize.height);
466 m_filterSizeRatio = config.m_filterSizeRatio;
467 m_mean = cv::Scalar(config.m_mean[0], config.m_mean[1], config.m_mean[2]);
468 m_scaleFactor = config.m_scaleFactor;
469 m_swapRB = config.m_swapRB;
470 m_parsingMethodType = config.m_parsingMethodType;
471 m_modelFilename = config.m_modelFilename;
472 m_modelConfigFilename = config.m_modelConfigFilename;
473 m_framework = config.m_framework;
480 static std::string getAvailableDnnResultsParsingTypes();
481 static std::string dnnResultsParsingTypeToString(
const DNNResultsParsingType &type);
482 static DNNResultsParsingType dnnResultsParsingTypeFromString(
const std::string &name);
483 static std::vector<std::string> parseClassNamesFile(
const std::string &filename);
485 vpDetectorDNNOpenCV(
const NetConfig &config,
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
486#ifdef VISP_HAVE_NLOHMANN_JSON
487 vpDetectorDNNOpenCV(
const std::string &jsonPath,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
488 void initFromJSON(
const std::string &jsonPath);
489 void saveConfigurationInJSON(
const std::string &jsonPath)
const;
494 virtual bool detect(
const vpImage<unsigned char> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
495 virtual bool detect(
const vpImage<unsigned char> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
496 virtual bool detect(
const vpImage<vpRGBa> &I, std::vector<DetectedFeatures2D> &output);
497 virtual bool detect(
const vpImage<vpRGBa> &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
498 virtual bool detect(
const vpImage<vpRGBa> &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
499 virtual bool detect(
const cv::Mat &I, std::vector<DetectedFeatures2D> &output);
500 virtual bool detect(
const cv::Mat &I, std::map< std::string, std::vector<DetectedFeatures2D>> &output);
501 virtual bool detect(
const cv::Mat &I, std::vector< std::pair<std::string, std::vector<DetectedFeatures2D>>> &output);
503 void readNet(
const std::string &model,
const std::string &config =
"",
const std::string &framework =
"");
505 void setNetConfig(
const NetConfig &config);
506 void setConfidenceThreshold(
const float &confThreshold);
507 void setNMSThreshold(
const float &nmsThreshold);
508 void setDetectionFilterSizeRatio(
const double &sizeRatio);
509 void setInputSize(
const int &width,
const int &height);
510 void setMean(
const double &meanR,
const double &meanG,
const double &meanB);
511 void setPreferableBackend(
const int &backendId);
512 void setPreferableTarget(
const int &targetId);
513 void setScaleFactor(
const double &scaleFactor);
514 void setSwapRB(
const bool &swapRB);
515 void setParsingMethod(
const DNNResultsParsingType &typeParsingMethod,
void (*parsingMethod)(DetectionCandidates &, std::vector<cv::Mat> &,
const NetConfig &) = postProcess_unimplemented);
521#ifdef VISP_HAVE_NLOHMANN_JSON
555#if (VISP_HAVE_OPENCV_VERSION == 0x030403)
556 std::vector<cv::String> getOutputsNames();
558 std::vector<DetectedFeatures2D>
559 filterDetectionSingleClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk);
561 std::vector<DetectedFeatures2D>
562 filterDetectionMultiClassInput(
const std::vector<DetectedFeatures2D> &detected_features,
const double minRatioOfAreaOk);
564 std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>>
565 filterDetectionMultiClassInput(
const std::map< std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D>> &detected_features,
const double minRatioOfAreaOk);
567 void postProcess(DetectionCandidates &proposals);
569 void postProcess_YoloV3_V4(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
571 void postProcess_YoloV5_V7(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
573 void postProcess_YoloV8_V11_V12(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
575 void postProcess_FasterRCNN(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
577#if defined(VISP_BUILD_DEPRECATED_FUNCTIONS)
578 void postProcess_SSD_MobileNet(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
581 void postProcess_ResNet_10(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
583 static void postProcess_unimplemented(DetectionCandidates &proposals, std::vector<cv::Mat> &dnnRes,
const NetConfig &netConfig);
614template <
typename Type >
620 std::stringstream ss;
627 ss <<
"(" << std::setprecision(4) <<
m_score * 100. <<
"%)";
Class to define RGB colors available for display functionalities.
static const vpColor blue
void display(const vpImage< Type > &img, const vpColor &color=vpColor::blue, unsigned int thickness=1) const
friend vpDetectorDNNOpenCV
vpRect getBoundingBox() const
DetectedFeatures2D(double u_min, double u_max, double v_min, double v_max, unsigned int cls, double score, const std::optional< std::string > &classname)
Construct a new DetectedFeatures2D object.
double getConfidenceScore() const
std::optional< std::string > getClassName() const
std::optional< std::string > m_classname
unsigned int getClassId() const
Structure containing some information required for the configuration of a vpDetectorDNNOpenCV object.
NetConfig(const NetConfig &config)
friend void to_json(nlohmann::json &j, const NetConfig &config)
Serialize a vpDetectorDNNOpenCV::NetConfig into JSON format.
NetConfig()
Default constructor of the structure vpDetectorDNNOpenCV::NetConfig, required for JSON serialization...
friend vpDetectorDNNOpenCV
friend std::ostream & operator<<(std::ostream &os, const NetConfig &config)
std::string toString() const
friend void from_json(const nlohmann::json &j, NetConfig &config)
Read the network configuration from JSON. All values are optional and if an argument is not present,...
NetConfig(float confThresh, const float &nmsThresh, const std::vector< std::string > &classNames, const cv::Size &dnnInputSize, const double &filterSizeRatio=0., const cv::Scalar &mean=cv::Scalar(127.5, 127.5, 127.5), const double &scaleFactor=2./255., const bool &swapRB=true, const DNNResultsParsingType &parsingType=vpDetectorDNNOpenCV::USER_SPECIFIED, const std::string &modelFilename="", const std::string &configFilename="", const std::string &framework="")
Construct a new NetConfig object.
NetConfig & operator=(const NetConfig &config)
NetConfig(const float &confThresh, const float &nmsThresh, const std::string &classNamesFile, const cv::Size &dnnInputSize, const double &filterSizeRatio=0., const cv::Scalar &mean=cv::Scalar(127.5, 127.5, 127.5), const double &scaleFactor=2./255., const bool &swapRB=true, const DNNResultsParsingType &parsingType=vpDetectorDNNOpenCV::USER_SPECIFIED, const std::string &modelFilename="", const std::string &configFilename="", const std::string &framework="")
Construct a new NetConfig object.
static std::vector< std::string > parseClassNamesFile(const std::string &filename)
Parse the file containing the list of classes the DNN can detect. These classes can be written either...
friend void from_json(const nlohmann::json &j, vpDetectorDNNOpenCV &network)
Read the network configuration from JSON. All values are optional and if an argument is not present,...
cv::Mat m_blob
Buffer for the blob in input net.
DNNResultsParsingType
Enumeration listing the types of DNN for which the vpDetectorDNNOpenCV furnishes the methods permitti...
static DNNResultsParsingType dnnResultsParsingTypeFromString(const std::string &name)
std::vector< cv::String > m_outNames
Names of layers with unconnected outputs.
friend void to_json(nlohmann::json &j, const vpDetectorDNNOpenCV &network)
Serialize the network configuration into JSON format.
cv::Mat m_img
Buffer for the input image.
std::vector< int > m_indices
Indices for NMS.
NetConfig m_netConfig
Configuration of the DNN.
std::vector< cv::Mat > m_dnnRes
Contains all output blobs for each layer specified in m_outNames.
cv::dnn::Net m_net
DNN network.
bool m_applySizeFilterAfterNMS
If true, filter the detections removing the ones for which the bbox does not respect area(bbox) ∈ [me...
friend std::ostream & operator<<(std::ostream &os, const vpDetectorDNNOpenCV &network)
const NetConfig & getNetConfig() const
static std::string dnnResultsParsingTypeToString(const DNNResultsParsingType &type)
void(* m_parsingMethod)(DetectionCandidates &, std::vector< cv::Mat > &, const NetConfig &)
Pointer to the parsing method, used if m_parsingMethodType is equal to vpDetectorDNNOpenCV::USER_SPECIFIED.
vpImage< vpRGBa > m_I_color
Buffer for gray to RGBa image conversion.
static void displayRectangle(const vpImage< unsigned char > &I, const vpImagePoint &topLeft, unsigned int width, unsigned int height, const vpColor &color, bool fill=false, unsigned int thickness=1)
static void displayText(const vpImage< unsigned char > &I, const vpImagePoint &ip, const std::string &s, const vpColor &color)
Error that can be emitted by ViSP classes.
@ badValue
Used to indicate that a value is not in the allowed range.
@ dimensionError
Bad dimension.
Class that defines a 2D point in an image. This class is useful for image processing and stores only ...
Definition of the vpImage class member functions.
Defines a rectangle in the plane.
std::vector< int > m_classIds
std::vector< float > m_confidences
std::vector< cv::Rect > m_boxes