Visual Servoing Platform version 3.7.0
Loading...
Searching...
No Matches
tutorial-dnn-object-detection-live.cpp
1
2#include <iostream>
3
4#include <visp3/core/vpConfig.h>
5
6 // Check if std::c++17 or higher
7#if defined(HAVE_OPENCV_DNN) && defined(HAVE_OPENCV_VIDEOIO) && defined(VISP_HAVE_DISPLAY) && \
8 ((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
9
10#include <visp3/core/vpIoTools.h>
11#include <visp3/detection/vpDetectorDNNOpenCV.h>
12#include <visp3/gui/vpDisplayFactory.h>
13
14#include <opencv2/videoio.hpp>
15
16#ifdef VISP_HAVE_NLOHMANN_JSON
17#include VISP_NLOHMANN_JSON(json.hpp)
18using json = nlohmann::json;
19#endif
20
21#ifdef ENABLE_VISP_NAMESPACE
22using namespace VISP_NAMESPACE_NAME;
23#endif
24
// Enumeration of the result containers the user can ask the detector to
// fill: a map indexed by class name, a flat vector, or both at once.
// DETECTION_CONTAINER_COUNT doubles as the "no match" / sentinel value.
typedef enum
{
  DETECTION_CONTAINER_MAP = 0,
  DETECTION_CONTAINER_VECTOR = 1,
  DETECTION_CONTAINER_BOTH = 2,
  DETECTION_CONTAINER_COUNT = 3
} ChosenDetectionContainer;

std::string chosenDetectionContainerToString(const ChosenDetectionContainer &choice);
ChosenDetectionContainer chosenDetectionContainerFromString(const std::string &choiceStr);
std::string getAvailableDetectionContainer();

/*!
 * Convert a ChosenDetectionContainer value into its lower-case text name.
 * \param choice : Container type to convert.
 * \return "map", "vector" or "both"; "unknown" for any other value.
 */
std::string chosenDetectionContainerToString(const ChosenDetectionContainer &choice)
{
  if (choice == DETECTION_CONTAINER_MAP) {
    return "map";
  }
  if (choice == DETECTION_CONTAINER_VECTOR) {
    return "vector";
  }
  if (choice == DETECTION_CONTAINER_BOTH) {
    return "both";
  }
  return "unknown";
}
51
52ChosenDetectionContainer chosenDetectionContainerFromString(const std::string &choiceStr)
53{
54 ChosenDetectionContainer choice(DETECTION_CONTAINER_COUNT);
55 bool hasFoundMatch = false;
56 for (unsigned int i = 0; i < DETECTION_CONTAINER_COUNT && !hasFoundMatch; i++) {
57 ChosenDetectionContainer candidate = (ChosenDetectionContainer)i;
58 hasFoundMatch = (chosenDetectionContainerToString(candidate) == vpIoTools::toLowerCase(choiceStr));
59 if (hasFoundMatch) {
60 choice = candidate;
61 }
62 }
63 return choice;
64}
65
66std::string getAvailableDetectionContainer()
67{
68 std::string availableContainers("< ");
69 for (unsigned int i = 0; i < DETECTION_CONTAINER_COUNT - 1; i++) {
70 std::string name = chosenDetectionContainerToString((ChosenDetectionContainer)i);
71 availableContainers += name + " , ";
72 }
73 availableContainers +=
74 chosenDetectionContainerToString((ChosenDetectionContainer)(DETECTION_CONTAINER_COUNT - 1)) + " >";
75 return availableContainers;
76}
77
// Entry point of the tutorial: grab images from a camera or a video file,
// run a DNN-based object detector (OpenCV dnn module) on each frame, and
// display the detected bounding boxes with their class / confidence.
// NOTE(review): this listing was extracted from a documentation page and the
// internal numbering has gaps — several original lines are missing from this
// view (presumably the declarations of `opt_dnn_type`, `dnn`, `I`, `button`,
// the non-C++11 display allocation, and a throw statement) — confirm against
// the original tutorial-dnn-object-detection-live.cpp before editing.
78int main(int argc, const char *argv[])
79{
80#if (VISP_CXX_STANDARD >= VISP_CXX_STANDARD_11)
81 std::shared_ptr<vpDisplay> display = vpDisplayFactory::createDisplay();
82#else
84#endif
85 try {
// Default configuration: OpenCV face detector network, 300x300 input blob,
// mean subtraction values (104, 177, 123), unit scale, no channel swap.
86 std::string opt_device("0");
88 std::string opt_dnn_model = "opencv_face_detector_uint8.pb";
89 std::string opt_dnn_config = "opencv_face_detector.pbtxt";
90 std::string opt_dnn_framework = "none";
91 std::string opt_dnn_label_file = "";
94 int opt_dnn_width = 300, opt_dnn_height = 300;
95 double opt_dnn_meanR = 104.0, opt_dnn_meanG = 177.0, opt_dnn_meanB = 123.0;
96 double opt_dnn_scale_factor = 1.0;
97 bool opt_dnn_swapRB = false;
98 bool opt_step_by_step = false;
99 float opt_dnn_confThresh = 0.5f;
100 float opt_dnn_nmsThresh = 0.4f;
101 double opt_dnn_filterThresh = 0.25;
102 ChosenDetectionContainer opt_dnn_containerType = DETECTION_CONTAINER_MAP;
103 bool opt_verbose = false;
104 std::string opt_input_json = "";
105 std::string opt_output_json = "";
106
// Command-line parsing. Options taking a value consume the next argv entry
// (guarded by the `i + 1 < argc` checks; --mean consumes three entries).
107 for (int i = 1; i < argc; i++) {
108 if (std::string(argv[i]) == "--device" && i + 1 < argc) {
109 opt_device = std::string(argv[++i]);
110 }
111 else if (std::string(argv[i]) == "--step-by-step") {
112 opt_step_by_step = true;
113 }
114 else if (std::string(argv[i]) == "--model" && i + 1 < argc) {
115 opt_dnn_model = std::string(argv[++i]);
116 }
117 else if (std::string(argv[i]) == "--type" && i + 1 < argc) {
// `opt_dnn_type` is used here but its declaration is one of the lines
// missing from this extracted view — TODO confirm in the original file.
118 opt_dnn_type = vpDetectorDNNOpenCV::dnnResultsParsingTypeFromString(std::string(argv[++i]));
119 }
120 else if (std::string(argv[i]) == "--config" && i + 1 < argc) {
121 opt_dnn_config = std::string(argv[++i]);
// "none" means: do not use a config file.
122 if (opt_dnn_config.find("none") != std::string::npos) {
123 opt_dnn_config = std::string();
124 }
125 }
126 else if (std::string(argv[i]) == "--framework" && i + 1 < argc) {
127 opt_dnn_framework = std::string(argv[++i]);
// "none" means: let OpenCV guess the framework.
128 if (opt_dnn_framework.find("none") != std::string::npos) {
129 opt_dnn_framework = std::string();
130 }
131 }
132 else if (std::string(argv[i]) == "--width" && i + 1 < argc) {
133 opt_dnn_width = atoi(argv[++i]);
134 }
135 else if (std::string(argv[i]) == "--height" && i + 1 < argc) {
136 opt_dnn_height = atoi(argv[++i]);
137 }
138 else if (std::string(argv[i]) == "--mean" && i + 3 < argc) {
139 opt_dnn_meanR = atof(argv[++i]);
140 opt_dnn_meanG = atof(argv[++i]);
141 opt_dnn_meanB = atof(argv[++i]);
142 }
143 else if (std::string(argv[i]) == "--scale" && i + 1 < argc) {
144 opt_dnn_scale_factor = atof(argv[++i]);
145 }
146 else if (std::string(argv[i]) == "--swapRB") {
147 opt_dnn_swapRB = true;
148 }
149 else if (std::string(argv[i]) == "--confThresh" && i + 1 < argc) {
150 opt_dnn_confThresh = static_cast<float>(atof(argv[++i]));
151 }
152 else if (std::string(argv[i]) == "--nmsThresh" && i + 1 < argc) {
153 opt_dnn_nmsThresh = static_cast<float>(atof(argv[++i]));
154 }
155 else if (std::string(argv[i]) == "--filterThresh" && i + 1 < argc) {
156 opt_dnn_filterThresh = atof(argv[++i]);
157 }
158 else if (std::string(argv[i]) == "--labels" && i + 1 < argc) {
159 opt_dnn_label_file = std::string(argv[++i]);
160 }
161 else if (std::string(argv[i]) == "--container" && i + 1 < argc) {
162 opt_dnn_containerType = chosenDetectionContainerFromString(std::string(argv[++i]));
163 }
164 else if (std::string(argv[i]) == "--input-json" && i + 1 < argc) {
165 opt_input_json = std::string(std::string(argv[++i]));
166 }
167 else if (std::string(argv[i]) == "--output-json" && i + 1 < argc) {
168 opt_output_json = std::string(std::string(argv[++i]));
169 }
170 else if (std::string(argv[i]) == "--verbose" || std::string(argv[i]) == "-v") {
171 opt_verbose = true;
172 }
// --help / -h: print usage + option descriptions (with current defaults)
// and exit successfully.
173 else if (std::string(argv[i]) == "--help" || std::string(argv[i]) == "-h") {
174 std::cout << "\nSYNOPSIS " << std::endl
175 << argv[0] << " [--device <video>]"
176 << " [--model <dnn weights file>]"
177 << " [--type <dnn type>]"
178 << " [--config <dnn config file]"
179 << " [--framework <name>]"
180 << " [--width <blob width>] [--height <blob height>]"
181 << " [--mean <meanR meanG meanB>]"
182 << " [--scale <scale factor>]"
183 << " [--swapRB]"
184 << " [--confThresh <threshold>]"
185 << " [--nmsThresh <threshold>]"
186 << " [--filterThresh <threshold>]"
187 << " [--labels <file>]"
188 << " [--container <type>]"
189 << " [--input-json <path_to_input_json>]"
190 << " [--output-json <path_to_output_json>]"
191 << " [--step-by-step]"
192 << " [--verbose, -v]"
193 << " [--help, -h]" << std::endl;
194 std::cout << "\nOPTIONS " << std::endl
195 << " --device <video>" << std::endl
196 << " Camera device number or video name used to stream images." << std::endl
197 << " To use the first camera found on the bus set 0. On Ubuntu setting 0" << std::endl
198 << " will use /dev/video0 device. To use a video simply put the name of" << std::endl
199 << " the video, like \"path/my-video.mp4\" or \"path/image-%04d.png\"" << std::endl
200 << " if your video is a sequence of images." << std::endl
201 << " Default: " << opt_device << std::endl
202 << std::endl
203 << " --model <dnn weights file>" << std::endl
204 << " Path to dnn network trained weights." << std::endl
205 << " Default: " << opt_dnn_model << std::endl
206 << std::endl
207 << " --type <dnn type>" << std::endl
208 << " Type of dnn network. Admissible values are in " << std::endl
210 << " Default: " << opt_dnn_type << std::endl
211 << std::endl
212 << " --config <dnn config file>" << std::endl
213 << " Path to dnn network config file or \"none\" not to use one. " << std::endl
214 << " Default: " << opt_dnn_config << std::endl
215 << std::endl
216 << " --framework <name>" << std::endl
217 << " Framework name or \"none\" not to specify one. " << std::endl
218 << " Default: " << opt_dnn_framework << std::endl
219 << std::endl
220 << " --width <blob width>" << std::endl
221 << " Input images will be resized to this width. " << std::endl
222 << " Default: " << opt_dnn_width << std::endl
223 << std::endl
224 << " --height <blob height>" << std::endl
225 << " Input images will be resized to this height. " << std::endl
226 << " Default: " << opt_dnn_height << std::endl
227 << std::endl
228 << " --mean <meanR meanG meanB>" << std::endl
229 << " Mean RGB subtraction values. " << std::endl
230 << " Default: " << opt_dnn_meanR << " " << opt_dnn_meanG << " " << opt_dnn_meanB << std::endl
231 << std::endl
232 << " --scale <scale factor>" << std::endl
233 << " Scale factor used to normalize the range of pixel values. " << std::endl
234 << " Default: " << opt_dnn_scale_factor << std::endl
235 << std::endl
236 << " --swapRB" << std::endl
237 << " When used this option allows to swap Red and Blue channels. " << std::endl
238 << std::endl
239 << " --confThresh <threshold>" << std::endl
240 << " Confidence threshold. " << std::endl
241 << " Default: " << opt_dnn_confThresh << std::endl
242 << std::endl
243 << " --nmsThresh <threshold>" << std::endl
244 << " Non maximum suppression threshold. " << std::endl
245 << " Default: " << opt_dnn_nmsThresh << std::endl
246 << std::endl
247 << " --filterThresh <threshold >" << std::endl
248 << " Filter threshold. Set 0. to disable." << std::endl
249 << " Default: " << opt_dnn_filterThresh << std::endl
250 << std::endl
251 << " --labels <file>" << std::endl
252 << " Path to label file either in txt or yaml format. Keep empty if unknown." << std::endl
253 << " Default: \"" << opt_dnn_label_file << "\"" << std::endl
254 << std::endl
255 << " --container <type>" << std::endl
256 << " Container type in " << getAvailableDetectionContainer() << std::endl
257 << " Default: " << chosenDetectionContainerToString(opt_dnn_containerType) << std::endl
258 << std::endl
259 << " --input-json <path_to_input_json>" << std::endl
260 << " Input JSON file used to configure the DNN. If set, the other arguments will be used to override the values set in the json file." << std::endl
261 << " Default: empty" << std::endl
262 << std::endl
263 << " --output-json <type>" << std::endl
264 << " Output JSON file where will be saved the DNN configuration. If empty, does not save the configuration." << std::endl
265 << " Default: empty" << std::endl
266 << std::endl
267 << " --step-by-step" << std::endl
268 << " Enable step by step mode, waiting for a user click to process next image." << std::endl
269 << std::endl
270 << " --verbose, -v" << std::endl
271 << " Enable verbose mode." << std::endl
272 << std::endl
273 << " --help, -h" << std::endl
274 << " Display this helper message." << std::endl
275 << std::endl;
276 return EXIT_SUCCESS;
277 }
278 }
279
280 std::cout << "Video device : " << opt_device << std::endl;
281 std::cout << "Label file (optional): " << (opt_dnn_label_file.empty() ? "None" : opt_dnn_label_file) << std::endl;
282
// Open the capture: a numeric --device value selects a camera index,
// anything else is treated as a video file / image-sequence path.
283 cv::VideoCapture capture;
284 bool hasCaptureOpeningSucceeded;
285 if (vpMath::isNumber(opt_device)) {
286 hasCaptureOpeningSucceeded = capture.open(std::atoi(opt_device.c_str()));
287 }
288 else {
289 hasCaptureOpeningSucceeded = capture.open(opt_device);
290 }
291 if (!hasCaptureOpeningSucceeded) {
292 std::cout << "Capture from camera: " << opt_device << " didn't work" << std::endl;
293 return EXIT_FAILURE;
294 }
295
297 display->setDownScalingFactor(vpDisplay::SCALE_AUTO);
298
// Fail early if the user supplied a labels file that does not exist.
// NOTE(review): the line starting the throw expression (presumably a
// vpException construction) is missing from this extracted view.
299 if (!opt_dnn_label_file.empty() && !vpIoTools::checkFilename(opt_dnn_label_file)) {
301 "The file containing the classes labels \"" + opt_dnn_label_file + "\" does not exist !"));
302 }
303
// Configure the detector: either from an input JSON file (when the
// nlohmann-json third party is available) or from the command-line values.
305#ifdef VISP_HAVE_NLOHMANN_JSON
306 if (!opt_input_json.empty()) {
308 dnn.initFromJSON(opt_input_json);
310 }
311#else
312 if (!opt_input_json.empty()) {
313 std::cerr << "Error: NLOHMANN JSON library is not installed, please install it following ViSP documentation to configure the vpDetectorDNNOpenCV from a JSON file." << std::endl;
314 return EXIT_FAILURE;
315 }
316#endif
317 else {
319 vpDetectorDNNOpenCV::NetConfig netConfig(opt_dnn_confThresh, opt_dnn_nmsThresh, opt_dnn_label_file
320 , cv::Size(opt_dnn_width, opt_dnn_height), opt_dnn_filterThresh, cv::Scalar(opt_dnn_meanR, opt_dnn_meanG, opt_dnn_meanB)
321 , opt_dnn_scale_factor, opt_dnn_swapRB, opt_dnn_type
322 , opt_dnn_model, opt_dnn_config, opt_dnn_framework
323 );
324 dnn.setNetConfig(netConfig);
326 }
327
328 std::cout << dnn.getNetConfig() << std::endl;
329
// Optionally dump the resulting configuration to a JSON file.
330#ifdef VISP_HAVE_NLOHMANN_JSON
331 if (!opt_output_json.empty()) {
332 dnn.saveConfigurationInJSON(opt_output_json);
333 }
334#else
335 if (!opt_output_json.empty()) {
336 std::cerr << "Error: NLOHMANN JSON library is not installed, please install it following ViSP documentation to save the configuration in a JSON file." << std::endl;
337 }
338#endif
339
// Main acquisition / detection / display loop; exits when the stream ends
// or the user right-clicks.
340 cv::Mat frame;
341 while (true) {
342 capture >> frame;
343 if (frame.type() == CV_8UC4) {
344 // RGBa format is not supported by the class, converting to BGR format
345 cv::Mat cpy = frame;
346 cv::cvtColor(cpy, frame, cv::COLOR_RGBA2BGR);
347 }
348 if (frame.empty())
349 break;
350
// First frame: initialize the ViSP image and the display window.
351 if (I.getSize() == 0) {
352 vpImageConvert::convert(frame, I);
353 display->init(I);
354 vpDisplay::setTitle(I, "DNN object detection");
355 if (opt_verbose) {
356 std::cout << "Process image: " << I.getWidth() << " x " << I.getHeight() << std::endl;
357 }
358 }
359 else {
360 vpImageConvert::convert(frame, I);
361 }
362 if (opt_verbose) {
363 std::cout << "Process new image" << std::endl;
364 }
365
367
// Detection using the map container (results grouped by class name).
368 if (opt_dnn_containerType == DETECTION_CONTAINER_MAP || opt_dnn_containerType == DETECTION_CONTAINER_BOTH) {
369 double t = vpTime::measureTimeMs();
371 std::map<std::string, std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D> > detections;
372 dnn.detect(frame, detections);
375
377 for (auto key_val : detections) {
378 if (opt_verbose) {
379 std::cout << " Class name : " << key_val.first << std::endl;
380 }
381 for (vpDetectorDNNOpenCV::DetectedFeatures2D detection : key_val.second) {
382 if (opt_verbose) {
383 std::cout << " Bounding box : " << detection.getBoundingBox() << std::endl;
384 std::cout << " Class Id : " << detection.getClassId() << std::endl;
385 if (detection.getClassName())
386 std::cout << " Class name : " << detection.getClassName().value() << std::endl;
387 std::cout << " Confidence score: " << detection.getConfidenceScore() << std::endl;
388 }
389 detection.display(I);
390 }
391 }
393
394 std::ostringstream oss_map;
395 oss_map << "Detection time (map): " << t << " ms";
396 if (opt_verbose) {
397 // Displaying timing result in console
398 std::cout << " " << oss_map.str() << std::endl;
399 }
400 // Displaying timing result on the image
401 vpDisplay::displayText(I, 60, 20, oss_map.str(), vpColor::red);
402 }
403
// Detection using the flat vector container (ungrouped results).
404 if (opt_dnn_containerType == DETECTION_CONTAINER_VECTOR || opt_dnn_containerType == DETECTION_CONTAINER_BOTH) {
405 double t_vector = vpTime::measureTimeMs();
407 std::vector<vpDetectorDNNOpenCV::DetectedFeatures2D> detections_vec;
408 dnn.detect(frame, detections_vec);
410 t_vector = vpTime::measureTimeMs() - t_vector;
411
413 for (auto detection : detections_vec) {
414 if (opt_verbose) {
415 std::cout << " Bounding box : " << detection.getBoundingBox() << std::endl;
416 std::cout << " Class Id : " << detection.getClassId() << std::endl;
417 std::optional<std::string> classname_opt = detection.getClassName();
418 std::cout << " Class name : " << (classname_opt ? *classname_opt : "Not known") << std::endl;
419 std::cout << " Confidence score: " << detection.getConfidenceScore() << std::endl;
420 }
421 detection.display(I);
422 }
424
425 std::ostringstream oss_vec;
426 oss_vec << "Detection time (vector): " << t_vector << " ms";
427 if (opt_verbose) {
428 // Displaying timing result in console
429 std::cout << " " << oss_vec.str() << std::endl;
430 }
431 // Displaying timing result on the image
432 vpDisplay::displayText(I, 80, 20, oss_vec.str(), vpColor::red);
433 }
434
435 // // UI display
436 if (opt_step_by_step) {
437 vpDisplay::displayText(I, 20, 20, "Left click to display next image", vpColor::red);
438 }
439 vpDisplay::displayText(I, 40, 20, "Right click to quit", vpColor::red);
440
443
// In step-by-step mode getClick blocks until a click; otherwise it polls.
// NOTE(review): the declaration of `button` is missing from this view.
444 if (vpDisplay::getClick(I, button, opt_step_by_step)) {
445 if (button == vpMouseButton::button1) {
446 // Left click => next image
447 continue;
448 }
449 else if (button == vpMouseButton::button3) {
450 // Right click => stop the program
451 break;
452 }
453 }
454 }
455
456 }
457 catch (const vpException &e) {
458 std::cout << e.what() << std::endl;
459 }
// Pre-C++11 builds allocated the display on the heap; release it here.
460#if (VISP_CXX_STANDARD < VISP_CXX_STANDARD_11) && defined(VISP_HAVE_DISPLAY)
461 if (display != nullptr) {
462 delete display;
463 }
464#endif
465 return EXIT_SUCCESS;
466}
467
468#else
469
/*
 * Fallback entry point, compiled only when the tutorial's requirements are
 * not met: report which requirement (OpenCV dnn / videoio module, C++17
 * standard) is missing and exit successfully.
 */
int main()
{
#if !defined(HAVE_OPENCV_DNN)
  std::cout << "This tutorial needs OpenCV dnn module that is missing." << std::endl;
#endif
#if !defined(HAVE_OPENCV_VIDEOIO)
  std::cout << "This tutorial needs OpenCV videoio module that is missing." << std::endl;
#endif
  // BUG FIX: the condition below was previously NOT negated, so the
  // "needs std::c++17" message was printed exactly when C++17 WAS available
  // and never when it was actually missing. It now mirrors the negated
  // style of the two module checks above.
#if !((__cplusplus >= 201703L) || (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)))
  std::cout << "This tutorial needs std::c++17 standard enabled." << std::endl;
#endif
  return EXIT_SUCCESS;
}
483
484#endif
static const vpColor red
Definition vpColor.h:198
Structure containing the bounding box, expressed in pixels, confidence and class information about an...
void display(const vpImage< Type > &img, const vpColor &color=vpColor::blue, unsigned int thickness=1) const
std::optional< std::string > getClassName() const
Structure containing some information required for the configuration of a vpDetectorDNNOpenCV object.
void initFromJSON(const std::string &jsonPath)
Initialize detector from a json config file.
DNNResultsParsingType
Enumeration listing the types of DNN for which the vpDetectorDNNOpenCV furnishes the methods permitti...
static DNNResultsParsingType dnnResultsParsingTypeFromString(const std::string &name)
void setNetConfig(const NetConfig &config)
virtual bool detect(const vpImage< unsigned char > &I, std::vector< DetectedFeatures2D > &output)
Object detection using OpenCV DNN module.
const NetConfig & getNetConfig() const
static std::string getAvailableDnnResultsParsingTypes()
Get the list of the parsing methods / types of DNNs supported by the vpDetectorDNNOpenCV class.
void saveConfigurationInJSON(const std::string &jsonPath) const
Save the network configuration in a JSON file.
Class that defines generic functionalities for display.
Definition vpDisplay.h:171
static bool getClick(const vpImage< unsigned char > &I, bool blocking=true)
static void display(const vpImage< unsigned char > &I)
static void setTitle(const vpImage< unsigned char > &I, const std::string &windowtitle)
static void flush(const vpImage< unsigned char > &I)
static void displayText(const vpImage< unsigned char > &I, const vpImagePoint &ip, const std::string &s, const vpColor &color)
error that can be emitted by ViSP classes.
Definition vpException.h:60
@ fatalError
Fatal error.
Definition vpException.h:72
static void convert(const vpImage< unsigned char > &src, vpImage< vpRGBa > &dest)
Definition of the vpImage class member functions.
Definition vpImage.h:131
static std::string toLowerCase(const std::string &input)
Return a lower-case version of the string input . Numbers and special characters stay the same.
static bool checkFilename(const std::string &filename)
static bool isNumber(const std::string &str)
Definition vpMath.cpp:235
std::shared_ptr< vpDisplay > createDisplay()
Return a smart pointer vpDisplay specialization if a GUI library is available or nullptr otherwise.
vpDisplay * allocateDisplay()
Return a newly allocated vpDisplay specialization if a GUI library is available or nullptr otherwise.
VISP_EXPORT double measureTimeMs()