linemod.hpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. // This file is part of OpenCV project.
  2. // It is subject to the license terms in the LICENSE file found in the top-level directory
  3. // of this distribution and at http://opencv.org/license.html
  4. // This code is also subject to the license terms in the LICENSE_WillowGarage.md file found in this module's directory
  5. #ifndef __OPENCV_RGBD_LINEMOD_HPP__
  6. #define __OPENCV_RGBD_LINEMOD_HPP__
  7. #include "opencv2/core.hpp"
  8. #include <map>
  9. /****************************************************************************************\
  10. * LINE-MOD *
  11. \****************************************************************************************/
  12. namespace cv {
  13. namespace linemod {
  14. //! @addtogroup rgbd
  15. //! @{
  16. /**
  17. * \brief Discriminant feature described by its location and label.
  18. */
  19. struct CV_EXPORTS_W_SIMPLE Feature
  20. {
  21. CV_PROP_RW int x; ///< x offset
  22. CV_PROP_RW int y; ///< y offset
  23. CV_PROP_RW int label; ///< Quantization
  24. CV_WRAP Feature() : x(0), y(0), label(0) {}
  25. CV_WRAP Feature(int x, int y, int label);
  26. void read(const FileNode& fn);
  27. void write(FileStorage& fs) const;
  28. };
  29. inline Feature::Feature(int _x, int _y, int _label) : x(_x), y(_y), label(_label) {}
  30. struct CV_EXPORTS_W_SIMPLE Template
  31. {
  32. CV_PROP int width;
  33. CV_PROP int height;
  34. CV_PROP int pyramid_level;
  35. CV_PROP std::vector<Feature> features;
  36. void read(const FileNode& fn);
  37. void write(FileStorage& fs) const;
  38. };
  39. /**
  40. * \brief Represents a modality operating over an image pyramid.
  41. */
  42. class CV_EXPORTS_W QuantizedPyramid
  43. {
  44. public:
  45. // Virtual destructor
  46. virtual ~QuantizedPyramid() {}
  47. /**
  48. * \brief Compute quantized image at current pyramid level for online detection.
  49. *
  50. * \param[out] dst The destination 8-bit image. For each pixel at most one bit is set,
  51. * representing its classification.
  52. */
  53. CV_WRAP virtual void quantize(CV_OUT Mat& dst) const =0;
  54. /**
  55. * \brief Extract most discriminant features at current pyramid level to form a new template.
  56. *
  57. * \param[out] templ The new template.
  58. */
  59. CV_WRAP virtual bool extractTemplate(CV_OUT Template& templ) const =0;
  60. /**
  61. * \brief Go to the next pyramid level.
  62. *
  63. * \todo Allow pyramid scale factor other than 2
  64. */
  65. CV_WRAP virtual void pyrDown() =0;
  66. protected:
  67. /// Candidate feature with a score
  68. struct Candidate
  69. {
  70. Candidate(int x, int y, int label, float score);
  71. /// Sort candidates with high score to the front
  72. bool operator<(const Candidate& rhs) const
  73. {
  74. return score > rhs.score;
  75. }
  76. Feature f;
  77. float score;
  78. };
  79. /**
  80. * \brief Choose candidate features so that they are not bunched together.
  81. *
  82. * \param[in] candidates Candidate features sorted by score.
  83. * \param[out] features Destination vector of selected features.
  84. * \param[in] num_features Number of candidates to select.
  85. * \param[in] distance Hint for desired distance between features.
  86. */
  87. static void selectScatteredFeatures(const std::vector<Candidate>& candidates,
  88. std::vector<Feature>& features,
  89. size_t num_features, float distance);
  90. };
  91. inline QuantizedPyramid::Candidate::Candidate(int x, int y, int label, float _score) : f(x, y, label), score(_score) {}
  92. /**
  93. * \brief Interface for modalities that plug into the LINE template matching representation.
  94. *
  95. * \todo Max response, to allow optimization of summing (255/MAX) features as uint8
  96. */
  97. class CV_EXPORTS_W Modality
  98. {
  99. public:
  100. // Virtual destructor
  101. virtual ~Modality() {}
  102. /**
  103. * \brief Form a quantized image pyramid from a source image.
  104. *
  105. * \param[in] src The source image. Type depends on the modality.
  106. * \param[in] mask Optional mask. If not empty, unmasked pixels are set to zero
  107. * in quantized image and cannot be extracted as features.
  108. */
  109. CV_WRAP Ptr<QuantizedPyramid> process(const Mat& src,
  110. const Mat& mask = Mat()) const
  111. {
  112. return processImpl(src, mask);
  113. }
  114. CV_WRAP virtual String name() const =0;
  115. CV_WRAP virtual void read(const FileNode& fn) =0;
  116. virtual void write(FileStorage& fs) const =0;
  117. /**
  118. * \brief Create modality by name.
  119. *
  120. * The following modality types are supported:
  121. * - "ColorGradient"
  122. * - "DepthNormal"
  123. */
  124. CV_WRAP static Ptr<Modality> create(const String& modality_type);
  125. /**
  126. * \brief Load a modality from file.
  127. */
  128. CV_WRAP static Ptr<Modality> create(const FileNode& fn);
  129. protected:
  130. // Indirection is because process() has a default parameter.
  131. virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
  132. const Mat& mask) const =0;
  133. };
  134. /**
  135. * \brief Modality that computes quantized gradient orientations from a color image.
  136. */
  137. class CV_EXPORTS_W ColorGradient : public Modality
  138. {
  139. public:
  140. /**
  141. * \brief Default constructor. Uses reasonable default parameter values.
  142. */
  143. ColorGradient();
  144. /**
  145. * \brief Constructor.
  146. *
  147. * \param weak_threshold When quantizing, discard gradients with magnitude less than this.
  148. * \param num_features How many features a template must contain.
  149. * \param strong_threshold Consider as candidate features only gradients whose norms are
  150. * larger than this.
  151. */
  152. ColorGradient(float weak_threshold, size_t num_features, float strong_threshold);
  153. CV_WRAP static Ptr<ColorGradient> create(float weak_threshold, size_t num_features, float strong_threshold);
  154. virtual String name() const CV_OVERRIDE;
  155. virtual void read(const FileNode& fn) CV_OVERRIDE;
  156. virtual void write(FileStorage& fs) const CV_OVERRIDE;
  157. CV_PROP float weak_threshold;
  158. CV_PROP size_t num_features;
  159. CV_PROP float strong_threshold;
  160. protected:
  161. virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
  162. const Mat& mask) const CV_OVERRIDE;
  163. };
  164. /**
  165. * \brief Modality that computes quantized surface normals from a dense depth map.
  166. */
  167. class CV_EXPORTS_W DepthNormal : public Modality
  168. {
  169. public:
  170. /**
  171. * \brief Default constructor. Uses reasonable default parameter values.
  172. */
  173. DepthNormal();
  174. /**
  175. * \brief Constructor.
  176. *
  177. * \param distance_threshold Ignore pixels beyond this distance.
  178. * \param difference_threshold When computing normals, ignore contributions of pixels whose
  179. * depth difference with the central pixel is above this threshold.
  180. * \param num_features How many features a template must contain.
  181. * \param extract_threshold Consider as candidate feature only if there are no differing
  182. * orientations within a distance of extract_threshold.
  183. */
  184. DepthNormal(int distance_threshold, int difference_threshold, size_t num_features,
  185. int extract_threshold);
  186. CV_WRAP static Ptr<DepthNormal> create(int distance_threshold, int difference_threshold,
  187. size_t num_features, int extract_threshold);
  188. virtual String name() const CV_OVERRIDE;
  189. virtual void read(const FileNode& fn) CV_OVERRIDE;
  190. virtual void write(FileStorage& fs) const CV_OVERRIDE;
  191. CV_PROP int distance_threshold;
  192. CV_PROP int difference_threshold;
  193. CV_PROP size_t num_features;
  194. CV_PROP int extract_threshold;
  195. protected:
  196. virtual Ptr<QuantizedPyramid> processImpl(const Mat& src,
  197. const Mat& mask) const CV_OVERRIDE;
  198. };
  199. /**
  200. * \brief Debug function to colormap a quantized image for viewing.
  201. */
  202. CV_EXPORTS_W void colormap(const Mat& quantized, CV_OUT Mat& dst);
  203. /**
  204. * \brief Debug function to draw linemod features
  205. * @param img
  206. * @param templates see @ref Detector::addTemplate
  207. * @param tl template bbox top-left offset see @ref Detector::addTemplate
  208. * @param size marker size see @ref cv::drawMarker
  209. */
  210. CV_EXPORTS_W void drawFeatures(InputOutputArray img, const std::vector<Template>& templates, const Point2i& tl, int size = 10);
  211. /**
  212. * \brief Represents a successful template match.
  213. */
  214. struct CV_EXPORTS_W_SIMPLE Match
  215. {
  216. CV_WRAP Match()
  217. {
  218. }
  219. CV_WRAP Match(int x, int y, float similarity, const String& class_id, int template_id);
  220. /// Sort matches with high similarity to the front
  221. bool operator<(const Match& rhs) const
  222. {
  223. // Secondarily sort on template_id for the sake of duplicate removal
  224. if (similarity != rhs.similarity)
  225. return similarity > rhs.similarity;
  226. else
  227. return template_id < rhs.template_id;
  228. }
  229. bool operator==(const Match& rhs) const
  230. {
  231. return x == rhs.x && y == rhs.y && similarity == rhs.similarity && class_id == rhs.class_id;
  232. }
  233. CV_PROP_RW int x;
  234. CV_PROP_RW int y;
  235. CV_PROP_RW float similarity;
  236. CV_PROP_RW String class_id;
  237. CV_PROP_RW int template_id;
  238. };
  239. inline
  240. Match::Match(int _x, int _y, float _similarity, const String& _class_id, int _template_id)
  241. : x(_x), y(_y), similarity(_similarity), class_id(_class_id), template_id(_template_id)
  242. {}
  243. /**
  244. * \brief Object detector using the LINE template matching algorithm with any set of
  245. * modalities.
  246. */
  247. class CV_EXPORTS_W Detector
  248. {
  249. public:
  250. /**
  251. * \brief Empty constructor, initialize with read().
  252. */
  253. CV_WRAP Detector();
  254. /**
  255. * \brief Constructor.
  256. *
  257. * \param modalities Modalities to use (color gradients, depth normals, ...).
  258. * \param T_pyramid Value of the sampling step T at each pyramid level. The
  259. * number of pyramid levels is T_pyramid.size().
  260. */
  261. CV_WRAP Detector(const std::vector< Ptr<Modality> >& modalities, const std::vector<int>& T_pyramid);
  262. /**
  263. * \brief Detect objects by template matching.
  264. *
  265. * Matches globally at the lowest pyramid level, then refines locally stepping up the pyramid.
  266. *
  267. * \param sources Source images, one for each modality.
  268. * \param threshold Similarity threshold, a percentage between 0 and 100.
  269. * \param[out] matches Template matches, sorted by similarity score.
  270. * \param class_ids If non-empty, only search for the desired object classes.
  271. * \param[out] quantized_images Optionally return vector<Mat> of quantized images.
  272. * \param masks The masks for consideration during matching. The masks should be CV_8UC1
  273. * where 255 represents a valid pixel. If non-empty, the vector must be
  274. * the same size as sources. Each element must be
  275. * empty or the same size as its corresponding source.
  276. */
  277. CV_WRAP void match(const std::vector<Mat>& sources, float threshold, CV_OUT std::vector<Match>& matches,
  278. const std::vector<String>& class_ids = std::vector<String>(),
  279. OutputArrayOfArrays quantized_images = noArray(),
  280. const std::vector<Mat>& masks = std::vector<Mat>()) const;
  281. /**
  282. * \brief Add new object template.
  283. *
  284. * \param sources Source images, one for each modality.
  285. * \param class_id Object class ID.
  286. * \param object_mask Mask separating object from background.
  287. * \param[out] bounding_box Optionally return bounding box of the extracted features.
  288. *
  289. * \return Template ID, or -1 if failed to extract a valid template.
  290. */
  291. CV_WRAP int addTemplate(const std::vector<Mat>& sources, const String& class_id,
  292. const Mat& object_mask, CV_OUT Rect* bounding_box = NULL);
  293. /**
  294. * \brief Add a new object template computed by external means.
  295. */
  296. CV_WRAP int addSyntheticTemplate(const std::vector<Template>& templates, const String& class_id);
  297. /**
  298. * \brief Get the modalities used by this detector.
  299. *
  300. * You are not permitted to add/remove modalities, but you may dynamic_cast them to
  301. * tweak parameters.
  302. */
  303. CV_WRAP const std::vector< Ptr<Modality> >& getModalities() const { return modalities; }
  304. /**
  305. * \brief Get sampling step T at pyramid_level.
  306. */
  307. CV_WRAP int getT(int pyramid_level) const { return T_at_level[pyramid_level]; }
  308. /**
  309. * \brief Get number of pyramid levels used by this detector.
  310. */
  311. CV_WRAP int pyramidLevels() const { return pyramid_levels; }
  312. /**
  313. * \brief Get the template pyramid identified by template_id.
  314. *
  315. * For example, with 2 modalities (Gradient, Normal) and two pyramid levels
  316. * (L0, L1), the order is (GradientL0, NormalL0, GradientL1, NormalL1).
  317. */
  318. CV_WRAP const std::vector<Template>& getTemplates(const String& class_id, int template_id) const;
  319. CV_WRAP int numTemplates() const;
  320. CV_WRAP int numTemplates(const String& class_id) const;
  321. CV_WRAP int numClasses() const { return static_cast<int>(class_templates.size()); }
  322. CV_WRAP std::vector<String> classIds() const;
  323. CV_WRAP void read(const FileNode& fn);
  324. void write(FileStorage& fs) const;
  325. String readClass(const FileNode& fn, const String &class_id_override = "");
  326. void writeClass(const String& class_id, FileStorage& fs) const;
  327. CV_WRAP void readClasses(const std::vector<String>& class_ids,
  328. const String& format = "templates_%s.yml.gz");
  329. CV_WRAP void writeClasses(const String& format = "templates_%s.yml.gz") const;
  330. protected:
  331. std::vector< Ptr<Modality> > modalities;
  332. int pyramid_levels;
  333. std::vector<int> T_at_level;
  334. typedef std::vector<Template> TemplatePyramid;
  335. typedef std::map<String, std::vector<TemplatePyramid> > TemplatesMap;
  336. TemplatesMap class_templates;
  337. typedef std::vector<Mat> LinearMemories;
  338. // Indexed as [pyramid level][modality][quantized label]
  339. typedef std::vector< std::vector<LinearMemories> > LinearMemoryPyramid;
  340. void matchClass(const LinearMemoryPyramid& lm_pyramid,
  341. const std::vector<Size>& sizes,
  342. float threshold, std::vector<Match>& matches,
  343. const String& class_id,
  344. const std::vector<TemplatePyramid>& template_pyramids) const;
  345. };
  346. /**
  347. * \brief Factory function for detector using LINE algorithm with color gradients.
  348. *
  349. * Default parameter settings suitable for VGA images.
  350. */
  351. CV_EXPORTS_W Ptr<linemod::Detector> getDefaultLINE();
  352. /**
  353. * \brief Factory function for detector using LINE-MOD algorithm with color gradients
  354. * and depth normals.
  355. *
  356. * Default parameter settings suitable for VGA images.
  357. */
  358. CV_EXPORTS_W Ptr<linemod::Detector> getDefaultLINEMOD();
  359. //! @}
  360. } // namespace linemod
  361. } // namespace cv
  #endif // __OPENCV_RGBD_LINEMOD_HPP__