all_layers.hpp 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012
  1. /*M///////////////////////////////////////////////////////////////////////////////////////
  2. //
  3. // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
  4. //
  5. // By downloading, copying, installing or using the software you agree to this license.
  6. // If you do not agree to this license, do not download, install,
  7. // copy or use the software.
  8. //
  9. //
  10. // License Agreement
  11. // For Open Source Computer Vision Library
  12. //
  13. // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
  14. // Third party copyrights are property of their respective owners.
  15. //
  16. // Redistribution and use in source and binary forms, with or without modification,
  17. // are permitted provided that the following conditions are met:
  18. //
  19. // * Redistribution's of source code must retain the above copyright notice,
  20. // this list of conditions and the following disclaimer.
  21. //
  22. // * Redistribution's in binary form must reproduce the above copyright notice,
  23. // this list of conditions and the following disclaimer in the documentation
  24. // and/or other materials provided with the distribution.
  25. //
  26. // * The name of the copyright holders may not be used to endorse or promote products
  27. // derived from this software without specific prior written permission.
  28. //
  29. // This software is provided by the copyright holders and contributors "as is" and
  30. // any express or implied warranties, including, but not limited to, the implied
  31. // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32. // In no event shall the Intel Corporation or contributors be liable for any direct,
  33. // indirect, incidental, special, exemplary, or consequential damages
  34. // (including, but not limited to, procurement of substitute goods or services;
  35. // loss of use, data, or profits; or business interruption) however caused
  36. // and on any theory of liability, whether in contract, strict liability,
  37. // or tort (including negligence or otherwise) arising in any way out of
  38. // the use of this software, even if advised of the possibility of such damage.
  39. //
  40. //M*/
  41. #ifndef OPENCV_DNN_DNN_ALL_LAYERS_HPP
  42. #define OPENCV_DNN_DNN_ALL_LAYERS_HPP
  43. #include <opencv2/dnn.hpp>
  44. namespace cv {
  45. namespace dnn {
  46. CV__DNN_INLINE_NS_BEGIN
  47. //! @addtogroup dnn
  48. //! @{
  49. /** @defgroup dnnLayerList Partial List of Implemented Layers
  50. @{
  51. This subsection of dnn module contains information about built-in layers and their descriptions.
  Classes listed here, in fact, provide a C++ API for creating instances of built-in layers.
  In addition to this way of instantiating layers, there is a more common factory API (see @ref dnnLayerFactory); it allows creating layers dynamically (by name) and registering new ones.
  You can use both APIs, but the factory API is less convenient for native C++ programming and is basically designed for use inside importers (see @ref readNetFromCaffe(), @ref readNetFromTorch(), @ref readNetFromTensorflow()).
  55. Built-in layers partially reproduce functionality of corresponding Caffe and Torch7 layers.
  56. In particular, the following layers and Caffe importer were tested to reproduce <a href="http://caffe.berkeleyvision.org/tutorial/layers.html">Caffe</a> functionality:
  57. - Convolution
  58. - Deconvolution
  59. - Pooling
  60. - InnerProduct
  61. - TanH, ReLU, Sigmoid, BNLL, Power, AbsVal
  62. - Softmax
  63. - Reshape, Flatten, Slice, Split
  64. - LRN
  65. - MVN
  66. - Dropout (since it does nothing on forward pass -))
  67. */
  68. class CV_EXPORTS BlankLayer : public Layer
  69. {
  70. public:
  71. static Ptr<Layer> create(const LayerParams &params);
  72. };
  73. /**
  74. * Constant layer produces the same data blob at an every forward pass.
  75. */
  76. class CV_EXPORTS ConstLayer : public Layer
  77. {
  78. public:
  79. static Ptr<Layer> create(const LayerParams &params);
  80. };
  81. //! LSTM recurrent layer
  82. class CV_EXPORTS LSTMLayer : public Layer
  83. {
  84. public:
  85. /** Creates instance of LSTM layer */
  86. static Ptr<LSTMLayer> create(const LayerParams& params);
  87. /** @deprecated Use LayerParams::blobs instead.
  88. @brief Set trained weights for LSTM layer.
  89. LSTM behavior on each step is defined by current input, previous output, previous cell state and learned weights.
  90. Let @f$x_t@f$ be current input, @f$h_t@f$ be current output, @f$c_t@f$ be current state.
  91. Than current output and current cell state is computed as follows:
  92. @f{eqnarray*}{
  93. h_t &= o_t \odot tanh(c_t), \\
  94. c_t &= f_t \odot c_{t-1} + i_t \odot g_t, \\
  95. @f}
  96. where @f$\odot@f$ is per-element multiply operation and @f$i_t, f_t, o_t, g_t@f$ is internal gates that are computed using learned weights.
  97. Gates are computed as follows:
  98. @f{eqnarray*}{
  99. i_t &= sigmoid&(W_{xi} x_t + W_{hi} h_{t-1} + b_i), \\
  100. f_t &= sigmoid&(W_{xf} x_t + W_{hf} h_{t-1} + b_f), \\
  101. o_t &= sigmoid&(W_{xo} x_t + W_{ho} h_{t-1} + b_o), \\
  102. g_t &= tanh &(W_{xg} x_t + W_{hg} h_{t-1} + b_g), \\
  103. @f}
  104. where @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
  105. @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
  106. For simplicity and performance purposes we use @f$ W_x = [W_{xi}; W_{xf}; W_{xo}, W_{xg}] @f$
  107. (i.e. @f$W_x@f$ is vertical concatenation of @f$ W_{x?} @f$), @f$ W_x \in R^{4N_h \times N_x} @f$.
  108. The same for @f$ W_h = [W_{hi}; W_{hf}; W_{ho}, W_{hg}], W_h \in R^{4N_h \times N_h} @f$
  109. and for @f$ b = [b_i; b_f, b_o, b_g]@f$, @f$b \in R^{4N_h} @f$.
  110. @param Wh is matrix defining how previous output is transformed to internal gates (i.e. according to above mentioned notation is @f$ W_h @f$)
  111. @param Wx is matrix defining how current input is transformed to internal gates (i.e. according to above mentioned notation is @f$ W_x @f$)
  112. @param b is bias vector (i.e. according to above mentioned notation is @f$ b @f$)
  113. */
  114. CV_DEPRECATED virtual void setWeights(const Mat &Wh, const Mat &Wx, const Mat &b) = 0;
  115. /** @brief Specifies shape of output blob which will be [[`T`], `N`] + @p outTailShape.
  116. * @details If this parameter is empty or unset then @p outTailShape = [`Wh`.size(0)] will be used,
  117. * where `Wh` is parameter from setWeights().
  118. */
  119. virtual void setOutShape(const MatShape &outTailShape = MatShape()) = 0;
  120. /** @deprecated Use flag `produce_cell_output` in LayerParams.
  121. * @brief Specifies either interpret first dimension of input blob as timestamp dimension either as sample.
  122. *
  123. * If flag is set to true then shape of input blob will be interpreted as [`T`, `N`, `[data dims]`] where `T` specifies number of timestamps, `N` is number of independent streams.
  124. * In this case each forward() call will iterate through `T` timestamps and update layer's state `T` times.
  125. *
  126. * If flag is set to false then shape of input blob will be interpreted as [`N`, `[data dims]`].
  127. * In this case each forward() call will make one iteration and produce one timestamp with shape [`N`, `[out dims]`].
  128. */
  129. CV_DEPRECATED virtual void setUseTimstampsDim(bool use = true) = 0;
  130. /** @deprecated Use flag `use_timestamp_dim` in LayerParams.
  131. * @brief If this flag is set to true then layer will produce @f$ c_t @f$ as second output.
  132. * @details Shape of the second output is the same as first output.
  133. */
  134. CV_DEPRECATED virtual void setProduceCellOutput(bool produce = false) = 0;
  135. /* In common case it use single input with @f$x_t@f$ values to compute output(s) @f$h_t@f$ (and @f$c_t@f$).
  136. * @param input should contain packed values @f$x_t@f$
  137. * @param output contains computed outputs: @f$h_t@f$ (and @f$c_t@f$ if setProduceCellOutput() flag was set to true).
  138. *
  139. * If setUseTimstampsDim() is set to true then @p input[0] should has at least two dimensions with the following shape: [`T`, `N`, `[data dims]`],
  140. * where `T` specifies number of timestamps, `N` is number of independent streams (i.e. @f$ x_{t_0 + t}^{stream} @f$ is stored inside @p input[0][t, stream, ...]).
  141. *
  142. * If setUseTimstampsDim() is set to false then @p input[0] should contain single timestamp, its shape should has form [`N`, `[data dims]`] with at least one dimension.
  143. * (i.e. @f$ x_{t}^{stream} @f$ is stored inside @p input[0][stream, ...]).
  144. */
  145. int inputNameToIndex(String inputName) CV_OVERRIDE;
  146. int outputNameToIndex(const String& outputName) CV_OVERRIDE;
  147. };
  148. /** @brief GRU recurrent one-layer
  149. *
  150. * Accepts input sequence and computes the final hidden state for each element in the batch.
  151. *
  152. * - input[0] containing the features of the input sequence.
  153. * input[0] should have shape [`T`, `N`, `data_dims`] where `T` is sequence length, `N` is batch size, `data_dims` is input size
  154. * - output would have shape [`T`, `N`, `D` * `hidden_size`] where `D = 2` if layer is bidirectional otherwise `D = 1`
  155. *
  156. * Depends on the following attributes:
  157. * - hidden_size - Number of neurons in the hidden layer
  158. * - direction - RNN could be bidirectional or forward
  159. *
  160. * The final hidden state @f$ h_t @f$ computes by the following formulas:
  161. *
  162. @f{eqnarray*}{
  163. r_t = \sigma(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\
  164. z_t = \sigma(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\
  165. n_t = \tanh(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{(t-1)}+ b_{hn})) \\
  166. h_t = (1 - z_t) \odot n_t + z_t \odot h_{(t-1)} \\
  167. @f}
  168. * Where @f$x_t@f$ is current input, @f$h_{(t-1)}@f$ is previous or initial hidden state.
  169. *
  170. * @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices:
  171. * @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$.
  172. *
  173. * @f$\odot@f$ is per-element multiply operation.
  174. */
  175. class CV_EXPORTS GRULayer : public Layer
  176. {
  177. public:
  178. /** Creates instance of GRU layer */
  179. static Ptr<GRULayer> create(const LayerParams& params);
  180. };
  181. /** @brief Classical recurrent layer
  182. Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and compute two outputs @f$o_t@f$ and @f$h_t@f$.
  183. - input: should contain packed input @f$x_t@f$.
  184. - output: should contain output @f$o_t@f$ (and @f$h_t@f$ if setProduceHiddenOutput() is set to true).
  185. input[0] should have shape [`T`, `N`, `data_dims`] where `T` and `N` is number of timestamps and number of independent samples of @f$x_t@f$ respectively.
  186. output[0] will have shape [`T`, `N`, @f$N_o@f$], where @f$N_o@f$ is number of rows in @f$ W_{xo} @f$ matrix.
  187. If setProduceHiddenOutput() is set to true then @p output[1] will contain a Mat with shape [`T`, `N`, @f$N_h@f$], where @f$N_h@f$ is number of rows in @f$ W_{hh} @f$ matrix.
  188. */
  189. class CV_EXPORTS RNNLayer : public Layer
  190. {
  191. public:
  192. /** Creates instance of RNNLayer */
  193. static Ptr<RNNLayer> create(const LayerParams& params);
  194. /** Setups learned weights.
  195. Recurrent-layer behavior on each step is defined by current input @f$ x_t @f$, previous state @f$ h_t @f$ and learned weights as follows:
  196. @f{eqnarray*}{
  197. h_t &= tanh&(W_{hh} h_{t-1} + W_{xh} x_t + b_h), \\
  198. o_t &= tanh&(W_{ho} h_t + b_o),
  199. @f}
  200. @param Wxh is @f$ W_{xh} @f$ matrix
  201. @param bh is @f$ b_{h} @f$ vector
  202. @param Whh is @f$ W_{hh} @f$ matrix
  203. @param Who is @f$ W_{xo} @f$ matrix
  204. @param bo is @f$ b_{o} @f$ vector
  205. */
  206. virtual void setWeights(const Mat &Wxh, const Mat &bh, const Mat &Whh, const Mat &Who, const Mat &bo) = 0;
  207. /** @brief If this flag is set to true then layer will produce @f$ h_t @f$ as second output.
  208. * @details Shape of the second output is the same as first output.
  209. */
  210. virtual void setProduceHiddenOutput(bool produce = false) = 0;
  211. };
  212. class CV_EXPORTS BaseConvolutionLayer : public Layer
  213. {
  214. public:
  215. CV_DEPRECATED_EXTERNAL Size kernel, stride, pad, dilation, adjustPad;
  216. std::vector<size_t> adjust_pads;
  217. std::vector<size_t> kernel_size, strides, dilations;
  218. std::vector<size_t> pads_begin, pads_end;
  219. String padMode;
  220. int numOutput;
  221. };
  222. class CV_EXPORTS ConvolutionLayer : public BaseConvolutionLayer
  223. {
  224. public:
  225. static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
  226. };
  227. class CV_EXPORTS ConvolutionLayerInt8 : public BaseConvolutionLayer
  228. {
  229. public:
  230. int input_zp, output_zp;
  231. float output_sc;
  232. static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
  233. };
  234. class CV_EXPORTS DeconvolutionLayer : public BaseConvolutionLayer
  235. {
  236. public:
  237. static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
  238. };
  239. class CV_EXPORTS LRNLayer : public Layer
  240. {
  241. public:
  242. int type;
  243. int size;
  244. float alpha, beta, bias;
  245. bool normBySize;
  246. static Ptr<LRNLayer> create(const LayerParams& params);
  247. };
  248. /** @brief ArgMax/ArgMin layer
  249. * @note returns indices as floats, which means the supported range is [-2^24; 2^24]
  250. */
  251. class CV_EXPORTS ArgLayer : public Layer
  252. {
  253. public:
  254. static Ptr<ArgLayer> create(const LayerParams& params);
  255. };
  256. class CV_EXPORTS PoolingLayer : public Layer
  257. {
  258. public:
  259. int type;
  260. std::vector<size_t> kernel_size, strides;
  261. std::vector<size_t> pads_begin, pads_end;
  262. bool globalPooling; //!< Flag is true if at least one of the axes is global pooled.
  263. std::vector<bool> isGlobalPooling;
  264. bool computeMaxIdx;
  265. String padMode;
  266. bool ceilMode;
  267. // If true for average pooling with padding, divide an every output region
  268. // by a whole kernel area. Otherwise exclude zero padded values and divide
  269. // by number of real values.
  270. bool avePoolPaddedArea;
  271. // ROIPooling parameters.
  272. Size pooledSize;
  273. float spatialScale;
  274. // PSROIPooling parameters.
  275. int psRoiOutChannels;
  276. static Ptr<PoolingLayer> create(const LayerParams& params);
  277. };
  278. class CV_EXPORTS PoolingLayerInt8 : public PoolingLayer
  279. {
  280. public:
  281. int input_zp, output_zp;
  282. static Ptr<PoolingLayerInt8> create(const LayerParams& params);
  283. };
  284. class CV_EXPORTS SoftmaxLayer : public Layer
  285. {
  286. public:
  287. bool logSoftMax;
  288. static Ptr<SoftmaxLayer> create(const LayerParams& params);
  289. };
  290. class CV_EXPORTS SoftmaxLayerInt8 : public SoftmaxLayer
  291. {
  292. public:
  293. float output_sc;
  294. int output_zp;
  295. static Ptr<SoftmaxLayerInt8> create(const LayerParams& params);
  296. };
  297. class CV_EXPORTS InnerProductLayer : public Layer
  298. {
  299. public:
  300. int axis;
  301. static Ptr<InnerProductLayer> create(const LayerParams& params);
  302. };
  303. class CV_EXPORTS InnerProductLayerInt8 : public InnerProductLayer
  304. {
  305. public:
  306. int output_zp;
  307. static Ptr<InnerProductLayerInt8> create(const LayerParams& params);
  308. };
  309. class CV_EXPORTS MVNLayer : public Layer
  310. {
  311. public:
  312. float eps;
  313. bool normVariance, acrossChannels;
  314. static Ptr<MVNLayer> create(const LayerParams& params);
  315. };
  316. /* Reshaping */
  317. class CV_EXPORTS ReshapeLayer : public Layer
  318. {
  319. public:
  320. MatShape newShapeDesc;
  321. Range newShapeRange;
  322. static Ptr<ReshapeLayer> create(const LayerParams& params);
  323. };
  324. class CV_EXPORTS FlattenLayer : public Layer
  325. {
  326. public:
  327. static Ptr<FlattenLayer> create(const LayerParams &params);
  328. };
  329. class CV_EXPORTS QuantizeLayer : public Layer
  330. {
  331. public:
  332. float scale;
  333. int zeropoint;
  334. static Ptr<QuantizeLayer> create(const LayerParams &params);
  335. };
  336. class CV_EXPORTS DequantizeLayer : public Layer
  337. {
  338. public:
  339. float scale;
  340. int zeropoint;
  341. static Ptr<DequantizeLayer> create(const LayerParams &params);
  342. };
  343. class CV_EXPORTS RequantizeLayer : public Layer
  344. {
  345. public:
  346. float scale, shift;
  347. static Ptr<RequantizeLayer> create(const LayerParams &params);
  348. };
  349. class CV_EXPORTS ConcatLayer : public Layer
  350. {
  351. public:
  352. int axis;
  353. /**
  354. * @brief Add zero padding in case of concatenation of blobs with different
  355. * spatial sizes.
  356. *
  357. * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat
  358. */
  359. bool padding;
  360. int paddingValue;
  361. static Ptr<ConcatLayer> create(const LayerParams &params);
  362. };
  363. class CV_EXPORTS SplitLayer : public Layer
  364. {
  365. public:
  366. int outputsCount; //!< Number of copies that will be produced (is ignored when negative).
  367. static Ptr<SplitLayer> create(const LayerParams &params);
  368. };
  369. /**
  370. * Slice layer has several modes:
  371. * 1. Caffe mode
  372. * @param[in] axis Axis of split operation
  373. * @param[in] slice_point Array of split points
  374. *
  375. * Number of output blobs equals to number of split points plus one. The
  376. * first blob is a slice on input from 0 to @p slice_point[0] - 1 by @p axis,
  377. * the second output blob is a slice of input from @p slice_point[0] to
  378. * @p slice_point[1] - 1 by @p axis and the last output blob is a slice of
  379. * input from @p slice_point[-1] up to the end of @p axis size.
  380. *
  381. * 2. TensorFlow mode
  382. * @param begin Vector of start indices
  383. * @param size Vector of sizes
  384. *
  385. * More convenient numpy-like slice. One and only output blob
  386. * is a slice `input[begin[0]:begin[0]+size[0], begin[1]:begin[1]+size[1], ...]`
  387. *
  388. * 3. Torch mode
  389. * @param axis Axis of split operation
  390. *
  391. * Split input blob on the equal parts by @p axis.
  392. */
  393. class CV_EXPORTS SliceLayer : public Layer
  394. {
  395. public:
  396. /**
  397. * @brief Vector of slice ranges.
  398. *
  399. * The first dimension equals number of output blobs.
  400. * Inner vector has slice ranges for the first number of input dimensions.
  401. */
  402. std::vector<std::vector<Range> > sliceRanges;
  403. std::vector<std::vector<int> > sliceSteps;
  404. int axis;
  405. int num_split;
  406. static Ptr<SliceLayer> create(const LayerParams &params);
  407. };
  408. class CV_EXPORTS PermuteLayer : public Layer
  409. {
  410. public:
  411. static Ptr<PermuteLayer> create(const LayerParams& params);
  412. };
  413. /**
  414. * Permute channels of 4-dimensional input blob.
  415. * @param group Number of groups to split input channels and pick in turns
  416. * into output blob.
  417. *
  418. * \f[ groupSize = \frac{number\ of\ channels}{group} \f]
  419. * \f[ output(n, c, h, w) = input(n, groupSize \times (c \% group) + \lfloor \frac{c}{group} \rfloor, h, w) \f]
  420. * Read more at https://arxiv.org/pdf/1707.01083.pdf
  421. */
  422. class CV_EXPORTS ShuffleChannelLayer : public Layer
  423. {
  424. public:
  425. static Ptr<Layer> create(const LayerParams& params);
  426. int group;
  427. };
  428. /**
  429. * @brief Adds extra values for specific axes.
  430. * @param paddings Vector of paddings in format
  431. * @code
  432. * [ pad_before, pad_after, // [0]th dimension
  433. * pad_before, pad_after, // [1]st dimension
  434. * ...
  435. * pad_before, pad_after ] // [n]th dimension
  436. * @endcode
  437. * that represents number of padded values at every dimension
  438. * starting from the first one. The rest of dimensions won't
  439. * be padded.
  440. * @param value Value to be padded. Defaults to zero.
  441. * @param type Padding type: 'constant', 'reflect'
  442. * @param input_dims Torch's parameter. If @p input_dims is not equal to the
  443. * actual input dimensionality then the `[0]th` dimension
  444. * is considered as a batch dimension and @p paddings are shifted
  445. * to a one dimension. Defaults to `-1` that means padding
  446. * corresponding to @p paddings.
  447. */
  448. class CV_EXPORTS PaddingLayer : public Layer
  449. {
  450. public:
  451. static Ptr<PaddingLayer> create(const LayerParams& params);
  452. };
  453. /* Activations */
  454. class CV_EXPORTS ActivationLayer : public Layer
  455. {
  456. public:
  457. virtual void forwardSlice(const float* src, float* dst, int len,
  458. size_t outPlaneSize, int cn0, int cn1) const {};
  459. virtual void forwardSlice(const int* src, const int* lut, int* dst, int len,
  460. size_t outPlaneSize, int cn0, int cn1) const {};
  461. virtual void forwardSlice(const int8_t* src, const int8_t* lut, int8_t* dst, int len,
  462. size_t outPlaneSize, int cn0, int cn1) const {};
  463. };
  464. class CV_EXPORTS ReLULayer : public ActivationLayer
  465. {
  466. public:
  467. float negativeSlope;
  468. static Ptr<ReLULayer> create(const LayerParams &params);
  469. };
  470. class CV_EXPORTS ReLU6Layer : public ActivationLayer
  471. {
  472. public:
  473. float minValue, maxValue;
  474. static Ptr<ReLU6Layer> create(const LayerParams &params);
  475. };
  476. class CV_EXPORTS ChannelsPReLULayer : public ActivationLayer
  477. {
  478. public:
  479. static Ptr<Layer> create(const LayerParams& params);
  480. };
  481. class CV_EXPORTS ELULayer : public ActivationLayer
  482. {
  483. public:
  484. float alpha;
  485. static Ptr<ELULayer> create(const LayerParams &params);
  486. };
  487. class CV_EXPORTS TanHLayer : public ActivationLayer
  488. {
  489. public:
  490. static Ptr<TanHLayer> create(const LayerParams &params);
  491. };
  492. class CV_EXPORTS SwishLayer : public ActivationLayer
  493. {
  494. public:
  495. static Ptr<SwishLayer> create(const LayerParams &params);
  496. };
  497. class CV_EXPORTS MishLayer : public ActivationLayer
  498. {
  499. public:
  500. static Ptr<MishLayer> create(const LayerParams &params);
  501. };
  502. class CV_EXPORTS SigmoidLayer : public ActivationLayer
  503. {
  504. public:
  505. static Ptr<SigmoidLayer> create(const LayerParams &params);
  506. };
  507. class CV_EXPORTS BNLLLayer : public ActivationLayer
  508. {
  509. public:
  510. static Ptr<BNLLLayer> create(const LayerParams &params);
  511. };
  512. class CV_EXPORTS AbsLayer : public ActivationLayer
  513. {
  514. public:
  515. static Ptr<AbsLayer> create(const LayerParams &params);
  516. };
  517. class CV_EXPORTS PowerLayer : public ActivationLayer
  518. {
  519. public:
  520. float power, scale, shift;
  521. static Ptr<PowerLayer> create(const LayerParams &params);
  522. };
  523. class CV_EXPORTS ExpLayer : public ActivationLayer
  524. {
  525. public:
  526. float base, scale, shift;
  527. static Ptr<ExpLayer> create(const LayerParams &params);
  528. };
  529. class CV_EXPORTS CeilLayer : public ActivationLayer
  530. {
  531. public:
  532. static Ptr<CeilLayer> create(const LayerParams &params);
  533. };
  534. class CV_EXPORTS FloorLayer : public ActivationLayer
  535. {
  536. public:
  537. static Ptr<FloorLayer> create(const LayerParams &params);
  538. };
  539. class CV_EXPORTS LogLayer : public ActivationLayer
  540. {
  541. public:
  542. static Ptr<LogLayer> create(const LayerParams &params);
  543. };
  544. class CV_EXPORTS RoundLayer : public ActivationLayer
  545. {
  546. public:
  547. static Ptr<RoundLayer> create(const LayerParams &params);
  548. };
  549. class CV_EXPORTS SqrtLayer : public ActivationLayer
  550. {
  551. public:
  552. static Ptr<SqrtLayer> create(const LayerParams &params);
  553. };
  554. class CV_EXPORTS NotLayer : public ActivationLayer
  555. {
  556. public:
  557. static Ptr<NotLayer> create(const LayerParams &params);
  558. };
  559. class CV_EXPORTS AcosLayer : public ActivationLayer
  560. {
  561. public:
  562. static Ptr<AcosLayer> create(const LayerParams &params);
  563. };
  564. class CV_EXPORTS AcoshLayer : public ActivationLayer
  565. {
  566. public:
  567. static Ptr<AcoshLayer> create(const LayerParams &params);
  568. };
  569. class CV_EXPORTS AsinLayer : public ActivationLayer
  570. {
  571. public:
  572. static Ptr<AsinLayer> create(const LayerParams &params);
  573. };
  574. class CV_EXPORTS AsinhLayer : public ActivationLayer
  575. {
  576. public:
  577. static Ptr<AsinhLayer> create(const LayerParams &params);
  578. };
  579. class CV_EXPORTS AtanLayer : public ActivationLayer
  580. {
  581. public:
  582. static Ptr<AtanLayer> create(const LayerParams &params);
  583. };
  584. class CV_EXPORTS AtanhLayer : public ActivationLayer
  585. {
  586. public:
  587. static Ptr<AtanhLayer> create(const LayerParams &params);
  588. };
  589. class CV_EXPORTS CosLayer : public ActivationLayer
  590. {
  591. public:
  592. static Ptr<CosLayer> create(const LayerParams &params);
  593. };
  594. class CV_EXPORTS CoshLayer : public ActivationLayer
  595. {
  596. public:
  597. static Ptr<CoshLayer> create(const LayerParams &params);
  598. };
  599. class CV_EXPORTS ErfLayer : public ActivationLayer
  600. {
  601. public:
  602. static Ptr<ErfLayer> create(const LayerParams &params);
  603. };
  604. class CV_EXPORTS HardSwishLayer : public ActivationLayer
  605. {
  606. public:
  607. static Ptr<HardSwishLayer> create(const LayerParams &params);
  608. };
  609. class CV_EXPORTS SinLayer : public ActivationLayer
  610. {
  611. public:
  612. static Ptr<SinLayer> create(const LayerParams &params);
  613. };
  614. class CV_EXPORTS SinhLayer : public ActivationLayer
  615. {
  616. public:
  617. static Ptr<SinhLayer> create(const LayerParams &params);
  618. };
  619. class CV_EXPORTS SoftplusLayer : public ActivationLayer
  620. {
  621. public:
  622. static Ptr<SoftplusLayer> create(const LayerParams &params);
  623. };
  624. class CV_EXPORTS SoftsignLayer : public ActivationLayer
  625. {
  626. public:
  627. static Ptr<SoftsignLayer> create(const LayerParams &params);
  628. };
  629. class CV_EXPORTS TanLayer : public ActivationLayer
  630. {
  631. public:
  632. static Ptr<TanLayer> create(const LayerParams &params);
  633. };
  634. class CV_EXPORTS CeluLayer : public ActivationLayer
  635. {
  636. public:
  637. float alpha;
  638. static Ptr<CeluLayer> create(const LayerParams &params);
  639. };
  640. class CV_EXPORTS HardSigmoidLayer : public ActivationLayer
  641. {
  642. public:
  643. float alpha;
  644. float beta;
  645. static Ptr<HardSigmoidLayer> create(const LayerParams &params);
  646. };
  647. class CV_EXPORTS SeluLayer : public ActivationLayer
  648. {
  649. public:
  650. float alpha;
  651. float gamma;
  652. static Ptr<SeluLayer> create(const LayerParams &params);
  653. };
  654. class CV_EXPORTS ThresholdedReluLayer : public ActivationLayer
  655. {
  656. public:
  657. float alpha;
  658. static Ptr<ThresholdedReluLayer> create(const LayerParams &params);
  659. };
  660. class CV_EXPORTS ActivationLayerInt8 : public ActivationLayer
  661. {
  662. public:
  663. static Ptr<ActivationLayerInt8> create(const LayerParams &params);
  664. };
  665. /* Layers used in semantic segmentation */
  666. class CV_EXPORTS CropLayer : public Layer
  667. {
  668. public:
  669. static Ptr<Layer> create(const LayerParams &params);
  670. };
  671. /** @brief Element wise operation on inputs
  672. Extra optional parameters:
  673. - "operation" as string. Values are "sum" (default), "prod", "max", "div", "min"
  674. - "coeff" as float array. Specify weights of inputs for SUM operation
  675. - "output_channels_mode" as string. Values are "same" (default, all input must have the same layout), "input_0", "input_0_truncate", "max_input_channels"
  676. */
  677. class CV_EXPORTS EltwiseLayer : public Layer
  678. {
  679. public:
  680. static Ptr<EltwiseLayer> create(const LayerParams &params);
  681. };
  682. class CV_EXPORTS EltwiseLayerInt8 : public Layer
  683. {
  684. public:
  685. static Ptr<EltwiseLayerInt8> create(const LayerParams &params);
  686. };
  687. class CV_EXPORTS BatchNormLayer : public ActivationLayer
  688. {
  689. public:
  690. bool hasWeights, hasBias;
  691. float epsilon;
  692. static Ptr<BatchNormLayer> create(const LayerParams &params);
  693. };
  694. class CV_EXPORTS BatchNormLayerInt8 : public BatchNormLayer
  695. {
  696. public:
  697. float input_sc, output_sc;
  698. int input_zp, output_zp;
  699. static Ptr<BatchNormLayerInt8> create(const LayerParams &params);
  700. };
  701. class CV_EXPORTS MaxUnpoolLayer : public Layer
  702. {
  703. public:
  704. Size poolKernel;
  705. Size poolPad;
  706. Size poolStride;
  707. static Ptr<MaxUnpoolLayer> create(const LayerParams &params);
  708. };
  709. class CV_EXPORTS ScaleLayer : public Layer
  710. {
  711. public:
  712. bool hasBias;
  713. int axis;
  714. String mode;
  715. static Ptr<ScaleLayer> create(const LayerParams& params);
  716. };
  717. class CV_EXPORTS ScaleLayerInt8 : public ScaleLayer
  718. {
  719. public:
  720. float output_sc;
  721. int output_zp;
  722. static Ptr<ScaleLayerInt8> create(const LayerParams &params);
  723. };
  724. class CV_EXPORTS ShiftLayer : public Layer
  725. {
  726. public:
  727. static Ptr<Layer> create(const LayerParams& params);
  728. };
  729. class CV_EXPORTS ShiftLayerInt8 : public Layer
  730. {
  731. public:
  732. static Ptr<Layer> create(const LayerParams& params);
  733. };
  734. class CV_EXPORTS CompareLayer : public Layer
  735. {
  736. public:
  737. static Ptr<Layer> create(const LayerParams& params);
  738. };
  739. class CV_EXPORTS DataAugmentationLayer : public Layer
  740. {
  741. public:
  742. static Ptr<DataAugmentationLayer> create(const LayerParams& params);
  743. };
  744. class CV_EXPORTS CorrelationLayer : public Layer
  745. {
  746. public:
  747. static Ptr<CorrelationLayer> create(const LayerParams& params);
  748. };
  749. class CV_EXPORTS AccumLayer : public Layer
  750. {
  751. public:
  752. static Ptr<AccumLayer> create(const LayerParams& params);
  753. };
  754. class CV_EXPORTS FlowWarpLayer : public Layer
  755. {
  756. public:
  757. static Ptr<FlowWarpLayer> create(const LayerParams& params);
  758. };
  759. class CV_EXPORTS PriorBoxLayer : public Layer
  760. {
  761. public:
  762. static Ptr<PriorBoxLayer> create(const LayerParams& params);
  763. };
  764. class CV_EXPORTS ReorgLayer : public Layer
  765. {
  766. public:
  767. static Ptr<ReorgLayer> create(const LayerParams& params);
  768. };
  769. class CV_EXPORTS RegionLayer : public Layer
  770. {
  771. public:
  772. float nmsThreshold;
  773. static Ptr<RegionLayer> create(const LayerParams& params);
  774. };
  775. /**
  776. * @brief Detection output layer.
  777. *
  778. * The layer size is: @f$ (1 \times 1 \times N \times 7) @f$
  779. * where N is [keep_top_k] parameter multiplied by batch size. Each row is:
  780. * [image_id, label, confidence, xmin, ymin, xmax, ymax]
  781. * where image_id is the index of image input in the batch.
  782. */
  783. class CV_EXPORTS DetectionOutputLayer : public Layer
  784. {
  785. public:
  786. static Ptr<DetectionOutputLayer> create(const LayerParams& params);
  787. };
  788. /**
  789. * @brief \f$ L_p \f$ - normalization layer.
  790. * @param p Normalization factor. The most common `p = 1` for \f$ L_1 \f$ -
  791. * normalization or `p = 2` for \f$ L_2 \f$ - normalization or a custom one.
  792. * @param eps Parameter \f$ \epsilon \f$ to prevent a division by zero.
  793. * @param across_spatial If true, normalize an input across all non-batch dimensions.
  794. * Otherwise normalize an every channel separately.
  795. *
  796. * Across spatial:
  797. * @f[
  798. * norm = \sqrt[p]{\epsilon + \sum_{x, y, c} |src(x, y, c)|^p } \\
  799. * dst(x, y, c) = \frac{ src(x, y, c) }{norm}
  800. * @f]
  801. *
  802. * Channel wise normalization:
  803. * @f[
  804. * norm(c) = \sqrt[p]{\epsilon + \sum_{x, y} |src(x, y, c)|^p } \\
  805. * dst(x, y, c) = \frac{ src(x, y, c) }{norm(c)}
  806. * @f]
  807. *
  808. * Where `x, y` - spatial coordinates, `c` - channel.
  809. *
  810. * An every sample in the batch is normalized separately. Optionally,
  811. * output is scaled by the trained parameters.
  812. */
  813. class CV_EXPORTS NormalizeBBoxLayer : public Layer
  814. {
  815. public:
  816. float pnorm, epsilon;
  817. CV_DEPRECATED_EXTERNAL bool acrossSpatial;
  818. static Ptr<NormalizeBBoxLayer> create(const LayerParams& params);
  819. };
  820. /**
  821. * @brief Resize input 4-dimensional blob by nearest neighbor or bilinear strategy.
  822. *
  823. * Layer is used to support TensorFlow's resize_nearest_neighbor and resize_bilinear ops.
  824. */
  825. class CV_EXPORTS ResizeLayer : public Layer
  826. {
  827. public:
  828. static Ptr<ResizeLayer> create(const LayerParams& params);
  829. };
  830. /**
  831. * @brief Bilinear resize layer from https://github.com/cdmh/deeplab-public-ver2
  832. *
  833. * It differs from @ref ResizeLayer in output shape and resize scales computations.
  834. */
  835. class CV_EXPORTS InterpLayer : public Layer
  836. {
  837. public:
  838. static Ptr<Layer> create(const LayerParams& params);
  839. };
  840. class CV_EXPORTS ProposalLayer : public Layer
  841. {
  842. public:
  843. static Ptr<ProposalLayer> create(const LayerParams& params);
  844. };
  845. class CV_EXPORTS CropAndResizeLayer : public Layer
  846. {
  847. public:
  848. static Ptr<Layer> create(const LayerParams& params);
  849. };
  850. class CV_EXPORTS CumSumLayer : public Layer
  851. {
  852. public:
  853. int exclusive;
  854. int reverse;
  855. static Ptr<CumSumLayer> create(const LayerParams& params);
  856. };
  857. //! @}
  858. //! @}
  859. CV__DNN_INLINE_NS_END
  860. }
  861. }
  862. #endif