author     Juan Linietsky <reduzio@gmail.com>   2020-05-01 09:34:23 -0300
committer  Juan Linietsky <reduzio@gmail.com>   2020-05-10 15:59:09 -0300
commit     1bea8e1eacc68bcedbd3f207395bccf11011dae2 (patch)
tree       b75303a69491978c1e13360a3e6f355c5234dfe0 /thirdparty/oidn/mkl-dnn/include
parent     6a0473bcc23c096ef9ee929632a209761c2668f6 (diff)
New lightmapper
-Added LocalVector (needed it)
-Added stb_rect_pack (It's pretty cool, we could probably use it for other stuff too)
-Fixes and changes all around the place
-Added a library for 128-bit fixed point (required for Delaunay3D)
Diffstat (limited to 'thirdparty/oidn/mkl-dnn/include')
-rw-r--r--   thirdparty/oidn/mkl-dnn/include/mkldnn.h              1771
-rw-r--r--   thirdparty/oidn/mkl-dnn/include/mkldnn.hpp            2615
-rw-r--r--   thirdparty/oidn/mkl-dnn/include/mkldnn_debug.h          98
-rw-r--r--   thirdparty/oidn/mkl-dnn/include/mkldnn_types.h        1415
-rw-r--r--   thirdparty/oidn/mkl-dnn/include/mkldnn_version.h        32
-rw-r--r--   thirdparty/oidn/mkl-dnn/include/mkldnn_version.h.in     32
6 files changed, 5963 insertions, 0 deletions
diff --git a/thirdparty/oidn/mkl-dnn/include/mkldnn.h b/thirdparty/oidn/mkl-dnn/include/mkldnn.h new file mode 100644 index 0000000000..9b64994922 --- /dev/null +++ b/thirdparty/oidn/mkl-dnn/include/mkldnn.h @@ -0,0 +1,1771 @@ +/******************************************************************************* +* Copyright 2016-2018 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#ifndef MKLDNN_H +#define MKLDNN_H + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +/* All symbols shall be internal unless marked as MKLDNN_API */ +#if defined _WIN32 || defined __CYGWIN__ +# define MKLDNN_HELPER_DLL_IMPORT __declspec(dllimport) +# define MKLDNN_HELPER_DLL_EXPORT __declspec(dllexport) +#else +# if __GNUC__ >= 4 +# define MKLDNN_HELPER_DLL_IMPORT __attribute__ ((visibility ("default"))) +# define MKLDNN_HELPER_DLL_EXPORT __attribute__ ((visibility ("default"))) +# else +# define MKLDNN_HELPER_DLL_IMPORT +# define MKLDNN_HELPER_DLL_EXPORT +# endif +#endif + +#ifdef MKLDNN_DLL +# ifdef MKLDNN_DLL_EXPORTS +# define MKLDNN_API MKLDNN_HELPER_DLL_EXPORT +# else +# define MKLDNN_API MKLDNN_HELPER_DLL_IMPORT +# endif +#else +# define MKLDNN_API +#endif + +#if defined (__GNUC__) +# define MKLDNN_DEPRECATED __attribute__((deprecated)) +#elif defined(_MSC_VER) +# define MKLDNN_DEPRECATED __declspec(deprecated) +#else +# define MKLDNN_DEPRECATED +#endif + +#include "mkldnn_types.h" +#include "mkldnn_version.h" +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** @addtogroup c_api C API + * @{ */ + +/** @addtogroup c_api_primitive Primitive operations + * @{ */ + +/** @addtogroup c_api_primitive_common Common primitive operations + * @{ */ + +/** Creates a primitive descriptor @p iterator for given @p op_desc, @p attr, + * @p engine, and optionally a hint primitive descriptor from forward + * propagation (required for backward propagation). Pass @c NULL for forward + * propagation. + */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_iterator_create( + mkldnn_primitive_desc_iterator_t *iterator, + const_mkldnn_op_desc_t op_desc, const_mkldnn_primitive_attr_t attr, + mkldnn_engine_t engine, + const_mkldnn_primitive_desc_t hint_forward_primitive_desc); + +/** Iterates over primitive descriptors. Returns #mkldnn_iterator_ends if no + * more primitive descriptors are available. */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_iterator_next( + mkldnn_primitive_desc_iterator_t iterator); + +/** Fetches the current primitive descriptor. + * + * @note + * The user should delete the fetched primitive descriptor using + * mkldnn_primitive_desc_destroy() once it is no longer needed. 
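 *
 * Example usage (editor's illustrative sketch, not part of the upstream
 * header; assumes an already initialized @c op_desc, @c attr, and @c engine):
 * @code
 * mkldnn_primitive_desc_iterator_t it;
 * mkldnn_primitive_desc_iterator_create(&it, op_desc, attr, engine, NULL);
 * do {
 *     // the fetched copy is owned by the caller and must be destroyed
 *     mkldnn_primitive_desc_t pd = mkldnn_primitive_desc_iterator_fetch(it);
 *     // ... inspect pd (e.g. query the implementation info string) ...
 *     mkldnn_primitive_desc_destroy(pd);
 * } while (mkldnn_primitive_desc_iterator_next(it) != mkldnn_iterator_ends);
 * mkldnn_primitive_desc_iterator_destroy(it);
 * @endcode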
*/ +mkldnn_primitive_desc_t MKLDNN_API mkldnn_primitive_desc_iterator_fetch( + const_mkldnn_primitive_desc_iterator_t iterator); + +/** Deletes a primitive descriptor @p iterator */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_iterator_destroy( + mkldnn_primitive_desc_iterator_t iterator); + +/** Creates a @p primitive_desc using @p op_desc, @p attr, @p engine, and + * optionally a hint primitive descriptor from forward propagation. The call is + * equivalent to creating a primitive descriptor iterator, immediately fetching + * a primitive descriptor, and then destroying the iterator. */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_create( + mkldnn_primitive_desc_t *primitive_desc, + const_mkldnn_op_desc_t op_desc, const_mkldnn_primitive_attr_t attr, + mkldnn_engine_t engine, + const_mkldnn_primitive_desc_t hint_forward_primitive_desc); + +/** Makes a copy of a @p primitive_desc. */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_clone( + mkldnn_primitive_desc_t *primitive_desc, + const_mkldnn_primitive_desc_t existing_primitive_desc); + +/** Returns a constant reference to the attribute of a @p primitive_desc. + * + * @warning + * The user should not destroy the obtained @p attr. + * + * @warning + * The lifetime of an @p attr is the same as that of a @p primitive_desc, + * so it is illegal to use the @p attr once @p primitive_desc has been + * destroyed. */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_get_attr( + const_mkldnn_primitive_desc_t primitive_desc, + const_mkldnn_primitive_attr_t *attr); + +/** Deletes a @p primitive_desc. */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_destroy( + mkldnn_primitive_desc_t primitive_desc); + +/** Queries primitive descriptor + * + * One of the most typical use cases is to query a convolution primitive + * descriptor created with source, weights, and destination formats equal + * to #mkldnn_format_tag_any about the corresponding memory descriptors + * (@p what equals #mkldnn_query_src_md, #mkldnn_query_weights_md, and + * #mkldnn_query_dst_md respectively) to be able to prepare memory and + * create reorders if required. + * + * Another quite typical use case is to query an operation primitive + * descriptor for a workspace (@p what equals #mkldnn_query_workspace_md). + * The returned status #mkldnn_not_required indicates that a workspace is + * not required. + * + * A few other possibilities: + * - query an operation primitive descriptor for the underlying operation + * descriptor (#mkldnn_query_convolution_d, #mkldnn_query_eltwise_d, + * #mkldnn_query_rnn_d, etc.) + * - query an operation primitive descriptor for the implementation + * information string (#mkldnn_query_impl_info_str) + * - query an operation primitive descriptor for the number of inputs and + * outputs (#mkldnn_query_num_of_inputs_s32 and + * #mkldnn_query_num_of_outputs_s32 respectively) + * + * @sa mkldnn_query_t for more options + */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_query( + const_mkldnn_primitive_desc_t primitive_desc, mkldnn_query_t what, + int index, void *result); + +/** Queries primitive descriptor for memory descriptor + * + * @returns NULL in case of any error. + * + * This is just a specialized version of mkldnn_primitive_desc_query + * used for convenience. 
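 *
 * Example usage (editor's illustrative sketch, not part of the upstream
 * header; assumes a valid primitive descriptor @c pd):
 * @code
 * const mkldnn_memory_desc_t *src_md
 *         = mkldnn_primitive_desc_query_md(pd, mkldnn_query_src_md, 0);
 * const mkldnn_memory_desc_t *dst_md
 *         = mkldnn_primitive_desc_query_md(pd, mkldnn_query_dst_md, 0);
 * if (src_md == NULL || dst_md == NULL) {
 *     // handle the error
 * }
 * // compare the returned descriptors with the user's ones via
 * // mkldnn_memory_desc_equal() to decide whether reorders are needed
 * @endcode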
+ */ +const mkldnn_memory_desc_t MKLDNN_API *mkldnn_primitive_desc_query_md( + const_mkldnn_primitive_desc_t primitive_desc, mkldnn_query_t what, + int index); + +/** Queries primitive descriptor for signed 32bit int + * + * @returns 0 in case of any error (in particular if the queried entity is + * not of type int32_t). Note that 0 might also be the actual returned + * value. + * + * This is just a specialized version of mkldnn_primitive_desc_query + * used for convenience. + */ +int MKLDNN_API mkldnn_primitive_desc_query_s32( + const_mkldnn_primitive_desc_t primitive_desc, mkldnn_query_t what, + int index); + +/** Creates a @p primitive using a @p primitive_desc descriptor. */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_create( + mkldnn_primitive_t *primitive, + const_mkldnn_primitive_desc_t primitive_desc); + +/** Executes a @p primitive using a @p stream, and @p nargs arguments + * @p args. */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_execute( + const_mkldnn_primitive_t primitive, mkldnn_stream_t stream, + int nargs, const mkldnn_exec_arg_t *args); + +/** Retrieves a reference to the @p primitive_desc descriptor of given @p + * primitive. + * + * @warning + * The returned object must not be destroyed by the user. The @c const + * qualifier of the returned object prevents such attempts. */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_get_primitive_desc( + const_mkldnn_primitive_t primitive, + const_mkldnn_primitive_desc_t *primitive_desc); + +/** Deletes a @p primitive. */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_destroy( + mkldnn_primitive_t primitive); + +/** @} */ + +/** @addtogroup c_api_attributes Attributes + * An extension for controlling primitive behavior. + * @{ */ + +/** Creates an empty (default) @p attr attribute. All the parameters are set to + * default values. + * + * An empty attribute is used in primitive descriptor creation whenever it + * is not passed explicitly, e.g. in mkldnn_primitive_desc_create. + */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_create( + mkldnn_primitive_attr_t *attr); + +/** Makes a copy of an @p existing_attr. */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_clone( + mkldnn_primitive_attr_t *attr, + const_mkldnn_primitive_attr_t existing_attr); + +/** Deletes an @p attr. */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_destroy( + mkldnn_primitive_attr_t attr); + +/** Returns the scratchpad @p mode set in the attribute @p attr */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_get_scratchpad_mode( + const_mkldnn_primitive_attr_t attr, mkldnn_scratchpad_mode_t *mode); + +/** Sets scratchpad @p mode. + * + * The possible values are: #mkldnn_scratchpad_mode_library (default) and + * #mkldnn_scratchpad_mode_user. */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_scratchpad_mode( + mkldnn_primitive_attr_t attr, mkldnn_scratchpad_mode_t mode); + +/** Returns @p count, correspondence scale @p mask, and a pointer to a constant + * floating point array of output @p scales for given @p attr, previously set + * by mkldnn_primitive_attr_set_output_scales. + * + * @warning + * The @p scales array points to the internal @p attr field, so the user + * should not modify or destroy @p scales. + * + * @warning + * The lifetime of @p scales is the same as that of the @p attr to which it + * belongs, so it is illegal to use @p scales after @p attr is destroyed. 
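 *
 * Example usage (editor's illustrative sketch, not part of the upstream
 * header; assumes @c attr was previously configured with
 * mkldnn_primitive_attr_set_output_scales()):
 * @code
 * mkldnn_dim_t count;
 * int mask;
 * const float *scales; // points into attr internals: do not modify or free
 * mkldnn_primitive_attr_get_output_scales(attr, &count, &mask, &scales);
 * for (mkldnn_dim_t i = 0; i < count; ++i) {
 *     // read scales[i] while attr is still alive
 * }
 * @endcode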
+ */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_get_output_scales( + const_mkldnn_primitive_attr_t attr, mkldnn_dim_t *count, int *mask, + const float **scales); + +/** Sets output @p scales for primitive operations. The number of elements @p + * count and correspondence scale @p mask are stored for future use. + * + * The @p mask argument defines the correspondence between the output tensor + * dimensions and the @p scales array. Set the i-th bit of @p mask to 1 to use a + * dedicated scaling factor for each slice of the output tensor over the i-th + * dimension. Set @p mask to 0 to use a common scaling factor for the whole + * output tensor. + * + * @note + * The dimension order is always native and does not depend on the actual + * layout used. Examples: + * - 2D dimensional data the order of dimensions is always: (n, c) + * - 4D dimensional data the order is always: (n, c, h, w) + * - 5D dimensional weights the order is always: (g, oc, ic, kh, kw) + * + * Example usage: + * @code + * int mb = 32, oc = 32, oh = 14, ow = 14; // convolution output params + * float scales[oc] = { ... }; // unique output scales per output channel + * int oc_dim = 1; // mb_dim = 0, channel_dim = 1, height_dim = 2, ... + * + * mkldnn_convolution_desc_t cd; // create & configure convolution op_desc + * + * mkldnn_primitive_attr_t attr; + * mkldnn_primitive_attr_create(&attr); // create default attributes + * mkldnn_primitive_attr_set_output_scales(attr, oc, 1 << oc_dim, scales); + * + * mkldnn_primitive_desc_t cpd; + * mkldnn_primitive_desc_create(&cpd, &cd, attr, NULL); + * @endcode + * + * @note + * There is no way to check that @p count corresponds to @p mask until an + * actual primitive descriptor is created, so it is the user's + * responsibility to set proper values. The following formula must hold: + * + * \f[count = \prod\limits_{d \in mask} output.dims[d]\f] + */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_output_scales( + mkldnn_primitive_attr_t attr, mkldnn_dim_t count, int mask, + const float *scales); + +/** Returns @p post_ops for given @p attr. + * + * @warning + * @p post_ops points to the internal @p attr field, so the user should not + * modify or destroy @p post_ops. Also, the lifetime of @p post_ops is the + * same as that of the @p attr it belongs to, so it is illegal to use @p + * post_ops after @p attr has been destroyed. + */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_get_post_ops( + const_mkldnn_primitive_attr_t attr, const_mkldnn_post_ops_t *post_ops); + +/** Sets configured @p post_ops to an attribute @p attr for future use (when + * primitive descriptor is being created). + * + * @note + * At this point in time, there is no way to check whether the primitive + * descriptor does or does not support a given sequence of post operations. + * Therefore the user should handle an error that might occur at the + * mkldnn_primitive_desc_create call. + */ +mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_post_ops( + mkldnn_primitive_attr_t attr, const_mkldnn_post_ops_t post_ops); + +/** @addtogroup c_api_attributes_post_ops Sequence of post operations + * An extension for performing extra operations after a base operation. + * @{ */ + +/** Creates an empty sequence of post operations @p post_ops. */ +mkldnn_status_t MKLDNN_API mkldnn_post_ops_create(mkldnn_post_ops_t *post_ops); + +/** Deletes a @p post_ops sequence. 
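 *
 * Example of the typical lifetime of a post-ops chain (editor's illustrative
 * sketch, not part of the upstream header; assumes an existing @c attr):
 * @code
 * mkldnn_post_ops_t post_ops;
 * mkldnn_post_ops_create(&post_ops);
 * mkldnn_post_ops_append_sum(post_ops, 1.0f);        // dst <- dst + op(...)
 * mkldnn_post_ops_append_eltwise(post_ops, 1.0f,
 *         mkldnn_eltwise_relu, 0.f, 0.f);            // then apply ReLU
 * mkldnn_primitive_attr_set_post_ops(attr, post_ops);
 * // the attribute stores its own copy, so the standalone chain can be freed
 * mkldnn_post_ops_destroy(post_ops);
 * @endcode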
*/ +mkldnn_status_t MKLDNN_API mkldnn_post_ops_destroy(mkldnn_post_ops_t post_ops); + +/** Returns the @p length of post operations for given @p post_ops. */ +int MKLDNN_API mkldnn_post_ops_len(const_mkldnn_post_ops_t post_ops); + +/** Returns the type of post operation with index @p index in given + * @p post_ops. In case of error, returns #mkldnn_undefined_primitive. */ +mkldnn_primitive_kind_t MKLDNN_API mkldnn_post_ops_get_kind( + const_mkldnn_post_ops_t post_ops, int index); + +/** Appends accumulation (sum) post operation to the @p post_ops. Prior to + * accumulating the result, the previous value would be multiplied by @p scale. + * + * The kind of this post operation is #mkldnn_sum. + * + * This feature might improve performance for cases like residual learning + * blocks, where the result of convolution is accumulated to the previously + * computed activations. The parameter @p scale might be extreme for the + * integer-based computations when the result and previous activations have + * different logical scaling factors. + * + * In the simplest case when the accumulation is the only post operation, the + * computations would be: + * dst[] <- scale * dst[] + op(...) // instead of dst[] <- op(...) + * + * @note + * This post operation (as well as all the others) disregards the original + * layout of the destination; that is, the layout of the original + * destination is expected to be the same as the layout of the stored + * destination. + */ +mkldnn_status_t MKLDNN_API mkldnn_post_ops_append_sum( + mkldnn_post_ops_t post_ops, float scale); + +/** Gets the parameters of the accumulation (sum) post operation with index + * @p index in the sequence of @p post_ops. + * + * @note + * If index @p index would not correspond to the accumulation post + * operation, the function returns #mkldnn_invalid_arguments. + */ +mkldnn_status_t MKLDNN_API mkldnn_post_ops_get_params_sum( + const_mkldnn_post_ops_t post_ops, int index, float *scale); + +/** Appends eltwise post operation to the @p post_ops with given parameters + * @p kind, @p alpha, and @p beta (@sa mkldnn_eltwise_forward_desc_init and + * mkldnn_eltwise_desc_t). + * + * The kind of this post operation is #mkldnn_eltwise. + * + * In the simplest case when the eltwise is the only post operation, the + * computations would be: + * dst[] <- scale * eltwise_op ( op(...) ) // instead of dst[] <- op(...) + * where eltwise_op is configured with the given parameters. + */ +mkldnn_status_t MKLDNN_API mkldnn_post_ops_append_eltwise( + mkldnn_post_ops_t post_ops, float scale, mkldnn_alg_kind_t alg, + float alpha, float beta); + +/** Gets the eltwise parameters of the post operation with index @p index in + * the sequence of @p post_ops. + */ +mkldnn_status_t MKLDNN_API mkldnn_post_ops_get_params_eltwise( + const_mkldnn_post_ops_t post_ops, int index, float *scale, + mkldnn_alg_kind_t *alg, float *alpha, float *beta); + +/** @} */ + +/** @} */ + +/** @addtogroup c_api_memory Memory + * A primitive to describe and store data. + * + * The library supports various data types and formats. Memory hierarchy + * consists of three levels of abstraction: + * 1. **Memory descriptor** -- engine agnostic logical description of data + * (number of dimensions, dimensions themselves, and data type), and + * optionally the format/layout that describes the physical representation + * of data in memory. If the format is not known yet, one can pass + * #mkldnn_format_tag_any. 
This approach is used to allow compute-intensive + * primitives to specify the most appropriate format on their own with + * users required to reorder the data if the incoming format doesn't match + * the primitive's selection. Memory descriptor can be initialized with + * mkldnn_memory_desc_init_by_tag() or mkldnn_memory_desc_init_by_strides() + * functions, or by directly filling the mkldnn_memory_desc_t structure. + * The latter requires deep knowledge of how the physical data + * representation is mapped to the structure. + * The @ref understanding_memory_formats topic should shed some light on + * that. + * For the fully defined memory descriptors (i.e. where the format kind is + * not equal to #mkldnn_format_kind_any) a user can the size, using the + * mkldnn_memory_desc_get_size() function. As described in + * @ref understanding_memory_formats, the size of data sometimes cannot + * be computed as the product of dimensions times the size of the data + * type. So users are encouraged to use this function for better code + * portability. + * Two memory descriptors can be compared with mkldnn_memory_desc_equal(). + * The comparison is especially useful when checking whether a primitive + * requires reorder from the user's data format to the primitive's format. + * 2. **Memory** -- an engine-specific object that handles the data and its + * description (a memory descriptor). For CPU enigne, the data handle is + * simply a pointer to @c void. The data handle can be queried using + * mkldnn_memory_get_data_handle() and set using + * mkldnn_memory_set_data_handle(). The latter function always sets the + * memory in the padding region to zero, which is the invariant maintained + * by all the primitives in Intel MKL-DNN. + * See @ref understanding_memory_formats for more details. + * A memory can be created using mkldnn_memory_create() function. + * A memory can also be queried for the underlying memory descriptor and + * engine using mkldnn_memory_get_memory_desc() and + * mkldnn_memory_get_engine() functions. + * + * Along with ordinary memory with all dimensions being positive, Intel + * MKL-DNN supports *zero-volume* memory with one or more dimensions set to + * zero. This is to support the NumPy\* convention. + * If a *zero-volume* memory is passed to a primitive, the primitive does + * not perform any computations on this memory. For example: + * - Convolution with `(0 batch, 3 input channels, 13 height, 13 width)` + * source and `(16 output channels, 3 inputs, channel, 3 height, 3 width)` + * weights would produce `(0 batch, 16 output channels, 11 height, 11 width)` + * destination (assuming strides are `1` and paddings are zero) and perform + * zero multiply-add operations. + * - Concatenation of three memories of shapes `(3, 4, 13, 13)`, + * `(3, 0, 13, 13)`, and `(3, 1, 13, 13)` along the second axis would produce + * the output of the shape `(3, 5, 13, 13)`, effectively ignoring the second + * input (however, if the user created a concatenation primitive descriptor + * with three inputs they should also provide all three memories to the + * concatenation primitive, including the one with zero second dimension). + * - However, Intel MKL-DNN would return an error when attempting to create a + * convolution with *zero-volume* memory passed for weights because such a + * convolution is not well-defined: + * ~~~ + * dst(1, 16, 11, 11) <-- src(1, 0, 13, 13) (*) wei(16, 0, 3, 3) + * ~~~ + * Should the values in the destination be zeroes or just not accessed at + * all? 
Moreover, backward pass w.r.t. weights in such cases is also not + * well-defined. + * + * Data handle of *zero-volume* memory is never accessed and hence can be + * unset (NULL in case of CPU engine). + * + * @sa @ref understanding_memory_formats + * @{ */ + +/** Initializes a @p memory_desc memory descriptor using @p ndims, @p dims, @p + * data_type, and @p strides. + * + * The @p strides might be NULL, which means the order of physical dimensions + * is the same as the order of logical ones. + * + * @note The logical order of dimensions is defined by a primitive that + * consumes the memory. + */ +mkldnn_status_t MKLDNN_API mkldnn_memory_desc_init_by_strides( + mkldnn_memory_desc_t *memory_desc, int ndims, const mkldnn_dims_t dims, + mkldnn_data_type_t data_type, const mkldnn_dims_t strides); + +/** Initializes a @p memory_desc memory descriptor using @p ndims, @p dims, @p + * data_type, and format @p tag. + * + * @p tag can be #mkldnn_format_tag_any, which allows a primitive to define + * the appropriate memory format. In this case, the @p format_kind would be set + * to #mkldnn_format_kind_any */ +mkldnn_status_t MKLDNN_API mkldnn_memory_desc_init_by_tag( + mkldnn_memory_desc_t *memory_desc, int ndims, const mkldnn_dims_t dims, + mkldnn_data_type_t data_type, mkldnn_format_tag_t tag); + +/** Initializes a @p memory_desc for a given @p parent_memory_desc, with + * @p dims sizes and @p offsets. May fail if layout used does not allow + * obtain desired submemory. In this case consider using `extract` or `insert` + * primitive */ +mkldnn_status_t MKLDNN_API mkldnn_memory_desc_init_submemory( + mkldnn_memory_desc_t *memory_desc, + const mkldnn_memory_desc_t *parent_memory_desc, + const mkldnn_dims_t dims, const mkldnn_dims_t offsets); + +/** Compares two memory descriptors. + * @return 1 if the descriptors are the same. + * @return 0 if the descriptors are different. + * + * Use this function to identify whether a reorder is required between the + * two memories */ +int MKLDNN_API mkldnn_memory_desc_equal( + const mkldnn_memory_desc_t *lhs, + const mkldnn_memory_desc_t *rhs); + +/** Returns the size (in bytes) that is required for given @p memory_desc */ +size_t MKLDNN_API mkldnn_memory_desc_get_size( + const mkldnn_memory_desc_t *memory_desc); + +/** Creates a memory for given @p memory_desc and @p engine. Also sets handle + * to @p native_handle. + * The @p native_handle can: + * - point to the user allocated memory, i.e. valid handle. In this case the + * library doesn't own allocated memory. + * - be MKLDNN_NATIVE_HANDLE_ALLOCATE to ask the library to allocate and + * attach memory. In this case the library owns allocated memory. + * - be MKLDNN_NATIVE_HANDLE_NONE to create mkldnn_memory w/o attached memory. + */ +mkldnn_status_t MKLDNN_API mkldnn_memory_create(mkldnn_memory_t *memory, + const mkldnn_memory_desc_t *memory_desc, mkldnn_engine_t engine, + void *native_handle); + +/** Returns a @p memory_desc associated with @p memory. */ +mkldnn_status_t MKLDNN_API mkldnn_memory_get_memory_desc( + const_mkldnn_memory_t memory, + const mkldnn_memory_desc_t **memory_desc); + +/** Returns an @p engine associated with @p memory. */ +mkldnn_status_t MKLDNN_API mkldnn_memory_get_engine( + const_mkldnn_memory_t memory, mkldnn_engine_t *engine); + +/** For a @p memory, returns the data @p handle. + * + * For the CPU engine, the data handle is a pointer to the actual data. 
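 *
 * Example usage (editor's illustrative sketch, not part of the upstream
 * header; assumes a CPU @c engine and the #mkldnn_f32 / #mkldnn_nchw
 * constants from mkldnn_types.h):
 * @code
 * mkldnn_dim_t dims[4] = {2, 16, 7, 7}; // (n, c, h, w)
 * mkldnn_memory_desc_t md;
 * mkldnn_memory_desc_init_by_tag(&md, 4, dims, mkldnn_f32, mkldnn_nchw);
 *
 * mkldnn_memory_t mem;
 * // let the library allocate and own the buffer
 * mkldnn_memory_create(&mem, &md, engine, MKLDNN_NATIVE_HANDLE_ALLOCATE);
 *
 * void *ptr;
 * mkldnn_memory_get_data_handle(mem, &ptr); // CPU: plain pointer to the data
 * memset(ptr, 0, mkldnn_memory_desc_get_size(&md));
 *
 * mkldnn_memory_destroy(mem);
 * @endcode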
*/ +mkldnn_status_t MKLDNN_API mkldnn_memory_get_data_handle( + const_mkldnn_memory_t memory, void **handle); + +/** For a @p memory, sets the data @p handle. */ +mkldnn_status_t MKLDNN_API mkldnn_memory_set_data_handle( + mkldnn_memory_t memory, void *handle); + +/** Deletes a @p memory. */ +mkldnn_status_t MKLDNN_API mkldnn_memory_destroy(mkldnn_memory_t memory); + +/** @} */ + +/** @addtogroup c_api_reorder Reorder + * A primitive to copy data between memory formats. + * @{ */ + +/** Initializes a @p reorder_primitive_desc using the description of the source + * (@p src_engine and @p src_md) and destination (@p dst_engine and @p dst_md) + * memory, and an @p attr attribute. + * + * Inputs: + * - input (#mkldnn_query_src_md, 0) + * + * Outputs: + * - output (#mkldnn_query_dst_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_reorder_primitive_desc_create( + mkldnn_primitive_desc_t *reorder_primitive_desc, + mkldnn_engine_t src_engine, const mkldnn_memory_desc_t *src_md, + mkldnn_engine_t dst_engine, const mkldnn_memory_desc_t *dst_md, + const_mkldnn_primitive_attr_t attr); + +/** @} */ + +/** @addtogroup c_api_concat Concat + * A primitive to concatenate data by arbitrary dimension. + * @{ */ + +/** Creates out-of-place @p concat_primitive_desc for concatenation of @p n + * inputs by @p concat_dimension with resulting @p output_desc memory + * descriptor. @p output_desc can be NULL or specified with the + * #mkldnn_format_kind_any format kind -- in this case, the appropriate memory + * format would be chosen automatically. + * + * Inputs: + * - input 0 (#mkldnn_query_src_md, 0) + * - input 1 (#mkldnn_query_src_md, 1) + * - ... + * - input @p n - 1 (#mkldnn_query_src_md, @p n - 1) + * + * Outputs: + * - output (#mkldnn_query_dst_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_concat_primitive_desc_create( + mkldnn_primitive_desc_t *concat_primitive_desc, + const mkldnn_memory_desc_t *dst_md, + int n, int concat_dimension, + const mkldnn_memory_desc_t *src_mds, + const_mkldnn_primitive_attr_t attr, + mkldnn_engine_t engine); + +/** @} */ + +/** @addtogroup c_api_sum Sum + * A primitive to sum data. + * @{ */ + +/** Creates out-of-place @p sum_primitive_desc for sum of @p n + * inputs multiplied by scale with resulting @p output_desc memory + * descriptor. @p output_desc can be NULL or specified with the + * #mkldnn_format_kind_any format kind -- in this case, the appropriate memory + * format would be chosen automatically. + * + * Inputs: + * - src 0 (#mkldnn_query_src_md, 0) + * - src 1 (#mkldnn_query_src_md, 1) + * - ... + * - src @p n - 1 (#mkldnn_query_src_md, @p n - 1) + * + * Outputs: + * - output (#mkldnn_query_dst_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_sum_primitive_desc_create( + mkldnn_primitive_desc_t *sum_primitive_desc, + const mkldnn_memory_desc_t *dst_mds, + int n, const float *scales, + const mkldnn_memory_desc_t *src_mds, + const_mkldnn_primitive_attr_t attr, + mkldnn_engine_t engine); + +/** @} */ + +/** @addtogroup c_api_convolution Convolution + * A primitive to compute convolution using different algorithms. 
+ * + * \f[dst[n][oc][oh][ow] = + * \sum_{kw=0}^{KW}\sum_{kh=0}^{KH}\sum_{ic=0}^{IC} + * src[n][ic][oh \cdot s_h - p_l[0] + kh][ow \cdot s_w - p_r[1] + kw] + * \cdot weights[g][oc][ic][kh][kw] + * + bias[g][oc],\f] + * + * where size of output spatial domain is given by + * \f$ OH = \left\lfloor{\frac{IH - KH + p_l[0] + p_r[0]}{s_h}} + * \right\rfloor + 1\f$, + * \f$ OW = \left\lfloor{\frac{IW - KW + p_l[1] + p_r[1]}{s_w}} + * \right\rfloor + 1\f$, + * + * and summation is carried over input channels \f$ic\f$ in + * group \f$g\f$, and \f$s_h, s_w\f$ are @p strides and + * \f$p_l, p_r\f$ are @p padding_l and @p padding_r. + * @{ */ + +/** Initializes a convolution descriptor @p conv_desc for forward propagation + * using @p prop_kind (possible values are #mkldnn_forward_training and + * #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides, @p + * padding_l, @p padding_r, and @p padding_kind. In order to create a + * convolution without bias, @p bias_desc should either be @c NULL or point to + * a descriptor with memory format kind equal to #mkldnn_format_kind_undef. + * + * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric. + * + * @note Memory descriptors are allowed to be initialized with + * #mkldnn_format_kind_any value of @p format_kind. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * - weights (#mkldnn_query_weights_md, 0) + * - bias (#mkldnn_query_weights_md, 1), if created with bias + * + * Outputs: + * - dst (#mkldnn_query_dst_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_convolution_forward_desc_init( + mkldnn_convolution_desc_t *conv_desc, mkldnn_prop_kind_t prop_kind, + mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *src_desc, + const mkldnn_memory_desc_t *weights_desc, + const mkldnn_memory_desc_t *bias_desc, + const mkldnn_memory_desc_t *dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t padding_l, const mkldnn_dims_t padding_r, + mkldnn_padding_kind_t padding_kind); + +/** Initializes a dilated convolution descriptor @p conv_desc for forward + * propagation using @p prop_kind (possible values are #mkldnn_forward_training + * and #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides, + * @p dilates, @p padding_l, @p padding_r, and @p padding_kind. + * In order to create a dilated convolution without bias, @p bias_desc + * should either be @c NULL or point to a descriptor with memory format kind + * equals #mkldnn_format_kind_undef. + * + * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric. + * + * @note Memory descriptors are allowed to be initialized with + * #mkldnn_format_kind_any value of @p format_kind. 
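 *
 * Example usage of the plain (non-dilated) forward variant declared above
 * (editor's illustrative sketch, not part of the upstream header; assumes
 * already initialized src/weights/bias/dst memory descriptors, an @c engine,
 * and the #mkldnn_convolution_direct / #mkldnn_padding_zero constants from
 * mkldnn_types.h):
 * @code
 * mkldnn_dim_t strides[2] = {1, 1};
 * mkldnn_dim_t padding[2] = {1, 1};
 *
 * mkldnn_convolution_desc_t conv_d;
 * mkldnn_convolution_forward_desc_init(&conv_d, mkldnn_forward_inference,
 *         mkldnn_convolution_direct, &src_md, &weights_md, &bias_md, &dst_md,
 *         strides, padding, padding, mkldnn_padding_zero);
 *
 * mkldnn_primitive_desc_t conv_pd;
 * mkldnn_primitive_desc_create(&conv_pd, &conv_d, NULL, engine, NULL);
 *
 * mkldnn_primitive_t conv;
 * mkldnn_primitive_create(&conv, conv_pd);
 * mkldnn_primitive_desc_destroy(conv_pd);
 * @endcode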
+ * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * - weights (#mkldnn_query_weights_md, 0) + * - bias (#mkldnn_query_weights_md, 1), if created with bias + * + * Outputs: + * - dst (#mkldnn_query_dst_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_dilated_convolution_forward_desc_init( + mkldnn_convolution_desc_t *conv_desc, mkldnn_prop_kind_t prop_kind, + mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *src_desc, + const mkldnn_memory_desc_t *weights_desc, + const mkldnn_memory_desc_t *bias_desc, + const mkldnn_memory_desc_t *dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t dilates, const mkldnn_dims_t padding_l, + const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind); + +/** Initializes a convolution descriptor @p conv_desc for backward propagation + * with respect to data using @p alg_kind, memory descriptors, @p strides, @p + * padding_l, @p padding_r, and @p padding_kind. + * + * @note Memory descriptors are allowed to be initialized with + * #mkldnn_format_kind_any value of @p format_kind. + * + * Inputs: + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * - weights (#mkldnn_query_weights_md, 0) + * + * Outputs: + * - diff_src (#mkldnn_query_diff_src_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_convolution_backward_data_desc_init( + mkldnn_convolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind, + const mkldnn_memory_desc_t *diff_src_desc, + const mkldnn_memory_desc_t *weights_desc, + const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t padding_l, const mkldnn_dims_t padding_r, + mkldnn_padding_kind_t padding_kind); + +/** Initializes a dilated convolution descriptor @p conv_desc for backward + * propagation with respect to data using @p alg_kind, memory descriptors, @p + * strides, @p dilates @p padding_l, @p padding_r, and @p padding_kind. + * + * @note Memory descriptors are allowed to be initialized with + * #mkldnn_format_kind_any value of @p format_kind. + * + * Inputs: + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * - weights (#mkldnn_query_weights_md, 0) + * + * Outputs: + * - diff_src (#mkldnn_query_diff_src_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_dilated_convolution_backward_data_desc_init( + mkldnn_convolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind, + const mkldnn_memory_desc_t *diff_src_desc, + const mkldnn_memory_desc_t *weights_desc, + const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t dilates, const mkldnn_dims_t padding_l, + const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind); + +/** Initializes a convolution descriptor @p conv_desc for backward propagation + * with respect to weights using @p alg_kind, memory descriptors, @p strides, + * @p padding_l, @p padding_r, and @p padding_kind. + * + * @note Memory descriptors are allowed to be initialized with + * #mkldnn_format_kind_any value of @p format_kind. 
+ * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * + * Outputs: + * - diff_weights (#mkldnn_query_diff_weights_md, 0) + * - diff_bias (#mkldnn_query_diff_weights_md, 1), if created with bias + */ +mkldnn_status_t MKLDNN_API mkldnn_convolution_backward_weights_desc_init( + mkldnn_convolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind, + const mkldnn_memory_desc_t *src_desc, + const mkldnn_memory_desc_t *diff_weights_desc, + const mkldnn_memory_desc_t *diff_bias_desc, + const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t padding_l, const mkldnn_dims_t padding_r, + mkldnn_padding_kind_t padding_kind); + +/** Initializes a convolution descriptor @p conv_desc for backward propagation + * with respect to weights using @p alg_kind, memory descriptors, @p strides, + * @p dilates @p padding_l, @p padding_r, and @p padding_kind. + * + * @note Memory descriptors are allowed to be initialized with + * #mkldnn_format_kind_any value of @p format_kind. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * + * Outputs: + * - diff_weights (#mkldnn_query_diff_weights_md, 0) + * - diff_bias (#mkldnn_query_diff_weights_md, 1), if created with bias + */ +mkldnn_status_t MKLDNN_API +mkldnn_dilated_convolution_backward_weights_desc_init( + mkldnn_convolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind, + const mkldnn_memory_desc_t *src_desc, + const mkldnn_memory_desc_t *diff_weights_desc, + const mkldnn_memory_desc_t *diff_bias_desc, + const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t dilates, const mkldnn_dims_t padding_l, + const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind); + +/** @} */ + +/** @addtogroup c_api_deconvolution Deconvolution + * A primitive to compute deconvolution using different algorithms. + * + * @{ */ + + +/** Initializes a deconvolution descriptor @p deconv_desc for forward + * propagation using @p prop_kind (possible values are #mkldnn_forward_training + * and #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides, + * @p padding_l, @p padding_r, and @p padding_kind. In order to create a + * deconvolution without bias, @p bias_desc should either be @c NULL or point to + * a descriptor with memory format kind equals #mkldnn_format_kind_undef. + * + * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric. + * + * @note Memory descriptors are allowed to be initialized with + * #mkldnn_format_kind_any value of @p format_kind. 
+ * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * - weights (#mkldnn_query_weights_md, 0) + * - bias (#mkldnn_query_weights_md, 1), if created with bias + * + * Outputs: + * - dst (#mkldnn_query_dst_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_deconvolution_forward_desc_init( + mkldnn_deconvolution_desc_t *conv_desc, mkldnn_prop_kind_t prop_kind, + mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *src_desc, + const mkldnn_memory_desc_t *weights_desc, + const mkldnn_memory_desc_t *bias_desc, + const mkldnn_memory_desc_t *dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t padding_l, const mkldnn_dims_t padding_r, + mkldnn_padding_kind_t padding_kind); + +/** Initializes a dilated deconvolution descriptor @p deconv_desc for forward + * propagation using @p prop_kind (possible values are #mkldnn_forward_training + * and #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides, + * @p dilates, @p padding_l, @p padding_r, and @p padding_kind. In order to + * create a dilated deconvolution without bias, @p bias_desc should either be + * @c NULL or point to a descriptor with memory format kind equal + * #mkldnn_format_kind_undef. + * + * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric. + * + * @note Memory descriptors are allowed to be initialized with + * #mkldnn_format_kind_any value of @p format_kind. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * - weights (#mkldnn_query_weights_md, 0) + * - bias (#mkldnn_query_weights_md, 1), if created with bias + * + * Outputs: + * - dst (#mkldnn_query_dst_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_dilated_deconvolution_forward_desc_init( + mkldnn_deconvolution_desc_t *conv_desc, mkldnn_prop_kind_t prop_kind, + mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *src_desc, + const mkldnn_memory_desc_t *weights_desc, + const mkldnn_memory_desc_t *bias_desc, + const mkldnn_memory_desc_t *dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t dilates, const mkldnn_dims_t padding_l, + const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind); + +/** Initializes a deconvolution descriptor @p conv_desc for backward propagation + * with respect to data using @p alg_kind, memory descriptors, @p strides, @p + * padding_l, @p padding_r, and @p padding_kind. + * + * @note Memory descriptors are allowed to be initialized with + * #mkldnn_format_kind_any value of @p format_kind. + * + * Inputs: + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * - weights (#mkldnn_query_weights_md, 0) + * + * Outputs: + * - diff_src (#mkldnn_query_diff_src_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_deconvolution_backward_data_desc_init( + mkldnn_deconvolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind, + const mkldnn_memory_desc_t *diff_src_desc, + const mkldnn_memory_desc_t *weights_desc, + const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t padding_l, const mkldnn_dims_t padding_r, + mkldnn_padding_kind_t padding_kind); + +/** Initializes a dilated deconvolution descriptor @p conv_desc for backward + * propagation with respect to data using @p alg_kind, memory descriptors, @p + * strides, @p dilates, @p padding_l, @p padding_r, and @p padding_kind. + * + * @note Memory descriptors are allowed to be initialized with + * #mkldnn_format_kind_any value of @p format_kind. 
+ * + * Inputs: + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * - weights (#mkldnn_query_weights_md, 0) + * + * Outputs: + * - diff_src (#mkldnn_query_diff_src_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_dilated_deconvolution_backward_data_desc_init( + mkldnn_deconvolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind, + const mkldnn_memory_desc_t *diff_src_desc, + const mkldnn_memory_desc_t *weights_desc, + const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t dilates, const mkldnn_dims_t padding_l, + const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind); + +/** Initializes a deconvolution descriptor @p conv_desc for backward propagation + * with respect to weights using @p alg_kind, memory descriptors, @p strides, + * @p padding_l, @p padding_r, and @p padding_kind. + * + * @note Memory descriptors are allowed to be initialized with + * #mkldnn_format_kind_any value of @p format_kind. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * + * Outputs: + * - diff_weights (#mkldnn_query_diff_weights_md, 0) + * - diff_bias (#mkldnn_query_diff_weights_md, 1), if created with bias + */ +mkldnn_status_t MKLDNN_API mkldnn_deconvolution_backward_weights_desc_init( + mkldnn_deconvolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind, + const mkldnn_memory_desc_t *src_desc, + const mkldnn_memory_desc_t *diff_weights_desc, + const mkldnn_memory_desc_t *diff_bias_desc, + const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t padding_l, const mkldnn_dims_t padding_r, + mkldnn_padding_kind_t padding_kind); + +/** Initializes a dilated deconvolution descriptor @p conv_desc for backward + * propagation with respect to weights using @p alg_kind, memory descriptors, + * @p strides, @p dilates, @p padding_l, @p padding_r, and @p padding_kind. + * + * @note Memory descriptors are allowed to be initialized with + * #mkldnn_format_kind_any value of @p format_kind. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * + * Outputs: + * - diff_weights (#mkldnn_query_diff_weights_md, 0) + * - diff_bias (#mkldnn_query_diff_weights_md, 1), if created with bias + */ +mkldnn_status_t MKLDNN_API mkldnn_dilated_deconvolution_backward_weights_desc_init( + mkldnn_deconvolution_desc_t *conv_desc, mkldnn_alg_kind_t alg_kind, + const mkldnn_memory_desc_t *src_desc, + const mkldnn_memory_desc_t *diff_weights_desc, + const mkldnn_memory_desc_t *diff_bias_desc, + const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t dilates, const mkldnn_dims_t padding_l, + const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind); + +/** @} */ + +/** @addtogroup c_api_shuffle Shuffle + * A primitive to shuffle data along the axis. + * @{ */ + +/** Initializes a @p shuffle_desc for forward propagation using @p prop_kind, + * memory descriptor @p data_desc, @p axis, and @p group_size. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * + * Outputs: + * - dst (#mkldnn_query_dst_md, 0) + * + */ +mkldnn_status_t MKLDNN_API mkldnn_shuffle_forward_desc_init( + mkldnn_shuffle_desc_t *shuffle_desc, mkldnn_prop_kind_t prop_kind, + const mkldnn_memory_desc_t *data_desc, int axis, + mkldnn_dim_t group_size); + +/** Initializes a @p shuffle_desc for backward propagation using memory + * descriptor @p diff_data_desc, @p axis, and @p group_size. 
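 *
 * Example usage of the forward counterpart (editor's illustrative sketch,
 * not part of the upstream header; assumes an existing @c data_md describing
 * an (n, c, h, w) tensor and an @c engine):
 * @code
 * // shuffle along the channel axis (1), with 4 channels per group
 * mkldnn_shuffle_desc_t shuffle_d;
 * mkldnn_shuffle_forward_desc_init(&shuffle_d, mkldnn_forward_inference,
 *         &data_md, 1, 4);
 *
 * mkldnn_primitive_desc_t shuffle_pd;
 * mkldnn_primitive_desc_create(&shuffle_pd, &shuffle_d, NULL, engine, NULL);
 * @endcode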
+ * + * + * Inputs: + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * + * Outputs: + * - diff_src (#mkldnn_query_diff_src_md, 0) + * + */ +mkldnn_status_t MKLDNN_API mkldnn_shuffle_backward_desc_init( + mkldnn_shuffle_desc_t *shuffle_desc, + const mkldnn_memory_desc_t *diff_data_desc, int axis, + mkldnn_dim_t group_size); + +/** @} */ + +/** @addtogroup c_api_eltwise Eltwise + * A primitive to compute element-wise operations like parametric rectifier + * linear unit (ReLU). + * + * Both forward and backward passes support in-place operation; that is, src + * and dst point to the same memory for forward pass, and diff_dst and diff_src + * point to the same memory for backward pass. + * + * @warning Because the original src is required for backward pass, in-place + * forward pass in general cannot be applied during training. However, for some + * kinds of element-wise operations (namely ReLU with alpha parameter equals 0), + * dst and src can be interchangeable for the backward pass, which enables + * performing in-place forward even for training. + * + * @{ */ + +/** Initializes an @p eltwise_desc for forward propagation using @p prop_kind + * (possible values are #mkldnn_forward_training and #mkldnn_forward_inference), + * @p alg_kind algorithm, memory descriptor @p data_desc, @p alpha, and + * @p beta parameters. + * + * @sa mkldnn_eltwise_desc_t for details. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * + * Outputs: + * - dst (#mkldnn_query_dst_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_eltwise_forward_desc_init( + mkldnn_eltwise_desc_t *eltwise_desc, mkldnn_prop_kind_t prop_kind, + mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *data_desc, + float alpha, float beta); + +/** Initializes an @p eltwise_desc for backward propagation using @p alg_kind + * algorithm memory descriptors @p diff_data_desc and @p data_desc, and the + * @p alpha and @p beta parameters. + * + * @sa mkldnn_eltwise_desc_t for details. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * + * Outputs: + * - diff_src (#mkldnn_query_diff_src_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_eltwise_backward_desc_init( + mkldnn_eltwise_desc_t *eltwise_desc, mkldnn_alg_kind_t alg_kind, + const mkldnn_memory_desc_t *diff_data_desc, + const mkldnn_memory_desc_t *data_desc, float alpha, float beta); + +/** @} */ + +/** @addtogroup c_api_softmax Softmax + * A primitive to perform softmax. + * + * \f[dst[u][c][in] = + * \frac{\exp(src[ou][c][in]) - \max\limits_{c}(src[ou][c][in])} + * {\sum\limits_{c}\{\exp(src[ou][c][in]) + * - \max\limits_{c}(src[ou][c][in])\}},\f] + * + * where \f$ou, iu\f$ are outer and inner sizes repectively, defined + * by @p data_desc.dims and @p softmax_axis. + * @{ */ + +/** Initializes a @p softmax_desc for forward propagation using @p prop_kind + * (possible values are #mkldnn_forward_training and #mkldnn_forward_inference) + * and memory descriptor @p data_desc. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * + * Outputs: + * - dst (#mkldnn_query_dst_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_softmax_forward_desc_init( + mkldnn_softmax_desc_t *softmax_desc, mkldnn_prop_kind_t prop_kind, + const mkldnn_memory_desc_t *data_desc, int softmax_axis); + +/** Initializes a @p softmax_desc for backward propagation using memory + * descriptors @p diff_desc and @p data_desc. 
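 *
 * Example usage of the forward counterpart (editor's illustrative sketch,
 * not part of the upstream header; assumes an existing 2D @c data_md with
 * dimensions (mb, classes)):
 * @code
 * // normalize over the class dimension (axis 1)
 * mkldnn_softmax_desc_t softmax_d;
 * mkldnn_softmax_forward_desc_init(&softmax_d, mkldnn_forward_inference,
 *         &data_md, 1);
 * @endcode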
+ * + * Inputs: + * - dst (#mkldnn_query_dst_md, 0) + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * + * Outputs: + * - diff_src (#mkldnn_query_diff_src_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_softmax_backward_desc_init( + mkldnn_softmax_desc_t *softmax_desc, + const mkldnn_memory_desc_t *diff_desc, + const mkldnn_memory_desc_t *data_desc, int softmax_axis); + +/** @} */ + +/** @addtogroup c_api_pooling Pooling + * A primitive to perform max or average pooling. + * + * Max pooling: + * \f[dst[n][oc][oh][ow] = + * \max\limits_{kw,kh} + * (src[n][ic][oh \cdot s_h - p_l[0] + kh][ow \cdot s_w - p_r[1] + kw]),\f] + * + * Average pooling: + * \f[dst[n][oc][oh][ow] = + * \frac{1}{KW \cdot KH}\sum\limits_{kw,kh} + * src[n][ic][oh \cdot s_h - p_l[0] + kh][ow \cdot s_w - p_r[1] + kw],\f] + * + * where \f$p_l, p_r\f$ are @p padding_l and @p padding_r respectively, and + * output spatial dimensions are calculated similarly to how they are done in + * convolution. + * + * During training, max pooling requires a workspace on forward + * (#mkldnn_forward_training) and backward (#mkldnn_backward) passes to + * save indices where maximum was found. The workspace layout is opaque, and + * the indices cannot be restored from it. However, one can use backward + * pooling to perform up-sampling (used in some detection topologies). + * + * @{ */ + +/** Initializes a pooling descriptor @p pool_desc for forward propagation using + * @p prop_kind (possible values are #mkldnn_forward_training and + * #mkldnn_forward_inference), @p alg_kind, memory descriptors, and pooling + * parameters in the spatial domain: @p strides, @p kernel sizes, @p padding_l, + * @p padding_r, and @p padding_kind. + * + * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * + * Outputs: + * - dst (#mkldnn_query_dst_md, 0) + * - workspace (#mkldnn_query_workspace_md, 0), + * if @p alg_kind = #mkldnn_pooling_max and + * @p prop_kind = #mkldnn_forward_training + */ +mkldnn_status_t MKLDNN_API mkldnn_pooling_forward_desc_init( + mkldnn_pooling_desc_t *pool_desc, mkldnn_prop_kind_t prop_kind, + mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *src_desc, + const mkldnn_memory_desc_t *dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t kernel, const mkldnn_dims_t padding_l, + const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind); + +/** Initializes a pooling descriptor @p pool_desc for backward propagation + * using @p alg_kind, memory descriptors, and pooling parameters in the spatial + * domain: @p strides, @p kernel sizes, @p padding_l, @p padding_r, and @p + * padding_kind. + * + * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric. + * + * Inputs: + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * - workspace (#mkldnn_query_workspace_md, 0), + * if @p alg_kind = #mkldnn_pooling_max + * + * Outputs: + * - diff_src (#mkldnn_query_diff_src_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_pooling_backward_desc_init( + mkldnn_pooling_desc_t *pool_desc, mkldnn_alg_kind_t alg_kind, + const mkldnn_memory_desc_t *diff_src_desc, + const mkldnn_memory_desc_t *diff_dst_desc, const mkldnn_dims_t strides, + const mkldnn_dims_t kernel, const mkldnn_dims_t padding_l, + const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind); + +/** @} */ + +/** @addtogroup c_api_lrn LRN + * A primitive to perform local response normalization (LRN) across or within + * channels. 
+ * + * LRN accross channels: + * \f[dst[n][c][h][w] = \left\{k + \frac{\alpha}{n_{l}} + * \sum\limits_{i=-(n_{l}-1)/2}^{(n_{l}+1)/2} + * (src[n][c+i][h][w])^2\right\}^{-\beta} + * src[n][c][h][w],\f] + * + * LRN within channels: + * \f[dst[n][c][h][w] = \left\{k + \frac{\alpha}{n_{l}} + * \sum\limits_{i=-(n_{l}-1)/2}^{(n_{l}+1)/2} + * (src[n][c][h+i][w+i])^2\right\}^{-\beta} + * src[n][c][h][w],\f] + * + * where \f$n_{l}\f$ is the @p local_size. + * + * During training, LRN might or might not require a workspace on forward + * (#mkldnn_forward_training) and backward (#mkldnn_backward) passes. The + * behavior is implementation specific. Optimized implementations typically + * require a workspace and use it to save some intermediate results from the + * forward pass that accelerate computations on the backward pass. + * + * To check whether a workspace is required, query the LRN primitive descriptor + * for the workspace (#mkldnn_query_workspace_md). Success indicates that the + * workspace is required and its description will be returned. + * @sa mkldnn_primitive_desc_query and mkldnn_primitive_desc_query_pd + * + * @{ */ + +/** Initializes an @p lrn_desc for forward propagation using @p prop_kind + * (possible values are #mkldnn_forward_training and #mkldnn_forward_inference), + * @p alg_kind, memory descriptor @p data_desc, and regularization + * parameters @p local_size, @p alpha, @p beta, and @p k. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * + * Outputs: + * - dst (#mkldnn_query_dst_md, 0) + * - workspace (#mkldnn_query_workspace_md, 0), + * if the underlying implementation requires + */ +mkldnn_status_t MKLDNN_API mkldnn_lrn_forward_desc_init( + mkldnn_lrn_desc_t *lrn_desc, mkldnn_prop_kind_t prop_kind, + mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *data_desc, + mkldnn_dim_t local_size, float alpha, float beta, float k); + +/** Initializes an @p lrn_desc for backward propagation using @p alg_kind, + * memory descriptors @p data_desc and @p diff_data_desc, and regularization + * parameters @p local_size, @p alpha, @p beta, and @p k. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * - workspace (#mkldnn_query_workspace_md, 0), + * if the underlying implementation requires + * + * Outputs: + * - diff_src (#mkldnn_query_diff_src_md, 0) + */ +mkldnn_status_t MKLDNN_API mkldnn_lrn_backward_desc_init( + mkldnn_lrn_desc_t *lrn_desc, mkldnn_alg_kind_t alg_kind, + const mkldnn_memory_desc_t *diff_data_desc, + const mkldnn_memory_desc_t *data_desc, mkldnn_dim_t local_size, + float alpha, float beta, float k); + +/** @} */ + +/** @addtogroup c_api_batch_normalization Batch Normalization + * A primitive to perform batch normalization. + * + * \f[dst[n][c][h][w] = \gamma[c] \frac{src[n][c][h][w] - \mu[c]} + * {\sqrt{\sigma[c] + eps}} + \beta[c],\f] + * + * where \f$\gamma[c], \beta[c]\f$ are weights and bias for a channel and, + * + * \f$\mu[c] = \frac{1}{NHW} \sum\limits_{whn} src[n][c][h][w]\f$, + * \f$\sigma[c] = \frac{1}{NHW} \sum\limits_{whn} + * (src[n][c][h][w] - \mu[c])^2\f$, + * + * and @c eps is a constant to improve numerical stability. + * + * Both forward and backward passes support in-place operation; that is, src + * and dst point to the same memory for forward pass, and diff_dst and diff_src + * point to the same memory for backward pass. + * + * Batch normalization supports different flavors controlled by + * mkldnn_batch_normalization_desc_t. 
For example, batch normalization can + * compute the mean and variance on its own or take them as inputs. It can + * either perform scaling and shifting using gamma and beta parameters or not. + * Optionally it can also perform a fused ReLU, which in case of training would + * also require a workspace. + * + * @sa mkldnn_batch_normalization_desc_t + * @{ */ + +/** Initializes a batch normalization descriptor @p bnrm_desc for forward + * propagation using @p prop_kind (possible values are + * #mkldnn_forward_training and #mkldnn_forward_inference), memory descriptor + * @p data_desc, normalization parameter @p epsilon, and @p flags set using bit + * flags of type mkldnn_batch_normalization_desc_t. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * - mean (#mkldnn_query_src_md, 1), + * if #mkldnn_use_global_stats bit-flags is set in @p flags + * - variance (#mkldnn_query_src_md, 2), + * if #mkldnn_use_global_stats bit-flags is set in @p flags + * - scale_and_shift (#mkldnn_query_weights_md, 0), + * if #mkldnn_use_scaleshift bit-flags is set in @p flags + * + * Outputs: + * - dst (#mkldnn_query_dst_md, 0) + * - mean (#mkldnn_query_dst_md, 1), + * if #mkldnn_use_global_stats bit-flags is not set in @p flags + * @p prop_kind = #mkldnn_forward_training + * - variance (#mkldnn_query_dst_md, 2), + * if #mkldnn_use_global_stats bit-flags is not set in @p flags + * and @p prop_kind = #mkldnn_forward_training + * - workspace (#mkldnn_query_workspace_md, 0), + * if #mkldnn_fuse_bn_relu bit-flags is set in @p flags + * and @p prop_kind = #mkldnn_forward_training + * + * @note In-place operation is supported; that is, dst points to the same memory + * as src. + * + * @sa mkldnn_batch_normalization_desc_t + */ +mkldnn_status_t MKLDNN_API mkldnn_batch_normalization_forward_desc_init( + mkldnn_batch_normalization_desc_t *bnrm_desc, + mkldnn_prop_kind_t prop_kind, const mkldnn_memory_desc_t *data_desc, + float epsilon, unsigned flags); + +/** Initializes a batch normalization descriptor @p bnrm_desc for backward + * propagation with respect to data and scale-shift parameters using memory + * descriptors @p data_desc and @p diff_data_desc, normalization parameter + * @p epsilon, and @p flags set using bit flags of type + * mkldnn_batch_normalization_desc_t. + * + * Inputs: + * - src (#mkldnn_query_src_md, 0) + * - mean (#mkldnn_query_src_md, 1) + * - variance (#mkldnn_query_src_md, 2) + * - diff_dst (#mkldnn_query_diff_dst_md, 0) + * - scale_and_shift (#mkldnn_query_weights_md, 0), + * if #mkldnn_use_scaleshift bit-flags is set in @p flags + * - workspace (#mkldnn_query_workspace_md, 0), + * if #mkldnn_fuse_bn_relu bit-flags is set in @p flags + * + * Outputs: + * - diff_src (#mkldnn_query_diff_src_md, 0) + * - diff_scale_and_shift (#mkldnn_query_diff_weights_md, 0), + * if #mkldnn_use_scaleshift bit-flags is set in @p flags + * and @p prop_kind = #mkldnn_backward + * + * @note in-place operation is supported, + * i.e. diff_src points to the same memory as diff_dst. + * + * @sa mkldnn_batch_normalization_desc_t + */ +mkldnn_status_t MKLDNN_API mkldnn_batch_normalization_backward_desc_init( + mkldnn_batch_normalization_desc_t *bnrm_desc, + mkldnn_prop_kind_t prop_kind, + const mkldnn_memory_desc_t *diff_data_desc, + const mkldnn_memory_desc_t *data_desc, + float epsilon, unsigned flags); + +/** @} */ + +/** @addtogroup c_api_inner_product Inner product + * A primitive to compute an inner product. + * + * Inner product layer is also known as fully connected layer. 
+ *
+ * With spatial dimensions:
+ *
+ * \f[dst[n][oc] = \sum\limits_{ic, kh, kw}
+ *     src[n][ic][kh][kw] \cdot weights[oc][ic][kh][kw]
+ *     + bias[oc]\f]
+ * @{ */
+
+/** Initializes an inner product descriptor @p ip_desc for forward propagation
+ * using @p prop_kind (possible values are #mkldnn_forward_training and
+ * #mkldnn_forward_inference) and memory descriptors. In order to create an
+ * inner product without bias, @p bias_desc should be either @c NULL or a
+ * pointer to a descriptor with memory format kind equal to
+ * #mkldnn_format_kind_undef.
+ *
+ * @note Memory descriptors are allowed to be initialized with
+ *       #mkldnn_format_kind_any value of @p format_kind.
+ *
+ * Inputs:
+ *  - src (#mkldnn_query_src_md, 0)
+ *  - weights (#mkldnn_query_weights_md, 0)
+ *  - bias (#mkldnn_query_weights_md, 1), if created with bias
+ *
+ * Outputs:
+ *  - dst (#mkldnn_query_dst_md, 0)
+ */
+mkldnn_status_t MKLDNN_API mkldnn_inner_product_forward_desc_init(
+        mkldnn_inner_product_desc_t *ip_desc, mkldnn_prop_kind_t prop_kind,
+        const mkldnn_memory_desc_t *src_desc,
+        const mkldnn_memory_desc_t *weights_desc,
+        const mkldnn_memory_desc_t *bias_desc,
+        const mkldnn_memory_desc_t *dst_desc);
+
+/** Initializes an inner product descriptor @p ip_desc for backward propagation
+ * with respect to data using memory descriptors.
+ *
+ * @note Memory descriptors are allowed to be initialized with
+ *       #mkldnn_format_kind_any value of @p format_kind.
+ *
+ * Inputs:
+ *  - diff_dst (#mkldnn_query_diff_dst_md, 0)
+ *  - weights (#mkldnn_query_weights_md, 0)
+ *
+ * Outputs:
+ *  - diff_src (#mkldnn_query_diff_src_md, 0)
+ */
+mkldnn_status_t MKLDNN_API mkldnn_inner_product_backward_data_desc_init(
+        mkldnn_inner_product_desc_t *ip_desc,
+        const mkldnn_memory_desc_t *diff_src_desc,
+        const mkldnn_memory_desc_t *weights_desc,
+        const mkldnn_memory_desc_t *diff_dst_desc);
+
+/** Initializes an inner product descriptor @p ip_desc for backward propagation
+ * with respect to weights using memory descriptors.
+ *
+ * @note Memory descriptors are allowed to be initialized with
+ *       #mkldnn_format_kind_any value of @p format_kind.
+ *
+ * Inputs:
+ *  - src (#mkldnn_query_src_md, 0)
+ *  - diff_dst (#mkldnn_query_diff_dst_md, 0)
+ *
+ * Outputs:
+ *  - diff_weights (#mkldnn_query_diff_weights_md, 0)
+ *  - diff_bias (#mkldnn_query_diff_weights_md, 1), if created with bias
+ */
+mkldnn_status_t MKLDNN_API mkldnn_inner_product_backward_weights_desc_init(
+        mkldnn_inner_product_desc_t *ip_desc,
+        const mkldnn_memory_desc_t *src_desc,
+        const mkldnn_memory_desc_t *diff_weights_desc,
+        const mkldnn_memory_desc_t *diff_bias_desc,
+        const mkldnn_memory_desc_t *diff_dst_desc);
+
+/** @} */
+
+/** @addtogroup c_api_rnn RNN
+ * A primitive to compute the common recurrent layer.
+ * @todo add additional description for the group
+ * @{ */
+
+/**
+ * Initializes a recurrent cell descriptor @p rnn_cell_desc
+ * using @p kind (possible values are
+ * #mkldnn_vanilla_rnn, #mkldnn_vanilla_lstm, #mkldnn_vanilla_gru, and
+ * #mkldnn_gru_linear_before_reset),
+ * @p f (possible values are #mkldnn_eltwise_relu and
+ * #mkldnn_eltwise_tanh), @p flags, @p alpha, and @p clipping.
+ */
+mkldnn_status_t MKLDNN_API mkldnn_rnn_cell_desc_init(
+        mkldnn_rnn_cell_desc_t *rnn_cell_desc,
+        mkldnn_alg_kind_t kind, mkldnn_alg_kind_t f,
+        unsigned int flags, float alpha, float clipping);
+
+/** Returns the number of gates of a particular @p rnn_cell_desc.
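+ *
+ * For illustration only, a minimal sketch of creating a cell descriptor and
+ * querying its gate count; the cell kind, the activation, and the zero
+ * flags/alpha/clipping values below are arbitrary example choices, and error
+ * handling is omitted:
+ * @code
+ *      mkldnn_rnn_cell_desc_t cell_d;
+ *      mkldnn_rnn_cell_desc_init(&cell_d, mkldnn_vanilla_rnn,
+ *              mkldnn_eltwise_tanh, 0, 0.0f, 0.0f);
+ *      int n_gates = mkldnn_rnn_cell_get_gates_count(&cell_d);
+ * @endcode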
 */
+int MKLDNN_API mkldnn_rnn_cell_get_gates_count(
+        const mkldnn_rnn_cell_desc_t *rnn_cell_desc);
+
+/** Returns the number of states of a particular @p rnn_cell_desc. */
+int MKLDNN_API mkldnn_rnn_cell_get_states_count(
+        const mkldnn_rnn_cell_desc_t *rnn_cell_desc);
+
+/** Sets quantization @p scale and @p shift for RNN data tensors.
+ * For performance reasons, the low precision configuration of the RNN
+ * primitive expects input activations to have the unsigned int8 data type.
+ * The scale and shift used to quantize floating point data to unsigned
+ * integer must be passed to the RNN primitive using attributes.
+ * Example usage:
+ * @code
+ *      // rnn parameters
+ *      int l = 2, t = 2, mb = 32, sic = 32, slc = 32, dic = 32, dlc = 32;
+ *      // activations quantization parameters
+ *      float scale = ..., shift = ..;
+ *
+ *      mkldnn_primitive_attr_t rnn_attr;
+ *      // create default attributes
+ *      mkldnn_primitive_attr_create(&rnn_attr);
+ *
+ *      // set scale and shift for int8 quantization of activation
+ *      mkldnn_primitive_attr_set_rnn_data_qparams(rnn_attr, scale, shift);
+ *
+ *      // create & configure rnn op_desc
+ *      mkldnn_rnn_desc_t rnn_d;
+ *      mkldnn_primitive_desc_t rnn_pd;
+ *      mkldnn_primitive_desc_create(&rnn_pd, &rnn_d, rnn_attr, engine, NULL);
+ * @endcode
+ * @note
+ *      Quantization scale and shift are common for src_layer, src_iter,
+ *      dst_iter, and dst_layer.
+ */
+mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_rnn_data_qparams(
+        mkldnn_primitive_attr_t attr, const float scale, const float shift);
+
+/** Sets quantization scales @p weights_scales for RNN weights tensors.
+ * The low precision configuration of the RNN primitive expects input weights
+ * to have the signed int8 data type. The scales used to quantize floating
+ * point data to signed integer must be passed to the RNN primitive using
+ * attributes.
+ * The @p mask argument defines the correspondence between the output tensor
+ * dimensions and the @p weights_scales array. Set the i-th bit of @p mask to 1
+ * to use a dedicated scaling factor for each slice of the output tensor over
+ * the i-th dimension. Set @p mask to 0 to use a common scaling factor for the
+ * whole output tensor. Example usage:
+ * @code
+ *      // rnn parameters
+ *      int l = 2, t = 2, mb = 32, sic = 32, slc = 32, dic = 32, dlc = 32;
+ *      // unique output scales per output channel
+ *      float weights_scales[dic * n_gates] = { ... };
+ *      // mask that specifies last two dimensions of ldigo format
+ *      int mask = 0x3;
+ *
+ *      mkldnn_primitive_attr_t attr;
+ *      // create default attributes
+ *      mkldnn_primitive_attr_create(&attr);
+ *
+ *      // set output channel-wise weights scales
+ *      mkldnn_primitive_attr_set_rnn_weights_qparams(attr, dic * n_gates, mask,
+ *              weights_scales);
+ *
+ *      // create & configure rnn op_desc
+ *      mkldnn_rnn_desc_t rnn_d;
+ *      mkldnn_primitive_desc_t rnn_pd;
+ *      mkldnn_primitive_desc_create(&rnn_pd, &rnn_d, attr, engine, NULL);
+ * @endcode
+ * @note
+ *      The dimension order is always native and does not depend on the actual
+ *      layout used. For example, 5-dimensional weights always have
+ *      (l, d, i, g, o) logical dimension ordering.
+ * @note
+ *      Quantization scales are common for weights_layer and weights_iteration.
+ * @note
+ *      There is no way to check that @p count corresponds to @p mask until an
+ *      actual primitive descriptor is created, so it is the user's
+ *      responsibility to set proper values. The following formula must hold:
+ *
+ *      \f[count = \prod\limits_{d \in mask} output.dims[d]\f]
+ */
+mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_rnn_weights_qparams (
+        mkldnn_primitive_attr_t attr, mkldnn_dim_t count, int mask,
+        const float *weights_scales);
+
+/** Initializes an RNN descriptor @p rnn_desc for forward propagation
+ * using @p prop_kind, @p rnn_cell_desc, @p direction, and memory descriptors.
+ * @note If @p prop_kind equals #mkldnn_forward_training, you must query a
+ * workspace memory descriptor before creating the primitive.
+ *
+ * @p src_iter_desc, @p bias_desc, and @p dst_iter_desc are allowed to either be
+ * @c NULL or point to a zero memory descriptor, which would indicate that the
+ * RNN primitive should not use them.
+ *
+ * @note All memory descriptors except @p src_iter_desc are allowed to be
+ *       initialized with #mkldnn_format_kind_any value of @p format_kind.
+ *
+ * Inputs:
+ *  - src_layer (#mkldnn_query_src_md, 0)
+ *  - src_iter (#mkldnn_query_src_md, 1), if used
+ *  - weights_layer (#mkldnn_query_weights_md, 0)
+ *  - weights_iter (#mkldnn_query_weights_md, 1)
+ *  - bias (#mkldnn_query_weights_md, 2), if used
+ *
+ * Outputs:
+ *  - dst_layer (#mkldnn_query_dst_md, 0)
+ *  - dst_iter (#mkldnn_query_dst_md, 1), if used
+ *  - workspace (#mkldnn_query_workspace_md, 0),
+ *      if @p prop_kind equals #mkldnn_forward_training
+ */
+mkldnn_status_t MKLDNN_API mkldnn_rnn_forward_desc_init(
+        mkldnn_rnn_desc_t *rnn_desc, mkldnn_prop_kind_t prop_kind,
+        const mkldnn_rnn_cell_desc_t *rnn_cell_desc,
+        const mkldnn_rnn_direction_t direction,
+        const mkldnn_memory_desc_t *src_layer_desc,
+        const mkldnn_memory_desc_t *src_iter_desc,
+        const mkldnn_memory_desc_t *weights_layer_desc,
+        const mkldnn_memory_desc_t *weights_iter_desc,
+        const mkldnn_memory_desc_t *bias_desc,
+        const mkldnn_memory_desc_t *dst_layer_desc,
+        const mkldnn_memory_desc_t *dst_iter_desc);
+
+/** Initializes an RNN descriptor @p rnn_desc for backward propagation
+ * using @p prop_kind, @p rnn_cell_desc, @p direction, and memory descriptors.
+ *
+ * @note All memory descriptors are allowed to be initialized with
+ *       #mkldnn_format_kind_any value of @p format_kind.
+ *
+ * @p src_iter_desc (simultaneously with @p diff_src_iter_desc),
+ * @p bias_desc (simultaneously with @p diff_bias_desc), and
+ * @p dst_iter_desc (simultaneously with @p diff_dst_iter_desc) are allowed to
+ * either be @c NULL or point to a zero memory descriptor, which would indicate
+ * that the RNN primitive should not use them.
+ * + * Inputs: + * - src_layer (#mkldnn_query_src_md, 0) + * - src_iter (#mkldnn_query_src_md, 1), if used + * - weights_layer (#mkldnn_query_weights_md, 0) + * - weights_iter (#mkldnn_query_weights_md, 1) + * - bias (#mkldnn_query_weights_md, 2), if used + * - dst_layer (#mkldnn_query_dst_md, 0) + * - dst_iter (#mkldnn_query_dst_md, 1), if used + * - diff_dst_layer (#mkldnn_query_diff_dst_md, 0) + * - diff_dst_iter (#mkldnn_query_diff_dst_md, 1), if used + * - workspace (#mkldnn_query_workspace_md, 0) + * + * Outputs: + * - diff_src_layer (#mkldnn_query_diff_src_md, 0) + * - diff_src_iter (#mkldnn_query_diff_src_md, 1), if used + * - diff_weights_layer (#mkldnn_query_diff_weights_md, 0) + * - diff_weights_iter (#mkldnn_query_diff_weights_md, 1) + * - diff_bias (#mkldnn_query_diff_weights_md, 2), if used + */ +mkldnn_status_t MKLDNN_API mkldnn_rnn_backward_desc_init( + mkldnn_rnn_desc_t *rnn_desc, mkldnn_prop_kind_t prop_kind, + const mkldnn_rnn_cell_desc_t *rnn_cell_desc, + const mkldnn_rnn_direction_t direction, + const mkldnn_memory_desc_t *src_layer_desc, + const mkldnn_memory_desc_t *src_iter_desc, + const mkldnn_memory_desc_t *weights_layer_desc, + const mkldnn_memory_desc_t *weights_iter_desc, + const mkldnn_memory_desc_t *bias_desc, + const mkldnn_memory_desc_t *dst_layer_desc, + const mkldnn_memory_desc_t *dst_iter_desc, + const mkldnn_memory_desc_t *diff_src_layer_desc, + const mkldnn_memory_desc_t *diff_src_iter_desc, + const mkldnn_memory_desc_t *diff_weights_layer_desc, + const mkldnn_memory_desc_t *diff_weights_iter_desc, + const mkldnn_memory_desc_t *diff_bias_desc, + const mkldnn_memory_desc_t *diff_dst_layer, + const mkldnn_memory_desc_t *diff_dst_iter_desc); + +/** @} */ + +/** @} */ + +/** @addtogroup c_api_engine Engine operations + * @{ */ + +/** Returns the number of engines of a particular @p kind. */ +size_t MKLDNN_API mkldnn_engine_get_count(mkldnn_engine_kind_t kind); + +/** Creates an @p engine of particular @p kind and @p index. */ +mkldnn_status_t MKLDNN_API mkldnn_engine_create(mkldnn_engine_t *engine, + mkldnn_engine_kind_t kind, size_t index); + +/** Returns the kind of an @p engine. */ +mkldnn_status_t MKLDNN_API mkldnn_engine_get_kind(mkldnn_engine_t engine, + mkldnn_engine_kind_t *kind); + +/** Destroys an @p engine. */ +mkldnn_status_t MKLDNN_API mkldnn_engine_destroy(mkldnn_engine_t engine); + +/** @} */ + +/** @addtogroup c_api_stream Execution stream operations + * @{ */ + +/** Creates an execution @p stream for @p engine and with @p flags. */ +mkldnn_status_t MKLDNN_API mkldnn_stream_create(mkldnn_stream_t *stream, + mkldnn_engine_t engine, unsigned flags); + +/** Destroys an execution @p stream. */ +mkldnn_status_t MKLDNN_API mkldnn_stream_destroy(mkldnn_stream_t stream); + +/** @} */ + +/** @addtogroup c_api_service Service functions + * @{ */ + +/** Sets verbosity level (print information to stdout). + * Possible levels are: + * - 0 -- no verbose output (default) + * - 1 -- primitive information at execution + * - 2 -- primitive information at creation and execution + * + * @note + * Dumping information might affect performance. + * This setting overrides the MKLDNN_VERBOSE environment variable. */ +mkldnn_status_t MKLDNN_API mkldnn_set_verbose(int level); + +/** Enables or disables dumping of JIT-generated code. + * The enable parameter can be: + * - 0 -- disable + * - any other value -- enable + * + * @note + * This setting overrides the MKLDNN_JIT_DUMP environment variable. 
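+ *
+ * As a purely illustrative sketch, dumping could also be enabled
+ * programmatically at application start-up:
+ * @code
+ *      mkldnn_set_jit_dump(1);
+ * @endcode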
 */
+mkldnn_status_t MKLDNN_API mkldnn_set_jit_dump(int enable);
+
+/** Gets library version information.
+ * Version information includes:
+ *  - major -- major version number
+ *  - minor -- minor version number
+ *  - patch -- patch release number
+ *  - hash -- git commit hash */
+const mkldnn_version_t MKLDNN_API *mkldnn_version();
+
+/** @} */
+
+/** @addtogroup c_api_blas BLAS functions
+ * A subset of Basic Linear Algebra (BLAS) functions to perform
+ * matrix-matrix multiplication.
+ * @{ */
+
+/** SGEMM performs a matrix-matrix multiplication operation defined as
+ *
+ * C := alpha*op( A )*op( B ) + beta*C
+ *
+ * where
+ *  - op( X ) is one of op( X ) = X or op( X ) = X**T,
+ *  - alpha and beta are scalars,
+ *  - A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix
+ *    and C an m by n matrix.
+ *
+ * The matrices are assumed to be stored in column-major order (the elements
+ * in a matrix's columns are contiguous in memory).
+ *
+ * @note
+ *      The API is different from the standard BLAS routine
+ *      because it returns mkldnn_status_t for error handling.
+ *      XERBLA is not supported: no error message will be printed
+ *      in case of incorrect parameters. */
+mkldnn_status_t MKLDNN_API mkldnn_sgemm(
+        const char *transa, const char *transb,
+        const mkldnn_dim_t *M, const mkldnn_dim_t *N, const mkldnn_dim_t *K,
+        const float *alpha, const float *A, const mkldnn_dim_t *lda,
+        const float *B, const mkldnn_dim_t *ldb,
+        const float *beta, float *C, const mkldnn_dim_t *ldc);
+
+/** gemm_s8u8s32 and gemm_s8s8s32 perform a matrix-matrix multiplication
+ * operation and add the result to a scalar-matrix product. For the final
+ * result, a vector is added to each row or column of the output matrix.
+ * The operation is defined as:
+ *
+ * C := alpha*(op(A) + A_offset) * (op(B) + B_offset) + beta*C + C_offset
+ *
+ * where
+ *  - op( X ) = X or op( X ) = X**T,
+ *  - A_offset is an m-by-k matrix with every element equal to the value ao,
+ *  - B_offset is a k-by-n matrix with every element equal to the value bo,
+ *  - C_offset is an m-by-n matrix defined by the co array, size len:
+ *    - if offsetc = F: len must be at least 1
+ *    - if offsetc = C: len must be at least max(1, m)
+ *    - if offsetc = R: len must be at least max(1, n)
+ *  - alpha and beta are scalars, and A, B and C are matrices, with op( A )
+ *    an m-by-k matrix, op( B ) a k-by-n matrix and C an m-by-n matrix.
+ *
+ * The matrices are assumed to be stored in column-major order (the elements
+ * in a matrix's columns are contiguous in memory).
+ *
+ * @note
+ *      The API is different from the standard BLAS routine
+ *      because it returns mkldnn_status_t for error handling.
+ *      XERBLA is not supported: no error message will be printed
+ *      in case of incorrect parameters.
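+ *
+ * As a minimal illustrative sketch (all sizes and values below are arbitrary
+ * examples and error handling is omitted), a 2x2 mkldnn_gemm_s8u8s32 call
+ * with zero offsets could look like:
+ * @code
+ *      char transa = 'N', transb = 'N', offsetc = 'F';
+ *      mkldnn_dim_t m = 2, n = 2, k = 2, lda = 2, ldb = 2, ldc = 2;
+ *      float alpha = 1.0f, beta = 0.0f;
+ *      int8_t ao = 0, bo = 0;
+ *      int32_t co = 0;
+ *      int8_t A[4] = {1, 2, 3, 4};
+ *      uint8_t B[4] = {5, 6, 7, 8};
+ *      int32_t C[4] = {0, 0, 0, 0};
+ *      mkldnn_gemm_s8u8s32(&transa, &transb, &offsetc, &m, &n, &k, &alpha,
+ *              A, &lda, &ao, B, &ldb, &bo, &beta, C, &ldc, &co);
+ * @endcode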
*/ +mkldnn_status_t MKLDNN_API mkldnn_gemm_s8u8s32( + const char *transa, const char *transb, const char *offsetc, + const mkldnn_dim_t *M, const mkldnn_dim_t *N, const mkldnn_dim_t *K, + const float *alpha, + const int8_t *A, const mkldnn_dim_t *lda, const int8_t *ao, + const uint8_t *B, const mkldnn_dim_t *ldb, const int8_t *bo, + const float *beta, + int32_t *c, const mkldnn_dim_t *ldc, const int32_t *co); + +mkldnn_status_t MKLDNN_API mkldnn_gemm_s8s8s32( + const char *transa, const char *transb, const char *offsetc, + const mkldnn_dim_t *M, const mkldnn_dim_t *N, const mkldnn_dim_t *K, + const float *alpha, + const int8_t *A, const mkldnn_dim_t *lda, const int8_t *ao, + const int8_t *B, const mkldnn_dim_t *ldb, const int8_t *bo, + const float *beta, + int32_t *c, const mkldnn_dim_t *ldc, const int32_t *co); +/** @} */ + +/** @} */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/thirdparty/oidn/mkl-dnn/include/mkldnn.hpp b/thirdparty/oidn/mkl-dnn/include/mkldnn.hpp new file mode 100644 index 0000000000..581400a013 --- /dev/null +++ b/thirdparty/oidn/mkl-dnn/include/mkldnn.hpp @@ -0,0 +1,2615 @@ +/******************************************************************************* +* Copyright 2016-2018 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#ifndef MKLDNN_HPP +#define MKLDNN_HPP + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +#include <stdlib.h> +#include <memory> +#include <vector> +#include <unordered_map> +#include <algorithm> +#include <iterator> + +#include "mkldnn.h" +#endif + +namespace mkldnn { + +/// @addtogroup cpp_api C++ API +/// @{ + +/// @addtogroup cpp_api_utils Utils +/// @{ + +/// A class that provides the destructor for an Intel(R) MKL-DNN C handle +template <typename T> class handle_traits {}; + +/// A class for wrapping an Intel(R) MKL-DNN handle. It is used as the base +/// class for primitive (#mkldnn_primitive_t), engine (#mkldnn_engine_t), and +/// stream (#mkldnn_stream_t) handles. An object of the #mkldnn::handle class +/// can be passed by value. This class enables wrapping: +/// - Newly constructed handles. +/// @n In this case, the constructed handle uses reference counting provided +/// by @p std::shared_ptr with a proper deleter function specified through +/// the @p handle_traits class. +/// - Pre-existing handles returned by the Intel(R) MKL-DNN C API (for +/// example, through mkldnn_primitive_get_primitive_desc()). +/// @n In this case, an Intel(R) MKL-DNN C API handle is wrapped without a +/// deleter because it is assumed that the handle wrapper for the original +/// object deletes the handle (this model is similar to @p std::weak_ptr). 
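+///
+/// As a purely illustrative sketch (any handle-based object behaves the same
+/// way), copies are cheap and share the underlying C handle through the
+/// reference count:
+/// @code
+/// mkldnn::engine eng(mkldnn::engine::cpu, 0);
+/// mkldnn::engine copy = eng; // same underlying mkldnn_engine_t
+/// @endcode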
+template <typename T, typename traits=handle_traits<T>> class handle { +private: + std::shared_ptr<typename std::remove_pointer<T>::type> _data; + handle(const handle &&) = delete; + handle &operator=(const handle &&other) = delete; +protected: + bool operator==(const T other) const { return other == _data.get(); } + bool operator!=(const T other) const { return !(*this == other); } +public: + /// Constructs a C handle wrapper. + /// @param t The C handle to wrap. + /// @param weak A flag to specify whether to construct a weak wrapper. + handle(T t = 0, bool weak = false): _data(0) { + reset(t, weak); + } + + handle(const handle &other): _data(other._data) {} + handle &operator=(const handle &other) { + _data = other._data; + return *this; + } + /// Resets the value of a C handle. + /// @param t The new value of the C handle. + /// @param weak A flag to specify whether the wrapper should be weak. + void reset(T t, bool weak = false) { + auto dummy_destructor = [](T) { return decltype(traits::destructor(0))(0); }; + _data.reset(t, weak ? dummy_destructor : traits::destructor); + } + + /// Returns the value of the underlying C handle. + T get() const { return _data.get(); } + + bool operator==(const handle &other) const { return other._data.get() == _data.get(); } + bool operator!=(const handle &other) const { return !(*this == other); } +}; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +template <> struct handle_traits<mkldnn_memory_t> { + static constexpr auto destructor = &mkldnn_memory_destroy; +}; + +template <> struct handle_traits<mkldnn_primitive_desc_t> { + static constexpr auto destructor = &mkldnn_primitive_desc_destroy; +}; + +template <> struct handle_traits<mkldnn_primitive_t> { + static constexpr auto destructor = &mkldnn_primitive_destroy; +}; + +template <> struct handle_traits<mkldnn_primitive_desc_iterator_t> { + static constexpr auto destructor = &mkldnn_primitive_desc_iterator_destroy; +}; +#endif + +struct memory; +struct primitive_desc; + +/// Base class for all computational primitives. +class primitive: public handle<mkldnn_primitive_t> { + friend struct error; + friend struct stream; + using handle::handle; +public: + /// A proxy to C primitive kind enum + enum class kind { + undefined_primitive = mkldnn_undefined_primitive, + reorder = mkldnn_reorder, + concat = mkldnn_concat, + sum = mkldnn_sum, + convolution = mkldnn_convolution, + deconvolution = mkldnn_deconvolution, + shuffle = mkldnn_shuffle, + eltwise = mkldnn_eltwise, + softmax = mkldnn_softmax, + pooling = mkldnn_pooling, + lrn = mkldnn_lrn, + batch_normalization = mkldnn_batch_normalization, + inner_product = mkldnn_inner_product, + rnn = mkldnn_rnn, + }; + + primitive(const_mkldnn_primitive_desc_t c_pd); + primitive(const primitive_desc &pd); + + /// Returns the descriptor of the underlying C API primitive. + inline const_mkldnn_primitive_desc_t get_primitive_desc() const; + // TODO: use the C++ API wrapper structure. + + void execute(struct stream &astream, + const std::unordered_map<int, memory> &args) const; +}; + +inline mkldnn_primitive_kind_t convert_to_c(primitive::kind akind) { + return static_cast<mkldnn_primitive_kind_t>(akind); +} +/// Intel(R) MKL-DNN exception class. +/// +/// This class captures the status returned by the failed C API function, error +/// message, and, optionally, handle of the primitive that caused the error. +struct error: public std::exception { + mkldnn_status_t status; + const char *message; + + /// Constructs an error instance. 
+ /// + /// @param astatus The error status returned by the C API. + /// @param amessage The error message. + error(mkldnn_status_t astatus, const char *amessage) + : status(astatus), message(amessage) {} + + /// A convenience function for wrapping calls to the C API. Checks the + /// return status and throws an #error in case of failure. + /// + /// @param status The error status returned by the C API. + /// @param message The error message. + static void wrap_c_api(mkldnn_status_t status, const char *message) { + if (status != mkldnn_success) + throw error(status, message); + } +}; + +const_mkldnn_primitive_desc_t primitive::get_primitive_desc() const { + const_mkldnn_primitive_desc_t pd; + error::wrap_c_api(mkldnn_primitive_get_primitive_desc(get(), &pd), + "could not get primitive descriptor by primitive"); + return pd; +} +/// @} + +/// @addtogroup cpp_api_enums Common data types and enumerations +/// A proxy to @ref c_api_types in @ref c_api. +/// +/// @{ + +enum scratchpad_mode { + scratchpad_mode_library = mkldnn_scratchpad_mode_library, + scratchpad_mode_user = mkldnn_scratchpad_mode_user, +}; + +inline mkldnn_scratchpad_mode_t convert_to_c(scratchpad_mode mode) { + return static_cast<mkldnn_scratchpad_mode_t>(mode); +} + +enum padding_kind { + zero = mkldnn_padding_zero +}; + +inline mkldnn_padding_kind_t convert_to_c(padding_kind kind) { + return static_cast<mkldnn_padding_kind_t>(kind); +} + +enum prop_kind { + forward_training = mkldnn_forward_training, + forward_scoring = mkldnn_forward_scoring, + forward_inference = mkldnn_forward_inference, + forward = mkldnn_forward, + backward = mkldnn_backward, + backward_data = mkldnn_backward_data, + backward_weights = mkldnn_backward_weights, + backward_bias = mkldnn_backward_bias +}; + +inline mkldnn_prop_kind_t convert_to_c(prop_kind kind) { + return static_cast<mkldnn_prop_kind_t>(kind); +} + +enum algorithm { + algorithm_undef = mkldnn_alg_kind_undef, + convolution_auto = mkldnn_convolution_auto, + convolution_direct = mkldnn_convolution_direct, + convolution_winograd = mkldnn_convolution_winograd, + deconvolution_direct = mkldnn_deconvolution_direct, + deconvolution_winograd = mkldnn_deconvolution_winograd, + eltwise_relu = mkldnn_eltwise_relu, + eltwise_tanh = mkldnn_eltwise_tanh, + eltwise_elu = mkldnn_eltwise_elu, + eltwise_square = mkldnn_eltwise_square, + eltwise_abs = mkldnn_eltwise_abs, + eltwise_sqrt = mkldnn_eltwise_sqrt, + eltwise_linear = mkldnn_eltwise_linear, + eltwise_bounded_relu = mkldnn_eltwise_bounded_relu, + eltwise_soft_relu = mkldnn_eltwise_soft_relu, + eltwise_logistic = mkldnn_eltwise_logistic, + lrn_across_channels = mkldnn_lrn_across_channels, + lrn_within_channel = mkldnn_lrn_within_channel, + pooling_max = mkldnn_pooling_max, + pooling_avg = mkldnn_pooling_avg, + pooling_avg_include_padding = mkldnn_pooling_avg_include_padding, + pooling_avg_exclude_padding = mkldnn_pooling_avg_exclude_padding, + vanilla_rnn = mkldnn_vanilla_rnn, + vanilla_lstm = mkldnn_vanilla_lstm, + vanilla_gru = mkldnn_vanilla_gru, + gru_linear_before_reset = mkldnn_gru_linear_before_reset +}; + +inline mkldnn_alg_kind_t convert_to_c(algorithm aalgorithm) { + return static_cast<mkldnn_alg_kind_t>(aalgorithm); +} + +enum batch_normalization_flag { + use_global_stats = mkldnn_use_global_stats, + use_scale_shift = mkldnn_use_scaleshift, + fuse_bn_relu = mkldnn_fuse_bn_relu +}; + +inline mkldnn_batch_normalization_flag_t convert_to_c( + batch_normalization_flag aflag) { + return static_cast<mkldnn_batch_normalization_flag_t>(aflag); +} 
+ +enum rnn_direction { + unidirectional_left2right = mkldnn_unidirectional_left2right, + unidirectional_right2left = mkldnn_unidirectional_right2left, + unidirectional = mkldnn_unidirectional, + bidirectional_concat = mkldnn_bidirectional_concat, + bidirectional_sum = mkldnn_bidirectional_sum, +}; + +inline mkldnn_rnn_direction_t convert_to_c(rnn_direction adir) { + return static_cast<mkldnn_rnn_direction_t>(adir); +} + +enum query { + undef = mkldnn_query_undef, + + query_engine = mkldnn_query_engine, + primitive_kind = mkldnn_query_primitive_kind, + + num_of_inputs_s32 = mkldnn_query_num_of_inputs_s32, + num_of_outputs_s32 = mkldnn_query_num_of_outputs_s32, + + time_estimate_f64 = mkldnn_query_time_estimate_f64, + memory_consumption_s64 = mkldnn_query_memory_consumption_s64, + + query_scratchpad_engine = mkldnn_query_scratchpad_engine, + + impl_info_str = mkldnn_query_impl_info_str, + + op_d = mkldnn_query_op_d, + convolution_d = mkldnn_query_convolution_d, + deconvolution_d = mkldnn_query_deconvolution_d, + shuffle_d = mkldnn_query_shuffle_d, + eltwise_d = mkldnn_query_eltwise_d, + softmax_d = mkldnn_query_softmax_d, + pooling_d = mkldnn_query_pooling_d, + lrn_d = mkldnn_query_lrn_d, + batch_normalization_d = mkldnn_query_batch_normalization_d, + inner_product_d = mkldnn_query_inner_product_d, + rnn_d = mkldnn_query_rnn_d, + + src_md = mkldnn_query_src_md, + diff_src_md = mkldnn_query_diff_src_md, + weights_md = mkldnn_query_weights_md, + diff_weights_md = mkldnn_query_diff_weights_md, + dst_md = mkldnn_query_dst_md, + diff_dst_md = mkldnn_query_diff_dst_md, + workspace_md = mkldnn_query_workspace_md, + scratchpad_md = mkldnn_query_scratchpad_md, +}; + +inline mkldnn_query_t convert_to_c(query aquery) { + return static_cast<mkldnn_query_t>(aquery); +} + +/// @} + +/// @addtogroup cpp_api_attr Attributes +/// An extension for controlling primitive behavior. +/// +/// @sa @ref c_api_attributes in @ref c_api +/// @{ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +template <> struct handle_traits<mkldnn_post_ops_t> { + static constexpr auto destructor = &mkldnn_post_ops_destroy; +}; +#endif + +struct post_ops: public handle<mkldnn_post_ops_t> { + post_ops() { + mkldnn_post_ops_t result; + error::wrap_c_api(mkldnn_post_ops_create(&result), + "could not create post operation sequence"); + reset(result); + } + + int len() const { return mkldnn_post_ops_len(get()); } + + primitive::kind kind(int index) const { + error::wrap_c_api( + index < len() ? mkldnn_success : mkldnn_invalid_arguments, + "post_ops index is out of range"); + return static_cast<primitive::kind>(mkldnn_post_ops_get_kind(get(), + index)); + } + + void append_sum(float scale = 1.) 
{ + error::wrap_c_api(mkldnn_post_ops_append_sum(get(), scale), + "could not append sum"); + } + + void get_params_sum(int index, float &scale) const { + error::wrap_c_api(mkldnn_post_ops_get_params_sum(get(), index, &scale), + "could not get sum params"); + } + + void append_eltwise(float scale, algorithm alg, float alpha, + float beta) { + error::wrap_c_api(mkldnn_post_ops_append_eltwise(get(), scale, + convert_to_c(alg), alpha, beta), + "could not append eltwise"); + } + + void get_params_eltwise(int index, float &scale, algorithm &alg, + float &alpha, float &beta) const { + mkldnn_alg_kind_t c_alg; + error::wrap_c_api(mkldnn_post_ops_get_params_eltwise(get(), index, + &scale, &c_alg, &alpha, &beta), + "could not get eltwise params"); + alg = static_cast<algorithm>(c_alg); + } +}; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +template <> struct handle_traits<mkldnn_primitive_attr_t> { + static constexpr auto destructor = &mkldnn_primitive_attr_destroy; +}; +#endif + +struct primitive_attr: public handle<mkldnn_primitive_attr_t> { + primitive_attr() { + mkldnn_primitive_attr_t result; + error::wrap_c_api(mkldnn_primitive_attr_create(&result), + "could not create a primitive attr"); + reset(result); + } + + scratchpad_mode get_scratchpad_mode() const { + mkldnn_scratchpad_mode_t result; + error::wrap_c_api(mkldnn_primitive_attr_get_scratchpad_mode( + get(), &result), "could not get scratchpad mode"); + return scratchpad_mode(result); + } + + void set_scratchpad_mode(scratchpad_mode mode) { + error::wrap_c_api(mkldnn_primitive_attr_set_scratchpad_mode( + get(), mkldnn::convert_to_c(mode)), + "could not set scratchpad mode"); + } + + void get_output_scales(int &mask, std::vector<float> &scales) const + { + mkldnn_dim_t count; + int c_mask; + const float *c_scales; + error::wrap_c_api(mkldnn_primitive_attr_get_output_scales(get(), + &count, &c_mask, &c_scales), + "could not get int output scales"); + scales.resize(count); + + mask = c_mask; + for (mkldnn_dim_t c = 0; c < count; ++c) + scales[c] = c_scales[c]; + } + + void set_output_scales(int mask, const std::vector<float> &scales) + { + error::wrap_c_api(mkldnn_primitive_attr_set_output_scales(get(), + (mkldnn_dim_t)scales.size(), mask, &scales[0]), + "could not set int output scales"); + } + + const post_ops get_post_ops() const { + post_ops result; + const_mkldnn_post_ops_t c_result; + error::wrap_c_api(mkldnn_primitive_attr_get_post_ops(get(), &c_result), + "could not get post operation sequence"); + result.reset(const_cast<mkldnn_post_ops_t>(c_result), true); + return result; + } + + void set_post_ops(post_ops ops) { + error::wrap_c_api(mkldnn_primitive_attr_set_post_ops(get(), ops.get()), + "could not set post operation sequence"); + } + + void set_rnn_data_qparams(const float scale, const float shift) + { + error::wrap_c_api(mkldnn_primitive_attr_set_rnn_data_qparams(get(), + scale, shift), "could not set rnn data int scale/shift"); + } + + void set_rnn_weights_qparams(int mask, const std::vector<float> &scales) + { + error::wrap_c_api(mkldnn_primitive_attr_set_rnn_weights_qparams(get(), + (int)scales.size(), mask, &scales[0]), + "could not set rnn weights int scales"); + } +}; + +/// @} + +/// @addtogroup cpp_api_engine Engine +/// Engine operations. +/// +/// @sa @ref c_api_engine in @ref c_api +/// @{ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +template <> struct handle_traits<mkldnn_engine_t> { + static constexpr auto destructor = &mkldnn_engine_destroy; +}; +#endif + +/// An execution engine. 
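+///
+/// A minimal usage sketch (engine index 0 is just an example; the number of
+/// available engines can be checked with get_count()):
+/// @code
+/// mkldnn::engine eng(mkldnn::engine::cpu, 0);
+/// mkldnn::stream s(eng);
+/// @endcode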
+struct engine: public handle<mkldnn_engine_t> { + friend class primitive; + // gcc bug??? using handle::handle; + + /// Kinds of engines. + enum kind { + /// An unspecified engine + any = mkldnn_any_engine, + /// CPU engine + cpu = mkldnn_cpu, + }; + + /// Returns the number of engines of a certain kind. + /// + /// @param akind The kind of engines to count. + + static size_t get_count(kind akind) { + return mkldnn_engine_get_count(convert_to_c(akind)); + } + + /// Constructs an engine. + /// + /// @param akind The kind of engine to construct. + /// @param index The index of the engine. Must be less than the value + /// returned by #get_count() for this particular kind of engine. + + engine(kind akind, size_t index) { + mkldnn_engine_t aengine; + error::wrap_c_api( + mkldnn_engine_create(&aengine, + convert_to_c(akind), index), + "could not create an engine"); + reset(aengine); + } + + explicit engine(const mkldnn_engine_t& aengine) + : handle(aengine, true) {} + + engine(const handle<mkldnn_primitive_desc_t> &pd) { + mkldnn_engine_t engine_q; + error::wrap_c_api( + mkldnn_primitive_desc_query(pd.get(), + mkldnn::convert_to_c(query_engine), 0, &engine_q), + "could not get engine from primitive_desc"); + reset(engine_q, true); + } + + template <class primitive_desc> + static engine query(const primitive_desc &pd) { + mkldnn_engine_t engine_q; + error::wrap_c_api( + mkldnn_primitive_desc_query(pd.get(), + mkldnn::convert_to_c(query_engine), 0, &engine_q), + "could not get engine from primitive_desc"); + + return engine(engine_q); + } + +private: + static mkldnn_engine_kind_t convert_to_c(kind akind) { + return static_cast<mkldnn_engine_kind_t>(akind); + } +}; + +/// @} + +/// @addtogroup cpp_api_stream Stream +/// Execution stream operations +/// +/// @sa @ref c_api_stream in @ref c_api +/// @{ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +template <> struct handle_traits<mkldnn_stream_t> { + static constexpr auto destructor = &mkldnn_stream_destroy; +}; +#endif + +struct stream: public handle<mkldnn_stream_t> { + using handle::handle; + + enum: unsigned { + default_flags = mkldnn_stream_default_flags, + }; + + /// Constructs a stream. + stream(const engine &aengine, + unsigned flags = static_cast<unsigned>(default_flags)) { + mkldnn_stream_t astream; + error::wrap_c_api(mkldnn_stream_create(&astream, aengine.get(), flags), + "could not create a stream"); + reset(astream); + } +}; + +/// @} + +/// @addtogroup cpp_api_memory_related Memory and memory related operations +/// @{ + +/// @addtogroup cpp_api_memory Memory +/// A primitive to describe and store data. +/// +/// For more information, refer to @ref c_api_memory in @ref c_api. +/// @{ + +/// Memory that describes the data. +struct memory: public handle<mkldnn_memory_t> { + public: + typedef mkldnn_dim_t dim; + typedef std::vector<dim> dims; + + template <typename T> static void validate_dims(const std::vector<T> &v) { + if (v.size() > MKLDNN_MAX_NDIMS) + throw error(mkldnn_invalid_arguments, "invalid dimensions"); + } + + /// Data type specification. See #mkldnn_data_type_t for a detailed + /// description. + enum data_type { + data_undef = mkldnn_data_type_undef, + f32 = mkldnn_f32, + s32 = mkldnn_s32, + s8 = mkldnn_s8, + u8 = mkldnn_u8, + }; + + /// Memory format tag specification. See #mkldnn_format_tag_t + /// for a detailed description. 
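+    ///
+    /// For example, a 4D activations tensor in NCHW layout could be described
+    /// as follows (the sizes are arbitrary sample values):
+    /// @code
+    /// memory::desc md({2, 16, 7, 7}, memory::data_type::f32,
+    ///         memory::format_tag::nchw);
+    /// @endcode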
+ enum format_tag { + format_tag_undef = mkldnn_format_tag_undef, + any = mkldnn_format_tag_any, + a = mkldnn_a, + ab = mkldnn_ab, + abc = mkldnn_abc, + abcd = mkldnn_abcd, + abcde = mkldnn_abcde, + abcdef = mkldnn_abcdef, + abdec = mkldnn_abdec, + acb = mkldnn_acb, + acbde = mkldnn_acbde, + acdb = mkldnn_acdb, + acdeb = mkldnn_acdeb, + ba = mkldnn_ba, + bac = mkldnn_bac, + bacd = mkldnn_bacd, + bcda = mkldnn_bcda, + cba = mkldnn_cba, + cdba = mkldnn_cdba, + cdeba = mkldnn_cdeba, + decab = mkldnn_decab, + Abc16a = mkldnn_Abc16a, + ABc16a16b = mkldnn_ABc16a16b, + aBc16b = mkldnn_aBc16b, + ABc16b16a = mkldnn_ABc16b16a, + Abc4a = mkldnn_Abc4a, + aBc4b = mkldnn_aBc4b, + ABc4b16a4b = mkldnn_ABc4b16a4b, + ABc4b4a = mkldnn_ABc4b4a, + ABc8a16b2a = mkldnn_ABc8a16b2a, + ABc8a8b = mkldnn_ABc8a8b, + aBc8b = mkldnn_aBc8b, + ABc8b16a2b = mkldnn_ABc8b16a2b, + ABc8b8a = mkldnn_ABc8b8a, + Abcd16a = mkldnn_Abcd16a, + ABcd16a16b = mkldnn_ABcd16a16b, + aBcd16b = mkldnn_aBcd16b, + ABcd16b16a = mkldnn_ABcd16b16a, + aBCd16b16c = mkldnn_aBCd16b16c, + aBCd16c16b = mkldnn_aBCd16c16b, + Abcd4a = mkldnn_Abcd4a, + aBcd4b = mkldnn_aBcd4b, + ABcd4b16a4b = mkldnn_ABcd4b16a4b, + ABcd4b4a = mkldnn_ABcd4b4a, + aBCd4c16b4c = mkldnn_aBCd4c16b4c, + aBCd4c4b = mkldnn_aBCd4c4b, + ABcd8a16b2a = mkldnn_ABcd8a16b2a, + ABcd8a8b = mkldnn_ABcd8a8b, + aBcd8b = mkldnn_aBcd8b, + ABcd8b16a2b = mkldnn_ABcd8b16a2b, + aBCd8b16c2b = mkldnn_aBCd8b16c2b, + ABcd8b8a = mkldnn_ABcd8b8a, + aBCd8b8c = mkldnn_aBCd8b8c, + aBCd8c16b2c = mkldnn_aBCd8c16b2c, + aBCd8c8b = mkldnn_aBCd8c8b, + Abcde16a = mkldnn_Abcde16a, + ABcde16a16b = mkldnn_ABcde16a16b, + aBcde16b = mkldnn_aBcde16b, + ABcde16b16a = mkldnn_ABcde16b16a, + aBCde16b16c = mkldnn_aBCde16b16c, + aBCde16c16b = mkldnn_aBCde16c16b, + aBCde2c8b4c = mkldnn_aBCde2c8b4c, + Abcde4a = mkldnn_Abcde4a, + aBcde4b = mkldnn_aBcde4b, + ABcde4b4a = mkldnn_ABcde4b4a, + aBCde4b4c = mkldnn_aBCde4b4c, + aBCde4c16b4c = mkldnn_aBCde4c16b4c, + aBCde4c4b = mkldnn_aBCde4c4b, + Abcde8a = mkldnn_Abcde8a, + ABcde8a8b = mkldnn_ABcde8a8b, + aBcde8b = mkldnn_aBcde8b, + ABcde8b16a2b = mkldnn_ABcde8b16a2b, + aBCde8b16c2b = mkldnn_aBCde8b16c2b, + ABcde8b8a = mkldnn_ABcde8b8a, + aBCde8b8c = mkldnn_aBCde8b8c, + aBCde8c16b2c = mkldnn_aBCde8c16b2c, + aBCde8c8b = mkldnn_aBCde8c8b, + aBcdef16b = mkldnn_aBcdef16b, + aBCdef16b16c = mkldnn_aBCdef16b16c, + aBCdef16c16b = mkldnn_aBCdef16c16b, + aBcdef4b = mkldnn_aBcdef4b, + aBCdef4c4b = mkldnn_aBCdef4c4b, + aBCdef8b8c = mkldnn_aBCdef8b8c, + aBCdef8c16b2c = mkldnn_aBCdef8c16b2c, + aBCdef8c8b = mkldnn_aBCdef8c8b, + aBdc16b = mkldnn_aBdc16b, + aBdc4b = mkldnn_aBdc4b, + aBdc8b = mkldnn_aBdc8b, + aBdec16b = mkldnn_aBdec16b, + aBdec4b = mkldnn_aBdec4b, + aBdec8b = mkldnn_aBdec8b, + aBdefc16b = mkldnn_aBdefc16b, + aBdefc4b = mkldnn_aBdefc4b, + aBdefc8b = mkldnn_aBdefc8b, + Acb16a = mkldnn_Acb16a, + Acb4a = mkldnn_Acb4a, + Acb8a = mkldnn_Acb8a, + aCBd16b16c = mkldnn_aCBd16b16c, + aCBde16b16c = mkldnn_aCBde16b16c, + Acdb16a = mkldnn_Acdb16a, + Acdb4a = mkldnn_Acdb4a, + Acdb8a = mkldnn_Acdb8a, + Acdeb16a = mkldnn_Acdeb16a, + Acdeb4a = mkldnn_Acdeb4a, + Acdeb8a = mkldnn_Acdeb8a, + BAc16a16b = mkldnn_BAc16a16b, + BAcd16a16b = mkldnn_BAcd16a16b, + format_tag_last = mkldnn_format_tag_last, + + x = mkldnn_x, + nc = mkldnn_nc, + cn = mkldnn_cn, + ncw = mkldnn_ncw, + nwc = mkldnn_nwc, + nchw = mkldnn_nchw, + nhwc = mkldnn_nhwc, + chwn = mkldnn_chwn, + ncdhw = mkldnn_ncdhw, + ndhwc = mkldnn_ndhwc, + oi = mkldnn_oi, + io = mkldnn_io, + oiw = mkldnn_oiw, + wio = mkldnn_wio, + oihw = mkldnn_oihw, + hwio = 
mkldnn_hwio, + ihwo = mkldnn_ihwo, + iohw = mkldnn_iohw, + oidhw = mkldnn_oidhw, + dhwio = mkldnn_dhwio, + goiw = mkldnn_goiw, + goihw = mkldnn_goihw, + hwigo = mkldnn_hwigo, + giohw = mkldnn_giohw, + goidhw = mkldnn_goidhw, + tnc = mkldnn_tnc, + ntc = mkldnn_ntc, + ldsnc = mkldnn_ldsnc, + ldigo = mkldnn_ldigo, + ldgoi = mkldnn_ldgoi, + ldgo = mkldnn_ldgo, + nCdhw16c = mkldnn_nCdhw16c, + nCdhw4c = mkldnn_nCdhw4c, + nCdhw8c = mkldnn_nCdhw8c, + nChw16c = mkldnn_nChw16c, + nChw4c = mkldnn_nChw4c, + nChw8c = mkldnn_nChw8c, + nCw16c = mkldnn_nCw16c, + nCw4c = mkldnn_nCw4c, + nCw8c = mkldnn_nCw8c, + IOw16o16i = mkldnn_IOw16o16i, + OIw16i16o = mkldnn_OIw16i16o, + OIw16o16i = mkldnn_OIw16o16i, + Oiw16o = mkldnn_Oiw16o, + OIw4i16o4i = mkldnn_OIw4i16o4i, + OIw4i4o = mkldnn_OIw4i4o, + Oiw4o = mkldnn_Oiw4o, + OIw8i16o2i = mkldnn_OIw8i16o2i, + OIw8i8o = mkldnn_OIw8i8o, + OIw8o16i2o = mkldnn_OIw8o16i2o, + OIw8o8i = mkldnn_OIw8o8i, + Owi16o = mkldnn_Owi16o, + Owi4o = mkldnn_Owi4o, + Owi8o = mkldnn_Owi8o, + IOhw16o16i = mkldnn_IOhw16o16i, + Ohwi16o = mkldnn_Ohwi16o, + Ohwi4o = mkldnn_Ohwi4o, + Ohwi8o = mkldnn_Ohwi8o, + OIhw16i16o = mkldnn_OIhw16i16o, + OIhw16o16i = mkldnn_OIhw16o16i, + Oihw16o = mkldnn_Oihw16o, + OIhw4i16o4i = mkldnn_OIhw4i16o4i, + OIhw4i4o = mkldnn_OIhw4i4o, + Oihw4o = mkldnn_Oihw4o, + OIhw8i16o2i = mkldnn_OIhw8i16o2i, + OIhw8i8o = mkldnn_OIhw8i8o, + OIhw8o16i2o = mkldnn_OIhw8o16i2o, + OIhw8o8i = mkldnn_OIhw8o8i, + Odhwi16o = mkldnn_Odhwi16o, + Odhwi4o = mkldnn_Odhwi4o, + Odhwi8o = mkldnn_Odhwi8o, + OIdhw16i16o = mkldnn_OIdhw16i16o, + OIdhw16o16i = mkldnn_OIdhw16o16i, + Oidhw16o = mkldnn_Oidhw16o, + OIdhw4i4o = mkldnn_OIdhw4i4o, + Oidhw4o = mkldnn_Oidhw4o, + OIdhw8i16o2i = mkldnn_OIdhw8i16o2i, + OIdhw8i8o = mkldnn_OIdhw8i8o, + OIdhw8o8i = mkldnn_OIdhw8o8i, + gIOw16o16i = mkldnn_gIOw16o16i, + gOIw16i16o = mkldnn_gOIw16i16o, + gOIw16o16i = mkldnn_gOIw16o16i, + gOiw16o = mkldnn_gOiw16o, + gOIw4i16o4i = mkldnn_gOIw4i16o4i, + gOIw4i4o = mkldnn_gOIw4i4o, + gOiw4o = mkldnn_gOiw4o, + gOIw8i16o2i = mkldnn_gOIw8i16o2i, + gOIw8i8o = mkldnn_gOIw8i8o, + gOIw8o16i2o = mkldnn_gOIw8o16i2o, + gOIw8o8i = mkldnn_gOIw8o8i, + gOwi16o = mkldnn_gOwi16o, + gOwi4o = mkldnn_gOwi4o, + gOwi8o = mkldnn_gOwi8o, + gIOhw16o16i = mkldnn_gIOhw16o16i, + gOhwi16o = mkldnn_gOhwi16o, + gOhwi4o = mkldnn_gOhwi4o, + gOhwi8o = mkldnn_gOhwi8o, + Goihw16g = mkldnn_Goihw16g, + gOIhw16i16o = mkldnn_gOIhw16i16o, + gOIhw16o16i = mkldnn_gOIhw16o16i, + gOihw16o = mkldnn_gOihw16o, + gOIhw2i8o4i = mkldnn_gOIhw2i8o4i, + gOIhw4i16o4i = mkldnn_gOIhw4i16o4i, + gOIhw4i4o = mkldnn_gOIhw4i4o, + gOIhw4o4i = mkldnn_gOIhw4o4i, + gOihw4o = mkldnn_gOihw4o, + Goihw8g = mkldnn_Goihw8g, + gOIhw8i16o2i = mkldnn_gOIhw8i16o2i, + gOIhw8i8o = mkldnn_gOIhw8i8o, + gOIhw8o16i2o = mkldnn_gOIhw8o16i2o, + gOIhw8o8i = mkldnn_gOIhw8o8i, + gOdhwi16o = mkldnn_gOdhwi16o, + gOdhwi4o = mkldnn_gOdhwi4o, + gOdhwi8o = mkldnn_gOdhwi8o, + gOIdhw16i16o = mkldnn_gOIdhw16i16o, + gOIdhw16o16i = mkldnn_gOIdhw16o16i, + gOidhw16o = mkldnn_gOidhw16o, + gOIdhw4i4o = mkldnn_gOIdhw4i4o, + gOidhw4o = mkldnn_gOidhw4o, + gOIdhw8i16o2i = mkldnn_gOIdhw8i16o2i, + gOIdhw8i8o = mkldnn_gOIdhw8i8o, + gOIdhw8o8i = mkldnn_gOIdhw8o8i, + }; + + /// A memory descriptor. + struct desc { + friend struct memory; + /// The underlying C API data structure. + mkldnn_memory_desc_t data; + + /// Constructs a zero memory descriptor + desc(): data() {} + + /// Constructs a memory descriptor. + /// + /// @param adims Data dimensions + /// @param adata_type Data precision/type. 
+ /// @param aformat Data layout format tag. + desc(const dims &adims, data_type adata_type, + format_tag aformat) { + validate_dims(adims); + error::wrap_c_api(mkldnn_memory_desc_init_by_tag(&data, (int)adims.size(), + adims.size() == 0 ? nullptr : &adims[0], + convert_to_c(adata_type), convert_to_c(aformat)), + "could not initialize a memory descriptor"); + } + + /// Constructs a memory descriptor from a C API data structure. + /// + /// @param adata A C API #mkldnn_memory_desc_t structure. + desc(const mkldnn_memory_desc_t &adata): data(adata) {} + + /// Constructs a sub-memory descriptor + // + /// @param adims Sizes of a sub-memory + /// @param offsets Offsets of a sub-memory + desc submemory_desc(const dims &adims, const dims &offsets) { + mkldnn_memory_desc_t sub_md; + error::wrap_c_api(mkldnn_memory_desc_init_submemory(&sub_md, + &data, &adims[0], &offsets[0]), + "could not initialize a sub-memory"); + return desc(sub_md); + } + + /// Returns the number of bytes required to allocate the memory described + /// including the padding area. + size_t get_size() const { return mkldnn_memory_desc_get_size(&data); } + + bool operator==(const desc &other) const { + return mkldnn_memory_desc_equal(&data, &other.data) != 0; + } + + bool operator!=(const desc &other) const { return !operator==(other); } + }; + + /// Constructs a memory. + /// + /// @param md Memory descriptor. + /// @param aengine Engine. + /// @param ahandle Native handle. + memory(const desc &md, const engine &aengine, void *ahandle) { + mkldnn_memory_t result; + error::wrap_c_api(mkldnn_memory_create(&result, &md.data, + aengine.get(), ahandle), "could not create a memory"); + reset(result); + } + + /// Constructs a memory. + /// + /// @param md Memory descriptor. + /// @param aengine Engine. + memory(const desc &md, const engine &aengine) + : memory(md, aengine, MKLDNN_NATIVE_HANDLE_ALLOCATE) {} + + /// Returns the descriptor of the memory. + desc get_desc() const { + const mkldnn_memory_desc_t *cdesc; + error::wrap_c_api(mkldnn_memory_get_memory_desc(get(), &cdesc), + "could not get memory descriptor from a memory"); + return desc(*cdesc); + } + + /// Returns the engine of the memory. + engine get_engine() const { + mkldnn_engine_t engine_q; + error::wrap_c_api(mkldnn_memory_get_engine(get(), &engine_q), + "could not get engine from a memory"); + return engine(engine_q); + } + + /// Returns a handle of the data contained in the memory. + /// + /// On the CPU engine, this is a pointer to the allocated memory. 
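+    ///
+    /// As an illustrative sketch for a CPU engine (assuming the memory object
+    /// @c mem was created with f32 data), the buffer could be filled directly:
+    /// @code
+    /// float *ptr = static_cast<float *>(mem.get_data_handle());
+    /// size_t n = mem.get_desc().get_size() / sizeof(float);
+    /// for (size_t i = 0; i < n; ++i) ptr[i] = 0.f;
+    /// @endcode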
+ void *get_data_handle() const { + void *handle; + error::wrap_c_api(mkldnn_memory_get_data_handle(get(), &handle), + "could not get native handle"); + return handle; + } + + void set_data_handle(void *handle) const { + error::wrap_c_api(mkldnn_memory_set_data_handle(get(), handle), + "could not set native handle"); + } + + // Must go away or be private: + static mkldnn_data_type_t convert_to_c(data_type adata_type) { + return static_cast<mkldnn_data_type_t>(adata_type); + } + static mkldnn_format_tag_t convert_to_c(format_tag aformat) { + return static_cast<mkldnn_format_tag_t>(aformat); + } +}; + +inline bool operator==(mkldnn_data_type_t a, memory::data_type b) { + return a == memory::convert_to_c(b); +} +inline bool operator!=(mkldnn_data_type_t a, memory::data_type b) { + return !(a == b); +} +inline bool operator==(memory::data_type a, mkldnn_data_type_t b) { + return b == a; +} +inline bool operator!=(memory::data_type a, mkldnn_data_type_t b) { + return !(a == b); +} + +inline bool operator==(mkldnn_format_tag_t a, memory::format_tag b) { + return a == memory::convert_to_c(b); +} +inline bool operator!=(mkldnn_format_tag_t a, memory::format_tag b) { + return !(a == b); +} +inline bool operator==(memory::format_tag a, mkldnn_format_tag_t b) { + return b == a; +} +inline bool operator!=(memory::format_tag a, mkldnn_format_tag_t b) { + return !(a == b); +} + +/// @} + +/// @addtogroup cpp_api_reorder Reorder +/// A primitive to copy data between memory formats. +/// +/// @sa @ref c_api_reorder in @ref c_api +/// @{ + +struct reorder : public primitive { + struct primitive_desc : public handle<mkldnn_primitive_desc_t> { + primitive_desc(const engine &src_engine, const memory::desc &src_md, + const engine &dst_engine, const memory::desc &dst_md, + const primitive_attr &aattr) { + mkldnn_primitive_desc_t result; + error::wrap_c_api(mkldnn_reorder_primitive_desc_create(&result, + src_engine.get(), &src_md.data, + dst_engine.get(), &dst_md.data, aattr.get()), + "could not create a reorder primitive descriptor"); + reset(result); + } + + primitive_desc(const engine &src_engine, const memory::desc &src_md, + const engine &dst_engine, const memory::desc &dst_md) { + mkldnn_primitive_desc_t result; + error::wrap_c_api(mkldnn_reorder_primitive_desc_create(&result, + src_engine.get(), &src_md.data, + dst_engine.get(), &dst_md.data, nullptr), + "could not create a reorder primitive descriptor"); + reset(result); + } + + primitive_desc(const memory &src, const memory &dst, + const primitive_attr &aattr) { + mkldnn_primitive_desc_t result; + auto src_md = src.get_desc(); + auto dst_md = dst.get_desc(); + error::wrap_c_api(mkldnn_reorder_primitive_desc_create(&result, + src.get_engine().get(), &src_md.data, + dst.get_engine().get(), &dst_md.data, aattr.get()), + "could not create a reorder primitive descriptor"); + reset(result); + } + + primitive_desc(const memory &src, const memory &dst) { + mkldnn_primitive_desc_t result; + auto src_md = src.get_desc(); + auto dst_md = dst.get_desc(); + error::wrap_c_api(mkldnn_reorder_primitive_desc_create(&result, + src.get_engine().get(), &src_md.data, + dst.get_engine().get(), &dst_md.data, nullptr), + "could not create a reorder primitive descriptor"); + reset(result); + } + + memory::desc scratchpad_desc() const { + const mkldnn_memory_desc_t *cdesc = mkldnn_primitive_desc_query_md( + get(), mkldnn::convert_to_c(scratchpad_md), 0); + if (cdesc == nullptr) + return memory::desc(); + return memory::desc(*cdesc); + } + + engine scratchpad_engine() { + 
mkldnn_engine_t engine_q; + error::wrap_c_api( + mkldnn_primitive_desc_query(get(), + mkldnn::convert_to_c(query_scratchpad_engine), 0, &engine_q), + "could not get scratchpad engine from reorder primitive_desc"); + + return engine(engine_q); + } + + engine get_engine() { return engine::query(*this); } + }; + + reorder(const primitive_desc &pd): primitive(pd.get()) {} + + reorder(const memory &src, const memory &dst): + primitive(primitive_desc(src, dst).get()) {} + + void execute(stream astream, memory &src, memory &dst) { + primitive::execute(astream, + {{MKLDNN_ARG_FROM, src}, {MKLDNN_ARG_TO, dst}}); + } +}; + +/// @} + +/// @addtogroup cpp_api_concat Concat +/// A primitive to concatenate data by arbitrary dimension. +/// +/// @sa @ref c_api_concat in @ref c_api +/// @{ + +struct concat : public primitive { + struct primitive_desc : public handle<mkldnn_primitive_desc_t> { + std::vector<mkldnn_memory_desc_t> cpp_to_c( + const std::vector<memory::desc> &srcs) { + std::vector<mkldnn_memory_desc_t> c_api_srcs; + c_api_srcs.reserve(srcs.size()); + for (const auto &s : srcs) c_api_srcs.push_back(s.data); + return c_api_srcs; + } + + primitive_desc(const memory::desc &dst, int concat_dimension, + const std::vector<memory::desc> &srcs, const engine &aengine) { + auto c_api_srcs = cpp_to_c(srcs); + + mkldnn_primitive_desc_t result; + error::wrap_c_api(mkldnn_concat_primitive_desc_create( + &result, &dst.data, (int)c_api_srcs.size(), + concat_dimension, &c_api_srcs[0], nullptr, aengine.get()), + "could not create a concat primitive descriptor"); + reset(result); + } + + primitive_desc(int concat_dimension, + const std::vector<memory::desc> &srcs, const engine &aengine) { + auto c_api_srcs = cpp_to_c(srcs); + + mkldnn_primitive_desc_t result; + error::wrap_c_api(mkldnn_concat_primitive_desc_create( + &result, nullptr, (int)c_api_srcs.size(), + concat_dimension, &c_api_srcs[0], nullptr, aengine.get()), + "could not create a concat primitive descriptor"); + reset(result); + } + + memory::desc dst_desc() const { + const mkldnn_memory_desc_t *cdesc = mkldnn_primitive_desc_query_md( + get(), mkldnn::convert_to_c(dst_md), 0); + error::wrap_c_api( + cdesc == nullptr ? mkldnn_runtime_error : mkldnn_success, + "could not get a dst memory descriptor"); + return memory::desc(*cdesc); + } + + memory::desc scratchpad_desc() const { + const mkldnn_memory_desc_t *cdesc = mkldnn_primitive_desc_query_md( + get(), mkldnn::convert_to_c(scratchpad_md), 0); + if (cdesc == nullptr) + return memory::desc(); + return memory::desc(*cdesc); + } + + engine get_engine() { return engine::query(*this); } + }; + + concat(const primitive_desc &pd): primitive(pd.get()) {} +}; + +/// @} + +/// @addtogroup cpp_api_sum Sum +/// A primitive to sum data. +/// +/// @sa @ref c_api_sum in @ref c_api +/// @{ + +struct sum : public primitive { + struct primitive_desc : public handle<mkldnn_primitive_desc_t> { + std::vector<mkldnn_memory_desc_t> cpp_to_c( + const std::vector<memory::desc> &srcs) { + std::vector<mkldnn_memory_desc_t> c_api_srcs; + c_api_srcs.reserve(srcs.size()); + for (const auto &s : srcs) c_api_srcs.push_back(s.data); + return c_api_srcs; + } + + primitive_desc(const memory::desc &dst, + const std::vector<float> &scales, + const std::vector<memory::desc> &srcs, const engine &aengine) { + error::wrap_c_api(scales.size() == srcs.size() + ? 
mkldnn_success : mkldnn_invalid_arguments, + "number of scales not equal to number of srcs"); + + auto c_api_srcs = cpp_to_c(srcs); + + mkldnn_primitive_desc_t result; + error::wrap_c_api(mkldnn_sum_primitive_desc_create( + &result, &dst.data, (int)c_api_srcs.size(), + &scales[0], &c_api_srcs[0], nullptr, aengine.get()), + "could not create a sum primitive descriptor"); + reset(result); + } + + primitive_desc(const std::vector<float> &scales, + const std::vector<memory::desc> &srcs, const engine &aengine) { + error::wrap_c_api(scales.size() == srcs.size() + ? mkldnn_success : mkldnn_invalid_arguments, + "number of scales not equal to number of srcs"); + + auto c_api_srcs = cpp_to_c(srcs); + mkldnn_primitive_desc_t result; + error::wrap_c_api(mkldnn_sum_primitive_desc_create(&result, + nullptr, (int)c_api_srcs.size(), &scales[0], + &c_api_srcs[0], nullptr, aengine.get()), + "could not create a sum primitive descriptor"); + reset(result); + } + + memory::desc dst_desc() const { + const mkldnn_memory_desc_t *cdesc = mkldnn_primitive_desc_query_md( + get(), mkldnn::convert_to_c(dst_md), 0); + error::wrap_c_api( + cdesc == nullptr ? mkldnn_runtime_error : mkldnn_success, + "could not get a dst memory descriptor"); + return memory::desc(*cdesc); + } + + memory::desc scratchpad_desc() const { + const mkldnn_memory_desc_t *cdesc = mkldnn_primitive_desc_query_md( + get(), mkldnn::convert_to_c(scratchpad_md), 0); + if (cdesc == nullptr) + return memory::desc(); + return memory::desc(*cdesc); + } + + engine get_engine() { return engine::query(*this); } + }; + + sum(const primitive_desc &pd): primitive(pd.get()) {} +}; + +/// @} + +/// @} + +/// @addtogroup cpp_api_primitives Primitives +/// @{ + +/// @addtogroup cpp_api_primitive_descriptors Primitive descriptors +/// @{ + +/// A base class for all primitive descriptors. +struct primitive_desc : public handle<mkldnn_primitive_desc_t> { + primitive_desc(const_mkldnn_op_desc_t desc, const primitive_attr *attr, + const engine &e, const_mkldnn_primitive_desc_t hint_fwd_pd) { + mkldnn_primitive_desc_iterator_t iterator = nullptr; + mkldnn_status_t status = mkldnn_primitive_desc_iterator_create( + &iterator, desc, attr ? attr->get() : nullptr, e.get(), + hint_fwd_pd); + error::wrap_c_api(status, + "could not create a primitive descriptor iterator"); + pd_iterator.reset(iterator); + fetch_impl(); + } + + engine get_engine() { return engine::query(*this); } + + primitive_attr get_primitive_attr() const { + const_mkldnn_primitive_attr_t const_cattr; + error::wrap_c_api(mkldnn_primitive_desc_get_attr(get(), &const_cattr), + "could not get attributes"); + mkldnn_primitive_attr_t cattr; + error::wrap_c_api(mkldnn_primitive_attr_clone(&cattr, const_cattr), + "could not clone attributes"); + + primitive_attr attr; + attr.reset(cattr); + return attr; + } + + /// Returns implementation name + const char *impl_info_str() const { + const char *res; + error::wrap_c_api(mkldnn_primitive_desc_query(get(), + mkldnn_query_impl_info_str, 0, &res), + "could not query implementation info string"); + return res; + } + + /// Queries the memory::dim value (same as int64_t) + memory::dim query_s64(query q) const { + memory::dim res; + mkldnn_status_t status = mkldnn_primitive_desc_query(get(), + mkldnn::convert_to_c(q), 0, &res); + return status == mkldnn_success ? res : 0; + } + + /// Advances the next implementation for the given op descriptor. 
+ /// + /// Returns: + /// - @c true on success + /// - @c false if the last implementation reached, and + /// the primitive descriptor itself is kept unchanged + bool next_impl() { + mkldnn_status_t status = mkldnn_primitive_desc_iterator_next( + pd_iterator.get()); + if (status == mkldnn_iterator_ends) return false; + error::wrap_c_api(status, "primitive descriptor iterator next failed"); + + fetch_impl(); + return true; + } + + /// Queries and returns requested memory descriptor. + memory::desc query_md(query what, int idx = 0) const { + std::vector<query> valid_q{src_md, diff_src_md, weights_md, + diff_weights_md, dst_md, diff_dst_md, workspace_md, scratchpad_md}; + if (!std::any_of(valid_q.cbegin(), valid_q.cend(), + [=](query q) { return what == q; })) + throw error(mkldnn_invalid_arguments, "invalid memory query"); + + const mkldnn_memory_desc_t *cdesc = mkldnn_primitive_desc_query_md( + get(), mkldnn::convert_to_c(what), idx); + if (cdesc == nullptr) return memory::desc(); + + return memory::desc(*cdesc); + } + + // register specialized queries, e.g. src_desc() +# define REG_QUERY_MD(name, what, idx) \ + memory::desc name ## _desc() const { return query_md(what ## _md, idx); } + + private: + handle<mkldnn_primitive_desc_iterator_t> pd_iterator; + void fetch_impl() { + mkldnn_primitive_desc_t pd = mkldnn_primitive_desc_iterator_fetch( + pd_iterator.get()); + error::wrap_c_api(pd != nullptr ? mkldnn_success : mkldnn_runtime_error, + "could not fetch a primitive descriptor from the iterator"); + reset(pd); + } +}; + +/// @} + +/// @addtogroup cpp_api_convolution Convolution +/// A primitive to compute convolution using different algorithms. +/// +/// @sa @ref c_api_convolution in @ref c_api +/// @{ + +struct convolution_forward: public primitive { + struct desc { + mkldnn_convolution_desc_t data; + desc(prop_kind aprop_kind, algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &weights_desc, + const memory::desc &bias_desc, + const memory::desc &dst_desc, + const memory::dims strides, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_convolution_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), convert_to_c(aalgorithm), + &src_desc.data, &weights_desc.data, &bias_desc.data, + &dst_desc.data, &strides[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a convolution forward descriptor"); + } + desc(prop_kind aprop_kind, algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &weights_desc, + const memory::desc &dst_desc, + const memory::dims strides, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_convolution_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), convert_to_c(aalgorithm), + &src_desc.data, &weights_desc.data, nullptr, + &dst_desc.data, &strides[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a convolution forward descriptor"); + } + desc(prop_kind aprop_kind, algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &weights_desc, + const memory::desc &bias_desc, + const memory::desc &dst_desc, + const memory::dims strides, + const memory::dims 
dilates, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(dilates); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api( + mkldnn_dilated_convolution_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), convert_to_c(aalgorithm), + &src_desc.data, &weights_desc.data, &bias_desc.data, + &dst_desc.data, &strides[0], &dilates[0], + &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a dilated convolution forward descriptor"); + } + desc(prop_kind aprop_kind, algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &weights_desc, + const memory::desc &dst_desc, + const memory::dims strides, + const memory::dims dilates, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(dilates); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api( + mkldnn_dilated_convolution_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), convert_to_c(aalgorithm), + &src_desc.data, &weights_desc.data, nullptr, + &dst_desc.data, &strides[0], &dilates[0], + &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a dilated convolution forward descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e) + : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e) + : mkldnn::primitive_desc(&desc.data, &attr, e, nullptr) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(weights, weights, 0); + REG_QUERY_MD(bias, weights, 1); + REG_QUERY_MD(dst, dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + convolution_forward(const primitive_desc &pd): primitive(pd) {} +}; + +struct convolution_backward_data : public primitive { + struct desc { + mkldnn_convolution_desc_t data; + desc(algorithm aalgorithm, + const memory::desc &diff_src_desc, + const memory::desc &weights_desc, + const memory::desc &diff_dst_desc, + const memory::dims strides, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_convolution_backward_data_desc_init( + &data, convert_to_c(aalgorithm), &diff_src_desc.data, + &weights_desc.data, &diff_dst_desc.data, + &strides[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a convolution backward data descriptor"); + } + desc(algorithm aalgorithm, + const memory::desc &diff_src_desc, + const memory::desc &weights_desc, + const memory::desc &diff_dst_desc, + const memory::dims strides, + const memory::dims dilates, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(dilates); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api( + mkldnn_dilated_convolution_backward_data_desc_init( + &data, convert_to_c(aalgorithm), &diff_src_desc.data, + &weights_desc.data, &diff_dst_desc.data, + &strides[0], &dilates[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a 
convolution backward data descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e, + const convolution_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e, + const convolution_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, &attr, e, hint_fwd_pd.get()) {} + + REG_QUERY_MD(diff_src, diff_src, 0); + REG_QUERY_MD(weights, weights, 0); + REG_QUERY_MD(diff_dst, diff_dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + convolution_backward_data(const primitive_desc &pd): primitive(pd) {} +}; + +struct convolution_backward_weights : public primitive { + struct desc { + mkldnn_convolution_desc_t data; + desc(algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &diff_weights_desc, + const memory::desc &diff_bias_desc, + const memory::desc &diff_dst_desc, + const memory::dims strides, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_convolution_backward_weights_desc_init( + &data, convert_to_c(aalgorithm), &src_desc.data, + &diff_weights_desc.data, &diff_bias_desc.data, + &diff_dst_desc.data, + &strides[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a convolution backward weights descriptor"); + } + desc(algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &diff_weights_desc, + const memory::desc &diff_dst_desc, + const memory::dims strides, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_convolution_backward_weights_desc_init( + &data, convert_to_c(aalgorithm), &src_desc.data, + &diff_weights_desc.data, nullptr, &diff_dst_desc.data, + &strides[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a convolution backward weights descriptor"); + } + desc(algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &diff_weights_desc, + const memory::desc &diff_bias_desc, + const memory::desc &diff_dst_desc, + const memory::dims strides, + const memory::dims dilates, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(dilates); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_dilated_convolution_backward_weights_desc_init( + &data, convert_to_c(aalgorithm), &src_desc.data, + &diff_weights_desc.data, &diff_bias_desc.data, + &diff_dst_desc.data, + &strides[0], &dilates[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a convolution backward weights descriptor"); + } + desc(algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &diff_weights_desc, + const memory::desc &diff_dst_desc, + const memory::dims strides, + const memory::dims dilates, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(dilates); + 
memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_dilated_convolution_backward_weights_desc_init( + &data, convert_to_c(aalgorithm), &src_desc.data, + &diff_weights_desc.data, nullptr, &diff_dst_desc.data, + &strides[0], &dilates[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a convolution backward weights descriptor"); + } + + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e, + const convolution_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e, + const convolution_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, &attr, e, hint_fwd_pd.get()) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(diff_weights, diff_weights, 0); + REG_QUERY_MD(diff_bias, diff_weights, 1); + REG_QUERY_MD(diff_dst, diff_dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + convolution_backward_weights(const primitive_desc &pd): primitive(pd) {} +}; + +/// @} +// +/// @addtogroup cpp_api_deconvolution Deconvolution +/// A primitive to compute deconvolution using different algorithms. +/// +/// @sa @ref c_api_deconvolution in @ref c_api +/// @{ + +struct deconvolution_forward: public primitive { + struct desc { + mkldnn_deconvolution_desc_t data; + desc(prop_kind aprop_kind, algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &weights_desc, + const memory::desc &bias_desc, + const memory::desc &dst_desc, + const memory::dims strides, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_deconvolution_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), convert_to_c(aalgorithm), + &src_desc.data, &weights_desc.data, &bias_desc.data, + &dst_desc.data, &strides[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a deconvolution forward descriptor"); + } + desc(prop_kind aprop_kind, algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &weights_desc, + const memory::desc &dst_desc, + const memory::dims strides, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_deconvolution_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), convert_to_c(aalgorithm), + &src_desc.data, &weights_desc.data, nullptr, + &dst_desc.data, &strides[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a deconvolution forward descriptor"); + } + desc(prop_kind aprop_kind, algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &weights_desc, + const memory::desc &bias_desc, + const memory::desc &dst_desc, + const memory::dims strides, + const memory::dims dilates, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(dilates); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_dilated_deconvolution_forward_desc_init(&data, + 
mkldnn::convert_to_c(aprop_kind), convert_to_c(aalgorithm), + &src_desc.data, &weights_desc.data, &bias_desc.data, + &dst_desc.data, &strides[0], &dilates[0], &padding_l[0], + &padding_r[0], mkldnn::convert_to_c(apadding_kind)), + "could not create a dilated deconvolution forward descriptor"); + } + desc(prop_kind aprop_kind, algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &weights_desc, + const memory::desc &dst_desc, + const memory::dims strides, + const memory::dims dilates, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(dilates); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_dilated_deconvolution_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), convert_to_c(aalgorithm), + &src_desc.data, &weights_desc.data, nullptr, + &dst_desc.data, &strides[0], &dilates[0], &padding_l[0], + &padding_r[0], mkldnn::convert_to_c(apadding_kind)), + "could not create a dilated deconvolution forward descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e) + : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e) + : mkldnn::primitive_desc(&desc.data, &attr, e, nullptr) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(weights, weights, 0); + REG_QUERY_MD(bias, weights, 1); + REG_QUERY_MD(dst, dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + deconvolution_forward(const primitive_desc &pd): primitive(pd) {} +}; + +struct deconvolution_backward_data : public primitive { + struct desc { + mkldnn_deconvolution_desc_t data; + desc(algorithm aalgorithm, + const memory::desc &diff_src_desc, + const memory::desc &weights_desc, + const memory::desc &diff_dst_desc, + const memory::dims strides, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_deconvolution_backward_data_desc_init( + &data, convert_to_c(aalgorithm), &diff_src_desc.data, + &weights_desc.data, &diff_dst_desc.data, + &strides[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a deconvolution backward data descriptor"); + } + desc(algorithm aalgorithm, + const memory::desc &diff_src_desc, + const memory::desc &weights_desc, + const memory::desc &diff_dst_desc, + const memory::dims strides, + const memory::dims dilates, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(dilates); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_dilated_deconvolution_backward_data_desc_init( + &data, convert_to_c(aalgorithm), &diff_src_desc.data, + &weights_desc.data, &diff_dst_desc.data, + &strides[0], &dilates[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a dilated deconvolution backward data descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e, + const deconvolution_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {} + + 
primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e, + const deconvolution_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, &attr, e, hint_fwd_pd.get()) {} + + REG_QUERY_MD(diff_src, diff_src, 0); + REG_QUERY_MD(weights, weights, 0); + REG_QUERY_MD(diff_dst, diff_dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + deconvolution_backward_data(const primitive_desc &pd): primitive(pd) {} +}; + +struct deconvolution_backward_weights : public primitive { + struct desc { + mkldnn_deconvolution_desc_t data; + desc(algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &diff_weights_desc, + const memory::desc &diff_bias_desc, + const memory::desc &diff_dst_desc, + const memory::dims strides, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_deconvolution_backward_weights_desc_init( + &data, convert_to_c(aalgorithm), &src_desc.data, + &diff_weights_desc.data, &diff_bias_desc.data, + &diff_dst_desc.data, + &strides[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a deconvolution backward weights descriptor"); + } + desc(algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &diff_weights_desc, + const memory::desc &diff_dst_desc, + const memory::dims strides, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_deconvolution_backward_weights_desc_init( + &data, convert_to_c(aalgorithm), &src_desc.data, + &diff_weights_desc.data, nullptr, &diff_dst_desc.data, + &strides[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a deconvolution backward weights descriptor"); + } + desc(algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &diff_weights_desc, + const memory::desc &diff_bias_desc, + const memory::desc &diff_dst_desc, + const memory::dims strides, + const memory::dims dilates, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(dilates); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_dilated_deconvolution_backward_weights_desc_init( + &data, convert_to_c(aalgorithm), &src_desc.data, + &diff_weights_desc.data, &diff_bias_desc.data, + &diff_dst_desc.data, + &strides[0], &dilates[0], &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a dilated deconvolution backward weights descriptor"); + } + desc(algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &diff_weights_desc, + const memory::desc &diff_dst_desc, + const memory::dims strides, + const memory::dims dilates, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(dilates); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_dilated_deconvolution_backward_weights_desc_init( + &data, convert_to_c(aalgorithm), &src_desc.data, + &diff_weights_desc.data, nullptr, &diff_dst_desc.data, + &strides[0], &dilates[0], 
&padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not create a dilated deconvolution backward weights descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e, + const deconvolution_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e, + const deconvolution_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, &attr, e, hint_fwd_pd.get()) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(diff_weights, diff_weights, 0); + REG_QUERY_MD(diff_bias, diff_weights, 1); + REG_QUERY_MD(diff_dst, diff_dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + deconvolution_backward_weights(const primitive_desc &pd): primitive(pd) {} +}; + +/// @} + +/// @addtogroup cpp_api_lrn LRN +/// A primitive to perform local response normalization (LRN) across or within +/// channels. +/// +/// @sa @ref c_api_lrn in @ref c_api +/// @{ + +struct lrn_forward : public primitive { + struct desc { + mkldnn_lrn_desc_t data; + + desc(prop_kind aprop_kind, algorithm aalgorithm, + const memory::desc &src_desc, memory::dim local_size, + float alpha, float beta, float k = 1.f) { + error::wrap_c_api(mkldnn_lrn_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), convert_to_c(aalgorithm), + &src_desc.data, local_size, alpha, beta, k), + "could not create a lrn forward descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e) + : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e) + : mkldnn::primitive_desc(&desc.data, &attr, e, nullptr) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(dst, dst, 0); + REG_QUERY_MD(workspace, workspace, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + lrn_forward(const primitive_desc &pd): primitive(pd) {} +}; + +struct lrn_backward : public primitive { + struct desc { + mkldnn_lrn_desc_t data; + + desc(algorithm aalgorithm, const memory::desc &data_desc, + const memory::desc &diff_data_desc, memory::dim local_size, + float alpha, float beta, float k = 1.f) { + error::wrap_c_api(mkldnn_lrn_backward_desc_init(&data, + convert_to_c(aalgorithm), &diff_data_desc.data, + &data_desc.data, local_size, alpha, beta, k), + "could not create a lrn backward descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e, + const lrn_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e, + const lrn_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, &attr, e, hint_fwd_pd.get()) {} + + REG_QUERY_MD(diff_src, diff_src, 0); + REG_QUERY_MD(diff_dst, diff_dst, 0); + REG_QUERY_MD(workspace, workspace, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + lrn_backward(const primitive_desc &pd): primitive(pd) {} +}; + +/// @} + +/// @addtogroup cpp_api_pooling Pooling +/// A primitive to perform max or average pooling. 
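+///
+/// A minimal usage sketch, assuming `eng` is an already-constructed engine and
+/// `src_md`/`dst_md` are compatible `memory::desc` objects supplied by the
+/// caller:
+///
+/// ~~~cpp
+/// auto pool_d = pooling_forward::desc(prop_kind::forward_inference,
+///         algorithm::pooling_max, src_md, dst_md,
+///         {2, 2} /* strides */, {2, 2} /* kernel */,
+///         {0, 0} /* padding_l */, {0, 0} /* padding_r */,
+///         padding_kind::zero);
+/// auto pool_pd = pooling_forward::primitive_desc(pool_d, eng);
+/// auto pool = pooling_forward(pool_pd);
+/// ~~~
+///
+/// Execution then goes through primitive::execute() with a stream and an
+/// argument map, as for any other primitive.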
+/// +/// @sa @ref c_api_pooling in @ref c_api +/// @{ + +struct pooling_forward : public primitive { + struct desc { + mkldnn_pooling_desc_t data; + desc(prop_kind aprop_kind, algorithm aalgorithm, + const memory::desc &src_desc, + const memory::desc &dst_desc, + const memory::dims strides, + const memory::dims kernel, + const memory::dims padding_l, + const memory::dims padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(kernel); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_pooling_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), + convert_to_c(aalgorithm), + &src_desc.data, &dst_desc.data, + &strides[0], &kernel[0], + &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not init a forward pooling descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e) + : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e) + : mkldnn::primitive_desc(&desc.data, &attr, e, nullptr) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(dst, dst, 0); + REG_QUERY_MD(workspace, workspace, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + pooling_forward(const primitive_desc &pd): primitive(pd) {} +}; + +struct pooling_backward : public primitive { + struct desc { + mkldnn_pooling_desc_t data; + desc(algorithm aalgorithm, + const memory::desc &diff_src_desc, + const memory::desc &diff_dst_desc, + const memory::dims &strides, + const memory::dims &kernel, + const memory::dims &padding_l, + const memory::dims &padding_r, + const padding_kind apadding_kind) { + memory::validate_dims(strides); + memory::validate_dims(kernel); + memory::validate_dims(padding_l); + memory::validate_dims(padding_r); + error::wrap_c_api(mkldnn_pooling_backward_desc_init(&data, + convert_to_c(aalgorithm), + &diff_src_desc.data, &diff_dst_desc.data, + &strides[0], &kernel[0], + &padding_l[0], &padding_r[0], + mkldnn::convert_to_c(apadding_kind)), + "could not init a backward pooling descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e, + const pooling_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e, + const pooling_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, &attr, e, hint_fwd_pd.get()) {} + + REG_QUERY_MD(diff_src, diff_src, 0); + REG_QUERY_MD(diff_dst, diff_dst, 0); + REG_QUERY_MD(workspace, workspace, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + pooling_backward(const primitive_desc &pd): primitive(pd) {} +}; + +/// @} + +/// @addtogroup cpp_api_eltwise Eltwise +/// A primitive to compute element-wise operations like parametric rectifier +/// linear unit (ReLU). 
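+///
+/// A minimal usage sketch, assuming `eng` is a valid engine and `src_md` is a
+/// caller-provided `memory::desc`; alpha and beta default to 0, which for the
+/// ReLU algorithm gives a plain rectifier:
+///
+/// ~~~cpp
+/// auto relu_d = eltwise_forward::desc(prop_kind::forward_training,
+///         algorithm::eltwise_relu, src_md, 0.f /* alpha: negative slope */);
+/// auto relu_pd = eltwise_forward::primitive_desc(relu_d, eng);
+/// auto relu = eltwise_forward(relu_pd);
+/// ~~~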
+/// +/// @sa @ref c_api_eltwise in @ref c_api +/// @{ + +struct eltwise_forward : public primitive { + struct desc { + mkldnn_eltwise_desc_t data; + template <typename T> + desc(prop_kind aprop_kind, algorithm alg_kind, + const memory::desc &src_desc, T alpha = 0, T beta = 0) { + error::wrap_c_api(mkldnn_eltwise_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), + mkldnn::convert_to_c(alg_kind), &src_desc.data, + static_cast<float>(alpha), static_cast<float>(beta)), + "could not create a eltwise forward descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e) + : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e) + : mkldnn::primitive_desc(&desc.data, &attr, e, nullptr) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(dst, dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + eltwise_forward(const primitive_desc &pd): primitive(pd) {} +}; + +struct eltwise_backward : public primitive { + struct desc { + mkldnn_eltwise_desc_t data; + + template <typename T> + desc(algorithm alg_kind, const memory::desc &diff_data_desc, + const memory::desc &data_desc, T alpha = 0, T beta = 0) { + error::wrap_c_api(mkldnn_eltwise_backward_desc_init(&data, + mkldnn::convert_to_c(alg_kind), &diff_data_desc.data, + &data_desc.data, static_cast<float>(alpha), + static_cast<float>(beta)), + "could not create a eltwise backward descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e, + const eltwise_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e, + const eltwise_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, &attr, e, hint_fwd_pd.get()) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(diff_src, diff_src, 0); + REG_QUERY_MD(diff_dst, diff_dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + eltwise_backward(const primitive_desc &pd): primitive(pd) {} +}; + +/// @} + +/// @addtogroup cpp_api_softmax Softmax +/// A primitive to perform softmax. 
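+///
+/// A minimal usage sketch, assuming `eng` is a valid engine and `data_md`
+/// describes a caller-provided (batch x channels) tensor:
+///
+/// ~~~cpp
+/// const int softmax_axis = 1; // normalize over the channel dimension
+/// auto sm_d = softmax_forward::desc(prop_kind::forward_inference, data_md,
+///         softmax_axis);
+/// auto sm_pd = softmax_forward::primitive_desc(sm_d, eng);
+/// auto sm = softmax_forward(sm_pd);
+/// ~~~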
+/// +/// @sa @ref c_api_softmax in @ref c_api +/// @{ + +struct softmax_forward : public primitive { + struct desc { + mkldnn_softmax_desc_t data; + desc(prop_kind aprop_kind, const memory::desc &data_desc, + int softmax_axis) { + error::wrap_c_api(mkldnn_softmax_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), &data_desc.data, + softmax_axis), + "could not create a softmax forward descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e) + : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e) + : mkldnn::primitive_desc(&desc.data, &attr, e, nullptr) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(dst, dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + softmax_forward(const primitive_desc &pd): primitive(pd) {} +}; + +struct softmax_backward : public primitive { + struct desc { + mkldnn_softmax_desc_t data; + desc(const memory::desc &diff_desc, const memory::desc &data_desc, + int softmax_axis) { + error::wrap_c_api(mkldnn_softmax_backward_desc_init(&data, + &diff_desc.data, &data_desc.data, softmax_axis), + "could not init a backward softmax descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e, + const softmax_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e, + const softmax_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, &attr, e, hint_fwd_pd.get()) {} + + REG_QUERY_MD(dst, dst, 0); + REG_QUERY_MD(diff_src, diff_src, 0); + REG_QUERY_MD(diff_dst, diff_dst, 0); + REG_QUERY_MD(workspace, workspace, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + softmax_backward(const primitive_desc &pd): primitive(pd) {} +}; + +/// @} + +/// @addtogroup cpp_api_batch_norm Batch normalization +/// A primitive to perform batch normalization. 
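+///
+/// A minimal usage sketch, assuming `eng` is a valid engine, `src_md` is a
+/// caller-provided `memory::desc`, and the statistics are supplied by the
+/// user (hence the `use_global_stats` flag bit referenced later in this
+/// header):
+///
+/// ~~~cpp
+/// auto bn_d = batch_normalization_forward::desc(
+///         prop_kind::forward_inference, src_md,
+///         1e-5f /* epsilon */, use_global_stats);
+/// auto bn_pd = batch_normalization_forward::primitive_desc(bn_d, eng);
+/// auto bn = batch_normalization_forward(bn_pd);
+/// ~~~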
+/// +/// @sa @ref c_api_batch_normalization in @ref c_api +/// @{ + +struct batch_normalization_forward : public primitive { + struct desc { + mkldnn_batch_normalization_desc_t data; + template <typename T> + desc(prop_kind aprop_kind, const memory::desc &src_desc, T epsilon, + unsigned flags) { + error::wrap_c_api( + mkldnn_batch_normalization_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), &src_desc.data, + static_cast<float>(epsilon), flags), + "could not create a batch normalization forward descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e) + : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e) + : mkldnn::primitive_desc(&desc.data, &attr, e, nullptr) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(weights, weights, 0); + REG_QUERY_MD(dst, dst, 0); + REG_QUERY_MD(workspace, workspace, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + + memory::desc mean_desc() const { return stat_desc(mean); } + memory::desc variance_desc() const { return stat_desc(var); } + + private: + enum { mean = 1, var = 2, }; + memory::desc stat_desc(int kind) const { + mkldnn_batch_normalization_desc_t *p; + error::wrap_c_api(mkldnn_primitive_desc_query( + get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p), + "could not get a batch-normalization descriptor"); + return query_md(p->flags & use_global_stats ? src_md : dst_md, kind); + } + }; + + batch_normalization_forward(const primitive_desc &pd): primitive(pd) {} +}; + +struct batch_normalization_backward : public primitive { + struct desc { + mkldnn_batch_normalization_desc_t data; + template <typename T> + desc(prop_kind aprop_kind, const memory::desc &diff_data_desc, + const memory::desc &data_desc, T epsilon, unsigned flags) { + error::wrap_c_api( + mkldnn_batch_normalization_backward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), + &diff_data_desc.data, &data_desc.data, + static_cast<float>(epsilon), flags), + "could not create a batch normalization backward descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e, + const batch_normalization_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e, + const batch_normalization_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, &attr, e, hint_fwd_pd.get()) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(mean, src, 1); + REG_QUERY_MD(variance, src, 2); + REG_QUERY_MD(weights, weights, 0); + REG_QUERY_MD(dst, dst, 0); + REG_QUERY_MD(diff_dst, diff_dst, 0); + REG_QUERY_MD(workspace, workspace, 0); + + REG_QUERY_MD(diff_src, diff_src, 0); + REG_QUERY_MD(diff_weights, diff_weights, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + batch_normalization_backward(const primitive_desc &pd): primitive(pd) {} +}; + +/// @} + +/// @addtogroup cpp_api_inner_product Inner Product +/// A primitive to compute an inner product. 
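+///
+/// A minimal usage sketch, assuming `eng` is a valid engine and `src_md`,
+/// `weights_md`, `bias_md`, and `dst_md` are caller-provided memory
+/// descriptors with matching dimensions:
+///
+/// ~~~cpp
+/// auto ip_d = inner_product_forward::desc(prop_kind::forward_inference,
+///         src_md, weights_md, bias_md, dst_md);
+/// auto ip_pd = inner_product_forward::primitive_desc(ip_d, eng);
+/// auto ip = inner_product_forward(ip_pd);
+/// ~~~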
+/// +/// @sa @ref c_api_inner_product in @ref c_api +/// @{ + +struct inner_product_forward: public primitive { + struct desc { + mkldnn_inner_product_desc_t data; + desc(prop_kind aprop_kind, const memory::desc &src_desc, + const memory::desc &weights_desc, + const memory::desc &bias_desc, + const memory::desc &dst_desc) { + error::wrap_c_api( + mkldnn_inner_product_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), &src_desc.data, + &weights_desc.data, &bias_desc.data, &dst_desc.data), + "could not create a inner product forward descriptor"); + } + + desc(prop_kind aprop_kind, const memory::desc &src_desc, + const memory::desc &weights_desc, + const memory::desc &dst_desc) { + error::wrap_c_api( + mkldnn_inner_product_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), &src_desc.data, + &weights_desc.data, nullptr, &dst_desc.data), + "could not create a inner product forward descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e) + : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e) + : mkldnn::primitive_desc(&desc.data, &attr, e, nullptr) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(weights, weights, 0); + REG_QUERY_MD(bias, weights, 1); + REG_QUERY_MD(dst, dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + inner_product_forward(const primitive_desc &pd): primitive(pd) {} +}; + +struct inner_product_backward_data: public primitive { + struct desc { + mkldnn_inner_product_desc_t data; + desc(const memory::desc &diff_src_desc, + const memory::desc &weights_desc, + const memory::desc &diff_dst_desc) { + error::wrap_c_api( + mkldnn_inner_product_backward_data_desc_init(&data, + &diff_src_desc.data, &weights_desc.data, + &diff_dst_desc.data), + "could not create a inner product backward data descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e, + const inner_product_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e, + const inner_product_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, &attr, e, hint_fwd_pd.get()) {} + + REG_QUERY_MD(diff_src, diff_src, 0); + REG_QUERY_MD(weights, weights, 0); + REG_QUERY_MD(diff_dst, diff_dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + inner_product_backward_data(const primitive_desc &pd): primitive(pd) {} +}; + +struct inner_product_backward_weights: public primitive { + struct desc { + mkldnn_inner_product_desc_t data; + desc(const memory::desc &src_desc, + const memory::desc &diff_weights_desc, + const memory::desc &diff_bias_desc, + const memory::desc &diff_dst_desc) { + error::wrap_c_api( + mkldnn_inner_product_backward_weights_desc_init( + &data, &src_desc.data, &diff_weights_desc.data, + &diff_bias_desc.data, &diff_dst_desc.data), + "could not create a inner product backward weights descriptor"); + } + desc(const memory::desc &src_desc, + const memory::desc &diff_weights_desc, + const memory::desc &diff_dst_desc) { + error::wrap_c_api( + mkldnn_inner_product_backward_weights_desc_init( + &data, &src_desc.data, &diff_weights_desc.data, + nullptr, &diff_dst_desc.data), + "could not create a inner product backward weights descriptor"); + } + }; + + struct primitive_desc : public 
mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e, + const inner_product_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e, + const inner_product_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, &attr, e, hint_fwd_pd.get()) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(diff_weights, diff_weights, 0); + REG_QUERY_MD(diff_bias, diff_weights, 1); + REG_QUERY_MD(diff_dst, diff_dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + inner_product_backward_weights(const primitive_desc &pd): primitive(pd) {} +}; + +/// @} + +/// @addtogroup cpp_api_rnn RNN +/// A primitive to compute common recurrent layer. +/// +/// @sa @ref c_api_rnn in @ref c_api +/// @{ + +struct rnn_cell { + struct desc { + mkldnn_rnn_cell_desc_t c_rnn_cell_; + + desc(algorithm kind, algorithm activation_f) { + error::wrap_c_api(mkldnn_rnn_cell_desc_init(&c_rnn_cell_, + mkldnn::convert_to_c(kind), + mkldnn::convert_to_c(activation_f), 0U, 0, 0), + "could not init an rnn cell descriptor"); + } + desc(algorithm kind): desc(kind, algorithm::algorithm_undef) {} + + operator const mkldnn_rnn_cell_desc_t*() const { return &c_rnn_cell_; } + + algorithm get_cell_kind() const + { return algorithm(c_rnn_cell_.cell_kind); } + algorithm get_activation() const + { return algorithm(c_rnn_cell_.activation_kind); } + + float get_alpha() const { return c_rnn_cell_.alpha; } + void set_alpha(float alpha) { + c_rnn_cell_.flags |= mkldnn_rnn_cell_with_relu; + c_rnn_cell_.alpha = alpha; + } + + float get_clipping() const { return c_rnn_cell_.clipping; } + void set_clipping(float clipping) { + c_rnn_cell_.flags |= mkldnn_rnn_cell_with_clipping; + c_rnn_cell_.clipping = clipping; + } + + int get_gates_count() const { + return mkldnn_rnn_cell_get_gates_count(&c_rnn_cell_); + } + int get_state_count() const { + return mkldnn_rnn_cell_get_states_count(&c_rnn_cell_); + } + }; +}; + +struct rnn_forward : public primitive { + struct desc { + mkldnn_rnn_desc_t data; + desc(prop_kind aprop_kind, rnn_cell::desc cell, + const rnn_direction direction, + const memory::desc &src_layer_desc, + const memory::desc &src_iter_desc, + const memory::desc &weights_layer_desc, + const memory::desc &weights_iter_desc, + const memory::desc &bias_desc, + const memory::desc &dst_layer_desc, + const memory::desc &dst_iter_desc + ) { + error::wrap_c_api(mkldnn_rnn_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), cell, + mkldnn::convert_to_c(direction), + &src_layer_desc.data, &src_iter_desc.data, + &weights_layer_desc.data, &weights_iter_desc.data, + &bias_desc.data, + &dst_layer_desc.data, &dst_iter_desc.data), + "could not create an RNN forward descriptor"); + } + + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e) + : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e) + : mkldnn::primitive_desc(&desc.data, &attr, e, nullptr) {} + + REG_QUERY_MD(src_layer, src, 0); + REG_QUERY_MD(src_iter, src, 1); + REG_QUERY_MD(weights_layer, weights, 0); + REG_QUERY_MD(weights_iter, weights, 1); + REG_QUERY_MD(bias, weights, 2); + REG_QUERY_MD(dst_layer, dst, 0); + REG_QUERY_MD(dst_iter, dst, 1); + REG_QUERY_MD(workspace, workspace, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + rnn_forward(const primitive_desc 
&pd): primitive(pd) {} +}; + +struct rnn_backward : public primitive { + struct desc { + mkldnn_rnn_desc_t data; + desc(prop_kind aprop_kind, rnn_cell::desc cell, + const rnn_direction direction, + const memory::desc &src_layer_desc, + const memory::desc &src_iter_desc, + const memory::desc &weights_layer_desc, + const memory::desc &weights_iter_desc, + const memory::desc &bias_desc, + const memory::desc &dst_layer_desc, + const memory::desc &dst_iter_desc, + const memory::desc &diff_src_layer_desc, + const memory::desc &diff_src_iter_desc, + const memory::desc &diff_weights_layer_desc, + const memory::desc &diff_weights_iter_desc, + const memory::desc &diff_bias_desc, + const memory::desc &diff_dst_layer_desc, + const memory::desc &diff_dst_iter_desc) { + error::wrap_c_api(mkldnn_rnn_backward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), cell, + mkldnn::convert_to_c(direction), + &src_layer_desc.data, &src_iter_desc.data, + &weights_layer_desc.data, &weights_iter_desc.data, + &bias_desc.data, + &dst_layer_desc.data, &dst_iter_desc.data, + &diff_src_layer_desc.data, &diff_src_iter_desc.data, + &diff_weights_layer_desc.data, + &diff_weights_iter_desc.data, &diff_bias_desc.data, + &diff_dst_layer_desc.data, &diff_dst_iter_desc.data), + "could not create an RNN backward descriptor"); + } + + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e, + const rnn_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {} + + primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e, + const rnn_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, &attr, e, hint_fwd_pd.get()) {} + + REG_QUERY_MD(src_layer, src, 0); + REG_QUERY_MD(src_iter, src, 1); + REG_QUERY_MD(weights_layer, weights, 0); + REG_QUERY_MD(weights_iter, weights, 1); + REG_QUERY_MD(bias, weights, 2); + REG_QUERY_MD(dst_layer, dst, 0); + REG_QUERY_MD(dst_iter, dst, 1); + REG_QUERY_MD(workspace, workspace, 0); + + REG_QUERY_MD(diff_src_layer, diff_src, 0); + REG_QUERY_MD(diff_src_iter, diff_src, 1); + REG_QUERY_MD(diff_weights_layer, diff_weights, 0); + REG_QUERY_MD(diff_weights_iter, diff_weights, 1); + REG_QUERY_MD(diff_bias, diff_weights, 2); + REG_QUERY_MD(diff_dst_layer, diff_dst, 0); + REG_QUERY_MD(diff_dst_iter, diff_dst, 1); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + // With last iteration (with and without input src_iter) + rnn_backward(const primitive_desc &pd): primitive(pd) {} +}; + +/// @} + +/// @addtogroup cpp_api_shuffle Shuffle +/// A primitive to shuffle data along the axis. 
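+///
+/// A minimal usage sketch, assuming `eng` is a valid engine and `data_md` is
+/// a caller-provided `memory::desc` whose shuffled axis size is divisible by
+/// the chosen group size:
+///
+/// ~~~cpp
+/// auto shuf_d = shuffle_forward::desc(prop_kind::forward_inference, data_md,
+///         1 /* axis */, 4 /* group_size */);
+/// auto shuf_pd = shuffle_forward::primitive_desc(shuf_d, eng);
+/// auto shuf = shuffle_forward(shuf_pd);
+/// ~~~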
+/// +/// @sa @ref c_api_shuffle in @ref c_api +/// @{ + +struct shuffle_forward : public primitive { + struct desc { + mkldnn_shuffle_desc_t data; + desc(prop_kind aprop_kind, const memory::desc &data_desc, + int axis, int group_size) { + error::wrap_c_api(mkldnn_shuffle_forward_desc_init(&data, + mkldnn::convert_to_c(aprop_kind), &data_desc.data, + axis, group_size), + "could not create a shuffle forward descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e) + : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {} + + REG_QUERY_MD(src, src, 0); + REG_QUERY_MD(dst, dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + shuffle_forward(const primitive_desc &pd): primitive(pd) {} +}; + +struct shuffle_backward : public primitive { + struct desc { + mkldnn_shuffle_desc_t data; + desc(const memory::desc &diff_data_desc, int axis, int group_size) { + error::wrap_c_api(mkldnn_shuffle_backward_desc_init(&data, + &diff_data_desc.data, axis, group_size), + "could not create a shuffle backward descriptor"); + } + }; + + struct primitive_desc : public mkldnn::primitive_desc { + primitive_desc(const desc &desc, const engine &e, + const shuffle_forward::primitive_desc &hint_fwd_pd) + : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {} + + REG_QUERY_MD(diff_src, diff_src, 0); + REG_QUERY_MD(diff_dst, diff_dst, 0); + REG_QUERY_MD(scratchpad, scratchpad, 0); + }; + + shuffle_backward(const primitive_desc &pd): primitive(pd) {} +}; + +/// @} + +/// @} Primitives + +/// @} C++ API + +#undef REG_QUERY_MD + +// implementation section +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +inline primitive::primitive(const_mkldnn_primitive_desc_t c_pd) { + mkldnn_primitive_t result; + error::wrap_c_api(mkldnn_primitive_create(&result, c_pd), + "could not create a primitive"); + reset(result); +} + +inline primitive::primitive(const primitive_desc &pd): primitive(pd.get()) {} + +inline void primitive::execute(stream &astream, + const std::unordered_map<int, memory> &args) const { + std::vector<mkldnn_exec_arg_t> c_args; + c_args.reserve(args.size()); + for (const auto &a: args) + c_args.push_back({a.first, a.second.get()}); + + error::wrap_c_api(mkldnn_primitive_execute(get(), astream.get(), + (int)c_args.size(), c_args.data()), + "primitive execution fail"); +} +#endif // DOXYGEN_SHOULD_SKIP_THIS + +} // namespace mkldnn + +#endif diff --git a/thirdparty/oidn/mkl-dnn/include/mkldnn_debug.h b/thirdparty/oidn/mkl-dnn/include/mkldnn_debug.h new file mode 100644 index 0000000000..f4dc2fdfa6 --- /dev/null +++ b/thirdparty/oidn/mkl-dnn/include/mkldnn_debug.h @@ -0,0 +1,98 @@ +/******************************************************************************* +* Copyright 2018-2019 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +/* DO NOT EDIT, AUTO-GENERATED */ + +#ifndef MKLDNN_DEBUG_H +#define MKLDNN_DEBUG_H + +#ifndef DOXYGEN_SHOULD_SKIP_THIS + +/* All symbols shall be internal unless marked as MKLDNN_API */ +#if defined _WIN32 || defined __CYGWIN__ +# define MKLDNN_HELPER_DLL_IMPORT __declspec(dllimport) +# define MKLDNN_HELPER_DLL_EXPORT __declspec(dllexport) +#else +# if __GNUC__ >= 4 +# define MKLDNN_HELPER_DLL_IMPORT __attribute__ ((visibility ("default"))) +# define MKLDNN_HELPER_DLL_EXPORT __attribute__ ((visibility ("default"))) +# else +# define MKLDNN_HELPER_DLL_IMPORT +# define MKLDNN_HELPER_DLL_EXPORT +# endif +#endif + +#ifdef MKLDNN_DLL +# ifdef MKLDNN_DLL_EXPORTS +# define MKLDNN_API MKLDNN_HELPER_DLL_EXPORT +# else +# define MKLDNN_API MKLDNN_HELPER_DLL_IMPORT +# endif +#else +# define MKLDNN_API +#endif + +#if defined (__GNUC__) +# define MKLDNN_DEPRECATED __attribute__((deprecated)) +#elif defined(_MSC_VER) +# define MKLDNN_DEPRECATED __declspec(deprecated) +#else +# define MKLDNN_DEPRECATED +#endif + +#include "mkldnn_types.h" +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +#ifdef __cplusplus +extern "C" { +#endif + +const char MKLDNN_API *mkldnn_status2str(mkldnn_status_t v); +const char MKLDNN_API *mkldnn_dt2str(mkldnn_data_type_t v); +const char MKLDNN_API *mkldnn_fmt_kind2str(mkldnn_format_kind_t v); +const char MKLDNN_API *mkldnn_fmt_tag2str(mkldnn_format_tag_t v); +const char MKLDNN_API *mkldnn_prop_kind2str(mkldnn_prop_kind_t v); +const char MKLDNN_API *mkldnn_prim_kind2str(mkldnn_primitive_kind_t v); +const char MKLDNN_API *mkldnn_alg_kind2str(mkldnn_alg_kind_t v); +const char MKLDNN_API *mkldnn_rnn_direction2str(mkldnn_rnn_direction_t v); + +/** Forms a format string for a given memory descriptor. + * + * The format is defined as: 'dt:[p|o|0]:fmt_kind:fmt:extra'. + * Here: + * - dt -- data type + * - p -- indicates there is non-trivial padding + * - o -- indicates there is non-trivial padding offset + * - 0 -- indicates there is non-trivial offset0 + * - fmt_kind -- format kind (blocked, wino, etc...) + * - fmt -- extended format string (format_kind specific) + * - extra -- shows extra fields (underspecified) + */ +int MKLDNN_API mkldnn_md2fmt_str(char *fmt_str, size_t fmt_str_len, + const mkldnn_memory_desc_t *md); + +/** Forms a dimension string for a given memory descriptor. + * + * The format is defined as: 'dim0xdim1x...xdimN + */ +int MKLDNN_API mkldnn_md2dim_str(char *dim_str, size_t dim_str_len, + const mkldnn_memory_desc_t *md); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/thirdparty/oidn/mkl-dnn/include/mkldnn_types.h b/thirdparty/oidn/mkl-dnn/include/mkldnn_types.h new file mode 100644 index 0000000000..1b6c356982 --- /dev/null +++ b/thirdparty/oidn/mkl-dnn/include/mkldnn_types.h @@ -0,0 +1,1415 @@ +/******************************************************************************* +* Copyright 2016-2018 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#ifndef MKLDNN_TYPES_H +#define MKLDNN_TYPES_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +#include <stddef.h> +#include <stdint.h> +#endif + +/** @addtogroup c_api C API + * @{ + * + * @addtogroup c_api_types Types + * @{ + * + * @addtogroup c_api_types_generic Generic + * @{ */ + +/** Intel(R) MKL-DNN Version type */ +typedef struct { + int major; + int minor; + int patch; + const char *hash; +} mkldnn_version_t; + +/** Status values returned by Intel(R) MKL-DNN functions. */ +typedef enum { + /** The operation was successful */ + mkldnn_success = 0, + /** The operation failed due to an out-of-memory condition */ + mkldnn_out_of_memory = 1, + /** The operation failed and should be retried */ + mkldnn_try_again = 2, + /** The operation failed because of incorrect function arguments */ + mkldnn_invalid_arguments = 3, + /** The operation failed because a primitive was not ready for execution */ + mkldnn_not_ready = 4, + /** The operation failed because requested functionality is not implemented + */ + mkldnn_unimplemented = 5, + /** Primitive iterator passed over last primitive descriptor */ + mkldnn_iterator_ends = 6, + /** Primitive or engine failed on execution */ + mkldnn_runtime_error = 7, + /** Queried element is not required for given primitive */ + mkldnn_not_required = 8, +} mkldnn_status_t; + +/** Data type specification */ +typedef enum { + /** Undefined data type, used for empty memory descriptors. */ + mkldnn_data_type_undef = 0, + /** 32-bit/single-precision floating point. */ + mkldnn_f32 = 1, + /** 32-bit signed integer. */ + mkldnn_s32 = 2, + /** 8-bit signed integer. */ + mkldnn_s8 = 3, + /** 8-bit unsigned integer. */ + mkldnn_u8 = 4, +} mkldnn_data_type_t; + +/** Memory format kind */ +typedef enum { + /** Undefined memory format, used for empty memory descriptors. */ + mkldnn_format_kind_undef = 0, + /** Unspecified format. The primitive selects a format automatically. */ + mkldnn_format_kind_any, + /** A tensor in a generic format described by the stride and blocking + * values in each dimension. See #mkldnn_blocking_desc_t for more + * information. */ + mkldnn_blocked, + /** Weights format used in 8bit Winograd convolution */ + mkldnn_format_kind_wino, + /** Packed weights format used in RNN */ + mkldnn_format_kind_rnn_packed, +} mkldnn_format_kind_t; + +/** Memory format tag specification. + * + * Intel MKL-DNN formats describe physical data layout. The physical layout + * is described as a sequence of the dimensions as they are laid out in the + * memory (from the outer-most to the inner-most). Note that this order + * doesn't affect the logical order of the dimensions that is kept in the + * `dims` field of the mkldnn_memory_desc_t structure. The logical order of the + * dimensions is specified by the type of tensor. 
+ * + * For example, CNN 5D tensor always has its logical dimensions in the order + * `(batch, channels, depth, height, width)`, while the physical layout might be + * #mkldnn_ncdhw or #mkldnn_ndhwc: + * + * ~~~cpp + * int batch = 2, channels = 16, depth = 13, height = 13, width = 13; + * + * int ndims = 5; // 5D tensor + * mkldnn_dims_t dims = {batch, channels, depth, height, width}; + * mkldnn_memory_desc_t data_in_ncdhw; + * mkldnn_memory_desc_init_by_tag( + * &data_in_ncdhw, 5, dims, mkldnn_f32, mkldnn_ncdhw); + * + * // note that in both cases dims passed are the same + * mkldnn_memory_desc_t data_in_ndhwc; + * mkldnn_memory_desc_init_by_tag( + * &data_in_ndhwc, 5, dims, mkldnn_f32, mkldnn_ndhwc); + * ~~~ + * + * The following notation applies to memory format names: + * - @c 'n' denotes the mini-batch dimension + * - @c 'c' denotes a channels dimension + * - When there are multiple channel dimensions (for example, in convolution + * weights tensor), @c 'i' and @c 'o' denote dimensions of input and output + * channels + * - @c 'd', @c 'h', and @c 'w' denote spatial depth, height, and width + * respectively + * - Upper-case letters indicate that the data is laid out in blocks + * for a particular dimension. In such cases, the format name contains both + * upper- and lower-case letters for that dimension with a lower-case letter + * preceded by the block size. For example: @c 'mkldnn_nChw8c' describes a + * format where the outermost dimension is mini-batch, followed by the + * channel block number, followed by the spatial height and width, and + * finally followed by 8-element channel blocks. + * + * @note + * Channel designations can be different. For example, both the @c + * 'mkldnn_nc' and @c 'mkldnn_io' formats can be used to describe a 2D + * tensor. + * + * @sa @ref understanding_memory_formats + */ +typedef enum { + /** Undefined memory format tag */ + mkldnn_format_tag_undef = 0, + /** Undefined memory format tag. + * The primitive selects a format automatically. */ + mkldnn_format_tag_any, + + /* Semantic agnostic section */ + /* The physical order of dimensions is defined by the permutation of the + * characters, assuming that ab..z defines the natural order. 
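+ * For example, under this convention #mkldnn_acdb places dimension 'a'
+ * outermost and 'b' innermost; for a 4D data tensor this is the same
+ * physical layout that the #mkldnn_nhwc alias below refers to.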
+ */ + + /* Plain formats */ + + mkldnn_a, + mkldnn_ab, + mkldnn_abc, + mkldnn_abcd, + mkldnn_abcde, + mkldnn_abcdef, + mkldnn_abdec, + mkldnn_acb, + mkldnn_acbde, + mkldnn_acdb, + mkldnn_acdeb, + mkldnn_ba, + mkldnn_bac, + mkldnn_bacd, + mkldnn_bcda, + mkldnn_cba, + mkldnn_cdba, + mkldnn_cdeba, + mkldnn_decab, + + /* Opaque blocked formats */ + + mkldnn_Abc16a, + mkldnn_ABc16a16b, + mkldnn_aBc16b, + mkldnn_ABc16b16a, + mkldnn_Abc4a, + mkldnn_aBc4b, + mkldnn_ABc4b16a4b, + mkldnn_ABc4b4a, + mkldnn_ABc8a16b2a, + mkldnn_ABc8a8b, + mkldnn_aBc8b, + mkldnn_ABc8b16a2b, + mkldnn_ABc8b8a, + mkldnn_Abcd16a, + mkldnn_ABcd16a16b, + mkldnn_aBcd16b, + mkldnn_ABcd16b16a, + mkldnn_aBCd16b16c, + mkldnn_aBCd16c16b, + mkldnn_Abcd4a, + mkldnn_aBcd4b, + mkldnn_ABcd4b16a4b, + mkldnn_ABcd4b4a, + mkldnn_aBCd4c16b4c, + mkldnn_aBCd4c4b, + mkldnn_ABcd8a16b2a, + mkldnn_ABcd8a8b, + mkldnn_aBcd8b, + mkldnn_ABcd8b16a2b, + mkldnn_aBCd8b16c2b, + mkldnn_ABcd8b8a, + mkldnn_aBCd8b8c, + mkldnn_aBCd8c16b2c, + mkldnn_aBCd8c8b, + mkldnn_Abcde16a, + mkldnn_ABcde16a16b, + mkldnn_aBcde16b, + mkldnn_ABcde16b16a, + mkldnn_aBCde16b16c, + mkldnn_aBCde16c16b, + mkldnn_aBCde2c8b4c, + mkldnn_Abcde4a, + mkldnn_aBcde4b, + mkldnn_ABcde4b4a, + mkldnn_aBCde4b4c, + mkldnn_aBCde4c16b4c, + mkldnn_aBCde4c4b, + mkldnn_Abcde8a, + mkldnn_ABcde8a8b, + mkldnn_aBcde8b, + mkldnn_ABcde8b16a2b, + mkldnn_aBCde8b16c2b, + mkldnn_ABcde8b8a, + mkldnn_aBCde8b8c, + mkldnn_aBCde8c16b2c, + mkldnn_aBCde8c8b, + mkldnn_aBcdef16b, + mkldnn_aBCdef16b16c, + mkldnn_aBCdef16c16b, + mkldnn_aBcdef4b, + mkldnn_aBCdef4c4b, + mkldnn_aBCdef8b8c, + mkldnn_aBCdef8c16b2c, + mkldnn_aBCdef8c8b, + mkldnn_aBdc16b, + mkldnn_aBdc4b, + mkldnn_aBdc8b, + mkldnn_aBdec16b, + mkldnn_aBdec4b, + mkldnn_aBdec8b, + mkldnn_aBdefc16b, + mkldnn_aBdefc4b, + mkldnn_aBdefc8b, + mkldnn_Acb16a, + mkldnn_Acb4a, + mkldnn_Acb8a, + mkldnn_aCBd16b16c, + mkldnn_aCBde16b16c, + mkldnn_Acdb16a, + mkldnn_Acdb4a, + mkldnn_Acdb8a, + mkldnn_Acdeb16a, + mkldnn_Acdeb4a, + mkldnn_Acdeb8a, + mkldnn_BAc16a16b, + mkldnn_BAcd16a16b, + + /** Just a sentinel, not real memory format tag. Must be changed after new + * format tag is added. */ + mkldnn_format_tag_last, + + /* Aliases */ + + mkldnn_x = mkldnn_a, + mkldnn_nc = mkldnn_ab, + mkldnn_cn = mkldnn_ba, + mkldnn_ncw = mkldnn_abc, + mkldnn_nwc = mkldnn_acb, + mkldnn_nchw = mkldnn_abcd, + mkldnn_nhwc = mkldnn_acdb, + mkldnn_chwn = mkldnn_bcda, + mkldnn_ncdhw = mkldnn_abcde, + mkldnn_ndhwc = mkldnn_acdeb, + + mkldnn_oi = mkldnn_ab, + mkldnn_io = mkldnn_ba, + mkldnn_oiw = mkldnn_abc, + mkldnn_wio = mkldnn_cba, + mkldnn_oihw = mkldnn_abcd, + mkldnn_hwio = mkldnn_cdba, + mkldnn_ihwo = mkldnn_bcda, + mkldnn_iohw = mkldnn_bacd, + mkldnn_oidhw = mkldnn_abcde, + mkldnn_dhwio = mkldnn_cdeba, + mkldnn_goiw = mkldnn_abcd, + mkldnn_goihw = mkldnn_abcde, + mkldnn_hwigo = mkldnn_decab, + mkldnn_giohw = mkldnn_acbde, + mkldnn_goidhw = mkldnn_abcdef, + + /** 3D RNN data tensor in the format (seq_length, batch, input channels). */ + mkldnn_tnc = mkldnn_abc, + /** 3D RNN data tensor in the format (batch, seq_length, input channels). */ + mkldnn_ntc = mkldnn_bac, + /** 5D RNN states tensor in the format (num_layers, num_directions, + * num_states, batch, state channels). */ + mkldnn_ldsnc = mkldnn_abcde, + /** 5D RNN weights tensor in the format (num_layers, num_directions, + * input_channels, num_gates, output_channels). + * + * - For LSTM cells, the gates order is input, forget, candidate + * and output gate. + * - For GRU cells, the gates order is update, reset and output gate. 
*/ + mkldnn_ldigo = mkldnn_abcde, + /** 5D RNN weights tensor in the format (num_layers, num_directions, + * num_gates, output_channels, input_channels). + * + * - For LSTM cells, the gates order is input, forget, candidate + * and output gate. + * - For GRU cells, the gates order is update, reset and output gate. */ + mkldnn_ldgoi = mkldnn_abdec, + /** 4D RNN bias tensor in the format (num_layers, num_directions, + * num_gates, output_channels). + * + * - For LSTM cells, the gates order is input, forget, candidate + * and output gate. + * - For GRU cells, the gates order is update, reset and output gate. */ + mkldnn_ldgo = mkldnn_abcd, + + /* Opaque data types, are not to be used explicitly */ + + /* data */ + mkldnn_nCdhw16c = mkldnn_aBcde16b, + mkldnn_nCdhw4c = mkldnn_aBcde4b, + mkldnn_nCdhw8c = mkldnn_aBcde8b, + mkldnn_nChw16c = mkldnn_aBcd16b, + mkldnn_nChw4c = mkldnn_aBcd4b, + mkldnn_nChw8c = mkldnn_aBcd8b, + mkldnn_nCw16c = mkldnn_aBc16b, + mkldnn_nCw4c = mkldnn_aBc4b, + mkldnn_nCw8c = mkldnn_aBc8b, + + /* weights, 3D */ + mkldnn_IOw16o16i = mkldnn_BAc16a16b, + mkldnn_OIw16i16o = mkldnn_ABc16b16a, + mkldnn_OIw16o16i = mkldnn_ABc16a16b, + mkldnn_Oiw16o = mkldnn_Abc16a, + mkldnn_OIw4i16o4i = mkldnn_ABc4b16a4b, + mkldnn_OIw4i4o = mkldnn_ABc4b4a, + mkldnn_Oiw4o = mkldnn_Abc4a, + mkldnn_OIw8i16o2i = mkldnn_ABc8b16a2b, + mkldnn_OIw8i8o = mkldnn_ABc8b8a, + mkldnn_OIw8o16i2o = mkldnn_ABc8a16b2a, + mkldnn_OIw8o8i = mkldnn_ABc8a8b, + mkldnn_Owi16o = mkldnn_Acb16a, + mkldnn_Owi4o = mkldnn_Acb4a, + mkldnn_Owi8o = mkldnn_Acb8a, + + /* weights, 4D */ + mkldnn_IOhw16o16i = mkldnn_BAcd16a16b, + mkldnn_Ohwi16o = mkldnn_Acdb16a, + mkldnn_Ohwi4o = mkldnn_Acdb4a, + mkldnn_Ohwi8o = mkldnn_Acdb8a, + mkldnn_OIhw16i16o = mkldnn_ABcd16b16a, + mkldnn_OIhw16o16i = mkldnn_ABcd16a16b, + mkldnn_Oihw16o = mkldnn_Abcd16a, + mkldnn_OIhw4i16o4i = mkldnn_ABcd4b16a4b, + mkldnn_OIhw4i4o = mkldnn_ABcd4b4a, + mkldnn_Oihw4o = mkldnn_Abcd4a, + mkldnn_OIhw8i16o2i = mkldnn_ABcd8b16a2b, + mkldnn_OIhw8i8o = mkldnn_ABcd8b8a, + mkldnn_OIhw8o16i2o = mkldnn_ABcd8a16b2a, + mkldnn_OIhw8o8i = mkldnn_ABcd8a8b, + + /* weights, 5D */ + mkldnn_Odhwi16o = mkldnn_Acdeb16a, + mkldnn_Odhwi4o = mkldnn_Acdeb4a, + mkldnn_Odhwi8o = mkldnn_Acdeb8a, + mkldnn_OIdhw16i16o = mkldnn_ABcde16b16a, + mkldnn_OIdhw16o16i = mkldnn_ABcde16a16b, + mkldnn_Oidhw16o = mkldnn_Abcde16a, + mkldnn_OIdhw4i4o = mkldnn_ABcde4b4a, + mkldnn_Oidhw4o = mkldnn_Abcde4a, + mkldnn_OIdhw8i16o2i = mkldnn_ABcde8b16a2b, + mkldnn_OIdhw8i8o = mkldnn_ABcde8b8a, + mkldnn_OIdhw8o8i = mkldnn_ABcde8a8b, + + /* weights w/ groups, 3D */ + mkldnn_Goiw16g = mkldnn_Abcd16a, + mkldnn_gIOw16o16i = mkldnn_aCBd16b16c, + mkldnn_gOIw16i16o = mkldnn_aBCd16c16b, + mkldnn_gOIw16o16i = mkldnn_aBCd16b16c, + mkldnn_gOiw16o = mkldnn_aBcd16b, + mkldnn_gOIw4i16o4i = mkldnn_aBCd4c16b4c, + mkldnn_gOIw4i4o = mkldnn_aBCd4c4b, + mkldnn_gOiw4o = mkldnn_aBcd4b, + mkldnn_gOIw8i16o2i = mkldnn_aBCd8c16b2c, + mkldnn_gOIw8i8o = mkldnn_aBCd8c8b, + mkldnn_gOIw8o16i2o = mkldnn_aBCd8b16c2b, + mkldnn_gOIw8o8i = mkldnn_aBCd8b8c, + mkldnn_gOwi16o = mkldnn_aBdc16b, + mkldnn_gOwi4o = mkldnn_aBdc4b, + mkldnn_gOwi8o = mkldnn_aBdc8b, + + /* weights w/ groups, 4D */ + mkldnn_gIOhw16o16i = mkldnn_aCBde16b16c, + mkldnn_gOhwi16o = mkldnn_aBdec16b, + mkldnn_gOhwi4o = mkldnn_aBdec4b, + mkldnn_gOhwi8o = mkldnn_aBdec8b, + mkldnn_Goihw16g = mkldnn_Abcde16a, + mkldnn_gOIhw16i16o = mkldnn_aBCde16c16b, + mkldnn_gOIhw16o16i = mkldnn_aBCde16b16c, + mkldnn_gOihw16o = mkldnn_aBcde16b, + mkldnn_gOIhw2i8o4i = mkldnn_aBCde2c8b4c, + 
 mkldnn_gOIhw4i16o4i = mkldnn_aBCde4c16b4c,
+ mkldnn_gOIhw4i4o = mkldnn_aBCde4c4b,
+ mkldnn_gOIhw4o4i = mkldnn_aBCde4b4c,
+ mkldnn_gOihw4o = mkldnn_aBcde4b,
+ mkldnn_Goihw8g = mkldnn_Abcde8a,
+ mkldnn_gOIhw8i16o2i = mkldnn_aBCde8c16b2c,
+ mkldnn_gOIhw8i8o = mkldnn_aBCde8c8b,
+ mkldnn_gOIhw8o16i2o = mkldnn_aBCde8b16c2b,
+ mkldnn_gOIhw8o8i = mkldnn_aBCde8b8c,
+
+ /* weights w/ groups, 6D */
+ mkldnn_gOdhwi16o = mkldnn_aBdefc16b,
+ mkldnn_gOdhwi4o = mkldnn_aBdefc4b,
+ mkldnn_gOdhwi8o = mkldnn_aBdefc8b,
+ mkldnn_gOIdhw16i16o = mkldnn_aBCdef16c16b,
+ mkldnn_gOIdhw16o16i = mkldnn_aBCdef16b16c,
+ mkldnn_gOidhw16o = mkldnn_aBcdef16b,
+ mkldnn_gOIdhw4i4o = mkldnn_aBCdef4c4b,
+ mkldnn_gOidhw4o = mkldnn_aBcdef4b,
+ mkldnn_gOIdhw8i16o2i = mkldnn_aBCdef8c16b2c,
+ mkldnn_gOIdhw8i8o = mkldnn_aBCdef8c8b,
+ mkldnn_gOIdhw8o8i = mkldnn_aBCdef8b8c,
+} mkldnn_format_tag_t;
+
+/** Kinds of padding. Defines how to interpret the data in padding regions. */
+typedef enum {
+ /** The data in padding regions is zero. */
+ mkldnn_padding_zero,
+} mkldnn_padding_kind_t;
+
+/** Kinds of propagation. */
+typedef enum {
+ /* TODO: suggest renames */
+ /** Undefined propagation type. */
+ mkldnn_prop_kind_undef = 0,
+ /** Forward data propagation (training mode). In this mode primitives
+ * perform computations necessary for subsequent backward propagation. */
+ mkldnn_forward_training = 64,
+ /** Forward data propagation (inference mode). In this mode primitives
+ * perform only computations that are necessary for inference and omit
+ * computations that are necessary only for backward propagation. */
+ mkldnn_forward_inference = 96,
+ /** Forward data propagation (alias for @c mkldnn_forward_inference). */
+ mkldnn_forward_scoring = mkldnn_forward_inference,
+ /** Forward data propagation (alias for @c mkldnn_forward_training). */
+ mkldnn_forward = mkldnn_forward_training,
+ /** Backward propagation (with respect to all parameters). */
+ mkldnn_backward = 128,
+ /** Backward data propagation. */
+ mkldnn_backward_data = 160,
+ /** Backward weights propagation. */
+ mkldnn_backward_weights = 192,
+ /** Backward bias propagation. */
+ mkldnn_backward_bias = 193,
+} mkldnn_prop_kind_t;
+
+/** Kinds of primitives. Used to implement a way to extend the library with new
+ * primitives without changing the ABI. */
+typedef enum {
+ /** Undefined primitive (XXX: why do we have it?). */
+ mkldnn_undefined_primitive,
+ /** A reorder primitive. */
+ mkldnn_reorder,
+ /** A shuffle primitive. */
+ mkldnn_shuffle,
+ /** An (out-of-place) concat primitive. */
+ mkldnn_concat,
+ /** A sum primitive. */
+ mkldnn_sum,
+ /** A convolution primitive. */
+ mkldnn_convolution,
+ /** A deconvolution primitive. */
+ mkldnn_deconvolution,
+ /** An element-wise primitive. */
+ mkldnn_eltwise,
+ /** A Softmax primitive. */
+ mkldnn_softmax,
+ /** A pooling primitive. */
+ mkldnn_pooling,
+ /** An LRN primitive. */
+ mkldnn_lrn,
+ /** A batch normalization primitive. */
+ mkldnn_batch_normalization,
+ /** An inner product primitive. */
+ mkldnn_inner_product,
+ /** An RNN primitive. */
+ mkldnn_rnn,
+} mkldnn_primitive_kind_t;
+
+/** Kinds of algorithms.
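+ *
+ * An algorithm kind is passed to the operation descriptor of the
+ * corresponding primitive. As a brief sketch (assuming the eltwise
+ * initializer declared in mkldnn.h; the dimensions and parameters are
+ * arbitrary), a ReLU descriptor could be set up as:
+ *
+ * ~~~cpp
+ * mkldnn_dims_t dims = {2, 16, 13, 13};
+ * mkldnn_memory_desc_t data_md;
+ * mkldnn_memory_desc_init_by_tag(&data_md, 4, dims, mkldnn_f32, mkldnn_nchw);
+ *
+ * mkldnn_eltwise_desc_t relu_d;
+ * // alpha is the negative slope for ReLU, beta is ignored
+ * mkldnn_eltwise_forward_desc_init(&relu_d, mkldnn_forward_inference,
+ *         mkldnn_eltwise_relu, &data_md, 0.f, 0.f);
+ * ~~~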
 */
+typedef enum {
+ mkldnn_alg_kind_undef,
+ /** Direct convolution */
+ mkldnn_convolution_direct = 0x1,
+ /** Winograd convolution */
+ mkldnn_convolution_winograd = 0x2,
+ /** Convolution algorithm (either direct or Winograd) is chosen just in time */
+ mkldnn_convolution_auto = 0x3,
+ /** Direct deconvolution */
+ mkldnn_deconvolution_direct = 0xa,
+ /** Winograd deconvolution */
+ mkldnn_deconvolution_winograd = 0xb,
+ /** Eltwise: ReLU */
+ mkldnn_eltwise_relu = 0x1f,
+ /** Eltwise: hyperbolic tangent non-linearity (tanh) */
+ mkldnn_eltwise_tanh = 0x2f,
+ /** Eltwise: parametric exponential linear unit (ELU) */
+ mkldnn_eltwise_elu = 0x3f,
+ /** Eltwise: square */
+ mkldnn_eltwise_square = 0x4f,
+ /** Eltwise: abs */
+ mkldnn_eltwise_abs = 0x5f,
+ /** Eltwise: square root */
+ mkldnn_eltwise_sqrt = 0x6f,
+ /** Eltwise: linear */
+ mkldnn_eltwise_linear = 0x7f,
+ /** Eltwise: bounded_relu */
+ mkldnn_eltwise_bounded_relu = 0x8f,
+ /** Eltwise: soft_relu */
+ mkldnn_eltwise_soft_relu = 0x9f,
+ /** Eltwise: logistic */
+ mkldnn_eltwise_logistic = 0xaf,
+ /** Max pooling */
+ mkldnn_pooling_max = 0x1ff,
+ /** Average pooling including padding */
+ mkldnn_pooling_avg_include_padding = 0x2ff,
+ /** Average pooling excluding padding */
+ mkldnn_pooling_avg_exclude_padding = 0x3ff,
+ mkldnn_pooling_avg = mkldnn_pooling_avg_exclude_padding,
+ /** Local response normalization (LRN) across multiple channels */
+ mkldnn_lrn_across_channels = 0xaff,
+ /** LRN within a single channel */
+ mkldnn_lrn_within_channel = 0xbff,
+ /** RNN cell */
+ mkldnn_vanilla_rnn = 0x1fff,
+ /** LSTM cell */
+ mkldnn_vanilla_lstm = 0x2fff,
+ /** GRU cell */
+ mkldnn_vanilla_gru = 0x3fff,
+ /** GRU cell with linear before reset
+ *
+ * Modification of the original GRU cell. Differs from #mkldnn_vanilla_gru
+ * in how the new memory gate is calculated:
+ * \f[ c_t = tanh(W_c*x_t + b_{c_x} + r_t*(U_c*h_{t-1}+b_{c_h})) \f]
+ * The primitive expects 4 biases on input:
+ * \f$[b_{u}, b_{r}, b_{c_x}, b_{c_h}]\f$
+ */
+ mkldnn_gru_linear_before_reset = 0x4fff,
+} mkldnn_alg_kind_t;
+
+/** Flags for the batch normalization primitive. */
+typedef enum {
+ /** Use global statistics
+ *
+ * If specified:
+ * - on forward propagation, use the mean and variance provided by the
+ * user (as inputs)
+ * - on backward propagation, reduces the amount of computation, since
+ * the mean and variance are treated as constants
+ *
+ * If not specified:
+ * - on forward propagation, the mean and variance are computed and stored
+ * as outputs
+ * - on backward propagation, compute the full derivative with respect to
+ * the data
+ */
+ mkldnn_use_global_stats = 0x1U,
+ /** Use scale and shift parameters
+ *
+ * If specified:
+ * - on forward propagation, use scale and shift (aka scale and bias) for
+ * the batch normalization results
+ * - on backward propagation (for prop_kind == #mkldnn_backward), compute
+ * the diff with respect to scale and shift (hence one extra output is
+ * used)
+ *
+ * If not specified:
+ * - on backward propagation, prop_kind == #mkldnn_backward_data has the
+ * same behavior as prop_kind == #mkldnn_backward
+ */
+ mkldnn_use_scaleshift = 0x2U,
+ /** Fuse with ReLU
+ *
+ * If specified:
+ * - on inference, this option behaves the same as if the primitive were
+ * fused with ReLU via the post ops API
+ * - on training, the primitive requires a workspace (needed to be able to
+ * perform the backward pass)
+ */
+ mkldnn_fuse_bn_relu = 0x4U,
+} mkldnn_batch_normalization_flag_t;
+
+/** @} */
+
+/** @addtogroup c_api_types_memory Memory
+ * @{ */
+
+/** Maximum number of dimensions a tensor can have.
Only restricts the amount + * of space used for the tensor description. Individual computational + * primitives may support only tensors of certain dimensions. */ +#define MKLDNN_MAX_NDIMS 12 + +/** A type to describe tensor dimension. */ +typedef int64_t mkldnn_dim_t; + +/** A type to describe tensor dimensions. */ +typedef mkldnn_dim_t mkldnn_dims_t[MKLDNN_MAX_NDIMS]; + +/** A type to describe strides within a tensor. */ +typedef mkldnn_dim_t mkldnn_strides_t[MKLDNN_MAX_NDIMS]; + +/** Generic description of blocked data layout for most memory formats. + * + * @sa @ref understanding_memory_formats */ +typedef struct { + /** The strides between the outermost blocks. + * In case of plain (non-blocked) formats the strides between dimensions. */ + mkldnn_dims_t strides; + /* Innermost section + * ASSUMPTION: the innermost blocks are always dense */ + /** The number of innermost blocks, e.g. 3 in case of `OIhw_4i16o4i_` */ + int inner_nblks; + /** The size of the blocks, e.g. `{4, 16, 4}` in case of `OIhw_4i16o4i` */ + mkldnn_dims_t inner_blks; + /** The logical indices of the blocks, e.g. `{1, 0, 1}` in case of + * `4i16o4i`, because `i` is the 1st dim and `o` is the 0st dim */ + mkldnn_dims_t inner_idxs; +} mkldnn_blocking_desc_t; + +typedef enum { + /** Undefined memory format, used for empty memory descriptors. */ + mkldnn_wino_undef = 0, + /** Tensors of weights for 2x3 winograd convolutions. */ + mkldnn_wino_wei_aaOIoi, + mkldnn_wino_wei_aaOio, + mkldnn_wino_wei_aaOBiOo, + /** Tensor of weights for 4x3 convolution. */ + mkldnn_wino_wei_OBaaIBOIio +} mkldnn_wino_memory_format_t; + +/** Description of tensor of weights for winograd 2x3 convolution. */ +typedef struct { + mkldnn_wino_memory_format_t wino_format; + int r; + int alpha; + int ic; + int oc; + int ic_block; + int oc_block; + int ic2_block; + int oc2_block; + float adj_scale; + size_t size; +} mkldnn_wino_desc_t; + +typedef enum { + mkldnn_packed_format_undef = 0, + mkldnn_ldigo_p, + mkldnn_ldgoi_p +} mkldnn_rnn_packed_memory_format_t; + +/* Maximum number of parts of RNN weights tensor that require separate + * computation. */ +#define MKLDNN_RNN_MAX_N_PARTS 4 + +/** Description of tensor of packed weights for rnn. */ +typedef struct { + mkldnn_rnn_packed_memory_format_t format; + int n_parts; + int n; + int parts[MKLDNN_RNN_MAX_N_PARTS]; + size_t part_pack_size[MKLDNN_RNN_MAX_N_PARTS]; + size_t offset_compensation; + size_t size; +} mkldnn_rnn_packed_desc_t; + +typedef enum { + mkldnn_memory_extra_flag_none = 0x0U, + /** Indicates the weights have an additional buffer, that depends on the + * @p compensation_mask. + * + * For instance, in 4D case with the compensation mask equals (1 << 0) + * the additional buffer would consist of OC values: + * O[oc : 0,OC] = + * -128 * SUM(ic : 0,IC; kh : 0,KH; kw : 0,KW){ weights(oc, ic, kh, kw) } + */ + mkldnn_memory_extra_flag_compensation_conv_s8s8 = 0x1U, + mkldnn_memory_extra_flag_scale_adjust = 0x2U, +} mkldnn_memory_extra_flags_t; + +/** Description of extra information stored in memory */ +typedef struct { + /** The flags contain arbitrary extra information, such as compensation. + * @sa mkldnn_memory_extra_flags_t */ + uint64_t flags; + /** Compensation mask */ + int compensation_mask; + /** Scale applied to the data */ + float scale_adjust; + /** For future backwards compatibility */ + char reserved[64]; +} mkldnn_memory_extra_desc_t; + +/** Memory descriptor. 
The description is based on a number of dimensions, + * dimensions themselves, plus information about elements type and memory + * format. Additionally, contains format-specific descriptions of the data + * layout. */ +typedef struct { + /** Number of dimensions */ + int ndims; + /** Dimensions in the following order: + * - CNN data tensors: mini-batch, channel, spatial + * (<code>{N, C, [[D,] H,] W}</code>) + * - CNN weight tensors: group (optional), output channel, input channel, + * spatial (<code>{[G,] O, I, [[D,] H,] W}</code>) + * - RNN data tensors: time, mini-batch, channels (<code>{T, N, C}</code>) + * or layers, directions, states, mini-batch, channels (<code>{L, D, S, N, C}</code>) + * - RNN weight tensor: layers, directions, input channel, gates, output channels + * (<code>{L, D, I, G, O}</code>). + * + * @note + * The order of dimensions does not depend on the memory format, so + * whether the data is laid out in #mkldnn_nchw or #mkldnn_nhwc + * the dims for 4D CN data tensor would be <code>{N, C, H, W}</code>. + */ + mkldnn_dims_t dims; + /** Data type of the tensor elements. */ + mkldnn_data_type_t data_type; + + /** Size of the data including padding in each dimension. */ + mkldnn_dims_t padded_dims; + /** Per-dimension offset from the padding to actual data, the top-level + * tensor with offsets applied must lie within the padding area. */ + mkldnn_dims_t padded_offsets; + + /** Offset from memory origin to the current block, non-zero only in + * a description of a memory sub-block. */ + mkldnn_dim_t offset0; + + /** Memory format kind. */ + mkldnn_format_kind_t format_kind; + union { + /** Description of the data layout for memory formats that use + * blocking. */ + mkldnn_blocking_desc_t blocking; + /** Tensor of weights for integer 8bit winograd convolution. */ + mkldnn_wino_desc_t wino_desc; + /** Tensor of packed weights for RNN. */ + mkldnn_rnn_packed_desc_t rnn_packed_desc; + /* ... other descriptions possible */ + } format_desc; + + mkldnn_memory_extra_desc_t extra; +} mkldnn_memory_desc_t; + +/** @struct mkldnn_memory + * An opaque structure to describe a memory. */ +struct mkldnn_memory; + +/** A memory handle. */ +typedef struct mkldnn_memory *mkldnn_memory_t; + +/** A constant memory handle. */ +typedef const struct mkldnn_memory *const_mkldnn_memory_t; + +#define MKLDNN_NATIVE_HANDLE_NONE (NULL) +#define MKLDNN_NATIVE_HANDLE_ALLOCATE ((void *)(size_t)-1) + +/** @} */ + +/** @addtogroup c_api_types_op_descs Operation descriptors + * @{*/ + +/** A pointer to any of the operation descriptors. */ +typedef void *mkldnn_op_desc_t; +/** A pointer to any of the operation descriptors (constant variant). */ +typedef const void *const_mkldnn_op_desc_t; + +/** A descriptor of a convolution operation. */ +typedef struct { + /** The kind of primitive. Used for self-identifying the primitive + * descriptor. Must be #mkldnn_convolution. */ + mkldnn_primitive_kind_t primitive_kind; + /** The kind of propagation. Possible values: #mkldnn_forward_training, + * #mkldnn_forward_inference, #mkldnn_backward_data, + * #mkldnn_backward_weights, and #mkldnn_backward_bias. */ + mkldnn_prop_kind_t prop_kind; + /** The kind of the convolution algorithm. Possible values: + * #mkldnn_convolution_direct. */ + mkldnn_alg_kind_t alg_kind; + /** Source memory descriptor. */ + mkldnn_memory_desc_t src_desc; + /** Source gradient memory descriptor. */ + mkldnn_memory_desc_t diff_src_desc; + /** Weights memory descriptor. 
 */
+ mkldnn_memory_desc_t weights_desc;
+ /** Weights gradient memory descriptor. */
+ mkldnn_memory_desc_t diff_weights_desc;
+ /** Bias memory descriptor. */
+ mkldnn_memory_desc_t bias_desc;
+ /** Bias gradient memory descriptor. */
+ mkldnn_memory_desc_t diff_bias_desc;
+ /** Destination memory descriptor. */
+ mkldnn_memory_desc_t dst_desc;
+ /** Destination gradient memory descriptor. */
+ mkldnn_memory_desc_t diff_dst_desc;
+ /** Convolution strides in each spatial dimension. */
+ mkldnn_dims_t strides;
+ /** Convolution dilations in each spatial dimension. */
+ mkldnn_dims_t dilates;
+ /** Padding in each spatial dimension. padding[0] is the padding at the
+ * beginning (@p padding_l), padding[1] is the padding at the end (@p
+ * padding_r). */
+ mkldnn_dims_t padding[2];
+ /** The kind of padding to use. */
+ mkldnn_padding_kind_t padding_kind;
+ /** The accumulator data type. Initialized automatically. */
+ mkldnn_data_type_t accum_data_type;
+} mkldnn_convolution_desc_t;
+
+/** A descriptor of a deconvolution operation. */
+typedef mkldnn_convolution_desc_t mkldnn_deconvolution_desc_t;
+
+/** A descriptor of a shuffle operation. */
+typedef struct {
+ /** The kind of primitive. Used for self-identifying the primitive
+ * descriptor. Must be #mkldnn_shuffle. */
+ mkldnn_primitive_kind_t primitive_kind;
+ /** The kind of propagation. Possible values: #mkldnn_forward_training,
+ * #mkldnn_forward_inference, and #mkldnn_backward_data. */
+ mkldnn_prop_kind_t prop_kind;
+ /** Source and destination memory descriptor,
+ * and source and destination gradient memory descriptor. */
+ mkldnn_memory_desc_t data_desc;
+ /** The axis along which to shuffle. */
+ int axis;
+ /** The number of groups. */
+ mkldnn_dim_t group_size;
+} mkldnn_shuffle_desc_t;
+
+/** A descriptor of an element-wise operation. */
+typedef struct {
+ /** The kind of primitive. Used for self-identifying the primitive
+ * descriptor. Must be #mkldnn_eltwise. */
+ mkldnn_primitive_kind_t primitive_kind;
+ /** The kind of propagation. Possible values: #mkldnn_forward_training,
+ * #mkldnn_forward_inference, #mkldnn_backward, and #mkldnn_backward_data.
+ */
+ mkldnn_prop_kind_t prop_kind;
+ /** The kind of eltwise algorithm. Possible values: #mkldnn_eltwise_relu,
+ * #mkldnn_eltwise_tanh, #mkldnn_eltwise_elu, #mkldnn_eltwise_square,
+ * #mkldnn_eltwise_abs, #mkldnn_eltwise_sqrt, #mkldnn_eltwise_linear,
+ * #mkldnn_eltwise_bounded_relu, #mkldnn_eltwise_soft_relu, and
+ * #mkldnn_eltwise_logistic. */
+ mkldnn_alg_kind_t alg_kind;
+ /** Source and destination memory descriptor. */
+ mkldnn_memory_desc_t data_desc;
+ /** Source and destination gradient memory descriptor. */
+ mkldnn_memory_desc_t diff_data_desc;
+ /** Algorithm-specific parameters.
+ * Correspondence table:
+ * - #mkldnn_eltwise_relu: @p alpha -- negative slope, @p beta ignored
+ * - #mkldnn_eltwise_tanh: @p alpha and @p beta ignored
+ * - #mkldnn_eltwise_elu: @p alpha -- negative slope, @p beta ignored
+ * - #mkldnn_eltwise_square: @p alpha and @p beta ignored
+ * - #mkldnn_eltwise_abs: @p alpha and @p beta ignored
+ * - #mkldnn_eltwise_sqrt: @p alpha and @p beta ignored
+ * - #mkldnn_eltwise_linear: @p alpha -- scale, @p beta -- shift
+ * - #mkldnn_eltwise_bounded_relu: @p alpha -- upper bound, @p beta ignored
+ * - #mkldnn_eltwise_soft_relu: @p alpha and @p beta ignored
+ * - #mkldnn_eltwise_logistic: @p alpha and @p beta ignored
+ */
+ float alpha, beta;
+} mkldnn_eltwise_desc_t;
+
+/** A descriptor of a Softmax operation.
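+ *
+ * The softmax is computed independently along @p softmax_axis for every
+ * fixed index in the remaining dimensions. A minimal sketch (assuming the
+ * softmax initializer declared in mkldnn.h; the sizes are arbitrary):
+ *
+ * ~~~cpp
+ * mkldnn_dims_t dims = {32, 1000}; // {mini-batch, classes}
+ * mkldnn_memory_desc_t logits_md;
+ * mkldnn_memory_desc_init_by_tag(&logits_md, 2, dims, mkldnn_f32, mkldnn_nc);
+ *
+ * mkldnn_softmax_desc_t softmax_d;
+ * // axis 1 normalizes over the class dimension
+ * mkldnn_softmax_forward_desc_init(&softmax_d, mkldnn_forward_inference,
+ *         &logits_md, 1);
+ * ~~~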
*/ +typedef struct { + /** The kind of primitive. Used for self-identifying the primitive + * descriptor. Must be #mkldnn_softmax. */ + mkldnn_primitive_kind_t primitive_kind; + /** The kind of propagation. Possible values: #mkldnn_forward_training and + * #mkldnn_forward_inference. */ + mkldnn_prop_kind_t prop_kind; + /** Source and destination memory descriptor. */ + mkldnn_memory_desc_t data_desc; + /** Source and Destination of gradient memory descriptor. */ + mkldnn_memory_desc_t diff_desc; + /** The axis along which to perform the softmax. */ + int softmax_axis; +} mkldnn_softmax_desc_t; + +/** A descriptor of a pooling operation. */ +typedef struct { + /** The kind of primitive. Used for self-identifying the primitive + * descriptor. Must be #mkldnn_pooling. */ + mkldnn_primitive_kind_t primitive_kind; + /** The kind of propagation. Possible values: #mkldnn_forward_training, + * #mkldnn_forward_inference, #mkldnn_backward, and #mkldnn_backward_data. + */ + mkldnn_prop_kind_t prop_kind; + /** The kind of pooling algorithm. Possible values: #mkldnn_pooling_max and + * #mkldnn_pooling_avg. */ + mkldnn_alg_kind_t alg_kind; + /** Source memory descriptor. */ + mkldnn_memory_desc_t src_desc; + /** Source gradient memory descriptor. */ + mkldnn_memory_desc_t diff_src_desc; + /** Destination memory descriptor. */ + mkldnn_memory_desc_t dst_desc; + /** Destination gradient memory descriptor. */ + mkldnn_memory_desc_t diff_dst_desc; + /** Pooling kernel strides for spatial dimensions. */ + mkldnn_dims_t strides; + /** Pooling kernel spatial dimensions. */ + mkldnn_dims_t kernel; + /** Padding in each spatial dimension. padding[0] is a padding in the + * beginning (@p padding_l), padding[1] is a padding in the end (@p + * padding_r). */ + mkldnn_dims_t padding[2]; + /** The kind of padding to use. */ + mkldnn_padding_kind_t padding_kind; + /** The accumulator data type. Initialized automatically. */ + mkldnn_data_type_t accum_data_type; +} mkldnn_pooling_desc_t; + +/** A descriptor of a Local Response Normalization (LRN) operation. */ +typedef struct { + /** The kind of primitive. Used for self-identifying the primitive + * descriptor. Must be #mkldnn_lrn. */ + mkldnn_primitive_kind_t primitive_kind; + /** The kind of propagation. Possible values: #mkldnn_forward_training, + * #mkldnn_forward_inference, #mkldnn_backward, and #mkldnn_backward_data. + */ + mkldnn_prop_kind_t prop_kind; + /** LRN algorithm. Possible values: #mkldnn_lrn_within_channel and + * #mkldnn_lrn_across_channels. */ + mkldnn_alg_kind_t alg_kind; + /** Source and destination memory descriptor. */ + mkldnn_memory_desc_t data_desc; + /** Source and destination gradient memory descriptor. */ + mkldnn_memory_desc_t diff_data_desc; + /** The number of channels to sum over (for cross-channel LRN) or the side + * length of the square region to sum over (for within-channel LRN). */ + mkldnn_dim_t local_size; + /** LRN alpha parameter. */ + float lrn_alpha; + /** LRN beta parameter. */ + float lrn_beta; + /** LRN k parameter. */ + float lrn_k; +} mkldnn_lrn_desc_t; + +/** A descriptor of a Batch Normalization operation. */ +typedef struct { + /** The kind of primitive. Used for self-identifying the primitive + * descriptor. Must be #mkldnn_batch_normalization. */ + mkldnn_primitive_kind_t primitive_kind; + /** The kind of propagation. Possible values: #mkldnn_forward_training, + * #mkldnn_forward_inference, #mkldnn_backward, and #mkldnn_backward_data. 
+ */ + mkldnn_prop_kind_t prop_kind; + /** Source and destination memory descriptor. */ + mkldnn_memory_desc_t data_desc; + /** Source and destination gradient memory descriptor. */ + mkldnn_memory_desc_t diff_data_desc; + /** Scale and shift data and gradient memory descriptors. + * + * Scaleshift memory descriptor uses 2D #mkldnn_nc format[2,Channels]. 1-st + * dimension contains gamma parameter, 2-nd dimension contains beta + * parameter. */ + mkldnn_memory_desc_t data_scaleshift_desc; + mkldnn_memory_desc_t diff_data_scaleshift_desc; + /** Mean and variance data memory descriptors. + * + * Mean and variance memory descriptors use 1D #mkldnn_x format[Channels]. + */ + mkldnn_memory_desc_t mean_desc; + mkldnn_memory_desc_t variance_desc; + /** Batch normalization epsilon parameter. */ + float batch_norm_epsilon; + unsigned flags; +} mkldnn_batch_normalization_desc_t; + +/** A descriptor of an inner product operation. */ +typedef struct { + /** The kind of primitive. Used for self-identifying the primitive + * descriptor. Must be #mkldnn_inner_product. */ + mkldnn_primitive_kind_t primitive_kind; + /** The kind of propagation. Possible values: #mkldnn_forward_training, + * #mkldnn_forward_inference, #mkldnn_backward_data, + * #mkldnn_backward_weights, and #mkldnn_backward_bias. */ + mkldnn_prop_kind_t prop_kind; + /** Source memory descriptor. */ + mkldnn_memory_desc_t src_desc; + /** Source gradient memory descriptor. */ + mkldnn_memory_desc_t diff_src_desc; + /** Weights memory descriptor. */ + mkldnn_memory_desc_t weights_desc; + /** Weights gradient memory descriptor. */ + mkldnn_memory_desc_t diff_weights_desc; + /** Bias memory descriptor. */ + mkldnn_memory_desc_t bias_desc; + /** Bias gradient memory descriptor. */ + mkldnn_memory_desc_t diff_bias_desc; + /** Destination memory descriptor. */ + mkldnn_memory_desc_t dst_desc; + /** Destination gradient memory descriptor. */ + mkldnn_memory_desc_t diff_dst_desc; + /** The accumulator data type. Initialized automatically. */ + mkldnn_data_type_t accum_data_type; +} mkldnn_inner_product_desc_t; + +/** Flags for RNN cell. */ +typedef enum { + mkldnn_rnn_cell_with_relu = 0x1U, + mkldnn_rnn_cell_with_clipping = 0x2U, +} mkldnn_rnn_cell_flags_t; + +typedef struct { + /** RNN cell kind. Must be one of #mkldnn_vanilla_rnn, + * #mkldnn_vanilla_lstm, #mkldnn_vanilla_gru, + * or #mkldnn_gru_linear_before_reset. */ + mkldnn_alg_kind_t cell_kind; + /** Activation function used. Must be either #mkldnn_eltwise_relu or + * #mkldnn_eltwise_tanh. */ + mkldnn_alg_kind_t activation_kind; + /** RNN cell flags */ + unsigned int flags; + /** @c alpha is a negative slope parameter (used only if + * `(flags & #mkldnn_rnn_cell_with_relu) != 0`) */ + float alpha; + /** clipping parameter (used only if + * `(flags & #mkldnn_rnn_cell_with_clipping) != 0`) */ + float clipping; +} mkldnn_rnn_cell_desc_t; + +/** A direction of RNN primitive execution. */ +typedef enum { + /* Unidirectional execution of RNN primitive from left to right. */ + mkldnn_unidirectional_left2right, + /* Unidirectional execution of RNN primitive from right to left. */ + mkldnn_unidirectional_right2left, + /* Bidirectional execution of RNN primitive with concatenation of the + * results. */ + mkldnn_bidirectional_concat, + /* Bidirectional execution of RNN primitive with summation of the + * results. */ + mkldnn_bidirectional_sum, + mkldnn_unidirectional = mkldnn_unidirectional_left2right, +} mkldnn_rnn_direction_t; + +/** A descriptor for an RNN operation. 
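+ *
+ * The cell-level parameters are carried in the #mkldnn_rnn_cell_desc_t shown
+ * above. Purely as an illustration of its fields (a hand-filled sketch, not
+ * necessarily how the library expects the structure to be produced), a plain
+ * tanh RNN cell without clipping could be described as:
+ *
+ * ~~~cpp
+ * mkldnn_rnn_cell_desc_t cell = {
+ *     .cell_kind = mkldnn_vanilla_rnn,
+ *     .activation_kind = mkldnn_eltwise_tanh,
+ *     .flags = 0,      // neither with_relu nor with_clipping
+ *     .alpha = 0.f,    // unused without mkldnn_rnn_cell_with_relu
+ *     .clipping = 0.f, // unused without mkldnn_rnn_cell_with_clipping
+ * };
+ * ~~~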
*/ +typedef struct { + /** The kind of primitive. Used for self-identifying the primitive + * descriptor. Must be #mkldnn_rnn. */ + mkldnn_primitive_kind_t primitive_kind; + /** The kind of propagation. Possible values: #mkldnn_forward_training, + * #mkldnn_forward_inference, and #mkldnn_backward. */ + mkldnn_prop_kind_t prop_kind; + /** The RNN cell desc. */ + mkldnn_rnn_cell_desc_t cell_desc; + /** The direction of RNN primitive execution. */ + mkldnn_rnn_direction_t direction; + /** Source layer memory descriptor. */ + mkldnn_memory_desc_t src_layer_desc; + /** Source iteration memory descriptor. */ + mkldnn_memory_desc_t src_iter_desc; + /** Weights layer memory descriptor. */ + mkldnn_memory_desc_t weights_layer_desc; + /** Weights iteration memory descriptor. */ + mkldnn_memory_desc_t weights_iter_desc; + /** Bias memory descriptor. */ + mkldnn_memory_desc_t bias_desc; + /** Destination layer memory descriptor. */ + mkldnn_memory_desc_t dst_layer_desc; + /** Destination iter memory descriptor. */ + mkldnn_memory_desc_t dst_iter_desc; + /** Source gradient layer memory descriptor. */ + mkldnn_memory_desc_t diff_src_layer_desc; + /** Source gradient iter memory descriptor. */ + mkldnn_memory_desc_t diff_src_iter_desc; + /** Weights gradient layer memory descriptor. */ + mkldnn_memory_desc_t diff_weights_layer_desc; + /** Weights gradient iter memory descriptor. */ + mkldnn_memory_desc_t diff_weights_iter_desc; + /** Bias gradient memory descriptor. */ + mkldnn_memory_desc_t diff_bias_desc; + /** Destination gradient layer memory descriptor. */ + mkldnn_memory_desc_t diff_dst_layer_desc; + /** Destination gradient iteration memory descriptor. */ + mkldnn_memory_desc_t diff_dst_iter_desc; +} mkldnn_rnn_desc_t; + +/** @} */ + +/** @addtogroup c_api_engine_types Engine + * @{ */ + +/** @brief Kinds of engines. */ +typedef enum { + /** An unspecified engine. */ + mkldnn_any_engine, + /** CPU engine. */ + mkldnn_cpu, +} mkldnn_engine_kind_t; + +/** @struct mkldnn_engine + * @brief An opaque structure to describe an engine. */ +struct mkldnn_engine; +/** @brief An engine handle. */ +typedef struct mkldnn_engine *mkldnn_engine_t; +#if 0 +/* FIXME: looks like this never happens */ +/** @brief A constant engine handle. */ +typedef const struct mkldnn_engine *const_mkldnn_engine_t; +#endif + +/** @} */ + +/** @addtogroup c_api_primitive_desc_iterators Primitive descriptor iterators + * @{ */ + +/** @struct mkldnn_primitive_desc_iterator + * @brief An opaque structure to describe a primitive descriptor iterator. */ +struct mkldnn_primitive_desc_iterator; + +/** @brief A primitive descriptor iterator handle. */ +typedef struct mkldnn_primitive_desc_iterator + *mkldnn_primitive_desc_iterator_t; + +/** @brief A constant primitive descriptor iterator handle. */ +typedef const struct mkldnn_primitive_desc_iterator + *const_mkldnn_primitive_desc_iterator_t; + +/** @} */ + +/** @addtogroup c_api_primitive_descs Primitive descriptors + * @{ */ + +/** @struct mkldnn_primitive_desc + * @brief An opaque structure to describe a primitive descriptor. */ +struct mkldnn_primitive_desc; + +/** @brief A primitive descriptor handle. */ +typedef struct mkldnn_primitive_desc *mkldnn_primitive_desc_t; + +/** @brief A constant primitive descriptor handle. 
*/ +typedef const struct mkldnn_primitive_desc *const_mkldnn_primitive_desc_t; + +/** @} */ + +/** @addtogroup c_api_primitive_attr Primitive descriptor attributes + * @{ */ + +/** Scratchpad mode */ +typedef enum { + /** The library manages scratchpad (default) */ + mkldnn_scratchpad_mode_library, + /** A user shall query and provide the scratchpad memory to primitives */ + mkldnn_scratchpad_mode_user, +} mkldnn_scratchpad_mode_t; + +/** @struct mkldnn_primitive_attr + * @brief An opaque structure for primitive descriptor attributes. + * + * Attributes may contain: + * - output scales (to scale the result prior to storing it to the memory) + */ +struct mkldnn_primitive_attr; + +/** @brief A primitive descriptor attributes handle that controls primitive + * behavior. */ +typedef struct mkldnn_primitive_attr *mkldnn_primitive_attr_t; + +/** @brief A constant primitive descriptor attributes handle. */ +typedef const struct mkldnn_primitive_attr *const_mkldnn_primitive_attr_t; + +/** @struct mkldnn_post_ops + * @brief An opaque structure for a chain of post operations. + * + * mkldnn_post_ops can be used to perform some (trivial) operations like + * accumulation or eltwise after certain primitives like convolution. + * + * Post operations might be combined together, making a chain of post + * operations. For instance one can configure convolution followed by + * accumulation followed by eltwise. This might be especially beneficial + * for residual learning blocks. + * + * @warning + * Of course not all combinations are supported, so the user should handle + * errors accordingly. + * + * Supported post operations: + * - accumulation (base primitive: convolution) + * - eltwise (base primitive: convolution) + */ +struct mkldnn_post_ops; + +/** @brief A post operation chain handle. */ +typedef struct mkldnn_post_ops *mkldnn_post_ops_t; + +/** @brief A constant post operation chain handle. */ +typedef const struct mkldnn_post_ops *const_mkldnn_post_ops_t; + +/** @} */ + +/** @addtogroup c_api_types_primitive Primitive + * @{ */ + +/** @struct mkldnn_primitive + * An opaque structure to describe a primitive. */ +struct mkldnn_primitive; +/** A primitive handle. */ +typedef struct mkldnn_primitive *mkldnn_primitive_t; +/** A constant primitive handle. 
*/ +typedef const struct mkldnn_primitive *const_mkldnn_primitive_t; + +/** @addtogroup c_api_types_arguments Argument indices + * @{ */ + +#define MKLDNN_ARG_SRC_0 1 +#define MKLDNN_ARG_SRC MKLDNN_ARG_SRC_0 +#define MKLDNN_ARG_SRC_LAYER MKLDNN_ARG_SRC_0 +#define MKLDNN_ARG_FROM MKLDNN_ARG_SRC_0 + +#define MKLDNN_ARG_SRC_1 2 +#define MKLDNN_ARG_SRC_ITER MKLDNN_ARG_SRC_1 + +#define MKLDNN_ARG_DST_0 17 +#define MKLDNN_ARG_DST MKLDNN_ARG_DST_0 +#define MKLDNN_ARG_TO MKLDNN_ARG_DST_0 +#define MKLDNN_ARG_DST_LAYER MKLDNN_ARG_DST_0 + +#define MKLDNN_ARG_DST_1 18 +#define MKLDNN_ARG_DST_ITER MKLDNN_ARG_DST_1 + +#define MKLDNN_ARG_WEIGHTS_0 33 +#define MKLDNN_ARG_WEIGHTS MKLDNN_ARG_WEIGHTS_0 +#define MKLDNN_ARG_SCALE_SHIFT MKLDNN_ARG_WEIGHTS_0 +#define MKLDNN_ARG_WEIGHTS_LAYER MKLDNN_ARG_WEIGHTS_0 + +#define MKLDNN_ARG_WEIGHTS_1 34 +#define MKLDNN_ARG_WEIGHTS_ITER MKLDNN_ARG_WEIGHTS_1 + +#define MKLDNN_ARG_BIAS 41 + +#define MKLDNN_ARG_MEAN 49 +#define MKLDNN_ARG_VARIANCE 50 + +#define MKLDNN_ARG_WORKSPACE 64 +#define MKLDNN_ARG_SCRATCHPAD 80 + +#define MKLDNN_ARG_DIFF_SRC_0 129 +#define MKLDNN_ARG_DIFF_SRC MKLDNN_ARG_DIFF_SRC_0 +#define MKLDNN_ARG_DIFF_SRC_LAYER MKLDNN_ARG_DIFF_SRC_0 + +#define MKLDNN_ARG_DIFF_SRC_1 130 +#define MKLDNN_ARG_DIFF_SRC_ITER MKLDNN_ARG_DIFF_SRC_1 + +#define MKLDNN_ARG_DIFF_DST_0 145 +#define MKLDNN_ARG_DIFF_DST MKLDNN_ARG_DIFF_DST_0 +#define MKLDNN_ARG_DIFF_DST_LAYER MKLDNN_ARG_DIFF_DST_0 + +#define MKLDNN_ARG_DIFF_DST_1 146 +#define MKLDNN_ARG_DIFF_DST_ITER MKLDNN_ARG_DIFF_DST_1 + +#define MKLDNN_ARG_DIFF_WEIGHTS_0 161 +#define MKLDNN_ARG_DIFF_WEIGHTS MKLDNN_ARG_DIFF_WEIGHTS_0 +#define MKLDNN_ARG_DIFF_SCALE_SHIFT MKLDNN_ARG_DIFF_WEIGHTS_0 +#define MKLDNN_ARG_DIFF_WEIGHTS_LAYER MKLDNN_ARG_DIFF_WEIGHTS_0 + +#define MKLDNN_ARG_DIFF_WEIGHTS_1 162 +#define MKLDNN_ARG_DIFF_WEIGHTS_ITER MKLDNN_ARG_DIFF_WEIGHTS_1 + +#define MKLDNN_ARG_DIFF_BIAS 169 + +#define MKLDNN_ARG_MULTIPLE_SRC 1024 +#define MKLDNN_ARG_MULTIPLE_DST 2048 + +/** @} */ + +/** An auxiliary structure to specify primitive's inputs/outputs at execution + * + * @warning + * With this API it's impossible to preserve constness of memory, so all + * memories are passed w/o const qualifier. However only memories with + * output semantics might be changed during the execution */ +typedef struct { + int arg; /**< An argument index, e.g. MKLDNN_ARG_SRC */ + mkldnn_memory_t memory; /**< Input/output memory */ +} mkldnn_exec_arg_t; + +/** @} */ + +/** @addtogroup c_api_types_query Queries + * @{ */ + +/** Primitive descriptor query specification + * + * For generic function mkldnn_primitive_desc_query(), the type of result must + * agree with the queried argument. The correspondence table: + * Query | type of result + * -------------------------------------------------------------- + * #mkldnn_query_engine | mkldnn_engine_t * + * #mkldnn_query_scratchpad_engine | mkldnn_engine_t * + * #mkldnn_query_primitive_kind | mkldnn_primitive_kind_t * + * *_s32 | int * + * *_s64 | mkldnn_dim_t * (same as int64_t *) + * *_f64 | double * + * *_str | const char ** + * #mkldnn_query_op_d | const_mkldnn_op_desc_t * + * *_md | const mkldnn_memory_desc_t ** + * *_${op}_d | const mkldnn_${op}_desc_t ** + * *_pd | const_mkldnn_primitive_desc_t * + * + * @note + * Rule of thumb: all opaque types and structures are returned by + * reference. All numbers are returned by value. + * + * @warning + * All returned references point to constant objects and are valid only + * during the lifetime of the queried primitive descriptor. 
Returned objects + * must not be destroyed by the user. If you need to keep the object longer + * than the lifetime of the queried primitive descriptor, use + * mkldnn_primitive_desc_clone() to make a copy. */ +typedef enum { + mkldnn_query_undef = 0, /**< no query */ + + mkldnn_query_engine, /**< execution engine */ + mkldnn_query_primitive_kind, /**< primitive kind */ + + mkldnn_query_num_of_inputs_s32, /**< number of inputs expected */ + mkldnn_query_num_of_outputs_s32, /**< number of outputs expected */ + + mkldnn_query_time_estimate_f64, /**< runtime estimation (seconds) */ + mkldnn_query_memory_consumption_s64, /**< memory consumption -- extra + (scratch) memory, additional to all + inputs and outputs memory (bytes) */ + + mkldnn_query_scratchpad_engine, /**< scratchpad engine -- engine to be used + for creating scratchpad memory */ + + mkldnn_query_impl_info_str, /**< implementation name */ + + /* memory and op descriptor section */ + mkldnn_query_some_d = 64, /**< stub */ + mkldnn_query_op_d, /**< op descriptor */ + mkldnn_query_convolution_d, /**< convolution descriptor */ + mkldnn_query_deconvolution_d, /**< deconvolution descriptor */ + mkldnn_query_shuffle_d, /**< shuffle descriptor */ + mkldnn_query_eltwise_d, /**< eltwise descriptor */ + mkldnn_query_softmax_d, /**< softmax descriptor */ + mkldnn_query_pooling_d, /**< pooling descriptor */ + mkldnn_query_lrn_d, /**< lrn descriptor */ + mkldnn_query_batch_normalization_d, /**< batch normalization descriptor */ + mkldnn_query_inner_product_d, /**< inner product descriptor */ + mkldnn_query_rnn_d, /**< rnn descriptor */ + + /* memory descriptor section */ + mkldnn_query_some_md = 128, /**< stub */ + mkldnn_query_src_md, /**< source memory desc */ + mkldnn_query_diff_src_md, /**< source gradient memory desc */ + mkldnn_query_weights_md, /**< weights memory descriptor desc */ + mkldnn_query_diff_weights_md, /**< weights grad. memory desc */ + mkldnn_query_dst_md, /**< destination memory desc */ + mkldnn_query_diff_dst_md, /**< destination grad. memory desc */ + mkldnn_query_workspace_md, /**< workspace memory desc */ + mkldnn_query_scratchpad_md, /**< scratchpad memory desc */ +} mkldnn_query_t; + +/** @} */ + +/** @addtogroup c_api_types_stream Execution stream + * @{ */ + +/** @brief Stream flags. */ +typedef enum { + /** A default stream configuration. */ + mkldnn_stream_default_flags = 0x0U, +} mkldnn_stream_flags_t; + +/** @struct mkldnn_stream + * An opaque structure to describe an execution stream. */ +struct mkldnn_stream; +/** An execution stream handle. */ +typedef struct mkldnn_stream *mkldnn_stream_t; +/** A constant execution stream handle. */ +typedef const struct mkldnn_stream *const_mkldnn_stream_t; + +/** @} */ +/** @} */ +/** @} */ + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/thirdparty/oidn/mkl-dnn/include/mkldnn_version.h b/thirdparty/oidn/mkl-dnn/include/mkldnn_version.h new file mode 100644 index 0000000000..a2713deccb --- /dev/null +++ b/thirdparty/oidn/mkl-dnn/include/mkldnn_version.h @@ -0,0 +1,32 @@ +/******************************************************************************* +* Copyright 2019 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#ifndef MKLDNN_VERSION_H +#define MKLDNN_VERSION_H + +/* Major version of MKL-DNN */ +#define MKLDNN_VERSION_MAJOR 0 + +/* Minor version of MKL-DNN */ +#define MKLDNN_VERSION_MINOR 90 + +/* Patch version of MKL-DNN */ +#define MKLDNN_VERSION_PATCH 0 + +/* Git Commit Hash of MKL-DNN */ +#define MKLDNN_VERSION_HASH "096bda1ca23324879f2df5a129e610e4405f775c" + +#endif diff --git a/thirdparty/oidn/mkl-dnn/include/mkldnn_version.h.in b/thirdparty/oidn/mkl-dnn/include/mkldnn_version.h.in new file mode 100644 index 0000000000..5ee0126188 --- /dev/null +++ b/thirdparty/oidn/mkl-dnn/include/mkldnn_version.h.in @@ -0,0 +1,32 @@ +/******************************************************************************* +* Copyright 2019 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#ifndef MKLDNN_VERSION_H +#define MKLDNN_VERSION_H + +/* Major version of MKL-DNN */ +#define MKLDNN_VERSION_MAJOR @MKLDNN_VERSION_MAJOR@ + +/* Minor version of MKL-DNN */ +#define MKLDNN_VERSION_MINOR @MKLDNN_VERSION_MINOR@ + +/* Patch version of MKL-DNN */ +#define MKLDNN_VERSION_PATCH @MKLDNN_VERSION_PATCH@ + +/* Git Commit Hash of MKL-DNN */ +#define MKLDNN_VERSION_HASH "@MKLDNN_VERSION_HASH@" + +#endif |