diff options
Diffstat (limited to 'thirdparty/oidn/mkl-dnn/src/cpu/ref_deconvolution.cpp')
-rw-r--r-- | thirdparty/oidn/mkl-dnn/src/cpu/ref_deconvolution.cpp | 199 |
1 files changed, 199 insertions, 0 deletions
diff --git a/thirdparty/oidn/mkl-dnn/src/cpu/ref_deconvolution.cpp b/thirdparty/oidn/mkl-dnn/src/cpu/ref_deconvolution.cpp new file mode 100644 index 0000000000..541a303aab --- /dev/null +++ b/thirdparty/oidn/mkl-dnn/src/cpu/ref_deconvolution.cpp @@ -0,0 +1,199 @@ +/******************************************************************************* +* Copyright 2018 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "c_types_map.hpp" +#include "type_helpers.hpp" +#include "mkldnn_thread.hpp" +#include "mkldnn_traits.hpp" +#include "math_utils.hpp" + +#include "ref_deconvolution.hpp" + +namespace mkldnn { +namespace impl { +namespace cpu { + +void ref_deconvolution_fwd_t::compute_fwd_bias(const data_t *bias, + data_t *dst) const { + const memory_desc_wrapper dst_d(pd()->dst_md()); + + const int G = pd()->G(); + const int MB = pd()->MB(); + const int OH = pd()->OH(); + const int OW = pd()->OW(); + const int OD = pd()->OD(); + const int OC = pd()->OC() / G; + const int ndims = pd()->desc()->src_desc.ndims; + + parallel_nd(MB, G, OC, OD, OH, OW, + [&](int mb, int g, int oc, int od, int oh, int ow) { + auto b = bias[g * OC + oc]; + switch (ndims) { + case 5: dst[dst_d.off(mb, g * OC + oc, od, oh, ow)] += b; break; + case 4: dst[dst_d.off(mb, g * OC + oc, oh, ow)] += b; break; + case 3: dst[dst_d.off(mb, g * OC + oc, ow)] += b; break; + default: assert(!"invalid dimension size"); + } + }); +} + +void ref_deconvolution_fwd_t::compute_fwd_bias_ncdhw(const data_t *bias, + data_t *dst) const { + const memory_desc_wrapper dst_d(pd()->dst_md()); + + const int MB = pd()->MB(); + const int OC = pd()->OC(); + const int SP = pd()->OW()*pd()->OH()*pd()->OD(); + + parallel_nd(MB, OC, [&](int mb, int oc) { + PRAGMA_OMP_SIMD() + for (int sp = 0; sp < SP; ++sp) { + auto offset = (size_t)(mb * OC + oc) * SP + sp; + dst[offset] += bias[oc]; + } + }); +} + +template <int blksize> +void ref_deconvolution_fwd_t::compute_fwd_bias_nCdhwXc(const data_t *bias, + data_t *dst) const { + const memory_desc_wrapper dst_d(pd()->dst_md()); + + const int MB = pd()->MB(); + const int OC = pd()->OC(); + const int SP = pd()->OW() * pd()->OH() * pd()->OD(); + + const ptrdiff_t stride_mb = dst_d.blocking_desc().strides[0]; + + parallel_nd(MB, utils::div_up(OC, blksize), SP, + [&](int mb, int oc_blk, int sp) { + int oc = oc_blk * blksize; + auto offset = mb * stride_mb + oc * SP + sp * blksize; + const int blk = nstl::min(blksize, OC - oc); + + PRAGMA_OMP_SIMD() + for (int i = 0; i < blk; ++i) + dst[offset + i] += bias[oc + i]; + }); +} + +void ref_deconvolution_bwd_weights_t::compute_bwd_bias(const data_t *diff_dst, + data_t *diff_bias) const { + const memory_desc_wrapper diff_dst_d(pd()->diff_dst_md()); + + const int G = pd()->G(); + const int MB = pd()->MB(); + const int OH = pd()->OH(); + const int OW = pd()->OW(); + const int OC = pd()->OC() / G; + const int OD = pd()->OD(); + const int ndims = pd()->desc()->src_desc.ndims; + + parallel_nd(G, OC, [&](int g, int oc) { + data_t db = 0; + for (int mb = 0; mb < MB; ++mb) { + for (int od = 0; od < OD; ++od) { + for (int oh = 0; oh < OH; ++oh) { + for (int ow = 0; ow < OW; ++ow) { + switch (ndims) { + case 5: + db += diff_dst[diff_dst_d.off( + mb, g * OC + oc, od, oh, ow)]; + break; + case 4: + db += diff_dst[diff_dst_d.off( + mb, g * OC + oc, oh, ow)]; + break; + case 3: + db += diff_dst[diff_dst_d.off(mb, g * OC + oc, ow)]; + break; + default: assert(!"invalid dimension size"); + } + } + } + } + } + diff_bias[g * OC + oc] = db; + }); +} + +void ref_deconvolution_bwd_weights_t::compute_bwd_bias_ncdhw( + const data_t *diff_dst, data_t *diff_bias) const { + const memory_desc_wrapper diff_dst_d(pd()->diff_dst_md()); + + const int OC = pd()->OC(); + const int MB = pd()->MB(); + const int SP = pd()->OH()*pd()->OW()*pd()->OD(); + + parallel_nd(OC, [&](int oc) { + data_t db = 0; + for (int mb = 0; mb < MB; ++mb) { + PRAGMA_OMP_SIMD() + for (int sp = 0; sp < SP; ++sp) { + auto offset = (size_t)(mb * OC + oc) * SP + sp; + db += diff_dst[offset]; + } + } + diff_bias[oc] = db; + }); +} + +template <int blksize> +void ref_deconvolution_bwd_weights_t::compute_bwd_bias_nCdhwXc( + const data_t *diff_dst, data_t *diff_bias) const { + const memory_desc_wrapper diff_dst_d(pd()->diff_dst_md()); + + const int OC = pd()->OC(); + const int MB = pd()->MB(); + const int SP = pd()->OH() * pd()->OW() * pd()->OD(); + + const ptrdiff_t stride_mb = diff_dst_d.blocking_desc().strides[0]; + + parallel_nd(utils::div_up(OC, blksize), [&](int ocb) { + data_t db[blksize] = {0}; + + for (int mb = 0; mb < MB; ++mb) { + for (int sp = 0; sp < SP; ++sp) { + auto offset = mb * stride_mb + (ocb * SP + sp) * blksize; + + PRAGMA_OMP_SIMD() + for (int i = 0; i < blksize; ++i) + db[i] += diff_dst[offset+i]; + } + } + + const int blk = nstl::min(blksize, OC - ocb * blksize); + + PRAGMA_OMP_SIMD() + for (int i = 0; i < blk; ++i) + diff_bias[ocb * blksize + i] = db[i]; + }); +} + +template void ref_deconvolution_fwd_t::compute_fwd_bias_nCdhwXc<8>( + const data_t *diff_dst, data_t *diff_bias) const; +template void ref_deconvolution_fwd_t::compute_fwd_bias_nCdhwXc<16>( + const data_t *diff_dst, data_t *diff_bias) const; +template void ref_deconvolution_bwd_weights_t::compute_bwd_bias_nCdhwXc<8>( + const data_t *diff_dst, data_t *diff_bias) const; +template void ref_deconvolution_bwd_weights_t::compute_bwd_bias_nCdhwXc<16>( + const data_t *diff_dst, data_t *diff_bias) const; + +} +} +} + +// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s |