Diffstat (limited to 'thirdparty/oidn/core/network.cpp')
-rw-r--r-- | thirdparty/oidn/core/network.cpp | 434
1 file changed, 434 insertions, 0 deletions
diff --git a/thirdparty/oidn/core/network.cpp b/thirdparty/oidn/core/network.cpp
new file mode 100644
index 0000000000..4da32073cd
--- /dev/null
+++ b/thirdparty/oidn/core/network.cpp
@@ -0,0 +1,434 @@
+// ======================================================================== //
+// Copyright 2009-2019 Intel Corporation                                    //
+//                                                                          //
+// Licensed under the Apache License, Version 2.0 (the "License");          //
+// you may not use this file except in compliance with the License.         //
+// You may obtain a copy of the License at                                  //
+//                                                                          //
+//     http://www.apache.org/licenses/LICENSE-2.0                           //
+//                                                                          //
+// Unless required by applicable law or agreed to in writing, software      //
+// distributed under the License is distributed on an "AS IS" BASIS,        //
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
+// See the License for the specific language governing permissions and      //
+// limitations under the License.                                           //
+// ======================================================================== //
+
+#include "network.h"
+#include "upsample.h"
+#include "weights_reorder.h"
+#include <cstring>
+
+namespace oidn {
+
+  template<int K>
+  Network<K>::Network(const Ref<Device>& device, const std::map<std::string, Tensor>& weightMap)
+    : device(device),
+      eng(engine::cpu, 0),
+      sm(eng),
+      weightMap(weightMap)
+  {
+  }
+
+  template<int K>
+  void Network<K>::execute(const Progress& progress, int taskIndex)
+  {
+    if (progress.func)
+    {
+      const double value = double(taskIndex) / double(progress.taskCount);
+      if (!progress.func(progress.userPtr, value))
+        throw Exception(Error::Cancelled, "execution was cancelled");
+    }
+
+    for (size_t i = 0; i < nodes.size(); ++i)
+    {
+      nodes[i]->execute(sm);
+
+      if (progress.func)
+      {
+        const double value = (double(taskIndex) + double(i+1) / double(nodes.size())) / double(progress.taskCount);
+        if (!progress.func(progress.userPtr, value))
+          throw Exception(Error::Cancelled, "execution was cancelled");
+      }
+    }
+  }
+
+  template<int K>
+  std::shared_ptr<memory> Network<K>::allocTensor(const memory::dims& dims,
+                                                  memory::format_tag format,
+                                                  void* data)
+  {
+    if (format == memory::format_tag::any)
+    {
+      if (dims.size() == 4)
+        format = BlockedFormat<K>::nChwKc;
+      else if (dims.size() == 1)
+        format = memory::format_tag::x;
+      else
+        assert(0);
+    }
+    memory::desc desc(dims, memory::data_type::f32, format);
+    if (data == nullptr)
+    {
+      const size_t bytes = getTensorSize(dims) * sizeof(float);
+      if (format == BlockedFormat<K>::nChwKc)
+        activationAllocBytes += bytes;
+      totalAllocBytes += bytes;
+
+      return std::make_shared<memory>(desc, eng);
+    }
+    else
+    {
+      return std::make_shared<memory>(desc, eng, data);
+    }
+  }
+
+  template<int K>
+  std::shared_ptr<memory> Network<K>::castTensor(const memory::dims& dims,
+                                                 const std::shared_ptr<memory>& src,
+                                                 size_t srcOffset,
+                                                 memory::format_tag format)
+  {
+    const mkldnn_memory_desc_t& srcDesc = src->get_desc().data;
+    MAYBE_UNUSED(srcDesc);
+    assert(srcDesc.data_type == memory::data_type::f32);
+    assert(getTensorSize(src) >= srcOffset + getTensorSize(dims));
+
+    if (format == memory::format_tag::any)
+    {
+      if (dims.size() == 4)
+        format = BlockedFormat<K>::nChwKc;
+      else if (dims.size() == 1)
+        format = memory::format_tag::x;
+      else
+        assert(0);
+    }
+    memory::desc desc(dims, memory::data_type::f32, format);
+    float* srcPtr = (float*)src->get_data_handle() + srcOffset;
+    return std::make_shared<memory>(desc, eng, srcPtr);
+  }
+
+  template<int K>
+  std::shared_ptr<memory> Network<K>::castTensor(const memory::dims& dims,
+                                                 const std::shared_ptr<memory>& src,
+                                                 const memory::dims& srcOffset)
+  {
+    return castTensor(dims, src, getTensorSize(srcOffset));
+  }
+
+  template<int K>
+  void Network<K>::zeroTensor(const std::shared_ptr<memory>& dst)
+  {
+    assert(getTensorType(dst) == memory::data_type::f32);
+    memset(dst->get_data_handle(), 0, getTensorSize(dst)*sizeof(float));
+  }
+
+  template<int K>
+  memory::dims Network<K>::getInputReorderDims(const memory::dims& srcDims, int alignment)
+  {
+    memory::dims dstDims = srcDims;
+    dstDims[1] = getPadded<K>(srcDims[1]); // round up C
+    dstDims[2] = roundUp(srcDims[2], memory::dim(alignment)); // round up H
+    dstDims[3] = roundUp(srcDims[3], memory::dim(alignment)); // round up W
+    return dstDims;
+  }
+
+  template<int K>
+  std::shared_ptr<Node> Network<K>::addInputReorder(const Image& color,
+                                                    const Image& albedo,
+                                                    const Image& normal,
+                                                    const std::shared_ptr<TransferFunction>& transferFunc,
+                                                    int alignment,
+                                                    const std::shared_ptr<memory>& userDst)
+  {
+    assert(color);
+    int inputC = 3;
+    if (albedo) inputC += 3;
+    if (normal) inputC += 3;
+
+    memory::dims srcDims = {1, inputC, color.height, color.width};
+    memory::dims dstDims = getInputReorderDims(srcDims, alignment);
+
+    // Allocate padded memory
+    auto dst = userDst;
+    if (!dst)
+      dst = allocTensor(dstDims);
+
+    // Push node
+    std::shared_ptr<Node> node;
+
+    if (auto tf = std::dynamic_pointer_cast<LinearTransferFunction>(transferFunc))
+      node = std::make_shared<InputReorderNode<K, LinearTransferFunction>>(color, albedo, normal, dst, tf);
+    else if (auto tf = std::dynamic_pointer_cast<GammaTransferFunction>(transferFunc))
+      node = std::make_shared<InputReorderNode<K, GammaTransferFunction>>(color, albedo, normal, dst, tf);
+    else if (auto tf = std::dynamic_pointer_cast<LogTransferFunction>(transferFunc))
+      node = std::make_shared<InputReorderNode<K, LogTransferFunction>>(color, albedo, normal, dst, tf);
+    else if (auto tf = std::dynamic_pointer_cast<PQXTransferFunction>(transferFunc))
+      node = std::make_shared<InputReorderNode<K, PQXTransferFunction>>(color, albedo, normal, dst, tf);
+    else
+      assert(0);
+
+    nodes.push_back(node);
+    return node;
+  }
+
+  template<int K>
+  std::shared_ptr<Node> Network<K>::addOutputReorder(const std::shared_ptr<memory>& src,
+                                                     const std::shared_ptr<TransferFunction>& transferFunc,
+                                                     const Image& output)
+  {
+    memory::dims srcDims = getTensorDims(src);
+    assert(srcDims[1] == K);
+
+    // Push node
+    std::shared_ptr<Node> node;
+
+    if (auto tf = std::dynamic_pointer_cast<LinearTransferFunction>(transferFunc))
+      node = std::make_shared<OutputReorderNode<K, LinearTransferFunction>>(src, output, tf);
+    else if (auto tf = std::dynamic_pointer_cast<GammaTransferFunction>(transferFunc))
+      node = std::make_shared<OutputReorderNode<K, GammaTransferFunction>>(src, output, tf);
+    else if (auto tf = std::dynamic_pointer_cast<LogTransferFunction>(transferFunc))
+      node = std::make_shared<OutputReorderNode<K, LogTransferFunction>>(src, output, tf);
+    else if (auto tf = std::dynamic_pointer_cast<PQXTransferFunction>(transferFunc))
+      node = std::make_shared<OutputReorderNode<K, PQXTransferFunction>>(src, output, tf);
+    else
+      assert(0);
+
+    nodes.push_back(node);
+    return node;
+  }
+
+  template<int K>
+  memory::dims Network<K>::getConvDims(const std::string& name, const memory::dims& srcDims)
+  {
+    auto b = weightMap[name + "/b"];
+    memory::dims dstDims = srcDims;
+    dstDims[1] = getPadded<K>(b.dims[0]); // dstDims[C] = getPadded(OC)
+    return dstDims;
+  }
+
+  template<int K>
+  std::shared_ptr<Node> Network<K>::addConv(const std::string& name,
+                                            const std::shared_ptr<memory>& src,
+                                            const std::shared_ptr<memory>& userDst,
+                                            bool relu)
+  {
+    const memory::dims strides = {1, 1};
+    const memory::dims padding = {1, 1};
+
+    memory::dims srcDims = getTensorDims(src);
+
+    // Get the weights
+    const auto& W = weightMap[name + "/W"];
+    if (W.ndims() != 4 || W.format != "oihw")
+      throw Exception(Error::InvalidOperation, "invalid convolution weights");
+    memory::dims weightsDims = W.dims;
+    auto userWeights = allocTensor(weightsDims, memory::format_tag::oihw, W.data);
+
+    // Pad the weights
+    memory::dims weightsPadDims = weightsDims;
+    weightsPadDims[1] = getPadded<K>(weightsDims[1]); // IC
+    weightsPadDims[0] = getPadded<K>(weightsDims[0]); // OC
+    assert(srcDims[1] == weightsPadDims[1]); // srcDims[C] == weightsPadDims[IC]
+    auto weightsPad = allocTensor(weightsPadDims, memory::format_tag::oihw);
+    WeightsReorderNode<K>(userWeights, weightsPad).execute(sm);
+
+    // Get the biases
+    const auto& b = weightMap[name + "/b"];
+    if (b.ndims() != 1)
+      throw Exception(Error::InvalidOperation, "invalid convolution biases");
+    memory::dims biasDims = b.dims;
+
+    // Copy/pad the biases
+    memory::dims biasPadDims = {getPadded<K>(biasDims[0])};
+    auto bias = allocTensor(biasPadDims);
+    if (biasDims[0] != biasPadDims[0])
+      memset(bias->get_data_handle(), 0, biasPadDims[0]*sizeof(float));
+    memcpy(bias->get_data_handle(), b.data, biasDims[0]*sizeof(float));
+
+    // Allocate memory for destination
+    memory::dims dstDims = srcDims;
+    dstDims[1] = weightsPadDims[0]; // dstDims[C] = weightsPadDims[OC]
+
+    std::shared_ptr<memory> dst;
+    if (!userDst)
+      dst = allocTensor(dstDims);
+    else if (getTensorDims(userDst) == dstDims)
+      dst = userDst;
+    else
+      dst = castTensor(dstDims, userDst);
+
+    // Create a convolution
+    // Let the convolution primitive choose the weights format
+    auto weightsDesc = memory::desc({ weightsPadDims }, memory::data_type::f32, memory::format_tag::any);
+
+    auto convAlgo = (K == 16) ? convolution_winograd : convolution_direct;
+    auto convDesc = convolution_forward::desc(
+      prop_kind::forward_inference, convAlgo,
+      src->get_desc(),
+      weightsDesc,
+      bias->get_desc(),
+      dst->get_desc(),
+      strides, padding, padding, padding_kind::zero);
+
+    // Incorporate relu
+    mkldnn::primitive_attr convAttr;
+    if (relu)
+    {
+      mkldnn::post_ops ops;
+      ops.append_eltwise(
+        1.f,   // scale factor, not used
+        algorithm::eltwise_relu,
+        0.f,   // max with
+        0.f    // unused
+      );
+      convAttr.set_post_ops(ops);
+    }
+    convAttr.set_scratchpad_mode(scratchpad_mode_user);
+
+    auto convPrimDesc = convolution_forward::primitive_desc(convDesc, convAttr, eng);
+
+    // Reorder the weights to the final format, if necessary
+    auto weights = weightsPad;
+    if (convPrimDesc.weights_desc() != weightsPad->get_desc())
+    {
+      weights = std::make_shared<memory>(convPrimDesc.weights_desc(), eng);
+      ReorderNode(weightsPad, weights).execute(sm);
+    }
+
+    // Create convolution node and add it to the net
+    auto node = std::make_shared<ConvNode>(convPrimDesc, src, weights, bias, dst);
+    nodes.push_back(node);
+    return node;
+  }
+
+  template<int K>
+  memory::dims Network<K>::getPoolDims(const memory::dims& srcDims)
+  {
+    memory::dims dstDims = srcDims;
+    dstDims[2] /= 2; // H/2
+    dstDims[3] /= 2; // W/2
+    return dstDims;
+  }
+
+  template<int K>
+  std::shared_ptr<Node> Network<K>::addPool(const std::shared_ptr<memory>& src,
+                                            const std::shared_ptr<memory>& userDst)
+  {
+    const memory::dims kernel  = {2, 2};
+    const memory::dims strides = {2, 2};
+    const memory::dims padding = {0, 0};
+
+    memory::dims srcDims = getTensorDims(src);
+    memory::dims dstDims = getPoolDims(srcDims);
+
+    std::shared_ptr<memory> dst;
+    if (!userDst)
+      dst = allocTensor(dstDims);
+    else if (getTensorDims(userDst) == dstDims)
+      dst = userDst;
+    else
+      dst = castTensor(dstDims, userDst);
+
+    auto poolDesc = pooling_forward::desc(
+      prop_kind::forward_inference, pooling_max,
+      src->get_desc(),
+      dst->get_desc(),
+      strides, kernel, padding, padding, padding_kind::zero);
+
+    mkldnn::primitive_attr poolAttr;
+    poolAttr.set_scratchpad_mode(scratchpad_mode_user);
+
+    auto poolPrimDesc = pooling_forward::primitive_desc(poolDesc, poolAttr, eng);
+
+    auto node = std::make_shared<PoolNode>(poolPrimDesc, src, dst);
+    nodes.push_back(node);
+    return node;
+  }
+
+  template<int K>
+  memory::dims Network<K>::getUpsampleDims(const memory::dims& srcDims)
+  {
+    memory::dims dstDims = srcDims;
+    dstDims[2] *= 2; // H*2
+    dstDims[3] *= 2; // W*2
+    return dstDims;
+  }
+
+  template<int K>
+  std::shared_ptr<Node> Network<K>::addUpsample(const std::shared_ptr<memory>& src,
+                                                const std::shared_ptr<memory>& userDst)
+  {
+    memory::dims srcDims = getTensorDims(src);
+    memory::dims dstDims = getUpsampleDims(srcDims);
+
+    std::shared_ptr<memory> dst;
+    if (!userDst)
+      dst = allocTensor(dstDims);
+    else if (getTensorDims(userDst) == dstDims)
+      dst = userDst;
+    else
+      dst = castTensor(dstDims, userDst);
+
+    // Create upsampling node and add it to net
+    auto node = std::make_shared<UpsampleNode<K>>(src, dst);
+    nodes.push_back(node);
+    return node;
+  }
+
+  template<int K>
+  memory::dims Network<K>::getConcatDims(const memory::dims& src1Dims, const memory::dims& src2Dims)
+  {
+    assert(src1Dims[0] == src2Dims[0]); // N
+    assert(src1Dims[2] == src2Dims[2]); // H
+    assert(src1Dims[3] == src2Dims[3]); // W
+
+    memory::dims dstDims = src1Dims;
+    dstDims[1] += src2Dims[1]; // C
+    return dstDims;
+  }
+
+  template<int K>
+  std::shared_ptr<Node> Network<K>::addAutoexposure(const Image& color,
+                                                    const std::shared_ptr<HDRTransferFunction>& transferFunc)
+  {
+    auto node = std::make_shared<AutoexposureNode>(color, transferFunc);
+    nodes.push_back(node);
+    return node;
+  }
+
+  template <int K>
+  void Network<K>::finalize()
+  {
+    // Compute the size of the scratchpad
+    size_t scratchpadSize = 0;
+    for (const auto& node : nodes)
+      scratchpadSize = max(scratchpadSize, node->getScratchpadSize());
+
+    // Allocate the scratchpad
+    memory::dims scratchpadDims = { memory::dim(scratchpadSize) };
+    memory::desc scratchpadDesc(scratchpadDims, memory::data_type::u8, memory::format_tag::x);
+    auto scratchpad = std::make_shared<memory>(scratchpadDesc, eng);
+    activationAllocBytes += scratchpadSize;
+    totalAllocBytes += scratchpadSize;
+
+    // Set the scratchpad for the nodes
+    for (auto& node : nodes)
+      node->setScratchpad(scratchpad);
+
+    // Free the weights
+    weightMap.clear();
+
+    // Print statistics
+    if (device->isVerbose(2))
+    {
+      std::cout << "Activation bytes: " << activationAllocBytes << std::endl;
+      std::cout << "Scratchpad bytes: " << scratchpadSize << std::endl;
+      std::cout << "Total bytes     : " << totalAllocBytes << std::endl;
+    }
+  }
+
+  template class Network<8>;
+  template class Network<16>;
+
+} // namespace oidn
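
For context, below is a minimal, hypothetical sketch of how this builder API can be driven; it is not part of the patch. The real graph construction lives elsewhere in OIDN, and the weight-map key "enc_conv0", the Node::getDst() accessor, the empty Image() placeholders for the missing albedo/normal inputs, the default-constructed LinearTransferFunction, and the alignment value 16 are all illustrative assumptions.

// Hypothetical usage sketch (not part of this patch). Assumes the OIDN
// internal headers used above are available.
std::shared_ptr<oidn::Node> buildTinyNet(const oidn::Ref<oidn::Device>& device,
                                         const std::map<std::string, oidn::Tensor>& weights,
                                         const oidn::Image& color,
                                         const oidn::Image& output)
{
  using namespace oidn;
  auto net = std::make_shared<Network<8>>(device, weights); // Network<8> is instantiated above
  auto tf  = std::make_shared<LinearTransferFunction>();    // assumed default-constructible

  // Pad the N=1, C=3 input dims to the block size / alignment, allocate the
  // padded input tensor, and reorder the color image into it.
  memory::dims srcDims = {1, 3, color.height, color.width};
  memory::dims padDims = net->getInputReorderDims(srcDims, 16);
  auto input = net->allocTensor(padDims, memory::format_tag::any, nullptr);
  net->addInputReorder(color, Image(), Image(), tf, 16, input);

  // conv(+ReLU) -> 2x2 max pool -> 2x upsample; "enc_conv0" is a made-up weight
  // key, and getDst() is assumed to be the Node accessor for its output tensor.
  auto conv = net->addConv("enc_conv0", input, nullptr, true);
  auto pool = net->addPool(conv->getDst(), nullptr);
  auto up   = net->addUpsample(pool->getDst(), nullptr);

  // Write the result back to the user image; addOutputReorder() asserts that
  // its source has exactly K (here 8) channels.
  auto out = net->addOutputReorder(up->getDst(), tf, output);

  net->finalize(); // sizes and assigns the shared scratchpad, frees the weights
  return out;
}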