Skip to content

Commit 7f358b9

Browse files
wangchang-2020, Zars19, jackzipu
authored
cherry-pick master 代码到SDK2.2.2 分支上 (#559)
* Add Custom Op for Yolov3 Post Process (#512) * add custom op for yolov3 * reset submodule onnx * reset tensorrt * delete build * merge odla_ops_nn * modify for passing link-check Co-authored-by: gcuser <jackz@graphcore.ai> (cherry picked from commit 5847cd3) * ODLA popART pipeline function (#522) * First runnable with single thread & test context * mnist runnable demot to test the pipeline * multi thread put the data to the session run * simple bash to compile and run test * An example of how to use the callback in pipeline * multi threads using local Ctx * Can run with pipeline setting in onnx file * Refactored and add no pipeline multi thread * Move codes to the odla_pipeline.h .cc * Make single empty/zero data, and delete context for empty data after get result * Add mutex to serialization the compute requests * Merge the changes for attention mask & prevous changes * test codes for time * Chage the CMakeList to make the pipeline.cc and new custom op compiled * Successfully run on 24L with attention mask custom OP * custom op attention_mask test code * And name scope to the each node in model * Try throghput test with MLPerf model * only set AMP on feed forward matmul * Run the online pipeling with config hard coded to the config read class * Compile with SDK 2.2 with pipeline online setting * Add config file for pipeline stage setting * Run pipeline with similar performance of popart * change some names & make AMP all 0.445 * Add amp parameter in config file * Detach device and clear session when DestroyComputation * Make the batch_per_step take effect on execution mode SEQUENCE to pass enough size of data * Add the new lock free queue and logging * Fix bug on empty data visit counter * delete the empty context * add some pipeline sync * Make thread sleep for 5 ms when no task in the queue * change the size() of LockFreeQueue to tail-wait * [CI] make the call by main can work with npz files * Move the computation init to create context * Add common functions 
to common.h and common.cc * move the compuation init out * Move common functions to the test foler * Test the config of ODLA popART and make no configuration act as before * Add tests for call the model.cc * Add FP32 to save as result * Some changes on LockFreeQueue and tests * Fix the rsqrt wrong problem, and remove std cout&cerr to avoid crash * fix the accuracy problem of large bps * Add thread check for context & computation holding to avoid conflicts * Add the batch tools to help on the test to generate model, build and run * Decreasing the empty data put * temporary commit to migrate crashed system * set pipeline information on fly change the mixed style of class member add debug setting and default to false to make the opts set by api remove the old pipeline set api * Fixed the mixed code style and removed redundant codes * Remove the function test codes of the odla_popart * remove some redundant codes and files * Changed the CACHE STRING to CACHE PATH * move ENGINE_CACHE_PATH to odla_popart.cc * format the codes with clang-format-9 -i command * Move json.hpp to third party * Set virtualgraph for model not using pipeline in set_session_opts * Add virtual graph attribute when _odla_computation constructed * Check the shape before extends it with batches_per_step Co-authored-by: gcuser <gcuser@alibaba-inc.com> (cherry picked from commit 6095bdf) * fix on default configuration & computation destroyment (cherry picked from commit 40b9fc8) * definitions for static variables (cherry picked from commit 18e0e83) * disable test case test_constant_popart.cc Co-authored-by: Zars19 <1036473307@qq.com> Co-authored-by: jackzipu <74961298+jackzipu@users.noreply.github.com> Co-authored-by: gcuser <jackz@graphcore.ai>
1 parent 0810ace commit 7f358b9

33 files changed

+28498
-474
lines changed

ODLA/include/ODLA/ops/odla_ops_nn.h

+17
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,23 @@ extern ODLA_API_EXPORT odla_values ODLA_API_CALL odla_TopK(
530530
odla_uint32 axis, odla_value_type output_value_type,
531531
odla_value_type output_value_index_type, const odla_value_ids value_ids);
532532

533+
//! \brief Yolov3 Post Process
534+
/*!
535+
PostProcess Return Selected Info (cx, cy, w, h, pred_cls) of Each Class
536+
537+
\param orig_img_w the width of original image
538+
\param orig_img_h the height of original image
539+
\param bb13 BBoxes 13 x 13
540+
\param bb26 BBoxes 26 x 26
541+
\param bb52 BBoxes 52 x 52
542+
\param value_id a unique value id (can be NULL)
543+
544+
\return odla_values
545+
*/
546+
extern ODLA_API_EXPORT odla_values ODLA_API_CALL odla_PostProcess(
547+
odla_value orig_img_w, odla_value orig_img_h, odla_value bb13,
548+
odla_value bb26, odla_value bb52, const odla_value_id value_id);
549+
533550
#ifdef __cplusplus
534551
} // C extern
535552
#endif

ODLA/platforms/odla_popart/CMakeLists.txt

+8-2
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ option(ODLA_BUILD_POPART_CUSTOM_OPS "Link with Popart custom ops" ON)
2020
add_odla_library(odla_popart SHARED common.cc odla_compute.cc
2121
odla_ops_math.cc odla_ops_nn.cc
2222
odla_ops_process.cc odla_ops.cc
23-
odla_ops_custom.cc
24-
)
23+
odla_ops_custom.cc odla_pipeline.cc
24+
odla_popart.cc popart_config.cc
25+
)
2526

2627
if (NOT POPLAR_ROOT)
2728
set(POPLAR_ROOT "/opt/poplar_sdk/poplar" CACHE PATH "Path of poplar root")
@@ -51,3 +52,8 @@ if (NOT ODLA_BUILD_POPART_USE_CXX11ABI)
5152
endif()
5253

5354
target_link_libraries(odla_popart PUBLIC ODLA custom_ops popart-only)
55+
56+
target_include_directories(odla_popart PRIVATE
57+
${CMAKE_CURRENT_SOURCE_DIR}/custom_ops/third_party/onnx/
58+
${CMAKE_CURRENT_SOURCE_DIR}/custom_ops/third_party/include/
59+
)
+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"version":"1.0.0",
3+
"amp":0.445,
4+
"batch_per_step":10,
5+
"execution_mode":"pipeline",
6+
"ipu_num":2,
7+
"load_onnx":false,
8+
"load_onnx_path":"path",
9+
"pipeline":{
10+
"^embedding_" : [0, 0],
11+
"^layer[0-9]_" : [0, 0],
12+
"^layer1[0-1]_" : [0, 0],
13+
"^layer1[2-9]_" : [1, 1],
14+
"^layer2[0-3]_" : [1, 1],
15+
"^squad_" : [1, 1]
16+
},
17+
"queue_type":"LockFreeQueue",
18+
"queue_capacity":1048576,
19+
"save_model" : true,
20+
"save_model_path":"pipeline_test.onnx"
21+
}

ODLA/platforms/odla_popart/custom_ops/CMakeLists.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
add_odla_library(custom_ops SHARED
1818
erf.cc
1919
rsqrt.cc
20+
postprocess.cc
21+
attention_mask.cc
2022
)
2123

2224
set_property(TARGET custom_ops PROPERTY CXX_STANDARD 14)
@@ -31,4 +33,4 @@ target_link_libraries(custom_ops PRIVATE popart-only)
3133
target_include_directories(custom_ops PRIVATE
3234
${CMAKE_CURRENT_SOURCE_DIR}/third_party/onnx/
3335
${CMAKE_CURRENT_SOURCE_DIR}/third_party/include/
34-
)
36+
)
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,31 @@
11
CXX ?= g++
2-
CXXFLAGS = -std=c++14 -fPIC -g -DONNX_NAMESPACE=onnx -D_GLIBCXX_USE_CXX11_ABI=0
2+
CXXFLAGS = -std=c++14 -fPIC -g -DONNX_NAMESPACE=onnx
33
LDLIBS = -shared -lpopart -lpoplar -lpopops -lpoputil
4-
INCLUDES = -Iinclude
4+
INCLUDES = -Iinclude -Ithird_party/onnx/ -Ithird_party/include
55

66
BUILD_DIR = build
7-
SOURCES = rsqrt.cc erf.cc
7+
SOURCES = rsqrt.cc erf.cc postprocess.cc attention_mask.cc
88
TARGET = $(BUILD_DIR)/libcustom_ops.so
99

10-
all: create_build_dir rsqrt_custom_op rsqrt_test erf_test
10+
all: create_build_dir rsqrt_custom_op rsqrt_test attention_mask_test
1111

1212
.PHONY: create_build_dir
1313
create_build_dir:
1414
mkdir -p $(BUILD_DIR)
1515

16-
rsqrt_custom_op: rsqrt.cc erf.cc
16+
rsqrt_custom_op: ${SOURCES}
1717
$(CXX) $(SOURCES) $(LDLIBS) $(CXXFLAGS) $(INCLUDES) -o $(TARGET)
1818

1919
rsqrt_test: rsqrt_test.cc rsqrt_custom_op
20-
$(CXX) -std=c++14 rsqrt_test.cc -lpopart -lpoplar -lpopops -ldl -DONNX_NAMESPACE=onnx -o rsqrt_test -D_GLIBCXX_USE_CXX11_ABI=0
20+
$(CXX) -std=c++14 rsqrt_test.cc -lpopart -lpoplar -lpopops -ldl -DONNX_NAMESPACE=onnx -o rsqrt_test
2121

22-
erf_test: erf_test.cc rsqrt_custom_op
23-
$(CXX) -std=c++14 erf_test.cc -lpopart -lpoplar -lpopops -ldl -DONNX_NAMESPACE=onnx -o erf_test -D_GLIBCXX_USE_CXX11_ABI=0
22+
#erf_test: erf_test.cc rsqrt_custom_op
23+
# $(CXX) -std=c++14 erf_test.cc -lpopart -lpoplar -lpopops -ldl -DONNX_NAMESPACE=onnx -o erf_test
24+
25+
attention_mask_test: attention_mask_test.cc rsqrt_custom_op
26+
# $(CXX) $(LDLIBS) $(CXXFLAGS) $(INCLUDES) -o attention_mask_test
27+
$(CXX) -std=c++14 -fPIC -g -DONNX_NAMESPACE=onnx attention_mask_test.cc -lpopart -lpoplar -lpopops -ldl -o attention_mask_test
2428

2529
.PHONY: clean
2630
clean:
27-
rm -r $(BUILD_DIR) rsqrt_test erf_test
31+
rm -r $(BUILD_DIR) rsqrt_test attention_mask_test
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
// Copyright (c) 2019 Graphcore Ltd. All rights reserved.
2+
3+
#include <iostream>
4+
#include <popart/names.hpp>
5+
#include <popart/op.hpp>
6+
#include <popart/opmanager.hpp>
7+
#include <popart/popx/devicex.hpp>
8+
#include <popart/popx/opx.hpp>
9+
#include <popart/popx/opxmanager.hpp>
10+
#include <popart/region.hpp>
11+
#include <popart/shapeinference.hpp>
12+
#include <popops/Cast.hpp>
13+
#include <popops/ElementWise.hpp>
14+
#include <popops/Rearrange.hpp>
15+
#include <poputil/TileMapping.hpp>
16+
#include <random>
17+
18+
using namespace popart;
19+
using namespace popart::popx;
20+
using namespace popops::expr;
21+
22+
namespace CustomOperators {
23+
const popart::OperatorIdentifier AttentionMask = {"ai.graphcore",
24+
"AttentionMask", 1};
25+
} // namespace CustomOperators
26+
27+
// An InplaceIdentityOp that doesn't return any grad ops. This allows you to
28+
// disconnect the flow of gradients when creating the backwards pass
29+
class AttentionMaskOp : public popart::Op {
30+
public:
31+
poplar::Type dataType;
32+
33+
AttentionMaskOp(const popart::OperatorIdentifier& _opid,
34+
const Op::Settings& settings_, poplar::Type& dataTypeIn)
35+
: Op(_opid, settings_), dataType(dataTypeIn) {}
36+
37+
void setup() final {
38+
// input shape [B, S]
39+
Shape inShape = inInfo(0).shape();
40+
Shape refShape = inInfo(1).shape();
41+
42+
// output shape [B, 1, S, S]
43+
Shape outShape = {inShape.at(0), 1, inShape.at(1), inShape.at(1)};
44+
45+
if (dataType == poplar::HALF)
46+
outInfo(0) = {"FLOAT16", outShape};
47+
else
48+
outInfo(0) = {"FLOAT", outShape};
49+
}
50+
51+
std::unique_ptr<Op> clone() const final {
52+
return std::make_unique<AttentionMaskOp>(*this);
53+
}
54+
55+
float getSubgraphValue() const final { return getLowSubgraphValue(); }
56+
};
57+
58+
static popart::OpDefinition attentionMaskOpDef({});
59+
60+
static popart::OpCreator<AttentionMaskOp> attentionMaskOpCreator(
61+
popart::OpDefinitions({{CustomOperators::AttentionMask,
62+
attentionMaskOpDef}}),
63+
[](const popart::OpCreatorInfo& oci) -> std::unique_ptr<popart::Op> {
64+
std::string type =
65+
oci.attributes.getAttribute<Attributes::String>("dataType");
66+
poplar::Type dataType = (type == "FLOAT") ? poplar::FLOAT : poplar::HALF;
67+
68+
return std::unique_ptr<AttentionMaskOp>(
69+
new AttentionMaskOp(oci.opid, oci.settings, dataType));
70+
},
71+
true);
72+
73+
class AttentionMaskOpX : public popart::popx::Opx {
74+
public:
75+
AttentionMaskOpX(popart::Op* op, popart::popx::Devicex* devicex)
76+
: popart::popx::Opx(op, devicex) {
77+
verifyOp<AttentionMaskOp>(op, CustomOperators::AttentionMask);
78+
}
79+
80+
popart::popx::InputCreatorType getInputCreatorType(popart::InIndex) const {
81+
return popart::popx::InputCreatorType::CanUnwind;
82+
}
83+
84+
poplar::Tensor unwindTensorLayout(poplar::Tensor tensor, popart::InIndex,
85+
popart::OutIndex) const {
86+
return tensor;
87+
}
88+
89+
popart::view::RegMap unwindRegion(popart::InIndex, popart::OutIndex) const {
90+
return [this](const popart::view::Region& r) {
91+
return popart::view::Regions(1, r);
92+
};
93+
}
94+
95+
void grow(poplar::program::Sequence& prog) const final {
96+
AttentionMaskOp& myOp = getOp<AttentionMaskOp>();
97+
98+
poplar::Type dataType = myOp.dataType;
99+
poplar::Graph& graph = Opx::graph();
100+
// input tensor shape [B, S]
101+
poplar::Tensor seqIndex = getInTensor(0);
102+
std::size_t batchSize = seqIndex.dim(0);
103+
std::size_t seqLength = seqIndex.dim(1);
104+
seqIndex = seqIndex.reshape({batchSize, seqLength, 1});
105+
seqIndex = popops::cast(graph, seqIndex, dataType, prog, "input_mask_f");
106+
poplar::Tensor attentionMatrix = getInTensor(1);
107+
108+
const auto dimOrdering =
109+
poputil::detectDimGroupings(graph, attentionMatrix);
110+
bool swapOrder = !dimOrdering.empty() && dimOrdering.front().first == 2;
111+
auto seqMask =
112+
swapOrder ? popops::sub(graph, seqIndex.dimShuffle({0, 2, 1}), seqIndex,
113+
prog, "maskVal")
114+
.dimShuffle({0, 2, 1})
115+
: popops::sub(graph, seqIndex, seqIndex.dimShuffle({0, 2, 1}),
116+
prog, "maskVal");
117+
popops::absInPlace(graph, seqMask, prog);
118+
popops::tanhInPlace(graph, seqMask, prog);
119+
120+
// Create constant tensor;
121+
std::mt19937 randomEngine;
122+
unsigned totalTile = graph.getTarget().getTilesPerIPU();
123+
std::uniform_int_distribution<> distrib(0, totalTile - 1);
124+
int tileForConst = distrib(randomEngine);
125+
poplar::Tensor minValue = graph.addConstant(dataType, {}, -10000.0);
126+
graph.setTileMapping(minValue, tileForConst);
127+
128+
// Create log mask
129+
popops::mulInPlace(graph, seqMask, minValue, prog);
130+
seqMask = seqMask.reshape({batchSize, 1, seqLength, seqLength});
131+
setOutTensor(0, seqMask);
132+
}
133+
};
134+
135+
static popart::popx::OpxCreator<AttentionMaskOpX> attentionMaskOpxCreator(
136+
CustomOperators::AttentionMask);
137+
138+
static popart::RegisterShapeInferenceFunction AttentionMaskShapeInfer(
139+
CustomOperators::AttentionMask, [](ShapeInferenceContext& ctx) {
140+
auto B = ctx.inInfo(1).shape().at(0);
141+
auto S = ctx.inInfo(1).shape().at(3);
142+
auto dtype = ctx.inInfo(1).data_type();
143+
ctx.outInfo(0) = {dtype, Shape({B, 1, S, S})};
144+
});

0 commit comments

Comments
 (0)