This C++ API example demonstrates how to run AlexNet's conv3 and relu3 layers using the int8 data type.
#include <stdexcept>
#include "example_utils.hpp"
// Construct the engine from the requested engine kind, using index 0.
auto eng =
engine(engine_kind, 0);
// Batch size; it is the leading factor when sizing user_src and user_dst.
const int batch = 8;
// Single-value scale factors for the source, weights, bias, and destination
// tensors.
// NOTE(review): presumably consumed by the reorder attributes (src_attr,
// weight_attr, bias_attr, dst_attr), whose setup is not visible in this
// listing -- confirm against the full example.
const std::vector<float> src_scales = {1.8f};
const std::vector<float> weight_scales = {2.0f};
const std::vector<float> bias_scales = {1.0f};
const std::vector<float> dst_scales = {0.55f};
// Convolution scale factors: 384 entries, the first half set to 0.3f and the
// second half set to 0.8f.
//
// The second range must start exactly at scales_half. Starting it at
// scales_half + 1 would skip that element and leave it at the vector's
// value-initialized 0.0f, i.e. one entry would be scaled by zero.
const int scales_half = 384 / 2;
std::vector<float> conv_scales = [] {
    // Start with every entry at the first-half value, then overwrite the
    // second half so that the two ranges cover the whole vector with no gap.
    std::vector<float> scales(384, 0.3f);
    std::fill(scales.begin() + scales_half, scales.end(), 0.8f);
    return scales;
}();
// Masks that accompany the scale vectors: 0 for source, weights, bias, and
// destination; 2 for the convolution scales.
// NOTE(review): presumably these are the `mask` argument of
// set_output_scales(int mask, const std::vector<float> &scales); a value of 2
// sets bit 1 only -- confirm which tensor dimension that bit selects.
const int src_mask = 0;
const int weight_mask = 0;
const int bias_mask = 0;
const int dst_mask = 0;
const int conv_mask = 2;
// User-side float buffers. Source and destination are sized as
// batch * channels * 13 * 13 with 256 and 384 channels respectively.
std::vector<float> user_src(batch * 256 * 13 * 13);
std::vector<float> user_dst(batch * 384 * 13 * 13);
// Weights and bias buffers are sized from the product of their dimension
// lists (conv_weights_tz / conv_bias_tz, declared outside this listing).
std::vector<float> conv_weights(product(conv_weights_tz));
std::vector<float> conv_bias(product(conv_bias_tz));
// User source memory: f32 data in nchw layout on the engine, filled from the
// user_src buffer.
auto user_src_memory =
memory({{conv_src_tz}, dt::f32, tag::nchw}, eng);
write_to_dnnl_memory(user_src.data(), user_src_memory);
// User weights memory: f32 data in oihw layout, filled from conv_weights.
auto user_weights_memory
=
memory({{conv_weights_tz}, dt::f32, tag::oihw}, eng);
write_to_dnnl_memory(conv_weights.data(), user_weights_memory);
// User bias memory: f32 data with the one-dimensional tag x, filled from
// conv_bias.
auto user_bias_memory =
memory({{conv_bias_tz}, dt::f32, tag::x}, eng);
write_to_dnnl_memory(conv_bias.data(), user_bias_memory);
// Memory descriptors for the convolution itself: u8 for source and
// destination, s8 for weights and bias, all with tag::any.
// NOTE(review): tag::any presumably leaves the layout choice to the
// convolution primitive -- confirm against the oneDNN format_tag reference.
auto conv_src_md =
memory::desc({conv_src_tz}, dt::u8, tag::any);
auto conv_bias_md =
memory::desc({conv_bias_tz}, dt::s8, tag::any);
auto conv_weights_md =
memory::desc({conv_weights_tz}, dt::s8, tag::any);
auto conv_dst_md =
memory::desc({conv_dst_tz}, dt::u8, tag::any);
// NOTE(review): the beginning of this statement is missing from the listing;
// only its trailing arguments (bias and destination descriptors, strides, and
// the padding passed twice) are visible.
conv_bias_md, conv_dst_md, conv_strides, conv_padding,
conv_padding);
// Scale, alpha, and beta values.
// NOTE(review): presumably the arguments for
// post_ops::append_eltwise(scale, algorithm, alpha, beta); the call itself is
// not visible here -- confirm.
const float ops_scale = 1.f;
const float ops_alpha = 0.f;
const float ops_beta = 0.f;
// NOTE(review): the guarded statement and the catch clause of this try block
// are missing from the listing. What remains throws
// example_allows_unimplemented with an explanatory message, followed by a
// bare `throw;` (a re-throw of the exception currently being handled).
try {
throw example_allows_unimplemented {
"oneDNN does not have int8 convolution implementation "
"that supports this system.\n"
"Please refer to the developer guide for details."};
throw;
}
// NOTE(review): the initializer of conv_prim_desc is missing from this
// listing; only the start of the declaration is visible.
auto conv_prim_desc
// Convolution source memory, created from the source descriptor reported by
// conv_prim_desc.
auto conv_src_memory =
memory(conv_prim_desc.src_desc(), eng);
// NOTE(review): the src_reorder_pd declaration is truncated; the visible tail
// passes conv_src_memory's descriptor and src_attr.
auto src_reorder_pd
conv_src_memory.get_desc(), src_attr);
// Build the source reorder and execute it from the user source memory into
// the convolution source memory, using s (presumably the execution stream,
// declared outside this listing).
auto src_reorder =
reorder(src_reorder_pd);
src_reorder.execute(s, user_src_memory, conv_src_memory);
// Convolution weights memory, created from the weights descriptor reported by
// conv_prim_desc.
auto conv_weights_memory =
memory(conv_prim_desc.weights_desc(), eng);
// NOTE(review): the weight_reorder_pd declaration is truncated; the visible
// tail passes conv_weights_memory's descriptor and weight_attr.
auto weight_reorder_pd
conv_weights_memory.get_desc(), weight_attr);
// Build the weights reorder and execute it from the user weights memory into
// the convolution weights memory.
auto weight_reorder =
reorder(weight_reorder_pd);
weight_reorder.execute(s, user_weights_memory, conv_weights_memory);
// Convolution bias memory, created from the bias descriptor reported by
// conv_prim_desc.
auto conv_bias_memory =
memory(conv_prim_desc.bias_desc(), eng);
// NOTE(review): the bias_reorder_pd declaration is truncated; the visible
// tail passes conv_bias_memory's descriptor and bias_attr.
auto bias_reorder_pd
conv_bias_memory.get_desc(), bias_attr);
// Build the bias reorder and execute it from the user bias memory into the
// convolution bias memory.
auto bias_reorder =
reorder(bias_reorder_pd);
bias_reorder.execute(s, user_bias_memory, conv_bias_memory);
// Convolution destination memory, created from the destination descriptor
// reported by conv_prim_desc.
auto conv_dst_memory =
memory(conv_prim_desc.dst_desc(), eng);
// NOTE(review): the argument list of conv.execute is missing from this
// listing, as is the creation of `conv` itself.
conv.execute(s,
// User destination memory: f32 data in nchw layout, initialized from the
// user_dst buffer.
auto user_dst_memory =
memory({{conv_dst_tz}, dt::f32, tag::nchw}, eng);
write_to_dnnl_memory(user_dst.data(), user_dst_memory);
// NOTE(review): the dst_reorder_pd declaration is truncated; the visible tail
// passes user_dst_memory's descriptor and dst_attr.
auto dst_reorder_pd
user_dst_memory.get_desc(), dst_attr);
// Build the destination reorder and execute it from the convolution
// destination memory into the user destination memory.
auto dst_reorder =
reorder(dst_reorder_pd);
dst_reorder.execute(s, conv_dst_memory, user_dst_memory);
}
int main(int argc, char **argv) {
return handle_example_errors(
simple_net_int8, parse_engine_kind(argc, argv));
}
@ convolution_direct
Direct convolution.
Definition dnnl.hpp:482
@ eltwise_relu
Elementwise: rectified linear unit (ReLU)
Definition dnnl.hpp:490
@ forward
Forward data propagation, alias for dnnl::prop_kind::forward_training.
Definition dnnl.hpp:455
#define DNNL_ARG_DST
A special mnemonic for destination argument for primitives that have a single destination.
Definition dnnl_types.h:1806
#define DNNL_ARG_SRC
A special mnemonic for source argument for primitives that have a single source.
Definition dnnl_types.h:1782
#define DNNL_ARG_BIAS
Bias tensor argument.
Definition dnnl_types.h:1856
#define DNNL_ARG_WEIGHTS
A special mnemonic for primitives that have a single weights argument.
Definition dnnl_types.h:1829
@ dnnl_unimplemented
The operation failed because requested functionality is not implemented.
Definition dnnl_types.h:47
oneDNN namespace
Definition dnnl.hpp:81
Descriptor for a convolution forward propagation primitive.
Definition dnnl.hpp:3542
Primitive descriptor for a convolution forward propagation primitive.
Definition dnnl.hpp:3746
Convolution forward propagation primitive.
Definition dnnl.hpp:3540
An execution engine.
Definition dnnl.hpp:844
kind
Kinds of engines.
Definition dnnl.hpp:849
oneDNN exception class.
Definition dnnl.hpp:91
A memory descriptor.
Definition dnnl.hpp:1729
Memory object.
Definition dnnl.hpp:1188
format_tag
Memory format tag specification.
Definition dnnl.hpp:1282
data_type
Data type specification.
Definition dnnl.hpp:1208
std::vector< dim > dims
Vector of dimensions.
Definition dnnl.hpp:1193
Post-ops.
Definition dnnl.hpp:2205
void append_eltwise(float scale, algorithm algorithm, float alpha, float beta)
Appends an elementwise post-op.
Definition dnnl.hpp:2280
Primitive attributes.
Definition dnnl.hpp:2481
void set_output_scales(int mask, const std::vector< float > &scales)
Sets output scaling factors correspondence mask and values.
Definition dnnl.hpp:2583
void set_post_ops(const post_ops ops)
Sets post-ops.
Definition dnnl.hpp:2711
Primitive descriptor for a reorder primitive.
Definition dnnl.hpp:3120
Reorder primitive.
Definition dnnl.hpp:3118
An execution stream.
Definition dnnl.hpp:1047
stream & wait()
Waits for all primitives executing in the stream to finish.
Definition dnnl.hpp:1107