This example demonstrates best practices for optimizing application performance with oneDNN.
#include <iostream>
#include <stdexcept>
#include <vector>
#include "example_utils.hpp"
/// Fills every element of a oneDNN memory object with a constant value.
///
/// @param m  Memory object to initialize (assumed to hold f32 data —
///           all callers in this example create f32 tensors).
/// @param v  Value written to every element.
void init_data(memory &m, float v) {
    // BUG FIX: `size` was used without being defined. Derive the element
    // count from the memory descriptor: get_size() reports the total byte
    // footprint, so divide by the element width.
    const size_t size = m.get_desc().get_size() / sizeof(float);
    std::vector<float> data(size, v);
    // Copy the staged host buffer into the (possibly device-side) memory.
    write_to_dnnl_memory(data.data(), m);
}
}
return attr;
}
conv_dst_md, strides, padding, padding);
conv.execute(s,
create_and_execute_relu(user_dst, eng, s);
}
conv_dst_md, strides, padding, padding);
if (conv_pd.src_desc() != user_src.
get_desc()) {
conv_src =
memory(conv_pd.src_desc(), eng);
}
if (conv_pd.weights_desc() != user_wei.
get_desc()) {
conv_wei =
memory(conv_pd.weights_desc(), eng);
}
if (conv_pd.dst_desc() != user_dst.
get_desc())
conv_dst =
memory(conv_pd.dst_desc(), eng);
conv.execute(s,
create_and_execute_relu(conv_dst, eng, s);
if (conv_pd.dst_desc() != user_dst.
get_desc()) {
}
}
conv_dst_md, strides, padding, padding);
auto attr = create_attr_with_relu_post_op();
if (conv_pd.src_desc() != user_src.
get_desc()) {
conv_src =
memory(conv_pd.src_desc(), eng);
}
if (conv_pd.weights_desc() != user_wei.
get_desc()) {
conv_wei =
memory(conv_pd.weights_desc(), eng);
}
if (conv_pd.dst_desc() != user_dst.
get_desc())
conv_dst =
memory(conv_pd.dst_desc(), eng);
conv.execute(s,
if (conv_pd.dst_desc() != user_dst.
get_desc()) {
}
}
/// Dispatches the requested convolution + ReLU implementation(s).
///
/// @param engine_kind  Kind of engine (CPU/GPU) to execute on.
/// @param argc, argv   Command-line arguments; argv[2] (when argc == 3)
///                     selects the implementation: "naive", "blocked",
///                     "fused", or "validation" (default: runs all three).
/// @throws std::invalid_argument if the selected name is not recognized.
///
/// NOTE(review): this listing appears to be a garbled documentation
/// extraction — the declarations of `eng`, `s`, `user_src`, `user_wei`,
/// and `user_dst` are missing, and the three dangling `eng);` lines below
/// look like residue of the memory-object constructions. Recover the full
/// body from the original example source before building.
void performance_profiling(
engine::kind engine_kind,
int argc,
char **argv) {
eng);
eng);
eng);
// Seed source and weights with known values; destination gets a sentinel
// that the convolution will overwrite.
init_data(user_src, 1);
init_data(user_dst, -1);
init_data(user_wei, .5);
std::string implementation;
// argv[2] (if supplied) names the implementation; otherwise default to
// "validation". NOTE(review): when argc > 3 `implementation` stays empty
// and the check below throws — presumably intentional argument policing.
if (argc <= 2)
implementation = "validation";
else if (argc == 3)
implementation = argv[2];
if (!(implementation == "validation" || implementation == "naive"
|| implementation == "blocked" || implementation == "fused")) {
std::cout << "The implementation can be one of:\n";
std::cout << " - naive: NCHW format without fusion\n";
std::cout << " - blocked: format propagation without fusion\n";
std::cout << " - fused: format propagation with fusion\n";
std::cout << " - validation: runs all implementations\n\n";
std::cout << "Validation will run if no parameters are specified.\n\n";
throw std::invalid_argument("Incorrect input arguments.");
}
// "validation" runs every variant; a specific name runs only that one.
if (implementation == "naive" || implementation == "validation") {
std::cout << "Implementation: naive.\n";
conv_relu_naive(user_src, user_wei, user_dst, eng, s);
std::cout << "Conv + ReLU w/ nchw format completed.\n";
}
if (implementation == "blocked" || implementation == "validation") {
std::cout << "Implementation: blocked.\n";
conv_relu_blocked(user_src, user_wei, user_dst, eng, s);
std::cout << "Conv + ReLU w/ blocked format completed.\n";
}
if (implementation == "fused" || implementation == "validation") {
std::cout << "Implementation: fused.\n";
conv_relu_fused(user_src, user_wei, user_dst, eng, s);
std::cout << "Conv + ReLU w/ fusing completed.\n";
}
}
int main(int argc, char **argv) {
engine::kind engine_kind = parse_engine_kind(argc, argv, 1);
return handle_example_errors(
performance_profiling, engine_kind, argc, argv);
}
@ convolution_direct
Direct convolution.
Definition dnnl.hpp:482
@ eltwise_relu
Elementwise: rectified linear unit (ReLU)
Definition dnnl.hpp:490
@ forward_inference
Forward data propagation (inference mode).
Definition dnnl.hpp:449
@ dnnl_format_kind_any
Unspecified format kind.
Definition dnnl_types.h:85
#define DNNL_ARG_DST
A special mnemonic for destination argument for primitives that have a single destination.
Definition dnnl_types.h:1806
#define DNNL_ARG_SRC
A special mnemonic for source argument for primitives that have a single source.
Definition dnnl_types.h:1782
#define DNNL_ARG_WEIGHTS
A special mnemonic for primitives that have a single weights argument.
Definition dnnl_types.h:1829
oneDNN namespace
Definition dnnl.hpp:81
Descriptor for a convolution forward propagation primitive.
Definition dnnl.hpp:3542
Primitive descriptor for a convolution forward propagation primitive.
Definition dnnl.hpp:3746
Convolution forward propagation primitive.
Definition dnnl.hpp:3540
Descriptor for an elementwise forward propagation primitive.
Definition dnnl.hpp:5488
Primitive descriptor for an elementwise forward propagation primitive.
Definition dnnl.hpp:5522
Elementwise unary operation forward propagation primitive.
Definition dnnl.hpp:5486
An execution engine.
Definition dnnl.hpp:844
kind
Kinds of engines.
Definition dnnl.hpp:849
A memory descriptor.
Definition dnnl.hpp:1729
size_t get_size() const
Returns size of the memory descriptor in bytes.
Definition dnnl.hpp:1944
Memory object.
Definition dnnl.hpp:1188
dnnl_dim_t dim
Integer type for representing dimension sizes and indices.
Definition dnnl.hpp:1190
@ oihw
4D CNN weights tensor; an alias for dnnl::memory::format_tag::abcd
Definition dnnl.hpp:1382
@ nchw
4D CNN activations tensor; an alias for dnnl::memory::format_tag::abcd
Definition dnnl.hpp:1359
@ f32
32-bit/single-precision floating point.
Definition dnnl.hpp:1216
desc get_desc() const
Returns the associated memory descriptor.
Definition dnnl.hpp:2010
std::vector< dim > dims
Vector of dimensions.
Definition dnnl.hpp:1193
Post-ops.
Definition dnnl.hpp:2205
void append_eltwise(float scale, algorithm algorithm, float alpha, float beta)
Appends an elementwise post-op.
Definition dnnl.hpp:2280
Primitive attributes.
Definition dnnl.hpp:2481
void set_post_ops(const post_ops ops)
Sets post-ops.
Definition dnnl.hpp:2711
Primitive descriptor for a reorder primitive.
Definition dnnl.hpp:3120
Reorder primitive.
Definition dnnl.hpp:3118
void execute(const stream &stream, memory &src, memory &dst) const
Executes the reorder primitive.
Definition dnnl.hpp:3227
An execution stream.
Definition dnnl.hpp:1047