Deep Neural Network Library (DNNL)
1.90.1
Performance library for Deep Learning
Annotated version: Reorder between CPU and GPU engines
#include <stdio.h>
#include <stdlib.h>
#include "example_utils.h"
/**
 * Multiplies together the first n_dims entries of dims.
 *
 * @param n_dims Number of entries of dims to include.
 * @param dims   Array of dimension sizes; each entry is cast to size_t
 *               before being multiplied in.
 * @return The product of the entries, or 1 when n_dims is zero or negative
 *         (the loop body never runs in that case).
 */
size_t product(int n_dims, const dnnl_dim_t dims[]) {
    size_t total = 1;
    int axis = 0;
    while (axis < n_dims) {
        total *= (size_t)dims[axis];
        axis++;
    }
    return total;
}
/* NOTE(review): the function header and whatever code points `array` at real
 * storage are missing from this listing; as shown, `array` is written through
 * while uninitialized. Judging by the call `fill(m_cpu, 4, tz)` further down,
 * this is presumably the body of fill(mem, n_dims, dims) -- confirm against
 * the original example before relying on it. */
float *array;
/* Element count is the product of all dimension sizes. */
const size_t n_elems = product(n_dims, dims);
for (size_t e = 0; e < n_elems; ++e) {
/* Indices that are multiples of 7 (including 0) receive -1.0f; every other
 * index receives 1.0f, so the buffer contains some negative values whenever
 * it is non-empty. */
array[e] = e % 7 ? 1.0f : -1.0f;
}
}
/* NOTE(review): the function header and whatever code points `array` at real
 * storage are missing from this listing; as shown, `array` is read while
 * uninitialized. Judging by the call `find_negative(m_cpu, 4, tz)` further
 * down, this is presumably the body of find_negative(mem, n_dims, dims) --
 * confirm against the original example. */
int negs = 0;
float *array;
/* Element count is the product of all dimension sizes. */
const size_t n_elems = product(n_dims, dims);
for (size_t e = 0; e < n_elems; ++e) {
/* The comparison evaluates to 0 or 1, so this adds one for every element
 * that is strictly below zero. */
negs += array[e] < 0.0f;
}
/* Number of strictly negative elements seen. */
return negs;
}
/* Runs the example scenario and returns 0 on success, or 2 when negative
 * values are still found in m_cpu at the end. It also terminates the process
 * with exit(2) if the initial fill produced no negative values.
 *
 * NOTE(review): this listing is incomplete. Declarations (tz, engines, memory
 * descriptors, primitives, stream), the names of most called functions, and
 * the execute/cleanup steps are missing, so several lines below are only the
 * trailing argument lists of calls whose beginnings are not shown. */
int doit() {
/* NOTE(review): argument lists of two calls that take a memory handle, a
 * memory descriptor, an engine and DNNL_MEMORY_ALLOCATE -- presumably
 * dnnl_memory_create for the CPU and GPU memories; confirm. */
&m_cpu, &m_cpu_md, engine_cpu, DNNL_MEMORY_ALLOCATE));
&m_gpu, &m_gpu_md, engine_gpu, DNNL_MEMORY_ALLOCATE));
/* Populate the CPU memory, then sanity-check that the fill really produced
 * negative values; without them the final check could not detect anything. */
fill(m_cpu, 4, tz);
if (find_negative(m_cpu, 4, tz) == 0) {
printf("Please fix filling of data\n");
exit(2);
}
/* NOTE(review): argument list ordered (pd, src md, src engine, dst md,
 * dst engine, attr) with CPU as source and GPU as destination -- presumably
 * dnnl_reorder_primitive_desc_create; confirm. */
&r1_pd, &m_cpu_md, engine_cpu, &m_gpu_md, engine_gpu, NULL));
/* NOTE(review): presumably dnnl_primitive_desc_create for the ReLU eltwise
 * descriptor relu_d on the GPU engine; confirm. */
&relu_pd, &relu_d, NULL, engine_gpu, NULL));
/* NOTE(review): same shape as the r1 call, with GPU as source and CPU as
 * destination. */
&r2_pd, &m_gpu_md, engine_gpu, &m_cpu_md, engine_cpu, NULL));
/* Execution arguments for the first reorder: read m_cpu, write m_gpu. */
dnnl_exec_arg_t r1_args[] = {{DNNL_ARG_FROM, m_cpu}, {DNNL_ARG_TO, m_gpu}};
/* NOTE(review): the declaration this initializer belongs to is missing.
 * Source and destination are both m_gpu, i.e. the operation is in place. */
= {{DNNL_ARG_SRC, m_gpu}, {DNNL_ARG_DST, m_gpu}};
/* Execution arguments for the second reorder: read m_gpu, write m_cpu. */
dnnl_exec_arg_t r2_args[] = {{DNNL_ARG_FROM, m_gpu}, {DNNL_ARG_TO, m_cpu}};
/* Any negative value remaining in m_cpu is reported as failure code 2. */
if (find_negative(m_cpu, 4, tz) != 0) return 2;
return 0;
}
/* Program entry point: runs doit(), prints "failed" for a non-zero status or
 * "passed" for zero, and uses that status as the process exit code. */
int main() {
    const int status = doit();
    const char *verdict = (status != 0) ? "failed\n" : "passed\n";
    printf("%s", verdict);
    return status;
}
32-bit/single-precision floating point.
Definition: dnnl_types.h:75
CPU engine.
Definition: dnnl_types.h:1324
dnnl_status_t DNNL_API dnnl_stream_destroy(dnnl_stream_t stream)
Destroys an execution stream.
An opaque structure to describe an engine.
dnnl_status_t DNNL_API dnnl_primitive_desc_create(dnnl_primitive_desc_t *primitive_desc, const_dnnl_op_desc_t op_desc, const_dnnl_primitive_attr_t attr, dnnl_engine_t engine, const_dnnl_primitive_desc_t hint_forward_primitive_desc)
Creates a primitive_desc using op_desc, attr, engine, and optionally a hint primitive descriptor from forward propagation.
An auxiliary structure to specify primitive's inputs/outputs at execution.
Definition: dnnl_types.h:1519
dnnl_status_t DNNL_API dnnl_memory_destroy(dnnl_memory_t memory)
Deletes a memory.
An opaque structure to describe a primitive descriptor.
dnnl_status_t DNNL_API dnnl_primitive_desc_destroy(dnnl_primitive_desc_t primitive_desc)
Deletes a primitive_desc.
dnnl_status_t DNNL_API dnnl_memory_desc_init_by_tag(dnnl_memory_desc_t *memory_desc, int ndims, const dnnl_dims_t dims, dnnl_data_type_t data_type, dnnl_format_tag_t tag)
Initializes a memory_desc memory descriptor using ndims, dims, data_type, and format tag.
dnnl_status_t DNNL_API dnnl_engine_destroy(dnnl_engine_t engine)
Destroys an engine.
dnnl_status_t DNNL_API dnnl_memory_create(dnnl_memory_t *memory, const dnnl_memory_desc_t *memory_desc, dnnl_engine_t engine, void *handle)
Creates a memory for given memory_desc and engine.
dnnl_status_t DNNL_API dnnl_memory_map_data(const_dnnl_memory_t memory, void **mapped_ptr)
For a memory, maps the data of the memory to mapped_ptr.
dnnl_status_t DNNL_API dnnl_engine_create(dnnl_engine_t *engine, dnnl_engine_kind_t kind, size_t index)
Creates an engine of particular kind and index.
A descriptor of an element-wise operation.
Definition: dnnl_types.h:1016
GPU engine.
Definition: dnnl_types.h:1326
dnnl_status_t DNNL_API dnnl_stream_wait(dnnl_stream_t stream)
Waits for all primitives in the execution stream to finish.
4D CNN activations tensor, an alias to dnnl_abcd
Definition: dnnl_types.h:351
dnnl_status_t DNNL_API dnnl_reorder_primitive_desc_create(dnnl_primitive_desc_t *reorder_primitive_desc, const dnnl_memory_desc_t *src_md, dnnl_engine_t src_engine, const dnnl_memory_desc_t *dst_md, dnnl_engine_t dst_engine, const_dnnl_primitive_attr_t attr)
Initializes a reorder_primitive_desc using the description of the source (src_engine and src_md) and destination (dst_engine and dst_md) memory, and an attr attribute.
dnnl_status_t DNNL_API dnnl_primitive_create(dnnl_primitive_t *primitive, const_dnnl_primitive_desc_t primitive_desc)
Creates a primitive using a primitive_desc descriptor.
Memory descriptor.
Definition: dnnl_types.h:883
Eltwise: ReLU.
Definition: dnnl_types.h:665
int64_t dnnl_dim_t
A type to describe tensor dimension.
Definition: dnnl_types.h:777
dnnl_status_t DNNL_API dnnl_eltwise_forward_desc_init(dnnl_eltwise_desc_t *eltwise_desc, dnnl_prop_kind_t prop_kind, dnnl_alg_kind_t alg_kind, const dnnl_memory_desc_t *data_desc, float alpha, float beta)
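Initializes an eltwise_desc for forward propagation using prop_kind (possible values are dnnl_forward_training and dnnl_forward_inference), the alg_kind algorithm, the memory descriptor data_desc, and the alpha and beta parameters.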
dnnl_status_t DNNL_API dnnl_primitive_destroy(dnnl_primitive_t primitive)
Deletes a primitive.
Default stream configuration.
Definition: dnnl_types.h:1626
dnnl_status_t DNNL_API dnnl_stream_create(dnnl_stream_t *stream, dnnl_engine_t engine, unsigned flags)
Creates an execution stream for engine and with flags.
dnnl_status_t DNNL_API dnnl_memory_unmap_data(const_dnnl_memory_t memory, void *mapped_ptr)
For a memory, unmaps a mapped pointer to the data of the memory.
Forward data propagation (alias for dnnl_forward_training).
Definition: dnnl_types.h:601
dnnl_status_t DNNL_API dnnl_primitive_execute(const_dnnl_primitive_t primitive, dnnl_stream_t stream, int nargs, const dnnl_exec_arg_t *args)
Executes a primitive using a stream, and nargs arguments args.