+2016-07-21 Ilya Verbin <ilya.verbin@intel.com>
+
+ * Makefile.am (myo_inc_dir): Remove.
+ (toolexeclib_LTLIBRARIES): Remove libmyo-client.la and
+ libmyo-service.la.
+ (liboffloadmic_cppflags): Remove -DMYO_SUPPORT.
+ (liboffloadmic_host_la_SOURCES): Remove offload_myo_host.cpp.
+ (liboffloadmic_target_la_SOURCES): Remove offload_myo_target.cpp.
+ (liboffloadmic_target_la_LIBADD): Remove libmyo-service.la.
+ (libmyo_client_la_SOURCES, libmyo_service_la_SOURCES): Remove.
+ (libmyo_client_la_DEPENDENCIES, libmyo_service_la_DEPENDENCIES): Remove.
+ (libmyo_client_la_CPPFLAGS, libmyo_service_la_CPPFLAGS): Remove.
+ (libmyo_client_la_LDFLAGS, libmyo_service_la_LDFLAGS): Remove.
+ * Makefile.in: Regenerate.
+ * doc/doxygen/header.tex: Merge from upstream, version 20160715
+ <https://openmprtl.org/sites/default/files/liboffload_oss_20160715.tgz>.
+ * runtime/cean_util.cpp: Likewise.
+ * runtime/cean_util.h: Likewise.
+ * runtime/coi/coi_client.cpp: Likewise.
+ * runtime/coi/coi_client.h: Likewise.
+ * runtime/coi/coi_server.cpp: Likewise.
+ * runtime/coi/coi_server.h: Likewise.
+ * runtime/compiler_if_host.cpp: Likewise.
+ * runtime/compiler_if_host.h: Likewise.
+ * runtime/compiler_if_target.cpp: Likewise.
+ * runtime/compiler_if_target.h: Likewise.
+ * runtime/dv_util.cpp: Likewise.
+ * runtime/dv_util.h: Likewise.
+ * runtime/liboffload_error.c: Likewise.
+ * runtime/liboffload_error_codes.h: Likewise.
+ * runtime/liboffload_msg.c: Likewise.
+ * runtime/liboffload_msg.h: Likewise.
+ * runtime/mic_lib.f90: Likewise.
+ * runtime/offload.h: Likewise.
+ * runtime/offload_common.cpp: Likewise.
+ * runtime/offload_common.h: Likewise.
+ * runtime/offload_engine.cpp: Likewise.
+ * runtime/offload_engine.h: Likewise.
+ * runtime/offload_env.cpp: Likewise.
+ * runtime/offload_env.h: Likewise.
+ * runtime/offload_host.cpp: Likewise.
+ * runtime/offload_host.h: Likewise.
+ * runtime/offload_iterator.h: Likewise.
+ * runtime/offload_myo_host.cpp: Likewise.
+ * runtime/offload_myo_host.h: Likewise.
+ * runtime/offload_myo_target.cpp: Likewise.
+ * runtime/offload_myo_target.h: Likewise.
+ * runtime/offload_omp_host.cpp: Likewise.
+ * runtime/offload_omp_target.cpp: Likewise.
+ * runtime/offload_orsl.cpp: Likewise.
+ * runtime/offload_orsl.h: Likewise.
+ * runtime/offload_table.cpp: Likewise.
+ * runtime/offload_table.h: Likewise.
+ * runtime/offload_target.cpp: Likewise.
+ * runtime/offload_target.h: Likewise.
+ * runtime/offload_target_main.cpp: Likewise.
+ * runtime/offload_timer.h: Likewise.
+ * runtime/offload_timer_host.cpp: Likewise.
+ * runtime/offload_timer_target.cpp: Likewise.
+ * runtime/offload_trace.cpp: Likewise.
+ * runtime/offload_trace.h: Likewise.
+ * runtime/offload_util.cpp: Likewise.
+ * runtime/offload_util.h: Likewise.
+ * runtime/ofldbegin.cpp: Likewise.
+ * runtime/ofldend.cpp: Likewise.
+ * runtime/orsl-lite/include/orsl-lite.h: Likewise.
+ * runtime/orsl-lite/lib/orsl-lite.c: Likewise.
+ * runtime/use_mpss2.txt: Remove.
+ * include/coi/common/COIEngine_common.h: Merge from upstream, MPSS
+ version 3.7.1
+ <http://registrationcenter-download.intel.com/akdlm/irc_nas/9226/
+ mpss-3.7.1-linux.tar>.
+ * include/coi/common/COIEvent_common.h: Likewise.
+ * include/coi/common/COIMacros_common.h: Likewise.
+ * include/coi/common/COIPerf_common.h: Likewise.
+ * include/coi/common/COIResult_common.h: Likewise.
+ * include/coi/common/COISysInfo_common.h: Likewise.
+ * include/coi/common/COITypes_common.h: Likewise.
+ * include/coi/sink/COIBuffer_sink.h: Likewise.
+ * include/coi/sink/COIPipeline_sink.h: Likewise.
+ * include/coi/sink/COIProcess_sink.h: Likewise.
+ * include/coi/source/COIBuffer_source.h: Likewise.
+ * include/coi/source/COIEngine_source.h: Likewise.
+ * include/coi/source/COIEvent_source.h: Likewise.
+ * include/coi/source/COIPipeline_source.h: Likewise.
+ * include/coi/source/COIProcess_source.h: Likewise.
+ * include/myo/myo.h: Remove.
+ * include/myo/myoimpl.h: Remove.
+ * include/myo/myotypes.h: Remove.
+ * plugin/Makefile.am (AM_LDFLAGS): Remove -lmyo-service.
+ * plugin/Makefile.in: Regenerate.
+ * plugin/libgomp-plugin-intelmic.cpp (LD_LIBRARY_PATH_ENV): Remove.
+ (MIC_LD_LIBRARY_PATH_ENV): Remove.
+ (init): Do not set MIC_LD_LIBRARY_PATH. Now liboffloadmic uses only
+ LD_LIBRARY_PATH.
+ * plugin/offload_target_main.cpp: Update copyright years.
+ * runtime/emulator/coi_common.h: Likewise.
+ * runtime/emulator/coi_device.cpp: Likewise.
+ * runtime/emulator/coi_device.h: Likewise.
+ * runtime/emulator/coi_host.cpp: Likewise.
+ (COIBufferCreate): Allow COI_BUFFER_OPENCL.
+ (COIEngineGetInfo): Return COI_DEVICE_KNL instead of COI_ISA_x86_64.
+ * runtime/emulator/coi_host.h: Update copyright years.
+ * runtime/emulator/coi_version_asm.h: Likewise.
+ * runtime/emulator/coi_version_linker_script.map: Likewise.
+ * runtime/emulator/myo_client.cpp: Remove.
+ * runtime/emulator/myo_service.cpp: Remove.
+ * runtime/emulator/myo_service.h: Remove.
+ * runtime/emulator/myo_version_asm.h: Remove.
+ * runtime/emulator/myo_version_linker_script.map: Remove.
+
2016-05-26 Chung-Lin Tang <cltang@codesourcery.com>
* plugin/libgomp-plugin-intelmic.cpp (offload): Change return type
# Directories.
build_dir = $(top_builddir)
coi_inc_dir = $(top_srcdir)/include/coi
-myo_inc_dir = $(top_srcdir)/include/myo
libgomp_dir = $(build_dir)/../libgomp
source_dir = $(top_srcdir)/runtime
# Target list.
if LIBOFFLOADMIC_HOST
nodist_toolexeclib_HEADERS = liboffloadmic_host.spec
- toolexeclib_LTLIBRARIES = libcoi_host.la libmyo-client.la liboffloadmic_host.la
+ toolexeclib_LTLIBRARIES = libcoi_host.la liboffloadmic_host.la
nodist_libsubinclude_HEADERS = runtime/compiler_if_host.h
ofld_obj =
else # LIBOFFLOADMIC_TARGET
nodist_toolexeclib_HEADERS = liboffloadmic_target.spec
- toolexeclib_LTLIBRARIES = libcoi_device.la libmyo-service.la liboffloadmic_target.la
+ toolexeclib_LTLIBRARIES = libcoi_device.la liboffloadmic_target.la
nodist_libsubinclude_HEADERS = runtime/compiler_if_target.h
ofld_obj = ofldbegin.o ofldend.o
endif
runtime/offload_trace.cpp \
runtime/offload_util.cpp
-liboffloadmic_cppflags = -DLINUX -DCOI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -I$(coi_inc_dir) -I$(myo_inc_dir) -I$(source_dir) -I$(libgomp_dir)
+liboffloadmic_cppflags = -DLINUX -DCOI_LIBRARY_VERSION=2 -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -I$(coi_inc_dir) -I$(source_dir) -I$(libgomp_dir)
liboffloadmic_host_la_SOURCES = $(liboffloadmic_sources) \
runtime/cean_util.cpp \
runtime/offload_engine.cpp \
runtime/offload_env.cpp \
runtime/offload_host.cpp \
- runtime/offload_myo_host.cpp \
runtime/offload_omp_host.cpp \
runtime/offload_orsl.cpp \
runtime/offload_timer_host.cpp \
liboffloadmic_target_la_SOURCES = $(liboffloadmic_sources) \
runtime/coi/coi_server.cpp \
runtime/compiler_if_target.cpp \
- runtime/offload_myo_target.cpp \
runtime/offload_omp_target.cpp \
runtime/offload_target.cpp \
runtime/offload_timer_target.cpp
liboffloadmic_target_la_CPPFLAGS = $(liboffloadmic_cppflags) -DHOST_LIBRARY=0
liboffloadmic_target_la_LDFLAGS = @lt_cv_dlopen_libs@ -version-info 5:0:0
-liboffloadmic_target_la_LIBADD = libcoi_device.la libmyo-service.la
+liboffloadmic_target_la_LIBADD = libcoi_device.la
liboffloadmic_target_la_DEPENDENCIES = $(liboffloadmic_target_la_LIBADD)
# Emulator.
libcoi_host_la_SOURCES = runtime/emulator/coi_host.cpp
libcoi_device_la_SOURCES = runtime/emulator/coi_device.cpp
-libmyo_client_la_SOURCES = runtime/emulator/myo_client.cpp
-libmyo_service_la_SOURCES = runtime/emulator/myo_service.cpp
libcoi_host_la_DEPENDENCIES = runtime/emulator/coi_version_linker_script.map
libcoi_device_la_DEPENDENCIES = runtime/emulator/coi_version_linker_script.map
-libmyo_client_la_DEPENDENCIES = runtime/emulator/myo_version_linker_script.map
-libmyo_service_la_DEPENDENCIES = runtime/emulator/myo_version_linker_script.map
libcoi_host_la_CPPFLAGS = -I$(coi_inc_dir)
libcoi_device_la_CPPFLAGS = -I$(coi_inc_dir)
-libmyo_client_la_CPPFLAGS = -I$(myo_inc_dir)
-libmyo_service_la_CPPFLAGS = -I$(myo_inc_dir)
libcoi_host_la_LDFLAGS = -lrt -Wl,--version-script=$(source_dir)/emulator/coi_version_linker_script.map
libcoi_device_la_LDFLAGS = -lrt -Wl,--version-script=$(source_dir)/emulator/coi_version_linker_script.map
-libmyo_client_la_LDFLAGS = -lrt -Wl,--version-script=$(source_dir)/emulator/myo_version_linker_script.map
-libmyo_service_la_LDFLAGS = -lrt -Wl,--version-script=$(source_dir)/emulator/myo_version_linker_script.map
# ofldbegin, ofldend
all-local: $(ofld_obj)
$(CXXFLAGS) $(libcoi_host_la_LDFLAGS) $(LDFLAGS) -o $@
@LIBOFFLOADMIC_HOST_TRUE@am_libcoi_host_la_rpath = -rpath \
@LIBOFFLOADMIC_HOST_TRUE@ $(toolexeclibdir)
-libmyo_client_la_LIBADD =
-am_libmyo_client_la_OBJECTS = libmyo_client_la-myo_client.lo
-libmyo_client_la_OBJECTS = $(am_libmyo_client_la_OBJECTS)
-libmyo_client_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
- $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
- $(CXXFLAGS) $(libmyo_client_la_LDFLAGS) $(LDFLAGS) -o $@
-@LIBOFFLOADMIC_HOST_TRUE@am_libmyo_client_la_rpath = -rpath \
-@LIBOFFLOADMIC_HOST_TRUE@ $(toolexeclibdir)
-libmyo_service_la_LIBADD =
-am_libmyo_service_la_OBJECTS = libmyo_service_la-myo_service.lo
-libmyo_service_la_OBJECTS = $(am_libmyo_service_la_OBJECTS)
-libmyo_service_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
- $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
- $(CXXFLAGS) $(libmyo_service_la_LDFLAGS) $(LDFLAGS) -o $@
-@LIBOFFLOADMIC_HOST_FALSE@am_libmyo_service_la_rpath = -rpath \
-@LIBOFFLOADMIC_HOST_FALSE@ $(toolexeclibdir)
liboffloadmic_host_la_LIBADD =
am__objects_1 = liboffloadmic_host_la-dv_util.lo \
liboffloadmic_host_la-liboffload_error.lo \
liboffloadmic_host_la-offload_engine.lo \
liboffloadmic_host_la-offload_env.lo \
liboffloadmic_host_la-offload_host.lo \
- liboffloadmic_host_la-offload_myo_host.lo \
liboffloadmic_host_la-offload_omp_host.lo \
liboffloadmic_host_la-offload_orsl.lo \
liboffloadmic_host_la-offload_timer_host.lo \
am_liboffloadmic_target_la_OBJECTS = $(am__objects_2) \
liboffloadmic_target_la-coi_server.lo \
liboffloadmic_target_la-compiler_if_target.lo \
- liboffloadmic_target_la-offload_myo_target.lo \
liboffloadmic_target_la-offload_omp_target.lo \
liboffloadmic_target_la-offload_target.lo \
liboffloadmic_target_la-offload_timer_target.lo
--mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
SOURCES = $(libcoi_device_la_SOURCES) $(libcoi_host_la_SOURCES) \
- $(libmyo_client_la_SOURCES) $(libmyo_service_la_SOURCES) \
$(liboffloadmic_host_la_SOURCES) \
$(liboffloadmic_target_la_SOURCES)
MULTISRCTOP =
# Directories.
build_dir = $(top_builddir)
coi_inc_dir = $(top_srcdir)/include/coi
-myo_inc_dir = $(top_srcdir)/include/myo
libgomp_dir = $(build_dir)/../libgomp
source_dir = $(top_srcdir)/runtime
# Target list.
@LIBOFFLOADMIC_HOST_TRUE@nodist_toolexeclib_HEADERS = liboffloadmic_host.spec
-@LIBOFFLOADMIC_HOST_FALSE@toolexeclib_LTLIBRARIES = libcoi_device.la libmyo-service.la liboffloadmic_target.la
-@LIBOFFLOADMIC_HOST_TRUE@toolexeclib_LTLIBRARIES = libcoi_host.la libmyo-client.la liboffloadmic_host.la
+@LIBOFFLOADMIC_HOST_FALSE@toolexeclib_LTLIBRARIES = libcoi_device.la liboffloadmic_target.la
+@LIBOFFLOADMIC_HOST_TRUE@toolexeclib_LTLIBRARIES = libcoi_host.la liboffloadmic_host.la
@LIBOFFLOADMIC_HOST_FALSE@nodist_libsubinclude_HEADERS = runtime/compiler_if_target.h
@LIBOFFLOADMIC_HOST_TRUE@nodist_libsubinclude_HEADERS = runtime/compiler_if_host.h
@LIBOFFLOADMIC_HOST_FALSE@ofld_obj = ofldbegin.o ofldend.o
runtime/offload_trace.cpp \
runtime/offload_util.cpp
-liboffloadmic_cppflags = -DLINUX -DCOI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -I$(coi_inc_dir) -I$(myo_inc_dir) -I$(source_dir) -I$(libgomp_dir)
+liboffloadmic_cppflags = -DLINUX -DCOI_LIBRARY_VERSION=2 -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -I$(coi_inc_dir) -I$(source_dir) -I$(libgomp_dir)
liboffloadmic_host_la_SOURCES = $(liboffloadmic_sources) \
runtime/cean_util.cpp \
runtime/coi/coi_client.cpp \
runtime/offload_engine.cpp \
runtime/offload_env.cpp \
runtime/offload_host.cpp \
- runtime/offload_myo_host.cpp \
runtime/offload_omp_host.cpp \
runtime/offload_orsl.cpp \
runtime/offload_timer_host.cpp \
liboffloadmic_target_la_SOURCES = $(liboffloadmic_sources) \
runtime/coi/coi_server.cpp \
runtime/compiler_if_target.cpp \
- runtime/offload_myo_target.cpp \
runtime/offload_omp_target.cpp \
runtime/offload_target.cpp \
runtime/offload_timer_target.cpp
liboffloadmic_target_la_CPPFLAGS = $(liboffloadmic_cppflags) -DHOST_LIBRARY=0
liboffloadmic_target_la_LDFLAGS = @lt_cv_dlopen_libs@ -version-info 5:0:0
-liboffloadmic_target_la_LIBADD = libcoi_device.la libmyo-service.la
+liboffloadmic_target_la_LIBADD = libcoi_device.la
liboffloadmic_target_la_DEPENDENCIES = $(liboffloadmic_target_la_LIBADD)
# Emulator.
libcoi_host_la_SOURCES = runtime/emulator/coi_host.cpp
libcoi_device_la_SOURCES = runtime/emulator/coi_device.cpp
-libmyo_client_la_SOURCES = runtime/emulator/myo_client.cpp
-libmyo_service_la_SOURCES = runtime/emulator/myo_service.cpp
libcoi_host_la_DEPENDENCIES = runtime/emulator/coi_version_linker_script.map
libcoi_device_la_DEPENDENCIES = runtime/emulator/coi_version_linker_script.map
-libmyo_client_la_DEPENDENCIES = runtime/emulator/myo_version_linker_script.map
-libmyo_service_la_DEPENDENCIES = runtime/emulator/myo_version_linker_script.map
libcoi_host_la_CPPFLAGS = -I$(coi_inc_dir)
libcoi_device_la_CPPFLAGS = -I$(coi_inc_dir)
-libmyo_client_la_CPPFLAGS = -I$(myo_inc_dir)
-libmyo_service_la_CPPFLAGS = -I$(myo_inc_dir)
libcoi_host_la_LDFLAGS = -lrt -Wl,--version-script=$(source_dir)/emulator/coi_version_linker_script.map
libcoi_device_la_LDFLAGS = -lrt -Wl,--version-script=$(source_dir)/emulator/coi_version_linker_script.map
-libmyo_client_la_LDFLAGS = -lrt -Wl,--version-script=$(source_dir)/emulator/myo_version_linker_script.map
-libmyo_service_la_LDFLAGS = -lrt -Wl,--version-script=$(source_dir)/emulator/myo_version_linker_script.map
# Work around what appears to be a GNU make bug handling MAKEFLAGS
# values defined in terms of make variables, as is the case for CC and
$(libcoi_device_la_LINK) $(am_libcoi_device_la_rpath) $(libcoi_device_la_OBJECTS) $(libcoi_device_la_LIBADD) $(LIBS)
libcoi_host.la: $(libcoi_host_la_OBJECTS) $(libcoi_host_la_DEPENDENCIES) $(EXTRA_libcoi_host_la_DEPENDENCIES)
$(libcoi_host_la_LINK) $(am_libcoi_host_la_rpath) $(libcoi_host_la_OBJECTS) $(libcoi_host_la_LIBADD) $(LIBS)
-libmyo-client.la: $(libmyo_client_la_OBJECTS) $(libmyo_client_la_DEPENDENCIES) $(EXTRA_libmyo_client_la_DEPENDENCIES)
- $(libmyo_client_la_LINK) $(am_libmyo_client_la_rpath) $(libmyo_client_la_OBJECTS) $(libmyo_client_la_LIBADD) $(LIBS)
-libmyo-service.la: $(libmyo_service_la_OBJECTS) $(libmyo_service_la_DEPENDENCIES) $(EXTRA_libmyo_service_la_DEPENDENCIES)
- $(libmyo_service_la_LINK) $(am_libmyo_service_la_rpath) $(libmyo_service_la_OBJECTS) $(libmyo_service_la_LIBADD) $(LIBS)
liboffloadmic_host.la: $(liboffloadmic_host_la_OBJECTS) $(liboffloadmic_host_la_DEPENDENCIES) $(EXTRA_liboffloadmic_host_la_DEPENDENCIES)
$(liboffloadmic_host_la_LINK) $(am_liboffloadmic_host_la_rpath) $(liboffloadmic_host_la_OBJECTS) $(liboffloadmic_host_la_LIBADD) $(LIBS)
liboffloadmic_target.la: $(liboffloadmic_target_la_OBJECTS) $(liboffloadmic_target_la_DEPENDENCIES) $(EXTRA_liboffloadmic_target_la_DEPENDENCIES)
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcoi_device_la-coi_device.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libcoi_host_la-coi_host.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmyo_client_la-myo_client.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmyo_service_la-myo_service.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_host_la-cean_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_host_la-coi_client.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_host_la-compiler_if_host.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_host_la-offload_engine.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_host_la-offload_env.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_host_la-offload_host.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_host_la-offload_myo_host.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_host_la-offload_omp_host.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_host_la-offload_orsl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_host_la-offload_table.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_target_la-liboffload_error.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_target_la-liboffload_msg.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_target_la-offload_common.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_target_la-offload_myo_target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_target_la-offload_omp_target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_target_la-offload_table.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/liboffloadmic_target_la-offload_target.Plo@am__quote@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libcoi_host_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o libcoi_host_la-coi_host.lo `test -f 'runtime/emulator/coi_host.cpp' || echo '$(srcdir)/'`runtime/emulator/coi_host.cpp
-libmyo_client_la-myo_client.lo: runtime/emulator/myo_client.cpp
-@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmyo_client_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT libmyo_client_la-myo_client.lo -MD -MP -MF $(DEPDIR)/libmyo_client_la-myo_client.Tpo -c -o libmyo_client_la-myo_client.lo `test -f 'runtime/emulator/myo_client.cpp' || echo '$(srcdir)/'`runtime/emulator/myo_client.cpp
-@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/libmyo_client_la-myo_client.Tpo $(DEPDIR)/libmyo_client_la-myo_client.Plo
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='runtime/emulator/myo_client.cpp' object='libmyo_client_la-myo_client.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmyo_client_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o libmyo_client_la-myo_client.lo `test -f 'runtime/emulator/myo_client.cpp' || echo '$(srcdir)/'`runtime/emulator/myo_client.cpp
-
-libmyo_service_la-myo_service.lo: runtime/emulator/myo_service.cpp
-@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmyo_service_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT libmyo_service_la-myo_service.lo -MD -MP -MF $(DEPDIR)/libmyo_service_la-myo_service.Tpo -c -o libmyo_service_la-myo_service.lo `test -f 'runtime/emulator/myo_service.cpp' || echo '$(srcdir)/'`runtime/emulator/myo_service.cpp
-@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/libmyo_service_la-myo_service.Tpo $(DEPDIR)/libmyo_service_la-myo_service.Plo
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='runtime/emulator/myo_service.cpp' object='libmyo_service_la-myo_service.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmyo_service_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o libmyo_service_la-myo_service.lo `test -f 'runtime/emulator/myo_service.cpp' || echo '$(srcdir)/'`runtime/emulator/myo_service.cpp
-
liboffloadmic_host_la-dv_util.lo: runtime/dv_util.cpp
@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(liboffloadmic_host_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT liboffloadmic_host_la-dv_util.lo -MD -MP -MF $(DEPDIR)/liboffloadmic_host_la-dv_util.Tpo -c -o liboffloadmic_host_la-dv_util.lo `test -f 'runtime/dv_util.cpp' || echo '$(srcdir)/'`runtime/dv_util.cpp
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/liboffloadmic_host_la-dv_util.Tpo $(DEPDIR)/liboffloadmic_host_la-dv_util.Plo
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(liboffloadmic_host_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o liboffloadmic_host_la-offload_host.lo `test -f 'runtime/offload_host.cpp' || echo '$(srcdir)/'`runtime/offload_host.cpp
-liboffloadmic_host_la-offload_myo_host.lo: runtime/offload_myo_host.cpp
-@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(liboffloadmic_host_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT liboffloadmic_host_la-offload_myo_host.lo -MD -MP -MF $(DEPDIR)/liboffloadmic_host_la-offload_myo_host.Tpo -c -o liboffloadmic_host_la-offload_myo_host.lo `test -f 'runtime/offload_myo_host.cpp' || echo '$(srcdir)/'`runtime/offload_myo_host.cpp
-@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/liboffloadmic_host_la-offload_myo_host.Tpo $(DEPDIR)/liboffloadmic_host_la-offload_myo_host.Plo
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='runtime/offload_myo_host.cpp' object='liboffloadmic_host_la-offload_myo_host.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(liboffloadmic_host_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o liboffloadmic_host_la-offload_myo_host.lo `test -f 'runtime/offload_myo_host.cpp' || echo '$(srcdir)/'`runtime/offload_myo_host.cpp
-
liboffloadmic_host_la-offload_omp_host.lo: runtime/offload_omp_host.cpp
@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(liboffloadmic_host_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT liboffloadmic_host_la-offload_omp_host.lo -MD -MP -MF $(DEPDIR)/liboffloadmic_host_la-offload_omp_host.Tpo -c -o liboffloadmic_host_la-offload_omp_host.lo `test -f 'runtime/offload_omp_host.cpp' || echo '$(srcdir)/'`runtime/offload_omp_host.cpp
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/liboffloadmic_host_la-offload_omp_host.Tpo $(DEPDIR)/liboffloadmic_host_la-offload_omp_host.Plo
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(liboffloadmic_target_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o liboffloadmic_target_la-compiler_if_target.lo `test -f 'runtime/compiler_if_target.cpp' || echo '$(srcdir)/'`runtime/compiler_if_target.cpp
-liboffloadmic_target_la-offload_myo_target.lo: runtime/offload_myo_target.cpp
-@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(liboffloadmic_target_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT liboffloadmic_target_la-offload_myo_target.lo -MD -MP -MF $(DEPDIR)/liboffloadmic_target_la-offload_myo_target.Tpo -c -o liboffloadmic_target_la-offload_myo_target.lo `test -f 'runtime/offload_myo_target.cpp' || echo '$(srcdir)/'`runtime/offload_myo_target.cpp
-@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/liboffloadmic_target_la-offload_myo_target.Tpo $(DEPDIR)/liboffloadmic_target_la-offload_myo_target.Plo
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='runtime/offload_myo_target.cpp' object='liboffloadmic_target_la-offload_myo_target.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(liboffloadmic_target_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o liboffloadmic_target_la-offload_myo_target.lo `test -f 'runtime/offload_myo_target.cpp' || echo '$(srcdir)/'`runtime/offload_myo_target.cpp
-
liboffloadmic_target_la-offload_omp_target.lo: runtime/offload_omp_target.cpp
@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(liboffloadmic_target_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT liboffloadmic_target_la-offload_omp_target.lo -MD -MP -MF $(DEPDIR)/liboffloadmic_target_la-offload_omp_target.Tpo -c -o liboffloadmic_target_la-offload_omp_target.lo `test -f 'runtime/offload_omp_target.cpp' || echo '$(srcdir)/'`runtime/offload_omp_target.cpp
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/liboffloadmic_target_la-offload_omp_target.Tpo $(DEPDIR)/liboffloadmic_target_la-offload_omp_target.Plo
Intel, Xeon, and Intel Xeon Phi are trademarks of Intel Corporation in the U.S. and/or other countries.
-This document is Copyright \textcopyright 2014-2015, Intel Corporation. All rights reserved.
+This document is Copyright \textcopyright 2014-2016, Intel Corporation. All rights reserved.
\pagenumbering{roman}
\tableofcontents
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
#endif
#endif // DOXYGEN_SHOULD_SKIP_THIS
-#define COI_MAX_ISA_x86_64_DEVICES 1
-#define COI_MAX_ISA_MIC_DEVICES 128
-#define COI_MAX_ISA_KNF_DEVICES COI_MAX_ISA_MIC_DEVICES
-#define COI_MAX_ISA_KNC_DEVICES COI_MAX_ISA_MIC_DEVICES
-
+#define COI_MAX_ISA_x86_64_DEVICES 128
+#define COI_MAX_ISA_MIC_DEVICES 128
+#define COI_MAX_ISA_KNF_DEVICES 0
+#define COI_MAX_ISA_KNC_DEVICES COI_MAX_ISA_MIC_DEVICES
+#define COI_MAX_ISA_KNL_DEVICES COI_MAX_ISA_MIC_DEVICES
///////////////////////////////////////////////////////////////////////////////
///
///
typedef enum
{
- COI_ISA_INVALID = 0, ///< Represents an invalid ISA.
- COI_ISA_x86_64, ///< The ISA for an x86_64 host engine.
- COI_ISA_MIC, ///< Special value used to represent any device
- ///< in the Intel(R) Many Integrated Core
- ///< architecture family.
- COI_ISA_KNF, ///< ISA for L1OM devices.
- COI_ISA_KNC ///< ISA for K1OM devices.
-} COI_ISA_TYPE;
+ COI_DEVICE_INVALID = 0, ///< Represents an invalid device type.
+ COI_DEVICE_SOURCE, ///< The engine from which offload originates
+ COI_DEVICE_MIC, ///< Special value used to represent any device
+ ///< in the Intel(R) Many Integrated Core family.
+ COI_DEVICE_DEPRECATED_0, ///< Placeholder for L1OM devices (deprecated).
+ COI_DEVICE_KNC, ///< K1OM devices (Knights Corner).
+ COI_DEVICE_KNL, ///< Knights Landing devices
+ COI_DEVICE_MAX,
+ COI_DEVICE_KNF = COI_DEVICE_DEPRECATED_0
+} COI_DEVICE_TYPE;
+
+///////////////////////////////////////////////////////////////////////////////
+///
+/// List of deprecated device types for backward compatibility
+///
+#define COI_ISA_INVALID COI_DEVICE_INVALID
+#define COI_ISA_x86_64 COI_DEVICE_SOURCE
+#define COI_ISA_MIC COI_DEVICE_MIC
+#define COI_ISA_KNF COI_DEVICE_KNF
+#define COI_ISA_KNC COI_DEVICE_KNC
+typedef COI_DEVICE_TYPE COI_ISA_TYPE;
///////////////////////////////////////////////////////////////////////////////
///
/// Get the information about the COIEngine executing this function call.
///
/// @param out_pType
-/// [out] The COI_ISA_TYPE of the engine.
+/// [out] The COI_DEVICE_TYPE of the engine.
///
/// @param out_pIndex
/// [out] The zero-based index of this engine in the collection of
COIACCESSAPI
COIRESULT
COIEngineGetIndex(
- COI_ISA_TYPE* out_pType,
- uint32_t* out_pIndex);
+ COI_DEVICE_TYPE *out_pType,
+ uint32_t *out_pIndex);
#ifdef __cplusplus
} /* extern "C" */
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
#include <sched.h>
#ifndef UNREFERENCED_CONST_PARAM
#define UNREFERENCED_CONST_PARAM(P) { void* x UNUSED_ATTR = \
- (void*)(uint64_t)P; \
- }
+ (void*)(uint64_t)P; \
+}
#endif
-// This seems to work on everything.
+// This seems to work on everything.
#ifndef UNREFERENCED_PARAM
-#define UNREFERENCED_PARAM(P) (P = P)
+ #define UNREFERENCED_PARAM(P) (P = P)
#endif
#ifndef SYMBOL_VERSION
-/* Linux support: */
+ /* Linux support: */
- #define SYMBOL_VERSION( SYMBOL , VERSION ) SYMBOL ## VERSION
+ #define SYMBOL_VERSION( SYMBOL , VERSION ) SYMBOL ## VERSION
#endif
/* Roughly equivalent to CPU_ISSET(). */
static inline uint64_t COI_CPU_MASK_ISSET(int bitNumber, const COI_CPU_MASK cpu_mask)
{
- if ((size_t)bitNumber < sizeof(COI_CPU_MASK)*8)
- return ((cpu_mask)[bitNumber/64] & (((uint64_t)1) << (bitNumber%64)));
+ if ((size_t)bitNumber < sizeof(COI_CPU_MASK) * 8)
+ return ((cpu_mask)[bitNumber / 64] & (((uint64_t)1) << (bitNumber % 64)));
return 0;
}
/* Roughly equivalent to CPU_SET(). */
static inline void COI_CPU_MASK_SET(int bitNumber, COI_CPU_MASK cpu_mask)
{
- if ((size_t)bitNumber < sizeof(COI_CPU_MASK)*8)
- ((cpu_mask)[bitNumber/64] |= (((uint64_t)1) << (bitNumber%64)));
+ if ((size_t)bitNumber < sizeof(COI_CPU_MASK) * 8)
+ ((cpu_mask)[bitNumber / 64] |= (((uint64_t)1) << (bitNumber % 64)));
}
/* Roughly equivalent to CPU_ZERO(). */
static inline void COI_CPU_MASK_ZERO(COI_CPU_MASK cpu_mask)
{
- memset(cpu_mask,0,sizeof(COI_CPU_MASK));
+ memset(cpu_mask, 0, sizeof(COI_CPU_MASK));
}
/* Roughly equivalent to CPU_AND(). */
static inline void COI_CPU_MASK_AND(COI_CPU_MASK dst, const COI_CPU_MASK src1, const COI_CPU_MASK src2)
{
const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(dst[0]);
+ unsigned int i = 0;
- for(unsigned int i=0;i<loopIterations;++i)
+ for (; i < loopIterations; ++i)
dst[i] = src1[i] & src2[i];
}
static inline void COI_CPU_MASK_XOR(COI_CPU_MASK dst, const COI_CPU_MASK src1, const COI_CPU_MASK src2)
{
const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(dst[0]);
+ unsigned int i = 0;
- for(unsigned int i=0;i<loopIterations;++i)
+ for (; i < loopIterations; ++i)
dst[i] = src1[i] ^ src2[i];
}
static inline void COI_CPU_MASK_OR(COI_CPU_MASK dst, const COI_CPU_MASK src1, const COI_CPU_MASK src2)
{
const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(dst[0]);
+ unsigned int i = 0;
- for(unsigned int i=0;i<loopIterations;++i)
+ for (; i < loopIterations; ++i)
dst[i] = src1[i] | src2[i];
}
/* Utility function for COI_CPU_MASK_COUNT() below. */
static inline int __COI_CountBits(uint64_t n)
{
- int cnt=0;
+ int cnt = 0;
- for (;n;cnt++)
- n &= (n-1);
+ for (; n; cnt++)
+ n &= (n - 1);
return cnt;
}
/* Roughly equivalent to CPU_COUNT(). */
static inline int COI_CPU_MASK_COUNT(const COI_CPU_MASK cpu_mask)
{
- int cnt=0;
+ int cnt = 0;
const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(cpu_mask[0]);
+ unsigned int i = 0;
- for(unsigned int i=0;i < loopIterations;++i)
+ for (; i < loopIterations; ++i)
{
cnt += __COI_CountBits(cpu_mask[i]);
}
}
/* Roughly equivalent to CPU_EQUAL(). */
-static inline int COI_CPU_MASK_EQUAL(const COI_CPU_MASK cpu_mask1,const COI_CPU_MASK cpu_mask2)
+static inline int COI_CPU_MASK_EQUAL(const COI_CPU_MASK cpu_mask1, const COI_CPU_MASK cpu_mask2)
{
const unsigned int loopIterations = sizeof(COI_CPU_MASK) / sizeof(cpu_mask1[0]);
+ unsigned int i = 0;
- for(unsigned int i=0;i < loopIterations;++i)
+ for (; i < loopIterations; ++i)
{
if (cpu_mask1[i] != cpu_mask2[i])
return 0;
/* Utility function to translate from cpu_set * to COI_CPU_MASK. */
-static inline void COI_CPU_MASK_XLATE(COI_CPU_MASK dest,const cpu_set_t *src)
+static inline void COI_CPU_MASK_XLATE(COI_CPU_MASK dest, const cpu_set_t *src)
{
+ unsigned int i;
+ unsigned int j;
COI_CPU_MASK_ZERO(dest);
-#if 0
+ #if 0
/* Slightly slower version than the following #else/#endif block. Left here only to
document the intent of the code. */
- for(unsigned int i=0;i < sizeof(cpu_set_t)*8;++i)
- if (CPU_ISSET(i,src))
- COI_CPU_MASK_SET(i,dest);
-#else
- for(unsigned int i=0;i < sizeof(COI_CPU_MASK)/sizeof(dest[0]);++i)
+ for (i = 0; i < sizeof(cpu_set_t) * 8; ++i)
+ if (CPU_ISSET(i, src))
+ COI_CPU_MASK_SET(i, dest);
+ #else
+ for (i = 0; i < sizeof(COI_CPU_MASK) / sizeof(dest[0]); ++i)
{
- for(unsigned int j=0;j < 64;++j)
+ for (j = 0; j < 64; ++j)
{
- if (CPU_ISSET(i*64+j,src))
+ if (CPU_ISSET(i * 64 + j, src))
dest[i] |= ((uint64_t)1) << j;
}
}
-#endif
+ #endif
}
/* Utility function to translate from COI_CPU_MASK to cpu_set *. */
-static inline void COI_CPU_MASK_XLATE_EX(cpu_set_t *dest,const COI_CPU_MASK src)
+static inline void COI_CPU_MASK_XLATE_EX(cpu_set_t *dest, const COI_CPU_MASK src)
{
+ unsigned int i;
+ unsigned int j;
CPU_ZERO(dest);
-#if 0
+ #if 0
/* Slightly slower version than the following #else/#endif block. Left here only to
document the intent of the code. */
- for(unsigned int i=0;i < sizeof(COI_CPU_MASK)*8;++i)
- if (COI_CPU_MASK_ISSET(i,src))
- CPU_SET(i,dest);
-#else
- for(unsigned int i=0;i < sizeof(COI_CPU_MASK)/sizeof(src[0]);++i)
+ for (i = 0; i < sizeof(COI_CPU_MASK) * 8; ++i)
+ if (COI_CPU_MASK_ISSET(i, src))
+ CPU_SET(i, dest);
+ #else
+ for (i = 0; i < sizeof(COI_CPU_MASK) / sizeof(src[0]); ++i)
{
const uint64_t cpu_mask = src[i];
- for(unsigned int j=0;j < 64;++j)
+ for (j = 0; j < 64; ++j)
{
const uint64_t bit = ((uint64_t)1) << j;
if (bit & cpu_mask)
- CPU_SET(i*64+j,dest);
+ CPU_SET(i * 64 + j, dest);
}
}
-#endif
+ #endif
}
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
COI_SUCCESS = 0, ///< The function succeeded without error.
COI_ERROR, ///< Unspecified error.
COI_NOT_INITIALIZED, ///< The function was called before the
- ///< system was initialized.
+ ///< system was initialized.
COI_ALREADY_INITIALIZED, ///< The function was called after the
- ///< system was initialized.
+ ///< system was initialized.
COI_ALREADY_EXISTS, ///< Cannot complete the request due to
- ///< the existence of a similar object.
+ ///< the existence of a similar object.
COI_DOES_NOT_EXIST, ///< The specified object was not found.
COI_INVALID_POINTER, ///< One of the provided addresses was not
- ///< valid.
+ ///< valid.
COI_OUT_OF_RANGE, ///< One of the arguments contains a value
- ///< that is invalid.
+ ///< that is invalid.
COI_NOT_SUPPORTED, ///< This function is not currently
- ///< supported as used.
+ ///< supported as used.
COI_TIME_OUT_REACHED, ///< The specified time out caused the
- ///< function to abort.
+ ///< function to abort.
COI_MEMORY_OVERLAP, ///< The source and destination range
- ///< specified overlaps for the same
- ///< buffer.
+ ///< specified overlaps for the same
+ ///< buffer.
COI_ARGUMENT_MISMATCH, ///< The specified arguments are not
- ///< compatible.
+ ///< compatible.
COI_SIZE_MISMATCH, ///< The specified size does not match the
- ///< expected size.
+ ///< expected size.
COI_OUT_OF_MEMORY, ///< The function was unable to allocate
- ///< the required memory.
+ ///< the required memory.
COI_INVALID_HANDLE, ///< One of the provided handles was not
- ///< valid.
+ ///< valid.
COI_RETRY, ///< This function currently can't
- ///< complete, but might be able to later.
+ ///< complete, but might be able to later.
COI_RESOURCE_EXHAUSTED, ///< The resource was not large enough.
COI_ALREADY_LOCKED, ///< The object was expected to be
- ///< unlocked, but was locked.
+ ///< unlocked, but was locked.
COI_NOT_LOCKED, ///< The object was expected to be locked,
- ///< but was unlocked.
+ ///< but was unlocked.
COI_MISSING_DEPENDENCY, ///< One or more dependent components
- ///< could not be found.
+ ///< could not be found.
COI_UNDEFINED_SYMBOL, ///< One or more symbols the component
- ///< required was not defined in any
- ///< library.
+ ///< required was not defined in any
+ ///< library.
COI_PENDING, ///< Operation is not finished
COI_BINARY_AND_HARDWARE_MISMATCH, ///< A specified binary will not run on
- ///< the specified hardware.
+ ///< the specified hardware.
COI_PROCESS_DIED,
COI_INVALID_FILE, ///< The file is invalid for its intended
- ///< usage in the function.
+ ///< usage in the function.
COI_EVENT_CANCELED, ///< Event wait on a user event that
- ///< was unregistered or is being
- ///< unregistered returns
- ///< COI_EVENT_CANCELED.
+ ///< was unregistered or is being
+ ///< unregistered returns
+ ///< COI_EVENT_CANCELED.
COI_VERSION_MISMATCH, ///< The version of Intel(R) Coprocessor
- ///< Offload Infrastructure on the host
- ///< is not compatible with the version
- ///< on the device.
+ ///< Offload Infrastructure on the host
+ ///< is not compatible with the version
+ ///< on the device.
COI_BAD_PORT, ///< The port that the host is set to
- ///< connect to is invalid.
+ ///< connect to is invalid.
COI_AUTHENTICATION_FAILURE, ///< The daemon was unable to authenticate
- ///< the user that requested an engine.
- ///< Only reported if daemon is set up for
- ///< authorization. Is also reported in
- ///< Windows if host can not find user.
+ ///< the user that requested an engine.
+ ///< Only reported if daemon is set up for
+ ///< authorization. Is also reported in
+ ///< Windows if host can not find user.
+ COI_COMM_NOT_INITIALIZED, ///< The function was called before the
+ ///< comm was initialized.
+ COI_INCORRECT_FORMAT, ///< The format of the data is incorrect.
COI_NUM_RESULTS ///< Reserved, do not use.
-}
-COIRESULT;
+} COIRESULT;
//////////////////////////////////////////////////////////////////////////////
///
/// @return String version of the passed in COIRESULT code.
///
COIACCESSAPI
-const char*
+const char *
COIResultGetName(
- COIRESULT in_ResultCode);
+ COIRESULT in_ResultCode);
#ifdef __cplusplus
} /* extern "C" */
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* @file common/COITypes_common.h
*/
#ifndef DOXYGEN_SHOULD_SKIP_THIS
- #include <stdint.h>
- #include <wchar.h>
- #define COIACCESSAPI /* nothing */
- #define COIACCESSAPI2 /* nothing */
+ #include <stdint.h>
+ #include <wchar.h>
+ #define COIACCESSAPI /* nothing */
+ #define COIACCESSAPI2 /* nothing */
#endif /* DOXYGEN_SHOULD_SKIP_THIS */
#endif
-struct coievent { uint64_t opaque[2]; };
+struct coievent
+{
+ uint64_t opaque[2];
+};
-typedef struct coiprocess * COIPROCESS;
-typedef struct coipipeline * COIPIPELINE;
-typedef struct coifunction * COIFUNCTION;
-typedef struct coiengine * COIENGINE;
+typedef struct coiprocess *COIPROCESS;
+typedef struct coipipeline *COIPIPELINE;
+typedef struct coifunction *COIFUNCTION;
+typedef struct coiengine *COIENGINE;
typedef struct coievent COIEVENT;
-typedef struct coibuffer * COIBUFFER;
-typedef struct coilibrary * COILIBRARY;
-typedef struct coimapinst * COIMAPINSTANCE;
+typedef struct coibuffer *COIBUFFER;
+typedef struct coilibrary *COILIBRARY;
+typedef struct coimapinst *COIMAPINSTANCE;
typedef uint64_t COI_CPU_MASK[16];
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* @file sink\COIBuffer_sink.h
*/
#ifndef DOXYGEN_SHOULD_SKIP_THIS
-#include "../common/COITypes_common.h"
-#include "../common/COIResult_common.h"
+ #include "../common/COITypes_common.h"
+ #include "../common/COIResult_common.h"
#endif // DOXYGEN_SHOULD_SKIP_THIS
#ifdef __cplusplus
///
COIRESULT
COIBufferAddRef(
- void* in_pBuffer);
+ void *in_pBuffer);
//////////////////////////////////////////////////////////////////////////////
///
COIRESULT
COIBufferReleaseRef(
- void* in_pBuffer);
+ void *in_pBuffer);
#ifdef __cplusplus
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
#include "../common/COIResult_common.h"
#ifdef __FreeBSD__
-#define COINATIVELIBEXPORT_VISIBILITY "extern"
+ #define COINATIVELIBEXPORT_VISIBILITY "extern"
#else
-#define COINATIVELIBEXPORT_VISIBILITY "default"
+ #define COINATIVELIBEXPORT_VISIBILITY "default"
#endif
#ifdef __cplusplus
///
typedef void
(*RunFunctionPtr_t)(
- uint32_t in_BufferCount,
- void** in_ppBufferPointers,
- uint64_t* in_pBufferLengths,
- void* in_pMiscData,
- uint16_t in_MiscDataLength,
- void* in_pReturnValue,
- uint16_t in_ReturnValueLength);
+ uint32_t in_BufferCount,
+ void **in_ppBufferPointers,
+ uint64_t *in_pBufferLengths,
+ void *in_pMiscData,
+ uint16_t in_MiscDataLength,
+ void *in_pReturnValue,
+ uint16_t in_ReturnValueLength);
///////////////////////////////////////////////////////////////////////////////
///
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
COIRESULT
COIProcessProxyFlush();
+
+//////////////////////////////////////////////////////////////////////////////
+///
+/// Loads a shared library from host filesystem into the current sink
+/// process, akin to using dlopen() on a local process in Linux or
+/// LoadLibrary() in Windows.
+///
+/// @param in_pFileName
+/// [in] The name of the shared library file on the source's file
+/// system that is being loaded. If the file name is not an absolute
+/// path, the file is searched for in the same manner as dependencies.
+///
+/// @param in_pLibraryName
+/// [in] Name for the shared library. This optional parameter can
+/// be specified in case the dynamic library doesn't have an
+/// SO_NAME field. If specified, it will take precedence over
+/// the SO_NAME if it exists. If it is not specified then
+/// the library must have a valid SO_NAME field.
+///
+/// @param in_LibrarySearchPath
+/// [in] A path used to locate the dynamic library dependencies of
+/// the library being loaded. If not NULL, this path overrides the
+/// environment variable SINK_LD_LIBRARY_PATH. If NULL,
+/// SINK_LD_LIBRARY_PATH is used to locate dependencies.
+///
+/// @param in_Flags
+/// [in] Bitmask of the flags that will be passed in as the dlopen()
+/// "flag" parameter on the sink.
+///
+/// @param out_pLibrary
+/// [out] If COI_SUCCESS or COI_ALREADY_EXISTS is returned, the handle
+/// that uniquely identifies the loaded library.
+///
+/// @return COI_SUCCESS if the library was successfully loaded.
+///
+/// @return COI_INVALID_POINTER if in_pFileName is NULL.
+///
+/// @return COI_DOES_NOT_EXIST if in_pFileName cannot be found.
+///
+/// @return COI_INVALID_FILE if the file is not a valid shared library.
+///
+/// @return COI_MISSING_DEPENDENCY if a dependent library is missing from
+/// either SINK_LD_LIBRARY_PATH or the in_LibrarySearchPath parameter.
+///
+/// @return COI_ARGUMENT_MISMATCH if the shared library is missing an SONAME
+/// and in_pLibraryName is NULL.
+///
+/// @return COI_UNDEFINED_SYMBOL if we are unable to load the library due to
+/// an undefined symbol.
+///
+/// @return COI_ALREADY_EXISTS if there is an existing COILIBRARY handle
+/// that identifies this library, and this COILIBRARY hasn't been
+/// unloaded yet.
+///
+/// @return COI_BINARY_AND_HARDWARE_MISMATCH if the target machine of the
+/// binary or any of its recursive dependencies does not match the
+/// engine associated with Process.
+///
+/// @return COI_NOT_INITIALIZED if setup of the remote process on the host
+/// has not completed yet.
+///
+COIRESULT
+COIProcessLoadSinkLibraryFromFile(
+ const char *in_pFileName,
+ const char *in_pLibraryName,
+ const char *in_LibrarySearchPath,
+ uint32_t in_Flags,
+ COILIBRARY *out_pLibrary);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* @file source\COIBuffer_source.h
*/
#ifndef DOXYGEN_SHOULD_SKIP_THIS
-#include "../common/COITypes_common.h"
-#include "../common/COIResult_common.h"
+ #include "../common/COITypes_common.h"
+ #include "../common/COIResult_common.h"
#endif // DOXYGEN_SHOULD_SKIP_THIS
#ifdef __cplusplus
// Reserved values, not used by COI any more
COI_BUFFER_RESERVED_1,
COI_BUFFER_RESERVED_2,
-
- /// A pinned buffer exists in a shared memory region and is always
- /// available for read or write operations.
- /// Note: Pinned Buffers larger than 4KB are not supported in
- /// Windows 7 kernels.
- /// The value of COI_BUFFER_PINNED is set to specific value
- /// to maintain compatibility with older versions of COI
- COI_BUFFER_PINNED,
+ COI_BUFFER_RESERVED_3,
/// OpenCL buffers are similar to Normal buffers except they don't
/// stall pipelines and don't follow any read write dependencies.
#define COI_OPTIMIZE_NO_DMA 0x00000040
/// Hint to the runtime to try to use huge page sizes for backing store on the
-/// sink. Is currently not compatible with PINNED buffers or the SAME_ADDRESS
+/// sink. Is currently not compatible with the SAME_ADDRESS
/// flags or the SINK_MEMORY flag. It is important to note that this is a hint
/// and internally the runtime may not actually promote to huge pages.
/// Specifically if the buffer is too small (less than 4KiB for example) then
#endif
#define T 1
#define MTM(_BUFFER, B1, B2, B3, B4, B5, B6, B7, B8, B9) \
-(B1 | B2<<1 | B3<<2 | B4<<3 | B5<<4 | B6<<5 | B7<<6 | B8<<7 | B9<<8)
+ (B1 | B2<<1 | B3<<2 | B4<<3 | B5<<4 | B6<<5 | B7<<6 | B8<<7 | B9<<8)
#endif
/// \enum COI_BUFFER_TYPE
/// that may be passed in to COIBufferCreate and COIBufferCreateFromMemory.
/// \code
static const uint64_t
-COI_VALID_BUFFER_TYPES_AND_FLAGS[COI_BUFFER_OPENCL+1] = {
-/* | | SAME | | | | | | | |
- | SAME | ADDR | OPT | OPT | OPT | OPT | OPT | HUGE | COI |
- | ADDR | SINK | SRC | SRC | SINK | SINK | NO | PAGE | SINK |
- | SINKS | SRC | READ | WRITE | READ | WRITE | DMA | SIZE | MEM |
- +-------+------+------+-------+------+-------+-----+------+-----*/
-MTM(INVALID , F , F , F , F , F , F , F , F , F ),
-MTM(NORMAL , T , T , T , T , T , T , T , T , T ),
-MTM(RESERVED1 , F , F , F , F , F , F , F , F , F ),
-MTM(RESERVED2 , F , F , F , F , F , F , F , F , F ),
-MTM(PINNED , T , T , T , T , T , T , F , F , F ),
-MTM(OPENCL , T , T , T , T , T , T , T , T , F ),
+COI_VALID_BUFFER_TYPES_AND_FLAGS[COI_BUFFER_OPENCL + 1] =
+{
+ /* | | SAME | | | | | | | |
+ | SAME | ADDR | OPT | OPT | OPT | OPT | OPT | HUGE | COI |
+ | ADDR | SINK | SRC | SRC | SINK | SINK | NO | PAGE | SINK |
+ | SINKS | SRC | READ | WRITE | READ | WRITE | DMA | SIZE | MEM |
+ +-------+------+------+-------+------+-------+-----+------+-----*/
+ MTM(INVALID , F , F , F , F , F , F , F , F , F),
+ MTM(NORMAL , T , T , T , T , T , T , T , T , T),
+ MTM(RESERVED1 , F , F , F , F , F , F , F , F , F),
+ MTM(RESERVED2 , F , F , F , F , F , F , F , F , F),
+ MTM(RESERVED3 , F , F , F , F , F , F , F , F , F),
+ MTM(OPENCL , T , T , T , T , T , T , T , T , F),
};
///\endcode
#undef MTM
#ifndef DOXYGEN_SHOULD_SKIP_THIS
// Make the flag mask
#define MMM(_BUFFER, B1, B2, B3) \
- { F , B1, B2, B3}
+ { F , B1, B2, B3}
#endif
/// \enum COI_MAP_TYPE
/// This matrix shows the valid combinations of buffer types and map
/// \code
static const uint64_t
COI_VALID_BUFFER_TYPES_AND_MAP
-[COI_BUFFER_OPENCL+1][COI_MAP_WRITE_ENTIRE_BUFFER+1] = {
-/* | MAP | MAP | MAP |
- | READ | READ | WRITE |
- | WRITE | ONLY | ENTIRE|
- +-------+-------+-------+*/
-MMM(INVALID , F , F , F ),
-MMM(NORMAL , T , T , T ),
-MMM(RESERVED1 , F , F , F ),
-MMM(RESERVED2 , F , F , F ),
-MMM(PINNED , T , T , T ),
-MMM(OPENCL , T , T , T ),
+[COI_BUFFER_OPENCL + 1][COI_MAP_WRITE_ENTIRE_BUFFER + 1] =
+{
+ /* | MAP | MAP | MAP |
+ | READ | READ | WRITE |
+ | WRITE | ONLY | ENTIRE|
+ +-------+-------+-------+*/
+ MMM(INVALID , F , F , F),
+ MMM(NORMAL , T , T , T),
+ MMM(RESERVED1 , F , F , F),
+ MMM(RESERVED2 , F , F , F),
+ MMM(RESERVED3 , F , F , F),
+ MMM(OPENCL , T , T , T),
};
///\endcode
#undef MMM
COI_COPY_USE_DMA,
/// The runtime should use a CPU copy to copy the data.
- /// CPU copy is a synchronous copy. So the resulting operations are always
- /// blocking (even though a out_pCompletion event is specified).
COI_COPY_USE_CPU,
/// Same as above, but forces moving entire buffer to target process in Ex
/// The buffer states are used to indicate whether a buffer is available for
/// access in a COIPROCESS. This is used with COIBufferSetState.
///
-/// Buffer state holds only for NORMAL Buffers and OPENCL buffers. Pinned
-/// buffers are always valid everywhere they get created.
-///
/// Rules on State Transition of the buffer:
/// -. When a Buffer is created by default it is valid only on the source,
/// except for buffers created with COI_SINK_MEMORY flag which are valid
/// The buffer states used with COIBufferSetState call to indicate the new
/// state of the buffer on a given process
///
-typedef enum {
+typedef enum
+{
COI_BUFFER_VALID = 0, // Buffer is valid and up-to-date on the process
COI_BUFFER_INVALID , // Buffer is not valid, need valid data
COI_BUFFER_VALID_MAY_DROP, // Same as valid but will drop the content when
- // evicted to avoid overwriting the shadow
- // memory
+ // evicted to avoid overwriting the shadow
+ // memory
COI_BUFFER_RESERVED // Reserved for internal use
} COI_BUFFER_STATE;
///
//////////////////////////////////////////////////////////////////////////////
/// The buffer move flags are used to indicate when a buffer should be moved
/// when it's state is changed. This is used with COIBufferSetState.
-typedef enum {
+typedef enum
+{
COI_BUFFER_MOVE = 0,// Dirty data is moved if state change requires it
COI_BUFFER_NO_MOVE // Change state without moving data
} COI_BUFFER_MOVE_FLAG;
#define COI_SINK_OWNERS ((COIPROCESS)-2)
// Matrix descriptors used with MultiD Read/Write
-typedef struct dim_desc {
+typedef struct dim_desc
+{
int64_t size; // Size of data type
int64_t lindex; // Lower index, used in Fortran
int64_t lower; // Lower section bound
int64_t upper; // Upper section bound
int64_t stride; // Stride, or number of bytes between the start
- // of one element and start of next one divided
- // by size.
+ // of one element and start of next one divided
+ // by size.
} dim_desc;
-typedef struct arr_desc {
+typedef struct arr_desc
+{
int64_t base; // Base address
int64_t rank; // Rank of array, i.e. number of dimensions
dim_desc dim[3]; // This array has as many elements as \93rank\94
- // currently limited to 3.
+ // currently limited to 3.
} arr_desc;
//////////////////////////////////////////////////////////////////////////////
///
/// @return COI_OUT_OF_MEMORY if allocating the buffer fails.
///
-/// @return COI_RESOURCE_EXHAUSTED if the sink is out of buffer memory. This
-/// error can also be thrown from Windows 7 operating systems if
-/// COI_BUFFER_PINNED and a size larger than 4KB is requested.
-/// This is due to a limitation of the Windows 7 memory management unit.
+/// @return COI_RESOURCE_EXHAUSTED if the sink is out of buffer memory.
///
COIACCESSAPI
COIRESULT
COIBufferCreate(
- uint64_t in_Size,
- COI_BUFFER_TYPE in_Type,
- uint32_t in_Flags,
- const void* in_pInitData,
- uint32_t in_NumProcesses,
- const COIPROCESS* in_pProcesses,
- COIBUFFER* out_pBuffer);
+ uint64_t in_Size,
+ COI_BUFFER_TYPE in_Type,
+ uint32_t in_Flags,
+ const void *in_pInitData,
+ uint32_t in_NumProcesses,
+ const COIPROCESS *in_pProcesses,
+ COIBUFFER *out_pBuffer);
//////////////////////////////////////////////////////////////////////////////
///
/// is not page aligned, it will be rounded up.
///
/// @param in_Type
-/// [in] The type of the buffer to create. Only COI_BUFFER_NORMAL and
-/// COI_BUFFER_PINNED buffer types are supported.
+/// [in] The type of the buffer to create. Only the COI_BUFFER_NORMAL
+/// buffer type is supported.
///
/// @param in_Flags
/// [in] A bitmask of attributes for the newly created buffer.
/// @return COI_SUCCESS if the buffer was created
///
/// @return COI_NOT_SUPPORTED if the in_Type value is not COI_BUFFER_NORMAL,
-/// COI_BUFFER_PINNED, or COI_BUFFER_OPENCL.
+/// or COI_BUFFER_OPENCL.
///
/// @return COI_NOT_SUPPORTED if in_Memory is read-only memory
///
COIACCESSAPI
COIRESULT
COIBufferCreateFromMemory(
- uint64_t in_Size,
- COI_BUFFER_TYPE in_Type,
- uint32_t in_Flags,
- void* in_Memory,
- uint32_t in_NumProcesses,
- const COIPROCESS* in_pProcesses,
- COIBUFFER* out_pBuffer);
+ uint64_t in_Size,
+ COI_BUFFER_TYPE in_Type,
+ uint32_t in_Flags,
+ void *in_Memory,
+ uint32_t in_NumProcesses,
+ const COIPROCESS *in_pProcesses,
+ COIBUFFER *out_pBuffer);
//////////////////////////////////////////////////////////////////////////////
COIACCESSAPI
COIRESULT
COIBufferDestroy(
- COIBUFFER in_Buffer);
+ COIBUFFER in_Buffer);
//////////////////////////////////////////////////////////////////////////////
///
/// Note that different types of buffers behave differently when mapped.
/// For instance, mapping a COI_BUFFER_NORMAL for write must stall if the
-/// buffer is currently being written to by a run function. Mapping
-/// a COI_BUFFER_PINNED buffer will not affect other functions that use
-/// that buffer since a COI_BUFFER_PINNED buffer can be mapped at any time.
+/// buffer is currently being written to by a run function.
/// The asynchronous operation of COIBufferMap will likely be most useful when
/// paired with a COI_BUFFER_NORMAL.
///
COIACCESSAPI
COIRESULT
COIBufferMap(
- COIBUFFER in_Buffer,
- uint64_t in_Offset,
- uint64_t in_Length,
- COI_MAP_TYPE in_Type,
- uint32_t in_NumDependencies,
- const COIEVENT* in_pDependencies,
- COIEVENT* out_pCompletion,
- COIMAPINSTANCE* out_pMapInstance,
- void** out_ppData);
+ COIBUFFER in_Buffer,
+ uint64_t in_Offset,
+ uint64_t in_Length,
+ COI_MAP_TYPE in_Type,
+ uint32_t in_NumDependencies,
+ const COIEVENT *in_pDependencies,
+ COIEVENT *out_pCompletion,
+ COIMAPINSTANCE *out_pMapInstance,
+ void **out_ppData);
//////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT
COIBufferUnmap(
- COIMAPINSTANCE in_MapInstance,
- uint32_t in_NumDependencies,
- const COIEVENT* in_pDependencies,
- COIEVENT* out_pCompletion);
+ COIMAPINSTANCE in_MapInstance,
+ uint32_t in_NumDependencies,
+ const COIEVENT *in_pDependencies,
+ COIEVENT *out_pCompletion);
//////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT
COIBufferGetSinkAddress(
- COIBUFFER in_Buffer,
- uint64_t* out_pAddress);
+ COIBUFFER in_Buffer,
+ uint64_t *out_pAddress);
//////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT
COIBufferGetSinkAddressEx(
- COIPROCESS in_Process,
- COIBUFFER in_Buffer,
- uint64_t* out_pAddress);
+ COIPROCESS in_Process,
+ COIBUFFER in_Buffer,
+ uint64_t *out_pAddress);
//////////////////////////////////////////////////////////////////////////////
///
///
/// @return COI_OUT_OF_RANGE if in_Length is 0.
///
-/// @return COI_RETRY if in_DestBuffer is mapped and is not a COI_BUFFER_PINNED
-/// buffer or COI_BUFFER_OPENCL buffer.
+/// @return COI_RETRY if in_DestBuffer is mapped and is not
+/// a COI_BUFFER_OPENCL buffer.
///
COIACCESSAPI
COIRESULT
COIBufferWriteEx(
- COIBUFFER in_DestBuffer,
+ COIBUFFER in_DestBuffer,
const COIPROCESS in_DestProcess,
- uint64_t in_Offset,
- const void* in_pSourceData,
- uint64_t in_Length,
- COI_COPY_TYPE in_Type,
- uint32_t in_NumDependencies,
- const COIEVENT* in_pDependencies,
- COIEVENT* out_pCompletion);
+ uint64_t in_Offset,
+ const void *in_pSourceData,
+ uint64_t in_Length,
+ COI_COPY_TYPE in_Type,
+ uint32_t in_NumDependencies,
+ const COIEVENT *in_pDependencies,
+ COIEVENT *out_pCompletion);
//////////////////////////////////////////////////////////////////////////////
///
///
/// @return COI_OUT_OF_MEMORY if any allocation of memory fails
///
-/// @return COI_RETRY if in_DestBuffer is mapped and is not a COI_BUFFER_PINNED
-/// buffer or COI_BUFFER_OPENCL buffer.
+/// @return COI_RETRY if in_DestBuffer is mapped and is not
+/// a COI_BUFFER_OPENCL buffer.
///
COIACCESSAPI
COIRESULT
COIBufferWriteMultiD(
- COIBUFFER in_DestBuffer,
+ COIBUFFER in_DestBuffer,
const COIPROCESS in_DestProcess,
- uint64_t in_Offset,
- struct arr_desc* in_DestArray,
- struct arr_desc* in_SrcArray,
- COI_COPY_TYPE in_Type,
- uint32_t in_NumDependencies,
- const COIEVENT* in_pDependencies,
- COIEVENT* out_pCompletion);
+ uint64_t in_Offset,
+ struct arr_desc *in_DestArray,
+ struct arr_desc *in_SrcArray,
+ COI_COPY_TYPE in_Type,
+ uint32_t in_NumDependencies,
+ const COIEVENT *in_pDependencies,
+ COIEVENT *out_pCompletion);
//////////////////////////////////////////////////////////////////////////////
///
///
/// @return COI_OUT_OF_MEMORY if any allocation of memory fails
///
-/// @return COI_RETRY if in_SourceBuffer is mapped and is not a COI_BUFFER_PINNED
-/// buffer or COI_BUFFER_OPENCL buffer.
+/// @return COI_RETRY if in_SourceBuffer is mapped and is not
+/// a COI_BUFFER_OPENCL buffer.
///
COIACCESSAPI
COIRESULT
COIBufferReadMultiD(
- COIBUFFER in_SourceBuffer,
- uint64_t in_Offset,
- struct arr_desc* in_DestArray,
- struct arr_desc* in_SrcArray,
- COI_COPY_TYPE in_Type,
- uint32_t in_NumDependencies,
- const COIEVENT* in_pDependencies,
- COIEVENT* out_pCompletion);
+ COIBUFFER in_SourceBuffer,
+ uint64_t in_Offset,
+ struct arr_desc *in_DestArray,
+ struct arr_desc *in_SrcArray,
+ COI_COPY_TYPE in_Type,
+ uint32_t in_NumDependencies,
+ const COIEVENT *in_pDependencies,
+ COIEVENT *out_pCompletion);
//////////////////////////////////////////////////////////////////////////////
///
///
/// @return COI_OUT_OF_RANGE if in_Length is 0.
///
-/// @return COI_RETRY if in_DestBuffer is mapped and is not a COI_BUFFER_PINNED
-/// buffer or COI_BUFFER_OPENCL buffer.
+/// @return COI_RETRY if in_DestBuffer is mapped and is not
+/// a COI_BUFFER_OPENCL buffer.
///
COIACCESSAPI
COIRESULT
COIBufferWrite(
- COIBUFFER in_DestBuffer,
- uint64_t in_Offset,
- const void* in_pSourceData,
- uint64_t in_Length,
- COI_COPY_TYPE in_Type,
- uint32_t in_NumDependencies,
- const COIEVENT* in_pDependencies,
- COIEVENT* out_pCompletion);
+ COIBUFFER in_DestBuffer,
+ uint64_t in_Offset,
+ const void *in_pSourceData,
+ uint64_t in_Length,
+ COI_COPY_TYPE in_Type,
+ uint32_t in_NumDependencies,
+ const COIEVENT *in_pDependencies,
+ COIEVENT *out_pCompletion);
//////////////////////////////////////////////////////////////////////////////
///
///
/// @return COI_INVALID_POINTER if the in_pDestData pointer is NULL.
///
-/// @return COI_RETRY if in_SourceBuffer is mapped and is not a
-/// COI_BUFFER_PINNED buffer or COI_BUFFER_OPENCL buffer.
+/// @return COI_RETRY if in_SourceBuffer is mapped and is not
+/// a COI_BUFFER_OPENCL buffer.
///
COIACCESSAPI
COIRESULT
COIBufferRead(
- COIBUFFER in_SourceBuffer,
- uint64_t in_Offset,
- void* in_pDestData,
- uint64_t in_Length,
- COI_COPY_TYPE in_Type,
- uint32_t in_NumDependencies,
- const COIEVENT* in_pDependencies,
- COIEVENT* out_pCompletion);
+ COIBUFFER in_SourceBuffer,
+ uint64_t in_Offset,
+ void *in_pDestData,
+ uint64_t in_Length,
+ COI_COPY_TYPE in_Type,
+ uint32_t in_NumDependencies,
+ const COIEVENT *in_pDependencies,
+ COIEVENT *out_pCompletion);
//////////////////////////////////////////////////////////////////////////////
///
/// happen in parallel if they can be assigned to different DMA hardware.
/// So it is highly recommended to use explicit event dependencies to
/// order operations where needed.
+/// If a destroyed buffer (destination or source) is passed to this
+/// function, the behavior is unspecified.
///
/// @param in_DestBuffer
/// [in] Buffer to copy into.
/// in_NumDependencies is not 0.
///
/// @return COI_RETRY if in_DestBuffer or in_SourceBuffer are mapped and not
-/// COI_BUFFER_PINNED buffers or COI_BUFFER_OPENCL buffers.
+/// COI_BUFFER_OPENCL buffers.
///
COIACCESSAPI
COIRESULT
COIBufferCopyEx(
- COIBUFFER in_DestBuffer,
+ COIBUFFER in_DestBuffer,
const COIPROCESS in_DestProcess,
- COIBUFFER in_SourceBuffer,
- uint64_t in_DestOffset,
- uint64_t in_SourceOffset,
- uint64_t in_Length,
- COI_COPY_TYPE in_Type,
- uint32_t in_NumDependencies,
- const COIEVENT* in_pDependencies,
- COIEVENT* out_pCompletion);
+ COIBUFFER in_SourceBuffer,
+ uint64_t in_DestOffset,
+ uint64_t in_SourceOffset,
+ uint64_t in_Length,
+ COI_COPY_TYPE in_Type,
+ uint32_t in_NumDependencies,
+ const COIEVENT *in_pDependencies,
+ COIEVENT *out_pCompletion);
//////////////////////////////////////////////////////////////////////////////
///
/// happen in parallel if they can be assigned to different DMA hardware.
/// So it is highly recommended to use explicit event dependencies to
/// order operations where needed.
+/// If a destroyed buffer (destination or source) is passed to this
+/// function, the behavior is unspecified.
///
/// @param in_DestBuffer
/// [in] Buffer to copy into.
/// in_NumDependencies is not 0.
///
/// @return COI_RETRY if in_DestBuffer or in_SourceBuffer are mapped and not
-/// COI_BUFFER_PINNED buffers or COI_BUFFER_OPENCL buffers.
+/// COI_BUFFER_OPENCL buffers.
///
COIACCESSAPI
COIRESULT
COIBufferCopy(
- COIBUFFER in_DestBuffer,
- COIBUFFER in_SourceBuffer,
- uint64_t in_DestOffset,
- uint64_t in_SourceOffset,
- uint64_t in_Length,
- COI_COPY_TYPE in_Type,
- uint32_t in_NumDependencies,
- const COIEVENT* in_pDependencies,
- COIEVENT* out_pCompletion);
+ COIBUFFER in_DestBuffer,
+ COIBUFFER in_SourceBuffer,
+ uint64_t in_DestOffset,
+ uint64_t in_SourceOffset,
+ uint64_t in_Length,
+ COI_COPY_TYPE in_Type,
+ uint32_t in_NumDependencies,
+ const COIEVENT *in_pDependencies,
+ COIEVENT *out_pCompletion);
//////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT
COIBufferSetState(
- COIBUFFER in_Buffer,
- COIPROCESS in_Process,
- COI_BUFFER_STATE in_State,
- COI_BUFFER_MOVE_FLAG in_DataMove,
- uint32_t in_NumDependencies,
- const COIEVENT* in_pDependencies,
- COIEVENT* out_pCompletion);
+ COIBUFFER in_Buffer,
+ COIPROCESS in_Process,
+ COI_BUFFER_STATE in_State,
+ COI_BUFFER_MOVE_FLAG in_DataMove,
+ uint32_t in_NumDependencies,
+ const COIEVENT *in_pDependencies,
+ COIEVENT *out_pCompletion);
//////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT
COIBufferCreateSubBuffer(
- COIBUFFER in_Buffer,
- uint64_t in_Length,
- uint64_t in_Offset,
- COIBUFFER* out_pSubBuffer);
+ COIBUFFER in_Buffer,
+ uint64_t in_Length,
+ uint64_t in_Offset,
+ COIBUFFER *out_pSubBuffer);
//////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT
COIBufferReleaseRefcnt(
- COIPROCESS in_Process,
- COIBUFFER in_Buffer,
- uint64_t in_ReleaseRefcnt);
+ COIPROCESS in_Process,
+ COIBUFFER in_Buffer,
+ uint64_t in_ReleaseRefcnt);
//////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT
COIBufferAddRefcnt(
- COIPROCESS in_Process,
- COIBUFFER in_Buffer,
- uint64_t in_AddRefcnt);
+ COIPROCESS in_Process,
+ COIBUFFER in_Buffer,
+ uint64_t in_AddRefcnt);
#ifdef __cplusplus
} /* extern "C" */
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* @file source\COIEngine_source.h
*/
#ifndef DOXYGEN_SHOULD_SKIP_THIS
-#include <wchar.h>
-#include "../common/COITypes_common.h"
-#include "../common/COIResult_common.h"
-#include "../common/COIEngine_common.h"
+ #include <wchar.h>
+ #include "../common/COITypes_common.h"
+ #include "../common/COIResult_common.h"
+ #include "../common/COIEngine_common.h"
#endif // DOXYGEN_SHOULD_SKIP_THIS
#ifdef __cplusplus
/// The version string identifying the driver.
coi_wchar_t DriverVersion[COI_MAX_DRIVER_VERSION_STR_LEN];
- /// The ISA supported by the engine.
- COI_ISA_TYPE ISA;
+ /// The DeviceType supported by the engine.
+ COI_DEVICE_TYPE ISA;
/// The number of cores on the engine.
uint32_t NumCores;
///////////////////////////////////////////////////////////////////////////////
///
-/// Returns information related to a specified engine. Note that if Intel(R)
-/// Coprocessor Offload Infrastructure (Intel(R) COI) is unable to query
-/// a value it will be returned as zero but the call will still succeed.
+/// Returns information related to a specified engine. Note that if the runtime
+/// is unable to query a value it will be returned as zero but the call will
+/// still succeed.
///
///
/// @param in_EngineHandle
COIACCESSAPI
COIRESULT
COIEngineGetInfo(
- COIENGINE in_EngineHandle,
- uint32_t in_EngineInfoSize,
- COI_ENGINE_INFO* out_pEngineInfo);
+ COIENGINE in_EngineHandle,
+ uint32_t in_EngineInfoSize,
+ COI_ENGINE_INFO *out_pEngineInfo);
///////////////////////////////////////////////////////////////////////////////
///
-/// Returns the number of engines in the system that match the provided ISA.
+/// Returns the number of engines in the system that match the provided device type.
///
-/// Note that while it is possible to enumerate different types of Intel(R)
-/// Xeon Phi(TM) coprocessors on a single host this is not currently
-/// supported. Intel(R) Coprocessor Offload Infrastructure (Intel(R) COI)
-/// makes an assumption that all Intel(R) Xeon Phi(TM) coprocessors found
-/// in the system are the same architecture as the first coprocessor device.
+/// The number of available coprocessor devices (i.e. cards connected via PCIe)
+/// is detected by the COI runtime.
///
-/// Also, note that this function returns the number of engines that Intel(R)
-/// Coprocessor Offload Infrastructure (Intel(R) COI) is able to detect. Not
-/// all of them may be online.
-///
-/// @param in_ISA
+/// @param in_DeviceType
/// [in] Specifies the ISA type of the engine requested.
///
/// @param out_pNumEngines
///
/// @return COI_SUCCESS if the function completed without error.
///
-/// @return COI_DOES_NOT_EXIST if the in_ISA parameter is not valid.
+/// @return COI_DOES_NOT_EXIST if the in_DeviceType parameter is not valid.
///
/// @return COI_INVALID_POINTER if the out_pNumEngines parameter is NULL.
///
+/// @return COI_OUT_OF_RANGE if the number of selected devices is greater than 8.
+///
COIACCESSAPI
COIRESULT
COIEngineGetCount(
- COI_ISA_TYPE in_ISA,
- uint32_t* out_pNumEngines);
+ COI_DEVICE_TYPE in_DeviceType,
+ uint32_t *out_pNumEngines);
///////////////////////////////////////////////////////////////////////////////
///
/// Returns the handle of a user specified engine.
///
-/// @param in_ISA
+/// @param in_DeviceType
/// [in] Specifies the ISA type of the engine requested.
///
/// @param in_EngineIndex
/// [in] A unsigned integer which specifies the zero-based position of
/// the engine in a collection of engines. The makeup of this
-/// collection is defined by the in_ISA parameter.
+/// collection is defined by the in_DeviceType parameter.
///
/// @param out_pEngineHandle
-/// [out] The address of an COIENGINE handle.
+/// [out] The address of a COIENGINE handle.
///
/// @return COI_SUCCESS if the function completed without error.
///
-/// @return COI_DOES_NOT_EXIST if the in_ISA parameter is not valid.
+/// @return COI_DOES_NOT_EXIST if the in_DeviceType parameter is not valid.
///
/// @return COI_OUT_OF_RANGE if in_EngineIndex is greater than or equal to
-/// the number of engines that match the in_ISA parameter.
+/// the number of engines that match the in_DeviceType parameter.
///
/// @return COI_INVALID_POINTER if the out_pEngineHandle parameter is NULL.
///
COIACCESSAPI
COIRESULT
COIEngineGetHandle(
- COI_ISA_TYPE in_ISA,
- uint32_t in_EngineIndex,
- COIENGINE* out_pEngineHandle);
+ COI_DEVICE_TYPE in_DeviceType,
+ uint32_t in_EngineIndex,
+ COIENGINE *out_pEngineHandle);
+
+///////////////////////////////////////////////////////////////////////////////
+///
+/// Returns the hostname for a specified COIEngine.
+///
+/// @param in_EngineHandle
+/// [in] The connected COI Engine Handle passed in by the user that is
+/// used to request the hostname of the device connected by this COIEngine.
+///
+/// @param out_Hostname
+/// [out] The hostname of the device connected by this COIEngine.
+/// COI will write at most 4096 bytes and the user must make sure that the size
+/// of the memory pointed to by this argument is large enough.
+///
+/// @return COI_SUCCESS if the hostname was retrieved without error.
+///
+/// @return COI_ERROR if the function was unable to retrieve the hostname and/or
+/// the retrieved out_Hostname is NULL.
+///
+/// @return COI_INVALID_HANDLE if the in_EngineHandle is invalid.
+///
+/// @return COI_INVALID_POINTER if the out_Hostname is NULL.
+///
+COIACCESSAPI
+COIRESULT
+COIEngineGetHostname(
+ COIENGINE in_EngineHandle,
+ char *out_Hostname);
#ifdef __cplusplus
} /* extern "C" */
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
COIACCESSAPI
COIRESULT
COIEventWait(
- uint16_t in_NumEvents,
- const COIEVENT* in_pEvents,
- int32_t in_TimeoutMilliseconds,
- uint8_t in_WaitForAll,
- uint32_t* out_pNumSignaled,
- uint32_t* out_pSignaledIndices);
+ uint16_t in_NumEvents,
+ const COIEVENT *in_pEvents,
+ int32_t in_TimeoutMilliseconds,
+ uint8_t in_WaitForAll,
+ uint32_t *out_pNumSignaled,
+ uint32_t *out_pSignaledIndices);
COIACCESSAPI
COIRESULT
COIEventRegisterUserEvent(
- COIEVENT* out_pEvent);
+ COIEVENT *out_pEvent);
///////////////////////////////////////////////////////////////////////////////
///
/// @return COI_INVALID_HANDLE if in_Event is not a UserEvent
///
-/// @return COI_SUCCESS an event is successfully registered
+/// @return COI_SUCCESS if an event is successfully unregistered
///
COIACCESSAPI
COIRESULT
COIEventUnregisterUserEvent(
- COIEVENT in_Event);
+ COIEVENT in_Event);
//////////////////////////////////////////////////////////////////////////////
/// they can interpret it as they choose.
///
typedef void (*COI_EVENT_CALLBACK)(
- COIEVENT in_Event,
+ COIEVENT in_Event,
const COIRESULT in_Result,
- const void* in_UserData);
+ const void *in_UserData);
COIACCESSAPI
COIRESULT
COIEventRegisterCallback(
- const COIEVENT in_Event,
- COI_EVENT_CALLBACK in_Callback,
- const void* in_UserData,
- const uint64_t in_Flags);
+ const COIEVENT in_Event,
+ COI_EVENT_CALLBACK in_Callback,
+ const void *in_UserData,
+ const uint64_t in_Flags);
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
COIACCESSAPI
COIRESULT
COIPipelineCreate(
- COIPROCESS in_Process,
- COI_CPU_MASK in_Mask,
- uint32_t in_StackSize,
- COIPIPELINE* out_pPipeline);
+ COIPROCESS in_Process,
+ COI_CPU_MASK in_Mask,
+ uint32_t in_StackSize,
+ COIPIPELINE *out_pPipeline);
///////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT
COIPipelineDestroy(
- COIPIPELINE in_Pipeline);
+ COIPIPELINE in_Pipeline);
//////////////////////////////////////////////////////////////////////////////
/// RunFunctions. Setting it incorrectly can lead to cyclic dependencies
/// and can cause the respective pipeline to stall.
/// 2. RunFunctions can also segfault if enough memory space is not available
-/// on the sink for the buffers passed in. Pinned buffers and buffers that
-/// are AddRef'd need to be accounted for available memory space. In other
+/// on the sink for the buffers passed in. Buffers that are AddRef'd
+/// need to be accounted for available memory space. In other
/// words, this memory is not available for use until it is freed up.
/// 3. Unexpected segmentation faults or erroneous behavior can occur if
/// handles or data passed in to Runfunction gets destroyed before the
/// @return COI_ARGUMENT_MISMATCH if in_pReturnValue is non-NULL but
/// in_ReturnValueLen is zero.
///
-/// @return COI_RETRY if any input buffers, which are not pinned buffers,
-/// are still mapped when passed to the run function.
+/// @return COI_RETRY if any input buffers are still mapped when
+/// passed to the run function.
///
/// @return COI_MISSING_DEPENDENCY if buffer was not created on the process
/// associated with the pipeline that was passed in.
COIACCESSAPI
COIRESULT
COIPipelineRunFunction(
- COIPIPELINE in_Pipeline,
- COIFUNCTION in_Function,
- uint32_t in_NumBuffers,
- const COIBUFFER* in_pBuffers,
- const COI_ACCESS_FLAGS* in_pBufferAccessFlags,
- uint32_t in_NumDependencies,
- const COIEVENT* in_pDependencies,
- const void* in_pMiscData,
- uint16_t in_MiscDataLen,
- void* out_pAsyncReturnValue,
- uint16_t in_AsyncReturnValueLen,
- COIEVENT* out_pCompletion);
+ COIPIPELINE in_Pipeline,
+ COIFUNCTION in_Function,
+ uint32_t in_NumBuffers,
+ const COIBUFFER *in_pBuffers,
+ const COI_ACCESS_FLAGS *in_pBufferAccessFlags,
+ uint32_t in_NumDependencies,
+ const COIEVENT *in_pDependencies,
+ const void *in_pMiscData,
+ uint16_t in_MiscDataLen,
+ void *out_pAsyncReturnValue,
+ uint16_t in_AsyncReturnValueLen,
+ COIEVENT *out_pCompletion);
//////////////////////////////////////////////////////////////////////////////
COIACCESSAPI
COIRESULT
COIPipelineGetEngine(
- COIPIPELINE in_Pipeline,
- COIENGINE* out_pEngine);
+ COIPIPELINE in_Pipeline,
+ COIENGINE *out_pEngine);
//////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT
COIPipelineSetCPUMask(
- COIPROCESS in_Process,
- uint32_t in_CoreID,
- uint8_t in_ThreadID,
- COI_CPU_MASK* out_pMask);
+ COIPROCESS in_Process,
+ uint32_t in_CoreID,
+ uint8_t in_ThreadID,
+ COI_CPU_MASK *out_pMask);
//////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT
COIPipelineClearCPUMask(
- COI_CPU_MASK* in_Mask);
+ COI_CPU_MASK *in_Mask);
#ifdef __cplusplus
} /* extern "C" */
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
COIACCESSAPI
COIRESULT
COIProcessCreateFromFile(
- COIENGINE in_Engine,
- const char* in_pBinaryName,
- int in_Argc,
- const char** in_ppArgv,
- uint8_t in_DupEnv,
- const char** in_ppAdditionalEnv,
- uint8_t in_ProxyActive,
- const char* in_Reserved,
- uint64_t in_InitialBufferSpace,
- const char* in_LibrarySearchPath,
- COIPROCESS* out_pProcess);
+ COIENGINE in_Engine,
+ const char *in_pBinaryName,
+ int in_Argc,
+ const char **in_ppArgv,
+ uint8_t in_DupEnv,
+ const char **in_ppAdditionalEnv,
+ uint8_t in_ProxyActive,
+ const char *in_Reserved,
+ uint64_t in_InitialBufferSpace,
+ const char *in_LibrarySearchPath,
+ COIPROCESS *out_pProcess);
///////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT
COIProcessCreateFromMemory(
- COIENGINE in_Engine,
- const char* in_pBinaryName,
- const void* in_pBinaryBuffer,
- uint64_t in_BinaryBufferLength,
- int in_Argc,
- const char** in_ppArgv,
- uint8_t in_DupEnv,
- const char** in_ppAdditionalEnv,
- uint8_t in_ProxyActive,
- const char* in_Reserved,
- uint64_t in_InitialBufferSpace,
- const char* in_LibrarySearchPath,
- const char* in_FileOfOrigin,
- uint64_t in_FileOfOriginOffset,
- COIPROCESS* out_pProcess);
+ COIENGINE in_Engine,
+ const char *in_pBinaryName,
+ const void *in_pBinaryBuffer,
+ uint64_t in_BinaryBufferLength,
+ int in_Argc,
+ const char **in_ppArgv,
+ uint8_t in_DupEnv,
+ const char **in_ppAdditionalEnv,
+ uint8_t in_ProxyActive,
+ const char *in_Reserved,
+ uint64_t in_InitialBufferSpace,
+ const char *in_LibrarySearchPath,
+ const char *in_FileOfOrigin,
+ uint64_t in_FileOfOriginOffset,
+ COIPROCESS *out_pProcess);
//////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT
COIProcessDestroy(
- COIPROCESS in_Process,
- int32_t in_WaitForMainTimeout,
- uint8_t in_ForceDestroy,
- int8_t* out_pProcessReturn,
- uint32_t* out_pTerminationCode);
+ COIPROCESS in_Process,
+ int32_t in_WaitForMainTimeout,
+ uint8_t in_ForceDestroy,
+ int8_t *out_pProcessReturn,
+ uint32_t *out_pTerminationCode);
#define COI_MAX_FUNCTION_NAME_LENGTH 256
COIACCESSAPI
COIRESULT
COIProcessGetFunctionHandles(
- COIPROCESS in_Process,
- uint32_t in_NumFunctions,
- const char** in_ppFunctionNameArray,
- COIFUNCTION* out_pFunctionHandleArray);
+ COIPROCESS in_Process,
+ uint32_t in_NumFunctions,
+ const char **in_ppFunctionNameArray,
+ COIFUNCTION *out_pFunctionHandleArray);
#if COI_LIBRARY_VERSION >= 2
/// @name COIProcessLoadLibrary* flags, named after the corresponding
COIACCESSAPI
COIRESULT
COIProcessLoadLibraryFromMemory(
- COIPROCESS in_Process,
- const void* in_pLibraryBuffer,
- uint64_t in_LibraryBufferLength,
- const char* in_pLibraryName,
- const char* in_LibrarySearchPath,
- const char* in_FileOfOrigin,
- uint64_t in_FileOfOriginOffset,
- uint32_t in_Flags,
- COILIBRARY* out_pLibrary);
+ COIPROCESS in_Process,
+ const void *in_pLibraryBuffer,
+ uint64_t in_LibraryBufferLength,
+ const char *in_pLibraryName,
+ const char *in_LibrarySearchPath,
+ const char *in_FileOfOrigin,
+ uint64_t in_FileOfOriginOffset,
+ uint32_t in_Flags,
+ COILIBRARY *out_pLibrary);
__asm__(".symver COIProcessLoadLibraryFromMemory,"
"COIProcessLoadLibraryFromMemory@COI_2.0");
#else
COIRESULT
COIProcessLoadLibraryFromMemory(
-COIPROCESS in_Process,
- const void* in_pLibraryBuffer,
- uint64_t in_LibraryBufferLength,
- const char* in_pLibraryName,
- const char* in_LibrarySearchPath,
- const char* in_FileOfOrigin,
- uint64_t in_FileOfOriginOffset,
- COILIBRARY* out_pLibrary);
+ COIPROCESS in_Process,
+ const void *in_pLibraryBuffer,
+ uint64_t in_LibraryBufferLength,
+ const char *in_pLibraryName,
+ const char *in_LibrarySearchPath,
+ const char *in_FileOfOrigin,
+ uint64_t in_FileOfOriginOffset,
+ COILIBRARY *out_pLibrary);
__asm__(".symver COIProcessLoadLibraryFromMemory,"
"COIProcessLoadLibraryFromMemory@COI_1.0");
#endif
COIACCESSAPI
COIRESULT
COIProcessLoadLibraryFromFile(
- COIPROCESS in_Process,
- const char* in_pFileName,
- const char* in_pLibraryName,
- const char* in_LibrarySearchPath,
- uint32_t in_Flags,
- COILIBRARY* out_pLibrary);
+ COIPROCESS in_Process,
+ const char *in_pFileName,
+ const char *in_pLibraryName,
+ const char *in_LibrarySearchPath,
+ uint32_t in_Flags,
+ COILIBRARY *out_pLibrary);
__asm__(".symver COIProcessLoadLibraryFromFile,"
"COIProcessLoadLibraryFromFile@COI_2.0");
#else
COIRESULT
COIProcessLoadLibraryFromFile(
- COIPROCESS in_Process,
- const char* in_pFileName,
- const char* in_pLibraryName,
- const char* in_LibrarySearchPath,
- COILIBRARY* out_pLibrary);
+ COIPROCESS in_Process,
+ const char *in_pFileName,
+ const char *in_pLibraryName,
+ const char *in_LibrarySearchPath,
+ COILIBRARY *out_pLibrary);
__asm__(".symver COIProcessLoadLibraryFromFile,"
"COIProcessLoadLibraryFromFile@COI_1.0");
#endif
COIACCESSAPI
COIRESULT
COIProcessUnloadLibrary(
- COIPROCESS in_Process,
- COILIBRARY in_Library);
+ COIPROCESS in_Process,
+ COILIBRARY in_Library);
//////////////////////////////////////////////////////////////////////////////
COIACCESSAPI
COIRESULT
COIProcessRegisterLibraries(
- uint32_t in_NumLibraries,
- const void** in_ppLibraryArray,
- const uint64_t* in_pLibrarySizeArray,
- const char** in_ppFileOfOriginArray,
- const uint64_t* in_pFileOfOriginOffSetArray);
+ uint32_t in_NumLibraries,
+ const void **in_ppLibraryArray,
+ const uint64_t *in_pLibrarySizeArray,
+ const char **in_ppFileOfOriginArray,
+ const uint64_t *in_pFileOfOriginOffSetArray);
//////////////////////////////////////////////////////////////////////////////
/// they can interpret it as they choose.
///
typedef void (*COI_NOTIFICATION_CALLBACK)(
- COI_NOTIFICATIONS in_Type,
- COIPROCESS in_Process,
- COIEVENT in_Event,
- const void* in_UserData);
+ COI_NOTIFICATIONS in_Type,
+ COIPROCESS in_Process,
+ COIEVENT in_Event,
+ const void *in_UserData);
//////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT COIRegisterNotificationCallback(
- COIPROCESS in_Process,
- COI_NOTIFICATION_CALLBACK in_Callback,
- const void* in_UserData);
+ COIPROCESS in_Process,
+ COI_NOTIFICATION_CALLBACK in_Callback,
+ const void *in_UserData);
//////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
COIRESULT COIUnregisterNotificationCallback(
- COIPROCESS in_Process,
- COI_NOTIFICATION_CALLBACK in_Callback);
+ COIPROCESS in_Process,
+ COI_NOTIFICATION_CALLBACK in_Callback);
//////////////////////////////////////////////////////////////////////////////
///
COIACCESSAPI
void COINotificationCallbackSetContext(
- const void* in_UserData);
+ const void *in_UserData);
/// @name COIProcessSetCacheSize flags.
const uint32_t in_HugeFlags,
const uint64_t in_SmallPagePoolSize,
const uint32_t in_SmallFlags,
- uint32_t in_NumDependencies,
- const COIEVENT* in_pDependencies,
- COIEVENT* out_pCompletion);
+ uint32_t in_NumDependencies,
+ const COIEVENT *in_pDependencies,
+ COIEVENT *out_pCompletion);
//////////////////////////////////////////////////////////////////////////////
+++ /dev/null
-/*
- * Copyright 2010-2015 Intel Corporation.
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, version 2.1.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301 USA.
- *
- * Disclaimer: The codes contained in these modules may be specific
- * to the Intel Software Development Platform codenamed Knights Ferry,
- * and the Intel product codenamed Knights Corner, and are not backward
- * compatible with other Intel products. Additionally, Intel will NOT
- * support the codes or instruction set in future products.
- *
- * Intel offers no warranty of any kind regarding the code. This code is
- * licensed on an "AS IS" basis and Intel is not obligated to provide
- * any support, assistance, installation, training, or other services
- * of any kind. Intel is also not obligated to provide any updates,
- * enhancements or extensions. Intel specifically disclaims any warranty
- * of merchantability, non-infringement, fitness for any particular
- * purpose, and any other warranty.
- *
- * Further, Intel disclaims all liability of any kind, including but
- * not limited to liability for infringement of any proprietary rights,
- * relating to the use of the code, even if Intel is notified of the
- * possibility of such liability. Except as expressly stated in an Intel
- * license agreement provided with this code and agreed upon with Intel,
- * no license, express or implied, by estoppel or otherwise, to any
- * intellectual property rights is granted herein.
- */
-/**
- * Description:
- * External APIs of MYO runtime (MYO stands for Mine, Yours and Ours).
- **/
-
-#ifndef _MYO_H_
-#define _MYO_H_
-
-#include "myotypes.h"
-#include "myoimpl.h"
-
-/** @ingroup MYO
- * @addtogroup MYO_API
-@{
-* @file myo.h
-*/
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/****************************************************************************
- Arena-based APIs
- ***************************************************************************/
-
-/*
- * Arena is a set of memory pages with the same ownership type. The ownership
- * type of all the memory pages inside the same arena can only be changed as
- * a whole. For "OURS", it is also the minimal unit of sync operations to
- * implement "release consistency".
- */
-
-/** @fn extern MyoError myoArenaCreate(MyoOwnershipType in_Type,
- * int in_Property, MyoArena *out_pArena)
- * @brief Create an arena with specified ownership type and property.
- *
- * @param in_Type Specified ownership type (MYO_ARENA_OURS or
- * MYO_ARENA_MINE).
- * @param in_Property Specified properties of the arena. Set it
- * to 0 to use default properties.
- *
- * MYO_RELEASE_CONSISTENCY or MYO_STRONG_RELEASE_CONSISTENCY
- * or MYO_STRONG_CONSISTENCY:
- *
- * Consistency modes for "OURS" arenas. For MYO_RELEASE_CONSISTENCY,
- * there are 2 functions, "Acquire" and "Release", which are
- * used for memory ordering. "Release" makes all local stores
- * prior to the release globally visible; "Acquire" syncs up the
- * local memory with all stores that have been made globally
- * visible. However, there is no definite answer as to whether
- * local stores can be globally visible before reaching a release
- * point, nor whether the newest globally visible stores can be
- * updated to local before reaching an acquire point. By using
- * MYO_STRONG_RELEASE_CONSISTENCY, the answer to these questions
- * is "no". A sequential consistency model is maintained to the
- * arena when using MYO_STRONG_CONSISTENCY.
- * MYO_RELEASE_CONSISTENCY is the default property.
- *
- * MYO_UPDATE_ON_DEMAND or MYO_UPDATE_ON_ACQUIRE:
- *
- * Only apply to "OURS" arenas with "Release Consistency".
- * MYO_UPDATE_ON_ACQUIRE means that the shared pages of this
- * arena will be updated on acquire point; MYO_UPDATE_ON_DEMAND
- * means that the shared pages will not be updated until they
- * are accessed. MYO_UPDATE_ON_DEMAND is the default property.
- *
- * MYO_RECORD_DIRTY or MYO_NOT_RECORD_DIRTY:
- *
- * This property controls whether to record dirty pages.
- * There will be runtime overhead when recording dirty pages,
- * as it can reduce the communication data. It is a trade-off
- * for performance. Also when MYO_NOT_RECORD_DIRTY is set for
- * "OURS" arena, the runtime cannot guarantee the correctness
- * when the host and card modify the same shared page between
- * the same sync segment. MYO_RECORD_DIRTY is the default
- * property.
- *
- * MYO_ONE_VERSION or MYO_MULTI_VERSION:
- *
- * Only apply to "OURS" arenas with "Release Consistency". When
- * MYO_MULTI_VERSION is set, this arena can only be "release" on
- * HOST side and "acquire" on CARD side. Releasing the arena on
- * HOST will create a new versioned data and put it into a FIFO;
- * acquiring the arena on CARD will get the versioned data
- * from the FIFO one by one. MYO_ONE_VERSION is the default
- * property.
- *
- * MYO_CONSISTENCY or MYO_NO_CONSISTENCY:
- *
- * Only apply to "OURS" arenas with "Release Consistency". When
- * MYO_NO_CONSISTENCY is set, the consistency of the arena will
- * not be maintained. That is, it is a no-op operation when
- * calling acquire/release for such arenas. MYO_CONSISTENCY is
- * the default property.
- *
- * MYO_HOST_TO_DEVICE and MYO_DEVICE_TO_HOST:
- *
- * When it is certain that there is only one communication
- * direction for this arena, it can be created with only
- * MYO_HOST_TO_DEVICE or MYO_DEVICE_TO_HOST so the runtime
- * can perform optimizations. The default property is
- * MYO_HOST_TO_DEVICE | MYO_DEVICE_TO_HOST.
- *
- * @param out_pArena Used to store the handle of the created arena.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-CILK_SHARED MyoError myoArenaCreate(MyoOwnershipType in_Type, int in_Property, MyoArena *out_pArena);
-
-/** @fn extern MyoError myoArenaDestroy(MyoArena in_Arena)
- * @brief Destroy an arena. As a result, the arena can not be
- * referred any more.
- *
- * @param in_Arena Arena handle returned by previous call to
- * myoArenaCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-CILK_SHARED MyoError myoArenaDestroy(MyoArena in_Arena);
-
-/** @fn extern void *myoArenaMalloc(MyoArena in_Arena, size_t in_Size)
- * @brief Allocates size bytes from the specified arena, and returns
- * the start address of the allocated memory. The memory is not
- * cleared.
- *
- * @param in_Arena Arena handle returned by previous call to
- * myoArenaCreate.
- * @param in_Size Size (bytes) of the required memory space.
- * @return
- * The start address of the allocated memory space.
- * NULL: Failed.
- **/
-MYOACCESSAPI
-void *myoArenaMalloc(MyoArena in_Arena, size_t in_Size);
-
-/** @fn extern void myoArenaFree(MyoArena in_Arena, void *in_pPtr)
- * @brief Frees the memory space allocated by myoArenaMalloc to the
- * specified arena.
- *
- * @param in_Arena Arena handle returned by previous call to
- * myoArenaCreate.
- * @param in_pPtr The start address of the specified memory
- * space, which must be retured by myoArenaMalloc.
- * @return
- **/
-MYOACCESSAPI
-void myoArenaFree(MyoArena in_Arena, void *in_pPtr);
-
-/** @fn extern void *myoArenaAlignedMalloc(MyoArena in_Arena,
- * size_t in_Size, size_t in_Alignment)
- * @brief Allocates size bytes from the specified arena. The
- * start address of the allocated memory will be a multiple of the
- * alignment, which must be a power of two.
- *
- * @param in_Arena Arena handle returned by previous call to
- * myoArenaCreate.
- * @param in_Size Size (bytes) of the required memory space.
- * @param in_Alignment The alignment value (must be a power
- * of two).
- * @return
- * The start address of the allocated memory space.
- * NULL: Failed.
- **/
-MYOACCESSAPI
-void *myoArenaAlignedMalloc(MyoArena in_Arena, size_t in_Size, size_t in_Alignment);
-
-/** @fn extern void myoArenaAlignedFree(MyoArena in_Arena, void *in_pPtr)
- * @brief Frees the memory space allocated by myoArenaAlignedMalloc
- * to the specified arena.
- *
- * @param in_Arena Arena handle returned by previous call to
- * myoArenaCreate.
- * @param in_pPtr The start address of the specified memory space,
- * which must be returned by myoArenaAlignedMalloc.
- * @return
- **/
-MYOACCESSAPI
-void myoArenaAlignedFree(MyoArena in_Arena, void *in_pPtr);
-
-/** @fn extern MyoError myoArenaAcquire(MyoArena in_Arena)
- * @brief myoArenaAcquire is the sync point for "OURS" arena with
- * "Release Consistency". myoArenaAcquire is used to obtain all
- * stores of "OURS" arena that have been made globally visible prior
- * to this point.
- *
- * @param in_Arena Arena handle returned by previous call to
- * myoArenaCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-CILK_SHARED MyoError myoArenaAcquire(MyoArena in_Arena);
-
-/** @fn extern MyoError myoArenaRelease(MyoArena in_Arena)
- * @brief myoArenaRelease is the sync point for "OURS" arena with
- * "Release Consistency". myoArenaRelease is used to flush all prior
- * stores of "OURS" arena to be globally visible.
- *
- * @param in_Arena Arena handle returned by previous call to
- * myoArenaCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-CILK_SHARED MyoError myoArenaRelease(MyoArena in_Arena);
-
-/** @fn extern MyoError myoArenaAcquireOwnership(MyoArena in_Arena)
- * @brief Changes the ownership type of the arena to MYO_ARENA_MINE.
- *
- * @param in_Arena Arena handle returned by previous call to
- * myoArenaCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoArenaAcquireOwnership(MyoArena in_Arena);
-
-/** @fn extern MyoError myoArenaReleaseOwnership(MyoArena in_Arena)
- * @brief Change the ownership type of the arena to MYO_ARENA_OURS.
- *
- * @param in_Arena Arena handle returned by previous call to
- * myoArenaCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoArenaReleaseOwnership(MyoArena in_Arena);
-
-/** @fn extern MyoError myoArenaGetHandle(void *in_pPtr,
- * MyoArena *out_pArena)
- * @brief Gets the arena handle of the arena that contains the memory
- * space pointed to by "in_pPtr". This API can be used when it is
- * not clear which arena handle should be used for other arena
- * related APIs.
- *
- * @param in_pPtr The start address of a chunk of memory space.
- * @param out_pArena Handle of the arena.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-CILK_SHARED MyoError myoArenaGetHandle(void *in_pPtr, MyoArena *out_pArena);
-
-/********************************************************************************
- APIs for the default arena
- *******************************************************************************/
-
-/**
- * There will be a default arena inside MYO runtime, which will be used when
- * there is no specified arena.
- **/
-
-/** @fn extern void* myoSharedMalloc(size_t in_Size)
- * @brief Allocates size bytes from the default arena, and returns the
- * start address of the allocated memory. The memory is not cleared.
- *
- @param in_Size Size (bytes) of the required memory space.
- * @return
- * The start address of the allocated memory space.
- * NULL: Failed.
- **/
-MYOACCESSAPI
-void* myoSharedMalloc(size_t in_Size);
-
-/** @fn extern void myoSharedFree(void *in_pPtr)
- * @brief Frees the memory space allocated by myoArenaMalloc to the
- * default arena.
- *
- * @param in_pPtr The start address of the specified memory space,
- * which must be retured by myoSharedMalloc.
- * @return
- **/
-MYOACCESSAPI
-void myoSharedFree(void *in_pPtr);
-
-/** @fn extern void* myoSharedAlignedMalloc(size_t in_Size,
- * size_t in_Alignment)
- * @brief Allocates size bytes from the default arena. The start
- * address of the allocated memory will be a multiple of alignment,
- * which must be a power of two.
- *
- * @param in_Size Size (bytes) of the required memory space.
- * @param in_Alignment The alignment value, which must be an power of two.
- * @return
- * The start address of the allocated memory space.
- * NULL: Failed.
- **/
-MYOACCESSAPI
-void* myoSharedAlignedMalloc(size_t in_Size, size_t in_Alignment);
-
-/** @fn extern void myoSharedAlignedFree(void *in_pPtr)
- * @brief Frees the memory space allocated by myoArenaAlignedMalloc
- * to the default arena.
- *
- * @param in_pPtr The start address of the specified memory space,
- * which must be returned by myoArenaAlignedMalloc.
- * @return
- **/
-MYOACCESSAPI
-void myoSharedAlignedFree(void *in_pPtr);
-
-/** @fn extern MyoError myoAcquire()
- * @brief myoAcquire is the sync point for the default arena with
- * "Release Consistency". myoAcquire is used to obtain all stores of
- * the default arena that have been made globally visible prior to
- * this point.
- *
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoAcquire();
-
-/** @fn extern MyoError myoRelease()
- * @brief myoRelease is the sync point for the default arena with
- * "Release Consistency". myoRelease is used to flush all prior stores
- * of the default arena to be globally visible.
- *
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoRelease();
-
-/** @fn extern MyoError myoAcquireOwnership()
- * @brief Changes the ownership type of the default arena to
- * MYO_ARENA_MINE.
- *
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoAcquireOwnership();
-
-/** @fn extern MyoError myoReleaseOwnership()
- * @brief Change the ownership type of the default arena to
- * MYO_ARENA_OURS.
- *
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoReleaseOwnership();
-
-/*****************************************************************************
- APIs for global sync operations.
- *****************************************************************************/
-
-/** @fn extern MyoError myoMutexCreate(MyoMutex *out_pMutex)
- * @brief Create a mutex and return the mutex handle.
- *
- * @param out_pMutex Used to store the handle of the created mutex.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoMutexCreate(MyoMutex *out_pMutex);
-
-/** @fn extern MyoError myoMutexLock(MyoMutex in_Mutex)
- * @brief Lock the mutex. If the mutex is already locked by other peers,
- * the call blocks until the mutex becomes available. Currently,
- * attempting to re-acquire the mutex will cause a deadlock.
- *
- * @param in_Mutex the mutex handle returned by myoMutexCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoMutexLock(MyoMutex in_Mutex);
-
-/** @fn extern MyoError myoMutexUnlock(MyoMutex in_Mutex)
- * @brief Release the locked mutex.
- * Currently, attempting to release an unlocked mutex will cause
- * undefined results.
- *
- * @param in_Mutex the mutex handle returned by myoMutexCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoMutexUnlock(MyoMutex in_Mutex);
-
-/** @fn extern MyoError myoMutexTryLock(MyoMutex in_Mutex)
- * @brief Try to lock the mutex. myoMutexTryLock is equivalent to
- * myoMutexLock, except that myoMutexLock will return immediately if
- * the mutex is already locked.
- *
- * @param in_Mutex the mutex handle returned by myoMutexCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoMutexTryLock(MyoMutex in_Mutex);
-
-/** @fn extern MyoError myoMutexDestroy(MyoMutex in_Mutex)
- * @brief Destroy the mutex.
- *
- * @param in_Mutex the mutex handle returned by myoMutexCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoMutexDestroy(MyoMutex in_Mutex);
-
-/** @fn extern MyoError myoSemCreate(int in_Value, MyoSem *out_pSem)
- * @brief Create a semaphore and return the semaphore handle.
- *
- * @param in_Value the initial value for the semaphore.
- * @param out_pSem Used to store the handle of the created semaphore.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoSemCreate(int in_Value, MyoSem *out_pSem);
-
-/** @fn extern MyoError myoSemWait(MyoSem in_Sem)
- * @brief Decrements (locks) the semaphore. If the semaphore value is
- * greater than zero, then the decrement proceeds and the function
- * returns immediately, or else the call blocks until the semaphore
- * value rises above zero.
- *
- * @param in_Sem the semaphore handle returned by myoSemCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoSemWait(MyoSem in_Sem);
-
-/** @fn extern MyoError myoSemPost(MyoSem in_Sem)
- * @brief Increments (unlocks) the semaphore. If the semaphore value
- * becomes greater than zero, one blocked myoSemWait call will be
- * notified to return.
- *
- * @param in_Sem the semaphore handle returned by myoSemCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoSemPost(MyoSem in_Sem);
-
-/** @fn extern MyoError myoSemTryWait(MyoSem in_Sem)
- * @brief Try to lock semaphore. myoSemTryWait is the same as
- * myoSemAcquire, except that if the decrement cannot be immediately
- * performed, then the call returns instead of blocking.
- *
- * @param in_Sem the semaphore handle returned by myoSemCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoSemTryWait(MyoSem in_Sem);
-
-/** @fn extern MyoError myoSemDestroy(MyoSem in_Sem)
- * @brief Destroy the semaphore.
- *
- * @param in_Sem the semaphore handle returned by myoSemCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoSemDestroy(MyoSem in_Sem);
-
-/** @fn extern MyoError myoBarrierCreate(int in_Count, MyoBarrier *out_pBarrier)
- * @brief Create a barrier and return the barrier handle.
- *
- * @param in_Count the number of threads that must call
- * myoBarrierWait before any of them successfully return.
- * @param out_pBarrier Used to store the handle of the created
- * barrier.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoBarrierCreate(int in_Count, MyoBarrier *out_pBarrier);
-
-/** @fn extern MyoError myoBarrierWait(MyoBarrier in_Barrier)
- * @brief The caller will block until the required number of threads
- * have called myoBarrierWait with the same barrier handle.
- *
- * @param in_Barrier the barrier handle returned by myoBarrierCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoBarrierWait(MyoBarrier in_Barrier);
-
-/** @fn extern MyoError myoBarrierDestroy(MyoBarrier in_Barrier)
- * @brief Destroy the barrier.
- *
- * @param in_Barrier the barrier handle returned by myoBarrierCreate.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoBarrierDestroy(MyoBarrier in_Barrier);
-
-/*****************************************************************************
- MISC APIs.
- *****************************************************************************/
-
-/**
- * @cond INCLUDE_MYO_INTERNAL_DOCUMENTATION
- **/
-
-MYOACCESSAPI
-int myoMyId();
-/* int myoNumNodes() returns the number of peers, minus one, to
- equal the number of cards in the system. */
-MYOACCESSAPI
-int myoNumNodes();
-
-MYOACCESSAPI
-unsigned long long myoTicks();
-MYOACCESSAPI
-unsigned long long myoWallTime();
-MYOACCESSAPI
-void myoStatOn();
-MYOACCESSAPI
-void myoStatOff();
-
-/** @fn extern MyoError myoGetMemUsage(uint64 *out_memUsedMB)
- * @brief Retrieves the amount of shared memory currently used.
- * myoGetMemUsage() fills in out_memUsedMB when the pointer is not NULL.
- *
- * @param out_memUsedBytes, pointer to the current size shared memory used.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoGetMemUsage(unsigned int *out_memUsedMB);
-
-/** @fn extern MyoError myoHTimeOn(int in_On)
- * @brief Toggle MYO HTime report feature on/off.
- *
- * @param in_On: 1 turn on MYO HTime report
- * 0 turn off MYO HTime report
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-extern MyoError myoHTimeOn(int in_On);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
- /**
- * @endcond
- **/
-
-/*! @} */
+++ /dev/null
-/*
- * Copyright 2010-2015 Intel Corporation.
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, version 2.1.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301 USA.
- *
- * Disclaimer: The codes contained in these modules may be specific
- * to the Intel Software Development Platform codenamed Knights Ferry,
- * and the Intel product codenamed Knights Corner, and are not backward
- * compatible with other Intel products. Additionally, Intel will NOT
- * support the codes or instruction set in future products.
- *
- * Intel offers no warranty of any kind regarding the code. This code is
- * licensed on an "AS IS" basis and Intel is not obligated to provide
- * any support, assistance, installation, training, or other services
- * of any kind. Intel is also not obligated to provide any updates,
- * enhancements or extensions. Intel specifically disclaims any warranty
- * of merchantability, non-infringement, fitness for any particular
- * purpose, and any other warranty.
- *
- * Further, Intel disclaims all liability of any kind, including but
- * not limited to liability for infringement of any proprietary rights,
- * relating to the use of the code, even if Intel is notified of the
- * possibility of such liability. Except as expressly stated in an Intel
- * license agreement provided with this code and agreed upon with Intel,
- * no license, express or implied, by estoppel or otherwise, to any
- * intellectual property rights is granted herein.
- */
-/**
- Description:
- Define APIs of MYO for compiler or pre-processor to transfer original programs.
- */
-
-#ifndef _MYO_IMPL_H_
-#define _MYO_IMPL_H_
-
-/** @ingroup MYO
- * @addtogroup MYOIMPL_API
-@{
-* @file myoimpl.h
-*/
-
-#ifdef __cplusplus
-extern "C" {
-#define EXTERN_C extern "C"
-#else
-#define EXTERN_C /* nothing */
-#endif
-
-# define MYOACCESSAPI /* nothing */
-
-#ifdef DEFINE_ARENA_API_CILK_SHARED
-#define CILK_SHARED _Cilk_shared
-#else
-#define CILK_SHARED /* nothing */
-#endif
-
-/* **************************************************************************** *\
- APIs to enable functions being remotely called
-\* **************************************************************************** */
-
-typedef void *(*MyoiRemoteFuncType)(void *);
-
-/** @fn extern MyoError myoiRemoteFuncRegister(MyoiRemoteFuncType in_pFuncAddr,
- * const char *in_pFuncName)
- * @brief Register a function so that it can be remotely called. This should be
- * done in myoiUserInit or before calling myoiLibInit. After myoiLibInit,
- * there will be a table on all peers, which contains the information for
- * all remotely callable functions.
- *
- * @param in_pWrapFuncAddr address of the wrapper function.
- * @param in_pFuncName name of the function.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-
-MYOACCESSAPI
-MyoError myoiRemoteFuncRegister(MyoiRemoteFuncType in_pFuncAddr,
- const char *in_pFuncName);
-/** @fn extern MyoError myoiRemoteFuncLookupByName(char *in_pFuncName,
- * MyoiRemoteFuncType *out_pWrapFuncAddr)
- * @brief Get the address of the wrapper function by looking up the table
- * by name. This API can be used when assigning a function pointer to
- * remotely callable functions.
- *
- * @param in_pFuncName name of the function.
- * @param out_pWrapFuncAddr address of the wrapper function.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI MyoError myoiRemoteFuncLookupByName(char *in_pFuncName,
- MyoiRemoteFuncType *out_pWrapFuncAddr);
-
-/** @fn extern MyoError myoiRemoteFuncLookupByAddr(MyoiRemoteFuncType
- * in_pWrapFuncAddr,char **out_pFuncName)
- * @brief Get the name of a remote function by looking up the table by
- * the address. This API can be used when calling a remotely callable
- * function by a function pointer.
- *
- * @param in_pWrapFuncAddr address of the function.
- * @param out_pFuncName name of the function.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI MyoError myoiRemoteFuncLookupByAddr(MyoiRemoteFuncType in_pWrapFuncAddr,
- char **out_pFuncName);
-
-//! Host Side Shared Function Pointer Entry Struct
-typedef struct {
- //! Function Name
- const char *funcName;
- //! Function Address
- void *funcAddr;
- //! Local Thunk Address
- void *localThunkAddr;
-} MyoiHostSharedFptrEntry;
-
-//! Target Side Shared Function Pointer Entry Struct
-typedef struct {
- //! Function Name
- const char *funcName;
- //! Function Address
- void *funcAddr;
- //! Wrap Function Address
- void *wrapFuncAddr;
- //! Locak Thunk Address
- void *localThunkAddr;
-} MyoiTargetSharedFptrEntry;
-
-/**
- * @cond INCLUDE_MYO_INTERNAL_DOCUMENTATION
- * @fn extern MyoError myoiHostFptrTableRegister(void *in_pAddrOfFptrTable,
- * int in_NumEntry, int in_Ordered)
- * @brief Register shared functions on host side. A 16 byte thunk will be
- * allocated for each function entry in non-coherent shared memory. The
- * thunk will contain a jump instruction to the local version of the
- * shared function, which is provided by the second item of the function
- * entry. Also, the address of the thunk will be stored to the 3rd item
- * of the function entry for Compiler usage.
- *
- * @param in_pAddrOfFptrTable start address of the shared function
- * table, assuming it follows the format of MyoiHostSharedFptrEntry.
- * @param in_NumEntry number of entry in the table.
- * @param in_Ordered whether the table is ordered by function name.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoiHostFptrTableRegister(
- void *in_pAddrOfFptrTable, int in_NumEntry, int in_Ordered);
-
-/** @fn extern MyoError myoiTargetFptrTableRegister(void *in_pAddrOfFptrTable,
- * int in_NumEntry, int in_Ordered)
- * @brief Register shared functions on target side. This function is the
- * same as myoiHostFptrTableRegister, except it does not need to allocate
- * thunks from non-coherent shared memory for each function entry, but
- * instead looks up this information from a table retrieved from the
- * Host side.
- *
- * @param in_pAddrOfFptrTable start address of the shared function
- * table, assuming it follows the format of MyoiTargetSharedFptrEntry.
- * @param in_NumEntry number of entry in the table.
- * @param in_Ordered whether the table is ordered by function name.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-extern MyoError myoiTargetFptrTableRegister(
- void *in_pAddrOfFptrTable, int in_NumEntry, int in_Ordered);
-/**
- * @endcond
- **/
-
-/* *************************************************************************** *\
- APIs for remote function calls
-\* *************************************************************************** */
-
-typedef void * MyoiRFuncCallHandle;
-
-/** @fn extern MyoiRFuncCallHandle myoiRemoteCall(char *in_pFuncName,
- * void *in_pArgs, int in_deviceNum)
- * @brief Call a remote callable function. If there are multiple arguments
- * for the function, pack them to a shared buffer beforehand and take the
- * address of the shared buffer as this function. After receiving the call
- * requests from other peers, the arguments should be unpacked from the
- * shared buffer before calling the target function. The shared buffer
- * can also be used to store the return value of the function.
- *
- * @param in_pFuncName name of the function.
- * @param in_pArgs address of the shared buffer.
- * @return
- * Handle used to check the result.
- **/
-MYOACCESSAPI
-MyoiRFuncCallHandle myoiRemoteCall(const char *in_pFuncName, void *in_pArgs, int in_deviceNum);
-
-/**
- * @cond INCLUDE_MYO_INTERNAL_DOCUMENTATION
- * @fn extern MyoError myoiRemoteThunkCall(void *in_funcThunkAddr, void *in_pArgs, int in_deviceNum)
- * @brief Call a remote callable function. If there are multiple arguments for
- * the function, pack them to a shared buffer beforehand and take the address
- * of the shared buffer as this function. After receiving the call requests
- * from other peers, the arguments should be unpacked from the shared buffer
- * before calling the target function. The shared buffer can also be used to
- * store the return value of the function.
- *
- * @param in_funcThunkAddr pointer to function thunk in the non-coherent
- * shared memory.
- * @param in_pArgs address of the shared buffer.
- * @PARAM in_deviceNum: device ID (0-N-1) for the MIC device to run
- * function call. -1 request causes MYO to schedule an available device.
- * For RPC from device to host, in_deviceNum should always be -1.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoiRemoteThunkCall(void *in_funcThunkAddr, void *in_pArgs, int in_deviceNum);
-/**
- * @endcond
- **/
-
-/** @fn extern MyoError myoiCheckResult(MyoiRFuncCallHandle in_Handle)
- * @brief Check whether the remote call is done.
- *
- * @param in_Handle handle of the remote call.
- * @return
- * MYO_SUCCESS (done); or
- * an error number to indicate the error.
- **/
-extern MyoError myoiCheckResult(MyoiRFuncCallHandle in_Handle);
-
-/** @fn extern MyoError myoiGetResult(MyoiRFuncCallHandle in_Handle)
- * @brief Wait till the remote call is done.
- *
- * @param in_Handle handle of the remote call.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoiGetResult(MyoiRFuncCallHandle in_Handle);
-
-/* **************************************************************************** *\
- APIs related with shared variables.
-\* **************************************************************************** */
-
-/*
- * It is Compiler's responsibility to make sure all shared variables
- * located in shared memory space and have the same address in all sides.
- * However, it is hard for Compiler to do this. So we propose a solution
- * which is changing the definition of all shared variables and accessing
- * them indirectly, and making sure all shared variables pointing
- * to the same shared space on all sides. For example,
- *
- * "shared int a;" is changed to "shared int *a";
- * Also all the accesses to "a" is change to "*a".
- *
- * We suppose the shared memory for each shared variable is allocated on
- * host side by Compiler. For the upper example, Compiler can call:
- *
- * a = (shared int *) myoSharedMalloc(sizeof(shared int));
- *
- * Now the issue is how to make "a" on other sides also pointing to the
- * same shared memory on other sides. We provide two methods to do this.
- * They can be used in a hybrid way.
- */
-
-/*
- * The first method is calling myoiVarRegister for each shared variable
- * on all sides in myoiUserInit. On host side, we will get a table containing
- * a table containing the shared address and name of each shared variable.
- * After calling myoiUserInit, we will propagate the table to other sides.
- * On card side, after getting the table from host, myoiUserInit is called.
- * When calling myoiVarRegister in myoiUserInit, we will make local pointer
- * of each shared variable pointing to the same shared memory with the local
- * pointer on host side of the same shared variable pointing to.
- */
-
-/*
- * The second method suppose that Compiler already have a table on all sides.
- * On host side, the table contains the name and the shared address of each
- * shared variable. On card side, the table contains the name of each shared
- * variable and the address of the local pointer which will pointing to shared
- * memory space.
- *
- * On host side, Compiler generates a call to myoiHostVarTablePropagate
- * after initializing MYO runtime and making the host side table ready.
- * On card side, Compiler uses myoiMicVarTableRegister to tell
- * the runtime where the card side table is.
- *
- * Since there may be multiple libraries on card side for the same application,
- * myoiHostVarTablePropagate and myoiMicVarTableRegister can be called multiple
- * times and called simultaneously.
- *
- * Inside runtime, the local pointer of the same shared variable on all sides
- * will be make sure pointing to the same shared space by using the information
- * of the Compiler provided tables.
- */
-
-/*
- * Comipler knows the following two structures to make sure the var table
- * has the following format.
- */
-
-/*
- * This is structure of the Shared var table entry. This table contains
- * the shared address and name of each shared variable
- */
-
-/** @fn extern MyoError myoiVarRegister(void *in_pAddrOfLocalPtrToShared, char *in_pSVarName)
- * @brief Register shared variables. Call it on all sides in myoiUserInit.
- * On host side, make sure calling it after allocating shared memory for
- * the shared variables by calling myoSharedMalloc.
- *
- * @param in_pAddrOfLocalPtrToShared the address assigned by the compiler
- * for the shared variable, which is the address of a local pointer,
- * pointing to shared memory space.
- * @param in_pSVarName name of shared variable.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoiVarRegister(
- void *in_pAddrOfLocalPtrToShared, const char *in_pSVarName);
-
-/*
- * Compiler knows the following two structures to make sure the var table
- * has the following format.
- */
-
-/**
- * This is structure of the Shared var table entry. This table contains
- * the shared address and name of each shared variable
- **/
-typedef struct {
- //! Variable Name
- const char *varName;
- //! Shared Address
- void *sharedAddr;
-} MyoiSharedVarEntry;
-
-//! Structure of the var table entry on host
-typedef struct {
- //! Variable Name
- const char *varName;
- //! Variable Size
- int size;
- //! Local pointer to Shared var
- void *ptrToLocalPtrToShared;
-} MyoiHostSharedVarEntry;
-
-//! Structure of the var table entry on card
-typedef struct {
- //! Variable Name
- const char *varName;
- //! Local pointer to Shared var
- void *ptrToLocalPtrToShared;
-} MyoiMicSharedVarEntry;
-
-/** @fn extern MyoError myoiHostVarTablePropagate(void *in_pAddrOfSVarTable, int in_NumEntry)
- * @brief Send the host side var table to the card side. Card side will also
- * have a copy of the host side var table after this propagation, although it
- * is in an internal format different than the original host side var table,
- * due to implementation convenience.
- *
- * @param in_pAddrOfSVarTable start address of the host side var table,
- * assuming it follows the format of MyoiSharedVarEntry.
- * @param in_NumEntry number of entry in the table.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI MyoError myoiHostVarTablePropagate(
- void *in_pAddrOfSVarTable, int in_NumEntry);
-
-/**
- * @cond INCLUDE_MYO_INTERNAL_DOCUMENTATION
- * @fn extern MyoError myoiMicVarTableRegister(void *in_pAddrOfSVarTable, int in_NumEntry)
- * @brief Tell the runtime where the card side table is.
- *
- * @param in_pAddrOfSVarTable start address of the card side var
- * table, assuming it follows the format of MyoiMicSharedVarEntry.
- * @param in_NumEntry number of entry in the table.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-extern MyoError myoiMicVarTableRegister(
- void *in_pAddrOfSVarTable, int in_NumEntry);
-/**
- * @endcond
- **/
-
-/** @fn MyoError myoiHostSharedMallocTableRegister(void *in_pAddrOfSVarTable, int in_NumEntry, int in_Ordered)
- * @brief Allocate shared memory for all shared variables in the table.
- * Also update local address of the shared variable with new shared address.
- *
- * @param in_pAddrOfSVarTable start address of the shared variable table,
- * assuming it follows the format of MyoiHostSharedVarEntry.
- * @param in_NumEntry number of entry in the table.
- * @param in_Ordered whether the table ordered by name.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoiHostSharedMallocTableRegister(
- void *in_pAddrOfSVarTable, int in_NumEntry, int in_Ordered);
-
-/** @fn extern MyoError myoiTargetSharedMallocTableRegister(void *in_pAddrOfSVarTable, int in_NumEntry, int in_Ordered)
- * @brief Register the shared variables on the target side.
- *
- * @param in_pAddrOfSVarTable start address of the shared varaible table,
- * assuming it follows the format of MyoiMicSharedVarEntry.
- * @param in_NumEntry number of entry in the table.
- * @param in_Ordered whether the table ordered by name.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-extern MyoError myoiTargetSharedMallocTableRegister(
- void *in_pAddrOfSVarTable, int in_NumEntry, int in_Ordered);
-
-/** @fn MyoError myoiLibInit(void * in_args, void (*userInitFunc))
- * @brief Init entry of the MYO library responsible for initializing
- * the runtime.
- *
- * @param in_args mechanism to pass arguments to the Initialization
- * routine. The default value of NULL would mean the host is blocked
- * on the completion of myoiLibInit() on all nodes. A subset of the
- * installed cards can be intialized by passing an array of
- * MyoiUserParams. For example, in a system with two cards, to run a
- * MYO application only on the second card, intialize the array as
- * follows:
- * @code
- * MyoiUserParams UserParas[64];
- * UserParas[0].type = MYOI_USERPARAMS_DEVID;
- * UserParas[0].nodeid = 2;
- * UserParas[1].type = MYOI_USERPARAMS_LAST_MSG;
- * if(MYO_SUCCESS != myoiLibInit(&UserParas, (void*)&myoiUserInit)) {
- * printf("Failed to initialize MYO runtime\n");
- * return -1;
- * }
- * @endcode
- * This intialization is required only in the client/host side
- * of the application. The server/card side executable should be
- * executed only on the second card in this case.
- *
- * Another capability for the MyoiUserParams structure in MYO is specifying
- * a remote procedure call to be executed on the host or card, immediately after
- * myoiLibInit() completes. This capability is useful because some calls in
- * MYO return immediately, but do not actually complete until after the MYO
- * library is completely initialized on all peers. An example follows,
- * showing how to cause MYO to execute the registered function named
- * "PostMyoLibInitFunction" on the first card only:
- * @code
- * MyoiUserParams UserParas[64];
- * UserParas[0].type = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC;
- * UserParas[0].nodeid = 1;
- * SetPostLibInitFuncName(UserParas[1], "PostMyoLibInitFunction");
- * UserParas[2].type = MYOI_USERPARAMS_LAST_MSG;
- * if(MYO_SUCCESS != myoiLibInit(&UserParas, (void*)&myoiUserInit)) {
- * printf("Failed to initialize MYO runtime\n");
- * return -1;
- * }
- * @endcode
- *
- * Note, to cause PostMyoLibInitFunction to be executed on ALL cards,
- * specify: MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES for the nodeid.
- * That is:
- * @code
- * UserParas[0].nodeid = MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES;
- * @endcode
- *
- * @param userInitFunc Shared variables and remote functions are
- * registered in this routine, which is called by the runtime during
- * library initialization.
- * @return
- * MYO_SUCCESS;
- * MYO_ERROR;
- **/
-MYOACCESSAPI
-MyoError myoiLibInit(void * in_args, void *userInitFunc /*userInitFunc must be: MyoError (*userInitFunc)(void) */);
-
-/** @fn extern MyoError myoiSupportsFeature(MyoFeatureType myoFeature)
- * @brief Supports runtime query to determine whether a feature is supported
- * by the myo that is installed on the system. This function is intended to
- * support client code to query the myo library to determine whether its set
- * of capabilities are able to support the client's needs.
- *
- * @param myoFeature The feature that is to be inquired about.
- * @return
- * MYO_SUCCESS; if the feature is supported.
- * MYO_FEATURE_NOT_IMPLEMENTED if the feature is not supported.
- *
- * (For more information, please also see the declaration of the MyoFeatureType enum declaration.)
- **/
-MYOACCESSAPI
-MyoError myoiSupportsFeature(MyoFeatureType myoFeature);
-
-/** @fn void myoiLibFini()
- * @brief Finalize the MYO library, all resources held by the runtime are
- * released by this routine.
- *
- * @return
- **/
-MYOACCESSAPI
-void myoiLibFini();
-
-/* *************************************************************************** *\
- APIs to set shared memory space consistent/non-consistent.
-\* *************************************************************************** */
-
-/** @fn extern MyoError myoiSetMemNonConsistent(void *in_pAddr, size_t in_Size)
- * @brief Set part of the shared memory space to be non-consistent, which
- * means that the consistency of this part of shared memory space does not
- * need to be maintained between HOST and cards.
- *
- * @param in_pAddr The start address of the specified shared memory space;
- * @param in_Size The size of the specified shared memory space;
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoiSetMemNonConsistent(void *in_pAddr, size_t in_Size);
-
-/** @fn extern MyoError myoiSetMemConsistent(void *in_pAddr, size_t in_Size)
- * @brief Set part of the shared memory space to be consistent, which
- * means that the consistency of this part of shared memory space needs
- * to be maintained between HOST and cards.
- *
- * @param in_pAddr The start address of the specified shared
- * memory space.
- * @param in_size The size of the specified shared memory space.
- * @return
- * MYO_SUCCESS; or
- * an error number to indicate the error.
- **/
-MYOACCESSAPI
-MyoError myoiSetMemConsistent(void *in_pAddr, size_t in_Size);
-
-/* A collection of external data symbols */
-EXTERN_C MYOACCESSAPI unsigned int myoiMyId; /* MYO_MYID if on accelerators */
-EXTERN_C MYOACCESSAPI volatile int myoiInitFlag;
-
- //! Structure of the array element that is passed to myoiLibInit() to initialize a subset of the available cards, or
- //! to specify a remote call function to be called after successful myo library initialization:
-typedef struct {
- //!type = MYOI_USERPARAMS_DEVID or MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC for each element in the array except
- //!the last element, type should be: MYOI_USERPARAMS_LAST_MSG.
- int type;
- //! nodeid refers to the 'one-based' card index. Specifying, 1 represents the first card, mic0, 2 represents the
- // second card, mic1, 3 represents the third card, mic2, ....).
- // NOTE: for type == MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC, specifying MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES
- // for nodeid, will execute the named function, on each card in the system, mic0, mic1, mic2, .... micn.
- int nodeid;
-} MyoiUserParams;
-
-//!The following two types are dealt with entirely with just one MyoiUserParams structure:
-//!MYOI_USERPARAMS_DEVID maps node ids.
-#define MYOI_USERPARAMS_DEVID 1
-//!MYOI_USERPARAMS_LAST_MSG terminates the array of MyoiUserParams.
-#define MYOI_USERPARAMS_LAST_MSG -1
-
-//!The following type requires setting the node id in a MyoiUserParams structure, and then following the struct
-//!with a MyoiUserParamsPostLibInit union:
-#define MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC 2
-//!nodeid can be one of the following macros, or a number >=1, corresponding to the card number (1 == mic0,
-//!2 == mic1, 3 == mic2, ....)
-//!Setting nodeid to MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES causes the function to be called on all
-//!cards:
-#define MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES 0
-//!Setting nodeid to MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE causes the function to be called on the
-//!host instead of the card:
-#define MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE -1
-
-//!The postLibInit union contains two members that serves two different purposes:
-//!1. It can be used to stipulate the name of the function to be remotely called from host to card, on successful
-//!myo library initialization, (member postLibInitRemoveFuncName) using the type:
-//!MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC. OR
-//!2. It can be an actual function pointer (member name: postLibInitHostFuncAddress) that will be called on the host,
-//!on successful myo library initialization, using the type: MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC, with nodeid:
-//!MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE
-typedef union {
- const char *postLibInitRemoveFuncName;
- void (*postLibInitHostFuncAddress)(void);
-} MyoiUserParamsPostLibInit;
-
-/* These are two macros to help get the information in a MyoiUserParamsPostLibInit union from a MyoiUserParams struct; */
-#define GetPostLibInitFuncName(USERPARAMS) ((MyoiUserParamsPostLibInit *) (& (USERPARAMS)))->postLibInitRemoveFuncName
-#define GetPostLibInitFuncAddr(USERPARAMS) ((MyoiUserParamsPostLibInit *) (& (USERPARAMS)))->postLibInitHostFuncAddress
-
-/* These are two macros to help set the information in a MyoiUserParamsPostLibInit union from a MyoiUserParams struct; */
-#define SetPostLibInitFuncName(USERPARAMS,FUNC_NAME) GetPostLibInitFuncName(USERPARAMS) = FUNC_NAME
-#define SetPostLibInitFuncAddr(USERPARAMS,FUNC_ADDR) GetPostLibInitFuncAddr(USERPARAMS) = FUNC_ADDR
-
-#ifdef __cplusplus
-}
-#endif
-#endif // _MYO_IMPL_H_
-/*! @} */
+++ /dev/null
-/*
- * Copyright 2010-2015 Intel Corporation.
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, version 2.1.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301 USA.
- *
- * Disclaimer: The codes contained in these modules may be specific
- * to the Intel Software Development Platform codenamed Knights Ferry,
- * and the Intel product codenamed Knights Corner, and are not backward
- * compatible with other Intel products. Additionally, Intel will NOT
- * support the codes or instruction set in future products.
- *
- * Intel offers no warranty of any kind regarding the code. This code is
- * licensed on an "AS IS" basis and Intel is not obligated to provide
- * any support, assistance, installation, training, or other services
- * of any kind. Intel is also not obligated to provide any updates,
- * enhancements or extensions. Intel specifically disclaims any warranty
- * of merchantability, non-infringement, fitness for any particular
- * purpose, and any other warranty.
- *
- * Further, Intel disclaims all liability of any kind, including but
- * not limited to liability for infringement of any proprietary rights,
- * relating to the use of the code, even if Intel is notified of the
- * possibility of such liability. Except as expressly stated in an Intel
- * license agreement provided with this code and agreed upon with Intel,
- * no license, express or implied, by estoppel or otherwise, to any
- * intellectual property rights is granted herein.
- */
-/**
- Description: Define the types used by APIs of MYO programming.
- */
-
-#ifndef _MYO_TYPES_H_
-#define _MYO_TYPES_H_
-
-#include <string.h> /* For size_t */
-
-/** @ingroup MYO
- * @addtogroup MYOTYPES
-@{
-* @file myotypes.h
-*/
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*! MYO Status
- */
-typedef enum {
- MYO_SUCCESS = 0, /*!< Success */
- MYO_ERROR, /*!< Error */
-
- MYO_INVALID_ENV, /*!< Invalid Env */
- MYO_INVALID_ARGUMENT, /*!< Invalid Argument */
-
- MYO_NOT_INITIALIZED, /*!< Not Initialized */
- MYO_ALREADY_FINALIZED,/*!< Already Finalized */
-
- MYO_BUF_ERROR, /*!< Buffer Error */
- MYO_OUT_OF_RANGE, /*!< Out of Range */
- MYO_OUT_OF_MEMORY, /*!< Out of Memory */
-
- MYO_ALREADY_EXISTS, /*!< Already Exists */
-
- MYO_EOF, /*!< EOF */
- MYO_FEATURE_NOT_IMPLEMENTED = -1, /*!< Feature not implemented (see myoiSupportsFeature(). */
-} MyoError;
-
-
-/*! Arena Ownership */
-typedef enum {
- MYO_ARENA_MINE = 1, /*!< Arena MINE Ownership */
- MYO_ARENA_OURS, /*!< Arena OURS Ownership */
-} MyoOwnershipType;
-
- /*! MYO Features */
-typedef enum {
- /*!< EVERY VALUE that is less than MYO_FEATURE_BEGIN is not implemented. */
- MYO_FEATURE_BEGIN = 1, /*!< The first feature that is supported. */
- MYO_FEATURE_POST_LIB_INIT = MYO_FEATURE_BEGIN, /*!< Allows specifying a function to be executed immediately */
- /* after myoiLibInit() completes. This feature was implemented in version */
- /* 3.3 of MPSS. */
- /* MYO_FEATURE_FUTURE_CAPABILITY = 2, at some time in the future, as new features are added to MYO, new enumeration constants */
- /* will be added to the MyoFeatureType, and the value of the new enumeration constant will be greater */
- /* than the current value of MYO_FEATURE_LAST constant, and then the MYO_FEATURE_LAST constant too, */
- /* will be changed to be the value of the new enumeration constant. For example, in April, 2014, */
- /* the POST_LIB_INIT feature was implemented in version 3.3 of MPSS, and the MYO_FEATURE_BEGIN */
- /* enumeration constant is the same as the MYO_FEATURE_LAST enumeration constant, and both are equal */
- /* to 1. */
- /* Suppose in December, 2014, a new feature is added to the MYO library, for version 3.4 of MPSS. */
- /* Then, MYO_FEATURE_BEGIN enumeration constant will be still the value 1, but the MYO_FEATURE_LAST */
- /* enumeration constant will be set to 2. */
- /* At runtime, one client binary can determine if the MYO that is installed is capable of any */
- /* capability. For example, suppose a future client binary queries version 3.3 of MYO if it is */
- /* capable of some future feature. Version 3.3 of MYO will indicate that the feature is not */
- /* implemented to the client. But, conversely, suppose the future client queries version 3.4 of MYO */
- /* if it is capable of some future feature. Version 3.4 of MYO will indicate that the feature isd */
- /* supported. */
- /* */
- /* Date: | MYO_FEATURE_BEGIN: | MYO_FEATURE_LAST: | MPSS VERSION: | myoiSupportsFeature(MYO_FEATURE_FUTURE_CAPABILITY) */
- /* ---------------+---------------------+--------------------+---------------+--------------------------------------------------- */
- /* April, 2014 | 1 | 1 | 3.3 | MYO_FEATURE_NOT_IMPLEMENTED */
- /* December, 2014 | 1 | 2 | 3.4 | MYO_SUCCESS */
- /* ---------------+---------------------+--------------------+---------------+--------------------------------------------------- */
- MYO_FEATURE_LAST = MYO_FEATURE_POST_LIB_INIT, /*!< The last feature that is supported. */
- /*!< EVERY VALUE that is greater than MYO_FEATURE_LAST is not implemented. */
- /*!< EVERY VALUE that is greater than or equal to MYO_FEATURE_BEGIN AND less than or equal to MYO_FEATURE_LAST is implemented. */
-} MyoFeatureType; /* (For more information, please also see myoiSupportsFeature() function declaration.) */
-
-/*************************************************************
- * define the property of MYO Arena
- ***********************************************************/
-#define MYO_CONSISTENCY_MODE 0x3
-#define MYO_RELEASE_CONSISTENCY 0x1
-#define MYO_STRONG_RELEASE_CONSISTENCY 0x2
-#define MYO_STRONG_CONSISTENCY 0x3
-#define MYO_UPDATE_ON_DEMAND 0x8
-#define MYO_UPDATE_ON_ACQUIRE 0x10
-#define MYO_RECORD_DIRTY 0x20
-#define MYO_NOT_RECORD_DIRTY 0x40
-#define MYO_ONE_VERSION 0x80
-#define MYO_MULTI_VERSIONS 0x100
-#define MYO_CONSISTENCY 0x200
-#define MYO_NO_CONSISTENCY 0x400
-#define MYO_HOST_TO_DEVICE 0x800
-#define MYO_DEVICE_TO_HOST 0x1000
-#define MYO_HYBRID_UPDATE 0x2000
-typedef unsigned int MyoArena;
-
-typedef void * MyoMutex;
-typedef void * MyoSem;
-typedef void * MyoBarrier;
-
-
-#ifdef __cplusplus
-}
-#endif
-#endif // _MYO_TYPES_H_
-/*! @} */
plugin_include_HEADERS = main_target_image.h
AM_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=0 -I$(coi_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_dir)
AM_CXXFLAGS = $(CXXFLAGS)
- AM_LDFLAGS = -L$(liboffload_dir)/.libs -L$(libgomp_dir)/.libs -loffloadmic_target -lcoi_device -lmyo-service -lgomp -rdynamic
+ AM_LDFLAGS = -L$(liboffload_dir)/.libs -L$(libgomp_dir)/.libs -loffloadmic_target -lcoi_device -lgomp -rdynamic
endif
main_target_image.h: offload_target_main
@PLUGIN_HOST_FALSE@plugin_include_HEADERS = main_target_image.h
@PLUGIN_HOST_FALSE@AM_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=0 -I$(coi_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_dir)
@PLUGIN_HOST_FALSE@AM_CXXFLAGS = $(CXXFLAGS)
-@PLUGIN_HOST_FALSE@AM_LDFLAGS = -L$(liboffload_dir)/.libs -L$(libgomp_dir)/.libs -loffloadmic_target -lcoi_device -lmyo-service -lgomp -rdynamic
+@PLUGIN_HOST_FALSE@AM_LDFLAGS = -L$(liboffload_dir)/.libs -L$(libgomp_dir)/.libs -loffloadmic_target -lcoi_device -lgomp -rdynamic
# Work around what appears to be a GNU make bug handling MAKEFLAGS
# values defined in terms of make variables, as is the case for CC and
/* Plugin for offload execution on Intel MIC devices.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
Contributed by Ilya Verbin <ilya.verbin@intel.com>.
#include "main_target_image.h"
#include "gomp-constants.h"
-#define LD_LIBRARY_PATH_ENV "LD_LIBRARY_PATH"
-#define MIC_LD_LIBRARY_PATH_ENV "MIC_LD_LIBRARY_PATH"
#define OFFLOAD_ACTIVE_WAIT_ENV "OFFLOAD_ACTIVE_WAIT"
#ifdef DEBUG
static void
init (void)
{
- const char *ld_lib_path = getenv (LD_LIBRARY_PATH_ENV);
- const char *mic_lib_path = getenv (MIC_LD_LIBRARY_PATH_ENV);
const char *active_wait = getenv (OFFLOAD_ACTIVE_WAIT_ENV);
/* Disable active wait by default to avoid useless CPU usage. */
if (!active_wait)
setenv (OFFLOAD_ACTIVE_WAIT_ENV, "0", 0);
- if (!ld_lib_path)
- goto out;
-
- /* Add path specified in LD_LIBRARY_PATH to MIC_LD_LIBRARY_PATH, which is
- required by liboffloadmic. */
- if (!mic_lib_path)
- setenv (MIC_LD_LIBRARY_PATH_ENV, ld_lib_path, 1);
- else
- {
- size_t len = strlen (mic_lib_path) + strlen (ld_lib_path) + 2;
- bool use_alloca = len <= 2048;
- char *mic_lib_path_new = (char *) (use_alloca ? alloca (len)
- : malloc (len));
- if (!mic_lib_path_new)
- {
- fprintf (stderr, "%s: Can't allocate memory\n", __FILE__);
- exit (1);
- }
-
- sprintf (mic_lib_path_new, "%s:%s", mic_lib_path, ld_lib_path);
- setenv (MIC_LD_LIBRARY_PATH_ENV, mic_lib_path_new, 1);
-
- if (!use_alloca)
- free (mic_lib_path_new);
- }
-
-out:
address_table = new ImgDevAddrMap;
image_descriptors = new ImgDescMap;
num_devices = _Offload_number_of_devices ();
/* Plugin for offload execution on Intel MIC devices.
- Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Copyright (C) 2014-2016 Free Software Foundation, Inc.
Contributed by Ilya Verbin <ilya.verbin@intel.com>.
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
generate_mem_ranges(spaces, adp, deref, &print_range, print_values);
}
}
+
+// Dump a non-contiguous struct descriptor through OFFLOAD_TRACE.
+//
+//   spaces - indentation prefix printed at the start of every line
+//   name   - name of the expression the descriptor belongs to
+//   desc_p - descriptor to dump; a NULL descriptor is ignored
+void noncont_struct_dump(
+    const char *spaces,
+    const char *name,
+    struct NonContigDesc *desc_p)
+{
+    // Validate the descriptor before reading any field: the header line
+    // below already needs desc_p->base.
+    if (!desc_p) {
+        return;
+    }
+
+    // base is an int64_t that holds an address; %p requires a void *.
+    void *base = (void *) (intptr_t) desc_p->base;
+
+    OFFLOAD_TRACE(2, "%s%s NonCont Struct expression %p\n",
+                  spaces, name, base);
+    OFFLOAD_TRACE(2, "%s%s base=%p\n", spaces, name, base);
+    for (int i = 0; i < desc_p->interval_cnt; i++) {
+        // int64_t is not necessarily long long, so cast to match %lld.
+        OFFLOAD_TRACE(2,"%s dimension %d: lower=%lld, size=%lld\n",
+                      spaces, i,
+                      (long long) desc_p->interval[i].lower,
+                      (long long) desc_p->interval[i].size);
+    }
+}
+
+// Return lower + size of the last interval of a non-contiguous struct
+// descriptor.  A descriptor without intervals yields 0.
+int64_t get_noncont_struct_size(struct NonContigDesc *desc_p)
+{
+    // interval_cnt is int64_t; keep the index the same width instead of
+    // narrowing it to int.
+    int64_t last = desc_p->interval_cnt - 1;
+
+    // With interval_cnt <= 0 there is no last interval to read.
+    if (last < 0) {
+        return 0;
+    }
+    return desc_p->interval[last].lower + desc_p->interval[last].size;
+}
+
#endif // OFFLOAD_DEBUG
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
CeanReadDim Dim[1];
};
+struct IntervalDesc { // One interval entry of a NonContigDesc
+ int64_t lower; // Lower index of the interval
+ int64_t size; // Size of each element at this interval
+};
+
+struct NonContigDesc { // Descriptor of a non-contiguous struct: a base plus interval_cnt intervals
+ int64_t base; // Base address
+ int64_t interval_cnt; // Number of intervals
+ struct IntervalDesc interval[1]; // Indexed from 0 to interval_cnt - 1 by noncont_struct_dump()
+};
+
// array descriptor length
#define __arr_desc_length(rank) \
(sizeof(int64_t) + sizeof(Dim_Desc) * (rank))
const Arr_Desc *adp,
bool dereference,
bool print_values);
+
+DLL_LOCAL void noncont_struct_dump(
+ const char *spaces,
+ const char *name,
+ struct NonContigDesc *desc_p); // Traces base and each interval's lower/size via OFFLOAD_TRACE
+
+DLL_LOCAL int64_t get_noncont_struct_size(struct NonContigDesc *desc_p); // Returns lower + size of the last interval
+
#define ARRAY_DESC_DUMP(spaces, name, adp, dereference, print_values) \
if (console_enabled >= 2) \
__arr_desc_dump(spaces, name, adp, dereference, print_values);
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
EngineGetInfo =
(COIRESULT (*)(COIENGINE, uint32_t, COI_ENGINE_INFO*))
DL_sym(lib_handle, "COIEngineGetInfo", COI_VERSION1);
- if (COIEngineGetInfo == 0) {
+ if (EngineGetInfo == 0) {
OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n",
"COIEngineGetInfo");
fini();
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
#define COI_CLIENT_H_INCLUDED
#include <common/COIPerf_common.h>
+#include <common/COIMacros_common.h>
#include <source/COIEngine_source.h>
#include <source/COIProcess_source.h>
#include <source/COIPipeline_source.h>
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
return ofld;
}
+// This routine is called for OpenMP 4.5 offload calls.
+// OpenMP 4.5 offload is always optional, so when no device is available
+// it returns NULL and execution falls back to the host.
extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
const int* device_num,
const char* file,
// make sure libray is initialized and at least one device is available
if (!__offload_init_library()) {
- LIBOFFLOAD_ERROR(c_device_is_not_available);
- exit(1);
+ OFFLOAD_DEBUG_TRACE(2, "No device available, fall back to host\n");
+ return NULL;
}
// OFFLOAD_TIMER_INIT must follow call to __offload_init_library
OffloadFlags offload_flags
)
{
+ if (signal) {
+ ofld->set_signal(*signal);
+ }
+
bool ret = ofld->offload(name, is_empty, vars, vars2, num_vars,
waits, num_waits, signal, entry_id,
stack_addr, offload_flags);
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
const int ullong_max_len = 20;
/* Features of liboffloadmic. */
- assert (in_Type == COI_BUFFER_NORMAL);
+ assert (in_Type == COI_BUFFER_NORMAL || in_Type == COI_BUFFER_OPENCL);
assert ((in_Flags & COI_SINK_MEMORY) == 0);
assert ((in_Flags & COI_SAME_ADDRESS_SINKS) == 0);
assert ((in_Flags & COI_SAME_ADDRESS_SINKS_AND_SOURCE) == 0);
assert (out_pEngineInfo != NULL);
- out_pEngineInfo->ISA = COI_ISA_x86_64;
+ out_pEngineInfo->ISA = COI_DEVICE_KNL;
out_pEngineInfo->NumCores = 1;
out_pEngineInfo->NumThreads = 8;
out_pEngineInfo->CoreMaxFrequency = SYMBOL_VERSION(COIPerfGetCycleFrequency,1)() / 1000000;
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
/*
- * Copyright 2010-2015 Intel Corporation.
+ * Copyright 2010-2016 Intel Corporation.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
+++ /dev/null
-/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* We don't need to implement any MYO client functions. */
-
+++ /dev/null
-/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#include "myo_service.h"
-
-#include "myo_version_asm.h"
-
-
-extern "C"
-{
-
-MYOACCESSAPI MyoError
-SYMBOL_VERSION (myoAcquire, 1) ()
-{
- MYOTRACE ("myoAcquire");
-
- assert (false);
-
- return MYO_ERROR;
-}
-
-
-MYOACCESSAPI MyoError
-SYMBOL_VERSION (myoRelease, 1) ()
-{
- MYOTRACE ("myoRelease");
-
- assert (false);
-
- return MYO_ERROR;
-}
-
-
-MYOACCESSAPI void
-SYMBOL_VERSION (myoSharedAlignedFree, 1) (void *ptr)
-{
- MYOTRACE ("myoSharedAlignedFree");
-
- assert (false);
-}
-
-
-MYOACCESSAPI void*
-SYMBOL_VERSION (myoSharedAlignedMalloc, 1) (size_t size,
- size_t alignment)
-{
- MYOTRACE ("myoSharedAlignedMalloc");
-
- assert (false);
-
- return 0;
-}
-
-
-MYOACCESSAPI void
-SYMBOL_VERSION (myoSharedFree, 1) (void *ptr)
-{
- MYOTRACE ("myoSharedFree");
-
- assert (false);
-}
-
-
-MYOACCESSAPI void*
-SYMBOL_VERSION (myoSharedMalloc, 1) (size_t size)
-{
- MYOTRACE ("myoSharedMalloc");
-
- assert (false);
-
- return 0;
-}
-
-
-MYOACCESSAPI MyoError
-SYMBOL_VERSION (myoiLibInit, 1) (void *args,
- void *init_func)
-{
- MYOTRACE ("myoiLibInit");
-
- assert (false);
-
- return MYO_ERROR;
-}
-
-
-MYOACCESSAPI void
-SYMBOL_VERSION (myoiLibFini, 1) ()
-{
- MYOTRACE ("myoiLibFini");
-
- assert (false);
-}
-
-
-MyoError
-SYMBOL_VERSION (myoiMicVarTableRegister, 1) (void *table,
- int num)
-{
- MYOTRACE ("myoiMicVarTableRegister");
-
- assert (false);
-
- return MYO_ERROR;
-}
-
-
-MYOACCESSAPI MyoError
-SYMBOL_VERSION (myoiRemoteFuncRegister, 1) (MyoiRemoteFuncType type,
- const char *name)
-{
- MYOTRACE ("myoiRemoteFuncRegister");
-
- /* Looks like we have nothing to do here. */
-
- return MYO_SUCCESS;
-}
-
-
-MyoError
-SYMBOL_VERSION (myoiTargetFptrTableRegister, 1) (void *table,
- int num,
- int ordered)
-{
- MYOTRACE ("myoiTargetFptrTableRegister");
-
- assert (false);
-
- return MYO_ERROR;
-}
-
-
-MYOACCESSAPI MyoError
-SYMBOL_VERSION (myoArenaRelease, 1) (MyoArena in_Arena)
-{
- MYOTRACE ("myoArenaRelease");
-
- assert (false);
-
- return MYO_ERROR;
-}
-
-
-MYOACCESSAPI MyoError
-SYMBOL_VERSION (myoArenaAcquire, 1) (MyoArena in_Arena)
-{
- MYOTRACE ("myoArenaAcquire");
-
- assert (false);
-
- return MYO_ERROR;
-}
-
-
-MYOACCESSAPI void
-SYMBOL_VERSION (myoArenaAlignedFree, 1) (MyoArena in_Arena, void *in_pPtr)
-{
- MYOTRACE ("myoArenaAlignedFree");
-
- assert (false);
-}
-
-
-MYOACCESSAPI void *
-SYMBOL_VERSION (myoArenaAlignedMalloc, 1) (MyoArena in_Arena, size_t in_Size,
- size_t in_Alignment)
-{
- MYOTRACE ("myoArenaAlignedMalloc");
-
- assert (false);
-
- return 0;
-}
-
-
-} // extern "C"
-
+++ /dev/null
-/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef MYO_SERVICE_H_INCLUDED
-#define MYO_SERVICE_H_INCLUDED
-
-#include <myo.h>
-#include <myoimpl.h>
-#include <myotypes.h>
-
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#define SYMBOL_VERSION(SYMBOL,VERSION) SYMBOL ## VERSION
-
-#define MYOERROR(...) \
-{ \
- fprintf (stderr, "MYO ERROR - TARGET: "); \
- fprintf (stderr, __VA_ARGS__); \
- fprintf (stderr, "\n"); \
- perror (NULL); \
- return MYO_ERROR; \
-}
-
-#ifdef DEBUG
- #define MYOTRACE(...) \
- { \
- fprintf (stderr, "MYO TRACE - TARGET: "); \
- fprintf (stderr, __VA_ARGS__); \
- fprintf (stderr, "\n"); \
- }
-#else
- #define MYOTRACE(...) {}
-#endif
-
-#endif // MYO_SERVICE_H_INCLUDED
+++ /dev/null
-/*
- * Copyright 2010-2015 Intel Corporation.
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, version 2.1.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301 USA.
- *
- * Disclaimer: The codes contained in these modules may be specific
- * to the Intel Software Development Platform codenamed Knights Ferry,
- * and the Intel product codenamed Knights Corner, and are not backward
- * compatible with other Intel products. Additionally, Intel will NOT
- * support the codes or instruction set in future products.
- *
- * Intel offers no warranty of any kind regarding the code. This code is
- * licensed on an "AS IS" basis and Intel is not obligated to provide
- * any support, assistance, installation, training, or other services
- * of any kind. Intel is also not obligated to provide any updates,
- * enhancements or extensions. Intel specifically disclaims any warranty
- * of merchantability, non-infringement, fitness for any particular
- * purpose, and any other warranty.
- *
- * Further, Intel disclaims all liability of any kind, including but
- * not limited to liability for infringement of any proprietary rights,
- * relating to the use of the code, even if Intel is notified of the
- * possibility of such liability. Except as expressly stated in an Intel
- * license agreement provided with this code and agreed upon with Intel,
- * no license, express or implied, by estoppel or otherwise, to any
- * intellectual property rights is granted herein.
- */
-
-/*Version for Symbols( only Functions currently versioned)
-Only that Linux Host Side code is versioned currently*/
-#if (! defined MYO_MIC_CARD) && (! defined _WIN32)
-
- __asm__(".symver myoArenaAlignedMalloc1,myoArenaAlignedMalloc@@MYO_1.0");
- __asm__(".symver myoArenaAlignedFree1,myoArenaAlignedFree@@MYO_1.0");
- __asm__(".symver myoArenaAcquire1,myoArenaAcquire@@MYO_1.0");
- __asm__(".symver myoArenaRelease1,myoArenaRelease@@MYO_1.0");
- __asm__(".symver myoAcquire1,myoAcquire@@MYO_1.0");
- __asm__(".symver myoRelease1,myoRelease@@MYO_1.0");
- __asm__(".symver myoSharedAlignedFree1,myoSharedAlignedFree@@MYO_1.0");
- __asm__(".symver myoSharedAlignedMalloc1,myoSharedAlignedMalloc@@MYO_1.0");
- __asm__(".symver myoSharedFree1,myoSharedFree@@MYO_1.0");
- __asm__(".symver myoSharedMalloc1,myoSharedMalloc@@MYO_1.0");
- __asm__(".symver myoiLibInit1,myoiLibInit@@MYO_1.0");
- __asm__(".symver myoiLibFini1,myoiLibFini@@MYO_1.0");
- __asm__(".symver myoiMicVarTableRegister1,myoiMicVarTableRegister@@MYO_1.0");
- __asm__(".symver myoiRemoteFuncRegister1,myoiRemoteFuncRegister@@MYO_1.0");
- __asm__(".symver myoiTargetFptrTableRegister1,myoiTargetFptrTableRegister@@MYO_1.0");
-
-#endif
+++ /dev/null
-/*
- * Copyright 2010-2015 Intel Corporation.
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, version 2.1.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301 USA.
- *
- * Disclaimer: The codes contained in these modules may be specific
- * to the Intel Software Development Platform codenamed Knights Ferry,
- * and the Intel product codenamed Knights Corner, and are not backward
- * compatible with other Intel products. Additionally, Intel will NOT
- * support the codes or instruction set in future products.
- *
- * Intel offers no warranty of any kind regarding the code. This code is
- * licensed on an "AS IS" basis and Intel is not obligated to provide
- * any support, assistance, installation, training, or other services
- * of any kind. Intel is also not obligated to provide any updates,
- * enhancements or extensions. Intel specifically disclaims any warranty
- * of merchantability, non-infringement, fitness for any particular
- * purpose, and any other warranty.
- *
- * Further, Intel disclaims all liability of any kind, including but
- * not limited to liability for infringement of any proprietary rights,
- * relating to the use of the code, even if Intel is notified of the
- * possibility of such liability. Except as expressly stated in an Intel
- * license agreement provided with this code and agreed upon with Intel,
- * no license, express or implied, by estoppel or otherwise, to any
- * intellectual property rights is granted herein.
- */
-
-/***
-* See http://sourceware.org/binutils/docs/ld/VERSION.html#VERSION for more info.
-***/
-
-MYO_1.0
-{
- global:
- myoArenaAlignedMalloc;
- myoArenaAlignedFree;
- myoArenaAcquire;
- myoArenaRelease;
- myoAcquire;
- myoRelease;
- myoSharedAlignedFree;
- myoSharedAlignedMalloc;
- myoSharedFree;
- myoSharedMalloc;
-
- myoiLibInit;
- myoiLibFini;
- myoiMicVarTableRegister;
- myoiRemoteFuncRegister;
- myoiTargetFptrTableRegister;
-
- local:
- *;
-};
-
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
case c_malloc:
write_message(stderr, msg_c_malloc, args);
break;
+ case c_unknown_mic_device_type:
+ write_message(stderr, msg_c_unknown_mic_device_type, args);
+ break;
case c_offload_malloc:
write_message(stderr, msg_c_offload_malloc, args);
break;
write_message(stderr, msg_c_offload1, args);
break;
case c_unknown_var_type:
- write_message(stderr, c_unknown_var_type, args);
+ write_message(stderr, msg_c_unknown_var_type, args);
break;
case c_invalid_env_var_value:
write_message(stderr, msg_c_invalid_env_var_value, args);
case c_mic_init6:
write_message(stderr, msg_c_mic_init6, args);
break;
+ case c_mic_init7:
+ write_message(stderr, msg_c_mic_init7, args);
+ break;
+ case c_mic_init8:
+ write_message(stderr, msg_c_mic_init8, args);
+ break;
+ case c_mic_init9:
+ write_message(stderr, msg_c_mic_init9, args);
+ break;
+ case c_mic_init10:
+ write_message(stderr, msg_c_mic_init10, args);
+ break;
+ case c_mic_init11:
+ write_message(stderr, msg_c_mic_init11, args);
+ break;
case c_no_static_var_data:
write_message(stderr, msg_c_no_static_var_data, args);
break;
case c_cannot_set_affinity:
write_message(stderr, msg_c_cannot_set_affinity, args);
break;
+ case c_mixed_versions:
+ write_message(stderr, msg_c_mixed_versions, args);
+ break;
case c_in_with_preallocated:
write_message(stderr, msg_c_in_with_preallocated, args);
break;
case c_report_no_host_exe:
write_message(stderr, msg_c_report_no_host_exe, args);
break;
+ case c_report_no_target_exe:
+ write_message(stderr, msg_c_report_no_target_exe, args);
+ break;
case c_report_path_buff_overflow:
write_message(stderr, msg_c_report_path_buff_overflow, args);
break;
case c_create_pipeline_for_stream:
write_message(stderr, msg_c_create_pipeline_for_stream, args);
break;
+ case c_offload_streams_are_absent:
+ write_message(stderr, msg_c_offload_streams_are_absent, args);
+ break;
case c_offload_no_stream:
write_message(stderr, msg_c_offload_no_stream, args);
break;
+ case c_offload_device_doesnt_match_to_stream:
+ write_message(stderr,
+ msg_c_offload_device_doesnt_match_to_stream, args);
+ break;
case c_get_engine_info:
write_message(stderr, msg_c_get_engine_info, args);
break;
case c_unload_library:
write_message(stderr, msg_c_unload_library, args);
break;
+ case c_target_myo_library:
+ write_message(stderr, msg_c_target_myo_library, args);
+ break;
+ case c_myo_dl_sym:
+ write_message(stderr, msg_c_myo_dl_sym, args);
+ break;
+ case c_bad_myo_free:
+ write_message(stderr, msg_c_bad_myo_free, args);
+ break;
}
va_end(args);
}
LIBOFFLOAD_ERROR(c_report_unknown_trace_node);
abort();
}
+ return 0;
}
char const * report_get_host_stage_str(int i)
LIBOFFLOAD_ERROR(c_report_unknown_timer_node);
abort();
}
+ return 0;
}
char const * report_get_target_stage_str(int i)
LIBOFFLOAD_ERROR(c_report_unknown_timer_node);
abort();
}
+ return 0;
}
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
c_send_func_ptr,
c_receive_func_ptr,
c_malloc,
+ c_unknown_mic_device_type,
c_offload_malloc,
c_invalid_env_var_value,
c_invalid_env_var_int_value,
c_mic_init4,
c_mic_init5,
c_mic_init6,
+ c_mic_init7,
+ c_mic_init8,
+ c_mic_init9,
+ c_mic_init10,
+ c_mic_init11,
c_no_static_var_data,
c_no_ptr_data,
c_get_engine_handle,
c_no_target_exe,
c_incorrect_affinity,
c_cannot_set_affinity,
+ c_mixed_versions,
c_report_host,
c_report_target,
c_report_title,
c_coipipe_max_number,
c_in_with_preallocated,
c_report_no_host_exe,
+ c_report_no_target_exe,
c_report_path_buff_overflow,
c_create_pipeline_for_stream,
c_offload_no_stream,
+ c_offload_device_doesnt_match_to_stream,
+ c_offload_streams_are_absent,
c_get_engine_info,
c_clear_cpu_mask,
c_set_cpu_mask,
c_report_state_stream,
c_report_stream,
- c_unload_library
+ c_unload_library,
+ c_target_myo_library,
+ c_myo_dl_sym,
+ c_bad_myo_free
} error_types;
enum OffloadHostPhase {
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
msg_c_send_func_ptr,
msg_c_receive_func_ptr,
msg_c_malloc,
+ msg_c_unknown_mic_device_type,
msg_c_offload_malloc,
msg_c_offload1,
msg_c_unknown_var_type,
msg_c_mic_init4,
msg_c_mic_init5,
msg_c_mic_init6,
+ msg_c_mic_init7,
+ msg_c_mic_init8,
+ msg_c_mic_init9,
+ msg_c_mic_init10,
+ msg_c_mic_init11,
msg_c_no_static_var_data,
msg_c_no_ptr_data,
msg_c_get_engine_handle,
msg_c_report_unknown_trace_node,
msg_c_incorrect_affinity,
msg_c_cannot_set_affinity,
+ msg_c_mixed_versions,
msg_c_report_host,
msg_c_report_mic,
msg_c_report_title,
msg_c_pointer_array_mismatch,
msg_c_in_with_preallocated,
msg_c_report_no_host_exe,
+ msg_c_report_no_target_exe,
msg_c_report_path_buff_overflow,
msg_c_create_pipeline_for_stream,
msg_c_offload_no_stream,
+ msg_c_offload_device_doesnt_match_to_stream,
+ msg_c_offload_streams_are_absent,
msg_c_get_engine_info,
msg_c_clear_cpu_mask,
msg_c_set_cpu_mask,
msg_c_report_state_stream,
msg_c_report_stream,
msg_c_unload_library,
- lastMsg = 174,
+ msg_c_target_myo_library,
+ msg_c_myo_dl_sym,
+ msg_c_bad_myo_free,
+ lastMsg = 187,
firstMsg = 1
};
#endif
static char const * MESSAGE_TABLE_NAME[] = {
- /* 0 __dummy__ */ "Un-used message",
- /* 1 msg_c_device_is_not_available */ "offload error: cannot offload to MIC - device is not available",
- /* 2 msg_c_invalid_device_number */ "offload error: expected a number greater than or equal to -1",
- /* 3 msg_c_send_func_ptr */ "offload error: cannot find function name for address %p",
- /* 4 msg_c_receive_func_ptr */ "offload error: cannot find address of function %s",
- /* 5 msg_c_malloc */ "offload error: memory allocation failed",
- /* 6 msg_c_offload_malloc */ "offload error: memory allocation failed (requested=%lld bytes, align %lld)",
- /* 7 msg_c_offload1 */ "offload error: device %d does not have a pending signal for wait(%p)",
- /* 8 msg_c_unknown_var_type */ "offload error: unknown variable type %d",
- /* 9 msg_c_invalid_env_var_value */ "offload warning: ignoring invalid value specified for %s",
- /* 10 msg_c_invalid_env_var_int_value */ "offload warning: specify an integer value for %s",
- /* 11 msg_c_invalid_env_report_value */ "offload warning: ignoring %s setting; use a value in range 1-3",
- /* 12 msg_c_offload_signaled1 */ "offload error: invalid device number %d specified in _Offload_signaled",
- /* 13 msg_c_offload_signaled2 */ "offload error: invalid signal %p specified for _Offload_signaled",
- /* 14 msg_c_myowrapper_checkresult */ "offload error: %s failed with error %d",
- /* 15 msg_c_myotarget_checkresult */ "offload error: %s failed with error %d",
- /* 16 msg_c_offload_descriptor_offload */ "offload error: cannot find offload entry %s",
- /* 17 msg_c_merge_var_descs1 */ "offload error: unexpected number of variable descriptors",
- /* 18 msg_c_merge_var_descs2 */ "offload error: unexpected variable type",
- /* 19 msg_c_mic_parse_env_var_list1 */ "offload_error: MIC environment variable must begin with an alpabetic character",
- /* 20 msg_c_mic_parse_env_var_list2 */ "offload_error: MIC environment variable value must be specified with '='",
- /* 21 msg_c_mic_process_exit_ret */ "offload error: process on the device %d unexpectedly exited with code %d",
- /* 22 msg_c_mic_process_exit_sig */ "offload error: process on the device %d was terminated by signal %d (%s)",
- /* 23 msg_c_mic_process_exit */ "offload error: process on the device %d was unexpectedly terminated",
- /* 24 msg_c_mic_init3 */ "offload warning: ignoring MIC_STACKSIZE setting; use a value >= 16K and a multiple of 4K",
- /* 25 msg_c_mic_init4 */ "offload error: thread key create failed with error %d",
- /* 26 msg_c_mic_init5 */ "offload warning: specify OFFLOAD_DEVICES as comma-separated physical device numbers or 'none'",
- /* 27 msg_c_mic_init6 */ "offload warning: OFFLOAD_DEVICES device number %d does not correspond to a physical device",
- /* 28 msg_c_no_static_var_data */ "offload error: cannot find data associated with statically allocated variable %p",
- /* 29 msg_c_no_ptr_data */ "offload error: cannot find data associated with pointer variable %p",
- /* 30 msg_c_get_engine_handle */ "offload error: cannot get device %d handle (error code %d)",
- /* 31 msg_c_get_engine_index */ "offload error: cannot get physical index for logical device %d (error code %d)",
- /* 32 msg_c_process_create */ "offload error: cannot start process on the device %d (error code %d)",
- /* 33 msg_c_process_set_cache_size */ "offload error: cannot reserve buffer on the device %d (error code %d)",
- /* 34 msg_c_process_get_func_handles */ "offload error: cannot get function handles on the device %d (error code %d)",
- /* 35 msg_c_process_wait_shutdown */ "offload error: wait for process shutdown failed on device %d (error code %d)",
- /* 36 msg_c_process_proxy_flush */ "offload error: cannot flush process output on device %d (error code %d)",
- /* 37 msg_c_load_library */ "offload error: cannot load library to the device %d (error code %d)",
- /* 38 msg_c_pipeline_create */ "offload error: cannot create pipeline on the device %d (error code %d)",
- /* 39 msg_c_pipeline_run_func */ "offload error: cannot execute function on the device %d (error code %d)",
- /* 40 msg_c_pipeline_start_run_funcs */ "offload error: cannot start executing pipeline function on the device %d (error code %d)",
- /* 41 msg_c_buf_create */ "offload error: cannot create buffer on device %d (error code %d)",
- /* 42 msg_c_buf_create_out_of_mem */ "offload error: cannot create buffer on device %d, out of memory",
- /* 43 msg_c_buf_create_from_mem */ "offload error: cannot create buffer from memory on device %d (error code %d)",
- /* 44 msg_c_buf_destroy */ "offload error: buffer destroy failed (error code %d)",
- /* 45 msg_c_buf_map */ "offload error: buffer map failed (error code %d)",
- /* 46 msg_c_buf_unmap */ "offload error: buffer unmap failed (error code %d)",
- /* 47 msg_c_buf_read */ "offload error: buffer read failed (error code %d)",
- /* 48 msg_c_buf_write */ "offload error: buffer write failed (error code %d)",
- /* 49 msg_c_buf_copy */ "offload error: buffer copy failed (error code %d)",
- /* 50 msg_c_buf_get_address */ "offload error: cannot get buffer address on device %d (error code %d)",
- /* 51 msg_c_buf_add_ref */ "offload error: cannot reuse buffer memory on device %d (error code %d)",
- /* 52 msg_c_buf_release_ref */ "offload error: cannot release buffer memory on device %d (error code %d)",
- /* 53 msg_c_buf_set_state */ "offload error: buffer set state failed (error code %d)",
- /* 54 msg_c_event_wait */ "offload error: wait for event to become signaled failed (error code %d)",
- /* 55 msg_c_zero_or_neg_ptr_len */ "offload error: memory allocation of zero or negative length is not supported",
- /* 56 msg_c_zero_or_neg_transfer_size */ "offload error: data transfer of zero or negative size is not supported",
- /* 57 msg_c_bad_ptr_mem_alloc */ "offload error: allocation (base=%p, size=%d) overlaps with existing allocation (base=%p, size=%d)",
- /* 58 msg_c_bad_ptr_mem_range */ "offload error: data transfer (base=%p, size=%d) not subset of existing allocation (base=%p, size=%d)",
- /* 59 msg_c_different_src_and_dstn_sizes */ "offload error: size of the source %d differs from size of the destination %d",
- /* 60 msg_c_non_contiguous_dope_vector */ "offload error: offload data transfer supports only a single contiguous memory range per variable",
- /* 61 msg_c_omp_invalid_device_num_env */ "offload warning: ignoring %s setting; use a non-negative integer value",
- /* 62 msg_c_omp_invalid_device_num */ "offload error: device number should be a non-negative integer value",
- /* 63 msg_c_unknown_binary_type */ "offload error: unexpected embedded target binary type, expected either an executable or shared library",
- /* 64 msg_c_multiple_target_exes */ "offload error: more that one target executable found",
- /* 65 msg_c_no_target_exe */ "offload error: target executable is not available",
- /* 66 msg_c_report_unknown_timer_node */ "offload error: unknown timer node",
- /* 67 msg_c_report_unknown_trace_node */ "offload error: unknown trace node",
- /* 68 msg_c_incorrect_affinity */ "offload error: unknow affinity type %s, specify compact, scatter or balanced",
- /* 69 msg_c_cannot_set_affinity */ "offload_error: unable to set affinity",
- /* 70 msg_c_report_host */ "HOST",
- /* 71 msg_c_report_mic */ "MIC",
- /* 72 msg_c_report_title */ "timer data (sec)",
- /* 73 msg_c_report_seconds */ "(seconds)",
- /* 74 msg_c_report_bytes */ "(bytes)",
- /* 75 msg_c_report_cpu_time */ "CPU Time",
- /* 76 msg_c_report_mic_time */ "MIC Time",
- /* 77 msg_c_report_tag */ "Tag",
- /* 78 msg_c_report_from_file */ "Offload from file",
- /* 79 msg_c_report_file */ "File",
- /* 80 msg_c_report_line */ "Line",
- /* 81 msg_c_report_cpu_to_mic_data */ "CPU->MIC Data",
- /* 82 msg_c_report_mic_to_cpu_data */ "MIC->CPU Data",
- /* 83 msg_c_report_offload */ "Offload",
- /* 84 msg_c_report_w_tag */ "Tag %d",
- /* 85 msg_c_report_state */ "State",
- /* 86 msg_c_report_start */ "Start target",
- /* 87 msg_c_report_init */ "Initialize",
- /* 88 msg_c_report_logical_card */ "logical card",
- /* 89 msg_c_report_physical_card */ "physical card",
- /* 90 msg_c_report_register */ "Register static data tables",
- /* 91 msg_c_report_init_func */ "Setup target entry",
- /* 92 msg_c_report_create_buf_host */ "Create host buffer",
- /* 93 msg_c_report_create_buf_mic */ "Create target buffer",
- /* 94 msg_c_report_send_pointer_data */ "Send pointer data",
- /* 95 msg_c_report_sent_pointer_data */ "Host->target pointer data",
- /* 96 msg_c_report_gather_copyin_data */ "Gather copyin data",
- /* 97 msg_c_report_copyin_data */ "Host->target copyin data",
- /* 98 msg_c_report_state_signal */ "Signal",
- /* 99 msg_c_report_signal */ "signal :",
- /* 100 msg_c_report_wait */ "waits :",
- /* 101 msg_c_report_compute */ "Execute task on target",
- /* 102 msg_c_report_receive_pointer_data */ "Receive pointer data",
- /* 103 msg_c_report_received_pointer_data */ "Target->host pointer data",
- /* 104 msg_c_report_start_target_func */ "Start target entry",
- /* 105 msg_c_report_var */ "Var",
- /* 106 msg_c_report_scatter_copyin_data */ "Scatter copyin data",
- /* 107 msg_c_report_gather_copyout_data */ "Gather copyout data",
- /* 108 msg_c_report_scatter_copyout_data */ "Scatter copyout data",
- /* 109 msg_c_report_copyout_data */ "Target->host copyout data",
- /* 110 msg_c_report_unregister */ "Unregister data tables",
- /* 111 msg_c_report_destroy */ "Destroy",
- /* 112 msg_c_report_myoinit */ "Initialize MYO",
- /* 113 msg_c_report_myoregister */ "Register MYO tables",
- /* 114 msg_c_report_myofini */ "Finalize MYO",
- /* 115 msg_c_report_mic_myo_shared */ "MIC MYO shared table register",
- /* 116 msg_c_report_mic_myo_fptr */ "MIC MYO fptr table register",
- /* 117 msg_c_report_myosharedmalloc */ "MYO shared malloc",
- /* 118 msg_c_report_myosharedfree */ "MYO shared free",
- /* 119 msg_c_report_myosharedalignedmalloc */ "MYO shared aligned malloc",
- /* 120 msg_c_report_myosharedalignedfree */ "MYO shared aligned free",
- /* 121 msg_c_report_myoacquire */ "MYO acquire",
- /* 122 msg_c_report_myorelease */ "MYO release",
- /* 123 msg_c_report_myosupportsfeature */ "MYO supports feature",
- /* 124 msg_c_report_myosharedarenacreate */ "MYO shared arena create",
- /* 125 msg_c_report_myosharedalignedarenamalloc */ "MYO shared aligned arena malloc",
- /* 126 msg_c_report_myosharedalignedarenafree */ "MYO shared aligned arena free",
- /* 127 msg_c_report_myoarenaacquire */ "MYO arena acquire",
- /* 128 msg_c_report_myoarenarelease */ "MYO arena release",
- /* 129 msg_c_report_host_total_offload_time */ "host: total offload time",
- /* 130 msg_c_report_host_initialize */ "host: initialize target",
- /* 131 msg_c_report_host_target_acquire */ "host: acquire target",
- /* 132 msg_c_report_host_wait_deps */ "host: wait dependencies",
- /* 133 msg_c_report_host_setup_buffers */ "host: setup buffers",
- /* 134 msg_c_report_host_alloc_buffers */ "host: allocate buffers",
- /* 135 msg_c_report_host_setup_misc_data */ "host: setup misc_data",
- /* 136 msg_c_report_host_alloc_data_buffer */ "host: allocate buffer",
- /* 137 msg_c_report_host_send_pointers */ "host: send pointers",
- /* 138 msg_c_report_host_gather_inputs */ "host: gather inputs",
- /* 139 msg_c_report_host_map_in_data_buffer */ "host: map IN data buffer",
- /* 140 msg_c_report_host_unmap_in_data_buffer */ "host: unmap IN data buffer",
- /* 141 msg_c_report_host_start_compute */ "host: initiate compute",
- /* 142 msg_c_report_host_wait_compute */ "host: wait compute",
- /* 143 msg_c_report_host_start_buffers_reads */ "host: initiate pointer reads",
- /* 144 msg_c_report_host_scatter_outputs */ "host: scatter outputs",
- /* 145 msg_c_report_host_map_out_data_buffer */ "host: map OUT data buffer",
- /* 146 msg_c_report_host_unmap_out_data_buffer */ "host: unmap OUT data buffer",
- /* 147 msg_c_report_host_wait_buffers_reads */ "host: wait pointer reads",
- /* 148 msg_c_report_host_destroy_buffers */ "host: destroy buffers",
- /* 149 msg_c_report_target_total_time */ "target: total time",
- /* 150 msg_c_report_target_descriptor_setup */ "target: setup offload descriptor",
- /* 151 msg_c_report_target_func_lookup */ "target: entry lookup",
- /* 152 msg_c_report_target_func_time */ "target: entry time",
- /* 153 msg_c_report_target_scatter_inputs */ "target: scatter inputs",
- /* 154 msg_c_report_target_add_buffer_refs */ "target: add buffer reference",
- /* 155 msg_c_report_target_compute */ "target: compute",
- /* 156 msg_c_report_target_gather_outputs */ "target: gather outputs",
- /* 157 msg_c_report_target_release_buffer_refs */ "target: remove buffer reference",
- /* 158 msg_c_coi_pipeline_max_number */ "number of host threads doing offload exceeds maximum of %d",
- /* 159 msg_c_ranges_dont_match */ "ranges of source and destination don't match together",
- /* 160 msg_c_destination_is_over */ "insufficient destination memory to transfer source",
- /* 161 msg_c_slice_of_noncont_array */ "a non-contiguous slice may be taken of contiguous arrays only",
- /* 162 msg_c_pointer_array_mismatch */ "number of %s elements is less than described by the source",
- /* 163 msg_c_in_with_preallocated */ "offload error: preallocated targetptr alloc_if(1) may not be used with an in clause",
- /* 164 msg_c_report_no_host_exe */ "offload error: Cannot find host executable",
- /* 165 msg_c_report_path_buff_overflow */ "offload error: Size of host executable path exceeded 4KB",
- /* 166 msg_c_create_pipeline_for_stream */ "offload error: number of cpus exceeds maximum of %d",
- /* 167 msg_c_offload_no_stream */ "offload error: the stream isn't found on device %d",
- /* 168 msg_c_get_engine_info */ "offload error: cannot get device %d info (error code %d)",
- /* 169 msg_c_clear_cpu_mask */ "offload error: cannot clear cpu mask (error code %d)",
- /* 170 msg_c_set_cpu_mask */ "offload error: cannot set cpu mask (error code %d)",
- /* 171 msg_c_report_state_stream */ "Stream",
- /* 172 msg_c_report_stream */ "stream :",
- /* 173 msg_c_unload_library */ "offload error: cannot unload library from the device %d (error code %d)",
+ /* 0 __dummy__ */ "Un-used message",
+ /* 1 msg_c_device_is_not_available */ "offload error: cannot offload to MIC - device is not available",
+ /* 2 msg_c_invalid_device_number */ "offload error: expected a number greater than or equal to -1",
+ /* 3 msg_c_send_func_ptr */ "offload error: cannot find function name for address %p",
+ /* 4 msg_c_receive_func_ptr */ "offload error: cannot find address of function %s",
+ /* 5 msg_c_malloc */ "offload error: memory allocation failed",
+ /* 6 msg_c_unknown_mic_device_type */ "offload error: unknown mic device type ",
+ /* 7 msg_c_offload_malloc */ "offload error: memory allocation failed (requested=%lld bytes, align %lld)",
+ /* 8 msg_c_offload1 */ "offload error: device %d does not have a pending signal for wait(%p)",
+ /* 9 msg_c_unknown_var_type */ "offload error: unknown variable type %d",
+ /* 10 msg_c_invalid_env_var_value */ "offload warning: ignoring invalid value specified for %s",
+ /* 11 msg_c_invalid_env_var_int_value */ "offload warning: specify an integer value for %s",
+ /* 12 msg_c_invalid_env_report_value */ "offload warning: ignoring %s setting; use a value in range 1-3",
+ /* 13 msg_c_offload_signaled1 */ "offload error: invalid device number %d specified in _Offload_signaled",
+ /* 14 msg_c_offload_signaled2 */ "offload error: invalid signal %p specified for _Offload_signaled",
+ /* 15 msg_c_myowrapper_checkresult */ "offload error: %s failed with error %d",
+ /* 16 msg_c_myotarget_checkresult */ "offload error: %s failed with error %d",
+ /* 17 msg_c_offload_descriptor_offload */ "offload error: cannot find offload entry %s",
+ /* 18 msg_c_merge_var_descs1 */ "offload error: unexpected number of variable descriptors",
+ /* 19 msg_c_merge_var_descs2 */ "offload error: unexpected variable type",
+ /* 20 msg_c_mic_parse_env_var_list1 */ "offload_error: MIC environment variable must begin with an alpabetic character",
+ /* 21 msg_c_mic_parse_env_var_list2 */ "offload_error: MIC environment variable value must be specified with '='",
+ /* 22 msg_c_mic_process_exit_ret */ "offload error: process on the device %d unexpectedly exited with code %d",
+ /* 23 msg_c_mic_process_exit_sig */ "offload error: process on the device %d was terminated by signal %d (%s)",
+ /* 24 msg_c_mic_process_exit */ "offload error: process on the device %d was unexpectedly terminated",
+ /* 25 msg_c_mic_init3 */ "offload warning: ignoring MIC_STACKSIZE setting; use a value >= 16K and a multiple of 4K",
+ /* 26 msg_c_mic_init4 */ "offload error: thread key create failed with error %d",
+ /* 27 msg_c_mic_init5 */ "offload error: specify OFFLOAD_DEVICES as comma-separated physical device numbers or 'none'",
+ /* 28 msg_c_mic_init6 */ "offload error: OFFLOAD_DEVICES device number %d does not correspond to a physical device",
+ /* 29 msg_c_mic_init7 */ "offload error: specify a number as a device in OFFLOAD_DEVICES",
+ /* 30 msg_c_mic_init8 */ "offload error: specify a number as the start of a device range in OFFLOAD_DEVICES",
+ /* 31 msg_c_mic_init9 */ "offload error: specify a number as the end of a device range in OFFLOAD_DEVICES",
+ /* 32 msg_c_mic_init10 */ "offload error: specify a low to high device range in OFFLOAD_DEVICES",
+ /* 33 msg_c_mic_init11 */ "offload error: specify comma-separated device or device ranges in OFFLOAD_DEVICES",
+ /* 34 msg_c_no_static_var_data */ "offload error: cannot find data associated with statically allocated variable %p",
+ /* 35 msg_c_no_ptr_data */ "offload error: cannot find data associated with pointer variable %p",
+ /* 36 msg_c_get_engine_handle */ "offload error: cannot get device %d handle (error code %d)",
+ /* 37 msg_c_get_engine_index */ "offload error: cannot get physical index for logical device %d (error code %d)",
+ /* 38 msg_c_process_create */ "offload error: cannot start process on the device %d (error code %d)",
+ /* 39 msg_c_process_set_cache_size */ "offload error: cannot reserve buffer on the device %d (error code %d)",
+ /* 40 msg_c_process_get_func_handles */ "offload error: cannot get function handles on the device %d (error code %d)",
+ /* 41 msg_c_process_wait_shutdown */ "offload error: wait for process shutdown failed on device %d (error code %d)",
+ /* 42 msg_c_process_proxy_flush */ "offload error: cannot flush process output on device %d (error code %d)",
+ /* 43 msg_c_load_library */ "offload error: cannot load library %s to the device %d (error code %d)",
+ /* 44 msg_c_pipeline_create */ "offload error: cannot create pipeline on the device %d (error code %d)",
+ /* 45 msg_c_pipeline_run_func */ "offload error: cannot execute function on the device %d (error code %d)",
+ /* 46 msg_c_pipeline_start_run_funcs */ "offload error: cannot start executing pipeline function on the device %d (error code %d)",
+ /* 47 msg_c_buf_create */ "offload error: cannot create buffer on device %d (error code %d)",
+ /* 48 msg_c_buf_create_out_of_mem */ "offload error: cannot create buffer on device %d, out of memory",
+ /* 49 msg_c_buf_create_from_mem */ "offload error: cannot create buffer from memory on device %d (error code %d)",
+ /* 50 msg_c_buf_destroy */ "offload error: buffer destroy failed (error code %d)",
+ /* 51 msg_c_buf_map */ "offload error: buffer map failed (error code %d)",
+ /* 52 msg_c_buf_unmap */ "offload error: buffer unmap failed (error code %d)",
+ /* 53 msg_c_buf_read */ "offload error: buffer read failed (error code %d)",
+ /* 54 msg_c_buf_write */ "offload error: buffer write failed (error code %d)",
+ /* 55 msg_c_buf_copy */ "offload error: buffer copy failed (error code %d)",
+ /* 56 msg_c_buf_get_address */ "offload error: cannot get buffer address on device %d (error code %d)",
+ /* 57 msg_c_buf_add_ref */ "offload error: cannot reuse buffer memory on device %d (error code %d)",
+ /* 58 msg_c_buf_release_ref */ "offload error: cannot release buffer memory on device %d (error code %d)",
+ /* 59 msg_c_buf_set_state */ "offload error: buffer set state failed (error code %d)",
+ /* 60 msg_c_event_wait */ "offload error: wait for event to become signaled failed (error code %d)",
+ /* 61 msg_c_zero_or_neg_ptr_len */ "offload error: memory allocation of zero or negative length is not supported",
+ /* 62 msg_c_zero_or_neg_transfer_size */ "offload error: data transfer of zero or negative size is not supported",
+ /* 63 msg_c_bad_ptr_mem_alloc */ "offload error: allocation (base=%p, size=%d) overlaps with existing allocation (base=%p, size=%d)",
+ /* 64 msg_c_bad_ptr_mem_range */ "offload error: data transfer (base=%p, size=%d) not subset of existing allocation (base=%p, size=%d)",
+ /* 65 msg_c_different_src_and_dstn_sizes */ "offload error: size of the source %d differs from size of the destination %d",
+ /* 66 msg_c_non_contiguous_dope_vector */ "offload error: offload data transfer supports only a single contiguous memory range per variable",
+ /* 67 msg_c_omp_invalid_device_num_env */ "offload warning: ignoring %s setting; use a non-negative integer value",
+ /* 68 msg_c_omp_invalid_device_num */ "offload error: device number should be a non-negative integer value",
+ /* 69 msg_c_unknown_binary_type */ "offload error: unexpected embedded target binary type, expected either an executable or shared library",
+ /* 70 msg_c_multiple_target_exes */ "offload error: more that one target executable found",
+ /* 71 msg_c_no_target_exe */ "offload error: target executable is not available",
+ /* 72 msg_c_report_unknown_timer_node */ "offload error: unknown timer node",
+ /* 73 msg_c_report_unknown_trace_node */ "offload error: unknown trace node",
+ /* 74 msg_c_incorrect_affinity */ "offload error: unknow affinity type %s, specify compact, scatter or balanced",
+ /* 75 msg_c_cannot_set_affinity */ "offload_error: unable to set affinity",
+ /* 76 msg_c_mixed_versions */ "offload_error: all application executables that contain offload must be built with the same compiler version",
+ /* 77 msg_c_report_host */ "HOST",
+ /* 78 msg_c_report_mic */ "MIC",
+ /* 79 msg_c_report_title */ "timer data (sec)",
+ /* 80 msg_c_report_seconds */ "(seconds)",
+ /* 81 msg_c_report_bytes */ "(bytes)",
+ /* 82 msg_c_report_cpu_time */ "CPU Time",
+ /* 83 msg_c_report_mic_time */ "MIC Time",
+ /* 84 msg_c_report_tag */ "Tag",
+ /* 85 msg_c_report_from_file */ "Offload from file",
+ /* 86 msg_c_report_file */ "File",
+ /* 87 msg_c_report_line */ "Line",
+ /* 88 msg_c_report_cpu_to_mic_data */ "CPU->MIC Data",
+ /* 89 msg_c_report_mic_to_cpu_data */ "MIC->CPU Data",
+ /* 90 msg_c_report_offload */ "Offload",
+ /* 91 msg_c_report_w_tag */ "Tag %d",
+ /* 92 msg_c_report_state */ "State",
+ /* 93 msg_c_report_start */ "Start target",
+ /* 94 msg_c_report_init */ "Initialize",
+ /* 95 msg_c_report_logical_card */ "logical card",
+ /* 96 msg_c_report_physical_card */ "physical card",
+ /* 97 msg_c_report_register */ "Register static data tables",
+ /* 98 msg_c_report_init_func */ "Setup target entry",
+ /* 99 msg_c_report_create_buf_host */ "Create host buffer",
+ /* 100 msg_c_report_create_buf_mic */ "Create target buffer",
+ /* 101 msg_c_report_send_pointer_data */ "Send pointer data",
+ /* 102 msg_c_report_sent_pointer_data */ "Host->target pointer data",
+ /* 103 msg_c_report_gather_copyin_data */ "Gather copyin data",
+ /* 104 msg_c_report_copyin_data */ "Host->target copyin data",
+ /* 105 msg_c_report_state_signal */ "Signal",
+ /* 106 msg_c_report_signal */ "signal :",
+ /* 107 msg_c_report_wait */ "waits :",
+ /* 108 msg_c_report_compute */ "Execute task on target",
+ /* 109 msg_c_report_receive_pointer_data */ "Receive pointer data",
+ /* 110 msg_c_report_received_pointer_data */ "Target->host pointer data",
+ /* 111 msg_c_report_start_target_func */ "Start target entry",
+ /* 112 msg_c_report_var */ "Var",
+ /* 113 msg_c_report_scatter_copyin_data */ "Scatter copyin data",
+ /* 114 msg_c_report_gather_copyout_data */ "Gather copyout data",
+ /* 115 msg_c_report_scatter_copyout_data */ "Scatter copyout data",
+ /* 116 msg_c_report_copyout_data */ "Target->host copyout data",
+ /* 117 msg_c_report_unregister */ "Unregister data tables",
+ /* 118 msg_c_report_destroy */ "Destroy",
+ /* 119 msg_c_report_myoinit */ "Initialize MYO",
+ /* 120 msg_c_report_myoregister */ "Register MYO tables",
+ /* 121 msg_c_report_myofini */ "Finalize MYO",
+ /* 122 msg_c_report_mic_myo_shared */ "MIC MYO shared table register",
+ /* 123 msg_c_report_mic_myo_fptr */ "MIC MYO fptr table register",
+ /* 124 msg_c_report_myosharedmalloc */ "MYO shared malloc",
+ /* 125 msg_c_report_myosharedfree */ "MYO shared free",
+ /* 126 msg_c_report_myosharedalignedmalloc */ "MYO shared aligned malloc",
+ /* 127 msg_c_report_myosharedalignedfree */ "MYO shared aligned free",
+ /* 128 msg_c_report_myoacquire */ "MYO acquire",
+ /* 129 msg_c_report_myorelease */ "MYO release",
+ /* 130 msg_c_report_myosupportsfeature */ "MYO supports feature",
+ /* 131 msg_c_report_myosharedarenacreate */ "MYO shared arena create",
+ /* 132 msg_c_report_myosharedalignedarenamalloc */ "MYO shared aligned arena malloc",
+ /* 133 msg_c_report_myosharedalignedarenafree */ "MYO shared aligned arena free",
+ /* 134 msg_c_report_myoarenaacquire */ "MYO arena acquire",
+ /* 135 msg_c_report_myoarenarelease */ "MYO arena release",
+ /* 136 msg_c_report_host_total_offload_time */ "host: total offload time",
+ /* 137 msg_c_report_host_initialize */ "host: initialize target",
+ /* 138 msg_c_report_host_target_acquire */ "host: acquire target",
+ /* 139 msg_c_report_host_wait_deps */ "host: wait dependencies",
+ /* 140 msg_c_report_host_setup_buffers */ "host: setup buffers",
+ /* 141 msg_c_report_host_alloc_buffers */ "host: allocate buffers",
+ /* 142 msg_c_report_host_setup_misc_data */ "host: setup misc_data",
+ /* 143 msg_c_report_host_alloc_data_buffer */ "host: allocate buffer",
+ /* 144 msg_c_report_host_send_pointers */ "host: send pointers",
+ /* 145 msg_c_report_host_gather_inputs */ "host: gather inputs",
+ /* 146 msg_c_report_host_map_in_data_buffer */ "host: map IN data buffer",
+ /* 147 msg_c_report_host_unmap_in_data_buffer */ "host: unmap IN data buffer",
+ /* 148 msg_c_report_host_start_compute */ "host: initiate compute",
+ /* 149 msg_c_report_host_wait_compute */ "host: wait compute",
+ /* 150 msg_c_report_host_start_buffers_reads */ "host: initiate pointer reads",
+ /* 151 msg_c_report_host_scatter_outputs */ "host: scatter outputs",
+ /* 152 msg_c_report_host_map_out_data_buffer */ "host: map OUT data buffer",
+ /* 153 msg_c_report_host_unmap_out_data_buffer */ "host: unmap OUT data buffer",
+ /* 154 msg_c_report_host_wait_buffers_reads */ "host: wait pointer reads",
+ /* 155 msg_c_report_host_destroy_buffers */ "host: destroy buffers",
+ /* 156 msg_c_report_target_total_time */ "target: total time",
+ /* 157 msg_c_report_target_descriptor_setup */ "target: setup offload descriptor",
+ /* 158 msg_c_report_target_func_lookup */ "target: entry lookup",
+ /* 159 msg_c_report_target_func_time */ "target: entry time",
+ /* 160 msg_c_report_target_scatter_inputs */ "target: scatter inputs",
+ /* 161 msg_c_report_target_add_buffer_refs */ "target: add buffer reference",
+ /* 162 msg_c_report_target_compute */ "target: compute",
+ /* 163 msg_c_report_target_gather_outputs */ "target: gather outputs",
+ /* 164 msg_c_report_target_release_buffer_refs */ "target: remove buffer reference",
+ /* 165 msg_c_coi_pipeline_max_number */ "number of host threads doing offload exceeds maximum of %d",
+ /* 166 msg_c_ranges_dont_match */ "ranges of source and destination don't match together",
+ /* 167 msg_c_destination_is_over */ "insufficient destination memory to transfer source",
+ /* 168 msg_c_slice_of_noncont_array */ "a non-contiguous slice may be taken of contiguous arrays only",
+ /* 169 msg_c_pointer_array_mismatch */ "number of %s elements is less than described by the source",
+ /* 170 msg_c_in_with_preallocated */ "offload error: preallocated targetptr alloc_if(1) may not be used with an in clause",
+ /* 171 msg_c_report_no_host_exe */ "offload error: Cannot find host executable",
+ /* 172 msg_c_report_no_target_exe */ "offload error: Cannot find target executable (%s)",
+ /* 173 msg_c_report_path_buff_overflow */ "offload error: Size of host executable path exceeded 4KB",
+ /* 174 msg_c_create_pipeline_for_stream */ "offload error: number of cpus exceeds maximum of %d",
+ /* 175 msg_c_offload_no_stream */ "offload error: the stream isn't found on device %d",
+ /* 176 msg_c_offload_device_doesnt_match_to_stream */ "offload_error: the stream was created for device %d",
+ /* 177 msg_c_offload_streams_are_absent */ "offload error: there was created no streams",
+ /* 178 msg_c_get_engine_info */ "offload error: cannot get device %d info (error code %d)",
+ /* 179 msg_c_clear_cpu_mask */ "offload error: cannot clear cpu mask (error code %d)",
+ /* 180 msg_c_set_cpu_mask */ "offload error: cannot set cpu mask (error code %d)",
+ /* 181 msg_c_report_state_stream */ "Stream",
+ /* 182 msg_c_report_stream */ "stream :",
+ /* 183 msg_c_unload_library */ "offload error: cannot unload library from the device %d (error code %d)",
+ /* 184 msg_c_target_myo_library */ "offload error: cannot find MYO library on device %s",
+ /* 185 msg_c_myo_dl_sym */ "offload error: cannot find MYO func on device %s",
+ /* 186 msg_c_bad_myo_free */ "offload error: cannot free MYO shared memory %p",
};
!
-! Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+! Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
#ifndef OFFLOAD_H_INCLUDED
#define OFFLOAD_H_INCLUDED
+#ifdef __cplusplus
#if defined(LINUX) || defined(FREEBSD)
#include <bits/functexcept.h>
#endif
+#endif
#include <stddef.h>
#include <omp.h>
#ifdef TARGET_WINNT
-// <stdint.h> is not compatible with Windows
+// <stdint.h> is incompatible on Windows.
typedef unsigned long long int uint64_t;
+typedef signed long long int int64_t;
#else
#include <stdint.h>
#endif // TARGET_WINNT
size_t data_received; /* number of bytes received by host */
} _Offload_status;
-typedef uint64_t _Offload_stream;
+typedef int64_t _Offload_stream;
#define OFFLOAD_STATUS_INIT(x) \
((x).result = OFFLOAD_DISABLED)
_Offload_stream stream // stream handle
);
+extern int _Offload_stream_delete(
+ _Offload_stream handle // stream handle
+);
+
extern int _Offload_stream_completed(
int device, // MIC device number
_Offload_stream handle // stream handle
);
+extern int _Offload_device_streams_completed(
+ int device // MIC device number
+);
+
+extern int _Offload_stream_is_empty(
+ _Offload_stream handle // stream handle
+);
+
/*
* _Offload_shared_malloc/free are only supported when offload is enabled
* else they are defined to malloc and free
extern int omp_get_default_device(void) __GOMP_NOTHROW;
extern int omp_get_num_devices(void) __GOMP_NOTHROW;
+// OpenMP 4.5 APIs
+
+/*! \fn omp_get_initial_device
+ \brief Return the device id of the initial device.
+ \return Returns the device id of the initial device.
+*/
+extern int omp_get_initial_device(
+ void
+) __GOMP_NOTHROW;
+
+/*! \fn omp_target_alloc
+ \brief Allocate memory in the device data environment.
+ \param size Number of bytes to allocate.
+ \param device_num The device number on which to allocate.
+ \return Returns a pointer to the allocated memory.
+*/
+extern void* omp_target_alloc(
+ size_t size,
+ int device_num
+) __GOMP_NOTHROW;
+
+/*! \fn omp_target_free
+ \brief Free memory in the device data environment.
+ \param device_ptr Address of allocated device memory.
+ \param device_num The device number on which to free.
+*/
+extern void omp_target_free(
+ void *device_ptr,
+ int device_num
+) __GOMP_NOTHROW;
+
+/*! \fn omp_target_is_present
+ \brief Test whether a host pointer has corresponding storage on a device.
+ \param ptr Host pointer to test.
+ \param device_num The device number on which to test.
+ \return true if storage is found, false otherwise.
+*/
+extern int omp_target_is_present(
+ void *ptr,
+ int device_num
+) __GOMP_NOTHROW;
+
+/*! \fn omp_target_memcpy
+ \brief Copy memory between host/device pointers.
+ \param dst Address of destination memory.
+ \param src Address of source memory.
+ \param length Number of bytes to copy.
+ \param dst_offset Destination offset in bytes.
+ \param src_offset Source offset in bytes.
+ \param dst_device Destination device number.
+ \param src_device Source device number.
+ \return 0 on success, 1 otherwise.
+*/
+extern int omp_target_memcpy(
+ void *dst,
+ void *src,
+ size_t length,
+ size_t dst_offset,
+ size_t src_offset,
+ int dst_device,
+ int src_device
+) __GOMP_NOTHROW;
+
+/*! \fn omp_target_memcpy_rect
+ \brief Copy a rectangular subsection from
+ one multi-dimensional array to another.
+ \param dst Address of destination array.
+ \param src Address of source array.
+ \param element_size Number of bytes in each array element.
+ \param num_dims Number of dimensions.
+ \param volume Array of element counts to copy in each dimension.
+ \param dst_offsets Destination offsets array.
+ \param src_offsets Source offsets array.
+ \param dst_dimensions Destination array dimensions array.
+ \param src_dimensions Source array dimensions array.
+ \param dst_device Destination device number.
+ \param src_device Source device number.
+ \return 0 on success, 1 otherwise.
+*/
+extern int omp_target_memcpy_rect(
+ void *dst,
+ void *src,
+ size_t element_size,
+ int num_dims,
+ const size_t *volume,
+ const size_t *dst_offsets,
+ const size_t *src_offsets,
+ const size_t *dst_dimensions,
+ const size_t *src_dimensions,
+ int dst_device,
+ int src_device
+) __GOMP_NOTHROW;
+
+/*! \fn omp_target_associate_ptr
+ \brief Map a device pointer to a host pointer.
+ \param host_ptr The host pointer.
+ \param device_ptr The device pointer.
+ \param size Number of bytes to map.
+ \param device_offset Offset on device of mapped memory.
+ \param device_num Device number.
+ \return 0 on success, 1 otherwise.
+*/
+extern int omp_target_associate_ptr(
+ void *host_ptr,
+ void *device_ptr,
+ size_t size,
+ size_t device_offset,
+ int device_num
+) __GOMP_NOTHROW;
+
+/*! \fn omp_target_disassociate_ptr
+ \brief Remove a host pointer to device pointer association.
+ \param host_ptr The host pointer to disassociate.
+ \param device_num Device number.
+ \return 0 on success, 1 otherwise.
+*/
+extern int omp_target_disassociate_ptr(
+ void *host_ptr,
+ int device_num
+) __GOMP_NOTHROW;
+
+// End of OpenMP 4.5 APIs
+
/* OpenMP API wrappers */
/* Set num_threads on target */
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
#define getenv(x) __secure_getenv(x)
#endif
+// Offload Library versioning
+DLL_LOCAL extern int offload_version;
+DLL_LOCAL extern int offload_version_count;
+
// The debug routines
// Host console and file logging
c_cean_var, //!< CEAN variable
c_cean_var_ptr, //!< Pointer to CEAN variable
c_data_ptr_array, //!< Pointer to data pointer array
+ c_extended_type, //!< Is used to extend OffloadItemType
+ //!< Actual OffloadItemType is in the
+ //!< structure VarDescExtendedType
c_func_ptr_array, //!< Pointer to function pointer array
c_void_ptr_array, //!< Pointer to void* pointer array
- c_string_ptr_array //!< Pointer to char* pointer array
+ c_string_ptr_array, //!< Pointer to char* pointer array
+ c_data_ptr_ptr, //!< Pointer to pointer to data (struct member)
+ c_func_ptr_ptr, //!< Pointer to pointer to function (struct member)
+ c_void_ptr_ptr, //!< Pointer to pointer to void* (struct member)
+ c_string_ptr_ptr, //!< Pointer to pointer to string (struct member)
+ c_cean_var_ptr_ptr //!< Pointer to pointer to cean var (struct member)
};
+//! True when item type \a t is one of the pointer-to-pointer
+//! (struct member) kinds: c_string_ptr_ptr, c_data_ptr_ptr,
+//! c_func_ptr_ptr, c_void_ptr_ptr or c_cean_var_ptr_ptr.
+//! The argument is expanded once per comparison, so it should be
+//! an expression without side effects.
+#define TYPE_IS_PTR_TO_PTR(t) ((t) == c_string_ptr_ptr || \
+ (t) == c_data_ptr_ptr || \
+ (t) == c_func_ptr_ptr || \
+ (t) == c_void_ptr_ptr || \
+ (t) == c_cean_var_ptr_ptr)
+
#define VAR_TYPE_IS_PTR(t) ((t) == c_string_ptr || \
(t) == c_data_ptr || \
(t) == c_cean_var_ptr || \
- (t) == c_dv_ptr)
+ (t) == c_dv_ptr || \
+ TYPE_IS_PTR_TO_PTR(t))
#define VAR_TYPE_IS_SCALAR(t) ((t) == c_data || \
(t) == c_void_ptr || \
#define VAR_TYPE_IS_DV_DATA_SLICE(t) ((t) == c_dv_data_slice || \
(t) == c_dv_ptr_data_slice)
-
//! \enum Specify direction to copy offloaded variable.
enum OffloadParameterType {
c_parameter_unknown = -1, //!< Unknown clause
uint32_t targetptr : 1;
//! "preallocated" modifier used
uint32_t preallocated : 1;
- //! Needs documentation
+ //! pointer to a pointer array
uint32_t is_pointer : 1;
//! buffer address is sent in data
uint32_t always_copy : 1;
//! "OpenMP delete" modifier used
uint32_t always_delete : 1;
+ //! structured data is noncontiguous
+ uint32_t is_non_cont_struct : 1;
//! CPU memory pinning/unpinning operation
uint32_t pin : 1;
+ //! Pointer to device memory
+ uint32_t is_device_ptr : 1;
+ //! Hostpointer with associated device pointer
+ uint32_t use_device_ptr : 1;
};
uint32_t bits;
};
const int flag_alloc_elements_is_scalar = 19;
const int flag_alloc_elements_is_array = 20;
+//! Extended Variable Descriptor. The field that VarDesc reserves for
+//! OffloadItemType can no longer hold every item type, so any type
+//! greater than 15 is recorded in VarDesc as c_extended_type and this
+//! structure carries the actual type.
+//! NOTE(review): the original comment said VarDesc uses "16 bits" for
+//! OffloadItemType, but the limit of 15 suggests 16 values (a 4-bit
+//! field) -- confirm against the VarDesc definition.
+typedef struct VarDescExtendedType {
+
+ // Actual item type; holds the values that do not fit in VarDesc
+ uint32_t extended_type;
+
+ //! For extended_type:
+ //! address of the variable.
+ //! Future types can point to other descriptors.
+ void *ptr;
+} VarDescExtendedType;
+
// The Marshaller
class Marshaller
{
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
#include "offload_engine.h"
#include <signal.h>
#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
#include <algorithm>
#include <vector>
#include "offload_table.h"
#include "offload_iterator.h"
+#if defined(HOST_WINNT)
+#define PATH_SEPARATOR ";"
+#else
+#define PATH_SEPARATOR ":"
+#endif
+
// Static members of Stream class must be described somewhere.
// This members describe the list of all streams defined in programm
// via call to _Offload_stream_create.
uint64_t Stream::m_streams_count = 0;
StreamMap Stream::all_streams;
mutex_t Stream::m_stream_lock;
+char* mic_library_path = 0;
const char* Engine::m_func_names[Engine::c_funcs_total] =
{
// it is ready now
m_ready = true;
+
+ // Inform the debugger
+ if (__dbg_is_attached) {
+ __dbg_target_so_loaded();
+ }
}
}
}
+// Trace (debug level 3) every CPU thread that may be used by streams as
+// "index(count)" pairs, 20 pairs per output line.
+//   str - tag printed in the header line; identifies the calling site.
+// Threads are visited in index order (0 .. m_num_threads - 1); a thread is
+// listed when no CPU mask is assigned or when its bit is set in the mask.
+void Engine::print_stream_cpu_list(const char * str)
+{
+    int count = 0;
+    char buffer[1024];
+    size_t used = 0;    // number of characters currently held in buffer
+
+    OFFLOAD_DEBUG_TRACE(3,
+        "%s : cpu list as Index(Count) for the streams is :\n", str);
+    buffer[0] = 0;
+    for (int i = 0; i < m_num_threads; i++) {
+        CpuEl* cpu_el = m_cpus + i;
+        if (m_assigned_cpus == 0 || (*m_assigned_cpus)[i]) {
+            count++;
+            // CPU_INDEX() is a pointer difference and count is a uint64_t;
+            // convert both explicitly so the arguments match the conversion
+            // specifiers, and bound the write to the space that is left.
+            int written = snprintf(buffer + used, sizeof(buffer) - used,
+                                   "%d(%llu) ", (int) CPU_INDEX(cpu_el),
+                                   (unsigned long long) cpu_el->count);
+            if (written > 0) {
+                if ((size_t) written >= sizeof(buffer) - used) {
+                    // output was truncated; buffer is full
+                    used = sizeof(buffer) - 1;
+                }
+                else {
+                    used += (size_t) written;
+                }
+            }
+            if (count % 20 == 0) {
+                OFFLOAD_DEBUG_TRACE(3, "%s\n", buffer);
+                buffer[0] = 0;
+                used = 0;
+            }
+        }
+    }
+    // flush the last, partially filled line
+    if (count % 20 != 0) {
+        OFFLOAD_DEBUG_TRACE(3, "%s\n", buffer);
+    }
+}
+
void Engine::init_process(void)
{
COIENGINE engine;
COIRESULT res;
const char **environ;
char buf[4096]; // For exe path name
+ char* mic_device_main = 0;
// create environment for the target process
environ = (const char**) mic_env_vars.create_environ_for_card(m_index);
res = COI::EngineGetInfo(engine, sizeof(COI_ENGINE_INFO), &engine_info);
check_result(res, c_get_engine_info, m_index, res);
+ if (mic_library_path == 0 ) {
+ if (engine_info.ISA == COI_DEVICE_KNC) {
+ mic_library_path = knc_library_path;
+ }
+ else if (engine_info.ISA == COI_DEVICE_KNL) {
+ mic_library_path = knl_library_path;
+ }
+ else {
+ LIBOFFLOAD_ERROR(c_unknown_mic_device_type);
+ }
+ }
+
+ // m_cpus is the list of all available threads.
+ // At the beginning it holds all threads made available through
+ // OFFLOAD_DEVICES, or all threads of the engine if OFFLOAD_DEVICES isn't set.
+ // m_cpu_head points to the head of the m_cpus list.
+ // m_cpus is ordered by the number of streams using each thread, so
+ // m_cpu_head points to the least used thread.
+ // After creating or destroying a stream the m_cpus list must be
+ // re-sorted to keep this order.
+
+ m_cpus = (CpuEl*)malloc(engine_info.NumThreads * sizeof(CpuEl));
+ if (m_cpus == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+ memset(m_cpus, 0, engine_info.NumThreads * sizeof(CpuEl));
+ CpuEl* prev_cpu = NULL;
- // m_cpus bitset has 1 for available thread. At the begining all threads
- // are available and m_cpus(i) is set to
- // 1 for i = [0...engine_info.NumThreads].
- m_cpus.reset();
for (int i = 0; i < engine_info.NumThreads; i++) {
- m_cpus.set(i);
+ if (m_assigned_cpus == 0 || (*m_assigned_cpus)[i]) {
+ if (prev_cpu) {
+ prev_cpu->next = m_cpus + i;
+ }
+ else {
+ m_cpu_head = m_cpus + i;
+ }
+ m_cpus[i].prev = prev_cpu;
+ m_cpus[i].count = 0;
+ prev_cpu = m_cpus + i;
+ }
}
// The following values will be used at pipeline creation for streams
m_num_cores = engine_info.NumCores;
m_num_threads = engine_info.NumThreads;
+ print_stream_cpu_list("init_process");
+
// Check if OFFLOAD_DMA_CHANNEL_COUNT is set to 2
// Only the value 2 is supported in 16.0
if (mic_dma_channel_count == 2) {
// Note: putenv requires its argument can't be freed or modified.
// So no free after call to putenv or elsewhere.
char * env_var = strdup("COI_DMA_CHANNEL_COUNT=2");
- if (env_var == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- putenv(env_var);
+ if (env_var == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+ putenv(env_var);
}
}
// Target executable is not available then use compiler provided offload_main
if (__target_exe == 0) {
- if (mic_device_main == 0)
- LIBOFFLOAD_ERROR(c_report_no_host_exe);
+ // find target executable to be used if main application is not an
+ // offload build application.
+ const char *base_name = "offload_main";
+ if (mic_library_path != 0) {
+ char *buf = strdup(mic_library_path);
+ if (buf == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+ char *try_name = (char*) alloca(strlen(mic_library_path) +
+ strlen(base_name) + 2);
+ char *dir, *ptr;
+
+ for (dir = strtok_r(buf, PATH_SEPARATOR, &ptr); dir != 0;
+ dir = strtok_r(0, PATH_SEPARATOR, &ptr)) {
+ // compose a full path
+ sprintf(try_name, "%s/%s", dir, base_name);
+
+ // check if such file exists
+ struct stat st;
+ if (stat(try_name, &st) == 0 && S_ISREG(st.st_mode)) {
+ mic_device_main = strdup(try_name);
+ if (mic_device_main == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+ break;
+ }
+ }
+ free(buf);
+ }
+ if (mic_device_main == 0) {
+ LIBOFFLOAD_ERROR(c_report_no_target_exe, "offload_main");
+ exit(1);
+ }
OFFLOAD_DEBUG_TRACE(2,
"Loading target executable %s\n",mic_device_main);
if (__dbg_is_attached) {
// TODO: we have in-memory executable now.
// Check with IDB team what should we provide them now?
- if (strlen(__target_exe->name) < MAX_TARGET_NAME) {
- strcpy(__dbg_target_exe_name, __target_exe->name);
+ if (__target_exe == 0) {
+ strcpy(__dbg_target_exe_name, "offload_main");
+ }
+ else {
+ if (strlen(__target_exe->name) < MAX_TARGET_NAME) {
+ strcpy(__dbg_target_exe_name, __target_exe->name);
+ }
}
__dbg_target_so_pid = pid;
__dbg_target_id = m_physical_index;
- __dbg_target_so_loaded();
+ // The call to __dbg_target_so_loaded() has been moved
+ // to Engine::init so that all the libraries are loaded before
+ // the debugger is informed and it can access them.
+ // __dbg_target_so_loaded();
}
}
m_dyn_libs.push_front(DynLib(it->name, it->data, lib));
if (res != COI_SUCCESS && res != COI_ALREADY_EXISTS) {
- check_result(res, c_load_library, m_index, res);
+ check_result(res, c_load_library, it->origin, m_index, res);
}
}
m_images.clear();
if (is_new) {
ptr->mic_addr = ti->addr;
ptr->is_static = true;
+ ptr->var_alloc_type = (*hi)->var_alloc_type;
}
ptr->alloc_ptr_data_lock.unlock();
hi++;
LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
LIBOFFLOAD_ABORT;
}
- // create pipeline for this thread
- res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline);
+
+ // Create pipeline for this thread
+ if (m_assigned_cpus == 0) {
+ // If m_assigned_cpus is NULL, it implies all threads
+ // Create the pipeline with no CPU mask
+ res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline);
+ } else {
+ // Create COI CPU mask
+ COI_CPU_MASK in_Mask;
+ res = COI::PipelineClearCPUMask(in_Mask);
+ check_result(res, c_clear_cpu_mask, m_index, res);
+
+ int threads_per_core = m_num_threads / m_num_cores;
+
+ // Available threads are defined by examining of m_assigned_cpus bitset.
+ // We skip thread 0.
+ for (int i = 1; i < m_num_threads; i++) {
+ // For available thread i m_assigned_cpus[i] is equal to 1
+ if ((*m_assigned_cpus)[i]) {
+ COI_CPU_MASK_SET(i, in_Mask);
+ }
+ }
+ OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask for this CPU thread\n"
+ "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n"
+ "%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n",
+ in_Mask[0], in_Mask[1], in_Mask[2], in_Mask[3],
+ in_Mask[4], in_Mask[5], in_Mask[6], in_Mask[7],
+ in_Mask[8], in_Mask[9], in_Mask[10], in_Mask[11],
+ in_Mask[12], in_Mask[13], in_Mask[14], in_Mask[15]);
+
+ // Create the pipeline with allowable CPUs
+ res = COI::PipelineCreate(m_process, in_Mask, mic_stack_size, &pipeline);
+ }
check_result(res, c_pipeline_create, m_index, res);
thread->set_pipeline(m_index, pipeline);
}
return stream;
}
+// Relocate the list node cpu_what so that it directly follows cpu_after.
+// Nothing happens when both arguments name the same node.
+// cpu_after is dereferenced unconditionally and therefore must not be NULL.
+// m_cpu_head is updated when cpu_what was the first node of the list.
+void Engine::move_cpu_el_after(CpuEl* cpu_what, CpuEl* cpu_after)
+{
+    if (cpu_what == cpu_after) {
+        return;
+    }
+
+    // Step 1: unlink cpu_what from its current position.
+    CpuEl* before = cpu_what->prev;
+    CpuEl* behind = cpu_what->next;
+    if (before) {
+        before->next = behind;
+    }
+    else {
+        m_cpu_head = behind;
+    }
+    if (behind) {
+        behind->prev = before;
+    }
+
+    // Step 2: splice cpu_what in right behind cpu_after. The successor is
+    // read only now because unlinking may have changed cpu_after->next.
+    CpuEl* successor = cpu_after->next;
+    cpu_what->prev = cpu_after;
+    cpu_what->next = successor;
+    if (successor) {
+        successor->prev = cpu_what;
+    }
+    cpu_after->next = cpu_what;
+}
+
COIPIPELINE Engine::get_pipeline(_Offload_stream handle)
{
Stream * stream = Stream::find_stream(handle, false);
int threads_per_core = m_num_threads / m_num_cores;
- // The "stream_cpu_num" available threads is set in mask.
- // Available threads are defined by examining of m_cpus bitset.
- // We skip thread 0 .
- for (int i = 1; i < m_num_threads; i++) {
- // for available thread i m_cpus[i] is equal to 1
- if (m_cpus[i]) {
- res = COI::PipelineSetCPUMask(m_process,
- i / threads_per_core,
- i % threads_per_core,
- in_Mask);
-
- check_result(res, c_set_cpu_mask, res);
- // mark thread i as nonavailable
- m_cpus.set(i,0);
- // Mark thread i as given for the stream.
- // In case of stream destroying by call to
- // _Offload_stream_destroy we can mark the thread i as
- // available.
- stream->m_stream_cpus.set(i);
- if (--stream_cpu_num <= 0) {
+
+ // Available threads are taken from the m_cpus list.
+ // m_cpu_head points to the head of m_cpus.
+ // The elements of m_cpus are ordered by the number of streams using them.
+
+ CpuEl *cpu_el = m_cpu_head;
+ CpuEl *cpu_used_el, *cpu_used_prev, *cpu_prev;
+
+ for (int i = 0; i < stream_cpu_num; i++) {
+ COI_CPU_MASK_SET(CPU_INDEX(cpu_el), in_Mask);
+ stream->m_stream_cpus.set(CPU_INDEX(cpu_el));
+ // If the number of available threads is less than stream_cpu_num,
+ // stream_cpu_num is restricted to this number.
+ if (!cpu_el->next) {
+ break;
+ }
+ if (i + 1 < stream_cpu_num) {
+ cpu_el = cpu_el->next;
+ }
+ }
+
+ // assertion : cpu_el points to the last used thread
+ cpu_used_el = cpu_el;
+ while (cpu_used_el) {
+ cpu_used_el->count++;
+ cpu_el = cpu_prev = cpu_used_el;
+ cpu_used_prev = cpu_used_el->prev;
+ if (!cpu_el->next) {
+ cpu_used_el = cpu_used_prev;
+ continue;
+ }
+
+ while (cpu_el) {
+ if (cpu_used_el->count < cpu_el->count) {
break;
}
+ // Equal used threads are ordered by thread number to
+ // assign to a stream as contiguous threads as possible.
+ else if (cpu_used_el->count == cpu_el->count &&
+ CPU_INDEX(cpu_used_el) < CPU_INDEX(cpu_el)) {
+ break;
+ }
+ cpu_prev = cpu_el;
+ cpu_el = cpu_el->next;
}
+ if (cpu_used_el != cpu_prev) {
+ move_cpu_el_after(cpu_used_el, cpu_prev);
+ }
+ cpu_used_el = cpu_used_prev;
}
+ print_stream_cpu_list("get_pipeline");
- // if stream_cpu_num is greater than 0 there are not enough
- // available threads
- if (stream_cpu_num > 0) {
- LIBOFFLOAD_ERROR(c_create_pipeline_for_stream, m_num_threads);
- LIBOFFLOAD_ABORT;
- }
// create pipeline for this thread
- OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask\n"
+ OFFLOAD_DEBUG_TRACE(2, "COIPipelineCreate Mask for this Stream\n"
"%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n"
"%016lx %016lx %016lx %016lx\n%016lx %016lx %016lx %016lx\n",
in_Mask[0], in_Mask[1], in_Mask[2], in_Mask[3],
// return cpus for future use
for (int i = 0; i < m_num_threads; i++) {
if (stream->m_stream_cpus.test(i)) {
- m_cpus.set(i);
+ CpuEl *cpu_el = m_cpus + i;
+ CpuEl *cpu_first_el = cpu_el;
+ // decrease count of thread "i" and move its CpuEl to the
+ // proper place into the ordered list
+ cpu_el->count--;
+ while (cpu_el->prev) {
+ if (cpu_first_el->count > cpu_el->prev->count) {
+ break;
+ }
+ else if (cpu_first_el->count == cpu_el->prev->count &&
+ CPU_INDEX(cpu_first_el) > CPU_INDEX(cpu_el->prev)) {
+ break;
+ }
+ cpu_el = cpu_el->prev;
+ }
+ cpu_el = cpu_el->prev;
+ // If cpu_el for thread "i" must be moved in the list
+ if (cpu_first_el != cpu_el) {
+ // Thread "i" is used the least times. It must be set as
+ // the m_cpu_head.
+ if (!cpu_el) {
+ if (!cpu_first_el->prev) {
+ continue;
+ }
+ // remove cpu_el.
+ cpu_first_el->prev->next = cpu_first_el->next;
+ if (cpu_first_el->next) {
+ cpu_first_el->next->prev = cpu_first_el->prev;
+ }
+ // make cpu_first_el as new m_cpu_head
+ cpu_first_el->prev = NULL;
+ cpu_first_el->next = m_cpu_head;
+ m_cpu_head->prev = cpu_first_el;
+ m_cpu_head = cpu_first_el;
+ }
+ else {
+ move_cpu_el_after(cpu_first_el, cpu_el);
+ }
+ }
}
}
+ print_stream_cpu_list("stream_destroy");
delete stream;
}
else {
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
#include "offload_common.h"
#include "coi/coi_client.h"
-#define SIGNAL_IS_REMOVED ((OffloadDescriptor *)-1)
+#define SIGNAL_HAS_COMPLETED ((OffloadDescriptor *)-1)
const int64_t no_stream = -1;
// Address range
PtrData(const void *addr, uint64_t len) :
cpu_addr(addr, len), cpu_buf(0),
mic_addr(0), alloc_disp(0), mic_buf(0), mic_offset(0),
- ref_count(0), is_static(false)
+ ref_count(0), is_static(false), is_omp_associate(false)
{}
//
cpu_addr(ptr.cpu_addr), cpu_buf(ptr.cpu_buf),
mic_addr(ptr.mic_addr), alloc_disp(ptr.alloc_disp),
mic_buf(ptr.mic_buf), mic_offset(ptr.mic_offset),
- ref_count(ptr.ref_count), is_static(ptr.is_static)
+ ref_count(ptr.ref_count), is_static(ptr.is_static),
+ is_omp_associate(ptr.is_omp_associate),
+ var_alloc_type(0)
{}
bool operator<(const PtrData &o) const {
}
long add_reference() {
- if (is_static) {
+ if (is_omp_associate || (is_static && !var_alloc_type)) {
return LONG_MAX;
}
#ifndef TARGET_WINNT
}
long remove_reference() {
- if (is_static) {
+ if (is_omp_associate || (is_static && !var_alloc_type)) {
return LONG_MAX;
}
#ifndef TARGET_WINNT
}
long get_reference() const {
- if (is_static) {
+ if (is_omp_associate || (is_static && !var_alloc_type)) {
return LONG_MAX;
}
return ref_count;
// if true buffers are created from static memory
bool is_static;
+
+ // true if MIC buffer created by omp_target_associate
+ bool is_omp_associate;
+
+ bool var_alloc_type;
mutex_t alloc_ptr_data_lock;
private:
static Stream* find_stream(uint64_t handle, bool remove);
static _Offload_stream add_stream(int device, int number_of_cpus) {
+ _Offload_stream result;
m_stream_lock.lock();
- all_streams[++m_streams_count] = new Stream(device, number_of_cpus);
+ result = ++m_streams_count;
+ all_streams[m_streams_count] = new Stream(device, number_of_cpus);
m_stream_lock.unlock();
- return(m_streams_count);
+ return(result);
+ }
+
+ static uint64_t get_streams_count() {
+ return m_streams_count;
}
typedef std::map<uint64_t, Stream*> StreamMap;
};
typedef std::map<uint64_t, Stream*> StreamMap;
+typedef std::bitset<COI_MAX_HW_THREADS> micLcpuMask;
+
+// Node of the doubly linked list of cpus used by streams.
+// The list is ordered by count.
+typedef struct CpuEl{
+ uint64_t count; // number of streams using the cpu
+ struct CpuEl* prev; // previous node: cpu with the same or lesser count
+ struct CpuEl* next; // next node: cpu with the same or greater count
+} CpuEl;
// class representing a single engine
struct Engine {
friend void __offload_init_library_once(void);
friend void __offload_fini_library(void);
+// Index of CpuEl pointer x within the m_cpus array (a pointer difference).
+// The argument is parenthesized so that expressions such as
+// CPU_INDEX(cond ? a : b) expand as intended.
+#define CPU_INDEX(x) ((x) - m_cpus)
#define check_result(res, tag, ...) \
{ \
if (res == COI_PROCESS_DIED) { \
return m_process;
}
+ // Return the current value of the m_ready flag.
+ // The flag is read without taking any lock.
+ bool get_ready() {
+ return m_ready;
+ }
+
uint64_t get_thread_id(void);
// initialize device
if (it != m_signal_map.end()) {
desc = it->second;
if (remove) {
- it->second = SIGNAL_IS_REMOVED;
+ it->second = SIGNAL_HAS_COMPLETED;
}
}
}
return desc;
}
+    // Mark the offload registered under the given signal as completed.
+    // If the signal has an entry in m_signal_map, its descriptor is replaced
+    // with SIGNAL_HAS_COMPLETED; an unknown signal is left alone.
+    // The lookup and update are done while holding m_signal_lock.
+    void complete_signaled_ofld(const void *signal) {
+        m_signal_lock.lock();
+
+        SignalMap::iterator entry = m_signal_map.find(signal);
+        const bool is_known = (entry != m_signal_map.end());
+        if (is_known) {
+            entry->second = SIGNAL_HAS_COMPLETED;
+        }
+
+        m_signal_lock.unlock();
+    }
+
void stream_destroy(_Offload_stream handle);
+ void move_cpu_el_after(CpuEl* cpu_what, CpuEl* cpu_after);
+ void print_stream_cpu_list(const char *);
+
COIPIPELINE get_pipeline(_Offload_stream stream);
StreamMap get_stream_map() {
private:
Engine() : m_index(-1), m_physical_index(-1), m_process(0), m_ready(false),
- m_proc_number(0)
+ m_proc_number(0), m_assigned_cpus(0), m_cpus(0), m_cpu_head(0)
{}
~Engine() {
+ m_ready = false;
for (StreamMap::iterator it = m_stream_map.begin();
it != m_stream_map.end(); it++) {
Stream * stream = it->second;
if (m_process != 0) {
fini_process(false);
}
+ if (m_assigned_cpus) {
+ delete m_assigned_cpus;
+ }
}
// set indexes
m_physical_index = physical_index;
}
+ // Set the CPU mask that restricts which threads this engine may use.
+ // The pointer is stored as is (no copy). ~Engine() releases it with
+ // delete, so the engine owns the mask after this call.
+ // A previously stored mask is not freed here.
+ void set_cpu_mask(micLcpuMask *cpu_mask)
+ {
+ m_assigned_cpus = cpu_mask;
+ }
+
// start process on device
void init_process();
int m_index;
int m_physical_index;
+ // cpu mask
+ micLcpuMask *m_assigned_cpus;
+
// number of COI pipes created for the engine
long m_proc_number;
mutex_t m_signal_lock;
// streams
- StreamMap m_stream_map;
- mutex_t m_stream_lock;
- int m_num_cores;
- int m_num_threads;
- std::bitset<COI_MAX_HW_THREADS> m_cpus;
+ StreamMap m_stream_map;
+ mutex_t m_stream_lock;
+ int m_num_cores;
+ int m_num_threads;
+ CpuEl* m_cpus;
+ CpuEl* m_cpu_head;
// List of dynamic libraries to be registred
DynLibList m_dyn_libs;
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
*env_var_name_length = 3;
*env_var_name = *env_var_def = c;
*env_var_def = strdup(*env_var_def);
- if (*env_var_def == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
+ if (*env_var_def == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
return c_mic_var;
}
*env_var_def = c + strlen("ENV=");
*env_var_def = strdup(*env_var_def);
- if (*env_var_def == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
+ if (*env_var_def == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
return c_mic_card_env;
}
if (isalpha(*c)) {
}
*env_var_def = strdup(*env_var_def);
if (*env_var_def == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
+ LIBOFFLOAD_ERROR(c_malloc);
return card_is_set? c_mic_card_var : c_mic_var;
}
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
-#include <sys/stat.h>
#include <algorithm>
#include <bitset>
+#include <iostream>
#if defined(HOST_WINNT)
#define PATH_SEPARATOR ";"
static const char *offload_report_envname = "OFFLOAD_REPORT";
static const char *timer_envname = "H_TIME";
-// location of offload_main executable
-// To be used if the main application has no offload and is not built
-// with -offload but dynamic library linked in has offload pragma
-char* mic_device_main = 0;
-
// DMA channel count used by COI and set via
// OFFLOAD_DMA_CHANNEL_COUNT environment variable
uint32_t mic_dma_channel_count;
"cean_var",
"cean_var_ptr",
"c_data_ptr_array",
+ "c_extended_type",
"c_func_ptr_array",
"c_void_ptr_array",
- "c_string_ptr_array"
+ "c_string_ptr_array",
+ "c_data_ptr_ptr",
+ "c_func_ptr_ptr",
+ "c_void_ptr_ptr",
+ "c_string_ptr_ptr",
+ "c_cean_var_ptr_ptr",
};
Engine* mic_engines = 0;
uint64_t mic_2m_buffer_size = 0;
-// MIC_LD_LIBRARY_PATH
-char* mic_library_path = 0;
+// LD_LIBRARY_PATH for KNC
+char* knc_library_path = 0;
+
+// LD_LIBRARY_PATH for KNL
+char* knl_library_path = 0;
+
// MIC_PROXY_IO
bool mic_proxy_io = true;
static bool __offload_active_wait = true;
static const char *offload_active_wait_envname = "OFFLOAD_ACTIVE_WAIT";
+// wait even for asynchronous offload
+// true for now, because the performance issue with COI is still not fixed
+static bool __offload_always_wait = true;
+static const char *offload_always_wait_envname = "OFFLOAD_ALWAYS_WAIT";
+
// OMP_DEFAULT_DEVICE
int __omp_device_num = 0;
static const char *omp_device_num_envname = "OMP_DEFAULT_DEVICE";
static TargetImageList __target_libs_list;
static mutex_t __target_libs_lock;
static mutex_t stack_alloc_lock;
+static mutex_t lock_complete;
+
+// Set of OffloadDescriptors of asynchronous offloads that are not destroyed
+std::map<void *, bool> offload_descr_map;
// Target executable
TargetImage* __target_exe;
+// is true if last loaded image is dll
+bool __current_image_is_dll = false;
+// is true if myo library is loaded when dll is loaded
+bool __myo_init_in_so = false;
// Print readable offload flags
static void trace_offload_flags(
varDescFlags offload_flags
)
{
- // SIzed big enough for all flag names
+ // Sized big enough for all flag names
char fbuffer[256];
bool first = true;
if (!OFFLOAD_DO_TRACE && (console_enabled >= 1)) {
first ? "always_delete" : ",always_delete");
first = false;
}
+ if (offload_flags.is_non_cont_struct) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "is_non_cont_struct" : ",is_non_cont_struct");
+ first = false;
+ }
+ if (offload_flags.pin) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "pin" : ",pin");
+ first = false;
+ }
+ if (offload_flags.is_device_ptr) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "is_device_ptr" : ",is_device_ptr");
+ first = false;
+ }
+ if (offload_flags.use_device_ptr) {
+ sprintf(fbuffer+strlen(fbuffer),
+ first ? "use_device_ptr" : ",use_device_ptr");
+ }
OFFLOAD_DEBUG_TRACE_1(1,
GET_OFFLOAD_NUMBER(timer_data), c_offload_init_func,
"%s)\n", fbuffer);
length = alloc_disp ? length : size + disp;
res = COI::BufferCreate(
length,
- COI_BUFFER_NORMAL,
+ COI_BUFFER_OPENCL,
buffer_flags,
0,
1,
// instead of COIBufferCopy.
COI::BufferCreateFromMemory(length,
- COI_BUFFER_NORMAL,
+ COI_BUFFER_OPENCL,
0,
base,
1,
OFFLOAD_DEBUG_TRACE_1(3,
GET_OFFLOAD_NUMBER(get_timer_data()),
c_offload_create_buf_mic,
- "Creating buffer from sink memory: size %lld, offset %d, "
- "flags =0x%x\n", buffer_size,
- ptr_data->mic_offset, buffer_flags);
+ "Creating buffer from sink memory: "
+ "addr %p, size %lld, offset %d, flags 0x%x\n",
+ base, buffer_size, ptr_data->mic_offset,
+ buffer_flags);
res = COI::BufferCreateFromMemory(ptr_data->cpu_addr.length(),
COI_BUFFER_NORMAL,
COI_SINK_MEMORY,
return true;
}
+// Look up the device address that is associated with a host pointer.
+//   device_ptr - receives the sink (MIC) address of the association
+//   host_ptr   - host address used as the lookup key
+// A missing association is reported as c_no_ptr_data and the process exits.
+void OffloadDescriptor::find_device_ptr(
+    int64_t* &device_ptr,
+    void *host_ptr
+)
+{
+    char *host_base = reinterpret_cast<char *>(host_ptr);
+
+    OFFLOAD_TRACE(3, "Looking for association for data: addr %p\n", host_base);
+
+    // The host address has to be present in the pointer table already.
+    PtrData *assoc = m_device.find_ptr_data(host_base);
+    if (assoc == 0) {
+        OFFLOAD_TRACE(3, "Association does not exist\n");
+        LIBOFFLOAD_ERROR(c_no_ptr_data, host_base);
+        exit(1);
+    }
+
+    // Ask COI for the sink address only when none is stored yet; the
+    // answer is written straight into the association.
+    if (assoc->mic_addr == 0) {
+        COIRESULT res = COI::BufferGetSinkAddress(assoc->mic_buf,
+                                                  &assoc->mic_addr);
+        if (res != COI_SUCCESS) {
+            if (m_status != 0) {
+                m_status->result = translate_coi_error(res);
+            }
+            report_coi_error(c_buf_get_address, res);
+        }
+    }
+
+    device_ptr = (int64_t *) assoc->mic_addr;
+
+    OFFLOAD_TRACE(3, "Found association: host_ptr %p, device_ptr = %p\n",
+                  assoc->cpu_addr.start(), device_ptr);
+}
+
bool OffloadDescriptor::init_static_ptr_data(PtrData *ptr_data)
{
OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
COIRESULT res = COI::BufferCreateFromMemory(
ptr_data->cpu_addr.length(),
- COI_BUFFER_NORMAL,
+ COI_BUFFER_OPENCL,
0,
const_cast<void*>(ptr_data->cpu_addr.start()),
1, &m_device.get_process(),
return true;
}
+// Debug helper: emits 'msg' followed by the fields of one persist-list
+// element through OFFLOAD_TRACE at level 4.
+//   msg    - heading printed before the fields
+//   cur_el - element to dump; cur_el->stack_ptr_data is dereferenced
+//            unconditionally, so both pointers must be valid
+// NOTE(review): thread_id is printed with %lld and mic_addr with %p -
+// confirm these conversions match the declared field types.
+static void print_persistList_item(
+ const char *msg,
+ PersistData *cur_el
+)
+{
+ OFFLOAD_TRACE(4, "%s\n", msg);
+ OFFLOAD_TRACE(4, " stack_cpu_addr = %p\n", cur_el->stack_cpu_addr);
+ OFFLOAD_TRACE(4, " routine_id = %d\n", cur_el->routine_id);
+ OFFLOAD_TRACE(4, " thread_id = %lld\n", cur_el->thread_id);
+ OFFLOAD_TRACE(4, " stack_ptr_data = %p\n", cur_el->stack_ptr_data);
+ OFFLOAD_TRACE(4, " MIC buffer = %p\n", cur_el->stack_ptr_data->mic_buf);
+ OFFLOAD_TRACE(4, " MIC addr = %p\n", cur_el->stack_ptr_data->mic_addr);
+ OFFLOAD_TRACE(4, " cpu_stack_addr = %p\n", cur_el->cpu_stack_addr);
+}
+
+static mutex_t stack_memory_manager_lock;
+
bool OffloadDescriptor::offload_stack_memory_manager(
const void * stack_begin,
int routine_id,
int buf_size,
int align,
+ bool thread_specific_function_locals,
bool *is_new)
{
- mutex_locker_t locker(stack_alloc_lock);
+ //mutex_locker_t locker(stack_alloc_lock);
+ stack_memory_manager_lock.lock();
PersistData * new_el;
PersistDataList::iterator it_begin = m_device.m_persist_list.begin();
int erase = 0;
uint64_t cur_thread_id = m_device.get_thread_id();
+ OFFLOAD_TRACE(3, "offload_stack_memory_manager("
+ "stack_begin=%p, routine_id=%d, buf_size=%d,"
+ "align=%d, thread_specific_function_locals=%d, bool=%p)\n",
+ stack_begin, routine_id, buf_size,
+ align, thread_specific_function_locals, is_new);
+ OFFLOAD_TRACE(3, "cur_thread_id=%lld\n", cur_thread_id);
*is_new = false;
for (PersistDataList::iterator it = m_device.m_persist_list.begin();
it != m_device.m_persist_list.end(); it++) {
PersistData cur_el = *it;
+ print_persistList_item("Current element in persist list:", &cur_el);
if (stack_begin > it->stack_cpu_addr) {
- // this stack data must be destroyed
if (cur_thread_id == cur_el.thread_id) {
+ // this stack data must be destroyed
m_destroy_stack.push_front(cur_el.stack_ptr_data);
it_end = it;
erase++;
+ OFFLOAD_TRACE(3, "Current element below TOS: so delete\n");
}
}
else if (stack_begin == it->stack_cpu_addr) {
if (routine_id != it-> routine_id) {
// this stack data must be destroyed
+ // because the current function is a dynamic sibling
m_destroy_stack.push_front(cur_el.stack_ptr_data);
it_end = it;
erase++;
+ OFFLOAD_TRACE(3, "Current element is sibling: so delete\n");
break;
}
- else {
+ else if (!thread_specific_function_locals ||
+ cur_thread_id == cur_el.thread_id) {
// stack data is reused
m_stack_ptr_data = it->stack_ptr_data;
if (erase > 0) {
// all obsolete stack sections must be erased from the list
m_device.m_persist_list.erase(it_begin, ++it_end);
-
m_in_datalen +=
erase * sizeof(new_el->stack_ptr_data->mic_addr);
}
OFFLOAD_TRACE(3, "Reuse of stack buffer with addr %p\n",
m_stack_ptr_data->mic_addr);
+ stack_memory_manager_lock.unlock();
return true;
}
}
else if (stack_begin < it->stack_cpu_addr &&
cur_thread_id == cur_el.thread_id) {
+ OFFLOAD_TRACE(3, "Current element is above TOS\n");
break;
}
}
else if (m_is_mandatory) {
report_coi_error(c_buf_create, res);
}
+ stack_memory_manager_lock.unlock();
return false;
}
// make buffer valid on the device.
else if (m_is_mandatory) {
report_coi_error(c_buf_set_state, res);
}
+ stack_memory_manager_lock.unlock();
return false;
}
res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
else if (m_is_mandatory) {
report_coi_error(c_buf_set_state, res);
}
+ stack_memory_manager_lock.unlock();
return false;
}
// persistence algorithm requires target stack initialy to be nullified
if (!nullify_target_stack(new_el->stack_ptr_data->mic_buf, buf_size)) {
+ stack_memory_manager_lock.unlock();
return false;
}
m_device.m_persist_list.push_front(*new_el);
init_mic_address(new_el->stack_ptr_data);
*is_new = true;
+
+ stack_memory_manager_lock.unlock();
return true;
}
+// Scan the persistent stack buffers for the entry whose stack_cpu_addr
+// equals stack_begin and return the host stack base stored in it.
+//   stack_begin - stack address the list entries are matched against
+//   routine_id  - only reported in the trace output
+//   thread_specific_function_locals - when set, the entry must also belong
+//       to the calling thread and the first such entry wins; otherwise the
+//       last entry matching stack_begin is used
+// Returns the entry's cpu_stack_addr, or 0 when no entry matches.
+// The list is walked while holding stack_memory_manager_lock.
+char* OffloadDescriptor::get_this_threads_cpu_stack_addr(
+    const void * stack_begin,
+    int routine_id,
+    bool thread_specific_function_locals
+)
+{
+    uint64_t my_thread = m_device.get_thread_id();
+    char* found = 0;
+
+    OFFLOAD_TRACE(3, "get_this_threads_cpu_stack_addr("
+        "stack_begin=%p, routine_id=%d, thread_specific_function_locals=%d)\n",
+        stack_begin, routine_id, thread_specific_function_locals);
+    OFFLOAD_TRACE(3, "cur_thread_id=%lld\n", my_thread);
+
+    stack_memory_manager_lock.lock();
+    for (PersistDataList::iterator pos = m_device.m_persist_list.begin();
+         pos != m_device.m_persist_list.end(); ++pos) {
+        PersistData entry = *pos;
+        print_persistList_item("Current element in persist list:", &entry);
+
+        if (stack_begin != entry.stack_cpu_addr) {
+            continue;
+        }
+        if (!thread_specific_function_locals) {
+            // OpenMP shared function locals: the thread id is ignored and
+            // scanning continues, so the last match (the outer stack) is
+            // the one that is kept.
+            found = entry.cpu_stack_addr;
+        }
+        else if (my_thread == entry.thread_id) {
+            // Non-OpenMP threading: the entry must belong to this thread.
+            found = entry.cpu_stack_addr;
+            break;
+        }
+    }
+    stack_memory_manager_lock.unlock();
+
+    if (found == 0) {
+        OFFLOAD_TRACE(1,
+            "Could not find persistent data; expect Read/Write failure\n");
+        return 0;
+    }
+
+    OFFLOAD_TRACE(3, "get_this_threads_cpu_stack_addr() => %p\n", found);
+    return found;
+}
+
+// Scan the persistent stack buffers for the entry whose stack_cpu_addr
+// equals stack_begin and return the PtrData of its MIC stack buffer.
+//   stack_begin - stack address the list entries are matched against
+//   routine_id  - only reported in the trace output
+//   thread_specific_function_locals - when set, the entry must also belong
+//       to the calling thread and the first such entry wins; otherwise the
+//       last entry matching stack_begin is used
+// Returns the entry's stack_ptr_data, or 0 when no entry matches.
+// The list is walked while holding stack_memory_manager_lock.
+PtrData* OffloadDescriptor::get_this_threads_mic_stack_addr(
+    const void * stack_begin,
+    int routine_id,
+    bool thread_specific_function_locals
+)
+{
+    uint64_t my_thread = m_device.get_thread_id();
+    PtrData* found = 0;
+
+    OFFLOAD_TRACE(3, "get_this_threads_mic_stack_addr("
+        "stack_begin=%p, routine_id=%d, thread_specific_function_locals=%d)\n",
+        stack_begin, routine_id, thread_specific_function_locals);
+    OFFLOAD_TRACE(3, "cur_thread_id=%lld\n", my_thread);
+
+    stack_memory_manager_lock.lock();
+    for (PersistDataList::iterator pos = m_device.m_persist_list.begin();
+         pos != m_device.m_persist_list.end(); ++pos) {
+        PersistData entry = *pos;
+        print_persistList_item("Current element in persist list:", &entry);
+
+        if (stack_begin != entry.stack_cpu_addr) {
+            continue;
+        }
+        if (!thread_specific_function_locals) {
+            // OpenMP shared function locals: the thread id is ignored and
+            // scanning continues, so the last match (the outer stack) is
+            // the one that is kept.
+            found = entry.stack_ptr_data;
+        }
+        else if (my_thread == entry.thread_id) {
+            // Non-OpenMP threading: the entry must belong to this thread.
+            found = entry.stack_ptr_data;
+            break;
+        }
+    }
+    stack_memory_manager_lock.unlock();
+
+    if (found == 0) {
+        OFFLOAD_TRACE(1,
+            "Could not find persistent data; expect Read/Write failure\n");
+        return 0;
+    }
+
+    OFFLOAD_TRACE(3, "get_this_threads_mic_stack_addr() => %p\n", found);
+    return found;
+}
+
+// Swap host and device pointers for variable i; the visible call sites
+// invoke this for variables flagged use_device_ptr.
+// The pointer value is read from the dope vector's Base for c_dv_ptr
+// sources and from the slot m_vars[i].ptr points to otherwise.
+//  - direction.in:  the host pointer is replaced by its device pointer and
+//    the pair is recorded in the target-pointer table.
+//  - direction.out (without in): the table entry is looked up by the
+//    current pointer value, the stored host pointer is written back and
+//    the entry is removed.
+// A failed lookup on the out path reports c_no_ptr_data and exits.
+// Nothing is done when neither direction bit is set.
+void OffloadDescriptor::setup_use_device_ptr(int i)
+{
+ PtrData *ptr_data;
+ // dvp is assigned only for c_dv_ptr sources and is only read under the
+ // same type check further down.
+ ArrDesc *dvp;
+ void *base;
+ if (m_vars_extra[i].type_src == c_dv_ptr) {
+ dvp = *static_cast<ArrDesc**>(m_vars[i].ptr);
+ base = reinterpret_cast<void*>(dvp->Base);
+ }
+ else {
+ base = *static_cast<void**>(m_vars[i].ptr);
+ }
+ if (m_vars[i].direction.in) {
+ int64_t *device_ptr;
+ bool is_new = true;
+
+ find_device_ptr(device_ptr, base);
+
+ // Create an entry in the targetptr table keyed by device_ptr so
+ // that the host pointer can be recovered later
+ ptr_data = m_device.insert_targetptr_data(device_ptr,
+ 0, is_new);
+
+ // Actually the base is a host pointer and cpu_addr is
+ // device pointer. This is special case where the 2
+ // address usage is reversed to enable using existing
+ // PtrData structure instead of adding new fields.
+ ptr_data->mic_addr = (uint64_t) base;
+
+ // NOTE(review): presumably insert_targetptr_data returns with this
+ // lock held - confirm against its implementation.
+ ptr_data->alloc_ptr_data_lock.unlock();
+
+ // Replace host pointer with device pointer
+ if (m_vars_extra[i].type_src == c_dv_ptr) {
+ dvp->Base = reinterpret_cast<dv_size>(device_ptr);
+ }
+ else {
+ *static_cast<void**>(m_vars[i].ptr) = device_ptr;
+ }
+ }
+ else if (m_vars[i].direction.out) {
+ // For use_device_ptr and out find associated host ptr
+ // and assign to host ptr
+ ptr_data = m_device.find_targetptr_data(base);
+ if (!ptr_data) {
+ LIBOFFLOAD_ERROR(c_no_ptr_data, base);
+ exit(1);
+ }
+ // mic_addr holds the host pointer saved by the "in" branch above.
+ if (m_vars_extra[i].type_src == c_dv_ptr) {
+ dvp->Base = ptr_data->mic_addr;
+ }
+ else {
+ *static_cast<void**>(m_vars[i].ptr) =
+ reinterpret_cast<void*>(ptr_data->mic_addr);
+ }
+ // The entry is dropped once the host pointer has been restored.
+ m_device.remove_targetptr_data(
+ ptr_data->cpu_addr.start());
+ }
+}
+
bool OffloadDescriptor::setup_descriptors(
VarDesc *vars,
VarDesc2 *vars2,
)
{
COIRESULT res;
+ // To enable caching the CPU stack base address for stack variables
+ char* this_threads_cpu_stack_addr = 0;
+ // To properly deal with non-OpenMP threading and function-local variables
+ // For OpenMP threading we support all function-locals in shared mode only
+ bool thread_specific_function_locals = !omp_in_parallel();
OffloadTimer timer(get_timer_data(), c_offload_host_setup_buffers);
-
// make a copy of variable descriptors
m_vars_total = vars_total;
if (vars_total > 0) {
if (m_out_deps == NULL)
LIBOFFLOAD_ERROR(c_malloc);
}
-
// copyin/copyout data length
m_in_datalen = 0;
m_out_datalen = 0;
int64_t alloc_size = 0;
bool src_is_for_mic = (m_vars[i].direction.out ||
m_vars[i].into == NULL);
-
+ bool src_is_for_host = (m_vars[i].direction.in ||
+ m_vars[i].into == NULL);
const char *var_sname = "";
if (vars2 != NULL && i < vars_total) {
if (vars2[i].sname != NULL) {
var_sname = vars2[i].sname;
}
}
+
+ // instead of m_vars[i].type.src we will use m_vars_extra[i].type_src
+ if (m_vars[i].type.src == c_extended_type) {
+ VarDescExtendedType *etype =
+ reinterpret_cast<VarDescExtendedType*>(m_vars[i].ptr);
+ m_vars_extra[i].type_src = etype->extended_type;
+ m_vars[i].ptr = etype->ptr;
+ }
+ else {
+ m_vars_extra[i].type_src = m_vars[i].type.src;
+ }
+ // instead of m_vars[i].type.dst we will use m_vars_extra[i].type_dst
+ if (m_vars[i].type.dst == c_extended_type) {
+ VarDescExtendedType *etype =
+ reinterpret_cast<VarDescExtendedType*>(m_vars[i].into);
+ if (etype) {
+ m_vars_extra[i].type_dst = etype->extended_type;
+ m_vars[i].into = etype->ptr;
+ }
+ else {
+ m_vars_extra[i].type_dst = m_vars_extra[i].type_src;
+ }
+ }
+ else {
+ m_vars_extra[i].type_dst = m_vars[i].type.dst;
+ }
OFFLOAD_TRACE(2, " VarDesc %d, var=%s, %s, %s\n",
i, var_sname,
vardesc_direction_as_string[m_vars[i].direction.bits],
- vardesc_type_as_string[m_vars[i].type.src]);
+ vardesc_type_as_string[m_vars_extra[i].type_src]);
if (vars2 != NULL && i < vars_total && vars2[i].dname != NULL) {
OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname,
- vardesc_type_as_string[m_vars[i].type.dst]);
+ vardesc_type_as_string[m_vars_extra[i].type_dst]);
}
OFFLOAD_TRACE(2,
" type_src=%d, type_dstn=%d, direction=%d, "
"alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
"offset=%lld, size=%lld, count/disp=%lld, ptr=%p, into=%p\n",
- m_vars[i].type.src,
- m_vars[i].type.dst,
+ m_vars_extra[i].type_src,
+ m_vars_extra[i].type_dst,
m_vars[i].direction.bits,
m_vars[i].alloc_if,
m_vars[i].free_if,
}
m_vars_extra[i].alloc = m_vars[i].alloc;
+ m_vars_extra[i].auto_data = 0;
m_vars_extra[i].cpu_disp = 0;
m_vars_extra[i].cpu_offset = 0;
m_vars_extra[i].src_data = 0;
if (i < vars_total) {
m_vars_extra[i].is_arr_ptr_el = 0;
}
+ if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) ||
+ TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst) ||
+ m_vars[i].flags.is_pointer) {
+ m_vars_extra[i].pointer_offset = m_vars[i].offset;
+ m_vars[i].offset = 0;
+ m_in_datalen += sizeof(m_vars[i].offset);
+ }
- switch (m_vars[i].type.src) {
+ switch (m_vars_extra[i].type_src) {
case c_data_ptr_array:
{
const Arr_Desc *ap;
case c_data:
case c_void_ptr:
+ case c_void_ptr_ptr:
case c_cean_var:
// In all uses later
// VarDesc.size will have the length of the data to be
// transferred
// VarDesc.disp will have an offset from base
- if (m_vars[i].type.src == c_cean_var) {
+
+ if (m_vars[i].flags.is_non_cont_struct && src_is_for_host) {
+ NonContigDesc *desc =
+ static_cast<NonContigDesc*>(m_vars[i].ptr);
+ noncont_struct_dump(" ", "DATA", desc);
+ m_vars_extra[i].noncont_desc = desc;
+ m_vars[i].ptr = reinterpret_cast<void*>(desc->base);
+ m_vars[i].size = get_noncont_struct_size(desc);
+ m_vars[i].disp = 0;
+ }
+ else if (m_vars_extra[i].type_src == c_cean_var) {
// array descriptor
const Arr_Desc *ap =
static_cast<const Arr_Desc*>(m_vars[i].ptr);
if (m_vars[i].flags.is_static) {
PtrData *ptr_data;
-
// find data associated with variable
if (!find_ptr_data(ptr_data,
m_vars[i].ptr,
m_vars_extra[i].src_data = ptr_data;
}
- if (m_is_openmp) {
- if (m_vars[i].flags.is_static) {
- // Static data is transferred either by omp target
- // update construct which passes zeros for
- // alloc_if and free_if or by always modifier.
- if (!m_vars[i].flags.always_copy &&
- (m_vars[i].alloc_if || m_vars[i].free_if)) {
- m_vars[i].direction.bits = c_parameter_nocopy;
- }
- }
- else {
- AutoData *auto_data;
- if (m_vars[i].alloc_if) {
- auto_data = m_device.insert_auto_data(
- m_vars[i].ptr, m_vars[i].size);
- auto_data->add_reference();
- }
- else {
- // TODO: what should be done if var is not in
- // the table?
- auto_data = m_device.find_auto_data(
- m_vars[i].ptr);
- }
-
- // For automatic variables data is transferred:
- // - if always modifier is used OR
- // - if alloc_if == 0 && free_if == 0 OR
- // - if reference count is 1
- if (!m_vars[i].flags.always_copy &&
- (m_vars[i].alloc_if || m_vars[i].free_if) &&
- auto_data != 0 &&
- auto_data->get_reference() != 1) {
- m_vars[i].direction.bits = c_parameter_nocopy;
- }
-
- // save data for later use
- m_vars_extra[i].auto_data = auto_data;
- }
- }
-
if (m_vars[i].direction.in &&
- !m_vars[i].flags.is_static) {
+ !m_vars[i].flags.is_static &&
+ !m_vars[i].flags.is_stack_buf) {
m_in_datalen += m_vars[i].size;
// for non-static target destination defined as CEAN
// expression we pass to target its size and dist
if (m_vars[i].into == NULL &&
- m_vars[i].type.src == c_cean_var) {
+ m_vars_extra[i].type_src == c_cean_var) {
m_in_datalen += 2 * sizeof(uint64_t);
}
m_need_runfunction = true;
}
if (m_vars[i].direction.out &&
- !m_vars[i].flags.is_static) {
+ !m_vars[i].flags.is_static &&
+ !m_vars[i].flags.is_stack_buf) {
m_out_datalen += m_vars[i].size;
m_need_runfunction = true;
}
}
+ if (m_is_openmp && src_is_for_host &&
+ !m_vars[i].flags.is_device_ptr) {
+ if (m_vars[i].flags.is_static) {
+ PtrData *ptr_data = m_vars_extra[i].src_data;
+ // Static data is transferred either by omp target
+ // update construct which passes zeros for
+ // alloc_if and free_if or by always modifier.
+ // Implicit openmp reference is transfered also
+ // if its reference count is equal to 1
+ if (ptr_data &&
+ IS_OPENMP_IMPLICIT_OR_LINK(ptr_data->var_alloc_type)) {
+ if (m_vars[i].alloc_if) {
+ ptr_data->add_reference();
+ }
+
+ if (!m_vars[i].flags.always_copy &&
+ (m_vars[i].alloc_if || m_vars[i].free_if) &&
+ ptr_data->get_reference() != 1) {
+ m_vars[i].direction.bits = c_parameter_nocopy;
+ }
+ }
+ else if (
+ !m_vars[i].flags.always_copy &&
+ (m_vars[i].alloc_if || m_vars[i].free_if)) {
+ m_vars[i].direction.bits = c_parameter_nocopy;
+ }
+ }
+ else {
+ AutoData *auto_data;
+ if (m_vars[i].alloc_if) {
+ auto_data = m_device.insert_auto_data(
+ m_vars[i].ptr, m_vars[i].size);
+ auto_data->add_reference();
+ }
+ else {
+ // TODO: what should be done if var is not in
+ // the table?
+ auto_data = m_device.find_auto_data(
+ m_vars[i].ptr);
+ }
+
+ // For automatic variables data is transferred:
+ // - if always modifier is used OR
+ // - if alloc_if == 0 && free_if == 0 OR
+ // - if reference count is 1
+ if (!m_vars[i].flags.always_copy &&
+ (m_vars[i].alloc_if || m_vars[i].free_if) &&
+ auto_data != 0 &&
+ auto_data->get_reference() != 1) {
+ m_vars[i].direction.bits = c_parameter_nocopy;
+ }
+
+ // save data for later use
+ m_vars_extra[i].auto_data = auto_data;
+ }
+ }
break;
case c_dv:
- if (m_vars[i].direction.bits ||
+ if (m_vars[i].flags.use_device_ptr) {
+ setup_use_device_ptr(i);
+ break;
+ }
+ else if (m_vars[i].direction.bits ||
m_vars[i].alloc_if ||
m_vars[i].free_if) {
ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].ptr);
break;
case c_string_ptr:
+ case c_string_ptr_ptr:
if ((m_vars[i].direction.bits ||
m_vars[i].alloc_if ||
m_vars[i].free_if) &&
/* fallthru */
case c_data_ptr:
+ case c_data_ptr_ptr:
if (m_vars[i].flags.is_stack_buf &&
!m_vars[i].direction.bits &&
m_vars[i].alloc_if) {
if (!offload_stack_memory_manager(
stack_addr, entry_id,
- m_vars[i].count, m_vars[i].align, &is_new)) {
+ m_vars[i].count, m_vars[i].align,
+ thread_specific_function_locals, &is_new)) {
return false;
}
if (is_new) {
m_stack_ptr_data->mic_buf);
m_device.m_persist_list.front().cpu_stack_addr =
static_cast<char*>(m_vars[i].ptr);
+ PersistData *new_el = &m_device.m_persist_list.front();
+ print_persistList_item(
+ "New element in persist list:",
+ new_el);
}
else {
m_vars[i].flags.sink_addr = 1;
m_in_datalen += sizeof(m_stack_ptr_data->mic_addr);
+ if (thread_specific_function_locals) {
+ m_stack_ptr_data = get_this_threads_mic_stack_addr(
+ stack_addr, entry_id,
+ thread_specific_function_locals);
+ }
}
m_vars[i].size = m_destroy_stack.size();
m_vars_extra[i].src_data = m_stack_ptr_data;
/* fallthru */
case c_cean_var_ptr:
- case c_dv_ptr:
- if (m_vars[i].type.src == c_cean_var_ptr) {
+ case c_cean_var_ptr_ptr:
+ case c_dv_ptr:
+ if (m_vars[i].flags.is_non_cont_struct && src_is_for_host) {
+ NonContigDesc *desc =
+ static_cast<NonContigDesc*>(m_vars[i].ptr);
+ noncont_struct_dump(" ", "PTR", desc);
+ m_vars_extra[i].noncont_desc = desc;
+ m_vars[i].ptr = reinterpret_cast<void*>(desc->base);
+ m_vars[i].disp = 0;
+ }
+ else if (m_vars_extra[i].type_src == c_cean_var_ptr ||
+ m_vars_extra[i].type_src == c_cean_var_ptr_ptr) {
// array descriptor
const Arr_Desc *ap =
static_cast<const Arr_Desc*>(m_vars[i].ptr);
// array descriptor to the target side.
m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
}
- else if (m_vars[i].type.src == c_dv_ptr) {
+ else if (m_vars_extra[i].type_src == c_dv_ptr) {
// need to send DV to the device unless it is 'nocopy'
if (m_vars[i].direction.bits ||
m_vars[i].alloc_if ||
// debug dump
__dv_desc_dump("IN/OUT", dvp);
- m_vars[i].direction.bits = c_parameter_in;
+ // for use_device_ptr don't need to change
+ // OUT direction to IN direction
+ if (!m_vars[i].flags.use_device_ptr) {
+ m_vars[i].direction.bits = c_parameter_in;
+ }
}
// no displacement
m_vars[i].disp = 0;
}
else {
+ // For "use_device_ptr" if direction is "in" then need to
+ // find the associated device pointer and replace the host
+ // pointer with device pointer. Also save the host pointer
+ // to restore when "out" is encountered.
+ // For "out" find the host pointer associated with the
+ // device pointer and restore the host pointer
+ if (m_vars[i].flags.use_device_ptr && src_is_for_host) {
+ setup_use_device_ptr(i);
+ break;
+ }
+
// c_data_ptr or c_string_ptr
m_vars[i].size *= m_vars[i].count;
m_vars[i].disp = 0;
// by var_desc with number 0.
// Its ptr_data is stored at m_stack_ptr_data
ptr_data = m_stack_ptr_data;
- m_vars[i].flags.sink_addr = 1;
}
else if (m_vars[i].alloc_if) {
if (m_vars[i].flags.preallocated) {
!m_vars[i].flags.preallocated) {
// will send buffer address to device
m_vars[i].flags.sink_addr = 1;
+ m_in_datalen += sizeof(ptr_data->mic_addr);
}
if (!m_vars[i].flags.pin &&
if (ptr_data != 0) {
m_vars[i].flags.sink_addr = 1;
+ m_in_datalen += sizeof(ptr_data->mic_addr);
}
}
if (ptr_data != 0) {
- if (m_is_openmp) {
- // data is transferred only if
- // alloc_if == 0 && free_if == 0
- // or reference count is 1
- if (!m_vars[i].flags.always_copy &&
- ((m_vars[i].alloc_if ||
- m_vars[i].free_if) &&
- ptr_data->get_reference() != 1)) {
- m_vars[i].direction.bits =
- c_parameter_nocopy;
- }
- }
if (ptr_data->alloc_disp != 0) {
m_vars[i].flags.alloc_disp = 1;
}
}
+ if (m_is_openmp) {
+ if (m_vars[i].flags.use_device_ptr) {
+ setup_use_device_ptr(i);
+ }
+ // for TO transfer of stack buffer's variable
+ if (src_is_for_host && m_vars[i].flags.is_stack_buf) {
+ AutoData *auto_data;
+ char *base = *static_cast<char**>(m_vars[i].ptr);
+ if (m_vars[i].alloc_if) {
+ auto_data =m_device.insert_auto_data(
+ base + m_vars[i].disp,
+ m_vars[i].size);
+ auto_data->add_reference();
+ }
+ else {
+ auto_data = m_device.find_auto_data(
+ base + m_vars[i].disp);
+ }
+ // save data for later use
+ m_vars_extra[i].auto_data = auto_data;
+
+ // For automatic variables
+ // data is transferred:
+ // - if always modifier is used OR
+ // - if alloc_if == 0 && free_if == 0 OR
+ // - if reference count is 1
+ if (!m_vars[i].flags.always_copy &&
+ (m_vars[i].alloc_if ||
+ m_vars[i].free_if) &&
+ auto_data != 0 &&
+ auto_data->get_reference() != 1) {
+ m_vars[i].direction.bits =
+ c_parameter_nocopy;
+ }
+ }
+ // for FROM transfer of global pointer variable
+ // FROM transfer of stack buffer's variable
+ // is treated at INTO branch
+ else if (src_is_for_mic &&
+ !m_vars[i].flags.is_stack_buf) {
+ // data is transferred only if
+ // alloc_if == 0 && free_if == 0
+ // or reference count is 1
+ if (!m_vars[i].flags.always_copy &&
+ (m_vars[i].alloc_if ||
+ m_vars[i].free_if) &&
+ ptr_data &&
+ ptr_data->get_reference() != 1)
+ {
+ m_vars[i].direction.bits =
+ c_parameter_nocopy;
+ }
+ }
+ }
// save pointer data
m_vars_extra[i].src_data = ptr_data;
}
break;
case c_func_ptr:
+ case c_func_ptr_ptr:
if (m_vars[i].direction.in) {
m_in_datalen += __offload_funcs.max_name_length();
}
case c_dv_data_slice:
case c_dv_ptr_data_slice:
ArrDesc *dvp;
- if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
+ if (m_vars[i].flags.is_non_cont_struct) {
+ NonContigDesc *desc =
+ static_cast<NonContigDesc*>(m_vars[i].ptr);
+ noncont_struct_dump(" ", "DV-DATA", desc);
+ dvp = reinterpret_cast<ArrDesc*>(desc->base);
+ }
+ else if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_src)) {
const Arr_Desc *ap;
ap = static_cast<const Arr_Desc*>(m_vars[i].ptr);
- dvp = (m_vars[i].type.src == c_dv_data_slice) ?
+ dvp = (m_vars_extra[i].type_src == c_dv_data_slice) ?
reinterpret_cast<ArrDesc*>(ap->base) :
*reinterpret_cast<ArrDesc**>(ap->base);
}
else {
- dvp = (m_vars[i].type.src == c_dv_data) ?
+ dvp = (m_vars_extra[i].type_src == c_dv_data) ?
static_cast<ArrDesc*>(m_vars[i].ptr) :
*static_cast<ArrDesc**>(m_vars[i].ptr);
}
m_vars[i].free_if) {
const Arr_Desc *ap;
- if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
+ if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_src)) {
ap = static_cast<const Arr_Desc*>(m_vars[i].ptr);
// debug dump
}
// size and displacement
- if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
+ if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_src)) {
// offset and length are derived from the
// array descriptor
__arr_data_offset_and_length(ap,
break;
default:
- LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
+ LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_src);
LIBOFFLOAD_ABORT;
}
- if (m_vars[i].type.src == c_data_ptr_array) {
+ if (m_vars_extra[i].type_src == c_data_ptr_array) {
continue;
}
if (src_is_for_mic && m_vars[i].flags.is_stack_buf) {
- m_vars[i].offset = static_cast<char*>(m_vars[i].ptr) -
- m_device.m_persist_list.front().cpu_stack_addr;
+ if (this_threads_cpu_stack_addr == 0) {
+ this_threads_cpu_stack_addr =
+ get_this_threads_cpu_stack_addr(
+ stack_addr, entry_id, thread_specific_function_locals);
+ }
+ m_vars[i].offset = static_cast<char*>
+ (m_vars[i].ptr) -
+ this_threads_cpu_stack_addr;
}
// if source is used at CPU save its offset and disp
if (m_vars[i].into == NULL || m_vars[i].direction.in) {
int64_t into_disp =0, into_offset = 0;
- switch (m_vars[i].type.dst) {
+ switch (m_vars_extra[i].type_dst) {
case c_data_ptr_array:
break;
case c_data:
case c_void_ptr:
+ case c_void_ptr_ptr:
case c_cean_var: {
int64_t size = m_vars[i].size;
- if (m_vars[i].type.dst == c_cean_var) {
+ if (m_vars[i].flags.is_non_cont_struct && src_is_for_mic) {
+ NonContigDesc *desc =
+ static_cast<NonContigDesc*>(m_vars[i].into);
+ noncont_struct_dump("", "INTO DATA", desc);
+ m_vars_extra[i].noncont_desc = desc;
+ m_vars[i].into = reinterpret_cast<void*>(desc->base);
+ size = get_noncont_struct_size(desc);
+ into_disp = 0;
+ }
+ else if (m_vars_extra[i].type_dst == c_cean_var) {
// array descriptor
const Arr_Desc *ap =
static_cast<const Arr_Desc*>(m_vars[i].into);
m_vars[i].into = reinterpret_cast<void*>(ap->base);
}
- int64_t size_src = m_vars_extra[i].read_rng_src ?
+ int64_t size_src = m_vars_extra[i].read_rng_src &&
+ !m_vars[i].flags.is_non_cont_struct ?
cean_get_transf_size(m_vars_extra[i].read_rng_src) :
m_vars[i].size;
int64_t size_dst = m_vars_extra[i].read_rng_dst ?
// for non-static target destination defined as CEAN
// expression we pass to target its size and dist
- if (m_vars[i].type.dst == c_cean_var) {
+ if (m_vars_extra[i].type_dst == c_cean_var) {
m_in_datalen += 2 * sizeof(uint64_t);
}
m_need_runfunction = true;
}
+
+ if (m_is_openmp && src_is_for_mic) {
+ if (m_vars[i].flags.is_static_dstn) {
+ // Static data is transferred either by omp target
+ // update construct which passes zeros for
+ // alloc_if and free_if or by always modifier.
+ if (!m_vars[i].flags.always_copy &&
+ (m_vars[i].alloc_if || m_vars[i].free_if)) {
+ m_vars[i].direction.bits = c_parameter_nocopy;
+ }
+ }
+ else {
+ AutoData *auto_data;
+ if (m_vars[i].alloc_if) {
+ auto_data = m_device.insert_auto_data(
+ m_vars[i].into, size_dst);
+ auto_data->add_reference();
+ }
+ else {
+ // TODO: what should be done if var is not in
+ // the table?
+ auto_data = m_device.find_auto_data(
+ m_vars[i].into);
+ }
+
+ // For automatic variables data is transferred:
+ // - if always modifier is used OR
+ // - if alloc_if == 0 && free_if == 0 OR
+ // - if reference count is 1
+ if (!m_vars[i].flags.always_copy &&
+ (m_vars[i].alloc_if || m_vars[i].free_if) &&
+ (auto_data == 0 ||
+ auto_data->get_reference() != 1)) {
+ m_vars[i].direction.bits = c_parameter_nocopy;
+ }
+ // save data for later use
+ m_vars_extra[i].auto_data = auto_data;
+ }
+ }
break;
}
case c_string_ptr:
case c_data_ptr:
+ case c_string_ptr_ptr:
+ case c_data_ptr_ptr:
case c_cean_var_ptr:
+ case c_cean_var_ptr_ptr:
case c_dv_ptr: {
int64_t size = m_vars[i].size;
- if (m_vars[i].type.dst == c_cean_var_ptr) {
+ if (m_vars_extra[i].type_dst == c_cean_var_ptr ||
+ m_vars_extra[i].type_dst == c_cean_var_ptr_ptr) {
// array descriptor
const Arr_Desc *ap =
static_cast<const Arr_Desc*>(m_vars[i].into);
}
m_vars[i].into = reinterpret_cast<char**>(ap->base);
}
- else if (m_vars[i].type.dst == c_dv_ptr) {
+ else if (m_vars_extra[i].type_dst == c_dv_ptr) {
// need to send DV to the device unless it is 'nocopy'
if (m_vars[i].direction.bits ||
m_vars[i].alloc_if ||
}
}
- int64_t size_src = m_vars_extra[i].read_rng_src ?
+ int64_t size_src = m_vars_extra[i].read_rng_src &&
+ !m_vars[i].flags.is_non_cont_struct ?
cean_get_transf_size(m_vars_extra[i].read_rng_src) :
m_vars[i].size;
int64_t size_dst = m_vars_extra[i].read_rng_dst ?
// by var_desc with number 0.
// Its ptr_data is stored at m_stack_ptr_data
ptr_data = m_stack_ptr_data;
- m_vars[i].flags.sink_addr = 1;
}
else if (m_vars[i].alloc_if) {
if (m_vars[i].flags.preallocated) {
(char*) ptr_data->cpu_addr.start() :
0;
}
+
+ if (m_is_openmp) {
+ // for FROM transfer of stack buffer's variable
+ if (src_is_for_mic && m_vars[i].flags.is_stack_buf) {
+ AutoData *auto_data;
+ char *base = *static_cast<char**>(m_vars[i].into);
+ if (m_vars[i].alloc_if) {
+ auto_data =m_device.insert_auto_data(
+ base + into_disp,
+ size);
+ auto_data->add_reference();
+ }
+ else {
+ auto_data = m_device.find_auto_data(
+ base + into_disp);
+ }
+ // save data for later use
+ m_vars_extra[i].auto_data = auto_data;
+ // For automatic variables
+ // data is transferred:
+ // - if always modifier is used OR
+ // - if alloc_if == 0 && free_if == 0 OR
+ // - if reference count is 1
+ if (!m_vars[i].flags.always_copy &&
+ (m_vars[i].alloc_if ||
+ m_vars[i].free_if) &&
+ auto_data != 0 &&
+ auto_data->get_reference() != 1) {
+ m_vars[i].direction.bits =
+ c_parameter_nocopy;
+ }
+ }
+ }
// save pointer data
m_vars_extra[i].dst_data = ptr_data;
}
}
case c_func_ptr:
+ case c_func_ptr_ptr:
break;
case c_dv_data:
int64_t disp;
int64_t size;
- if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
+ if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_dst)) {
ap = static_cast<const Arr_Desc*>(m_vars[i].into);
// debug dump
ARRAY_DESC_DUMP(" ", "INTO", ap, 0, src_is_for_mic);
- dvp = (m_vars[i].type.dst == c_dv_data_slice) ?
+ dvp = (m_vars_extra[i].type_dst == c_dv_data_slice) ?
reinterpret_cast<ArrDesc*>(ap->base) :
*reinterpret_cast<ArrDesc**>(ap->base);
}
else {
- dvp = (m_vars[i].type.dst == c_dv_data) ?
+ dvp = (m_vars_extra[i].type_dst == c_dv_data) ?
static_cast<ArrDesc*>(m_vars[i].into) :
*static_cast<ArrDesc**>(m_vars[i].into);
}
init_read_ranges_dv(dvp);
}
// size and displacement
- if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
+ if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_dst)) {
// offset and length are derived from the array
// descriptor
__arr_data_offset_and_length(ap, into_disp, size);
}
int64_t size_src =
- m_vars_extra[i].read_rng_src ?
+ m_vars_extra[i].read_rng_src &&
+ (!m_vars[i].flags.is_non_cont_struct ||
+ src_is_for_mic) ?
cean_get_transf_size(m_vars_extra[i].read_rng_src) :
m_vars[i].size;
int64_t size_dst =
break;
default:
- LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
+ LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_src);
LIBOFFLOAD_ABORT;
}
// if into is used at CPU save its offset and disp
}
else {
if (m_vars[i].flags.is_stack_buf) {
- into_offset = static_cast<char*>(m_vars[i].into) -
- m_device.m_persist_list.front().cpu_stack_addr;
+ if (this_threads_cpu_stack_addr == 0) {
+ this_threads_cpu_stack_addr =
+ get_this_threads_cpu_stack_addr(
+ stack_addr, entry_id,
+ thread_specific_function_locals);
+ }
+ into_offset = static_cast<char*>
+ (m_vars[i].into) -
+ this_threads_cpu_stack_addr;
}
m_vars[i].offset = into_offset;
m_vars[i].disp = into_disp;
// send/receive data using buffer
COIRESULT res = COI::BufferCreate(data_len,
- COI_BUFFER_NORMAL,
+ COI_BUFFER_OPENCL,
0, 0,
1, &m_device.get_process(),
&m_inout_buf);
}
// initialize function descriptor
- m_func_desc = (FunctionDescriptor*) calloc(1, m_func_desc_size
- + misc_data_size);
+ m_func_desc = (FunctionDescriptor*) malloc(m_func_desc_size +
+ misc_data_size);
if (m_func_desc == NULL)
LIBOFFLOAD_ERROR(c_malloc);
m_func_desc->console_enabled = console_enabled;
int i;
for (i = m_vars_total - 1; i >=0; i--) {
- switch (m_vars[i].type.dst) {
- case c_data:
- case c_void_ptr:
- case c_cean_var:
- if (m_vars[i].direction.out &&
- m_vars[i].flags.is_static_dstn) {
- event_type = c_last_read;
- }
- else if (last_in < 0 && m_vars[i].direction.in &&
- m_vars[i].flags.is_static_dstn) {
- last_in = i;
- }
- break;
- case c_string_ptr:
- case c_data_ptr:
- case c_cean_var_ptr:
- case c_dv_ptr:
- case c_dv_data:
- case c_dv_ptr_data:
- case c_dv_data_slice:
- case c_dv_ptr_data_slice:
-
- if (m_vars[i].direction.out) {
- event_type = c_last_read;
- }
- else if (last_in < 0 && m_vars[i].direction.in) {
- last_in = i;
- }
- break;
- default:
- break;
+ bool src_is_target = (m_vars[i].direction.out || !m_vars[i].into);
+ int var_type = src_is_target ? m_vars_extra[i].type_src :
+ m_vars_extra[i].type_dst;
+ bool target_is_static = src_is_target ? m_vars[i].flags.is_static :
+ m_vars[i].flags.is_static_dstn;
+ switch (var_type) {
+ case c_data:
+ case c_void_ptr:
+ case c_cean_var:
+ if (m_vars[i].direction.out && target_is_static) {
+ event_type = c_last_read;
}
- if (event_type == c_last_read) {
- break;
- }
+ else if (last_in < 0 && m_vars[i].direction.in &&
+ target_is_static) {
+ last_in = i;
+ }
+ break;
+ case c_string_ptr:
+ case c_data_ptr:
+ case c_string_ptr_ptr:
+ case c_data_ptr_ptr:
+ case c_cean_var_ptr:
+ case c_cean_var_ptr_ptr:
+ case c_dv_ptr:
+ case c_dv_data:
+ case c_dv_ptr_data:
+ case c_dv_data_slice:
+ case c_dv_ptr_data_slice:
+
+ if (m_vars[i].direction.out) {
+ event_type = c_last_read;
+ }
+ else if (last_in < 0 && m_vars[i].direction.in) {
+ last_in = i;
+ }
+ break;
+ default:
+ break;
}
-
if (event_type == c_last_read) {
- m_vars_extra[i].omp_last_event_type = c_last_read;
- }
- else if (event_type == c_last_write) {
- m_vars_extra[last_in].omp_last_event_type = c_last_write;
+ break;
}
- m_omp_async_last_event_type = event_type;
+ }
+
+ if (event_type == c_last_read) {
+ m_vars_extra[i].omp_last_event_type = c_last_read;
+ }
+ else if (event_type == c_last_write) {
+ m_vars_extra[last_in].omp_last_event_type = c_last_write;
+ }
+ m_omp_async_last_event_type = event_type;
OFFLOAD_TRACE(2, "setup_omp_async_info: event_type=%d\n",
m_omp_async_last_event_type);
}
{
task_completion_callback ((void *) info);
}
+
+    // Callback function for asynchronous offloads.
+    //
+    // Invoked once per event it was registered on; the last invocation
+    // (when the descriptor's m_event_count drops to zero) finishes the
+    // offload, cleans it up and deletes the descriptor.
+    //
+    //   e, r  - event and its result as passed by the caller; not used here
+    //   info  - the OffloadDescriptor that was supplied at registration
+    //
+    // lock_complete is taken on entry and must be released on every path
+    // out of this function before offload_finish() is called or before
+    // returning.
+    void offload_complete_task(
+        COIEVENT e,
+        const COIRESULT r,
+        const void *info
+    )
+    {
+        Stream *stream;
+        OffloadDescriptor *task = const_cast<OffloadDescriptor*>(
+            reinterpret_cast<const OffloadDescriptor*>(info));
+        uint32_t events_remained;
+
+        lock_complete.lock();
+        // The descriptor was already completed (or never registered):
+        // nothing to do.
+        if (!offload_descr_map[task]) {
+            lock_complete.unlock();
+            return;
+        }
+
+#ifndef TARGET_WINNT
+        events_remained = __sync_sub_and_fetch(&task->m_event_count, 1);
+#else // TARGET_WINNT
+        events_remained = _InterlockedDecrement(&task->m_event_count);
+#endif // TARGET_WINNT
+        // Waiting for the last event
+        if (events_remained != 0) {
+            lock_complete.unlock();
+            return;
+        }
+
+        // Callback could be called when execution at host is completed.
+        // Do nothing as engine data is destructed
+        if (!task->get_device().get_ready()) {
+            lock_complete.unlock();
+            return;
+        }
+
+        void * signal = task->get_signal();
+        _Offload_stream stream_handle = task->get_stream();
+
+        OFFLOAD_TRACE(2, "Call function offload_complete_task(%p)\n", info);
+
+        // Completed offload has a signal
+        if (task->m_has_signal) {
+            if (!offload_descr_map[task]) {
+                lock_complete.unlock();
+                return;
+            }
+            task->get_device().complete_signaled_ofld(signal);
+            // Asynchronous offload can have both signal and stream. Need to
+            // clean stream if any.
+            stream_handle = task->get_stream();
+            if (stream_handle != -1) {
+                stream = Stream::find_stream(stream_handle, false);
+                if (stream && stream->get_last_offload() == task) {
+                    stream->set_last_offload(NULL);
+                }
+            }
+            offload_descr_map[task] = false;
+            lock_complete.unlock();
+
+            if (task->offload_finish(0)) { //arg is 0 for is_traceback
+                task->cleanup();
+            }
+            delete task;
+        }
+        // Asynchronous by stream
+        else {
+            if (stream_handle != 0) {
+                stream = Stream::find_stream(stream_handle, false);
+
+                // the stream was not created or was destroyed
+                if (!stream) {
+                    LIBOFFLOAD_ERROR(c_offload_no_stream,
+                                     task->get_device().get_logical_index());
+                    LIBOFFLOAD_ABORT;
+                }
+                if (!offload_descr_map[task]) {
+                    lock_complete.unlock();
+                    return;
+                }
+                if (task == stream->get_last_offload()) {
+                    stream->set_last_offload(NULL);
+                }
+                // if the offload has both signal and stream we will complete
+                // it as it has the signal. So we don't need to mark signal
+                // as completed.
+                offload_descr_map[task] = false;
+                lock_complete.unlock();
+                if (task->offload_finish(0)) { //arg is 0 for is_traceback
+                    task->cleanup();
+                }
+                delete task;
+            }
+            else {
+                // No specific stream to clean up; the offload is left
+                // untouched, but the lock taken on entry must still be
+                // released so later callers do not block on it.
+                lock_complete.unlock();
+            }
+        }
+    }
}
void OffloadDescriptor::register_omp_event_call_back(
const COIEVENT *event,
const void *info)
{
- OFFLOAD_TRACE(2, "register_omp_event_call_back(event=%p, info=%p)\n",
+ register_event_call_back(&offload_proxy_task_completed_ooo, event, info);
+}
+
+void OffloadDescriptor::register_event_call_back(
+ void (*func)(COIEVENT, const COIRESULT, const void*),
+ const COIEVENT *event,
+ const void *info)
+{
+ OFFLOAD_TRACE(2, "register_event_call_back(event=%p, info=%p)\n",
event, info);
if (COI::EventRegisterCallback) {
COI::EventRegisterCallback(
*event,
- &offload_proxy_task_completed_ooo,
+ func,
info, 0);
OFFLOAD_TRACE(2,
"COI::EventRegisterCallback found; callback registered\n");
OffloadTimer timer(get_timer_data(), c_offload_host_wait_deps);
bool ret = true;
OffloadDescriptor *task;
+ void * signal;
+
if (num_waits == 0) {
+ // Prepare in dependencies for stream
+ get_stream_in_dependencies(m_num_in_dependencies,m_p_in_dependencies);
return true;
}
Stream * stream;
// some specific stream of the device
if (handle != 0) {
+ lock_complete.lock();
stream = Stream::find_stream(handle, false);
// the stream was not created or was destroyed
// offload was completed by previous offload_wait pragma
// or wait clause
- if (task == 0) {
+ if (!offload_descr_map[task]) {
+ lock_complete.unlock();
return true;
}
+ stream->set_last_offload(NULL);
+ if (task->m_has_signal) {
+ signal = task->get_signal();
+ if (m_device.find_signal(signal, false) == task) {
+ m_device.complete_signaled_ofld(signal);
+ }
+ }
+ offload_descr_map[task] = false;
+ lock_complete.unlock();
+
if (!task->offload_finish(0)) { //arg is 0 for is_traceback
ret = false;
}
task->cleanup();
- stream->set_last_offload(NULL);
delete task;
}
// all streams of the device or over all devices
for (StreamMap::iterator it = stream_map.begin();
it != stream_map.end(); it++) {
Stream * stream = it->second;
-
if (!m_wait_all_devices &&
stream->get_device() != m_device.get_logical_index()) {
continue;
}
+ lock_complete.lock();
+
// get associated async task
OffloadDescriptor *task = stream->get_last_offload();
-
// offload was completed by offload_wait pragma or wait clause
- if (task == 0) {
+ if (!offload_descr_map[task]) {
+ lock_complete.unlock();
continue;
}
+ if (task->m_has_signal) {
+ signal = task->get_signal();
+ if (task->get_device().find_signal(signal, false) ==
+ task) {
+ task->get_device().complete_signaled_ofld(signal);
+ }
+ }
+ stream->set_last_offload(NULL);
+ offload_descr_map[task] = false;
+ lock_complete.unlock();
if (!task->offload_finish(0)) { //arg is 0 for is_traceback
ret = false;
}
task->cleanup();
- stream->set_last_offload(NULL);
delete task;
}
// no uncompleted streams
}
}
else {
- // if handle is equal to no_stream it's wait for signals
- for (int i = 0; i < num_waits; i++) {
- _Offload_stream stream_handle;
- Stream *stream;
- task = m_device.find_signal(waits[i], true);
- if (task == 0) {
- LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(),
- waits[i]);
- LIBOFFLOAD_ABORT;
+
+ // If offload is asynchronous we will not really wait for signals.
+ // We will collect all waited events into m_p_in_dependencies vector
+ // to be used in future calls to COI::Copy... API.
+
+ if (!__offload_always_wait && (m_has_signal || (get_stream() > 0))) {
+ uint64_t num_in_dep = 0,
+ num_in_dep_prev = 0;
+ COIEVENT *p_in_dep = NULL;
+ _Offload_stream stream_handle = get_stream();
+ Stream *stream;
+ bool stream_need_connection = stream_handle > 0;
+
+ if (stream_need_connection) {
+ stream = Stream::find_stream(stream_handle, false);
+ // check previous offload with the stream_handle
+ // to be noncompleted
+ if (!stream) {
+ stream_need_connection = false;
+ }
}
- else if (task == SIGNAL_IS_REMOVED) {
- continue;
+ for (int i = 0; i < num_waits; i++) {
+ task = m_device.find_signal(waits[i], false);
+ if (task == 0) {
+ LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(),
+ waits[i]);
+ LIBOFFLOAD_ABORT;
+ }
+ else if (task == SIGNAL_HAS_COMPLETED) {
+ continue;
+ }
+ if (stream_need_connection &&
+ stream->get_last_offload() == task) {
+ stream_need_connection = false;
+ }
+ if (!task->m_num_in_dependencies) {
+ continue;
+ }
+ num_in_dep += task->m_num_in_dependencies;
+ p_in_dep = (COIEVENT*)realloc(p_in_dep,
+ sizeof(COIEVENT) * num_in_dep);
+ if (p_in_dep == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+ memcpy(p_in_dep + num_in_dep_prev, task->m_p_in_dependencies,
+ task->m_num_in_dependencies * sizeof(COIEVENT));
+ num_in_dep_prev = num_in_dep;
}
- if (!task->offload_finish(0)) { //arg is 0 for is_traceback
- ret = false;
+ if (stream_need_connection) {
+ task = stream->get_last_offload();
+ if (task) {
+ num_in_dep += task->m_num_in_dependencies;
+ p_in_dep = (COIEVENT*)realloc(p_in_dep,
+ sizeof(COIEVENT) * num_in_dep);
+ if (p_in_dep == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+ memcpy(p_in_dep + num_in_dep_prev,
+ task->m_p_in_dependencies,
+ task->m_num_in_dependencies * sizeof(COIEVENT));
+ num_in_dep_prev = num_in_dep;
+ }
}
- task->cleanup();
- // if the offload both has signal and is last offload of its
- // stream, we must wipe out the "last_offload" reference as
- // the offload already is finished.
- stream_handle = task->m_stream;
- if (stream_handle != -1) {
- stream = Stream::find_stream(stream_handle, false);
- if (stream && stream->get_last_offload() == task) {
- stream->set_last_offload(NULL);
+ m_num_in_dependencies = num_in_dep ? num_in_dep :
+ m_num_in_dependencies;
+ m_p_in_dependencies = num_in_dep ? p_in_dep : m_p_in_dependencies;
+ }
+ // wait and do offload_finish for serial offload
+ else {
+ for (int i = 0; i < num_waits; i++) {
+ _Offload_stream stream_handle;
+ Stream *stream;
+
+ lock_complete.lock();
+ task = m_device.find_signal(waits[i], false);
+ if (task == 0) {
+ LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(),
+ waits[i]);
+ LIBOFFLOAD_ABORT;
+ }
+ else if (!offload_descr_map[task]) {
+ lock_complete.unlock();
+ continue;
+ }
+ // Need to mark signal as completed to prevent race condition
+ // with the call to "offload_complete_task" for the same
+ // signal.
+ m_device.complete_signaled_ofld(waits[i]);
+
+ // Asynchronous offload can have both signal and stream.
+ // Need to clean stream if any.
+
+ stream_handle = task->m_stream;
+ if (stream_handle != -1) {
+ stream = Stream::find_stream(stream_handle, false);
+ if (stream && stream->get_last_offload() == task) {
+ stream->set_last_offload(NULL);
+ }
+ }
+ offload_descr_map[task] = false;
+ lock_complete.unlock();
+
+ if (!task->offload_finish(0)) { //arg is 0 for is_traceback
+ ret = false;
}
+ task->cleanup();
+
+ delete task;
}
- delete task;
}
}
return ret;
bool is_traceback = offload_flags.bits.fortran_traceback;
// define kind of wait if any;
- // there can be one off the following kind:
+ // there can be one of the following kind:
// 1. c_offload_wait_signal for "offload_wait wait(signal)"
// 2. c_offload_wait_stream for "offload_wait stream(stream)"
// 3. c_offload_wait_all_streams for "offload_wait stream(0)"
char buf[35];
const char *stream_str;
- if (m_stream == no_stream || num_waits >= 0) {
+ if (m_stream == no_stream || num_waits ==-1) {
stream_str = "none";
}
else if (m_stream == 0) {
stream_str = buf;
}
- if (signal == 0) {
+ if (m_has_signal) {
OFFLOAD_DEBUG_TRACE_1(1,
GET_OFFLOAD_NUMBER(get_timer_data()),
c_offload_init_func,
c_offload_init_func,
"Offload function %s, is_empty=%d, #varDescs=%d, "
"signal=%p, stream=%s, #waits=%d%c",
- name, is_empty, vars_total, *signal, stream_str, num_waits,
- num_waits == 0 ? '\n' : ' ');
+ name, is_empty, vars_total, signal, stream_str,
+ num_waits, num_waits == 0 ? '\n' : ' ');
// Breaks the norm of using OFFLOAD_DEBUG_TRACE to print the waits
// since the number of waits is not fixed.
if (!OFFLOAD_DO_TRACE && (console_enabled >= 1)) {
m_initial_need_runfunction = m_need_runfunction = !is_empty;
- // wait for dependencies to finish
+ // wait for dependencies to finish or set
+ // m_num_in_dependencies and m_p_in_dependencies for asynchronous offload
if (!wait_dependencies(waits, num_waits, m_stream)) {
cleanup();
return false;
cleanup();
return false;
}
+
if (offload_flags.bits.omp_async) {
return true;
}
+
// if there is a signal or stream save descriptor for the later use.
// num_waits == -1 is for offload_wait and there is nothing to save
if (num_waits != -1 && (signal != 0 || m_stream != no_stream)) {
+
if (signal != 0) {
m_device.add_signal(*signal, this);
}
LIBOFFLOAD_ABORT;
}
}
- // if there is a clause with alloc_if(1) and preallocated need to call
- // offload_finish after runfunction
- if (!m_preallocated_alloc) {
+ // Register callback function "offload_complete_task" for all out
+ // events or for all in events if there are no out transfers
+ if (!m_preallocated_alloc) {
+ m_event_count = m_out_deps_total ?
+ m_out_deps_total : m_in_deps_total;
+ COIEVENT *event_list = m_out_deps_total ? m_out_deps : m_in_deps;
+
+ for (int i = 0; i < m_event_count; i++) {
+ register_event_call_back(&offload_complete_task,
+ &event_list[i], this);
+ }
+ offload_descr_map[this] = true;
return true;
}
}
COIRESULT res;
// wait for compute dependencies to become signaled
- if (m_in_deps_total > 0) {
+ if (m_in_deps_total > 0 &&
+ (m_out_deps_total <= 0 || m_preallocated_alloc)) {
OffloadTimer timer(get_timer_data(), c_offload_host_wait_compute);
if (__offload_active_wait) {
}
return false;
}
-
if (is_traceback && !m_traceback_called) {
OFFLOAD_TRACE(3,
"Calling Fortran library to continue traceback from MIC\n");
FORTRAN_TRACE_BACK(OFFLOAD_ERROR);
- m_traceback_called = true;
+ exit(1);
}
-
report_coi_error(c_event_wait, res);
}
}
- // scatter copyout data received from target
- if (!scatter_copyout_data()) {
- return false;
- }
-
- if (m_out_with_preallocated &&
- !receive_pointer_data(m_out_deps_total > 0, false, NULL)) {
- cleanup();
- return false;
+ // need to do scatter copyout data received from target after
+ // completing in dependencies to get preallocated buffers.
+ // If there are no preallocated buffers we will scatter_copyout_data
+ // after completing out dependencies. In this case we don't need to wait
+ // for in dependencies as they are already in the DAG.
+ if (m_out_with_preallocated) {
+ if (!scatter_copyout_data()) {
+ return false;
+ }
+ if (!receive_pointer_data(m_out_deps_total > 0, false, NULL)) {
+ cleanup();
+ return false;
+ }
}
// wait for receive dependencies to become signaled
}
if (res != COI_SUCCESS) {
- if (m_status != 0) {
+ if (m_status != 0 && !m_traceback_called) {
m_status->result = translate_coi_error(res);
+ if (is_traceback) {
+ OFFLOAD_TRACE(3,
+ "Calling Fortran library to continue traceback from MIC\n");
+ FORTRAN_TRACE_BACK(m_status->result);
+ m_traceback_called = true;
+ }
return false;
}
+ if (is_traceback && !m_traceback_called) {
+ OFFLOAD_TRACE(3,
+ "Calling Fortran library to continue traceback from MIC\n");
+ FORTRAN_TRACE_BACK(OFFLOAD_ERROR);
+ exit(1);
+ }
report_coi_error(c_event_wait, res);
}
}
+ if (!m_out_with_preallocated && !scatter_copyout_data()) {
+ return false;
+ }
// destroy buffers
{
OffloadTimer timer(get_timer_data(), c_offload_host_destroy_buffers);
COIRESULT res;
// check compute and receive dependencies
- if (m_in_deps_total > 0) {
- res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
- signaled = signaled && (res == COI_SUCCESS);
- }
if (m_out_deps_total > 0) {
res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
signaled = signaled && (res == COI_SUCCESS);
}
+ else if (m_in_deps_total > 0) {
+ res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
+ signaled = signaled && (res == COI_SUCCESS);
+ }
return signaled;
}
COIEVENT *in_deps
)
{
+ NonContigDesc *desc;
+ int noncont_num;
int64_t offset_src, offset_dst;
int64_t length_src, length_dst;
int64_t length_src_cur, length_dst_cur;
bool dst_is_empty = true;
bool src_is_empty = true;
- data_sent = 0;
-
- // Set length_src and length_dst
- length_src = (m_vars_extra[i].read_rng_src) ?
- m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
- length_dst = !m_vars[i].into ? length_src :
- (m_vars_extra[i].read_rng_dst) ?
- m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
- send_size = (length_src < length_dst) ? length_src : length_dst;
-
// If BufferWriteMultiD is defined we can set values of required arguments
// and transfer noncontiguous data via call to the COI routine.
- if (__offload_use_coi_noncontiguous_transfer && COI::BufferWriteMultiD) {
+ if (!m_vars[i].flags.is_non_cont_struct &&
+ __offload_use_coi_noncontiguous_transfer && COI::BufferWriteMultiD) {
struct Arr_Desc* arr_desc_dst;
struct Arr_Desc* arr_desc_src;
int64_t size_src, size_dst;
char *base = offload_get_src_base(static_cast<char*>(m_vars[i].ptr),
- m_vars[i].type.src);
+ m_vars_extra[i].type_src);
COIBUFFER dst_buf = m_vars[i].into ?
m_vars_extra[i].dst_data->mic_buf :
m_vars_extra[i].src_data->mic_buf;
m_vars_extra[i].dst_data->alloc_disp :
m_vars_extra[i].src_data->alloc_disp;
- arr_desc_src->base = reinterpret_cast<int64_t>(base);
arr_desc_dst->base = 0;
+ arr_desc_src->base = reinterpret_cast<int64_t>(base);
res = COI::BufferWriteMultiD(
- dst_buf, // in_DestBuffer,
- m_device.get_process(), // DestProcess,
+ dst_buf, // in_DestBuffer,
+ NULL, // DestProcess,
m_vars[i].offset + m_vars[i].mic_offset -
- alloc_disp, // Offset
- (void*)arr_desc_dst, // descriptor of DestArray
- (void*)arr_desc_src, // descriptor of SrcArray
- COI_COPY_UNSPECIFIED, // Type
- in_deps_amount, // Number of in Dependencies
- in_deps, // array of in Dependencies
- event); // out Dependency
+ alloc_disp, // Offset
+ (void*)arr_desc_dst, // descriptor of DestArray
+ (void*)arr_desc_src, // descriptor of SrcArray
+ COI_COPY_UNSPECIFIED, // Type
+ m_num_in_dependencies, // Number of in Dependencies
+ m_p_in_dependencies, // array of in Dependencies
+ event); // out Dependency
if (res != COI_SUCCESS) {
if (m_status != 0) {
m_status->result = translate_coi_error(res);
return(true);
}
- // if event is defined we must multiplate it for all contiguous intervals
+ data_sent = 0;
+ if (m_vars[i].flags.is_non_cont_struct) {
+ desc = m_vars_extra[i].noncont_desc;
+ noncont_num = 0;
+ }
+ else {
+ // Set length_src and length_dst
+ length_src = (m_vars_extra[i].read_rng_src) ?
+ m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
+ length_dst = !m_vars[i].into ? length_src :
+ (m_vars_extra[i].read_rng_dst) ?
+ m_vars_extra[i].read_rng_dst->range_size :
+ m_vars[i].size;
+ send_size = (length_src < length_dst) ? length_src : length_dst;
+ }
+
+ // if event is defined we must replicate it for all contiguous ranges
// that will be Copied/Write.
// Take in account that we already have 1 event.
if (event) {
- m_in_deps_allocated += (length_src / send_size) *
+ uint32_t range_num = m_vars[i].flags.is_non_cont_struct ?
+ desc->interval_cnt :
+ (length_src / send_size) *
((m_vars_extra[i].read_rng_src) ?
- m_vars_extra[i].read_rng_src->range_max_number : 1) ;
+ m_vars_extra[i].read_rng_src->range_max_number : 1) ;
+ m_in_deps_allocated += range_num ;
m_in_deps =
(COIEVENT*)realloc(m_in_deps, sizeof(COIEVENT) * m_in_deps_allocated);
m_in_deps_total--;
// consequently get contiguous ranges,
// define corresponded destination offset and send data
do {
- if (src_is_empty) {
- if (m_vars_extra[i].read_rng_src) {
- if (!get_next_range(m_vars_extra[i].read_rng_src,
- &offset_src)) {
- // source ranges are over - nothing to send
+ if (m_vars[i].flags.is_non_cont_struct) {
+ // ranges are over
+ if (noncont_num >= desc->interval_cnt) {
+ break;
+ }
+ offset_src = offset_dst = desc->interval[noncont_num].lower;
+ send_size = desc->interval[noncont_num].size;
+ noncont_num++;
+ }
+ else {
+ if (src_is_empty) {
+ if (m_vars_extra[i].read_rng_src) {
+ if (!get_next_range(m_vars_extra[i].read_rng_src,
+ &offset_src)) {
+ // source ranges are over - nothing to send
+ break;
+ }
+ }
+ else if (data_sent == 0) {
+ offset_src = m_vars_extra[i].cpu_disp;
+ }
+ else {
break;
}
- }
- else if (data_sent == 0) {
- offset_src = m_vars_extra[i].cpu_disp;
+ length_src_cur = length_src;
}
else {
- break;
+ // if source is contiguous or its contiguous range is greater
+ // than destination one
+ offset_src += send_size;
}
- length_src_cur = length_src;
- }
- else {
- // if source is contiguous or its contiguous range is greater
- // than destination one
- offset_src += send_size;
- }
- length_src_cur -= send_size;
- src_is_empty = length_src_cur == 0;
-
- if (dst_is_empty) {
- if (m_vars[i].into) {
- if (m_vars_extra[i].read_rng_dst) {
- if (!get_next_range(m_vars_extra[i].read_rng_dst,
- &offset_dst)) {
- // destination ranges are over
- LIBOFFLOAD_ERROR(c_destination_is_over);
- return false;
+ length_src_cur -= send_size;
+ src_is_empty = length_src_cur == 0;
+
+ if (dst_is_empty) {
+ if (m_vars[i].into) {
+ if (m_vars_extra[i].read_rng_dst) {
+ if (!get_next_range(m_vars_extra[i].read_rng_dst,
+ &offset_dst)) {
+ // destination ranges are over
+ LIBOFFLOAD_ERROR(c_destination_is_over);
+ return false;
+ }
+ }
+ // into is contiguous.
+ else {
+ offset_dst = m_vars[i].disp;
}
+ length_dst_cur = length_dst;
}
- // into is contiguous.
+ // same as source
else {
- offset_dst = m_vars[i].disp;
+ offset_dst = offset_src;
+ length_dst_cur = length_src;
}
- length_dst_cur = length_dst;
}
- // same as source
else {
- offset_dst = offset_src;
- length_dst_cur = length_src;
+ // if destination is contiguous or its contiguous range is greater
+ // than source one
+ offset_dst += send_size;
}
+ length_dst_cur -= send_size;
+ dst_is_empty = length_dst_cur == 0;
}
- else {
- // if destination is contiguous or its contiguous range is greater
- // than source one
- offset_dst += send_size;
- }
- length_dst_cur -= send_size;
- dst_is_empty = length_dst_cur == 0;
-
if (event) {
event = &m_in_deps[m_in_deps_total++];
- }
+ }
if (src_data != 0 && src_data->cpu_buf != 0) {
res = COI::BufferCopy(
dst_data->mic_buf,
m_vars_extra[i].cpu_offset + offset_src,
send_size,
COI_COPY_UNSPECIFIED,
- in_deps_amount, in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
}
else {
char *base = offload_get_src_base(m_vars[i].ptr,
- m_vars[i].type.src);
+ m_vars_extra[i].type_src);
res = COI::BufferWrite(
dst_data->mic_buf,
base + offset_src,
send_size,
COI_COPY_UNSPECIFIED,
- in_deps_amount, in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
for (int i = 0; i < m_vars_total; i++) {
if (m_vars[i].direction.in &&
m_vars[i].size >= __offload_use_async_buffer_write) {
- switch (m_vars[i].type.dst) {
+ switch (m_vars_extra[i].type_dst) {
case c_data:
case c_void_ptr:
+ case c_void_ptr_ptr:
case c_cean_var:
if (m_vars[i].flags.is_static_dstn) {
big_size_count++;
}
break;
case c_string_ptr:
+ case c_string_ptr_ptr:
case c_data_ptr:
+ case c_data_ptr_ptr:
case c_cean_var_ptr:
+ case c_cean_var_ptr_ptr:
case c_dv_ptr:
case c_dv_data:
case c_dv_ptr_data:
}
}
- if (m_stream != no_stream && m_vars_total != 0) {
- get_stream_in_dependencies(in_deps_amount, in_deps);
- }
-
// Initiate send for pointer data
for (int i = 0; i < m_vars_total; i++) {
uint64_t sent_data = m_vars[i].size;
- uint32_t in_deps_amount_save;
- COIEVENT *in_deps_save;
-
- if (m_vars_extra[i].omp_last_event_type == c_last_write) {
- in_deps_amount_save = in_deps_amount;
- in_deps_save = in_deps;
- in_deps_amount = m_in_deps_total;
- if (in_deps_amount > 0) {
- in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * in_deps_amount);
- if (in_deps == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- memcpy(in_deps, m_in_deps,in_deps_amount * sizeof(COIEVENT));
- }
- }
- switch (m_vars[i].type.dst) {
+
+ if (m_vars_extra[i].omp_last_event_type == c_last_write &&
+ m_in_deps_total > 0) {
+ m_num_in_dependencies = m_in_deps_total;
+ m_p_in_dependencies = m_in_deps;
+ }
+ switch (m_vars_extra[i].type_dst) {
case c_data_ptr_array:
break;
case c_data:
case c_void_ptr:
+ case c_void_ptr_ptr:
case c_cean_var:
if (m_vars[i].direction.in &&
m_vars[i].flags.is_static_dstn) {
COIEVENT *event =
- (is_async ||
+ (m_stream != no_stream ||
+ is_async ||
(should_use_async_buffer_write &&
m_vars[i].size >= __offload_use_async_buffer_write)) ?
&m_in_deps[m_in_deps_total++] : 0;
m_vars_extra[i].dst_data :
m_vars_extra[i].src_data;
PtrData* src_data =
- VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
- VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
+ VAR_TYPE_IS_PTR(m_vars_extra[i].type_src) ||
+ VAR_TYPE_IS_SCALAR(m_vars_extra[i].type_src) &&
m_vars[i].flags.is_static ?
m_vars_extra[i].src_data : 0;
- if (m_vars[i].flags.is_noncont_src ||
+ if (m_vars[i].flags.is_non_cont_struct ||
+ m_vars[i].flags.is_noncont_src ||
m_vars[i].flags.is_noncont_dst) {
if (!send_noncontiguous_pointer_data(
i, src_data, dst_data, event, sent_data,
- in_deps_amount, in_deps)) {
+ m_num_in_dependencies, m_p_in_dependencies)) {
return false;
}
}
m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- in_deps_amount, in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
}
else {
char *base = offload_get_src_base(m_vars[i].ptr,
- m_vars[i].type.src);
+ m_vars_extra[i].type_src);
res = COI::BufferWrite(
dst_data->mic_buf,
m_vars[i].mic_offset +
base + m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- in_deps_amount, in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
}
break;
- case c_string_ptr:
case c_data_ptr:
+ // If use_device_ptr no data needs to be sent
+ if (m_vars[i].flags.use_device_ptr) {
+ break;
+ }
+ case c_string_ptr:
+ case c_string_ptr_ptr:
+ case c_data_ptr_ptr:
case c_cean_var_ptr:
+ case c_cean_var_ptr_ptr:
case c_dv_ptr:
if (m_vars[i].direction.in && m_vars[i].size > 0) {
COIEVENT *event =
- (is_async ||
+ (m_stream != no_stream ||
+ is_async ||
(should_use_async_buffer_write &&
m_vars[i].size >= __offload_use_async_buffer_write)) ?
&m_in_deps[m_in_deps_total++] : 0;
m_vars_extra[i].dst_data :
m_vars_extra[i].src_data;
PtrData* src_data =
- VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
- VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
+ VAR_TYPE_IS_PTR(m_vars_extra[i].type_src) ||
+ VAR_TYPE_IS_SCALAR(m_vars_extra[i].type_src) &&
m_vars[i].flags.is_static ?
m_vars_extra[i].src_data : 0;
- if (m_vars[i].flags.is_noncont_src ||
+ if (m_vars[i].flags.is_non_cont_struct ||
+ m_vars[i].flags.is_noncont_src ||
m_vars[i].flags.is_noncont_dst) {
send_noncontiguous_pointer_data(
i, src_data, dst_data, event, sent_data,
m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- in_deps_amount, in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
}
else {
char *base = offload_get_src_base(m_vars[i].ptr,
- m_vars[i].type.src);
+ m_vars_extra[i].type_src);
res = COI::BufferWrite(
dst_data->mic_buf,
m_vars[i].mic_offset +
base + m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- in_deps_amount, in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
PtrData* src_data = m_vars_extra[i].src_data;
COIEVENT *event =
- (is_async ||
+ (m_stream != no_stream ||
+ is_async ||
(should_use_async_buffer_write &&
m_vars[i].size >= __offload_use_async_buffer_write)) ?
&m_in_deps[m_in_deps_total++] : 0;
- if (m_vars[i].flags.is_noncont_src ||
+ if (m_vars[i].flags.is_non_cont_struct ||
+ m_vars[i].flags.is_noncont_src ||
m_vars[i].flags.is_noncont_dst) {
send_noncontiguous_pointer_data(
i, src_data, ptr_data, event, sent_data,
m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- in_deps_amount, in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
}
else {
char *base = offload_get_src_base(m_vars[i].ptr,
- m_vars[i].type.src);
+ m_vars_extra[i].type_src);
res = COI::BufferWrite(
ptr_data->mic_buf,
ptr_data->mic_offset +
base + m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- in_deps_amount, in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
m_vars_extra[i].dst_data :
m_vars_extra[i].src_data;
PtrData* src_data =
- (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
- VAR_TYPE_IS_DV_DATA(m_vars[i].type.src) ||
- VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) ||
- VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
+ (VAR_TYPE_IS_PTR(m_vars_extra[i].type_src) ||
+ VAR_TYPE_IS_DV_DATA(m_vars_extra[i].type_src) ||
+ VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_src) ||
+ VAR_TYPE_IS_SCALAR(m_vars_extra[i].type_src) &&
m_vars[i].flags.is_static) ?
m_vars_extra[i].src_data : 0;
COIEVENT *event =
- (is_async ||
+ (m_stream != no_stream ||
+ is_async ||
(should_use_async_buffer_write &&
m_vars[i].size >= __offload_use_async_buffer_write)) ?
&m_in_deps[m_in_deps_total++] : 0;
- if (m_vars[i].flags.is_noncont_src ||
+ if (m_vars[i].flags.is_non_cont_struct ||
+ m_vars[i].flags.is_noncont_src ||
m_vars[i].flags.is_noncont_dst) {
send_noncontiguous_pointer_data(
i, src_data, dst_data, event, sent_data,
m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- in_deps_amount, in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
}
else {
char *base = offload_get_src_base(m_vars[i].ptr,
- m_vars[i].type.src);
+ m_vars_extra[i].type_src);
res = COI::BufferWrite(
dst_data->mic_buf,
dst_data->mic_offset +
base + m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- in_deps_amount, in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
break;
}
if (m_vars_extra[i].omp_last_event_type == c_last_write) {
- in_deps_amount = in_deps_amount_save;
- in_deps = in_deps_save;
register_omp_event_call_back(&m_in_deps[m_in_deps_total - 1], info);
}
// alloc field isn't used at target.
m_vars[i].ptr_arr_offset = m_vars_extra[i].ptr_arr_offset;
}
}
+ // The out events created by send_pointer_data now become the input
+ // dependencies for runfunction (or for the Read transfers from the target
+ // if runfunction is absent)
+ m_num_in_dependencies = m_in_deps_total ? m_in_deps_total :
+ m_num_in_dependencies;
+ m_p_in_dependencies = m_in_deps_total ? m_in_deps : m_p_in_dependencies;
if (m_status) {
m_status->data_sent += ptr_sent;
m_in.send_data(&ptr_data->alloc_disp,
sizeof(ptr_data->alloc_disp));
}
-
+ if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) ||
+ TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst) ||
+ (m_vars_extra[i].type_src == c_data_ptr_array &&
+ m_vars[i].flags.is_pointer)) {
+ m_in.send_data(&m_vars_extra[i].pointer_offset,
+ sizeof(m_vars_extra[i].pointer_offset));
+ }
// send sink address to the target
if (m_vars[i].flags.sink_addr) {
m_in.send_data(&ptr_data->mic_addr,
sizeof(ptr_data->mic_addr));
}
- switch (m_vars[i].type.dst) {
+ switch (m_vars_extra[i].type_dst) {
case c_data_ptr_array:
break;
case c_data:
case c_void_ptr:
+ case c_void_ptr_ptr:
case c_cean_var:
if (m_vars[i].direction.in &&
!m_vars[i].flags.is_static_dstn) {
char *ptr = offload_get_src_base(m_vars[i].ptr,
- m_vars[i].type.src);
- if (m_vars[i].type.dst == c_cean_var) {
+ m_vars_extra[i].type_src);
+ if (m_vars_extra[i].type_dst == c_cean_var) {
// offset and length are derived from the array
// descriptor
int64_t size = m_vars[i].size;
}
break;
case c_func_ptr:
+ case c_func_ptr_ptr:
if (m_vars[i].direction.in) {
m_in.send_func_ptr(*((const void**) m_vars[i].ptr));
}
// dispatch task
COIRESULT res;
COIEVENT event;
- uint32_t in_deps_amount = m_in_deps_total;
- COIEVENT *in_deps = m_in_deps_total > 0 ? m_in_deps : 0;
-
- if (0 == m_in_deps_total && m_stream != no_stream) {
- get_stream_in_dependencies(in_deps_amount, in_deps);
- }
res = m_device.compute(m_stream,
m_compute_buffers,
misc, misc_len,
ret, ret_len,
- in_deps_amount,
- in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
&event);
if (res != COI_SUCCESS) {
register_omp_event_call_back(&event, info);
}
- m_in_deps_total = 1;
+ m_in_deps_total = m_num_in_dependencies = 1;
m_in_deps[0] = event;
+ m_p_in_dependencies = m_in_deps;
}
return true;
uint32_t in_deps_amount,
COIEVENT *in_deps
)
-{
+{
+ NonContigDesc *desc;
+ int noncont_num;
int64_t offset_src, offset_dst;
int64_t length_src, length_dst;
int64_t length_src_cur, length_dst_cur;
m_vars[i].into ?
static_cast<char*>(m_vars[i].into) :
static_cast<char*>(m_vars[i].ptr),
- m_vars[i].type.dst);
+ m_vars_extra[i].type_dst);
received_data = 0;
- // Set length_src and length_dst
- length_src = (m_vars_extra[i].read_rng_src) ?
- m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
- length_dst = !m_vars[i].into ? length_src :
- (m_vars_extra[i].read_rng_dst) ?
- m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
- receive_size = (length_src < length_dst) ? length_src : length_dst;
-
// If BufferReadMultiD is defined we can set values of required arguments
// and transfer noncontiguous data via call to the COI routine.
if (__offload_use_coi_noncontiguous_transfer && COI::BufferReadMultiD) {
m_vars_extra[i].src_data->mic_buf, // SourceBuffer
m_vars[i].offset + m_vars[i].mic_offset -
m_vars_extra[i].src_data->alloc_disp, // Offset
- (void*)arr_desc_dst, // descriptor of DestArray
- (void*)arr_desc_src, // descriptor of SrcArray
+ (void*)arr_desc_dst, // descriptor of DestArray
+ (void*)arr_desc_src, // descriptor of SrcArray
COI_COPY_UNSPECIFIED, // Type
- in_deps_amount, // Number of in Dependencies
- in_deps, // array of in Dependencies
+ m_num_in_dependencies, // Number of in Dependencies
+ m_p_in_dependencies, // array of in Dependencies
event); // out Dependency
if (res != COI_SUCCESS) {
if (m_status != 0) {
}
return(true);
}
+ if (m_vars[i].flags.is_non_cont_struct) {
+ desc = m_vars_extra[i].noncont_desc;
+ noncont_num = 0;
+ }
+ else {
+ // Set length_src and length_dst
+ length_src = (m_vars_extra[i].read_rng_src) ?
+ m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
+ length_dst = !m_vars[i].into ? length_src :
+ (m_vars_extra[i].read_rng_dst) ?
+ m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
+ receive_size = (length_src < length_dst) ? length_src : length_dst;
+ }
+
// if event is defined we must multiplate for all contiguous intervals
// that will be Copied/Read.
// Take in account that we already have 1 event.
if (event) {
- m_out_deps_allocated += (length_src / receive_size) *
+ uint32_t range_num = m_vars[i].flags.is_non_cont_struct ?
+ desc->interval_cnt :
+ (length_src / receive_size) *
((m_vars_extra[i].read_rng_src) ?
m_vars_extra[i].read_rng_src->range_max_number : 1) ;
+ m_out_deps_allocated += range_num;
m_out_deps =
(COIEVENT*)realloc(m_out_deps, sizeof(COIEVENT) * m_out_deps_allocated);
m_out_deps_total--;
}
-
+
// consequently get contiguous ranges,
// define corresponded destination offset and receive data
do {
- // get sorce offset
- if (src_is_empty) {
- if (m_vars_extra[i].read_rng_src) {
- if (!get_next_range(m_vars_extra[i].read_rng_src,
- &offset_src)) {
- // source ranges are over - nothing to send
+ if (m_vars[i].flags.is_non_cont_struct) {
+ // ranges are over
+ if (noncont_num >= desc->interval_cnt) {
+ break;
+ }
+ offset_src = offset_dst = desc->interval[noncont_num].lower;
+ receive_size = desc->interval[noncont_num].size;
+ noncont_num++;
+ }
+ else { // get source offset
+ if (src_is_empty) {
+ if (m_vars_extra[i].read_rng_src) {
+ if (!get_next_range(m_vars_extra[i].read_rng_src,
+ &offset_src)) {
+ // source ranges are over - nothing to send
+ break;
+ }
+ }
+ else if (received_data == 0) {
+ offset_src = m_vars[i].disp;
+ }
+ else {
break;
}
- }
- else if (received_data == 0) {
- offset_src = m_vars[i].disp;
+ length_src_cur = length_src;
}
else {
- break;
+ // if source is contiguous or its contiguous range is greater
+ // than destination one
+ offset_src += receive_size;
}
- length_src_cur = length_src;
- }
- else {
- // if source is contiguous or its contiguous range is greater
- // than destination one
- offset_src += receive_size;
- }
- length_src_cur -= receive_size;
- src_is_empty = length_src_cur == 0;
-
- // get destination offset
- if (dst_is_empty) {
- if (m_vars[i].into) {
- if (m_vars_extra[i].read_rng_dst) {
- if (!get_next_range(m_vars_extra[i].read_rng_dst,
- &offset_dst)) {
- // destination ranges are over
- LIBOFFLOAD_ERROR(c_destination_is_over);
- return false;
+ length_src_cur -= receive_size;
+ src_is_empty = length_src_cur == 0;
+
+ // get destination offset
+ if (dst_is_empty) {
+ if (m_vars[i].into) {
+ if (m_vars_extra[i].read_rng_dst) {
+ if (!get_next_range(m_vars_extra[i].read_rng_dst,
+ &offset_dst)) {
+ // destination ranges are over
+ LIBOFFLOAD_ERROR(c_destination_is_over);
+ return false;
+ }
}
+ // destination is contiguous.
+ else {
+ offset_dst = m_vars_extra[i].cpu_disp;
+ }
+ length_dst_cur = length_dst;
}
- // destination is contiguous.
+ // same as source
else {
- offset_dst = m_vars_extra[i].cpu_disp;
+ offset_dst = offset_src;
+ length_dst_cur = length_src;
}
- length_dst_cur = length_dst;
}
- // same as source
else {
- offset_dst = offset_src;
- length_dst_cur = length_src;
+ // if destination is contiguous or its contiguous range is greater
+ // than source one
+ offset_dst += receive_size;
}
+ length_dst_cur -= receive_size;
+ dst_is_empty = length_dst_cur == 0;
}
- else {
- // if destination is contiguous or its contiguous range is greater
- // than source one
- offset_dst += receive_size;
- }
- length_dst_cur -= receive_size;
- dst_is_empty = length_dst_cur == 0;
if (event) {
event = &m_out_deps[m_out_deps_total++];
}
m_vars[i].mic_offset,
receive_size,
COI_COPY_UNSPECIFIED,
- in_deps_amount,
- in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
base + offset_dst,
receive_size,
COI_COPY_UNSPECIFIED,
- in_deps_amount,
- in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
if (first_run == m_vars[i].flags.preallocated) {
continue;
}
- switch (m_vars[i].type.src) {
+ switch (m_vars_extra[i].type_src) {
case c_data:
case c_void_ptr:
+ case c_void_ptr_ptr:
case c_cean_var:
if (m_vars[i].flags.is_static) {
big_size_count++;
break;
case c_string_ptr:
case c_data_ptr:
+ case c_string_ptr_ptr:
+ case c_data_ptr_ptr:
case c_cean_var_ptr:
+ case c_cean_var_ptr_ptr:
case c_dv_data:
case c_dv_ptr_data:
case c_dv_data_slice:
uint32_t in_deps_amount = m_in_deps_total;
COIEVENT *in_deps = m_in_deps_total > 0 ? m_in_deps : 0;
- if (0 == m_in_deps_total &&
- m_stream != no_stream &&
- m_vars_total != 0) {
- get_stream_in_dependencies(in_deps_amount, in_deps);
- }
-
for (int i = 0; i < m_vars_total; i++) {
uint64_t received_data = m_vars[i].size;
- uint32_t in_deps_amount_save;
- COIEVENT *in_deps_save;
-
- if (m_vars_extra[i].omp_last_event_type == c_last_read) {
- in_deps_amount_save = in_deps_amount;
- in_deps_save = in_deps;
-
- in_deps_amount += m_out_deps_total;
- if (in_deps_amount > 0) {
- in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * in_deps_amount);
- if (in_deps == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- memcpy(in_deps, in_deps_save,
- in_deps_amount_save * sizeof(COIEVENT));
- memcpy(in_deps + in_deps_amount_save * sizeof(COIEVENT),
- m_out_deps,
- m_out_deps_total * sizeof(COIEVENT));
- }
+
+ // Nothing to receive if use_device_ptr
+ if (m_vars[i].flags.use_device_ptr )
+ continue;
+ if (m_vars_extra[i].omp_last_event_type == c_last_read &&
+ m_out_deps_total > 0) {
+ m_num_in_dependencies = m_out_deps_total;
+ m_p_in_dependencies = m_out_deps;
}
// At first run don't receive by preallocated target pointer as the
//pointer value will be ready later after call to scatter_copyout_data
}
continue;
}
- switch (m_vars[i].type.src) {
+ switch (m_vars_extra[i].type_src) {
case c_data_ptr_array:
break;
case c_data:
case c_void_ptr:
+ case c_void_ptr_ptr:
case c_cean_var:
if (m_vars[i].direction.out &&
m_vars[i].flags.is_static) {
COIEVENT *event =
- (is_async ||
+ (m_stream != no_stream ||
+ is_async ||
m_in_deps_total > 0 ||
(should_use_async_buffer_read &&
m_vars[i].size >= __offload_use_async_buffer_read)) ?
COIBUFFER dst_buf = NULL; // buffer at host
char *base;
- if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
+ if (VAR_TYPE_IS_PTR(m_vars_extra[i].type_dst)) {
ptr_data = m_vars[i].into ?
m_vars_extra[i].dst_data :
m_vars_extra[i].src_data;
}
- else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
+ else if (VAR_TYPE_IS_SCALAR(m_vars_extra[i].type_dst)) {
if (m_vars[i].flags.is_static_dstn) {
ptr_data = m_vars[i].into ?
m_vars_extra[i].dst_data :
m_vars[i].into ?
static_cast<char*>(m_vars[i].into) :
static_cast<char*>(m_vars[i].ptr),
- m_vars[i].type.dst);
+ m_vars_extra[i].type_dst);
}
- if (m_vars[i].flags.is_noncont_src ||
+ if (m_vars[i].flags.is_non_cont_struct ||
+ m_vars[i].flags.is_noncont_src ||
m_vars[i].flags.is_noncont_dst) {
receive_noncontiguous_pointer_data(
i, dst_buf, event, received_data,
- in_deps_amount, in_deps);
+ m_num_in_dependencies, m_p_in_dependencies);
}
else if (dst_buf != 0) {
res = COI::BufferCopy(
m_vars[i].offset + m_vars[i].disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- in_deps_amount,
- in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
m_vars_extra[i].cpu_disp,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- in_deps_amount,
- in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
case c_string_ptr:
case c_data_ptr:
+ case c_string_ptr_ptr:
+ case c_data_ptr_ptr:
case c_cean_var_ptr:
+ case c_cean_var_ptr_ptr:
case c_dv_data:
case c_dv_ptr_data:
case c_dv_data_slice:
COIBUFFER dst_buf = NULL; // buffer on host
if (m_vars[i].direction.out && m_vars[i].size > 0) {
COIEVENT *event =
- (is_async ||
+ (m_stream != no_stream ||
+ is_async ||
m_in_deps_total > 0 ||
(should_use_async_buffer_read &&
m_vars[i].size >= __offload_use_async_buffer_read)) ?
uint64_t dst_offset = 0;
char *base = static_cast<char*>(m_vars[i].ptr);
- if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
+ if (VAR_TYPE_IS_PTR(m_vars_extra[i].type_dst)) {
PtrData *ptr_data = m_vars[i].into ?
m_vars_extra[i].dst_data :
m_vars_extra[i].src_data;
dst_offset = m_vars_extra[i].cpu_offset +
m_vars_extra[i].cpu_disp;
}
- else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
+ else if (VAR_TYPE_IS_SCALAR(m_vars_extra[i].type_dst)) {
if (m_vars[i].flags.is_static_dstn) {
dst_buf = m_vars[i].into ?
m_vars_extra[i].dst_data->cpu_buf :
m_vars[i].into ?
static_cast<char*>(m_vars[i].into) :
static_cast<char*>(m_vars[i].ptr),
- m_vars[i].type.dst);
+ m_vars_extra[i].type_dst);
}
dst_offset = m_vars_extra[i].cpu_offset +
m_vars_extra[i].cpu_disp;
}
- else if (VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst) ||
- VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
+ else if (VAR_TYPE_IS_DV_DATA(m_vars_extra[i].type_dst) ||
+ VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_dst)) {
PtrData *ptr_data = m_vars[i].into != 0 ?
m_vars_extra[i].dst_data :
m_vars_extra[i].src_data;
m_vars[i].into ?
static_cast<char*>(m_vars[i].into) :
static_cast<char*>(m_vars[i].ptr),
- m_vars[i].type.dst);
+ m_vars_extra[i].type_dst);
}
dst_offset = m_vars_extra[i].cpu_offset +
m_vars_extra[i].cpu_disp;
}
- if (m_vars[i].flags.is_noncont_src ||
+ if (m_vars[i].flags.is_non_cont_struct ||
+ m_vars[i].flags.is_noncont_src ||
m_vars[i].flags.is_noncont_dst) {
receive_noncontiguous_pointer_data(
i, dst_buf, event, received_data,
- in_deps_amount,
- in_deps);
+ m_num_in_dependencies, m_p_in_dependencies);
}
else if (dst_buf != 0) {
res = COI::BufferCopy(
m_vars[i].mic_offset,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- in_deps_amount,
- in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
base + dst_offset,
m_vars[i].size,
COI_COPY_UNSPECIFIED,
- in_deps_amount,
- in_deps,
+ m_num_in_dependencies,
+ m_p_in_dependencies,
event);
if (res != COI_SUCCESS) {
if (m_status != 0) {
}
if (m_vars_extra[i].omp_last_event_type == c_last_read) {
- in_deps_amount = in_deps_amount_save;
- in_deps = in_deps_save;
register_omp_event_call_back(&m_out_deps[m_out_deps_total - 1], info);
}
// destroy buffers for obsolete stacks
- if (m_destroy_stack.size() != 0) {
+ if (m_destroy_stack.size() != 0) {
for (PtrDataList::iterator it = m_destroy_stack.begin();
it != m_destroy_stack.end(); it++) {
PtrData *ptr_data = *it;
}
if (m_vars[i].free_if) {
// remove association for automatic variables
- if (m_is_openmp && !m_vars[i].flags.is_static &&
- (m_vars[i].type.src == c_data ||
- m_vars[i].type.src == c_void_ptr ||
- m_vars[i].type.src == c_cean_var)) {
- AutoData *auto_data = m_vars_extra[i].auto_data;
- if (auto_data != 0) {
+ if (m_is_openmp) {
+ if (m_vars_extra[i].auto_data) {
+ AutoData *auto_data = m_vars_extra[i].auto_data;
if (m_vars[i].flags.always_delete) {
auto_data->nullify_reference();
}
- else if(auto_data->remove_reference() == 0) {
- m_device.remove_auto_data(auto_data->cpu_addr.start());
+ else if (auto_data->remove_reference() == 0) {
+ m_device.remove_auto_data(auto_data->cpu_addr.start());
}
+ continue;
}
+ else {
+ PtrData *ptr_data = m_vars_extra[i].src_data;
+ if (ptr_data &&
+ IS_OPENMP_IMPLICIT_OR_LINK(ptr_data->var_alloc_type)) {
+ if (ptr_data->get_reference() > 0) {
+ ptr_data->remove_reference();
+ }
+ continue;
+ }
+ }
}
// destroy buffers
if (m_vars[i].direction.out || m_vars[i].into == NULL) {
- if (!VAR_TYPE_IS_PTR(m_vars[i].type.src) &&
- !VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) &&
- !VAR_TYPE_IS_DV_DATA(m_vars[i].type.src)) {
+ if (!VAR_TYPE_IS_PTR(m_vars_extra[i].type_src) &&
+ !VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_src) &&
+ !VAR_TYPE_IS_DV_DATA(m_vars_extra[i].type_src)) {
continue;
}
}
}
}
- else if (VAR_TYPE_IS_PTR(m_vars[i].type.dst) ||
- VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst) ||
- VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst)) {
+ else if (VAR_TYPE_IS_PTR(m_vars_extra[i].type_dst) ||
+ VAR_TYPE_IS_DV_DATA_SLICE(m_vars_extra[i].type_dst) ||
+ VAR_TYPE_IS_DV_DATA(m_vars_extra[i].type_dst)) {
PtrData *ptr_data = m_vars_extra[i].dst_data;
+
if (ptr_data->remove_reference() == 0) {
// destroy buffers
if (ptr_data->cpu_buf != 0) {
m_status->data_received += ptr_received;
}
+ m_num_in_dependencies = m_out_deps_total ? m_out_deps_total :
+ m_num_in_dependencies;
+ m_p_in_dependencies = m_out_deps_total ? m_out_deps : m_p_in_dependencies;
+
OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), ptr_received);
OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
c_offload_received_pointer_data,
bool src_is_for_mic = (m_vars[i].direction.out ||
m_vars[i].into == NULL);
- if (m_vars[i].type.src != c_data_ptr_array &&
+ if (m_vars_extra[i].type_src != c_data_ptr_array &&
m_vars[i].flags.preallocated && m_vars[i].alloc_if) {
PtrData *ptr_data;
void *ptr_value;
(char*) ptr_data->cpu_addr.start();
}
- switch (m_vars[i].type.src) {
+ switch (m_vars_extra[i].type_src) {
case c_data_ptr_array:
break;
case c_data:
case c_void_ptr:
+ case c_void_ptr_ptr:
case c_cean_var:
if (m_vars[i].direction.out &&
!m_vars[i].flags.is_static) {
if (m_vars[i].into) {
char *ptr = offload_get_src_base(
static_cast<char*>(m_vars[i].into),
- m_vars[i].type.dst);
+ m_vars_extra[i].type_dst);
m_out.receive_data(ptr + m_vars_extra[i].cpu_disp,
m_vars[i].size);
}
break;
case c_func_ptr:
+ case c_func_ptr_ptr:
if (m_vars[i].direction.out) {
m_out.receive_func_ptr((const void**) m_vars[i].ptr);
}
// of the var_desc's array
get_arr_desc_numbers(ap, sizeof(void *), ptr.offset, ptr.size,
pointers_number, ptr.ranges);
- ptr.base = (m_vars[i].flags.is_pointer) ?
- *(reinterpret_cast<char**>(ap->base)) :
- reinterpret_cast<char*>(ap->base);
+ ptr.base = reinterpret_cast<char*>(ap->base);
// 2. prepare memory for new var_descs
m_vars_total += pointers_number;
m_vars[new_index + k].align = align.val;
m_vars[new_index + k].mic_offset = 0;
m_vars[new_index + k].flags.bits = m_vars[i].flags.bits;
+ m_vars[new_index + k].flags.is_pointer = 0;
m_vars[new_index + k].offset = 0;
m_vars[new_index + k].size = m_vars[i].size;
m_vars[new_index + k].flags.targetptr = m_vars[i].flags.targetptr;
m_vars[new_index + k].alloc = NULL;
}
- m_vars[new_index + k].type.src = type_src;
- m_vars[new_index + k].type.dst = type_dst;
+ m_vars[new_index + k].type.src =
+ m_vars_extra[new_index + k].type_src = type_src;
+ m_vars[new_index + k].type.dst =
+ m_vars_extra[new_index + k].type_dst = type_dst;
m_vars_extra[new_index + k].alloc = m_vars[new_index + k].alloc;
m_vars_extra[new_index + k].is_arr_ptr_el = 1;
// Gets in dependencies of the previous offload via the stream "m_stream".
// Out argument in_deps_amount - address of amount of the dependencies
-// Out argument in_deps - array of dependencies.
+// Out argument in_deps - address of array of dependencies.
// Description of the dependencies scheme for streams :
// ----------------------------------------------------
// Every offload forms DAG consisted of 3 nodes:
mic_proxy_fs_root = 0;
}
- if (mic_library_path != 0) {
- free(mic_library_path);
- mic_library_path = 0;
+ if (knc_library_path != 0) {
+ free(knc_library_path);
+ knc_library_path = 0;
+ }
+
+ if (knl_library_path != 0) {
+ free(knl_library_path);
+ knl_library_path = 0;
}
// destroy thread key
OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ... done\n");
}
+typedef std::pair<int, micLcpuMask*> deviceLcpu; // (physical device index, CPU mask pointer); the pointer is 0 when no CPU subset was given
+typedef std::list<deviceLcpu> deviceLcpuList; // ordered list of such pairs, one per selected device
+
+static int process_offload_devices( // returns the number of device entries parsed, or 0 after reporting a parse/range error
+ const char *env_var, // device specification text; it is copied before parsing, the original is not modified
+ uint32_t num_devices, // every device index must satisfy 0 <= index < num_devices
+ deviceLcpuList &device_cpu_list // receives one (device index, CPU mask or 0) pair per entry; pairs appended before an error are left in place
+)
+{
+ // Parses env_var: a comma separated list of physical device indexes, each
+ // optionally qualified by a logical CPU subset in brackets, e.g. 0[60,70-80]
+ char *buf = strdup(env_var); // private copy: the parser overwrites delimiters with '\0'
+ if (buf == NULL)
+ LIBOFFLOAD_ERROR(c_malloc); // NOTE(review): buf is used below as written; whether this macro returns is not visible here
+ char *str = buf; // start of the device entry currently being parsed
+ bool device_set_finished = false; // set once the last device entry has been recognized
+ int num_devices_specified = 0;
+ do {
+ char *dev_ptr = str; // start of the device-index text
+ int dev_len = strcspn(str, "[,"); // length of the device-index text
+ micLcpuMask* cpu_mask = 0; // stays 0 when the entry has no CPU subset
+ if (str[dev_len] == '[') {
+ // CPU subset specified
+ cpu_mask = new micLcpuMask;
+ cpu_mask->reset();
+ char *cpu_ptr = str + dev_len + 1; // first character after '['
+ do {
+ int64_t cnum;
+ bool cpu_set_finished = false; // true when the current item is closed by ']'
+ int cpu_len = strcspn(cpu_ptr, ",-]"); // length of the next CPU number
+ if (cpu_ptr[cpu_len] == ',' || cpu_ptr[cpu_len] == ']') {
+ // A single CPU specified
+ cpu_set_finished = cpu_ptr[cpu_len] == ']';
+ cpu_ptr[cpu_len] = '\0'; // terminate the number in place
+ // Convert cpu string to an int
+ if (!__offload_parse_int_string(cpu_ptr, cnum)) {
+ LIBOFFLOAD_ERROR(c_mic_init7);
+ delete cpu_mask;
+ free(buf);
+ return 0;
+ } else {
+ OFFLOAD_DEBUG_TRACE(3,
+ "Single CPU %d selected\n", cnum); // NOTE(review): cnum is int64_t but the format uses %d - confirm
+ cpu_mask->set(cnum); // NOTE(review): cnum is not range-checked before set() - confirm this is safe for micLcpuMask
+ }
+ cpu_ptr = cpu_ptr + cpu_len + 1; // step past the delimiter
+ if (cpu_set_finished) {
+ break;
+ }
+ } else if (cpu_ptr[cpu_len] == '-') {
+ int64_t range_start, range_end;
+ // A range of CPUs specified
+ cpu_ptr[cpu_len] = '\0'; // terminate the range-start number in place
+ // Convert cpu string to an int
+ if (!__offload_parse_int_string(cpu_ptr, range_start)) {
+ LIBOFFLOAD_ERROR(c_mic_init8);
+ delete cpu_mask;
+ free(buf);
+ return 0;
+ } else {
+ OFFLOAD_DEBUG_TRACE(3,
+ "Start of CPU range specified as %d\n",
+ range_start); // NOTE(review): int64_t argument with %d - confirm
+ cpu_ptr = cpu_ptr + cpu_len + 1; // step past the '-'
+ cpu_len = strcspn(cpu_ptr, ",]"); // length of the range-end number
+ if (cpu_ptr[cpu_len] == ',' ||
+ cpu_ptr[cpu_len] == ']') {
+ cpu_set_finished = cpu_ptr[cpu_len] == ']';
+ cpu_ptr[cpu_len] = '\0';
+ // Convert cpu string to an int
+ if (!__offload_parse_int_string(
+ cpu_ptr, range_end)) {
+ LIBOFFLOAD_ERROR(c_mic_init9);
+ delete cpu_mask;
+ free(buf);
+ return 0;
+ } else {
+ OFFLOAD_DEBUG_TRACE(3,
+ "End of CPU range specified as %d\n",
+ range_end); // NOTE(review): int64_t argument with %d - confirm
+ if (range_end < range_start) {
+ LIBOFFLOAD_ERROR(c_mic_init10);
+ delete cpu_mask;
+ free(buf);
+ return 0;
+ } else {
+ for (int i=range_start; i<=range_end; i++) // both range ends are included
+ {
+ OFFLOAD_DEBUG_TRACE(3,
+ "CPU %d selected as part of range\n",
+ i);
+ cpu_mask->set(i);
+ }
+ cpu_ptr = cpu_ptr + cpu_len + 1; // step past the delimiter
+ if (cpu_set_finished) {
+ break;
+ }
+ }
+ }
+ } else {
+ LIBOFFLOAD_ERROR(c_mic_init10); // range end not followed by ',' or ']'; same code as the reversed-range case above
+ delete cpu_mask;
+ free(buf);
+ return 0;
+ }
+ }
+ } else {
+ // Error: expected , or - or ] (the text ended before one was found)
+ LIBOFFLOAD_ERROR(c_mic_init11);
+ delete cpu_mask;
+ free(buf);
+ return 0;
+ }
+ } while (true);
+ // Point to next device specification
+ str = cpu_ptr; // cpu_ptr is just past the closing ']'
+ if (*str == '\0') {
+ device_set_finished = true;
+ } else {
+ // Skip the character after a device specification (assumed to be a comma; it is not checked)
+ str++;
+ }
+ } else if (str[dev_len] == ',') {
+ // CPU subset not specified
+ // Point to next device specification
+ str = str + dev_len + 1;
+ } else {
+ // No more device specifications
+ device_set_finished = true;
+ }
+ dev_ptr[dev_len] = '\0'; // terminate the device-index text, overwriting the '[' or ',' that followed it (if any)
+ // Convert device string to an int
+ int64_t num;
+ if (!__offload_parse_int_string(dev_ptr, num)) {
+ LIBOFFLOAD_ERROR(c_mic_init5);
+ delete cpu_mask;
+ free(buf);
+ return 0;
+ }
+ if (num < 0 || num >= num_devices) { // reject indexes outside [0, num_devices)
+ LIBOFFLOAD_ERROR(c_mic_init6, num);
+ delete cpu_mask;
+ free(buf);
+ return 0;
+ }
+ OFFLOAD_DEBUG_TRACE(3, "Offloadable MIC = %d\n", num); // NOTE(review): int64_t argument with %d - confirm
+ // Save the specified physical device and cpu mask
+ device_cpu_list.push_back(make_pair(num, cpu_mask)); // the list now holds the mask pointer; it is not deleted in this function
+ num_devices_specified++;
+
+ if (device_set_finished) {
+ break;
+ }
+ } while (true);
+
+ free(buf);
+ return num_devices_specified;
+}
+
static void __offload_init_library_once(void)
{
COIRESULT res;
uint32_t num_devices;
- std::bitset<MIC_ENGINES_MAX> devices;
+ deviceLcpuList device_cpu_list;
prefix = report_get_message_str(c_report_host);
// initialize trace
console_enabled = new_val & 0x0f;
}
}
-
+
+ OFFLOAD_DEBUG_TRACE(2, "---- Start of environment variable processing\n");
env_var = getenv(offload_report_envname);
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- %s=%s\n",
+ offload_report_envname, env_var);
int64_t env_val;
if (__offload_parse_int_string(env_var, env_val)) {
if (env_val == OFFLOAD_REPORT_1 ||
env_val == OFFLOAD_REPORT_2 ||
env_val == OFFLOAD_REPORT_3) {
offload_report_level = env_val;
+ OFFLOAD_DEBUG_TRACE(2, "Offload report level set to %d\n",
+ offload_report_level);
}
else {
LIBOFFLOAD_ERROR(c_invalid_env_report_value,
else if (!offload_report_level) {
env_var = getenv(timer_envname);
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- %s=%s\n", timer_envname, env_var);
timer_enabled = atoi(env_var);
+ OFFLOAD_DEBUG_TRACE(2, "Timer enable flag set to %d\n",
+ timer_enabled);
}
}
return;
}
+ // Process OFFLOAD_NODES, specification of physical MICs available
+ env_var = getenv("OFFLOAD_NODES");
+ if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- OFFLOAD_NODES=%s\n", env_var);
+ // Pass env var on to COI
+ char * new_env_var =
+ (char*) malloc(sizeof("COI_OFFLOAD_NODES=") +
+ strlen(env_var) + 1);
+ if (new_env_var == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+ sprintf(new_env_var, "COI_OFFLOAD_NODES=%s", env_var);
+ putenv(new_env_var);
+ OFFLOAD_DEBUG_TRACE(2, "Setting COI_OFFLOAD_NODES = %s \n", getenv("COI_OFFLOAD_NODES"));
+
+ // value is composed of comma separated physical device indexes
+ char *buf = strdup(env_var);
+ if (buf == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+ char *str, *ptr;
+ int num_mics = 0;
+ for (str = strtok_r(buf, ",", &ptr); str != 0;
+ str = strtok_r(0, ",", &ptr)) {
+ // count this MIC
+ num_mics++;
+ }
+ OFFLOAD_DEBUG_TRACE(2, "Number of offloadable MICs = %d\n", num_mics);
+ free(buf);
+ }
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "OFFLOAD_NODES is not set\n");
+ }
+
// get number of devices installed in the system
res = COI::EngineGetCount(COI_ISA_MIC, &num_devices);
if (res != COI_SUCCESS) {
num_devices = MIC_ENGINES_MAX;
}
- // fill in the list of devices that can be used for offloading
+ // Determine devices & cpus that can be used for offloading
env_var = getenv("OFFLOAD_DEVICES");
- if (env_var != 0) {
+ if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- OFFLOAD_DEVICES=%s\n", env_var);
if (strcasecmp(env_var, "none") != 0) {
- // value is composed of comma separated physical device indexes
- char *buf = strdup(env_var);
- if (buf == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- char *str, *ptr;
- for (str = strtok_r(buf, ",", &ptr); str != 0;
- str = strtok_r(0, ",", &ptr)) {
- // convert string to an int
- int64_t num;
- if (!__offload_parse_int_string(str, num)) {
- LIBOFFLOAD_ERROR(c_mic_init5);
-
- // fallback to using all installed devices
- devices.reset();
- for (int i = 0; i < num_devices; i++) {
- devices.set(i);
- }
- break;
- }
- if (num < 0 || num >= num_devices) {
- LIBOFFLOAD_ERROR(c_mic_init6, num);
- continue;
- }
- devices.set(num);
+ mic_engines_total =
+ process_offload_devices(
+ env_var, num_devices, device_cpu_list);
+ if (mic_engines_total > 0) {
+ OFFLOAD_DEBUG_TRACE(2, "Valid value, %d device(s) specified\n",
+ mic_engines_total);
+ }
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "Invalid value, will not offload\n");
+ return;
}
- free(buf);
+ }
+ else {
+ // No need to continue since no offload devices
+ return;
}
}
else {
- // use all available devices
+ OFFLOAD_DEBUG_TRACE(2, "OFFLOAD_DEVICES is not set\n");
+ }
+ if (mic_engines_total == 0) {
+ // Fallback to using all available devices and all CPUs on each
+ OFFLOAD_DEBUG_TRACE(2, "Fallback to all devices\n");
+ device_cpu_list.clear();
+ mic_engines_total = 0;
for (int i = 0; i < num_devices; i++) {
COIENGINE engine;
res = COI::EngineGetHandle(COI_ISA_MIC, i, &engine);
if (res == COI_SUCCESS) {
- devices.set(i);
+ device_cpu_list.push_back(make_pair(i, (micLcpuMask*)0));
+ OFFLOAD_DEBUG_TRACE(2, "Device %d is available\n", i);
+ mic_engines_total++;
}
}
}
-
- mic_engines_total = devices.count();
-
+
// no need to continue if there are no devices to offload to
if (mic_engines_total <= 0) {
return;
}
- // initialize indexes for available devices
+ // Initialize indexes for available devices
mic_engines = new Engine[mic_engines_total];
- for (int p_idx = 0, l_idx = 0; p_idx < num_devices; p_idx++) {
- if (devices[p_idx]) {
- mic_engines[l_idx].set_indexes(l_idx, p_idx);
- l_idx++;
+ std::list<deviceLcpu>::iterator deviceIterator;
+ int l_idx = 0;
+ for (deviceIterator = device_cpu_list.begin();
+ deviceIterator != device_cpu_list.end();
+ deviceIterator++)
+ {
+ deviceLcpu device_mask_pair = *deviceIterator;
+ int device_num = device_mask_pair.first;
+ micLcpuMask *device_mask = device_mask_pair.second;
+
+ mic_engines[l_idx].set_indexes(l_idx, device_num);
+ mic_engines[l_idx].set_cpu_mask(device_mask);
+ OFFLOAD_DEBUG_TRACE(2,
+ "Logical MIC%d => Physical MIC%d\n", l_idx, device_num);
+ if (device_mask != NULL) {
+ std::string cpu_string =
+ device_mask->to_string<
+ char,
+ std::string::traits_type,
+ std::string::allocator_type>();
+ OFFLOAD_DEBUG_TRACE(2, " CPUs: %s\n", cpu_string.data());
+ }
+ else {
+ OFFLOAD_DEBUG_TRACE(2, " CPUs: all\n");
}
+ l_idx++;
}
-
+
// Get DMA channel count to pass it to COI
env_var = getenv("OFFLOAD_DMA_CHANNEL_COUNT");
- if (env_var != 0) {
+ if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- OFFLOAD_DMA_CHANNEL_COUNT=%s\n", env_var);
int64_t new_val;
if (__offload_parse_int_string(env_var, new_val)) {
mic_dma_channel_count = new_val;
+ OFFLOAD_DEBUG_TRACE(2, "Using %d DMA channels\n",
+ mic_dma_channel_count);
}
else {
LIBOFFLOAD_ERROR(c_invalid_env_var_value,
"OFFLOAD_DMA_CHANNEL_COUNT");
}
}
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "OFFLOAD_DMA_CHANNEL_COUNT is not set\n");
+ }
// Set COI_HOST_THREAD_AFFINITY if OFFLOAD_HOST_THREAD_AFFINITY is set.
// Use putenv instead of setenv as Windows has no setenv.
// Note: putenv requires its argument can't be freed or modified.
// So no free after call to putenv or elsewhere.
env_var = getenv("OFFLOAD_HOST_THREAD_AFFINITY");
- if (env_var != 0) {
+ if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- OFFLOAD_HOST_THREAD_AFFINITY=%s\n", env_var);
char * new_env_var =
(char*) malloc(sizeof("COI_HOST_THREAD_AFFINITY=") +
- strlen(env_var));
- if (new_env_var == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
+ strlen(env_var) + 1);
+ if (new_env_var == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
sprintf(new_env_var, "COI_HOST_THREAD_AFFINITY=%s", env_var);
putenv(new_env_var);
+ OFFLOAD_DEBUG_TRACE(2, "Setting COI_HOST_THREAD_AFFINITY = %s \n",
+ getenv("COI_HOST_THREAD_AFFINITY"));
+ }
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "OFFLOAD_HOST_THREAD_AFFINITY is not set\n");
}
- // library search path for device binaries
+ // library search path for KNC device binaries
env_var = getenv("MIC_LD_LIBRARY_PATH");
if (env_var != 0) {
- mic_library_path = strdup(env_var);
- if (mic_library_path == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- }
-
-
- // find target executable to be used if main application is not an
- // offload build application.
- const char *base_name = "offload_main";
- if (mic_library_path != 0) {
- char *buf = strdup(mic_library_path);
- if (buf == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- char *try_name = (char*) alloca(strlen(mic_library_path) +
- strlen(base_name) + 2);
- char *dir, *ptr;
-
- for (dir = strtok_r(buf, PATH_SEPARATOR, &ptr); dir != 0;
- dir = strtok_r(0, PATH_SEPARATOR, &ptr)) {
- // compose a full path
- sprintf(try_name, "%s/%s", dir, base_name);
-
- // check if such file exists
- struct stat st;
- if (stat(try_name, &st) == 0 && S_ISREG(st.st_mode)) {
- mic_device_main = strdup(try_name);
- if (mic_device_main == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- break;
- }
- }
+ OFFLOAD_DEBUG_TRACE(2, "---- MIC_LD_LIBRARY_PATH=%s\n", env_var);
+ knc_library_path = strdup(env_var);
+ if (knc_library_path == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+ OFFLOAD_DEBUG_TRACE(2, "KNC library path set to %s\n", knc_library_path);
+ }
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "MIC_LD_LIBRARY_PATH is not set\n");
+ }
- free(buf);
+ // library search path for KNL device binaries
+ env_var = getenv("LD_LIBRARY_PATH");
+ if (env_var != 0) {
+ OFFLOAD_DEBUG_TRACE(2, "---- LD_LIBRARY_PATH=%s\n", env_var);
+ knl_library_path = strdup(env_var);
+ if (knl_library_path == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+ OFFLOAD_DEBUG_TRACE(2, "KNL library path set to %s\n", knl_library_path);
+ }
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "LD_LIBRARY_PATH is not set\n");
}
// memory size reserved for COI buffers
env_var = getenv("MIC_BUFFERSIZE");
- if (env_var != 0) {
+ if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- MIC_BUFFERSIZE=%s\n", env_var);
uint64_t new_size;
if (__offload_parse_size_string(env_var, new_size)) {
mic_buffer_size = new_size;
+ OFFLOAD_DEBUG_TRACE(2,
+ "Reserved memory for COI buffers set to %lld bytes\n",
+ mic_buffer_size);
}
else {
LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_BUFFERSIZE");
}
}
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "MIC_BUFFERSIZE is not set\n");
+ }
// memory size reserved for 4K pages for COI buffers
env_var = getenv("MIC_4K_BUFFER_RESERVE_SIZE");
- if (env_var != 0) {
+ if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- MIC_4K_BUFFER_RESERVE_SIZE=%s\n", env_var);
uint64_t new_size;
if (__offload_parse_size_string(env_var, new_size)) {
mic_4k_buffer_size = new_size;
+ OFFLOAD_DEBUG_TRACE(2,
+ "Reserved memory for 4K COI buffers set to %lld bytes\n",
+ mic_4k_buffer_size);
}
else {
LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_4K_BUFFER_RESERVE_SIZE");
}
}
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "MIC_4K_BUFFER_RESERVE_SIZE is not set\n");
+ }
// memory size reserved for 2M pages for COI buffers
env_var = getenv("MIC_2M_BUFFER_RESERVE_SIZE");
- if (env_var != 0) {
+ if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- MIC_2M_BUFFER_RESERVE_SIZE=%s\n", env_var);
uint64_t new_size;
if (__offload_parse_size_string(env_var, new_size)) {
mic_2m_buffer_size = new_size;
+ OFFLOAD_DEBUG_TRACE(2,
+ "Reserved memory for 2M COI buffers set to %lld bytes\n",
+ mic_2m_buffer_size);
}
else {
- LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_2M_BUFFER_RESERVE_SIZE");
+ LIBOFFLOAD_ERROR(c_invalid_env_var_value,
+ "MIC_2M_BUFFER_RESERVE_SIZE");
}
}
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "MIC_2M_BUFFER_RESERVE_SIZE is not set\n");
+ }
// determine stacksize for the pipeline on the device
env_var = getenv("MIC_STACKSIZE");
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- MIC_STACKSIZE=%s\n", env_var);
uint64_t new_size;
if (__offload_parse_size_string(env_var, new_size) &&
(new_size >= 16384) && ((new_size & 4095) == 0)) {
mic_stack_size = new_size;
+ OFFLOAD_DEBUG_TRACE(2, "MIC stack size set to %lld bytes\n",
+ mic_stack_size);
}
else {
LIBOFFLOAD_ERROR(c_mic_init3);
}
}
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "MIC_STACKSIZE is not set\n");
+ }
// proxy I/O
env_var = getenv("MIC_PROXY_IO");
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- MIC_PROXY_IO=%s\n", env_var);
int64_t new_val;
if (__offload_parse_int_string(env_var, new_val)) {
mic_proxy_io = new_val;
+ OFFLOAD_DEBUG_TRACE(2, "MIC proxy i/o set to %s\n",
+ mic_proxy_io);
}
else {
LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, "MIC_PROXY_IO");
}
}
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "MIC_PROXY_IO is not set\n");
+ }
+
+
env_var = getenv("MIC_PROXY_FS_ROOT");
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- MIC_PROXY_FS_ROOT=%s\n", env_var);
mic_proxy_fs_root = strdup(env_var);
- if (mic_proxy_fs_root == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
+ if (mic_proxy_fs_root == NULL)
+ LIBOFFLOAD_ERROR(c_malloc);
+ OFFLOAD_DEBUG_TRACE(2, "MIC proxy fs root set to %s\n",
+ mic_proxy_fs_root);
+ }
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "MIC_PROXY_FS_ROOT is not set\n");
}
// Prepare environment for the target process using the following
// environment is duplicated.
env_var = getenv("MIC_ENV_PREFIX");
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- MIC_ENV_PREFIX=%s\n", env_var);
mic_env_vars.set_prefix(env_var);
int len = strlen(env_var);
}
}
}
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "MIC_ENV_PREFIX is not set\n");
+ }
// create key for thread data
if (thread_key_create(&mic_thread_key, Engine::destroy_thread_data)) {
// cpu frequency
cpu_frequency = COI::PerfGetCycleFrequency();
-
+
env_var = getenv(mic_use_2mb_buffers_envname);
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- %s=%s\n",
+ mic_use_2mb_buffers_envname, env_var);
uint64_t new_size;
if (__offload_parse_size_string(env_var, new_size)) {
__offload_use_2mb_buffers = new_size;
+ OFFLOAD_DEBUG_TRACE(2,
+ "Threshold for use of 2M buffers set to %lld\n",
+ __offload_use_2mb_buffers);
}
else {
LIBOFFLOAD_ERROR(c_invalid_env_var_value,
mic_use_2mb_buffers_envname);
}
}
-
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "%s is not set\n", mic_use_2mb_buffers_envname);
+ }
+
env_var = getenv(mic_use_async_buffer_write_envname);
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- %s=%s\n",
+ mic_use_async_buffer_write_envname, env_var);
uint64_t new_size;
if (__offload_parse_size_string(env_var, new_size)) {
__offload_use_async_buffer_write = new_size;
+ OFFLOAD_DEBUG_TRACE(2,
+ "Threshold for async buffer write set to %lld\n",
+ __offload_use_async_buffer_write);
}
}
-
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "%s is not set\n",
+ mic_use_async_buffer_write_envname);
+ }
+
env_var = getenv(mic_use_async_buffer_read_envname);
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- %s=%s\n",
+ mic_use_async_buffer_read_envname, env_var);
uint64_t new_size;
if (__offload_parse_size_string(env_var, new_size)) {
__offload_use_async_buffer_read = new_size;
+ OFFLOAD_DEBUG_TRACE(2,
+ "Threshold for async buffer read set to %lld\n",
+ __offload_use_async_buffer_read);
}
}
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "%s is not set\n",
+ mic_use_async_buffer_read_envname);
+ }
// mic initialization type
env_var = getenv(offload_init_envname);
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- %s=%s\n",
+ offload_init_envname, env_var);
if (strcmp(env_var, "on_offload") == 0) {
__offload_init_type = c_init_on_offload;
+ OFFLOAD_DEBUG_TRACE(2,
+ "A MIC device will be initialized "
+ "on first offload to that device\n");
}
else if (strcmp(env_var, "on_offload_all") == 0) {
__offload_init_type = c_init_on_offload_all;
+ OFFLOAD_DEBUG_TRACE(2,
+ "All MIC devices will be initialized "
+ "on first offload to any device\n");
}
else if (strcmp(env_var, "on_start") == 0) {
__offload_init_type = c_init_on_start;
+ OFFLOAD_DEBUG_TRACE(2,
+ "All MIC devices will be initialized "
+ "at program start\n");
}
else {
LIBOFFLOAD_ERROR(c_invalid_env_var_value, offload_init_envname);
}
}
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "%s is not set\n", offload_init_envname);
+ }
// active wait
env_var = getenv(offload_active_wait_envname);
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- %s=%s\n",
+ offload_active_wait_envname, env_var);
int64_t new_val;
if (__offload_parse_int_string(env_var, new_val)) {
__offload_active_wait = new_val;
+ OFFLOAD_DEBUG_TRACE(2,
+ "Flag to poll on event completion is set to %d\n",
+ __offload_active_wait);
}
else {
LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
offload_active_wait_envname);
}
}
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "%s is not set\n", offload_active_wait_envname);
+ }
+
+ // always wait
+ env_var = getenv(offload_always_wait_envname);
+ if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- %s=%s\n",
+ offload_always_wait_envname, env_var);
+ int64_t new_val;
+ if (__offload_parse_int_string(env_var, new_val)) {
+ __offload_always_wait = new_val;
+ OFFLOAD_DEBUG_TRACE(2,
+ "Flag to poll on event completion is set to %d\n",
+ __offload_active_wait);
+ }
+ else {
+ LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
+ offload_always_wait_envname);
+ }
+ }
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "%s is not set\n", offload_always_wait_envname);
+ }
// omp device num
env_var = getenv(omp_device_num_envname);
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- %s=%s\n",
+ omp_device_num_envname, env_var);
int64_t new_val;
if (__offload_parse_int_string(env_var, new_val) && new_val >= 0) {
__omp_device_num = new_val;
+ OFFLOAD_DEBUG_TRACE(2, "OpenMP default device number is set to %d\n",
+ __omp_device_num);
}
else {
LIBOFFLOAD_ERROR(c_omp_invalid_device_num_env,
omp_device_num_envname);
}
}
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "%s is not set\n", omp_device_num_envname);
+ }
// parallel copy of offload_transfer
env_var = getenv(parallel_copy_envname);
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- %s=%s\n",
+ parallel_copy_envname, env_var);
int64_t new_val;
if (__offload_parse_int_string(env_var, new_val) && new_val >= 0) {
__offload_parallel_copy = new_val;
+ OFFLOAD_DEBUG_TRACE(2,
+ "Flag for using async buffer copy is set to %d\n",
+ __offload_parallel_copy);
}
else {
LIBOFFLOAD_ERROR(c_invalid_env_var_value,
parallel_copy_envname);
}
}
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "%s is not set\n", parallel_copy_envname);
+ }
// use COI interface for noncontiguous arrays transfer
env_var = getenv(use_coi_noncontiguous_transfer_envname);
if (env_var != 0 && *env_var != '\0') {
+ OFFLOAD_DEBUG_TRACE(2, "---- %s=%s\n",
+ use_coi_noncontiguous_transfer_envname, env_var);
uint64_t new_size;
if (__offload_parse_size_string(env_var, new_size)) {
__offload_use_coi_noncontiguous_transfer = new_size;
+ OFFLOAD_DEBUG_TRACE(2,
+ "Flag for using new COI noncontiguous API is set to %d\n",
+ __offload_use_coi_noncontiguous_transfer);
}
else {
LIBOFFLOAD_ERROR(c_invalid_env_var_value,
use_coi_noncontiguous_transfer_envname);
}
}
+ else {
+ OFFLOAD_DEBUG_TRACE(2, "%s is not set\n",
+ use_coi_noncontiguous_transfer_envname);
+ }
+
+ OFFLOAD_DEBUG_TRACE(2, "---- End of environment variable processing\n");
// init ORSL
ORSL::init();
extern "C" bool __offload_register_image(const void *target_image)
{
const struct Image *image = static_cast<const struct Image*>(target_image);
-
- // decode image
- const char *name = image->data;
const void *data = image->data + strlen(image->data) + 1;
uint64_t size = image->size;
- char *origin = (char *) malloc(strlen(image->data) + 1);
uint64_t offset = 0;
- const char *host_name = image->data;
+
+ // decode image
+ const char *fat_name = image->data;
+ char *mic_name = (char *) malloc(strlen(image->data) + 1);
+ char *host_name = (char *) malloc(strlen(image->data));
int i;
- if (origin == NULL)
+ if ((mic_name == NULL) || (host_name == NULL))
LIBOFFLOAD_ERROR(c_malloc);
// The origin name is the name of the file on the host
// use the host file name of the fat binary.
// Driver prepends the host file name ending with "?"
// to the image->data name so need to extract the string
+ // name format: <mic_name>?<origin>
+
+ // Get <mic_name>
i = 0;
- while (*host_name != '\0' && *host_name != '?') {
- origin[i] = *host_name;
- host_name++;
+ while ((*fat_name != '\0') && (*fat_name != '?')) {
+ mic_name[i] = *fat_name;
+ fat_name++;
i++;
}
- origin[i] = '\0';
- // Implies the host name does not exist which really should
- // not occur. Allow this since only consumer is Vtune.
- if ((i == 0) || (*host_name != '?')) {
- free(origin);
- origin = 0;
+
+ // Remove the host file name by inserting end of string marker
+ mic_name[i] = '\0';
+
+ // Get <host_name>
+ if (*fat_name == '?') {
+ // The string following "?" is the name of the host file name.
+ fat_name++;
+ i = 0;
+ while (*fat_name != '\0') {
+ host_name[i] = *fat_name;
+ fat_name++;
+ i++;
+ }
+ host_name[i] = '\0';
+ }
+ else {
+ // Windows current does not have host name
+ free(host_name);
+ host_name = 0;
}
// our actions depend on the image type
const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
switch (hdr->e_type) {
case ET_EXEC:
+ __current_image_is_dll = false;
// Each offload application is supposed to have only one target
// image representing target executable.
// No thread synchronization is required here as the initialization
LIBOFFLOAD_ERROR(c_multiple_target_exes);
exit(1);
}
- __target_exe = new TargetImage(name, data, size, origin, offset);
+ __target_exe = new TargetImage(mic_name, data, size, host_name, offset);
// Registration code for execs is always called from the context
// of main and thus we can safely call any function here,
case ET_DYN:
{
- char *fullname = origin;
- // We add the library to a list of pending libraries
- __target_libs_lock.lock();
- __target_libs = true;
- __target_libs_list.push_back(
- TargetImage(name, data, size, fullname, offset));
- __target_libs_lock.unlock();
- // If __target_exe is set, then main has started running
- // If not main, then we can't do anything useful here
- // because this registration code is called from DllMain
- // context (on windows).
- if (__target_exe != 0) {
- // There is no need to delay loading the library
- if (!__offload_init_library()) {
- // Couldn't validate library as a fat offload library
- LIBOFFLOAD_ERROR(c_unknown_binary_type);
- exit(1);
- }
+ char * fullname = NULL;
+ __current_image_is_dll = true;
+ // We add the library to a list of pending libraries
+ __target_libs_lock.lock();
+ __target_libs = true;
+ __target_libs_list.push_back(
+ TargetImage(mic_name, data, size, fullname, offset));
+ __target_libs_lock.unlock();
+ // If __target_exe is set, then main has started running
+ // If not main, then we can't do anything useful here
+ // because this registration code is called from DllMain
+ // context (on windows).
+ if (__target_exe != 0) {
+ // There is no need to delay loading the library
+ if (!__offload_init_library()) {
+ // Couldn't validate library as a fat offload library
+ LIBOFFLOAD_ERROR(c_unknown_binary_type);
+ exit(1);
}
+ }
return true;
}
}
}
+// When dlopen is used dlclose may happen after the COI process
+// is destroyed. In which case images cannot be unloaded and should
+// be skipped. So track if coi has been unloaded.
+static bool coi_may_have_been_unloaded = false;
+
extern "C" void __offload_unregister_image(const void *target_image)
{
// Target image is packed as follows:
Offload_Timer_Print();
}
+ coi_may_have_been_unloaded = true;
+
+ // Do not unload the MYO library if it loaded in dll.
+ if (!__myo_init_in_so)
+ {
#ifdef MYO_SUPPORT
- __offload_myoFini();
+ __offload_myoFini();
#endif // MYO_SUPPORT
- __offload_fini_library();
+ __offload_fini_library();
+ }
}
- else if (hdr->e_type == ET_DYN) {
+ else if ((hdr->e_type == ET_DYN) && !coi_may_have_been_unloaded) {
for (int i = 0; i < mic_engines_total; i++) {
mic_engines[i].unload_library(data, name);
}
LIBOFFLOAD_ABORT;
}
// if signal is removed by wait completing
- else if (task == SIGNAL_IS_REMOVED) {
+ else if (task == SIGNAL_HAS_COMPLETED) {
return (true);
}
return task->is_signaled();
_Offload_stream handle // stream to destroy
)
{
- __offload_init_library();
-
+ if (Stream::get_streams_count() == 0) {
+ LIBOFFLOAD_ERROR(c_offload_streams_are_absent);
+ LIBOFFLOAD_ABORT;
+ }
// check target value
if (device < 0) {
LIBOFFLOAD_ERROR(c_offload_signaled1, device);
return(true);
}
-int _Offload_stream_completed(int device, _Offload_stream handler)
+// Destroys the stream identified by HANDLE on the device that owns it.
+// Reports an error and invokes LIBOFFLOAD_ABORT when no streams exist or
+// when HANDLE cannot be found; otherwise returns true.
+int _Offload_stream_delete(
+ _Offload_stream handle // stream to destroy
+ )
{
- __offload_init_library();
+ // The owning device is unknown until the stream has been found, so start
+ // from -1: the "no stream" error report below must not read an
+ // indeterminate value.
+ int device = -1; // MIC device number
+ Stream * stream;
- // check index value
- if (device < 0) {
- LIBOFFLOAD_ERROR(c_offload_signaled1, device);
+ if (Stream::get_streams_count() == 0) {
+ LIBOFFLOAD_ERROR(c_offload_streams_are_absent);
LIBOFFLOAD_ABORT;
}
- device %= mic_engines_total;
+ stream = Stream::find_stream(handle, false);
+ // the stream was not created or was destroyed
+ if (!stream) {
+ LIBOFFLOAD_ERROR(c_offload_no_stream, device);
+ LIBOFFLOAD_ABORT;
+ }
+
+ // The stream itself records which engine it belongs to.
+ device = stream->get_device();
+
+ mic_engines[device].stream_destroy(handle);
+
+ return(true);
+}
+int _Offload_stream_completed(int device, _Offload_stream handler)
+{
+ if (Stream::get_streams_count() == 0) {
+ LIBOFFLOAD_ERROR(c_offload_streams_are_absent);
+ LIBOFFLOAD_ABORT;
+ }
+ // check device index value
+ if (device < -1) {
+ LIBOFFLOAD_ERROR(c_offload_signaled1, device);
+ LIBOFFLOAD_ABORT;
+ }
+ else if (device > -1) {
+ device %= mic_engines_total;
+ }
// get stream
Stream * stream;
LIBOFFLOAD_ABORT;
}
+ if (device != stream->get_device()) {
+ LIBOFFLOAD_ERROR(c_offload_device_doesnt_match_to_stream,
+ stream->get_device());
+ LIBOFFLOAD_ABORT;
+ }
// find associated async task
OffloadDescriptor *task = stream->get_last_offload();
for (StreamMap::iterator it = stream_map.begin();
it != stream_map.end(); it++) {
Stream * stream = it->second;
+ if (device != -1 && device != stream->get_device()) {
+ continue;
+ }
// find associated async task
OffloadDescriptor *task = stream->get_last_offload();
// offload was completed by offload_wait pragma or wait clause
if (task == 0) {
- return(true);
+ continue;
}
// if even one stream is not completed result is false
if (!task->is_signaled()) {
}
}
+// Returns the result of _Offload_stream_completed for HANDLE.
+// A non-zero HANDLE must name an existing stream, and that stream's own
+// device is used for the query; a zero HANDLE is forwarded with device -1.
+// Reports an error and invokes LIBOFFLOAD_ABORT when no streams exist or
+// when a non-zero HANDLE cannot be found.
+int _Offload_stream_is_empty(_Offload_stream handle)
+{
+ // Stays -1 until a stream has been found, so the "no stream" error
+ // report below never reads an indeterminate value.
+ int device = -1;
+
+ if (Stream::get_streams_count() == 0) {
+ LIBOFFLOAD_ERROR(c_offload_streams_are_absent);
+ LIBOFFLOAD_ABORT;
+ }
+ if (handle != 0) {
+ Stream * stream = Stream::find_stream(handle, false);
+
+ // the stream was not created or was destroyed
+ if (!stream) {
+ LIBOFFLOAD_ERROR(c_offload_no_stream, device);
+ LIBOFFLOAD_ABORT;
+ }
+ device = stream->get_device();
+ }
+ else {
+ device = -1;
+ }
+ // Pass the device resolved above: the stream's own device for a
+ // non-zero handle, or -1 when no particular stream was requested.
+ return _Offload_stream_completed(device, handle);
+}
+
+// Reports whether every stream selected by DEVICE has finished its last
+// offload.
+//   device == -1 : consider all streams
+//   device >= 0  : consider only streams on that device; the index is
+//                  first reduced modulo mic_engines_total
+//   device < -1  : error, LIBOFFLOAD_ABORT is invoked
+// Returns false as soon as one selected stream has a last offload that is
+// not signaled, true otherwise. Also errors out through LIBOFFLOAD_ABORT
+// when no streams exist at all.
+int _Offload_device_streams_completed(int device)
+{
+ if (Stream::get_streams_count() == 0) {
+ LIBOFFLOAD_ERROR(c_offload_streams_are_absent);
+ LIBOFFLOAD_ABORT;
+ }
+ // check index value
+ if (device < -1) {
+ LIBOFFLOAD_ERROR(c_offload_signaled1, device);
+ LIBOFFLOAD_ABORT;
+ }
+ else if (device > -1) {
+ device %= mic_engines_total;
+ }
+
+ // The loop walks a by-value copy of the global stream map.
+ // NOTE(review): presumably a snapshot so the walk is unaffected by
+ // changes to Stream::all_streams - confirm before turning into a reference.
+ StreamMap stream_map = Stream::all_streams;
+ for (StreamMap::iterator it = stream_map.begin();
+ it != stream_map.end(); it++)
+ {
+ Stream * stream = it->second;
+
+ // skip streams that belong to a different device than the one requested
+ if (device != -1 && device != stream->get_device()) {
+ continue;
+ }
+ // find associated async task
+ OffloadDescriptor *task = stream->get_last_offload();
+
+ // offload was completed by offload_wait pragma or wait clause
+ if (task == 0) {
+ continue;
+ }
+ // if even one stream is not completed result is false
+ if (!task->is_signaled()) {
+ return false;
+ }
+ }
+ // no uncompleted streams
+ return true;
+}
+
// IDB support
int __dbg_is_attached = 0;
int __dbg_target_id = -1;
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
m_inout_buf(0),
m_func_desc(0),
m_func_desc_size(0),
+ m_num_in_dependencies(0),
+ m_p_in_dependencies(0),
m_in_deps(0),
m_in_deps_total(0),
m_in_deps_allocated(0),
m_preallocated_alloc(false),
m_traceback_called(false),
m_stream(-1),
+ m_signal(0),
+ m_has_signal(0),
m_omp_async_last_event_type(c_last_not)
{
m_wait_all_devices = index == -1;
return(m_stream);
}
+ Engine& get_device() {
+ return m_device;
+ }
+
+ void* get_signal() {
+ return(m_signal);
+ }
+
+ void set_signal(const void* signal) {
+ m_has_signal = 1;
+ m_signal = const_cast<void*>(signal);
+ }
+
+ void cleanup();
+
+ uint32_t m_event_count;
+ bool m_has_signal;
+
private:
bool offload_wrap(const char *name, bool is_empty,
VarDesc *vars, VarDesc2 *vars2, int vars_total,
bool receive_pointer_data(bool is_async, bool first_run, void * info);
bool scatter_copyout_data();
- void cleanup();
-
bool find_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
int64_t length, bool is_targptr,
bool error_does_not_exist = true);
+
+ void find_device_ptr( int64_t* &device_ptr,
+ void *host_ptr);
+
bool alloc_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
int64_t length, int64_t alloc_disp, int align,
bool is_targptr, bool is_prealloc, bool pin);
bool create_preallocated_buffer(PtrData* ptr_data, void *base);
bool init_static_ptr_data(PtrData *ptr_data);
bool init_mic_address(PtrData *ptr_data);
- bool offload_stack_memory_manager(const void * stack_begin, int routine_id,
- int buf_size, int align, bool *is_new);
+ bool offload_stack_memory_manager(
+ const void * stack_begin,
+ int routine_id,
+ int buf_size,
+ int align,
+ bool thread_specific_function_locals,
+ bool *is_new);
+ char *get_this_threads_cpu_stack_addr(
+ const void * stack_begin,
+ int routine_id,
+ bool thread_specific_function_locals);
+ PtrData *get_this_threads_mic_stack_addr(
+ const void * stack_begin,
+ int routine_id,
+ bool thread_specific_function_locals);
bool nullify_target_stack(COIBUFFER targ_buf, uint64_t size);
bool gen_var_descs_for_pointer_array(int i);
void report_coi_error(error_types msg, COIRESULT res);
_Offload_result translate_coi_error(COIRESULT res) const;
-
+
void setup_omp_async_info();
+
+ void setup_use_device_ptr(int i);
+
+ void register_event_call_back(void (*)(
+ COIEVENT,
+ const COIRESULT,
+ const void*),
+ const COIEVENT *event,
+ const void *info);
+
void register_omp_event_call_back(const COIEVENT *event, const void *info);
-
+
private:
typedef std::list<COIBUFFER> BufferList;
int64_t cpu_disp;
int64_t cpu_offset;
void *alloc;
- CeanReadRanges *read_rng_src;
+ union {
+ CeanReadRanges *read_rng_src;
+ NonContigDesc *noncont_desc;
+ };
CeanReadRanges *read_rng_dst;
int64_t ptr_arr_offset;
bool is_arr_ptr_el;
OmpAsyncLastEventType omp_last_event_type;
+ int64_t pointer_offset;
+ uint16_t type_src;
+ uint16_t type_dst;
};
template<typename T> class ReadArrElements {
// Buffer for transferring copyin/copyout data
COIBUFFER m_inout_buf;
+
// Dependencies
COIEVENT *m_in_deps;
uint32_t m_in_deps_total;
uint32_t m_in_deps_allocated;
COIEVENT *m_out_deps;
uint32_t m_out_deps_total;
- uint32_t m_out_deps_allocated;
+ uint32_t m_out_deps_allocated;
+
+ // 2 variables defines input dependencies for current COI API.
+ // The calls to routines as BufferWrite/PipelineRunFunction/BufferRead
+ // is supposed to have input dependencies.
+ // 2 variables below defines the number and vector of dependencies
+ // in every current moment of offload.
+ // So any phase of offload can use its values as input dependencies
+ // for the COI API that the phase calls.
+ // It means that all phases (of Write, RunFunction,Read) must keep
+ // the variables correct to be used by following phase.
+ // If some consequent offloads are connected (i.e. by the same stream)
+ // the final 2 variables of the offload is used as initial inputs
+ // for the next offload.
+ uint32_t m_num_in_dependencies;
+ COIEVENT *m_p_in_dependencies;
// Stream
_Offload_stream m_stream;
+ // Signal
+ void* m_signal;
+
// Timer data
OffloadHostTimerData *m_timer_data;
// CPU frequency
DLL_LOCAL extern uint64_t cpu_frequency;
-// LD_LIBRARY_PATH for MIC libraries
-DLL_LOCAL extern char* mic_library_path;
+// LD_LIBRARY_PATH for KNC libraries
+DLL_LOCAL extern char* knc_library_path;
+
+// LD_LIBRARY_PATH for KNL libraries
+DLL_LOCAL extern char* knl_library_path;
// stack size for target
DLL_LOCAL extern uint32_t mic_stack_size;
// target executable
DLL_LOCAL extern TargetImage* __target_exe;
+// is true if last loaded image is dll
+DLL_LOCAL extern bool __current_image_is_dll;
+// is true if myo library is loaded when dll is loaded
+DLL_LOCAL extern bool __myo_init_in_so;
+
// IDB support
// Called by the offload runtime after initialization of offload infrastructure
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
+++ /dev/null
-/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-
-#if defined(LINUX) || defined(FREEBSD)
-#include <mm_malloc.h>
-#endif
-
-#include "offload_myo_host.h"
-#include <errno.h>
-#include <malloc.h>
-#include "offload_host.h"
-//#include "offload_util.h"
-
-#define MYO_VERSION1 "MYO_1.0"
-
-extern "C" void __cilkrts_cilk_for_32(void*, void*, uint32_t, int32_t);
-extern "C" void __cilkrts_cilk_for_64(void*, void*, uint64_t, int32_t);
-
-#ifndef TARGET_WINNT
-#pragma weak __cilkrts_cilk_for_32
-#pragma weak __cilkrts_cilk_for_64
-#endif // TARGET_WINNT
-
-static void __offload_myoProcessDeferredTables();
-
-class MyoWrapper {
-public:
- MyoWrapper() : m_lib_handle(0), m_is_available(false)
- {}
-
- bool is_available() const {
- return m_is_available;
- }
-
- bool LoadLibrary(void);
-
- // unloads the library
- void UnloadLibrary(void) {
-// if (m_lib_handle != 0) {
-// DL_close(m_lib_handle);
-// m_lib_handle = 0;
-// }
- }
-
- // Wrappers for MYO client functions
- void LibInit(void *arg, void *func) const {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoinit,
- "%s(%p, %p)\n", __func__, arg, func);
- CheckResult(__func__, m_lib_init(arg, func));
- }
-
- void LibFini(void) const {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myofini, "%s()\n", __func__);
- m_lib_fini();
- }
-
- void* SharedMalloc(size_t size) const {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedmalloc,
- "%s(%lld)\n", __func__, size);
- return m_shared_malloc(size);
- }
-
- void SharedFree(void *ptr) const {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedfree,
- "%s(%p)\n", __func__, ptr);
- m_shared_free(ptr);
- }
-
- void* SharedAlignedMalloc(size_t size, size_t align) const {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedmalloc,
- "%s(%lld, %lld)\n", __func__, size, align);
- return m_shared_aligned_malloc(size, align);
- }
-
- void SharedAlignedFree(void *ptr) const {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedfree,
- "%s(%p)\n", __func__, ptr);
- m_shared_aligned_free(ptr);
- }
-
- void Acquire(void) const {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoacquire,
- "%s()\n", __func__);
- CheckResult(__func__, m_acquire());
- }
-
- void Release(void) const {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myorelease,
- "%s()\n", __func__);
- CheckResult(__func__, m_release());
- }
-
- void HostVarTablePropagate(void *table, int num_entries) const {
- OFFLOAD_DEBUG_TRACE(4, "%s(%p, %d)\n", __func__, table, num_entries);
- CheckResult(__func__, m_host_var_table_propagate(table, num_entries));
- }
-
- void HostFptrTableRegister(void *table, int num_entries,
- int ordered) const {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoregister,
- "%s(%p, %d, %d)\n", __func__, table,
- num_entries, ordered);
- CheckResult(__func__,
- m_host_fptr_table_register(table, num_entries, ordered));
- }
-
- void RemoteThunkCall(void *thunk, void *args, int device) {
- OFFLOAD_DEBUG_TRACE(4, "%s(%p, %p, %d)\n", __func__, thunk, args,
- device);
- CheckResult(__func__, m_remote_thunk_call(thunk, args, device));
- }
-
- MyoiRFuncCallHandle RemoteCall(const char *func, void *args, int device) const {
- OFFLOAD_DEBUG_TRACE(4, "%s(%s, %p, %d)\n", __func__, func, args,
- device);
- return m_remote_call(func, args, device);
- }
-
- void GetResult(MyoiRFuncCallHandle handle) const {
- OFFLOAD_DEBUG_TRACE(4, "%s(%p)\n", __func__, handle);
- CheckResult(__func__, m_get_result(handle));
- }
-
- bool PostInitFuncSupported() const {
- OFFLOAD_DEBUG_TRACE(4, "%s()\n", __func__);
- if (m_feature_available) {
- return m_feature_available(MYO_FEATURE_POST_LIB_INIT) ==
- MYO_SUCCESS;
- } else {
- return false;
- }
- }
-
- void CreateVtableArena();
-
- MyoArena GetVtableArena()const {
- return m_vtable_arena;
- }
-
- void ArenaCreate(
- MyoOwnershipType ownership,
- int consistency,
- MyoArena* arena
- ) const
- {
- OFFLOAD_DEBUG_TRACE(4, "%s(%d, %d, %p)\n",
- __func__, ownership, consistency, arena);
- CheckResult(__func__, m_arena_create(ownership, consistency, arena));
- }
-
- void* SharedAlignedArenaMalloc(
- MyoArena arena,
- size_t size,
- size_t align
- ) const
- {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedarenamalloc,
- "%s(%u, %lld, %lld)\n",
- __func__, arena, size, align);
- return m_arena_aligned_malloc(arena, size, align);
- }
-
- void* SharedAlignedArenaFree(
- MyoArena arena,
- void* ptr
- ) const
- {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedarenafree,
- "%s(%u, %p)\n", __func__, arena, ptr);
- return m_arena_aligned_free(arena, ptr);
- }
-
- void ArenaAcquire(
- MyoArena arena
- ) const
- {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoarenaacquire,
- "%s()\n", __func__);
- CheckResult(__func__, m_arena_acquire(arena));
- }
-
- void ArenaRelease(
- MyoArena arena
- ) const
- {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoarenarelease,
- "%s()\n", __func__);
- CheckResult(__func__, m_arena_release(arena));
- }
-
-private:
- void CheckResult(const char *func, MyoError error) const {
- if (error != MYO_SUCCESS) {
- LIBOFFLOAD_ERROR(c_myowrapper_checkresult, func, error);
- exit(1);
- }
- }
-
-private:
- void* m_lib_handle;
- bool m_is_available;
- int m_post_init_func;
- MyoArena m_vtable_arena;
-
- // pointers to functions from myo library
- MyoError (*m_lib_init)(void*, void*);
- void (*m_lib_fini)(void);
- void* (*m_shared_malloc)(size_t);
- void (*m_shared_free)(void*);
- void* (*m_shared_aligned_malloc)(size_t, size_t);
- void (*m_shared_aligned_free)(void*);
- MyoError (*m_acquire)(void);
- MyoError (*m_release)(void);
- MyoError (*m_host_var_table_propagate)(void*, int);
- MyoError (*m_host_fptr_table_register)(void*, int, int);
- MyoError (*m_remote_thunk_call)(void*, void*, int);
- MyoiRFuncCallHandle (*m_remote_call)(const char*, void*, int);
- MyoError (*m_get_result)(MyoiRFuncCallHandle);
- MyoError (*m_arena_create)(MyoOwnershipType, int, MyoArena*);
- void* (*m_arena_aligned_malloc)(MyoArena, size_t, size_t);
- void* (*m_arena_aligned_free)(MyoArena, void*);
- MyoError (*m_arena_acquire)(MyoArena);
- MyoError (*m_arena_release)(MyoArena);
- // Placeholder until MYO headers support enum type for feature
- MyoError (*m_feature_available)(int feature);
-};
-
-DLL_LOCAL bool MyoWrapper::LoadLibrary(void)
-{
-#ifndef TARGET_WINNT
- const char *lib_name = "libmyo-client.so";
-#else // TARGET_WINNT
- const char *lib_name = "myo-client.dll";
-#endif // TARGET_WINNT
-
- OFFLOAD_DEBUG_TRACE(2, "Loading MYO library %s ...\n", lib_name);
-
- m_lib_handle = DL_open(lib_name);
- if (m_lib_handle == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to load the library. errno = %d\n",
- errno);
- return false;
- }
-
- m_lib_init = (MyoError (*)(void*, void*))
- DL_sym(m_lib_handle, "myoiLibInit", MYO_VERSION1);
- if (m_lib_init == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoiLibInit");
- UnloadLibrary();
- return false;
- }
-
- m_lib_fini = (void (*)(void))
- DL_sym(m_lib_handle, "myoiLibFini", MYO_VERSION1);
- if (m_lib_fini == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoiLibFini");
- UnloadLibrary();
- return false;
- }
-
- m_shared_malloc = (void* (*)(size_t))
- DL_sym(m_lib_handle, "myoSharedMalloc", MYO_VERSION1);
- if (m_shared_malloc == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoSharedMalloc");
- UnloadLibrary();
- return false;
- }
-
- m_shared_free = (void (*)(void*))
- DL_sym(m_lib_handle, "myoSharedFree", MYO_VERSION1);
- if (m_shared_free == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoSharedFree");
- UnloadLibrary();
- return false;
- }
-
- m_shared_aligned_malloc = (void* (*)(size_t, size_t))
- DL_sym(m_lib_handle, "myoSharedAlignedMalloc", MYO_VERSION1);
- if (m_shared_aligned_malloc == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoSharedAlignedMalloc");
- UnloadLibrary();
- return false;
- }
-
- m_shared_aligned_free = (void (*)(void*))
- DL_sym(m_lib_handle, "myoSharedAlignedFree", MYO_VERSION1);
- if (m_shared_aligned_free == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoSharedAlignedFree");
- UnloadLibrary();
- return false;
- }
-
- m_acquire = (MyoError (*)(void))
- DL_sym(m_lib_handle, "myoAcquire", MYO_VERSION1);
- if (m_acquire == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoAcquire");
- UnloadLibrary();
- return false;
- }
-
- m_release = (MyoError (*)(void))
- DL_sym(m_lib_handle, "myoRelease", MYO_VERSION1);
- if (m_release == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoRelease");
- UnloadLibrary();
- return false;
- }
-
- m_host_var_table_propagate = (MyoError (*)(void*, int))
- DL_sym(m_lib_handle, "myoiHostVarTablePropagate", MYO_VERSION1);
- if (m_host_var_table_propagate == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoiHostVarTablePropagate");
- UnloadLibrary();
- return false;
- }
-
- m_host_fptr_table_register = (MyoError (*)(void*, int, int))
- DL_sym(m_lib_handle, "myoiHostFptrTableRegister", MYO_VERSION1);
- if (m_host_fptr_table_register == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoiHostFptrTableRegister");
- UnloadLibrary();
- return false;
- }
-
- m_remote_thunk_call = (MyoError (*)(void*, void*, int))
- DL_sym(m_lib_handle, "myoiRemoteThunkCall", MYO_VERSION1);
- if (m_remote_thunk_call == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoiRemoteThunkCall");
- UnloadLibrary();
- return false;
- }
-
- m_remote_call = (MyoiRFuncCallHandle (*)(const char*, void*, int))
- DL_sym(m_lib_handle, "myoiRemoteCall", MYO_VERSION1);
- if (m_remote_call == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoiRemoteCall");
- UnloadLibrary();
- return false;
- }
-
- m_get_result = (MyoError (*)(MyoiRFuncCallHandle))
- DL_sym(m_lib_handle, "myoiGetResult", MYO_VERSION1);
- if (m_get_result == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoiGetResult");
- UnloadLibrary();
- return false;
- }
-
- m_arena_create = (MyoError (*)(MyoOwnershipType, int, MyoArena*))
- DL_sym(m_lib_handle, "myoArenaCreate", MYO_VERSION1);
- if (m_arena_create == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoArenaCreate");
- UnloadLibrary();
- return false;
- }
-
- m_arena_aligned_malloc = (void* (*)(MyoArena, size_t, size_t))
- DL_sym(m_lib_handle, "myoArenaAlignedMalloc", MYO_VERSION1);
- if (m_arena_aligned_malloc == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoArenaAlignedMalloc");
- UnloadLibrary();
- return false;
- }
-
- m_arena_aligned_free = (void* (*)(MyoArena, void*))
- DL_sym(m_lib_handle, "myoArenaAlignedFree", MYO_VERSION1);
- if (m_arena_aligned_free == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoArenaAlignedFree");
- UnloadLibrary();
- return false;
- }
-
- m_arena_acquire = (MyoError (*)(MyoArena))
- DL_sym(m_lib_handle, "myoArenaAcquire", MYO_VERSION1);
- if (m_acquire == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoArenaAcquire");
- UnloadLibrary();
- return false;
- }
-
- m_arena_release = (MyoError (*)(MyoArena))
- DL_sym(m_lib_handle, "myoArenaRelease", MYO_VERSION1);
- if (m_release == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoArenaRelease");
- UnloadLibrary();
- return false;
- }
-
- // Check for "feature-available" API added in MPSS 3.3.
- // Not finding it is not an error.
- m_feature_available = (MyoError (*)(int))
- DL_sym(m_lib_handle, "myoiSupportsFeature", MYO_VERSION1);
- if (m_feature_available == 0) {
- OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
- "myoiSupportsFeature");
- }
-
- OFFLOAD_DEBUG_TRACE(2, "The library was successfully loaded\n");
-
- // Create arena if supported
- CreateVtableArena();
- OFFLOAD_DEBUG_TRACE(3, "Vtable arena created\n");
-
- m_is_available = true;
-
- return true;
-}
-
-static bool myo_is_available;
-static MyoWrapper myo_wrapper;
-
-void MyoWrapper::CreateVtableArena()
-{
- MyoArena* vtable_arena;
-
- // Check if this MYO supports arenas for vtables
- if (myo_wrapper.PostInitFuncSupported()) {
- // Create arena for vtables
- vtable_arena = (MyoArena *)myo_wrapper.SharedMalloc(sizeof(MyoArena));
- myo_wrapper.ArenaCreate(
- MYO_ARENA_OURS, MYO_NO_CONSISTENCY, vtable_arena);
- m_vtable_arena = *vtable_arena;
- OFFLOAD_DEBUG_TRACE(4, "created arena = %d\n", m_vtable_arena);
- } else {
- m_vtable_arena = 0;
- }
-}
-
-struct MyoTable
-{
- MyoTable(SharedTableEntry *tab, int len) : var_tab(tab), var_tab_len(len)
- {}
-
- SharedTableEntry* var_tab;
- int var_tab_len;
-};
-
-typedef std::list<MyoTable> MyoTableList;
-static MyoTableList __myo_table_list;
-static mutex_t __myo_table_lock;
-static bool __myo_tables = false;
-
-static void __offload_myo_shared_vtable_process(SharedTableEntry *entry);
-static void __offload_myo_shared_table_process(SharedTableEntry *entry);
-static void __offload_myo_shared_init_table_process(InitTableEntry* entry);
-static void __offload_myo_fptr_table_process(FptrTableEntry *entry);
-static void __offload_propagate_shared_vars();
-
-static void __offload_myoLoadLibrary_once(void)
-{
- if (__offload_init_library()) {
- myo_wrapper.LoadLibrary();
- }
-}
-
-static bool __offload_myoLoadLibrary(void)
-{
- OFFLOAD_DEBUG_TRACE(4, "__offload_myoLoadLibrary\n");
- static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
- __offload_run_once(&ctrl, __offload_myoLoadLibrary_once);
-
- return myo_wrapper.is_available();
-}
-
-static void __offload_myoInit_once(void)
-{
- if (!__offload_myoLoadLibrary()) {
- return;
- }
-
- // initialize all devices
- for (int i = 0; i < mic_engines_total; i++) {
- mic_engines[i].init();
- }
-
- // load and initialize MYO library
- OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ...\n");
-
- COIEVENT events[MIC_ENGINES_MAX];
-
- // One entry per device +
- // A pair of entries for the Host postInit func +
- // A pair of entries for the MIC postInit func +
- // end marker
- MyoiUserParams params[MIC_ENGINES_MAX+5];
-
- // Load target library to all devices and
- // create libinit parameters for all devices
- for (int i = 0; i < mic_engines_total; i++) {
- mic_engines[i].init_myo(&events[i]);
-
- params[i].type = MYOI_USERPARAMS_DEVID;
- params[i].nodeid = mic_engines[i].get_physical_index() + 1;
- OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
- i, params[i].type, params[i].nodeid);
- }
-
- // Check if V2 myoLibInit is available
- if (myo_wrapper.PostInitFuncSupported()) {
- // Set the host post libInit function indicator
- params[mic_engines_total].type =
- MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC;
- params[mic_engines_total].nodeid =
- MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_HOST_NODE;
- OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
- mic_engines_total,
- params[mic_engines_total].type, params[mic_engines_total].nodeid);
-
- // Set the host post libInit host function address
- ((MyoiUserParamsPostLibInit*)(¶ms[mic_engines_total+1]))->
- postLibInitHostFuncAddress =
- (void (*)())&__offload_propagate_shared_vars;
- OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %p }\n",
- mic_engines_total+1,
- ((MyoiUserParamsPostLibInit*)(¶ms[mic_engines_total+1]))->
- postLibInitHostFuncAddress);
-
- // Set the target post libInit function indicator
- params[mic_engines_total+2].type =
- MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC;
- params[mic_engines_total+2].nodeid =
- MYOI_USERPARAMS_POST_MYO_LIB_INIT_FUNC_ALL_NODES;
-
- // Set the target post libInit target function name
- ((MyoiUserParamsPostLibInit*)(¶ms[mic_engines_total+3]))->
- postLibInitRemoveFuncName = "--vtable_initializer--";
- OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %s }\n",
- mic_engines_total+3,
- ((MyoiUserParamsPostLibInit*)(¶ms[mic_engines_total+1]))->
- postLibInitRemoveFuncName);
-
- params[mic_engines_total+4].type = MYOI_USERPARAMS_LAST_MSG;
- params[mic_engines_total+4].nodeid = 0;
- OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
- mic_engines_total+4,
- params[mic_engines_total+4].type,
- params[mic_engines_total+4].nodeid);
- } else {
- params[mic_engines_total].type = MYOI_USERPARAMS_LAST_MSG;
- params[mic_engines_total].nodeid = 0;
- OFFLOAD_DEBUG_TRACE(2, "params[%d] = { %d, %d }\n",
- mic_engines_total,
- params[mic_engines_total].type, params[mic_engines_total].nodeid);
- }
-
- // initialize myo runtime on host
- myo_wrapper.LibInit(params, 0);
-
- // wait for the target init calls to finish
- COIRESULT res;
- res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_event_wait, res);
- exit(1);
- }
-
- myo_is_available = true;
- OFFLOAD_DEBUG_TRACE(2, "setting myo_is_available=%d\n", myo_is_available);
-
- OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ... done\n");
-}
-
-static bool __offload_myoInit(void)
-{
- static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
- __offload_run_once(&ctrl, __offload_myoInit_once);
-
- // Check if using V1 myoLibInit
- if (!myo_wrapper.PostInitFuncSupported()) {
- __offload_propagate_shared_vars();
- }
-
- return myo_is_available;
-}
-
-static void __offload_propagate_shared_vars()
-{
- // Propagate pending shared var tables
- if (__myo_tables) {
- mutex_locker_t locker(__myo_table_lock);
-
- if (__myo_tables) {
- // Give tables with MYO so it can propagate to target
- for(MyoTableList::const_iterator it = __myo_table_list.begin();
- it != __myo_table_list.end(); ++it) {
-#ifdef TARGET_WINNT
- for (SharedTableEntry *entry = it->var_tab;
- entry->varName != MYO_TABLE_END_MARKER(); entry++) {
- if (entry->varName == 0) {
- continue;
- }
- myo_wrapper.HostVarTablePropagate(entry, 1);
- OFFLOAD_DEBUG_TRACE(2, "HostVarTablePropagate(%s, 1)\n",
- entry->varName);
- }
-#else // TARGET_WINNT
- myo_wrapper.HostVarTablePropagate(it->var_tab,
- it->var_tab_len);
-#endif // TARGET_WINNT
- }
-
- __myo_table_list.clear();
- __myo_tables = false;
- }
- }
-}
-
-static bool shared_table_entries(
- SharedTableEntry *entry
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
-
- for (; entry->varName != MYO_TABLE_END_MARKER(); entry++) {
-#ifdef TARGET_WINNT
- if (entry->varName == 0) {
- continue;
- }
-#endif // TARGET_WINNT
-
- return true;
- }
-
- return false;
-}
-
-static bool fptr_table_entries(
- FptrTableEntry *entry
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
-
- for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) {
-#ifdef TARGET_WINNT
- if (entry->funcName == 0) {
- continue;
- }
-#endif // TARGET_WINNT
-
- return true;
- }
-
- return false;
-}
-
-extern "C" void __offload_myoRegisterTables(
- InitTableEntry* init_table,
- SharedTableEntry *shared_table,
- FptrTableEntry *fptr_table
-)
-{
- // check whether we need to initialize MYO library. It is
- // initialized only if at least one myo table is not empty
- if (shared_table_entries(shared_table) || fptr_table_entries(fptr_table)) {
- // make sure myo library is loaded
- __offload_myoLoadLibrary();
-
- // register tables
- __offload_myo_shared_table_process(shared_table);
- __offload_myo_fptr_table_process(fptr_table);
- __offload_myo_shared_init_table_process(init_table);
- }
-}
-
-extern "C" bool __offload_myoProcessTables(
- const void* image,
- MYOInitTableList::Node *init_table,
- MYOVarTableList::Node *shared_table,
- MYOVarTableList::Node *shared_vtable,
- MYOFuncTableList::Node *fptr_table
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
-
- // Collect the tables in this .dll/.so
- __offload_myoRegisterTables1(
- init_table, shared_table, shared_vtable, fptr_table);
-
- // Now check what type of module we are dealing with
- if (__offload_target_image_is_executable(image)) {
- OFFLOAD_DEBUG_TRACE(2, "Main encountered\n");
- OFFLOAD_DEBUG_TRACE(2, "MYO initialization not deferred\n");
- // MYO tables across dlls have been collected
- // Now init MYO and process the tables
- __offload_myoProcessDeferredTables();
- // Return true to indicate that atexit needs to be calld by ofldbegin
- return true;
- } else {
- // This is a shared library, either auto-loaded or dynamically loaded
- // If __target_exe is set, then main has started running
- if (__target_exe != 0) {
- // Main is running: this is a dynamic load of a shared library
- // Finish processing the tables in this library
- OFFLOAD_DEBUG_TRACE(2,
- "Dynamically loaded shared library encountered\n");
- OFFLOAD_DEBUG_TRACE(2,
- "MYO initialization not deferred\n");
- __offload_myoProcessDeferredTables();
- } else {
- // Main is not running: this is an auto-loaded shared library
- // Tables have been collected, nothing else to do
- OFFLOAD_DEBUG_TRACE(2,
- "Auto-loaded shared library encountered\n");
- OFFLOAD_DEBUG_TRACE(2, "Deferring initialization of MYO\n");
- }
- return false;
- }
-}
-
-// Process contents of all Var tables
-void MYOVarTableList::process()
-{
- OFFLOAD_DEBUG_TRACE(2, "Process MYO Var tables:\n");
-
- m_lock.lock();
-
- for (Node *n = m_head; n != 0; n = n->next) {
- __offload_myo_shared_table_process(
- (SharedTableEntry*)n->table.entries);
- }
- for (Node *n = m_head; n != 0; n = n->next) {
- remove_table(n);
- }
-
- m_lock.unlock();
-}
-
-// Process contents of all Var tables
-void MYOVarTableList::process_vtable()
-{
- OFFLOAD_DEBUG_TRACE(2, "Process MYO Vtable tables:\n");
-
- m_lock.lock();
-
- for (Node *n = m_head; n != 0; n = n->next) {
- __offload_myo_shared_vtable_process(
- (SharedTableEntry*)n->table.entries);
- }
- for (Node *n = m_head; n != 0; n = n->next) {
- remove_table(n);
- }
-
- m_lock.unlock();
-}
-
-// Process contents of all Func tables
-void MYOFuncTableList::process()
-{
- OFFLOAD_DEBUG_TRACE(2, "Process MYO Func tables:\n");
-
- m_lock.lock();
-
- for (Node *n = m_head; n != 0; n = n->next) {
- __offload_myo_fptr_table_process(
- (FptrTableEntry*)n->table.entries);
- }
- for (Node *n = m_head; n != 0; n = n->next) {
- remove_table(n);
- }
-
- m_lock.unlock();
-}
-
-// Process contents of all Init tables
-void MYOInitTableList::process()
-{
- OFFLOAD_DEBUG_TRACE(2, "Process MYO Init tables:\n");
-
- m_lock.lock();
-
- for (Node *n = m_head; n != 0; n = n->next) {
- __offload_myo_shared_init_table_process(
- (InitTableEntry*)n->table.entries);
- }
- for (Node *n = m_head; n != 0; n = n->next) {
- remove_table(n);
- }
-
- m_lock.unlock();
-}
-
-static void __offload_myoProcessDeferredTables()
-{
- OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__);
-
- // Debug dumps of MYO tables
- if (console_enabled >= 2) {
- __offload_myo_var_tables.dump();
- __offload_myo_vtable_tables.dump();
- __offload_myo_func_tables.dump();
- __offload_myo_init_tables.dump();
- }
-
- if (!__offload_myo_var_tables.is_empty() ||
- !__offload_myo_vtable_tables.is_empty() ||
- !__offload_myo_func_tables.is_empty() ||
- !__offload_myo_init_tables.is_empty())
- {
- OFFLOAD_DEBUG_TRACE(3, "MYO usage detected in program\n");
-
- // Make sure myo library is loaded
- __offload_myoLoadLibrary();
- OFFLOAD_DEBUG_TRACE(3, "Initialized MYO\n");
-
- __offload_myo_var_tables.process();
- __offload_myo_vtable_tables.process_vtable();
- __offload_myo_func_tables.process();
- __offload_myo_init_tables.process();
- OFFLOAD_DEBUG_TRACE(3, "Finished processing MYO tables\n");
- } else {
- OFFLOAD_DEBUG_TRACE(3,
- "MYO tables are empty; Will not initialize MYO\n");
- }
-}
-
-DLL_LOCAL void __offload_myoFini(void)
-{
- if (myo_is_available) {
- OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
-
- COIEVENT events[MIC_ENGINES_MAX];
-
- // kick off myoiLibFini calls on all devices
- for (int i = 0; i < mic_engines_total; i++) {
- mic_engines[i].fini_myo(&events[i]);
- }
-
- // cleanup myo runtime on host
- myo_wrapper.LibFini();
-
- // wait for the target fini calls to finish
- COIRESULT res;
- res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0);
- if (res != COI_SUCCESS) {
- LIBOFFLOAD_ERROR(c_event_wait, res);
- exit(1);
- }
- }
-}
-
-static void __offload_myo_shared_table_process(
- SharedTableEntry *entry
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
-
- SharedTableEntry *start = entry;
- int entries = 0;
-
- // allocate shared memory for vars
- for (; entry->varName != MYO_TABLE_END_MARKER(); entry++) {
-#ifdef TARGET_WINNT
- if (entry->varName == 0) {
- OFFLOAD_DEBUG_TRACE(4,
- "skip registering a NULL MyoSharedTable entry\n");
- continue;
- }
-#endif // TARGET_WINNT
-
- OFFLOAD_DEBUG_TRACE(4, "registering MyoSharedTable entry for %s @%p\n",
- entry->varName, entry);
-
- // Invoke the function to create shared memory
- reinterpret_cast<void(*)(void)>(entry->sharedAddr)();
- entries++;
- }
-
- // and table to the list if it is not empty
- if (entries > 0) {
- mutex_locker_t locker(__myo_table_lock);
- __myo_table_list.push_back(MyoTable(start, entries));
- __myo_tables = true;
- }
-}
-
-static void __offload_myo_shared_vtable_process(
- SharedTableEntry *entry
-)
-{
- SharedTableEntry *start = entry;
- int entries = 0;
-
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
-
- // allocate shared memory for vtables
- for (; entry->varName != MYO_TABLE_END_MARKER(); entry++) {
-#ifdef TARGET_WINNT
- if (entry->varName == 0) {
- OFFLOAD_DEBUG_TRACE(4,
- "skip registering a NULL MyoSharedVTable entry\n");
- continue;
- }
-#endif // TARGET_WINNT
-
- OFFLOAD_DEBUG_TRACE(4,
- "registering MyoSharedVTable entry for %s @%p\n",
- entry->varName, entry);
-
- // Invoke the function to create shared memory
- reinterpret_cast<void(*)(MyoArena)>(entry->sharedAddr)(
- myo_wrapper.GetVtableArena());
- entries++;
- }
-
- // add table to the list if it is not empty
- if (entries > 0) {
- mutex_locker_t locker(__myo_table_lock);
- __myo_table_list.push_back(MyoTable(start, entries));
- __myo_tables = true;
- }
-}
-
-void __offload_myo_shared_init_table_process(InitTableEntry* entry)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
-
-#ifdef TARGET_WINNT
- for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) {
- if (entry->funcName == 0) {
- OFFLOAD_DEBUG_TRACE(4,
- "skip registering a NULL MyoSharedInit entry\n");
- continue;
- }
-
- // Invoke the function to init the shared memory
- OFFLOAD_DEBUG_TRACE(4, "execute MyoSharedInit routine for %s\n",
- entry->funcName);
- entry->func(myo_wrapper.GetVtableArena());
- }
-#else // TARGET_WINNT
- for (; entry->func != 0; entry++) {
- // Invoke the function to init the shared memory
- entry->func(myo_wrapper.GetVtableArena());
- }
-#endif // TARGET_WINNT
-}
-
-static void __offload_myo_fptr_table_process(
- FptrTableEntry *entry
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
-
- FptrTableEntry *start = entry;
- int entries = 0;
-
- for (; entry->funcName != MYO_TABLE_END_MARKER(); entry++) {
-#ifdef TARGET_WINNT
- if (entry->funcName == 0) {
- OFFLOAD_DEBUG_TRACE(4,
- "skip registering a NULL MyoFptrTable entry\n");
- continue;
- }
-#endif // TARGET_WINNT
-
- if (!myo_wrapper.is_available()) {
- *(static_cast<void**>(entry->localThunkAddr)) = entry->funcAddr;
- }
-
- OFFLOAD_DEBUG_TRACE(4, "registering MyoFptrTable entry for %s @%p\n",
- entry->funcName, entry);
-
-#ifdef TARGET_WINNT
- if (myo_wrapper.is_available()) {
- myo_wrapper.HostFptrTableRegister(entry, 1, false);
- }
-#endif // TARGET_WINNT
-
- entries++;
- }
-
-#ifndef TARGET_WINNT
- if (myo_wrapper.is_available() && entries > 0) {
- myo_wrapper.HostFptrTableRegister(start, entries, false);
- }
-#endif // TARGET_WINNT
-}
-
-extern "C" int __offload_myoIsAvailable(int target_number)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%d)\n", __func__, target_number);
-
- if (target_number >= -2) {
- bool is_default_number = (target_number == -2);
-
- if (__offload_myoInit()) {
- if (target_number >= 0) {
- // User provided the device number
- int num = target_number % mic_engines_total;
-
- // reserve device in ORSL
- target_number = ORSL::reserve(num) ? num : -1;
- }
- else {
- // try to use device 0
- target_number = ORSL::reserve(0) ? 0 : -1;
- }
-
- // make sure device is initialized
- if (target_number >= 0) {
- mic_engines[target_number].init();
- }
- }
- else {
- // fallback to CPU
- target_number = -1;
- }
-
- if (target_number < 0 && !is_default_number) {
- LIBOFFLOAD_ERROR(c_device_is_not_available);
- exit(1);
- }
- }
- else {
- LIBOFFLOAD_ERROR(c_invalid_device_number);
- exit(1);
- }
-
- return target_number;
-}
-
-extern "C" void __offload_myoiRemoteIThunkCall(
- void *thunk,
- void *arg,
- int target_number
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%p, %p, %d)\n", __func__, thunk, arg,
- target_number);
-
- myo_wrapper.Release();
- myo_wrapper.RemoteThunkCall(thunk, arg, target_number);
- myo_wrapper.Acquire();
-
- ORSL::release(target_number);
-}
-
-extern "C" void* _Offload_shared_malloc(size_t size)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%lld)\n", __func__, size);
-
- if (__offload_myoLoadLibrary()) {
- return myo_wrapper.SharedMalloc(size);
- }
- else {
- return malloc(size);
- }
-}
-
-extern "C" void _Offload_shared_free(void *ptr)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
-
- if (__offload_myoLoadLibrary()) {
- myo_wrapper.SharedFree(ptr);
- }
- else {
- free(ptr);
- }
-}
-
-extern "C" void* _Offload_shared_aligned_malloc(size_t size, size_t align)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%lld, %lld)\n", __func__, size, align);
-
- if (__offload_myoLoadLibrary()) {
- return myo_wrapper.SharedAlignedMalloc(size, align);
- }
- else {
- if (align < sizeof(void*)) {
- align = sizeof(void*);
- }
- return _mm_malloc(size, align);
- }
-}
-
-extern "C" void _Offload_shared_aligned_free(void *ptr)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
-
- if (__offload_myoLoadLibrary()) {
- myo_wrapper.SharedAlignedFree(ptr);
- }
- else {
- _mm_free(ptr);
- }
-}
-
-extern "C" void _Offload_shared_arena_create(
- MyoOwnershipType ownership,
- int consistency,
- MyoArena* arena
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%d, %d, %p)\n",
- __func__, ownership, consistency, arena);
-
- if (__offload_myoLoadLibrary()) {
- myo_wrapper.ArenaCreate(ownership, consistency, arena);
- }
-}
-
-extern "C" void* _Offload_shared_aligned_arena_malloc(
- MyoArena arena,
- size_t size,
- size_t align
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%u, %lld, %lld)\n",
- __func__, arena, size, align);
-
- if (__offload_myoLoadLibrary()) {
- void *p = myo_wrapper.SharedAlignedArenaMalloc(arena, size, align);
- OFFLOAD_DEBUG_TRACE(3, "%s(%u, %lld, %lld)->%p\n",
- __func__, arena, size, align, p);
- return p;
- }
- else {
- if (align < sizeof(void*)) {
- align = sizeof(void*);
- }
- return _mm_malloc(size, align);
- }
-}
-
-extern "C" void _Offload_shared_aligned_arena_free(
- MyoArena arena,
- void *ptr
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%u, %p)\n", __func__, arena, ptr);
-
- if (__offload_myoLoadLibrary()) {
- myo_wrapper.SharedAlignedArenaFree(arena, ptr);
- }
- else {
- _mm_free(ptr);
- }
-}
-
-extern "C" void _Offload_shared_arena_acquire(
- MyoArena arena
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__, arena);
-
- if (__offload_myoLoadLibrary()) {
- myo_wrapper.ArenaAcquire(arena);
- }
-}
-
-extern "C" void _Offload_shared_arena_release(
- MyoArena arena
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__, arena);
-
- if (__offload_myoLoadLibrary()) {
- myo_wrapper.ArenaRelease(arena);
- }
-}
-
-extern "C" void __intel_cilk_for_32_offload(
- int size,
- void (*copy_constructor)(void*, void*),
- int target_number,
- void *raddr,
- void *closure_object,
- unsigned int iters,
- unsigned int grain_size)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
-
- target_number = __offload_myoIsAvailable(target_number);
- if (target_number >= 0) {
- struct S {
- void *M1;
- unsigned int M2;
- unsigned int M3;
- char closure[];
- } *args;
-
- args = (struct S*) _Offload_shared_malloc(sizeof(struct S) + size);
- if (args == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- args->M1 = raddr;
- args->M2 = iters;
- args->M3 = grain_size;
-
- if (copy_constructor == 0) {
- memcpy(args->closure, closure_object, size);
- }
- else {
- copy_constructor(args->closure, closure_object);
- }
-
- myo_wrapper.Release();
- myo_wrapper.GetResult(
- myo_wrapper.RemoteCall("__intel_cilk_for_32_offload",
- args, target_number)
- );
- myo_wrapper.Acquire();
-
- _Offload_shared_free(args);
-
- ORSL::release(target_number);
- }
- else {
- __cilkrts_cilk_for_32(raddr,
- closure_object,
- iters,
- grain_size);
- }
-}
-
-extern "C" void __intel_cilk_for_64_offload(
- int size,
- void (*copy_constructor)(void*, void*),
- int target_number,
- void *raddr,
- void *closure_object,
- uint64_t iters,
- uint64_t grain_size)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
-
- target_number = __offload_myoIsAvailable(target_number);
- if (target_number >= 0) {
- struct S {
- void *M1;
- uint64_t M2;
- uint64_t M3;
- char closure[];
- } *args;
-
- args = (struct S*) _Offload_shared_malloc(sizeof(struct S) + size);
- if (args == NULL)
- LIBOFFLOAD_ERROR(c_malloc);
- args->M1 = raddr;
- args->M2 = iters;
- args->M3 = grain_size;
-
- if (copy_constructor == 0) {
- memcpy(args->closure, closure_object, size);
- }
- else {
- copy_constructor(args->closure, closure_object);
- }
-
- myo_wrapper.Release();
- myo_wrapper.GetResult(
- myo_wrapper.RemoteCall("__intel_cilk_for_64_offload", args,
- target_number)
- );
- myo_wrapper.Acquire();
-
- _Offload_shared_free(args);
-
- ORSL::release(target_number);
- }
- else {
- __cilkrts_cilk_for_64(raddr,
- closure_object,
- iters,
- grain_size);
- }
-}
+++ /dev/null
-/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-
-#ifndef OFFLOAD_MYO_HOST_H_INCLUDED
-#define OFFLOAD_MYO_HOST_H_INCLUDED
-
-#include <myotypes.h>
-#include <myoimpl.h>
-#include <myo.h>
-
-#include "offload.h"
-// undefine the following since offload.h defines them to malloc and free if __INTEL_OFFLOAD
-// is not defined which is the case when building the offload library
-#undef _Offload_shared_malloc
-#undef _Offload_shared_free
-#undef _Offload_shared_aligned_malloc
-#undef _Offload_shared_aligned_free
-#include "offload_table.h"
-
-// This function retained for compatibility with 15.0
-extern "C" void __offload_myoRegisterTables(
- InitTableEntry *init_table,
- SharedTableEntry *shared_table,
- FptrTableEntry *fptr_table
-);
-
-// Process shared variable, shared vtable and function and init routine tables.
-// In .dlls/.sos these will be collected together.
-// In the main program, all collected tables will be processed.
-extern "C" bool __offload_myoProcessTables(
- const void* image,
- MYOInitTableList::Node *init_table,
- MYOVarTableList::Node *shared_table,
- MYOVarTableList::Node *shared_vtable,
- MYOFuncTableList::Node *fptr_table
-);
-
-extern void __offload_myoFini(void);
-extern bool __offload_myo_init_is_deferred(const void *image);
-
-#endif // OFFLOAD_MYO_HOST_H_INCLUDED
+++ /dev/null
-/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-
-#include "offload_myo_target.h"
-#include "offload_target.h"
-
-extern "C" void __cilkrts_cilk_for_32(void*, void*, uint32_t, int32_t);
-extern "C" void __cilkrts_cilk_for_64(void*, void*, uint64_t, int32_t);
-
-#pragma weak __cilkrts_cilk_for_32
-#pragma weak __cilkrts_cilk_for_64
-
-static void CheckResult(const char *func, MyoError error) {
- if (error != MYO_SUCCESS) {
- LIBOFFLOAD_ERROR(c_myotarget_checkresult, func, error);
- exit(1);
- }
-}
-
-static void __offload_myo_shared_table_process(SharedTableEntry *entry)
-{
- int entries = 0;
- SharedTableEntry *t_start;
-
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
-
- t_start = entry;
- while (t_start->varName != 0) {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_mic_myo_shared,
- "myo shared entry name = \"%s\" addr = %p\n",
- t_start->varName, t_start->sharedAddr);
- t_start++;
- entries++;
- }
-
- if (entries > 0) {
- OFFLOAD_DEBUG_TRACE(3, "myoiMicVarTableRegister(%p, %d)\n", entry,
- entries);
- CheckResult("myoiMicVarTableRegister",
- myoiMicVarTableRegister(entry, entries));
- }
-}
-
-static void __offload_myo_shared_vtable_process(SharedTableEntry *entry)
-{
- int entries = 0;
- SharedTableEntry *t_start;
-
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
-
- t_start = entry;
- while (t_start->varName != 0) {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_mic_myo_shared,
- "myo shared vtable entry name"
- " = \"%s\" addr = %p\n",
- t_start->varName, t_start->sharedAddr);
- t_start++;
- entries++;
- }
-
- if (entries > 0) {
- OFFLOAD_DEBUG_TRACE(3, "myoiMicVarTableRegister(%p, %d)\n", entry,
- entries);
- CheckResult("myoiMicVarTableRegister",
- myoiMicVarTableRegister(entry, entries));
- }
-}
-
-static void __offload_myo_fptr_table_process(
- FptrTableEntry *entry
-)
-{
- int entries = 0;
- FptrTableEntry *t_start;
-
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
-
- t_start = entry;
- while (t_start->funcName != 0) {
- OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_mic_myo_fptr,
- "myo fptr entry name = \"%s\" addr = %p\n",
- t_start->funcName, t_start->funcAddr);
- t_start++;
- entries++;
- }
-
- if (entries > 0) {
- OFFLOAD_DEBUG_TRACE(3, "myoiTargetFptrTableRegister(%p, %d, 0)\n",
- entry, entries);
- CheckResult("myoiTargetFptrTableRegister",
- myoiTargetFptrTableRegister(entry, entries, 0));
- }
-}
-
-void __offload_myo_shared_init_table_process(InitTableEntry* entry)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
-
- for (; entry->func != 0; entry++) {
- // Invoke the function to init the shared memory
- OFFLOAD_DEBUG_TRACE(3, "Invoked a shared init function @%p\n",
- (void *)(entry->func));
- entry->func();
- }
-}
-
-extern "C" void __offload_myoAcquire(void)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
-
- CheckResult("myoAcquire", myoAcquire());
-}
-
-extern "C" void __offload_myoRelease(void)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
- CheckResult("myoRelease", myoRelease());
-}
-
-extern "C" void __intel_cilk_for_32_offload_wrapper(void *args_)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
-
- struct S {
- void *M1;
- unsigned int M2;
- unsigned int M3;
- char closure[];
- } *args = (struct S*) args_;
-
- __cilkrts_cilk_for_32(args->M1, args->closure, args->M2, args->M3);
-}
-
-extern "C" void __intel_cilk_for_64_offload_wrapper(void *args_)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
-
- struct S {
- void *M1;
- uint64_t M2;
- uint64_t M3;
- char closure[];
- } *args = (struct S*) args_;
-
- __cilkrts_cilk_for_64(args->M1, args->closure, args->M2, args->M3);
-}
-
-static void __offload_myo_once_init(void)
-{
- CheckResult("myoiRemoteFuncRegister",
- myoiRemoteFuncRegister(
- (MyoiRemoteFuncType) __intel_cilk_for_32_offload_wrapper,
- "__intel_cilk_for_32_offload"));
- CheckResult("myoiRemoteFuncRegister",
- myoiRemoteFuncRegister(
- (MyoiRemoteFuncType) __intel_cilk_for_64_offload_wrapper,
- "__intel_cilk_for_64_offload"));
-}
-
-extern "C" void __offload_myoRegisterTables(
- SharedTableEntry *shared_table,
- FptrTableEntry *fptr_table
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
-
- // one time registration of Intel(R) Cilk(TM) language entries
- static pthread_once_t once_control = PTHREAD_ONCE_INIT;
- pthread_once(&once_control, __offload_myo_once_init);
-
- // register module's tables
- if (shared_table->varName == 0 && fptr_table->funcName == 0) {
- return;
- }
-
- __offload_myo_shared_table_process(shared_table);
- __offload_myo_fptr_table_process(fptr_table);
-}
-
-extern "C" void __offload_myoProcessTables(
- InitTableEntry* init_table,
- SharedTableEntry *shared_table,
- SharedTableEntry *shared_vtable,
- FptrTableEntry *fptr_table
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
-
- // one time registration of Intel(R) Cilk(TM) language entries
- static pthread_once_t once_control = PTHREAD_ONCE_INIT;
- pthread_once(&once_control, __offload_myo_once_init);
-
- // register module's tables
- // check slot-1 of the function table because
- // slot-0 is predefined with --vtable_initializer--
- if (shared_table->varName == 0 &&
- shared_vtable->varName == 0 &&
- fptr_table[1].funcName == 0) {
- return;
- }
-
- __offload_myo_shared_table_process(shared_table);
- __offload_myo_shared_vtable_process(shared_vtable);
- __offload_myo_fptr_table_process(fptr_table);
-}
-
-extern "C" void* _Offload_shared_malloc(size_t size)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%lld)\n", __func__, size);
- return myoSharedMalloc(size);
-}
-
-extern "C" void _Offload_shared_free(void *ptr)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
- myoSharedFree(ptr);
-}
-
-extern "C" void* _Offload_shared_aligned_malloc(size_t size, size_t align)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%lld, %lld)\n", __func__, size, align);
- return myoSharedAlignedMalloc(size, align);
-}
-
-extern "C" void _Offload_shared_aligned_free(void *ptr)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
- myoSharedAlignedFree(ptr);
-}
-
-extern "C" void* _Offload_shared_aligned_arena_malloc(
- MyoArena arena,
- size_t size,
- size_t align
-)
-{
- OFFLOAD_DEBUG_TRACE(
- 3, "%s(%u, %lld, %lld)\n", __func__, arena, size, align);
-
- return myoArenaAlignedMalloc(arena, size, align);
-}
-
-extern "C" void _Offload_shared_aligned_arena_free(
- MyoArena arena,
- void *ptr
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%u, %p)\n", __func__, arena, ptr);
-
- myoArenaAlignedFree(arena, ptr);
-}
-
-extern "C" void _Offload_shared_arena_acquire(
- MyoArena arena
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__, arena);
-
- myoArenaAcquire(arena);
-}
-
-extern "C" void _Offload_shared_arena_release(
- MyoArena arena
-)
-{
- OFFLOAD_DEBUG_TRACE(3, "%s(%u)\n", __func__, arena);
-
- myoArenaRelease(arena);
-}
-
-// temporary workaround for blocking behavior of myoiLibInit/Fini calls
-extern "C" void __offload_myoLibInit()
-{
- OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__);
- CheckResult("myoiLibInit", myoiLibInit(0, 0));
-}
-
-extern "C" void __offload_myoLibFini()
-{
- OFFLOAD_DEBUG_TRACE(3, "%s()\n", __func__);
- myoiLibFini();
-}
+++ /dev/null
-/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of Intel Corporation nor the names of its
- contributors may be used to endorse or promote products derived
- from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-
-#ifndef OFFLOAD_MYO_TARGET_H_INCLUDED
-#define OFFLOAD_MYO_TARGET_H_INCLUDED
-
-
-#include "offload.h"
-// undefine the following since offload.h defines them to malloc and free if __INTEL_OFFLOAD
-// is not defined which is the case when building the offload library
-#undef _Offload_shared_malloc
-#undef _Offload_shared_free
-#undef _Offload_shared_aligned_malloc
-#undef _Offload_shared_aligned_free
-#include "offload_table.h"
-
-// This function retained for compatibility with 15.0
-extern "C" void __offload_myoRegisterTables(
- SharedTableEntry *shared_table,
- FptrTableEntry *fptr_table
-);
-
-// Process shared variable, shared vtable and function and init routine tables.
-// On the target side the contents of the tables are registered with MYO.
-extern "C" void __offload_myoProcessTables(
- InitTableEntry* init_table,
- SharedTableEntry *shared_table,
- SharedTableEntry *shared_vtable,
- FptrTableEntry *fptr_table
-);
-
-extern "C" void __offload_myoAcquire(void);
-extern "C" void __offload_myoRelease(void);
-
-// Call the compiler-generated routines for initializing shared variables.
-// This can only be done after shared memory allocation has been done.
-extern void __offload_myo_shared_init_table_process(InitTableEntry* entry);
-
-// temporary workaround for blocking behavior for myoiLibInit/Fini calls
-extern "C" void __offload_myoLibInit();
-extern "C" void __offload_myoLibFini();
-
-#endif // OFFLOAD_MYO_TARGET_H_INCLUDED
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
#include <omp.h>
-#include "offload.h"
+//#include <stdlib.h>
+//#include "offload.h"
#include "compiler_if_host.h"
+
// OpenMP API
void omp_set_default_device(int num) __GOMP_NOTHROW
return mic_engines_total;
}
+// OpenMP 4.5 APIs
+
+// COI supports 3-dim multiD transfers
+#define MAX_ARRAY_RANK 3
+
+// Report the device number that denotes the host (the "initial device").
+// Always returns -1.
+int omp_get_initial_device(void) __GOMP_NOTHROW
+{
+    const int host_device_num = -1;
+    return host_device_num;
+}
+
+// Allocate `size` bytes on the device identified by `device_num`.
+//   device_num == -1 : allocate on the host with malloc().
+//   device_num >=  0 : run "omp_target_alloc_target" on the device, sending
+//                      the size in and receiving the resulting address back.
+//   device_num <  -1 : report c_invalid_device_number and exit(1).
+// Returns the allocated address; 0 is returned when the host malloc fails
+// or when the target could not be acquired.
+void* omp_target_alloc(
+    size_t size,
+    int device_num
+) __GOMP_NOTHROW
+{
+    __offload_init_library();
+
+    OFFLOAD_TRACE(2, "omp_target_alloc(%lld, %d)\n", size, device_num);
+
+    if (device_num < -1) {
+        LIBOFFLOAD_ERROR(c_invalid_device_number);
+        exit(1);
+    }
+
+    // Host allocation.  A NULL from malloc is handed back unchanged: the
+    // API allows failure and leaves checking the result to the caller.
+    if (device_num == -1) {
+        return malloc(size);
+    }
+
+    void* device_addr = 0;
+
+    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(
+        TARGET_MIC, device_num, 0, NULL, __func__, 0);
+    if (ofld == 0) {
+        return device_addr;
+    }
+
+    VarDesc vars[2] = {0};
+
+    // vars[0]: requested size, passed to the target
+    vars[0].ptr = &size;
+    vars[0].size = sizeof(size);
+    vars[0].count = 1;
+    vars[0].type.src = c_data;
+    vars[0].type.dst = c_data;
+    vars[0].direction.bits = c_parameter_in;
+
+    // vars[1]: allocated address, received from the target
+    vars[1].ptr = &device_addr;
+    vars[1].size = sizeof(device_addr);
+    vars[1].count = 1;
+    vars[1].type.src = c_data;
+    vars[1].type.dst = c_data;
+    vars[1].direction.bits = c_parameter_out;
+
+    OFFLOAD_OFFLOAD(ofld, "omp_target_alloc_target",
+                    0, 2, vars, NULL, 0, 0, 0);
+
+    return device_addr;
+}
+
+// Release memory at `device_ptr` on the device identified by `device_num`.
+//   device_num == -1 : release on the host with free().
+//   device_num >=  0 : run "omp_target_free_target" on the device, passing
+//                      the pointer value to it.
+//   device_num <  -1 : report c_invalid_device_number and exit(1).
+// Nothing is done when the target cannot be acquired.
+void omp_target_free(
+    void *device_ptr,
+    int device_num
+) __GOMP_NOTHROW
+{
+    __offload_init_library();
+
+    OFFLOAD_TRACE(2, "omp_target_free(%p, %d)\n", device_ptr, device_num);
+
+    if (device_num < -1) {
+        LIBOFFLOAD_ERROR(c_invalid_device_number);
+        exit(1);
+    }
+
+    // Host memory
+    if (device_num == -1) {
+        free(device_ptr);
+        return;
+    }
+
+    OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(
+        TARGET_MIC, device_num, 0, NULL, __func__, 0);
+    if (!ofld) {
+        return;
+    }
+
+    // Single input parameter: the pointer value itself
+    VarDesc ptr_arg[1] = {0};
+
+    ptr_arg[0].ptr = &device_ptr;
+    ptr_arg[0].size = sizeof(device_ptr);
+    ptr_arg[0].count = 1;
+    ptr_arg[0].type.src = c_data;
+    ptr_arg[0].type.dst = c_data;
+    ptr_arg[0].direction.bits = c_parameter_in;
+
+    OFFLOAD_OFFLOAD(ofld, "omp_target_free_target",
+                    0, 1, ptr_arg, NULL, 0, 0, 0);
+}
+
+// Tell whether host address `ptr` has an entry in the pointer table of
+// device `device_num`.
+// Returns non-zero when an entry is found and zero otherwise; the host
+// (device_num == -1) always yields zero.  A device number below -1 reports
+// c_invalid_device_number and exits.
+int omp_target_is_present(
+    void *ptr,
+    int device_num
+) __GOMP_NOTHROW
+{
+    __offload_init_library();
+
+    OFFLOAD_TRACE(2, "omp_target_is_present(%p, %d)\n", ptr, device_num);
+
+    if (device_num < -1) {
+        LIBOFFLOAD_ERROR(c_invalid_device_number);
+        exit(1);
+    }
+
+    if (device_num == -1) {
+        return false;
+    }
+
+    // If OpenMP allows wrap-around for device numbers, enable next line
+    //device_num %= mic_engines_total;
+
+    // Look the address up in the device's pointer table
+    PtrData* mapping = mic_engines[device_num].find_ptr_data(ptr);
+    const bool is_mapped = (mapping != 0);
+
+    if (is_mapped) {
+        OFFLOAD_TRACE(3, "Address %p found mapped on device %d\n",
+                      ptr, device_num);
+    }
+    else {
+        OFFLOAD_TRACE(3, "Address %p is not mapped on device %d\n",
+                      ptr, device_num);
+    }
+    return is_mapped;
+}
+
+// Copy `length` bytes from src+src_offset on `src_device` to dst+dst_offset
+// on `dst_device`.  A device number of -1 denotes the host.
+//
+//   host -> host      : plain memcpy
+//   host -> MIC       : COI buffer created over the sink memory, BufferWrite
+//   MIC  -> host      : COI buffer created over the sink memory, BufferRead
+//   MIC  -> same MIC  : "omp_target_memcpy_target" is run on the device
+//   MIC  -> other MIC : staged through a temporary host buffer
+//
+// Returns 0 on success and 1 on failure.  A device number below -1 reports
+// c_invalid_device_number and exits.
+int omp_target_memcpy(
+    void *dst,
+    void *src,
+    size_t length,
+    size_t dst_offset,
+    size_t src_offset,
+    int dst_device,
+    int src_device
+) __GOMP_NOTHROW
+{
+    __offload_init_library();
+
+    OFFLOAD_TRACE(2, "omp_target_memcpy(%p, %p, %lld, %lld, %lld, %d, %d)\n",
+                  dst, src, length, dst_offset, src_offset, dst_device, src_device);
+
+    if (dst_device < -1 || src_device < -1) {
+        LIBOFFLOAD_ERROR(c_invalid_device_number);
+        exit(1);
+    }
+
+    char* srcp = (char *)src + src_offset;
+    char* dstp = (char *)dst + dst_offset;
+
+    if (src_device == -1) {
+        // Source is CPU
+        if (dst_device == -1) {
+            // CPU -> CPU
+            memcpy(dstp, srcp, length);
+            return 0;
+        } else {
+            // CPU -> MIC
+            // COIBufferWrite
+            // If OpenMP allows wrap-around for device numbers, enable next line
+            //dst_device %= mic_engines_total;
+
+            OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n", dstp);
+            COIBUFFER mic_buf;
+            COIRESULT res = COI::BufferCreateFromMemory(length,
+                                COI_BUFFER_NORMAL, COI_SINK_MEMORY, dstp,
+                                1, &mic_engines[dst_device].get_process(),
+                                &mic_buf);
+            if (res != COI_SUCCESS) {
+                LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
+                return 1;
+            }
+            res = COI::BufferWrite(mic_buf, 0, srcp, length,
+                                   COI_COPY_UNSPECIFIED, 0, 0, 0);
+            if (res != COI_SUCCESS) {
+                LIBOFFLOAD_ERROR(c_buf_write, res);
+                // The buffer was created successfully above; release it so
+                // a failed transfer does not leak it.  The transfer error
+                // is what gets reported, so the destroy status is ignored.
+                (void) COI::BufferDestroy(mic_buf);
+                return 1;
+            }
+            res = COI::BufferDestroy(mic_buf);
+            if (res != COI_SUCCESS) {
+                LIBOFFLOAD_ERROR(c_buf_destroy, res);
+                return 1;
+            }
+            return 0;
+        }
+    } else {
+        // Source is device
+        if (dst_device == -1) {
+            // MIC -> CPU
+            // COIBufferRead
+
+            // If OpenMP allows wrap-around for device numbers, enable next line
+            //src_device %= mic_engines_total;
+
+            OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n", srcp);
+            COIBUFFER mic_buf;
+            COIRESULT res = COI::BufferCreateFromMemory(length,
+                                COI_BUFFER_NORMAL, COI_SINK_MEMORY, srcp,
+                                1, &mic_engines[src_device].get_process(),
+                                &mic_buf);
+            if (res != COI_SUCCESS) {
+                LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
+                return 1;
+            }
+            res = COI::BufferRead(mic_buf, 0, dstp, length,
+                                  COI_COPY_UNSPECIFIED, 0, 0, 0);
+            if (res != COI_SUCCESS) {
+                LIBOFFLOAD_ERROR(c_buf_read, res);
+                // Release the buffer on the failure path as well
+                (void) COI::BufferDestroy(mic_buf);
+                return 1;
+            }
+            res = COI::BufferDestroy(mic_buf);
+            if (res != COI_SUCCESS) {
+                LIBOFFLOAD_ERROR(c_buf_destroy, res);
+                return 1;
+            }
+            return 0;
+        } else {
+            // some MIC -> some MIC
+            if (src_device == dst_device) {
+                // MIC local copy will be done as remote memcpy
+
+                OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(TARGET_MIC, src_device,
+                                                      0, NULL, __func__, 0);
+                if (ofld) {
+                    VarDesc vars[3] = {0};
+
+                    // vars[0]: destination address (value of dstp)
+                    vars[0].type.src = c_data;
+                    vars[0].type.dst = c_data;
+                    vars[0].direction.bits = c_parameter_in;
+                    vars[0].size = sizeof(dstp);
+                    vars[0].count = 1;
+                    vars[0].ptr = &dstp;
+
+                    // vars[1]: source address (value of srcp)
+                    vars[1].type.src = c_data;
+                    vars[1].type.dst = c_data;
+                    vars[1].direction.bits = c_parameter_in;
+                    vars[1].size = sizeof(srcp);
+                    vars[1].count = 1;
+                    vars[1].ptr = &srcp;
+
+                    // vars[2]: byte count
+                    vars[2].type.src = c_data;
+                    vars[2].type.dst = c_data;
+                    vars[2].direction.bits = c_parameter_in;
+                    vars[2].size = sizeof(length);
+                    vars[2].count = 1;
+                    vars[2].ptr = &length;
+
+                    OFFLOAD_OFFLOAD(ofld, "omp_target_memcpy_target",
+                                    0, 3, vars, NULL, 0, 0, 0);
+                    return 0;
+                } else {
+                    return 1;
+                }
+            } else {
+                // MICx -> MICy
+                // Allocate CPU buffer
+                char *cpu_mem = (char *)malloc(length);
+                if (cpu_mem == 0) {
+                    LIBOFFLOAD_ERROR(c_malloc);
+                    return 1;
+                }
+                // Two hops: source device -> host, then host -> destination
+                // device.  The second hop only runs when the first succeeds.
+                int retval = 1;
+                if (omp_target_memcpy(
+                        cpu_mem, srcp, length, 0, 0, -1, src_device) == 0) {
+                    retval = omp_target_memcpy(
+                        dstp, cpu_mem, length, 0, 0, dst_device, -1);
+                }
+                free(cpu_mem);
+                return retval;
+            }
+        }
+    }
+}
+
+// Size in bytes of one slice along the leading dimension of an array:
+// element_size multiplied by dimensions[1] .. dimensions[num_dims-1].
+// dimensions[0] is never read; for num_dims <= 1 the result is
+// element_size and `dimensions` is not accessed at all.
+static size_t bytesize_at_this_dimension(
+    size_t element_size,
+    int num_dims,
+    const size_t* dimensions
+)
+{
+    size_t slice_bytes = element_size;
+
+    for (int d = 1; d < num_dims; d++) {
+        slice_bytes *= dimensions[d];
+    }
+    return slice_bytes;
+}
+
+// Host-side copy of a rectangular sub-volume between two arrays.
+//
+//   dst, src           base addresses of the destination and source arrays
+//   element_size       size in bytes of one array element
+//   num_dims           number of dimensions still to be walked
+//   volume             extent of the copied region in each dimension
+//   dst_offsets,
+//   src_offsets        starting index of the region in each dimension
+//   dst_dimensions,
+//   src_dimensions     full extent of each array in each dimension
+//
+// The outer dimensions are walked recursively; the innermost dimension is
+// contiguous and is copied with a single memcpy.
+static void memcpy_rect(
+    char *dst,
+    char *src,
+    size_t element_size,
+    int num_dims,
+    const size_t *volume,
+    const size_t *dst_offsets,
+    const size_t *src_offsets,
+    const size_t *dst_dimensions,
+    const size_t *src_dimensions
+)
+{
+    if (num_dims > 1) {
+        // Counts and indices stay size_t: narrowing them to int would
+        // truncate extents or offsets that do not fit in an int.
+        const size_t count = volume[0];
+        const size_t dst_first = dst_offsets[0];
+        const size_t src_first = src_offsets[0];
+
+        // Bytes occupied by one slice along this dimension in each array
+        const size_t dst_slice_bytes =
+            bytesize_at_this_dimension(element_size, num_dims, dst_dimensions);
+        const size_t src_slice_bytes =
+            bytesize_at_this_dimension(element_size, num_dims, src_dimensions);
+
+        for (size_t i = 0; i < count; i++) {
+            memcpy_rect(dst + dst_slice_bytes * (dst_first + i),
+                        src + src_slice_bytes * (src_first + i),
+                        element_size, num_dims - 1, volume + 1,
+                        dst_offsets + 1, src_offsets + 1,
+                        dst_dimensions + 1, src_dimensions + 1);
+        }
+    } else {
+        // Innermost dimension: one contiguous run of volume[0] elements
+        memcpy(dst + dst_offsets[0] * element_size,
+               src + src_offsets[0] * element_size,
+               element_size * volume[0]);
+    }
+}
+
+// Copy a rectangular sub-volume of a multi-dimensional array between
+// devices.  A device number of -1 denotes the host.
+//
+//   dst_, src_         base addresses of the destination and source arrays
+//   element_size       size in bytes of one array element
+//   num_dims           number of dimensions (1 .. MAX_ARRAY_RANK)
+//   volume             extent of the copied region in each dimension
+//   dst_offsets,
+//   src_offsets        starting index of the region in each dimension
+//   dst_dimensions,
+//   src_dimensions     full extent of each array in each dimension
+//   dst_device,
+//   src_device         destination and source device numbers
+//
+//   host -> host      : memcpy_rect
+//   host -> MIC       : COI buffer over the sink memory, BufferWriteMultiD
+//   MIC  -> host      : COI buffer over the sink memory, BufferReadMultiD
+//   MIC  -> same MIC  : "omp_target_memcpy_rect_target" is run on the device
+//   MIC  -> other MIC : staged through a contiguous temporary host buffer
+//
+// Returns MAX_ARRAY_RANK when both dst_ and src_ are null (the supported
+// rank), otherwise 0 on success and 1 on failure or invalid arguments.
+// A device number below -1 reports c_invalid_device_number and exits.
+int omp_target_memcpy_rect(
+    void *dst_,
+    void *src_,
+    size_t element_size,
+    int num_dims,
+    const size_t *volume,
+    const size_t *dst_offsets,
+    const size_t *src_offsets,
+    const size_t *dst_dimensions,
+    const size_t *src_dimensions,
+    int dst_device,
+    int src_device
+) __GOMP_NOTHROW
+{
+    char *dst = (char *)dst_;
+    char *src = (char *)src_;
+
+    __offload_init_library();
+
+    OFFLOAD_TRACE(2, "omp_target_memcpy_rect(%p, %p, %lld, %d, "
+                  "%p, %p, %p, %p, %p, %d, %d)\n",
+                  dst, src, element_size, num_dims,
+                  volume, dst_offsets, src_offsets,
+                  dst_dimensions, src_dimensions, dst_device, src_device);
+
+    // MAX_ARRAY_RANK dimensions are supported
+    if (dst == 0 && src == 0) {
+        return MAX_ARRAY_RANK;
+    }
+
+    if (num_dims < 1 || num_dims > MAX_ARRAY_RANK ||
+        element_size < 1 ||
+        volume == 0 || dst_offsets == 0 || src_offsets == 0 ||
+        dst_dimensions == 0 || src_dimensions == 0) {
+        return 1;
+    }
+
+    if (dst_device < -1 || src_device < -1) {
+        LIBOFFLOAD_ERROR(c_invalid_device_number);
+        exit(1);
+    }
+
+    if (src_device == -1) {
+        // Source is CPU
+        if (dst_device == -1) {
+            // CPU -> CPU
+            memcpy_rect(dst, src, element_size, num_dims, volume,
+                        dst_offsets, src_offsets,
+                        dst_dimensions, src_dimensions);
+            return 0;
+        } else {
+            // CPU -> MIC
+            // COIBufferWriteMultiD
+            struct arr_desc dst_desc;
+            struct arr_desc src_desc;
+
+            dst_desc.base = (int64_t)dst;
+            dst_desc.rank = num_dims;
+
+            src_desc.base = (int64_t)src;
+            src_desc.rank = num_dims;
+
+            // Describe the copied region of each array, one dimension at a
+            // time: slice size in bytes, inclusive index range, unit stride.
+            for (int i=0; i<num_dims; i++)
+            {
+                dst_desc.dim[i].size = bytesize_at_this_dimension(
+                                           element_size,
+                                           num_dims - i,
+                                           dst_dimensions + i);
+                dst_desc.dim[i].lindex = 0;
+                dst_desc.dim[i].lower = dst_offsets[i];
+                dst_desc.dim[i].upper = dst_offsets[i] + volume[i] - 1;
+                dst_desc.dim[i].stride = 1;
+
+                src_desc.dim[i].size = bytesize_at_this_dimension(
+                                           element_size,
+                                           num_dims - i,
+                                           src_dimensions + i);
+                src_desc.dim[i].lindex = 0;
+                src_desc.dim[i].lower = src_offsets[i];
+                src_desc.dim[i].upper = src_offsets[i] + volume[i] - 1;
+                src_desc.dim[i].stride = 1;
+            }
+            __arr_desc_dump("", "dst", (const Arr_Desc*)&dst_desc, false, false);
+            __arr_desc_dump("", "src", (const Arr_Desc*)&src_desc, false, false);
+
+            // If OpenMP allows wrap-around for device numbers, enable next line
+            //dst_device %= mic_engines_total;
+
+            // Compute MIC buffer size
+            size_t dst_length = dst_dimensions[0] * bytesize_at_this_dimension(
+                                                        element_size,
+                                                        num_dims,
+                                                        dst_dimensions);
+
+            OFFLOAD_TRACE(3,
+                "Creating buffer from sink memory %llx of size %lld\n",
+                dst, dst_length);
+            COIBUFFER mic_buf;
+            COIRESULT res = COI::BufferCreateFromMemory(dst_length,
+                                COI_BUFFER_NORMAL, COI_SINK_MEMORY, dst,
+                                1, &mic_engines[dst_device].get_process(),
+                                &mic_buf);
+            if (res != COI_SUCCESS) {
+                LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
+                return 1;
+            }
+            res = COI::BufferWriteMultiD(mic_buf,
+                      mic_engines[dst_device].get_process(),
+                      0, &dst_desc, &src_desc,
+                      COI_COPY_UNSPECIFIED, 0, 0, 0);
+            if (res != COI_SUCCESS) {
+                LIBOFFLOAD_ERROR(c_buf_write, res);
+                // The buffer was created successfully above; release it so
+                // a failed transfer does not leak it.  The transfer error
+                // is what gets reported, so the destroy status is ignored.
+                (void) COI::BufferDestroy(mic_buf);
+                return 1;
+            }
+            res = COI::BufferDestroy(mic_buf);
+            if (res != COI_SUCCESS) {
+                LIBOFFLOAD_ERROR(c_buf_destroy, res);
+                return 1;
+            }
+            return 0;
+        }
+    } else {
+        // Source is device
+        if (dst_device == -1) {
+            // MIC -> CPU
+            // COIBufferReadMultiD
+            struct arr_desc dst_desc;
+            struct arr_desc src_desc;
+
+            dst_desc.base = (int64_t)dst;
+            dst_desc.rank = num_dims;
+
+            src_desc.base = (int64_t)src;
+            src_desc.rank = num_dims;
+
+            // Same per-dimension description as in the CPU -> MIC case
+            for (int i=0; i<num_dims; i++)
+            {
+                dst_desc.dim[i].size = bytesize_at_this_dimension(
+                                           element_size,
+                                           num_dims - i,
+                                           dst_dimensions + i);
+                dst_desc.dim[i].lindex = 0;
+                dst_desc.dim[i].lower = dst_offsets[i];
+                dst_desc.dim[i].upper = dst_offsets[i] + volume[i] - 1;
+                dst_desc.dim[i].stride = 1;
+
+                src_desc.dim[i].size = bytesize_at_this_dimension(
+                                           element_size,
+                                           num_dims - i,
+                                           src_dimensions + i);
+                src_desc.dim[i].lindex = 0;
+                src_desc.dim[i].lower = src_offsets[i];
+                src_desc.dim[i].upper = src_offsets[i] + volume[i] - 1;
+                src_desc.dim[i].stride = 1;
+            }
+            __arr_desc_dump("", "dst", (const Arr_Desc*)&dst_desc, false, false);
+            __arr_desc_dump("", "src", (const Arr_Desc*)&src_desc, false, false);
+
+            // If OpenMP allows wrap-around for device numbers, enable next line
+            //src_device %= mic_engines_total;
+
+            // Compute MIC buffer size
+            size_t src_length = src_dimensions[0] * bytesize_at_this_dimension(
+                                                        element_size,
+                                                        num_dims,
+                                                        src_dimensions);
+
+            OFFLOAD_TRACE(3,
+                "Creating buffer from sink memory %llx of size %lld\n",
+                src, src_length);
+            COIBUFFER mic_buf;
+            COIRESULT res = COI::BufferCreateFromMemory(src_length,
+                                COI_BUFFER_NORMAL, COI_SINK_MEMORY, src,
+                                1, &mic_engines[src_device].get_process(),
+                                &mic_buf);
+            if (res != COI_SUCCESS) {
+                LIBOFFLOAD_ERROR(c_buf_create_from_mem, res);
+                return 1;
+            }
+            res = COI::BufferReadMultiD(mic_buf, 0,
+                      &dst_desc, &src_desc,
+                      COI_COPY_UNSPECIFIED, 0, 0, 0);
+            if (res != COI_SUCCESS) {
+                // A failed read is reported with the read error code
+                LIBOFFLOAD_ERROR(c_buf_read, res);
+                // Release the buffer on the failure path as well
+                (void) COI::BufferDestroy(mic_buf);
+                return 1;
+            }
+            res = COI::BufferDestroy(mic_buf);
+            if (res != COI_SUCCESS) {
+                LIBOFFLOAD_ERROR(c_buf_destroy, res);
+                return 1;
+            }
+            return 0;
+        } else {
+            // some MIC -> some MIC
+            if (src_device == dst_device) {
+                // MIC local copy will be done as remote memcpy_rect
+                struct parameters {
+                    void *dst;
+                    void *src;
+                    size_t element_size;
+                    int num_dims;
+                    size_t array_info[MAX_ARRAY_RANK*5];
+                } parameters = {dst, src, element_size, num_dims};
+
+                // array_info holds five arrays of num_dims entries each,
+                // packed back to back: volume, dst_offsets, src_offsets,
+                // dst_dimensions, src_dimensions.
+                for (int i=0; i<num_dims; i++)
+                {
+                    parameters.array_info[i] = volume[i];
+                    parameters.array_info[i+num_dims] = dst_offsets[i];
+                    parameters.array_info[i+num_dims*2] = src_offsets[i];
+                    parameters.array_info[i+num_dims*3] = dst_dimensions[i];
+                    parameters.array_info[i+num_dims*4] = src_dimensions[i];
+                }
+
+                OFFLOAD ofld = OFFLOAD_TARGET_ACQUIRE(TARGET_MIC, src_device,
+                                                      0, NULL, __func__, 0);
+                if (ofld) {
+                    VarDesc vars[1] = {0};
+
+                    vars[0].type.src = c_data;
+                    vars[0].type.dst = c_data;
+                    vars[0].direction.bits = c_parameter_in;
+                    // Send only the used part of array_info: drop the
+                    // 5 * (MAX_ARRAY_RANK - num_dims) unused trailing slots.
+                    vars[0].size = sizeof(parameters) -
+                                   (MAX_ARRAY_RANK - num_dims) *
+                                   5 * sizeof(size_t);
+                    vars[0].count = 1;
+                    vars[0].ptr = &parameters;
+
+                    OFFLOAD_OFFLOAD(ofld, "omp_target_memcpy_rect_target",
+                                    0, 1, vars, NULL, 0, 0, 0);
+                    return 0;
+                } else {
+                    return 1;
+                }
+            } else {
+                // MICx -> MICy
+
+                // Compute transfer byte-count
+                size_t dst_length = element_size;
+                for (int i=0; i<num_dims; i++) {
+                    dst_length *= volume[i];
+                }
+
+                // Allocate CPU buffer
+                char *cpu_mem = (char *)malloc(dst_length);
+                if (cpu_mem == 0) {
+                    LIBOFFLOAD_ERROR(c_malloc);
+                    return 1;
+                }
+
+                // Create CPU offset and dimension arrays
+                // The CPU array collects the data in a contiguous block
+                size_t cpu_offsets[MAX_ARRAY_RANK];
+                size_t cpu_dimensions[MAX_ARRAY_RANK];
+                for (int i=0; i<num_dims; i++) {
+                    cpu_offsets[i] = 0;
+                    cpu_dimensions[i] = volume[i];
+                }
+
+                // Two hops: source device -> host, then host -> destination
+                // device.  The second hop only runs when the first succeeds.
+                int retval = 1;
+                if (omp_target_memcpy_rect(
+                        cpu_mem, src, element_size, num_dims, volume,
+                        cpu_offsets, src_offsets,
+                        cpu_dimensions, src_dimensions,
+                        -1, src_device) == 0) {
+                    retval = omp_target_memcpy_rect(
+                                 dst, cpu_mem, element_size, num_dims, volume,
+                                 dst_offsets, cpu_offsets,
+                                 dst_dimensions, cpu_dimensions,
+                                 dst_device, -1);
+                }
+                free(cpu_mem);
+                return retval;
+            }
+        }
+    }
+}
+
+// host_ptr is key in table that yields association on device
+// A COIBUFFER of specified size is created from the memory at
+// device_ptr+device_offset on device_num
+//
+// Returns 0 on success, including the case where host_ptr is already
+// associated with the same device_ptr and device_offset, and 1 on failure.
+// A device_num below -1 reports c_invalid_device_number and exits.
+// NOTE(review): the trace below prints device_ptr + device_offset, but the
+// sink buffer is created from device_ptr alone - confirm which is intended.
+int omp_target_associate_ptr(
+ void *host_ptr,
+ void *device_ptr,
+ size_t size,
+ size_t device_offset,
+ int device_num
+) __GOMP_NOTHROW
+{
+ COIRESULT res;
+
+ __offload_init_library();
+
+ OFFLOAD_TRACE(2, "omp_target_associate_ptr(%p, %p, %lld, %lld, %d)\n",
+ host_ptr, device_ptr, size, device_offset, device_num);
+
+ if (device_num < -1) {
+ LIBOFFLOAD_ERROR(c_invalid_device_number);
+ exit(1);
+ }
+
+ // Associating to CPU is treated as failure
+ if (device_num == -1) {
+ return 1;
+ }
+
+ // An incorrect size is treated as failure
+ // NOTE(review): size is a size_t, so this comparison can never be true.
+ if (size < 0) {
+ return 1;
+ }
+
+ // If OpenMP allows wrap-around for device numbers, enable next line
+ //Engine& device = mic_engines[device_num % mic_engines_total];
+ // NOTE(review): device_num is not checked against an upper bound before
+ // it is used as an index - confirm callers guarantee the range.
+ Engine& device = mic_engines[device_num];
+
+ // Does host pointer have association already?
+ // lookup existing association in pointer table
+ PtrData* ptr_data = device.find_ptr_data(host_ptr);
+ if (ptr_data != 0) {
+ OFFLOAD_TRACE(3, "Address %p is already mapped on device %d\n",
+ host_ptr, device_num);
+ // Is current device pointer and offset same as existing?
+ // An identical request succeeds; a conflicting one fails.
+ if ((void*)ptr_data->mic_addr == device_ptr &&
+ (size_t)ptr_data->alloc_disp == device_offset) {
+ return 0;
+ } else {
+ return 1;
+ }
+ }
+
+ // Create association
+ OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
+ host_ptr, size);
+
+ // presumably insert_ptr_data returns with alloc_ptr_data_lock held; the
+ // lock is released on every exit path below - TODO confirm
+ bool is_new;
+ ptr_data = device.insert_ptr_data(host_ptr, size, is_new);
+ ptr_data->is_omp_associate = true;
+
+ // create CPU buffer
+ OFFLOAD_TRACE(3,
+ "Creating buffer from source memory %p, length %lld\n",
+ host_ptr, size);
+
+ // result is not checked because we can continue without cpu
+ // buffer. In this case we will use COIBufferRead/Write
+ // instead of COIBufferCopy.
+
+ COI::BufferCreateFromMemory(size,
+ COI_BUFFER_OPENCL,
+ 0,
+ host_ptr,
+ 1,
+ &device.get_process(),
+ &ptr_data->cpu_buf);
+
+ // create MIC buffer
+ OFFLOAD_TRACE(3,
+ "Creating buffer from sink memory: addr %p, size %lld\n",
+ (char *)device_ptr + device_offset, size);
+ res = COI::BufferCreateFromMemory(size,
+ COI_BUFFER_NORMAL,
+ COI_SINK_MEMORY,
+ device_ptr,
+ 1,
+ &device.get_process(),
+ &ptr_data->mic_buf);
+ // NOTE(review): on this and the following failure paths the entry
+ // inserted above stays in the pointer table and no buffer is destroyed -
+ // confirm whether cleanup is expected here.
+ if (res != COI_SUCCESS) {
+ ptr_data->alloc_ptr_data_lock.unlock();
+ return 1;
+ }
+
+ // make buffer valid on the device.
+ res = COI::BufferSetState(ptr_data->mic_buf,
+ device.get_process(),
+ COI_BUFFER_VALID,
+ COI_BUFFER_NO_MOVE,
+ 0, 0, 0);
+ if (res != COI_SUCCESS) {
+ ptr_data->alloc_ptr_data_lock.unlock();
+ return 1;
+ }
+
+ // and mark the same buffer invalid for the source process
+ res = COI::BufferSetState(ptr_data->mic_buf,
+ COI_PROCESS_SOURCE,
+ COI_BUFFER_INVALID,
+ COI_BUFFER_NO_MOVE,
+ 0, 0, 0);
+ if (res != COI_SUCCESS) {
+ ptr_data->alloc_ptr_data_lock.unlock();
+ return 1;
+ }
+ // Remember the offset so a repeated call can be compared against it
+ ptr_data->alloc_disp = device_offset;
+ ptr_data->alloc_ptr_data_lock.unlock();
+
+ return 0;
+}
+
+// Removes the pointer-table entry for host_ptr on device_num and destroys
+// the COI buffers attached to it.
+//
+// Returns 0 on success.  Returns 1 when device_num is the host (-1), when
+// host_ptr has no entry on the device, or when a buffer could not be
+// destroyed (the entry is still removed in that case).
+// A device_num below -1 reports c_invalid_device_number and exits.
+int omp_target_disassociate_ptr(
+    void *host_ptr,
+    int device_num
+) __GOMP_NOTHROW
+{
+    COIRESULT res;
+    int status = 0;
+
+    __offload_init_library();
+
+    OFFLOAD_TRACE(2, "omp_target_disassociate_ptr(%p, %d)\n",
+                  host_ptr, device_num);
+
+    if (device_num < -1) {
+        LIBOFFLOAD_ERROR(c_invalid_device_number);
+        exit(1);
+    }
+
+    // Dissociating from CPU is treated as failure
+    if (device_num == -1) {
+        return 1;
+    }
+
+    // If OpenMP allows wrap-around for device numbers, enable next line
+    //Engine& device = mic_engines[device_num % mic_engines_total];
+    Engine& device = mic_engines[device_num];
+
+    // Lookup existing association in pointer table
+    PtrData* ptr_data = device.find_ptr_data(host_ptr);
+
+    // Attempt to disassociate unassociated pointer is a failure
+    if (ptr_data == 0) {
+        return 1;
+    }
+
+    // Destroy buffers.  A failure is reported the same way as in
+    // omp_target_memcpy_rect, but the cleanup still runs to completion so
+    // the table never keeps an entry for a half-released association.
+    if (ptr_data->cpu_buf != 0) {
+        OFFLOAD_TRACE(3, "Destroying CPU buffer %p\n", ptr_data->cpu_buf);
+        res = COI::BufferDestroy(ptr_data->cpu_buf);
+        if (res != COI_SUCCESS) {
+            LIBOFFLOAD_ERROR(c_buf_destroy, res);
+            status = 1;
+        }
+    }
+    if (ptr_data->mic_buf != 0) {
+        OFFLOAD_TRACE(3, "Destroying MIC buffer %p\n", ptr_data->mic_buf);
+        res = COI::BufferDestroy(ptr_data->mic_buf);
+        if (res != COI_SUCCESS) {
+            LIBOFFLOAD_ERROR(c_buf_destroy, res);
+            status = 1;
+        }
+    }
+
+    // Remove association from map
+    OFFLOAD_TRACE(3, "Removing association for addr %p\n",
+                  ptr_data->cpu_addr.start());
+    device.remove_ptr_data(ptr_data->cpu_addr.start());
+
+    return status;
+}
+
+// End of OpenMP 4.5 APIs
+
+
// OpenMP API wrappers
static void omp_set_int_target(
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
return mic_engines_total;
}
+// OpenMP 4.5 APIs
+
+// COI supports 3-dim multiD transfers
+#define MAX_ARRAY_RANK 3
+
+// Target-side handler for omp_target_alloc.
+// Takes the requested byte count as an input parameter, calls malloc and
+// hands the resulting address back as an output parameter.
+DLL_LOCAL void omp_target_alloc_target(
+    void *ofld_
+)
+{
+    void* block;        // address produced by malloc (output parameter)
+    size_t nbytes;      // requested allocation size (input parameter)
+    VarDesc desc[2] = {0};
+    OFFLOAD ofld = (OFFLOAD) ofld_;
+
+    // Both descriptors carry plain data on either side
+    for (int k = 0; k < 2; k++) {
+        desc[k].type.src = c_data;
+        desc[k].type.dst = c_data;
+    }
+
+    desc[0].direction.bits = c_parameter_in;
+    desc[0].ptr = &nbytes;
+
+    desc[1].direction.bits = c_parameter_out;
+    desc[1].ptr = &block;
+
+    OFFLOAD_TARGET_ENTER(ofld, 2, desc, NULL);
+    OFFLOAD_DEBUG_TRACE(2, "omp_target_alloc(%lld)\n", nbytes);
+
+    // A NULL result is passed through unchecked on purpose: the API is
+    // specified to be able to fail and the caller inspects the result.
+    block = malloc(nbytes);
+    OFFLOAD_DEBUG_TRACE(2, "omp_target_alloc allocated at %p\n", block);
+    OFFLOAD_TARGET_LEAVE(ofld);
+}
+
+// Target-side handler for omp_target_free.
+// Takes an address as an input parameter and passes it to free().
+DLL_LOCAL void omp_target_free_target(
+    void *ofld_
+)
+{
+    void* block;                    // address to release (input parameter)
+    VarDesc desc[1] = {0};
+    VarDesc &in = desc[0];
+    OFFLOAD ofld = (OFFLOAD) ofld_;
+
+    in.ptr = &block;
+    in.direction.bits = c_parameter_in;
+    in.type.dst = c_data;
+    in.type.src = c_data;
+
+    OFFLOAD_TARGET_ENTER(ofld, 1, desc, NULL);
+    OFFLOAD_DEBUG_TRACE(2, "omp_target_free(%p)\n", block);
+    free(block);
+    OFFLOAD_DEBUG_TRACE(2, "omp_target_free freed\n");
+    OFFLOAD_TARGET_LEAVE(ofld);
+}
+
+// Target-side handler for omp_target_memcpy.
+// Takes destination, source and byte count as input parameters and
+// performs a plain memcpy between the two addresses.
+DLL_LOCAL void omp_target_memcpy_target(
+    void *ofld_
+)
+{
+    size_t length;
+    void* src;
+    void* dst;
+    OFFLOAD ofld = (OFFLOAD) ofld_;
+
+    // Parameter slots in the order they are described: dst, src, length
+    void* slots[3] = { &dst, &src, &length };
+    VarDesc desc[3] = {0};
+
+    // All three are plain-data input parameters
+    for (int k = 0; k < 3; k++) {
+        desc[k].type.src = c_data;
+        desc[k].type.dst = c_data;
+        desc[k].direction.bits = c_parameter_in;
+        desc[k].ptr = slots[k];
+    }
+
+    OFFLOAD_TARGET_ENTER(ofld, 3, desc, NULL);
+    OFFLOAD_DEBUG_TRACE(2, "omp_target_memcpy(%p, %p, %lld)\n",
+                        dst, src, length);
+    memcpy(dst, src, length);
+
+    OFFLOAD_DEBUG_TRACE(2, "omp_target_memcpy done\n");
+    OFFLOAD_TARGET_LEAVE(ofld);
+}
+
+// Returns the size in bytes of one slice of the outermost dimension:
+// element_size multiplied by dimensions[1] .. dimensions[num_dims-1].
+// dimensions[0] is never read; for num_dims <= 1 the result is element_size.
+static size_t bytesize_at_this_dimension(
+    size_t element_size,
+    int num_dims,
+    const size_t* dimensions
+)
+{
+    size_t bytes = element_size;
+
+    // Fold in every extent after the first one
+    for (int d = 1; d < num_dims; d++) {
+        bytes *= dimensions[d];
+    }
+    return bytes;
+}
+
+// Copies a rectangular sub-volume from src to dst, one dimension per
+// recursion level.
+//   element_size     size in bytes of one array element
+//   num_dims         number of dimensions still to be processed
+//   volume           extent of the region to copy, per dimension
+//   dst_offsets      start index of the region in dst, per dimension
+//   src_offsets      start index of the region in src, per dimension
+//   dst_dimensions   full extent of the dst array, per dimension
+//   src_dimensions   full extent of the src array, per dimension
+// The innermost dimension is copied with a single memcpy.
+static void memcpy_rect(
+    char *dst,
+    char *src,
+    size_t element_size,
+    int num_dims,
+    const size_t *volume,
+    const size_t *dst_offsets,
+    const size_t *src_offsets,
+    const size_t *dst_dimensions,
+    const size_t *src_dimensions
+)
+{
+    if (num_dims > 1) {
+        // Byte distance between consecutive slices of this dimension
+        size_t dst_element_size =
+            bytesize_at_this_dimension(element_size, num_dims, dst_dimensions);
+        size_t src_element_size =
+            bytesize_at_this_dimension(element_size, num_dims, src_dimensions);
+
+        // Keep counts and indices as size_t: the inputs are size_t and
+        // narrowing them to int would truncate large extents or offsets.
+        size_t dst_index = dst_offsets[0];
+        size_t src_index = src_offsets[0];
+        for (size_t count = volume[0]; count > 0;
+             dst_index++, src_index++, count--) {
+            memcpy_rect(dst+dst_element_size*dst_index,
+                        src+src_element_size*src_index,
+                        element_size, num_dims-1, volume+1,
+                        dst_offsets+1, src_offsets+1,
+                        dst_dimensions+1, src_dimensions+1);
+        }
+    } else {
+        memcpy(dst+dst_offsets[0]*element_size,
+               src+src_offsets[0]*element_size,
+               element_size * volume[0]);
+    }
+}
+
+// Target-side handler for omp_target_memcpy_rect.
+// Takes one input parameter block holding dst, src, element_size, num_dims
+// and a packed array_info, then runs memcpy_rect on it.
+// array_info is read as five consecutive runs of num_dims entries each:
+// volume, dst offsets, src offsets, dst dimensions, src dimensions.
+DLL_LOCAL void omp_target_memcpy_rect_target(
+    void *ofld_
+)
+{
+    OFFLOAD ofld = (OFFLOAD) ofld_;
+    VarDesc vars[1] = {0};
+    struct parameters {
+        void *dst;
+        void *src;
+        size_t element_size;
+        int num_dims;
+        size_t array_info[MAX_ARRAY_RANK*5];
+    } parameters;
+
+    vars[0].type.src = c_data;
+    vars[0].type.dst = c_data;
+    vars[0].direction.bits = c_parameter_in;
+    vars[0].ptr = &parameters;
+
+    OFFLOAD_TARGET_ENTER(ofld, 1, vars, NULL);
+    OFFLOAD_DEBUG_TRACE(2, "omp_target_memcpy_rect(%p, %p, %lld, %d)\n",
+                        parameters.dst, parameters.src,
+                        parameters.element_size, parameters.num_dims);
+    memcpy_rect(
+        (char*)parameters.dst, (char*)parameters.src, parameters.element_size,
+        parameters.num_dims,
+        &parameters.array_info[0],
+        &parameters.array_info[parameters.num_dims],
+        &parameters.array_info[parameters.num_dims*2],
+        &parameters.array_info[parameters.num_dims*3],
+        &parameters.array_info[parameters.num_dims*4]);
+
+    OFFLOAD_DEBUG_TRACE(2, "omp_target_memcpy_rect done\n");
+    OFFLOAD_TARGET_LEAVE(ofld);
+}
+
+// End of OpenMP 4.5 APIs
+
+
// OpenMP API wrappers
static void omp_send_int_to_host(
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
#include "offload_table.h"
#include "offload_common.h"
+// Offload Library versioning
+// We initialize version to OFFLOAD_VERSION_16
+// 15.0 application downgrades this to 1500 for MYO to use the older version.
+// 15.0 pragma works without needing version-specific code.
+// 16.0-U2 added a call from ofldbegin.cpp to set the version explicitly.
+// Pre-16.0-U2 application will find pre-initialized version number as 1600.
+// Post 16.0-U2 application will set its own version explicitly.
+int offload_version = OFFLOAD_VERSION_16;
+int offload_version_count = 0;
+
#if !HOST_LIBRARY
// Predefined offload entries
extern void omp_set_num_threads_lrb(void*);
extern void omp_unset_nest_lock_lrb(void*);
extern void omp_test_nest_lock_lrb(void*);
+// OpenMP 4.5 APIs
+extern void omp_target_alloc_target(void*);
+extern void omp_target_free_target(void*);
+extern void omp_target_memcpy_target(void*);
+extern void omp_target_memcpy_rect_target(void*);
+
// Predefined entries on the target side
static FuncTable::Entry predefined_entries[] = {
"omp_set_num_threads_target",
"omp_test_nest_lock_target",
(void*) &omp_test_nest_lock_lrb,
+ "omp_target_alloc_target",
+ (void*) &omp_target_alloc_target,
+ "omp_target_free_target",
+ (void*) &omp_target_free_target,
+ "omp_target_memcpy_target",
+ (void*) &omp_target_memcpy_target,
+ "omp_target_memcpy_rect_target",
+ (void*) &omp_target_memcpy_rect_target,
+
(const char*) -1,
(void*) -1
};
FuncList __offload_entries;
#endif // !HOST_LIBRARY
+extern "C" {
+
+// Records the offload library version requested by the application.
+// The first call stores v in offload_version; every later call must pass
+// the same value, otherwise c_mixed_versions is reported and the process
+// exits.
+void __offload_set_version(int v)
+{
+    // First caller decides the version
+    if (++offload_version_count == 1) {
+        offload_version = v;
+        return;
+    }
+
+    // Later callers only have to agree with it
+    if (v == offload_version) {
+        return;
+    }
+
+    // Mix of versions is not supported
+    LIBOFFLOAD_ERROR(c_mixed_versions);
+    exit(1);
+}
+
+} // extern "C"
// Function table. No predefined entries.
FuncList __offload_funcs;
}
}
+#if HOST_LIBRARY
+// 16.0 and earlier compilers used the following VarTable
+struct OldVarTable {
+    const char* name;
+    void*       addr;
+    // uint64_t var_alloc_type  missing in 16.0 and earlier
+    uint64_t    size;
+};
+
+// Replaces the old-layout entries of vt_start with a newly allocated array
+// in the current VarTable::Entry layout.  name, addr and size are copied,
+// var_alloc_type is set to 0, and the array ends with an entry whose name
+// is the end-of-table signature (const char*)-1.
+// The new array is allocated with new[]; the old entries are not freed.
+static void convert_OldVarTable_to_NewVarTable(VarList::Node *vt_start)
+{
+    OFFLOAD_DEBUG_TRACE(2,
+       "Converting old var table to new var table to support backward compatiblity\n");
+
+    // Count the entries in front of the end-of-table signature
+    OldVarTable *old_var_table = (OldVarTable *) vt_start->table.entries;
+    int table_size = 0;
+    while (old_var_table[table_size].name != (const char*) -1) {
+        table_size++;
+    }
+
+    // Add 1 to table_size for end of table signature.
+    // The array is allocated even for an empty table so that the entries
+    // pointer always refers to storage owned by this conversion, and it is
+    // value-initialized so no field of the end entry is left indeterminate.
+    VarTable::Entry *new_var_table =
+        new VarTable::Entry[table_size+1]();
+
+    if (new_var_table == NULL) {
+        LIBOFFLOAD_ERROR(c_malloc);
+        // Do not dereference a null table; leave the node unchanged
+        return;
+    }
+
+    // Update VarList with new table
+    vt_start->table.entries = new_var_table;
+
+    // Fix up the new table value from old table
+    for (int i = 0; i < table_size; i++) {
+        new_var_table[i].name = old_var_table[i].name;
+        new_var_table[i].addr = old_var_table[i].addr;
+        new_var_table[i].size = old_var_table[i].size;
+        // Assign value of 0 for the missing field.
+        // Implying it is neither IMPLICIT or LINK variable as
+        // they were not supported in earlier compilers
+        new_var_table[i].var_alloc_type = 0;
+    }
+    new_var_table[table_size].name = (const char *)-1;
+}
+#endif //HOST_LIBRARY
+
// Adds given list element to the global lookup table list
extern "C" void __offload_register_tables(
FuncList::Node *entry_table,
__offload_funcs.add_table(func_table);
OFFLOAD_DEBUG_TRACE(2, "Registering var table %p\n", var_table);
+
+ // Compiler earlier than 17.0 used a different var_table.
+ // Convert the old table to new var_table format.
+ // Only the host table for LINUX has changed.
+#ifndef TARGET_WINNT
+#if HOST_LIBRARY
+ if (offload_version < OFFLOAD_VERSION_17) {
+ convert_OldVarTable_to_NewVarTable(var_table);
+ }
+#endif
+#endif
__offload_vars.add_table(var_table);
}
__offload_funcs.remove_table(func_table);
OFFLOAD_DEBUG_TRACE(2, "Unregistering var table %p\n", var_table);
+#ifndef TARGET_WINNT
+#if HOST_LIBRARY
+    if (offload_version < OFFLOAD_VERSION_17) {
+        // Free the var_table created for backward compatibility.
+        // It is an array obtained from new[], so it needs delete[].
+        // NOTE(review): this assumes entries was replaced by
+        // convert_OldVarTable_to_NewVarTable at registration - confirm
+        // that holds for every table, including empty ones.
+        delete[] var_table->table.entries;
+    }
+#endif
+#endif
__offload_vars.remove_table(var_table);
}
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
#include "offload_util.h"
+#define OFFLOAD_VERSION_16 1600
+#define OFFLOAD_VERSION_17 1700
+
// Template representing double linked list of tables
template <typename T> class TableList {
public:
int64_t m_max_name_len;
};
+#define VAR_ALLOC_TYPE uint64_t
+#define OPENMP_IMPLICIT 1 // Compiler promoted openmp declare var
+ // due to implicit use without openmp declare
+#define OPENMP_LINK 2 // Openmp link clause in openmp declare
+
+#define IS_OPENMP_IMPLICIT(var_alloc_type) (var_alloc_type & 1)
+#define IS_OPENMP_LINK(var_alloc_type) (var_alloc_type & 2)
+#define IS_OPENMP_IMPLICIT_OR_LINK(var_alloc_type) (var_alloc_type & 3)
+
// Table entry for static variables
struct VarTable {
//! Variable table entry
void* addr; //!< Address of the variable
#if HOST_LIBRARY
+ VAR_ALLOC_TYPE var_alloc_type;
uint64_t size;
-
-#ifdef TARGET_WINNT
- // padding to make entry size a power of 2
- uint64_t padding;
-#endif // TARGET_WINNT
#endif
};
#pragma section(OFFLOAD_VAR_TABLE_SECTION_END, read, write)
+// Set library version
+extern "C" void __offload_set_version(int v);
+
// register/unregister given tables
extern "C" void __offload_register_tables(
FuncList::Node *entry_table,
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
"dv_ptr_data_slice",
"cean_var",
"cean_var_ptr",
- "c_data_ptr_array"
+ "c_data_ptr_array",
+ "c_extended_type",
+ "c_func_ptr_array",
+ "c_void_ptr_array",
+ "c_string_ptr_array",
+ "c_data_ptr_ptr",
+ "c_func_ptr_ptr",
+ "c_void_ptr_ptr",
+ "c_string_ptr_ptr",
+ "c_cean_var_ptr_ptr",
};
int mic_index = -1;
--info->count;
if (info->count == 0 && info->is_added) {
OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n",
- ((RefInfo *) ref_data[buf])->count);
+ ((RefInfo *) ref_data[buf])->count);
BufferReleaseRef(buf);
info->is_added = 0;
}
LIBOFFLOAD_ERROR(c_malloc);
memcpy(ofld.m_vars, in_data, var_data_len);
+        ofld.m_vars_extra =
+            (VarExtra*) malloc(ofld.m_vars_total * sizeof(VarExtra));
+        // Check the buffer that was just allocated
+        if (ofld.m_vars_extra == NULL)
+            LIBOFFLOAD_ERROR(c_malloc);
+
in_data += var_data_len;
func->in_datalen -= var_data_len;
}
}
for (int i = 0; i < m_vars_total; i++) {
+ // instead of m_vars[i].type.src we will use m_vars_extra[i].type_src
+
if (i < vars_total) {
// variable type must match
if (m_vars[i].type.bits != vars[i].type.bits) {
+ OFFLOAD_TRACE(2,
+ "m_vars[%d].type.bits=%08x, vars[%d].type.bits=%08x\n",
+ i, m_vars[i].type.bits, i, vars[i].type.bits);
LIBOFFLOAD_ERROR(c_merge_var_descs2);
exit(1);
}
- m_vars[i].ptr = vars[i].ptr;
- m_vars[i].into = vars[i].into;
+ if (m_vars[i].type.src == c_extended_type) {
+ VarDescExtendedType *etype =
+ reinterpret_cast<VarDescExtendedType*>(vars[i].ptr);
+ m_vars_extra[i].type_src = etype->extended_type;
+ m_vars[i].ptr = etype->ptr;
+ }
+ else {
+ m_vars_extra[i].type_src = m_vars[i].type.src;
+ if (!(m_vars[i].flags.use_device_ptr &&
+ m_vars[i].type.src == c_dv)) {
+ m_vars[i].ptr = vars[i].ptr;
+ }
+ }
+ // instead of m_vars[i].type.dst we will use m_vars_extra[i].type_dst
+ if (m_vars[i].type.dst == c_extended_type && i < vars_total) {
+ VarDescExtendedType *etype =
+ reinterpret_cast<VarDescExtendedType*>(vars[i].into);
+ m_vars_extra[i].type_dst = etype->extended_type;
+ m_vars[i].into = etype->ptr;
+ }
+ else {
+ m_vars_extra[i].type_dst = m_vars[i].type.dst;
+ m_vars[i].into = vars[i].into;
+ }
const char *var_sname = "";
if (vars2 != NULL) {
" VarDesc %d, var=%s, %s, %s\n",
i, var_sname,
vardesc_direction_as_string[m_vars[i].direction.bits],
- vardesc_type_as_string[m_vars[i].type.src]);
+ vardesc_type_as_string[m_vars_extra[i].type_src]);
if (vars2 != NULL && vars2[i].dname != NULL) {
OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname,
- vardesc_type_as_string[m_vars[i].type.dst]);
+ vardesc_type_as_string[m_vars_extra[i].type_dst]);
}
}
+ else {
+ m_vars_extra[i].type_src = m_vars[i].type.src;
+ m_vars_extra[i].type_dst = m_vars[i].type.dst;
+ }
+
OFFLOAD_TRACE(2,
" type_src=%d, type_dstn=%d, direction=%d, "
"alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
"offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
- m_vars[i].type.src,
- m_vars[i].type.dst,
+ m_vars_extra[i].type_src,
+ m_vars_extra[i].type_dst,
m_vars[i].direction.bits,
m_vars[i].alloc_if,
m_vars[i].free_if,
void** ptr_addr = src_is_for_mic ?
static_cast<void**>(m_vars[i].ptr) :
static_cast<void**>(m_vars[i].into);
- int type = src_is_for_mic ? m_vars[i].type.src :
- m_vars[i].type.dst;
+ int type = src_is_for_mic ? m_vars_extra[i].type_src :
+ m_vars_extra[i].type_dst;
bool is_static = src_is_for_mic ?
m_vars[i].flags.is_static :
m_vars[i].flags.is_static_dstn;
*(reinterpret_cast<char**>(m_vars[i].ptr)) :
reinterpret_cast<char*>(m_vars[i].into);
+ // if is_pointer is 1 it means that pointer array itself
+ // is defined either via pointer or as class member.
+ // i.e. arr_ptr[0:5] or this->ARR[0:5]
if (m_vars[i].flags.is_pointer) {
- dst_arr_ptr = *((char**)dst_arr_ptr);
+ int64_t offset = 0;
+ m_in.receive_data(&offset, sizeof(offset));
+ dst_arr_ptr = *((char**)dst_arr_ptr) + offset;
}
for (; j < max_el; j++) {
if (src_is_for_mic) {
break;
case c_data:
case c_void_ptr:
+ case c_void_ptr_ptr:
case c_cean_var:
case c_dv:
break;
case c_string_ptr:
case c_data_ptr:
+ case c_string_ptr_ptr:
+ case c_data_ptr_ptr:
case c_cean_var_ptr:
+ case c_cean_var_ptr_ptr:
case c_dv_ptr:
+ // Don't need ptr_addr value for variables from stack buffer.
+ // Stack buffer address is set at var_desc with #0.
+ if (i != 0 && m_vars[i].flags.is_stack_buf) {
+ break;
+ }
+ if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) ||
+ TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst)) {
+ int64_t offset;
+
+ m_in.receive_data(&offset, sizeof(offset));
+ ptr_addr = reinterpret_cast<void**>(
+ reinterpret_cast<char*>(*ptr_addr) + offset);
+
+ }
+
if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) {
void *buf = NULL;
if (m_vars[i].flags.sink_addr) {
m_vars[i].mic_offset +
(m_vars[i].flags.is_stack_buf ?
0 : m_vars[i].offset);
+
}
*ptr_addr = ptr;
}
break;
case c_func_ptr:
+ case c_func_ptr_ptr:
break;
case c_dv_data:
LIBOFFLOAD_ERROR(c_unknown_var_type, type);
abort();
}
- // Release obsolete buffers for stack of persistent objects
- if (type = c_data_ptr &&
+ // Release obsolete buffers for stack of persistent objects.
+ // The vardesc with i==0 and flags.is_stack_buf==TRUE is always for
+ // stack buffer pointer.
+ if (i == 0 &&
m_vars[i].flags.is_stack_buf &&
!m_vars[i].direction.bits &&
m_vars[i].alloc_if &&
for (int j=0; j < m_vars[i].size; j++) {
void *buf;
m_in.receive_data(&buf, sizeof(buf));
+ OFFLOAD_TRACE(4, "Releasing stack buffer %p\n", buf);
BufferReleaseRef(buf);
ref_data.erase(buf);
}
}
// Do copyin
- switch (m_vars[i].type.dst) {
+ switch (m_vars_extra[i].type_dst) {
case c_data_ptr_array:
break;
case c_data:
case c_void_ptr:
+ case c_void_ptr_ptr:
case c_cean_var:
if (m_vars[i].direction.in &&
!m_vars[i].flags.is_static_dstn) {
char* ptr = m_vars[i].into ?
static_cast<char*>(m_vars[i].into) :
static_cast<char*>(m_vars[i].ptr);
- if (m_vars[i].type.dst == c_cean_var) {
+ if (m_vars_extra[i].type_dst == c_cean_var) {
m_in.receive_data((&size), sizeof(int64_t));
m_in.receive_data((&disp), sizeof(int64_t));
}
case c_string_ptr:
case c_data_ptr:
+ case c_string_ptr_ptr:
+ case c_data_ptr_ptr:
case c_cean_var_ptr:
+ case c_cean_var_ptr_ptr:
case c_dv_ptr:
case c_dv_data:
case c_dv_ptr_data:
break;
case c_func_ptr:
+ case c_func_ptr_ptr:
if (m_vars[i].direction.in) {
m_in.receive_func_ptr((const void**) m_vars[i].ptr);
}
break;
default:
- LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
+ LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
abort();
}
}
for (int i = 0; i < m_vars_total; i++) {
bool src_is_for_mic = (m_vars[i].direction.out ||
m_vars[i].into == NULL);
-
- switch (m_vars[i].type.src) {
+ if (m_vars[i].flags.is_stack_buf) {
+ continue;
+ }
+ switch (m_vars_extra[i].type_src) {
case c_data_ptr_array:
break;
case c_data:
case c_void_ptr:
+ case c_void_ptr_ptr:
case c_cean_var:
if (m_vars[i].direction.out &&
!m_vars[i].flags.is_static) {
case c_string_ptr:
case c_data_ptr:
+ case c_string_ptr_ptr:
+ case c_data_ptr_ptr:
case c_cean_var_ptr:
+ case c_cean_var_ptr_ptr:
case c_dv_ptr:
if (m_vars[i].free_if &&
src_is_for_mic &&
break;
case c_func_ptr:
+ case c_func_ptr_ptr:
if (m_vars[i].direction.out) {
m_out.send_func_ptr(*((void**) m_vars[i].ptr));
}
if (src_is_for_mic &&
m_vars[i].free_if &&
!m_vars[i].flags.is_static) {
- ArrDesc *dvp = (m_vars[i].type.src == c_dv_data ||
- m_vars[i].type.src == c_dv_data_slice) ?
- static_cast<ArrDesc*>(m_vars[i].ptr) :
- *static_cast<ArrDesc**>(m_vars[i].ptr);
+ ArrDesc *dvp = (m_vars_extra[i].type_src == c_dv_data ||
+ m_vars_extra[i].type_src == c_dv_data_slice) ?
+ static_cast<ArrDesc*>(m_vars[i].ptr) :
+ *static_cast<ArrDesc**>(m_vars[i].ptr);
void *buf = reinterpret_cast<char*>(dvp->Base) -
m_vars[i].mic_offset -
break;
default:
- LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
+            // This switch is on type_src, so report that value
+            LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_src);
abort();
}
if (m_vars[i].into) {
- switch (m_vars[i].type.dst) {
+ switch (m_vars_extra[i].type_dst) {
case c_data_ptr_array:
break;
case c_data:
case c_void_ptr:
+ case c_void_ptr_ptr:
case c_cean_var:
case c_dv:
break;
case c_string_ptr:
case c_data_ptr:
+ case c_string_ptr_ptr:
+ case c_data_ptr_ptr:
case c_cean_var_ptr:
+ case c_cean_var_ptr_ptr:
case c_dv_ptr:
if (m_vars[i].direction.in &&
m_vars[i].free_if &&
break;
case c_func_ptr:
+ case c_func_ptr_ptr:
break;
case c_dv_data:
m_vars[i].direction.in &&
!m_vars[i].flags.is_static_dstn) {
ArrDesc *dvp =
- (m_vars[i].type.dst == c_dv_data_slice ||
- m_vars[i].type.dst == c_dv_data) ?
+ (m_vars_extra[i].type_dst == c_dv_data_slice ||
+ m_vars_extra[i].type_dst == c_dv_data) ?
static_cast<ArrDesc*>(m_vars[i].into) :
*static_cast<ArrDesc**>(m_vars[i].into);
void *buf = reinterpret_cast<char*>(dvp->Base) -
break;
default:
- LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
+ LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
abort();
}
}
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
~OffloadDescriptor() {
if (m_vars != 0) {
free(m_vars);
+ free(m_vars_extra);
}
}
VarDesc* m_vars;
int m_vars_total;
int m_offload_number;
+
+ // extra data associated with each variable descriptor
+ struct VarExtra {
+ uint16_t type_src;
+ uint16_t type_dst;
+ };
+
+ VarExtra* m_vars_extra;
};
// one time target initialization in main
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
#if HOST_LIBRARY
#include "offload_table.h"
+#ifdef MYO_SUPPORT
#include "offload_myo_host.h"
+#endif // MYO_SUPPORT
#else
#include "compiler_if_target.h"
#include "offload_target.h"
+#ifdef MYO_SUPPORT
#include "offload_myo_target.h"
-#endif
+#endif // MYO_SUPPORT
+#endif // HOST_LIBRARY
// Initializes library and registers specified offload image.
// Don't use this declarations from offload_host.h as offload_table.h
{
bool success;
+ // Set offload version
+ __offload_set_version(OFFLOAD_VERSION_17);
+
// register offload tables
__offload_register_tables(&__offload_entry_node,
&__offload_func_node,
#ifndef TARGET_WINNT
static void offload_fini_so() __attribute__((destructor(101)));
-#else // TARGET_WINNT
-static void offload_init_so();
#endif // TARGET_WINNT
static void offload_fini()
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
#if HOST_LIBRARY
#include "offload_table.h"
+#ifdef MYO_SUPPORT
#include "offload_myo_host.h"
+#endif // MYO_SUPPORT
#else
#include "offload_target.h"
+#ifdef MYO_SUPPORT
#include "offload_myo_target.h"
-#endif
+#endif // MYO_SUPPORT
+#endif // HOST_LIBRARY
#ifdef TARGET_WINNT
#define ALLOCATE(name) __declspec(allocate(name))
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
/*
- Copyright (c) 2014-2015 Intel Corporation. All Rights Reserved.
+ Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions