Creating release candidate final from release_390 branch

git-svn-id: https://llvm.org/svn/llvm-project/openmp/tags/RELEASE_390@280323 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/final/CMakeLists.txt b/final/CMakeLists.txt
new file mode 100644
index 0000000..3d60633
--- /dev/null
+++ b/final/CMakeLists.txt
@@ -0,0 +1,5 @@
+cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
+
+set(OPENMP_LLVM_TOOLS_DIR "" CACHE PATH "Path to LLVM tools for testing")
+
+add_subdirectory(runtime)
diff --git a/final/CREDITS.txt b/final/CREDITS.txt
new file mode 100644
index 0000000..a8ab67a
--- /dev/null
+++ b/final/CREDITS.txt
@@ -0,0 +1,57 @@
+This file is a partial list of people who have contributed to the LLVM/openmp
+project.  If you have contributed a patch or made some other contribution to
+LLVM/openmp, please submit a patch to this file to add yourself, and it will be
+done!
+
+The list is sorted by surname and formatted to allow easy grepping and
+beautification by scripts.  The fields are: name (N), email (E), web-address
+(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
+(S).
+
+N: Carlo Bertolli
+W: http://ibm.com
+D: IBM contributor to PowerPC support in CMake files and elsewhere.
+
+N: Sunita Chandrasekaran
+D: Contributor to testsuite from OpenUH
+
+N: Barbara Chapman
+D: Contributor to testsuite from OpenUH
+
+N: University of Houston
+W: http://web.cs.uh.edu/~openuh/download/
+D: OpenUH test suite
+
+N: Intel Corporation OpenMP runtime team
+W: http://openmprtl.org
+D: Created the runtime.
+
+N: John Mellor-Crummey and other members of the OpenMP Tools Working Group
+E: johnmc@rice.edu
+D: OpenMP Tools Interface (OMPT)
+
+N: Matthias Muller
+D: Contributor to testsuite from OpenUH
+
+N: Tal Nevo
+E: tal@scalemp.com
+D: ScaleMP contributor to improve runtime performance there.
+W: http://scalemp.com
+
+N: Pavel Neytchev
+D: Contributor to testsuite from OpenUH
+
+N: Steven Noonan
+E: steven@uplinklabs.net
+D: Patches for the ARM architecture and removal of several inconsistencies.
+
+N: Alp Toker
+E: alp@nuanti.com
+D: Making build work for FreeBSD.
+
+N: Cheng Wang
+D: Contributor to testsuite from OpenUH
+
+N: Diego Caballero
+E: diego.l.caballero@gmail.com
+D: Fork performance improvements
diff --git a/final/LICENSE.txt b/final/LICENSE.txt
new file mode 100644
index 0000000..d858552
--- /dev/null
+++ b/final/LICENSE.txt
@@ -0,0 +1,174 @@
+==============================================================================
+
+The software contained in this directory tree is dual licensed under both the
+University of Illinois "BSD-Like" license and the MIT license.  As a user of
+this code you may choose to use it under either license.  As a contributor,
+you agree to allow your code to be used under both.  The full text of the
+relevant licenses is included below.
+
+In addition, a license agreement from the copyright/patent holders of the
+software contained in this directory tree is included below.
+
+==============================================================================
+
+University of Illinois/NCSA
+Open Source License
+
+Copyright (c) 1997-2016 Intel Corporation
+
+All rights reserved.
+
+Developed by:
+    OpenMP Runtime Team
+    Intel Corporation
+    http://www.openmprtl.org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimers.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimers in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the names of Intel Corporation OpenMP Runtime Team nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this Software without specific prior written permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
+
+==============================================================================
+
+Copyright (c) 1997-2016 Intel Corporation
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+==============================================================================
+
+Intel Corporation
+
+Software Grant License Agreement ("Agreement")
+
+Except for the license granted herein to you, Intel Corporation ("Intel") reserves
+all right, title, and interest in and to the Software (defined below).
+
+Definition
+
+"Software" means the code and documentation as well as any original work of
+authorship, including any modifications or additions to an existing work, that
+is intentionally submitted by Intel to llvm.org (http://llvm.org) ("LLVM") for
+inclusion in, or documentation of, any of the products owned or managed by LLVM
+(the "Work"). For the purposes of this definition, "submitted" means any form of
+electronic, verbal, or written communication sent to LLVM or its
+representatives, including but not limited to communication on electronic
+mailing lists, source code control systems, and issue tracking systems that are
+managed by, or on behalf of, LLVM for the purpose of discussing and improving
+the Work, but excluding communication that is conspicuously marked otherwise.
+
+1. Grant of Copyright License. Subject to the terms and conditions of this
+   Agreement, Intel hereby grants to you and to recipients of the Software
+   distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+   royalty-free, irrevocable copyright license to reproduce, prepare derivative
+   works of, publicly display, publicly perform, sublicense, and distribute the
+   Software and such derivative works.
+
+2. Grant of Patent License. Subject to the terms and conditions of this
+   Agreement, Intel hereby grants you and to recipients of the Software
+   distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+   royalty-free, irrevocable (except as stated in this section) patent license
+   to make, have made, use, offer to sell, sell, import, and otherwise transfer
+   the Work, where such license applies only to those patent claims licensable
+   by Intel that are necessarily infringed by Intel's Software alone or by
+   combination of the Software with the Work to which such Software was
+   submitted. If any entity institutes patent litigation against Intel or any
+   other entity (including a cross-claim or counterclaim in a lawsuit) alleging
+   that Intel's Software, or the Work to which Intel has contributed constitutes
+   direct or contributory patent infringement, then any patent licenses granted
+   to that entity under this Agreement for the Software or Work shall terminate
+   as of the date such litigation is filed.
+
+Unless required by applicable law or agreed to in writing, the software is
+provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+either express or implied, including, without limitation, any warranties or
+conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE.
+
+==============================================================================
+
+ARM Limited
+
+Software Grant License Agreement ("Agreement")
+
+Except for the license granted herein to you, ARM Limited ("ARM") reserves all
+right, title, and interest in and to the Software (defined below).
+
+Definition
+
+"Software" means the code and documentation as well as any original work of
+authorship, including any modifications or additions to an existing work, that
+is intentionally submitted by ARM to llvm.org (http://llvm.org) ("LLVM") for
+inclusion in, or documentation of, any of the products owned or managed by LLVM
+(the "Work"). For the purposes of this definition, "submitted" means any form of
+electronic, verbal, or written communication sent to LLVM or its
+representatives, including but not limited to communication on electronic
+mailing lists, source code control systems, and issue tracking systems that are
+managed by, or on behalf of, LLVM for the purpose of discussing and improving
+the Work, but excluding communication that is conspicuously marked otherwise.
+
+1. Grant of Copyright License. Subject to the terms and conditions of this
+   Agreement, ARM hereby grants to you and to recipients of the Software
+   distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+   royalty-free, irrevocable copyright license to reproduce, prepare derivative
+   works of, publicly display, publicly perform, sublicense, and distribute the
+   Software and such derivative works.
+
+2. Grant of Patent License. Subject to the terms and conditions of this
+   Agreement, ARM hereby grants you and to recipients of the Software
+   distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge,
+   royalty-free, irrevocable (except as stated in this section) patent license
+   to make, have made, use, offer to sell, sell, import, and otherwise transfer
+   the Work, where such license applies only to those patent claims licensable
+   by ARM that are necessarily infringed by ARM's Software alone or by
+   combination of the Software with the Work to which such Software was
+   submitted. If any entity institutes patent litigation against ARM or any
+   other entity (including a cross-claim or counterclaim in a lawsuit) alleging
+   that ARM's Software, or the Work to which ARM has contributed constitutes
+   direct or contributory patent infringement, then any patent licenses granted
+   to that entity under this Agreement for the Software or Work shall terminate
+   as of the date such litigation is filed.
+
+Unless required by applicable law or agreed to in writing, the software is
+provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+either express or implied, including, without limitation, any warranties or
+conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE.
+
+==============================================================================
diff --git a/final/offload/Makefile b/final/offload/Makefile
new file mode 100755
index 0000000..75e3744
--- /dev/null
+++ b/final/offload/Makefile
@@ -0,0 +1,224 @@
+#

+##//===----------------------------------------------------------------------===//

+#//

+#//                     The LLVM Compiler Infrastructure

+#//

+#// This file is dual licensed under the MIT and the University of Illinois Open

+#// Source Licenses. See LICENSE.txt for details.

+#//

+#//===----------------------------------------------------------------------===//

+#

+

+# MAKEFILE PARAMETERS

+#

+# root_dir - path to root directory of liboffload

+# build_dir - path to build directory

+# mpss_dir - path to root directory of mpss

+# mpss_version - version of the mpss (e.g., version "3.3.x" would be "33")

+# libiomp_host_dir - path to host libiomp directory (unnecessary if compiler_host is icc)

+# libiomp_target_dir - path to target libiomp directory (unnecessary if compiler_target is icc)

+# omp_header_dir - path to omp.h (unnecessary if compiler_host and compiler_target are icc)

+# os_host - host operating system

+# os_target - target operating system

+# compiler_host - host compiler

+# compiler_target - target compiler

+# options_host - additional options for host compiler

+# options_target - additional options for target compiler

+#

+

+# Directories

+root_dir?=.

+build_dir?=$(root_dir)/build

+build_host_dir=$(build_dir)/host

+build_target_dir=$(build_dir)/target

+obj_host_dir=$(build_dir)/obj_host

+obj_target_dir=$(build_dir)/obj_target

+source_dir=$(root_dir)/src

+imported_dir=$(source_dir)/imported

+

+# OS

+os_host?=linux

+os_target?=linux

+ifneq ($(os_host)_$(os_target), linux_linux)

+  $(error "Only linux is supported")

+endif

+

+# Compilers

+compiler_host?=gcc

+compiler_target?=gcc

+

+# MPSS

+mpss_version?=30

+mpss_dir?=/

+mpss_present=$(shell if test -d $(mpss_dir); then echo OK; else echo KO; fi)

+ifneq ($(mpss_present), OK)

+  $(error "Cannot find MPSS directory $(mpss_dir)")

+endif

+

+ifeq ($(shell test $(mpss_version) -gt 33; echo $$?), 0)

+  coi_dir=$(mpss_dir)/sysroots/k1om-mpss-linux/usr

+  coi_include=$(coi_dir)/include/intel-coi

+  coi_lib_host=$(mpss_dir)/lib64

+  coi_lib_device=$(coi_dir)/lib64

+else

+  coi_dir=$(mpss_dir)/opt/intel/mic/coi

+  coi_include=$(coi_dir)/include

+  coi_lib_host=$(coi_dir)/host-linux-release/lib

+  coi_lib_device=$(coi_dir)/device-linux-release/lib

+endif

+myo_dir=$(mpss_dir)/opt/intel/mic/myo

+

+# Sources

+src_liboffload_common=dv_util.cpp liboffload_error.c liboffload_msg.c offload_common.cpp offload_table.cpp offload_trace.cpp offload_util.cpp

+

+src_liboffload_host=$(src_liboffload_common) cean_util.cpp coi/coi_client.cpp compiler_if_host.cpp offload_engine.cpp offload_env.cpp offload_host.cpp offload_omp_host.cpp offload_timer_host.cpp offload_orsl.cpp orsl-lite/lib/orsl-lite.c offload_myo_host.cpp

+src_liboffload_host:=$(foreach file,$(src_liboffload_host),$(source_dir)/$(file))

+

+src_liboffload_target=$(src_liboffload_common) coi/coi_server.cpp compiler_if_target.cpp offload_omp_target.cpp offload_target.cpp offload_timer_target.cpp offload_myo_target.cpp

+src_liboffload_target:=$(foreach file,$(src_liboffload_target),$(source_dir)/$(file))

+

+src_ofld=ofldbegin.cpp ofldend.cpp

+src_ofld:=$(foreach file,$(src_ofld),$(source_dir)/$(file))

+

+headers=$(wildcard $(source_dir)/*.h) $(wildcard $(source_dir)/coi/*.h) $(wildcard $(source_dir)/orsl-lite/include/*.h)

+ifneq ($(omp_header_dir), )

+  headers+=$(imported_dir)/omp.h

+endif

+

+# Objects

+obj_liboffload_host=$(notdir $(src_liboffload_host))

+obj_liboffload_host:=$(obj_liboffload_host:.cpp=.o)

+obj_liboffload_host:=$(obj_liboffload_host:.c=.o)

+obj_liboffload_host:=$(foreach file,$(obj_liboffload_host),$(obj_host_dir)/$(file))

+

+obj_liboffload_target=$(notdir $(src_liboffload_target))

+obj_liboffload_target:=$(obj_liboffload_target:.cpp=.o)

+obj_liboffload_target:=$(obj_liboffload_target:.c=.o)

+obj_liboffload_target:=$(foreach file,$(obj_liboffload_target),$(obj_target_dir)/$(file))

+

+obj_ofld=$(notdir $(src_ofld))

+obj_ofld:=$(obj_ofld:.cpp=.o)

+obj_ofld_host=$(foreach file,$(obj_ofld),$(build_host_dir)/$(file))

+obj_ofld_target=$(foreach file,$(obj_ofld),$(build_target_dir)/$(file))

+

+# Options

+opts_common=-O2 -w -fpic -c -DCOI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -I$(coi_include) -I$(myo_dir)/include -I$(source_dir)

+ifneq ($(omp_header_dir), )

+  opts_common+=-I$(imported_dir)

+endif

+

+opts_liboffload=-shared -Wl,-soname,liboffload.so.5 -ldl -lstdc++ -liomp5

+

+opts_liboffload_host=$(opts_liboffload) -L$(coi_lib_host) -lcoi_host -L$(myo_dir)/lib -lmyo-client

+ifneq ($(libiomp_host_dir), )

+  opts_liboffload_host+=-L$(libiomp_host_dir)

+endif

+

+opts_liboffload_target=$(opts_liboffload) -L$(coi_lib_device) -lcoi_device -L$(myo_dir)/lib -lmyo-service

+ifneq ($(libiomp_target_dir), )

+  opts_liboffload_target+=-L$(libiomp_target_dir)

+endif

+

+options_host?=

+opts_host=$(options_host) -DHOST_LIBRARY=1 -DMPSS_VERSION=$(mpss_version)

+ifeq ($(os_host), linux)

+  opts_host+=-DLINUX

+endif

+

+options_target?=

+opts_target=$(options_target) -DHOST_LIBRARY=0

+ifeq ($(os_target), linux)

+  opts_target+=-DLINUX

+endif

+ifeq ($(compiler_target), icc)

+  opts_target+=-mmic

+endif

+

+# Make targets

+.PHONY: all clean info

+

+all: info $(build_host_dir)/liboffload.so $(build_target_dir)/liboffload.so $(obj_ofld_host) $(obj_ofld_target)

+

+

+$(build_host_dir)/liboffload.so: $(build_host_dir)/liboffload.so.5 | $(build_host_dir)

+	ln -f $< $@

+

+$(build_host_dir)/liboffload.so.5: $(obj_liboffload_host) | $(build_host_dir)

+	$(compiler_host) $(opts_liboffload_host) $(opts_host) $^ -o $@

+

+$(obj_host_dir)/%.o: $(source_dir)/%.c $(headers) | $(obj_host_dir)

+	$(compiler_host) $(opts_common) $(opts_host) $< -o $@

+

+$(obj_host_dir)/%.o: $(source_dir)/%.cpp $(headers) | $(obj_host_dir)

+	$(compiler_host) $(opts_common) $(opts_host) $< -o $@

+

+$(obj_host_dir)/%.o: $(source_dir)/coi/%.cpp $(headers) | $(obj_host_dir)

+	$(compiler_host) $(opts_common) $(opts_host) $< -o $@

+

+$(obj_host_dir)/%.o: $(source_dir)/orsl-lite/lib/%.c $(headers) | $(obj_host_dir)

+	$(compiler_host) $(opts_common) $(opts_host) $< -o $@

+

+

+$(build_target_dir)/liboffload.so: $(build_target_dir)/liboffload.so.5 | $(build_target_dir)

+	ln -f $< $@

+

+$(build_target_dir)/liboffload.so.5: $(obj_liboffload_target) | $(build_target_dir)

+	$(compiler_target) $(opts_liboffload_target) $(opts_target) $^ -o $@

+

+$(obj_target_dir)/%.o: $(source_dir)/%.c $(headers) | $(obj_target_dir)

+	$(compiler_target) $(opts_common) $(opts_target) $< -o $@

+

+$(obj_target_dir)/%.o: $(source_dir)/%.cpp $(headers) | $(obj_target_dir)

+	$(compiler_target) $(opts_common) $(opts_target) $< -o $@

+

+$(obj_target_dir)/%.o: $(source_dir)/coi/%.cpp $(headers) | $(obj_target_dir)

+	$(compiler_target) $(opts_common) $(opts_target) $< -o $@

+

+$(obj_target_dir)/%.o: $(source_dir)/orsl-lite/lib/%.c $(headers) | $(obj_target_dir)

+	$(compiler_target) $(opts_common) $(opts_target) $< -o $@

+

+

+$(build_host_dir)/%.o: $(source_dir)/%.cpp $(headers) | $(build_host_dir)

+	$(compiler_host) $(opts_common) $(opts_host) $< -o $@

+

+$(build_target_dir)/%.o: $(source_dir)/%.cpp $(headers) | $(build_target_dir)

+	$(compiler_target) $(opts_common) $(opts_target) $< -o $@

+

+

+$(imported_dir)/omp.h: $(omp_header_dir)/omp.h | $(imported_dir)

+	cp $< $@

+

+

+$(build_host_dir) $(build_target_dir) $(obj_host_dir) $(obj_target_dir): | $(build_dir)

+	$(shell mkdir -p $@ >/dev/null 2>/dev/null)

+	@echo "Created $@ directory"

+

+$(build_dir):

+	$(shell mkdir -p $@ >/dev/null 2>/dev/null)

+	@echo "Created $@ directory"

+

+$(imported_dir):

+	$(shell mkdir -p $@ >/dev/null 2>/dev/null)

+	@echo "Created $@ directory"

+

+

+clean:

+	$(shell rm -rf $(build_dir))

+	@echo "Remove $(build_dir) directory"

+

+

+info:

+	@echo "root_dir = $(root_dir)"

+	@echo "build_dir = $(build_dir)"

+	@echo "mpss_dir = $(mpss_dir)"

+	@echo "mpss_version = $(mpss_version)"

+	@echo "libiomp_host_dir = $(libiomp_host_dir)"

+	@echo "libiomp_target_dir = $(libiomp_target_dir)"

+	@echo "omp_header_dir = $(omp_header_dir)"

+	@echo "os_host = $(os_host)"

+	@echo "os_target = $(os_target)"

+	@echo "compiler_host = $(compiler_host)"

+	@echo "compiler_target = $(compiler_target)"

+	@echo "options_host = $(options_host)"

+	@echo "options_target = $(options_target)"

+

diff --git a/final/offload/README.txt b/final/offload/README.txt
new file mode 100755
index 0000000..eb9fb1d
--- /dev/null
+++ b/final/offload/README.txt
@@ -0,0 +1,129 @@
+

+	       README for Intel(R) Offload Runtime Library

+	       ===========================================

+

+How to Build Documentation

+==========================

+

+The main documentation is in Doxygen* format, and this distribution

+should come with pre-built PDF documentation in doc/Reference.pdf.

+However, an HTML version can be built by executing:

+

+% doxygen doc/doxygen/config

+

+in this directory.

+

+That will produce HTML documentation in the doc/doxygen/generated

+directory, which can be accessed by pointing a web browser at the

+index.html file there.

+

+If you don't have Doxygen installed, you can download it from

+www.doxygen.org.

+

+

+Software Requirements

+=====================

+

+Intel(R) Offload Runtime Library requires additional software:

+

+1) Intel(R) OpenMP* Runtime Library.  You can either download the source

+code for that (from openmprtl.org or openmp.llvm.org) or simply use the

+compiled version distributed with the Intel compilers.

+2) Intel(R) COI Runtime Library and Intel(R) MYO Runtime Library.  These

+libraries are part of Intel(R) Manycore Platform Software Stack (MPSS).  You

+can download MPSS source code or binaries from

+software.intel.com/en-us/articles/intel-manycore-platform-software-stack-mpss.

+Binaries include host libraries for Intel(R) 64 Architecture and target

+libraries for Intel(R) Many Integrated Core Architecture.

+

+Also you will require all of the libraries that enable the target code to run

+on device.  If you target the Intel(R) Xeon Phi (TM) coprocessor, these

+libraries can be taken from MPSS too.

+

+

+How to Build the Intel(R) Offload Runtime Library

+=================================================

+

+The Makefile at the top-level will attempt to detect what it needs to

+build the Intel(R) Offload Runtime Library.  To see the default settings,

+type:

+

+make info

+

+You can change the Makefile's behavior with the following options:

+

+root_dir:	      The path to the top-level directory containing the

+		      top-level Makefile.  By default, this will take on the

+		      value of the current working directory.

+

+build_dir:	      The path to the build directory.  By default, this will

+		      take on the value [root_dir]/build.

+

+mpss_dir:	      The path to the Intel(R) Manycore Platform Software

+		      Stack install directory.  By default, this will take on

+		      the value of the operating system's root directory.

+

+libiomp_host_dir:     The path to the host Intel(R) OpenMP* Runtime Library.

+		      This option is required when the host compiler is other

+		      than icc.

+

+libiomp_target_dir:   The path to the target Intel(R) OpenMP* Runtime

+		      Library.  This option is required when the target

+		      compiler is other than icc.

+

+omp_header_dir:       The path to the header file <omp.h> of Intel(R) OpenMP*

+		      Runtime Library.  This option is required if either host

+		      or target compiler is other than icc.

+

+os_host:	      Operating system on host.  Currently supports only

+		      "linux" which is set by default.

+

+os_target:	      Operating system on target device.  Currently supports

+		      only "linux" which is set by default.

+

+compiler_host:	      Which compiler to use for the build of the host part.

+		      Defaults to "gcc"*.  Also supports "icc" and "clang"*.

+		      You should provide the full path to the compiler or it

+		      should be in the user's path.

+

+compiler_target:      Which compiler to use for the build of the target part.

+		      Defaults to "gcc"*.  Also supports "icc" and "clang"*.

+		      You should provide the full path to the compiler or it

+		      should be in the user's path.

+

+options_host:	      Additional options for the host compiler.

+

+options_target:       Additional options for the target compiler.

+

+To use any of the options above, simply add <option_name>=<value>.  For

+example, if you want to build with icc instead of gcc, type:

+

+make compiler_host=icc compiler_target=icc

+

+

+Supported RTL Build Configurations

+==================================

+

+Supported Architectures: Intel(R) 64, and Intel(R) Many Integrated

+Core Architecture

+

+	      ---------------------------------------------

+	      |   icc/icl     |    gcc      |    clang    |

+--------------|---------------|---------------------------|

+| Linux* OS   |      Yes      |     Yes(1)  |     Yes(1)  |

+| OS X*       |       No      |      No     |      No     |

+| Windows* OS |       No      |      No     |      No     |

+-----------------------------------------------------------

+

+(1) Liboffload requires the _rdtsc intrinsic, which may be unsupported by some

+    versions of the compiler.  In this case you need to include src/rdtsc.h

+    manually by using Makefile options options_host and options_target:

+

+    make options_host="-include src/rdtsc.h" options_target="-include src/rdtsc.h"

+

+-----------------------------------------------------------------------

+

+Notices

+=======

+

+*Other names and brands may be claimed as the property of others.

diff --git a/final/offload/doc/Reference.pdf b/final/offload/doc/Reference.pdf
new file mode 100644
index 0000000..b9176f0
--- /dev/null
+++ b/final/offload/doc/Reference.pdf
Binary files differ
diff --git a/final/offload/doc/doxygen/config b/final/offload/doc/doxygen/config
new file mode 100755
index 0000000..d45b696
--- /dev/null
+++ b/final/offload/doc/doxygen/config
@@ -0,0 +1,2328 @@
+# Doxyfile 1.8.6
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a double hash (##) is considered a comment and is placed in
+# front of the TAG it is preceding.
+#
+# All text after a single hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists, items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (\" \").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all text
+# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
+# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
+# for the list of possible encodings.
+# The default value is: UTF-8.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
+# double-quotes, unless you are using Doxywizard) that should identify the
+# project for which the documentation is generated. This name is used in the
+# title of most generated pages and in a few other places.
+# The default value is: My Project.
+
+PROJECT_NAME           = "Intel&reg;&nbsp;Offload Runtime Library"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
+# could be handy for archiving the generated documentation or if some version
+# control system is used.
+
+PROJECT_NUMBER         = 
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer a
+# quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF          = 
+
+# With the PROJECT_LOGO tag one can specify a logo or icon that is included in
+# the documentation. The maximum height of the logo should not exceed 55 pixels
+# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo
+# to the output directory.
+
+PROJECT_LOGO           = 
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
+# into which the generated documentation will be written. If a relative path is
+# entered, it will be relative to the location where doxygen was started. If
+# left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = doc/doxygen/generated
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub-
+# directories (in 2 levels) under the output directory of each output format and
+# will distribute the generated files over these directories. Enabling this
+# option can be useful when feeding doxygen a huge amount of source files, where
+# putting all generated files in the same directory would otherwise cause
+# performance problems for the file system.
+# The default value is: NO.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
+# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
+# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
+# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
+# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
+# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
+# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
+# Ukrainian and Vietnamese.
+# The default value is: English.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member
+# descriptions after the members that are listed in the file and class
+# documentation (similar to Javadoc). Set to NO to disable this.
+# The default value is: YES.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief
+# description of a member or function before the detailed description
+#
+# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+# The default value is: YES.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator that is
+# used to form the text in various listings. Each string in this list, if found
+# as the leading text of the brief description, will be stripped from the text
+# and the result, after processing the whole list, is used as the annotated
+# text. Otherwise, the brief description is used as-is. If left blank, the
+# following values are used ($name is automatically replaced with the name of
+# the entity): The $name class, The $name widget, The $name file, is, provides,
+# specifies, contains, represents, a, an and the.
+
+ABBREVIATE_BRIEF       = "The $name class" \
+                         "The $name widget" \
+                         "The $name file" \
+                         is \
+                         provides \
+                         specifies \
+                         contains \
+                         represents \
+                         a \
+                         an \
+                         the
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# doxygen will generate a detailed section even if there is only a brief
+# description.
+# The default value is: NO.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+# The default value is: NO.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path
+# before file names in the file list and in the header files. If set to NO the
+# shortest path that makes the file name unique will be used.
+# The default value is: YES.
+
+FULL_PATH_NAMES        = YES
+
+# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
+# Stripping is only done if one of the specified strings matches the left-hand
+# part of the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the path to
+# strip.
+#
+# Note that you can specify absolute paths here, but also relative paths, which
+# will be relative from the directory where doxygen is started.
+# This tag requires that the tag FULL_PATH_NAMES is set to YES.
+
+STRIP_FROM_PATH        = src/
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
+# path mentioned in the documentation of a class, which tells the reader which
+# header file to include in order to use a class. If left blank only the name of
+# the header file containing the class definition is used. Otherwise one should
+# specify the list of include paths that are normally passed to the compiler
+# using the -I flag.
+
+STRIP_FROM_INC_PATH    = src/
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
+# less readable) file names. This can be useful if your file system doesn't
+# support long names like on DOS, Mac, or CD-ROM.
+# The default value is: NO.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
+# first line (until the first dot) of a Javadoc-style comment as the brief
+# description. If set to NO, the Javadoc-style will behave just like regular Qt-
+# style comments (thus requiring an explicit @brief command for a brief
+# description.)
+# The default value is: NO.
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
+# line (until the first dot) of a Qt-style comment as the brief description. If
+# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
+# requiring an explicit \brief command for a brief description.)
+# The default value is: NO.
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
+# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
+# a brief description. This used to be the default behavior. The new default is
+# to treat a multi-line C++ comment block as a detailed description. Set this
+# tag to YES if you prefer the old behavior instead.
+#
+# Note that setting this tag to YES also means that Rational Rose comments are
+# not recognized any more.
+# The default value is: NO.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
+# documentation from any documented member that it re-implements.
+# The default value is: YES.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a
+# new page for each member. If set to NO, the documentation of a member will be
+# part of the file/class/namespace that contains it.
+# The default value is: NO.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
+# uses this value to replace tabs by spaces in code fragments.
+# Minimum value: 1, maximum value: 16, default value: 4.
+
+TAB_SIZE               = 8
+
+# This tag can be used to specify a number of aliases that act as commands in
+# the documentation. An alias has the form:
+# name=value
+# For example adding
+# "sideeffect=@par Side Effects:\n"
+# will allow you to put the command \sideeffect (or @sideeffect) in the
+# documentation, which will result in a user-defined paragraph with heading
+# "Side Effects:". You can put \n's in the value part of an alias to insert
+# newlines.
+
+ALIASES                = 
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding "class=itcl::class"
+# will allow you to use the command class in the itcl::class meaning.
+
+TCL_SUBST              = 
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C. For
+# instance, some of the names that are used will be different. The list of all
+# members will be omitted, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
+# Python sources only. Doxygen will then generate output that is more tailored
+# for that language. For instance, namespaces will be presented as packages,
+# qualified scopes will look different, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources. Doxygen will then generate output that is tailored for Fortran.
+# The default value is: NO.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for VHDL.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension, and
+# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
+# C#, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make
+# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
+# (default is Fortran), use: inc=Fortran f=C.
+#
+# Note: For files without extension you can use no_extension as a placeholder.
+#
+# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
+# the files are not read by doxygen.
+
+EXTENSION_MAPPING      = 
+
+# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
+# according to the Markdown format, which allows for more readable
+# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you can
+# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
+# case of backward compatibility issues.
+# The default value is: YES.
+
+MARKDOWN_SUPPORT       = YES
+
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by putting a % sign in front of the word
+# or globally by setting AUTOLINK_SUPPORT to NO.
+# The default value is: YES.
+
+AUTOLINK_SUPPORT       = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should set this
+# tag to YES in order to let doxygen match function declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string);
+# versus func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+# The default value is: NO.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+# The default value is: NO.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
+# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
+# will parse them like normal C++ but will assume all classes use public instead
+# of private inheritance when no explicit protection keyword is present.
+# The default value is: NO.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES will make
+# doxygen replace the get and set methods with a property in the documentation.
+# This will only work if the methods are indeed getting or setting a simple
+# type. If this is not the case, or you want to show the methods anyway, you
+# should set this option to NO.
+# The default value is: YES.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+# The default value is: NO.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES to allow class member groups of the same type
+# (for instance a group of public functions) to be put as a subgroup of that
+# type (e.g. under the Public Functions section). Set it to NO to prevent
+# subgrouping. Alternatively, this can be done per class using the
+# \nosubgrouping command.
+# The default value is: YES.
+
+SUBGROUPING            = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
+# are shown inside the group in which they are included (e.g. using \ingroup)
+# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
+# and RTF).
+#
+# Note that this feature does not work in combination with
+# SEPARATE_MEMBER_PAGES.
+# The default value is: NO.
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
+# with only public data fields or simple typedef fields will be shown inline in
+# the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO, structs, classes, and unions are shown on a separate page (for HTML and
+# Man pages) or section (for LaTeX and RTF).
+# The default value is: NO.
+
+INLINE_SIMPLE_STRUCTS  = NO
+
+# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
+# enum is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically be
+# useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+# The default value is: NO.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can be
+# an expensive process and often the same symbol appears multiple times in the
+# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
+# doxygen will become slower. If the cache is too large, memory is wasted. The
+# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
+# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
+# symbols. At the end of a run doxygen will report the cache usage and suggest
+# the optimal cache size from a speed point of view.
+# Minimum value: 0, maximum value: 9, default value: 0.
+
+LOOKUP_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available. Private
+# class members and static file members will be hidden unless the
+# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
+# Note: This will also disable the warnings about undocumented members that are
+# normally produced when WARNINGS is set to YES.
+# The default value is: NO.
+
+EXTRACT_ALL            = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will
+# be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIVATE        = YES
+
+# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal
+# scope will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PACKAGE        = YES
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file will be
+# included in the documentation.
+# The default value is: NO.
+
+EXTRACT_STATIC         = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined
+# locally in source files will be included in the documentation. If set to NO
+# only classes defined in header files are included. Does not have any effect
+# for Java sources.
+# The default value is: YES.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local methods,
+# which are defined in the implementation section but not in the interface are
+# included in the documentation. If set to NO only methods in the interface are
+# included.
+# The default value is: NO.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base name of
+# the file that contains the anonymous namespace. By default anonymous namespaces
+# are hidden.
+# The default value is: NO.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
+# undocumented members inside documented classes or files. If set to NO these
+# members will be included in the various overviews, but no documentation
+# section is generated. This option has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy. If set
+# to NO these classes will be included in the various overviews. This option has
+# no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
+# (class|struct|union) declarations. If set to NO these declarations will be
+# included in the documentation.
+# The default value is: NO.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
+# documentation blocks found inside the body of a function. If set to NO these
+# blocks will be appended to the function's detailed documentation block.
+# The default value is: NO.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation that is typed after a
+# \internal command is included. If the tag is set to NO then the documentation
+# will be excluded. Set it to YES to include the internal documentation.
+# The default value is: NO.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
+# names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+# The default value is: system dependent.
+
+CASE_SENSE_NAMES       = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
+# their full class and namespace scopes in the documentation. If set to YES the
+# scope will be hidden.
+# The default value is: NO.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
+# the files that are included by a file in the documentation of that file.
+# The default value is: YES.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If YES, doxygen adds for each grouped member the file to include to use it.
+SHOW_GROUPED_MEMB_INC  = NO
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
+# files with double quotes in the documentation rather than with sharp brackets.
+# The default value is: NO.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
+# documentation for inline members.
+# The default value is: YES.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
+# (detailed) documentation of file and class members alphabetically by member
+# name. If set to NO the members will appear in declaration order.
+# The default value is: YES.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
+# descriptions of file, namespace and class members alphabetically by member
+# name. If set to NO the members will appear in declaration order. Note that
+# this will also influence the order of the classes in the class list.
+# The default value is: NO.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
+# (brief and detailed) documentation of class members so that constructors and
+# destructors are listed first. If set to NO the constructors will appear in the
+# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
+# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
+# member documentation.
+# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
+# detailed member documentation.
+# The default value is: NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
+# of group names into alphabetical order. If set to NO the group names will
+# appear in their defined order.
+# The default value is: NO.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
+# fully-qualified names, including namespaces. If set to NO, the class list will
+# be sorted only by class name, not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the alphabetical
+# list.
+# The default value is: NO.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
+# type resolution of all parameters of a function it will reject a match between
+# the prototype and the implementation of a member function even if there is
+# only one candidate or it is obvious which candidate to choose by doing a
+# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
+# accept a match between prototype and implementation in such cases.
+# The default value is: NO.
+
+STRICT_PROTO_MATCHING  = NO
+
+# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the
+# todo list. This list is created by putting \todo commands in the
+# documentation.
+# The default value is: YES.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the
+# test list. This list is created by putting \test commands in the
+# documentation.
+# The default value is: YES.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug
+# list. This list is created by putting \bug commands in the documentation.
+# The default value is: YES.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO)
+# the deprecated list. This list is created by putting \deprecated commands in
+# the documentation.
+# The default value is: YES.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional documentation
+# sections, marked by \if <section_label> ... \endif and \cond <section_label>
+# ... \endcond blocks.
+
+ENABLED_SECTIONS       = 
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
+# initial value of a variable or macro / define can have for it to appear in the
+# documentation. If the initializer consists of more lines than specified here
+# it will be hidden. Use a value of 0 to hide initializers completely. The
+# appearance of the value of individual variables and macros / defines can be
+# controlled using \showinitializer or \hideinitializer command in the
+# documentation regardless of this setting.
+# Minimum value: 0, maximum value: 10000, default value: 30.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
+# the bottom of the documentation of classes and structs. If set to YES the list
+# will mention the files that were used to generate the documentation.
+# The default value is: YES.
+
+SHOW_USED_FILES        = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
+# will remove the Files entry from the Quick Index and from the Folder Tree View
+# (if specified).
+# The default value is: YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
+# page. This will remove the Namespaces entry from the Quick Index and from the
+# Folder Tree View (if specified).
+# The default value is: YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command command input-file, where command is the value of the
+# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
+# by doxygen. Whatever the program writes to standard output is used as the file
+# version. For an example see the documentation.
+
+FILE_VERSION_FILTER    = 
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option. You can
+# optionally specify a file name after the option, if omitted DoxygenLayout.xml
+# will be used as the name of the layout file.
+#
+# Note that if you run doxygen from a directory containing a file called
+# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
+# tag is left empty.
+
+LAYOUT_FILE            = 
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
+# the reference definitions. This must be a list of .bib files. The .bib
+# extension is automatically appended if omitted. This requires the bibtex tool
+# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info.
+# For LaTeX the style of the bibliography can be controlled using
+# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
+# search path. Do not use file names with spaces, bibtex cannot handle them. See
+# also \cite for info how to create references.
+
+CITE_BIB_FILES         = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated to
+# standard output by doxygen. If QUIET is set to YES this implies that the
+# messages are off.
+# The default value is: NO.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES
+# this implies that the warnings are on.
+#
+# Tip: Turn warnings on while writing the documentation.
+# The default value is: YES.
+
+WARNINGS               = YES
+
+# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate
+# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: YES.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some parameters
+# in a documented function, or documenting parameters that don't exist or using
+# markup commands wrongly.
+# The default value is: YES.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
+# are documented, but have no documentation for their parameters or return
+# value. If set to NO doxygen will only warn about wrong or incomplete parameter
+# documentation, but not about the absence of documentation.
+# The default value is: NO.
+
+WARN_NO_PARAMDOC       = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that doxygen
+# can produce. The string should contain the $file, $line, and $text tags, which
+# will be replaced by the file and line number from which the warning originated
+# and the warning text. Optionally the format may contain $version, which will
+# be replaced by the version of the file (if it could be obtained via
+# FILE_VERSION_FILTER).
+# The default value is: $file:$line: $text.
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning and error
+# messages should be written. If left blank the output is written to standard
+# error (stderr).
+
+WARN_LOGFILE           = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag is used to specify the files and/or directories that contain
+# documented source files. You may enter file names like myfile.cpp or
+# directories like /usr/src/myproject. Separate the files or directories with
+# spaces.
+# Note: If this tag is empty the current directory is searched.
+
+INPUT                  = src
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
+# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
+# documentation (see: http://www.gnu.org/software/libiconv) for the list of
+# possible encodings.
+# The default value is: UTF-8.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank the
+# following patterns are tested: *.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii,
+# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp,
+# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown,
+# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf,
+# *.qsf, *.as and *.js.
+
+FILE_PATTERNS          = *.c *.h *.cpp *.f90
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories should
+# be searched for input files as well.
+# The default value is: NO.
+
+RECURSIVE              = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+#
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE                = src/imported src/rdtsc.h
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+# The default value is: NO.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       = 
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# ANamespace::AClass, ANamespace::*Test
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories use the pattern */test/*
+
+EXCLUDE_SYMBOLS        = 
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or directories
+# that contain example code fragments that are included (see the \include
+# command).
+
+EXAMPLE_PATH           = 
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank all
+# files are included.
+
+EXAMPLE_PATTERNS       = *
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH             = 
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+# <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+
+INPUT_FILTER           = 
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form: pattern=filter
+# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
+# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
+# patterns match the file name, INPUT_FILTER is applied.
+
+FILTER_PATTERNS        = 
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will also be used to filter the input files that are used for
+# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
+# The default value is: NO.
+
+FILTER_SOURCE_FILES    = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) and
+# it is also possible to disable source filtering for a specific pattern using
+# *.ext= (so without naming a filter).
+# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
+
+FILTER_SOURCE_PATTERNS = 
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
+# generated. Documented entities will be cross-referenced with these sources.
+#
+# Note: To get rid of all source code in the generated output, make sure that
+# also VERBATIM_HEADERS is set to NO.
+# The default value is: NO.
+
+SOURCE_BROWSER         = YES
+
+# Setting the INLINE_SOURCES tag to YES will include the body of functions,
+# classes and enums directly into the documentation.
+# The default value is: NO.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
+# special comment blocks from generated source code fragments. Normal C, C++ and
+# Fortran comments will always remain visible.
+# The default value is: YES.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
+# function all documented functions referencing it will be listed.
+# The default value is: NO.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES then for each documented function
+# all documented entities called/used by that function will be listed.
+# The default value is: NO.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
+# to YES, then the hyperlinks from functions in REFERENCES_RELATION and
+# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
+# link to the documentation.
+# The default value is: YES.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
+# source code will show a tooltip with additional information such as prototype,
+# brief description and links to the definition and documentation. Since this
+# will make the HTML file larger and loading of large files a bit slower, you
+# can opt to disable this feature.
+# The default value is: YES.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+SOURCE_TOOLTIPS        = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code will
+# point to the HTML generated by the htags(1) tool instead of doxygen built-in
+# source browser. The htags tool is part of GNU's global source tagging system
+# (see http://www.gnu.org/software/global/global.html). You will need version
+# 4.8.6 or higher.
+#
+# To use it do the following:
+# - Install the latest version of global
+# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
+# - Make sure the INPUT points to the root of the source tree
+# - Run doxygen as normal
+#
+# Doxygen will invoke htags (and that will in turn invoke gtags), so these
+# tools must be available from the command line (i.e. in the search path).
+#
+# The result: instead of the source browser generated by doxygen, the links to
+# source code will now point to the output of htags.
+# The default value is: NO.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a
+# verbatim copy of the header file for each class for which an include is
+# specified. Set to NO to disable this.
+# See also: Section \class.
+# The default value is: YES.
+
+VERBATIM_HEADERS       = YES
+
+# If the CLANG_ASSISTED_PARSING tag is set to YES, then doxygen will use the
+# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the
+# cost of reduced performance. This can be particularly helpful with template
+# rich C++ code for which doxygen's built-in parser lacks the necessary type
+# information.
+# Note: The availability of this option depends on whether or not doxygen was
+# compiled with the --with-libclang option.
+# The default value is: NO.
+
+CLANG_ASSISTED_PARSING = NO
+
+# If clang assisted parsing is enabled you can provide the compiler with command
+# line options that you would normally use when invoking the compiler. Note that
+# the include paths will already be set by doxygen for the files and directories
+# specified with INPUT and INCLUDE_PATH.
+# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.
+
+CLANG_OPTIONS          = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
+# compounds will be generated. Enable this if the project contains a lot of
+# classes, structs, unions or interfaces.
+# The default value is: YES.
+
+ALPHABETICAL_INDEX     = YES
+
+# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
+# which the alphabetical index list will be split.
+# Minimum value: 1, maximum value: 20, default value: 5.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all classes will
+# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
+# can be used to specify a prefix (or a list of prefixes) that should be ignored
+# while generating the index headers.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+IGNORE_PREFIX          = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output.
+# The default value is: YES.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
+# generated HTML page (for example: .htm, .php, .asp).
+# The default value is: .html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
+# each generated HTML page. If the tag is left blank doxygen will generate a
+# standard header.
+#
+# To get valid HTML the header file must include any scripts and style sheets
+# that doxygen needs, which is dependent on the configuration options used (e.g.
+# the setting GENERATE_TREEVIEW). It is highly recommended to start with a
+# default header using
+# doxygen -w html new_header.html new_footer.html new_stylesheet.css
+# YourConfigFile
+# and then modify the file new_header.html. See also section "Doxygen usage"
+# for information on how to generate the default header that doxygen normally
+# uses.
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. For a description
+# of the possible markers and block names see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_HEADER            = 
+
+# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
+# generated HTML page. If the tag is left blank doxygen will generate a standard
+# footer. See HTML_HEADER for more information on how to generate a default
+# footer and what special commands can be used inside the footer. See also
+# section "Doxygen usage" for information on how to generate the default footer
+# that doxygen normally uses.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FOOTER            = 
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
+# sheet that is used by each HTML page. It can be used to fine-tune the look of
+# the HTML output. If left blank doxygen will generate a default style sheet.
+# See also section "Doxygen usage" for information on how to generate the style
+# sheet that doxygen normally uses.
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
+# it is more robust and this tag (HTML_STYLESHEET) will in the future become
+# obsolete.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_STYLESHEET        = 
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user-
+# defined cascading style sheet that is included after the standard style sheets
+# created by doxygen. Using this option one can overrule certain style aspects.
+# This is preferred over using HTML_STYLESHEET since it does not replace the
+# standard style sheet and is therefore more robust against future updates.
+# Doxygen will copy the style sheet file to the output directory. For an example
+# see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_STYLESHEET  = 
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
+# files will be copied as-is; there are no commands or markers available.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_FILES       = 
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
+# will adjust the colors in the stylesheet and background images according to
+# this color. Hue is specified as an angle on a colorwheel, see
+# http://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
+# purple, and 360 is red again.
+# Minimum value: 0, maximum value: 359, default value: 220.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_HUE    = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
+# in the HTML output. For a value of 0 the output will use grayscales only. A
+# value of 255 will produce the most vivid colors.
+# Minimum value: 0, maximum value: 255, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_SAT    = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
+# luminance component of the colors in the HTML output. Values below 100
+# gradually make the output lighter, whereas values above 100 make the output
+# darker. The value divided by 100 is the actual gamma applied, so 80 represents
+# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not
+# change the gamma.
+# Minimum value: 40, maximum value: 240, default value: 80.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_GAMMA  = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting this
+# to NO can help when comparing the output of multiple runs.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_TIMESTAMP         = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
+# shown in the various tree structured indices initially; the user can expand
+# and collapse entries dynamically later on. Doxygen will expand the tree to
+# such a level that at most the specified number of entries are visible (unless
+# a fully collapsed tree already exceeds this amount). So setting the number of
+# entries to 1 will produce a fully collapsed tree by default. 0 is a special
+# value representing an infinite number of entries and will result in a fully
+# expanded tree by default.
+# Minimum value: 0, maximum value: 9999, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files will be
+# generated that can be used as input for Apple's Xcode 3 integrated development
+# environment (see: http://developer.apple.com/tools/xcode/), introduced with
+# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
+# Makefile in the HTML output directory. Running make will produce the docset in
+# that directory and running make install will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
+# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_DOCSET        = NO
+
+# This tag determines the name of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# The default value is: Doxygen generated docs.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# This tag specifies a string that should uniquely identify the documentation
+# set bundle. This should be a reverse domain-name style string, e.g.
+# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+# The default value is: org.doxygen.Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+# The default value is: Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_NAME  = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
+# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
+# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
+# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
+# Windows.
+#
+# The HTML Help Workshop contains a compiler that can convert all HTML output
+# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
+# files are now used as the Windows 98 help format, and will replace the old
+# Windows help format (.hlp) on all Windows platforms in the future. Compressed
+# HTML files also contain an index, a table of contents, and you can search for
+# words in the documentation. The HTML workshop also contains a viewer for
+# compressed HTML files.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_HTMLHELP      = NO
+
+# The CHM_FILE tag can be used to specify the file name of the resulting .chm
+# file. You can add a path in front of the file if the result should not be
+# written to the html output directory.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_FILE               = 
+
+# The HHC_LOCATION tag can be used to specify the location (absolute path
+# including file name) of the HTML help compiler (hhc.exe). If non-empty
+# doxygen will try to run the HTML help compiler on the generated index.hhp.
+# The file has to be specified with full path.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+HHC_LOCATION           = 
+
+# The GENERATE_CHI flag controls if a separate .chi index file is generated
+# (YES) or that it should be included in the master .chm file (NO).
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+GENERATE_CHI           = NO
+
+# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
+# and project file content.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_INDEX_ENCODING     = 
+
+# The BINARY_TOC flag controls whether a binary table of contents is generated
+# (YES) or a normal table of contents (NO) in the .chm file.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members to
+# the table of contents of the HTML help documentation and to the tree view.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
+# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
+# (.qch) of the generated HTML documentation.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
+# the file name of the resulting .qch file. The path specified is relative to
+# the HTML output folder.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QCH_FILE               = 
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
+# Project output. For more information please see Qt Help Project / Namespace
+# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
+# Help Project output. For more information please see Qt Help Project / Virtual
+# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-
+# folders).
+# The default value is: doc.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
+# filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
+# filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_NAME   = 
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
+# filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_ATTRS  = 
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's filter section matches. Qt Help Project / Filter Attributes (see:
+# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_SECT_FILTER_ATTRS  = 
+
+# The QHG_LOCATION tag can be used to specify the location of Qt's
+# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
+# generated .qhp file.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHG_LOCATION           = 
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
+# generated, together with the HTML files, they form an Eclipse help plugin. To
+# install this plugin and make it available under the help contents menu in
+# Eclipse, the contents of the directory containing the HTML and XML files needs
+# to be copied into the plugins directory of eclipse. The name of the directory
+# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
+# After copying Eclipse needs to be restarted before the help appears.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the Eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have this
+# name. Each documentation set should have its own identifier.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# If you want full control over the layout of the generated HTML pages it might
+# be necessary to disable the index and replace it with your own. The
+# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
+# of each HTML page. A value of NO enables the index and the value YES disables
+# it. Since the tabs in the index contain the same information as the navigation
+# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+DISABLE_INDEX          = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information. If the tag
+# value is set to YES, a side panel will be generated containing a tree-like
+# index structure (just like the one that is generated for HTML Help). For this
+# to work a browser that supports JavaScript, DHTML, CSS and frames is required
+# (i.e. any modern browser). Windows users are probably better off using the
+# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can
+# further fine-tune the look of the index. As an example, the default style
+# sheet generated by doxygen has an example that shows how to put an image at
+# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
+# the same information as the tab index, you could consider setting
+# DISABLE_INDEX to YES when enabling this option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_TREEVIEW      = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
+# doxygen will group on one line in the generated HTML documentation.
+#
+# Note that a value of 0 will completely suppress the enum values from appearing
+# in the overview section.
+# Minimum value: 0, maximum value: 20, default value: 4.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
+# to set the initial width (in pixels) of the frame in which the tree is shown.
+# Minimum value: 0, maximum value: 1500, default value: 250.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+TREEVIEW_WIDTH         = 250
+
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to
+# external symbols imported via tag files in a separate window.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+EXT_LINKS_IN_WINDOW    = NO
+
+# Use this tag to change the font size of LaTeX formulas included as images in
+# the HTML documentation. When you change the font size after a successful
+# doxygen run you need to manually remove any form_*.png images from the HTML
+# output directory to force them to be regenerated.
+# Minimum value: 8, maximum value: 50, default value: 10.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_FONTSIZE       = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are not
+# supported properly for IE 6.0, but are supported on all modern browsers.
+#
+# Note that when changing this option you need to delete any form_*.png files in
+# the HTML output directory before the changes have effect.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_TRANSPARENT    = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
+# http://www.mathjax.org) which uses client side Javascript for the rendering
+# instead of using prerendered bitmaps. Use this if you do not have LaTeX
+# installed or if you want formulas to look prettier in the HTML output. When
+# enabled you may also need to install MathJax separately and configure the path
+# to it using the MATHJAX_RELPATH option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+USE_MATHJAX            = NO
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. See the MathJax site (see:
+# http://docs.mathjax.org/en/latest/output.html) for more details.
+# Possible values are: HTML-CSS (which is slower, but has the best
+# compatibility), NativeMML (i.e. MathML) and SVG.
+# The default value is: HTML-CSS.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_FORMAT         = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the HTML
+# output directory using the MATHJAX_RELPATH option. The destination directory
+# should contain the MathJax.js script. For instance, if the mathjax directory
+# is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
+# Content Delivery Network so you can quickly see the result without installing
+# MathJax. However, it is strongly recommended to install a local copy of
+# MathJax from http://www.mathjax.org before deployment.
+# The default value is: http://cdn.mathjax.org/mathjax/latest.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
+# extension names that should be enabled during MathJax rendering. For example
+# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_EXTENSIONS     = 
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
+# of code that will be used on startup of the MathJax code. See the MathJax site
+# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# example see the documentation.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_CODEFILE       = 
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
+# the HTML output. The underlying search engine uses javascript and DHTML and
+# should work on any modern browser. Note that when using HTML help
+# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
+# there is already a search function so this one should typically be disabled.
+# For large projects the javascript based search engine can be slow, then
+# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
+# search using the keyboard; to jump to the search box use <access key> + S
+# (what the <access key> is depends on the OS and browser, but it is typically
+# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
+# key> to jump into the search results window, the results can be navigated
+# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel
+# the search. The filter options can be selected when the cursor is inside the
+# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>
+# to select a filter and <Enter> or <escape> to activate or cancel the filter
+# option.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using Javascript. There
+# are two flavours of web server based searching depending on the
+# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for
+# searching and an index file used by the script. When EXTERNAL_SEARCH is
+# enabled the indexing and searching needs to be provided by external tools. See
+# the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SERVER_BASED_SEARCH    = NO
+
+# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
+# search results.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: http://xapian.org/).
+#
+# See the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH        = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: http://xapian.org/). See the section "External Indexing and
+# Searching" for details.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHENGINE_URL       = 
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+# The default file is: searchdata.xml.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHDATA_FILE        = searchdata.xml
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH_ID     = 
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id to
+# a relative location where the documentation can be found. The format is:
+# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTRA_SEARCH_MAPPINGS  = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output.
+# The default value is: YES.
+
+GENERATE_LATEX         = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked.
+#
+# Note that when enabling USE_PDFLATEX this option is only used for generating
+# bitmaps for formulas in the HTML output, but not in the Makefile that is
+# written to the output directory.
+# The default file is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
+# index for LaTeX.
+# The default file is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES doxygen generates more compact LaTeX
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used by the
+# printer.
+# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
+# 14 inches) and executive (7.25 x 10.5 inches).
+# The default value is: a4.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PAPER_TYPE             = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
+# that should be included in the LaTeX output. To get the times font for
+# instance you can specify
+# EXTRA_PACKAGES=times
+# If left blank no extra packages will be included.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+EXTRA_PACKAGES         = 
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
+# generated LaTeX document. The header should contain everything until the first
+# chapter. If it is left blank doxygen will generate a standard header. See
+# section "Doxygen usage" for information on how to let doxygen write the
+# default header to a separate file.
+#
+# Note: Only use a user-defined header if you know what you are doing! The
+# following commands have a special meaning inside the header: $title,
+# $datetime, $date, $doxygenversion, $projectname, $projectnumber. Doxygen will
+# replace them by respectively the title of the page, the current date and time,
+# only the current date, the version number of doxygen, the project name (see
+# PROJECT_NAME), or the project number (see PROJECT_NUMBER).
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HEADER           = doc/doxygen/header.tex
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
+# generated LaTeX document. The footer should contain everything after the last
+# chapter. If it is left blank doxygen will generate a standard footer.
+#
+# Note: Only use a user-defined footer if you know what you are doing!
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_FOOTER           = 
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the LATEX_OUTPUT output
+# directory. Note that the files will be copied as-is; there are no commands or
+# markers available.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_FILES      = 
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
+# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
+# contain links (just like the HTML output) instead of page references. This
+# makes the output suitable for online browsing using a PDF viewer.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
+# the PDF file directly from the LaTeX files. Set this option to YES to get a
+# higher quality PDF documentation.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep running
+# if errors occur, instead of asking the user for help. This option is also used
+# when generating formulas in HTML.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BATCHMODE        = NO
+
+# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
+# index chapters (such as File Index, Compound Index, etc.) in the output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HIDE_INDICES     = NO
+
+# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
+# code with syntax highlighting in the LaTeX output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_SOURCE_CODE      = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. See
+# http://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# The default value is: plain.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BIB_STYLE        = plain
+
+#---------------------------------------------------------------------------
+# Configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES doxygen will generate RTF output. The
+# RTF output is optimized for Word 97 and may not look too pretty with other RTF
+# readers/editors.
+# The default value is: NO.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: rtf.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES doxygen generates more compact RTF
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
+# contain hyperlink fields. The RTF file will contain links (just like the HTML
+# output) instead of page references. This makes the output suitable for online
+# browsing using Word or some other Word compatible readers that support those
+# fields.
+#
+# Note: WordPad (write) and others do not support links.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's config
+# file, i.e. a series of assignments. You only have to provide replacements,
+# missing definitions are set to their default value.
+#
+# See also section "Doxygen usage" for information on how to generate the
+# default style sheet that doxygen normally uses.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_STYLESHEET_FILE    = 
+
+# Set optional variables used in the generation of an RTF document. Syntax is
+# similar to doxygen's config file. A template extensions file can be generated
+# using doxygen -e rtf extensionFile.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_EXTENSIONS_FILE    = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES doxygen will generate man pages for
+# classes and files.
+# The default value is: NO.
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it. A directory man3 will be created inside the directory specified by
+# MAN_OUTPUT.
+# The default directory is: man.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to the generated
+# man pages. In case the manual section does not start with a number, the number
+# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
+# optional.
+# The default value is: .3.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
+# will generate one additional man file for each entity documented in the real
+# man page(s). These additional files only source the real man page, but without
+# them the man command would be unable to find the correct page.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES doxygen will generate an XML file that
+# captures the structure of the code including all documentation.
+# The default value is: NO.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: xml.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema, which can be used by
+# a validating XML parser to check the syntax of the XML files.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_SCHEMA             = 
+
+# The XML_DTD tag can be used to specify an XML DTD, which can be used by a
+# validating XML parser to check the syntax of the XML files.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_DTD                = 
+
+# If the XML_PROGRAMLISTING tag is set to YES doxygen will dump the program
+# listings (including syntax highlighting and cross-referencing information) to
+# the XML output. Note that enabling this will significantly increase the size
+# of the XML output.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES doxygen will generate Docbook files
+# that can be used to generate PDF.
+# The default value is: NO.
+
+GENERATE_DOCBOOK       = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it.
+# The default directory is: docbook.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_OUTPUT         = docbook
+
+#---------------------------------------------------------------------------
+# Configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES doxygen will generate an AutoGen
+# Definitions (see http://autogen.sf.net) file that captures the structure of
+# the code including all documentation. Note that this feature is still
+# experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES doxygen will generate a Perl module
+# file that captures the structure of the code including all documentation.
+#
+# Note that this feature is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES doxygen will generate the necessary
+# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
+# output from the Perl module output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be nicely
+# formatted so it can be parsed by a human reader. This is useful if you want to
+# understand what is going on. On the other hand, if this tag is set to NO the
+# size of the Perl module output will be much smaller and Perl will parse it
+# just the same.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file are
+# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
+# so different doxyrules.make files included by the same Makefile don't
+# overwrite each other's variables.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_MAKEVAR_PREFIX = 
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES doxygen will evaluate all
+# C-preprocessor directives found in the sources and include files.
+# The default value is: YES.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES doxygen will expand all macro names
+# in the source code. If set to NO only conditional compilation will be
+# performed. Macro expansion can be done in a controlled way by setting
+# EXPAND_ONLY_PREDEF to YES.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+MACRO_EXPANSION        = YES
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
+# the macro expansion is limited to the macros specified with the PREDEFINED and
+# EXPAND_AS_DEFINED tags.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_ONLY_PREDEF     = YES
+
+# If the SEARCH_INCLUDES tag is set to YES the include files in the
+# INCLUDE_PATH will be searched if a #include is found.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by the
+# preprocessor.
+# This tag requires that the tag SEARCH_INCLUDES is set to YES.
+
+INCLUDE_PATH           = 
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will be
+# used.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+INCLUDE_FILE_PATTERNS  = 
+
+# The PREDEFINED tag can be used to specify one or more macro names that are
+# defined before the preprocessor is started (similar to the -D option of e.g.
+# gcc). The argument of the tag is a list of macros of the form: name or
+# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
+# is assumed. To prevent a macro definition from being undefined via #undef or
+# recursively expanded use the := operator instead of the = operator.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+PREDEFINED             = COI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
+# tag can be used to specify a list of macro names that should be expanded. The
+# macro definition that is found in the sources will be used. Use the PREDEFINED
+# tag if you want to use a different macro definition that overrules the
+# definition found in the source code.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_AS_DEFINED      = 
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
+# remove all references to function-like macros that are alone on a line, have an
+# all uppercase name, and do not end with a semicolon. Such function macros are
+# typically used for boiler-plate code, and will confuse the parser if not
+# removed.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tag files. For each tag
+# file the location of the external documentation should be added. The format of
+# a tag file without this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where loc1 and loc2 can be relative or absolute paths or URLs. See the
+# section "Linking to external documentation" for more information about the use
+# of tag files.
+# Note: Each tag file must have a unique name (where the name does NOT include
+# the path). If a tag file is not located in the directory in which doxygen is
+# run, you must also specify the path to the tagfile here.
+
+TAGFILES               = 
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
+# tag file that is based on the input files it reads. See section "Linking to
+# external documentation" for more information about the usage of tag files.
+
+GENERATE_TAGFILE       = 
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed in the
+# class index. If set to NO only the inherited external classes will be listed.
+# The default value is: NO.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed in
+# the modules index. If set to NO, only the current project's groups will be
+# listed.
+# The default value is: YES.
+
+EXTERNAL_GROUPS        = YES
+
+# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed in
+# the related pages index. If set to NO, only the current project's pages will
+# be listed.
+# The default value is: YES.
+
+EXTERNAL_PAGES         = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of 'which perl').
+# The default file (with absolute path) is: /usr/bin/perl.
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES doxygen will generate a class diagram
+# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
+# NO turns the diagrams off. Note that this option also works with HAVE_DOT
+# disabled, but it is recommended to install and use dot, since it yields more
+# powerful graphs.
+# The default value is: YES.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see:
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH            = 
+
+# You can include diagrams made with dia in doxygen documentation. Doxygen will
+# then run dia to produce the diagram and insert it in the documentation. The
+# DIA_PATH tag allows you to specify the directory where the dia binary resides.
+# If left empty dia is assumed to be found in the default search path.
+
+DIA_PATH               = 
+
+# If set to YES, the inheritance and collaboration graphs will hide inheritance
+# and usage relations if the target is undocumented or is not a class.
+# The default value is: YES.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz (see:
+# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
+# Bell Labs. The other options in this section have no effect if this option is
+# set to NO.
+# The default value is: NO.
+
+HAVE_DOT               = NO
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
+# to run in parallel. When set to 0 doxygen will base this on the number of
+# processors available in the system. You can set it explicitly to a value
+# larger than 0 to get control over the balance between CPU load and processing
+# speed.
+# Minimum value: 0, maximum value: 32, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_NUM_THREADS        = 0
+
+# When you want a different looking font in the dot files that doxygen
+# generates you can specify the font name using DOT_FONTNAME. You need to make
+# sure dot is able to find the font, which can be done by putting it in a
+# standard location or by setting the DOTFONTPATH environment variable or by
+# setting DOT_FONTPATH to the directory containing the font.
+# The default value is: Helvetica.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTNAME           = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
+# dot graphs.
+# Minimum value: 4, maximum value: 24, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the default font as specified with
+# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
+# the path where dot can find it using this tag.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTPATH           = 
+
+# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
+# each documented class showing the direct and indirect inheritance relations.
+# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
+# graph for each documented class showing the direct and indirect implementation
+# dependencies (inheritance, containment, and class references variables) of the
+# class with other documented classes.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
+# groups, showing the direct groups dependencies.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LOOK               = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
+# class node. If there are many fields or methods and many nodes the graph may
+# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
+# number of items for each type to make the size more manageable. Set this to 0
+# for no limit. Note that the threshold may be exceeded by 50% before the limit
+# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
+# but if the number exceeds 15, the total amount of fields shown is limited to
+# 10.
+# Minimum value: 0, maximum value: 100, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LIMIT_NUM_FIELDS   = 10
+
+# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
+# collaboration graphs will show the relations between templates and their
+# instances.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
+# YES then doxygen will generate a graph for each documented file showing the
+# direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDE_GRAPH          = YES
+
+# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
+# set to YES then doxygen will generate a graph for each documented file showing
+# the direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable caller graphs for selected
+# functions only using the \callergraph command.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will generate a
+# graphical hierarchy of all classes instead of a textual one.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
+# dependencies a directory has on other directories in a graphical way. The
+# dependency relations are determined by the #include relations between the
+# files in the directories.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot.
+# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
+# to make the SVG files visible in IE 9+ (other browsers do not have this
+# requirement).
+# Possible values are: png, jpg, gif and svg.
+# The default value is: png.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_IMAGE_FORMAT       = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+#
+# Note that this requires a modern browser other than Internet Explorer. Tested
+# and working are Firefox, Chrome, Safari, and Opera.
+# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
+# the SVG files visible. Older versions of IE do not have SVG support.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INTERACTIVE_SVG        = NO
+
+# The DOT_PATH tag can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_PATH               = 
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the \dotfile
+# command).
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOTFILE_DIRS           = 
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the \mscfile
+# command).
+
+MSCFILE_DIRS           = 
+
+# The DIAFILE_DIRS tag can be used to specify one or more directories that
+# contain dia files that are included in the documentation (see the \diafile
+# command).
+
+DIAFILE_DIRS           = 
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
+# that will be shown in the graph. If the number of nodes in a graph becomes
+# larger than this value, doxygen will truncate the graph, which is visualized
+# by representing a node as a red box. Note that if the number of direct
+# children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
+# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+# Minimum value: 0, maximum value: 10000, default value: 50.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
+# generated by dot. A depth value of 3 means that only nodes reachable from the
+# root by following a path via at most 3 edges will be shown. Nodes that lie
+# further from the root node will be omitted. Note that setting this option to 1
+# or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+# Minimum value: 0, maximum value: 1000, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not seem
+# to support this out of the box.
+#
+# Warning: Depending on the platform used, enabling this option may lead to
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
+# read).
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10) support
+# this, this feature is disabled by default.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
+# explaining the meaning of the various boxes and arrows in the dot generated
+# graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES doxygen will remove the intermediate dot
+# files that are used to generate the various graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_CLEANUP            = YES
diff --git a/final/offload/doc/doxygen/header.tex b/final/offload/doc/doxygen/header.tex
new file mode 100755
index 0000000..5e963c2
--- /dev/null
+++ b/final/offload/doc/doxygen/header.tex
@@ -0,0 +1,90 @@
+% Latex header for doxygen 1.8.3.1

+\documentclass{book}

+\usepackage[a4paper,top=2.5cm,bottom=2.5cm,left=2.5cm,right=2.5cm]{geometry}

+\usepackage{makeidx}

+\usepackage{natbib}

+\usepackage{graphicx}

+\usepackage{multicol}

+\usepackage{float}

+\usepackage{listings}

+\usepackage{color}

+\usepackage{ifthen}

+\usepackage[table]{xcolor}

+\usepackage{textcomp}

+\usepackage{alltt}

+\usepackage{ifpdf}

+\ifpdf

+\usepackage[pdftex,

+            pagebackref=true,

+            colorlinks=true,

+            linkcolor=blue,

+            unicode

+           ]{hyperref}

+\else

+\usepackage[ps2pdf,

+            pagebackref=true,

+            colorlinks=true,

+            linkcolor=blue,

+            unicode

+           ]{hyperref}

+\usepackage{pspicture}

+\fi

+\usepackage[utf8]{inputenc}

+\usepackage{mathptmx}

+\usepackage[scaled=.90]{helvet}

+\usepackage{courier}

+\usepackage{sectsty}

+\usepackage{amssymb}

+\usepackage[titles]{tocloft}

+\usepackage{doxygen}

+\usepackage{fancyhdr}

+\pagestyle{fancy}

+\lstset{language=C++,inputencoding=utf8,basicstyle=\footnotesize,breaklines=true,breakatwhitespace=true,tabsize=4,numbers=left }

+\makeindex

+\setcounter{tocdepth}{3}

+\renewcommand{\footrulewidth}{0.4pt}

+\renewcommand{\familydefault}{\sfdefault}

+\hfuzz=15pt

+\setlength{\emergencystretch}{15pt}

+\hbadness=750

+\tolerance=750

+\begin{document}

+\hypersetup{pageanchor=false,citecolor=blue}

+\begin{titlepage}

+\vspace*{7cm}

+\begin{center}

+{\Large Intel\textsuperscript{\textregistered} Offload Runtime Library }\\

+\vspace*{1cm}

+{\large Generated by Doxygen $doxygenversion }\\

+\vspace*{0.5cm}

+{\small $datetime }\\

+\end{center}

+\end{titlepage}

+

+{\bf FTC Optimization Notice}

+

+Intel's compilers may or may not optimize to the same degree for non-Intel microprocessors for

+optimizations that are not unique to Intel microprocessors. These optimizations include SSE2,

+SSE3, and SSSE3 instruction sets and other optimizations. Intel does not guarantee the

+availability, functionality, or effectiveness of any optimization on microprocessors not

+manufactured by Intel.

+

+Microprocessor-dependent optimizations in this product are intended for use with Intel

+microprocessors. Certain optimizations not specific to Intel microarchitecture are reserved for

+Intel microprocessors. Please refer to the applicable product User and Reference Guides for

+more information regarding the specific instruction sets covered by this notice.

+

+Notice revision \#20110804

+

+\vspace*{0.5cm}

+

+{\bf Trademarks}

+

+Intel, Xeon, and Intel Xeon Phi are trademarks of Intel Corporation in the U.S. and/or other countries.

+

+This document is Copyright \textcopyright 2014, Intel Corporation. All rights reserved. 

+

+\pagenumbering{roman}

+\tableofcontents

+\pagenumbering{arabic}

+\hypersetup{pageanchor=true,citecolor=blue}

diff --git a/final/offload/src/cean_util.cpp b/final/offload/src/cean_util.cpp
new file mode 100644
index 0000000..fe1890b
--- /dev/null
+++ b/final/offload/src/cean_util.cpp
@@ -0,0 +1,344 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "cean_util.h"
+#include "offload_common.h"
+
+// 1. allocate an element of CeanReadRanges type (with malloc)
+// 2. initialize it for reading consecutively the contiguous ranges
+//    described by "ap" argument
+CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap)
+{
+    CeanReadRanges * res;
+
+    // find the max contiguous range
+    int64_t rank = ap->rank - 1;
+    int64_t length = ap->dim[rank].size;
+    for (; rank >= 0; rank--) {
+        if (ap->dim[rank].stride == 1) {
+            length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
+            if (rank > 0 && length != ap->dim[rank - 1].size) {
+                break;
+            }
+        }
+        else {
+            break;
+        }
+    }
+    // Dim[0..rank] is written below, so reserve an entry for every dimension
+    res =(CeanReadRanges *)malloc(sizeof(CeanReadRanges) +
+                                  (ap->rank + 1) * sizeof(CeanReadDim));
+    res->current_number = 0;
+    res->range_size = length;
+    res->last_noncont_ind = rank;
+
+    // calculate number of contiguous ranges inside noncontiguous dimensions
+    int count = 1;
+    bool prev_is_cont = true;
+    int64_t offset = 0;
+
+    for (; rank >= 0; rank--) {
+        res->Dim[rank].count = count;
+        res->Dim[rank].size = ap->dim[rank].stride * ap->dim[rank].size;
+        count *= (prev_is_cont && ap->dim[rank].stride == 1? 1 :
+            (ap->dim[rank].upper - ap->dim[rank].lower +
+            ap->dim[rank].stride) / ap->dim[rank].stride);
+        prev_is_cont = false;
+        offset +=(ap->dim[rank].lower - ap->dim[rank].lindex) *
+                 ap->dim[rank].size;
+    }
+    res->range_max_number = count;
+    res -> ptr = (void*)ap->base;
+    res -> init_offset = offset;
+    return res;
+}
+
+// check if ranges described by the 1-st argument could be transferred into
+// ranges described by the 2-nd one (a missing descriptor always matches)
+bool cean_ranges_match(
+    CeanReadRanges * read_rng1,
+    CeanReadRanges * read_rng2
+)
+{
+    if (read_rng1 == NULL || read_rng2 == NULL) return true;
+    const int64_t sz1 = read_rng1->range_size, sz2 = read_rng2->range_size;
+    return sz1 % sz2 == 0 || sz2 % sz1 == 0;
+}
+
+// Stores the offset of the next range in *offset and returns true.
+// Returns false (resetting the range counter) if the ranges are over.
+bool get_next_range(
+    CeanReadRanges * read_rng,
+    int64_t *offset
+)
+{
+    read_rng->current_number++;
+    if (read_rng->current_number > read_rng->range_max_number) {
+        read_rng->current_number = 0;
+        return false;
+    }
+    int remaining = read_rng->current_number - 1;
+    int64_t range_offset = read_rng->init_offset;
+    for (int dim = 0; dim <= read_rng->last_noncont_ind; dim++) {
+        const CeanReadDim &cur = read_rng->Dim[dim];
+        int steps = remaining / cur.count;
+        range_offset += steps * cur.size;
+        remaining = remaining % cur.count;
+    }
+    *offset = range_offset;
+    return true;
+}
+
+bool is_arr_desc_contiguous(const arr_desc *ap)
+{
+    // walk outwards from the last dimension, tracking the expected size
+    int64_t idx = ap->rank - 1;
+    int64_t expected = ap->dim[idx].size;
+    for (; idx >= 0; idx--) {
+        const dim_desc &d = ap->dim[idx];
+        if (d.stride > 1 && d.upper != d.lower) {
+            return false;
+        }
+        if (expected != d.size) {
+            // sizes differ: all remaining dimensions must be one element
+            while (idx >= 0 && ap->dim[idx].upper == ap->dim[idx].lower) {
+                idx--;
+            }
+            return idx < 0;
+        }
+        expected *= (d.upper - d.lower + 1);
+    }
+    return true;
+}
+
+int64_t cean_get_transf_size(CeanReadRanges * read_rng)
+{
+    return read_rng->range_max_number * read_rng->range_size;
+}
+
+static uint64_t last_left, last_right; // bounds of the range not yet reported
+typedef void (*fpp)(const char *spaces, uint64_t low, uint64_t high, int esize);
+
+static void generate_one_range(
+    const char *spaces,
+    uint64_t lrange,
+    uint64_t rrange,
+    fpp fp,
+    int esize
+)
+{
+    OFFLOAD_TRACE(3,
+        "%s    generate_one_range(lrange=%p, rrange=%p, esize=%d)\n",
+        spaces, (void*)lrange, (void*)rrange, esize);
+
+    // last_left holding -1 means that no range has been seen yet
+    const bool is_first = (last_left == -1);
+    const bool adjoins_previous = !is_first && (lrange == last_right+1);
+
+    if (!is_first && !adjoins_previous) {
+        // a gap: report the accumulated range before starting a new one
+        (*fp)(spaces, last_left, last_right, esize);
+    }
+    if (!adjoins_previous) {
+        last_left = lrange;
+    }
+    // in every case the pending range now ends where this one ends
+    last_right = rrange;
+}
+
+// Feeds every byte range selected by ddp[0..rank-1] to generate_one_range.
+static void generate_mem_ranges_one_rank(
+    const char *spaces,
+    uint64_t base,
+    uint64_t rank,
+    const struct dim_desc *ddp,
+    fpp fp,
+    int esize
+)
+{
+    uint64_t lindex = ddp->lindex;
+    uint64_t lower = ddp->lower;
+    uint64_t upper = ddp->upper;
+    uint64_t stride = ddp->stride;
+    uint64_t size = ddp->size;
+    OFFLOAD_TRACE(3,
+        "%s    "
+        "generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, "
+        "lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n",
+        spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize);
+    if (rank == 1) {
+        if (stride == 1) {
+            uint64_t lrange = base + (lower-lindex)*size;
+            uint64_t rrange = lrange + (upper-lower+1)*size - 1;
+            generate_one_range(spaces, lrange, rrange, fp, esize);
+        }
+        else {
+            // 64-bit index: an int would truncate the bounds of big sections
+            for (uint64_t i=lower-lindex; i<=upper-lindex; i+=stride) {
+                uint64_t lrange = base + i*size;
+                uint64_t rrange = lrange + size - 1;
+                generate_one_range(spaces, lrange, rrange, fp, esize);
+            }
+        }
+    }
+    else {
+        for (uint64_t i=lower-lindex; i<=upper-lindex; i+=stride) {
+            generate_mem_ranges_one_rank(
+                spaces, base+i*size, rank-1, ddp+1, fp, esize);
+        }
+    }
+}
+
+static void generate_mem_ranges(
+    const char *spaces,
+    const arr_desc *adp,
+    bool deref,
+    fpp fp
+)
+{
+    OFFLOAD_TRACE(3,
+        "%s    "
+        "generate_mem_ranges(adp=%p, deref=%d, fp)\n",
+        spaces, adp, deref);
+    // start with an empty pending range (see generate_one_range)
+    last_left = -1;
+    last_right = -2;
+
+    // the last dimension gives the element size
+    const uint64_t esize = adp->dim[adp->rank-1].size;
+    // For c_cean_var the base addr is the address of the data
+    // For c_cean_var_ptr the base addr is dereferenced to get to the data
+    const uint64_t data_addr =
+        deref ? *((uint64_t*)(adp->base)) : adp->base;
+    generate_mem_ranges_one_rank(
+        spaces, data_addr, adp->rank, &adp->dim[0], fp, esize);
+    // report the range that is still pending
+    (*fp)(spaces, last_left, last_right, esize);
+}
+
+// stores in "offset" and "length" the extent of the data to be transferred
+void __arr_data_offset_and_length(
+    const arr_desc *adp,
+    int64_t &offset,
+    int64_t &length
+)
+{
+    int64_t rank = adp->rank - 1;
+    int64_t size = adp->dim[rank].size;
+    int64_t r_off = 0; // offset from right boundary
+
+    // find the rightmost dimension which takes just a part of its range:
+    // it is reached when the size of the rank to the left is not equal to
+    // the length of the range between the upper and lower boundaries
+    while (rank > 0) {
+        size *= (adp->dim[rank].upper - adp->dim[rank].lower + 1);
+        if (size != adp->dim[rank - 1].size) {
+            break;
+        }
+        rank--;
+    }
+
+    offset = (adp->dim[rank].lower - adp->dim[rank].lindex) *
+             adp->dim[rank].size;
+
+    // find gaps both from the left - offset and from the right - r_off
+    for (rank--; rank >= 0; rank--) {
+        offset += (adp->dim[rank].lower - adp->dim[rank].lindex) *
+                  adp->dim[rank].size;
+        r_off += adp->dim[rank].size -
+                 (adp->dim[rank + 1].upper - adp->dim[rank + 1].lindex + 1) *
+                 adp->dim[rank + 1].size;
+    }
+    length = (adp->dim[0].upper - adp->dim[0].lindex + 1) *
+             adp->dim[0].size - offset - r_off;
+}
+
+#if OFFLOAD_DEBUG > 0
+
+// Dumps the values stored at addresses [low, high] to the level-4 trace.
+void print_range(
+    const char *spaces,
+    uint64_t low,
+    uint64_t high,
+    int esize
+)
+{
+    char buffer[1024];
+    char number[32];
+    OFFLOAD_TRACE(3, "%s        print_range(low=%p, high=%p, esize=%d)\n",
+        spaces, (void*)low, (void*)high, esize);
+    if (console_enabled < 4) {
+        return;
+    }
+    OFFLOAD_TRACE(4, "%s            values:\n", spaces);
+    int count = 0;
+    buffer[0] = '\0';
+    while (low <= high)
+    {
+        switch (esize)
+        {
+        case 1:
+            sprintf(number, "%d ", *((char *)low));
+            low += 1;
+            break;
+        case 2:
+            sprintf(number, "%d ", *((short *)low));
+            low += 2;
+            break;
+        case 4:
+            sprintf(number, "%d ", *((int *)low));
+            low += 4;
+            break;
+        default:
+            sprintf(number, "0x%016llx ",
+                    (unsigned long long)*((uint64_t *)low));
+            low += 8;
+            break;
+        }
+        strcat(buffer, number);
+        count++;
+        if (count == 10) {
+            OFFLOAD_TRACE(4, "%s            %s\n", spaces, buffer);
+            count = 0;
+            buffer[0] = '\0';
+        }
+    }
+    if (count != 0) {
+        OFFLOAD_TRACE(4, "%s            %s\n", spaces, buffer);
+    }
+}
+
+void __arr_desc_dump(
+    const char *spaces,
+    const char *name,
+    const arr_desc *adp,
+    bool deref
+)
+{
+    OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp);
+    if (adp == 0) {
+        return;   // nothing more to show for a missing descriptor
+    }
+
+    OFFLOAD_TRACE(2, "%s    base=%llx, rank=%lld\n",
+        spaces, adp->base, adp->rank);
+    for (int d = 0; d < adp->rank; d++) {
+        const dim_desc &cur = adp->dim[d];
+        OFFLOAD_TRACE(2,
+                      "%s    dimension %d: size=%lld, lindex=%lld, "
+                      "lower=%lld, upper=%lld, stride=%lld\n",
+                      spaces, d, cur.size, cur.lindex, cur.lower, cur.upper,
+                      cur.stride);
+    }
+
+    // deref tells whether the base addr is the data or a pointer to it
+    generate_mem_ranges(spaces, adp, deref, &print_range);
+}
+#endif // OFFLOAD_DEBUG
diff --git a/final/offload/src/cean_util.h b/final/offload/src/cean_util.h
new file mode 100644
index 0000000..d0debcc
--- /dev/null
+++ b/final/offload/src/cean_util.h
@@ -0,0 +1,101 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef CEAN_UTIL_H_INCLUDED
+#define CEAN_UTIL_H_INCLUDED
+
+#if MPSS_VERSION > 33
+#include <source/COIBuffer_source.h>
+#endif
+#include <stdint.h>
+
+#if MPSS_VERSION <= 33
+// CEAN expression representation
+struct dim_desc {
+    int64_t size;       // Size in bytes of one element of the dimension
+    int64_t lindex;     // Lower index
+    int64_t lower;      // Lower section bound
+    int64_t upper;      // Upper section bound (inclusive)
+    int64_t stride;     // Stride
+};
+
+struct arr_desc {
+    int64_t base;       // Base address
+    int64_t rank;       // Rank of array
+    dim_desc dim[1];    // First of "rank" entries
+};
+#endif
+
+struct CeanReadDim {
+    int64_t count; // Number of ranges within one element of this dimension
+    int64_t size;  // The number of bytes between successive
+                   // elements in this dimension.
+};
+
+struct CeanReadRanges {
+    void *  ptr;              // base address taken from the array descriptor
+    int64_t current_number;   // the number of ranges read
+    int64_t range_max_number; // number of contiguous ranges
+    int64_t range_size;       // size of max contiguous range
+    int     last_noncont_ind; // index of the last used entry of Dim
+    int64_t init_offset;      // offset of 1-st element from array left bound
+    CeanReadDim Dim[1];       // entries 0..last_noncont_ind are filled in
+};
+
+// array descriptor length
+#define __arr_desc_length(rank) \
+    (sizeof(int64_t) + sizeof(dim_desc) * (rank))
+
+// returns offset and length of the data to be transferred
+void __arr_data_offset_and_length(const arr_desc *adp,
+                                  int64_t &offset,
+                                  int64_t &length);
+
+// determines whether the data array described by the argument is contiguous
+bool is_arr_desc_contiguous(const arr_desc *ap);
+
+// allocate element of CeanReadRanges type initialized
+// to read consecutively the contiguous ranges described by "ap" argument
+CeanReadRanges * init_read_ranges_arr_desc(const arr_desc *ap);
+
+// check if ranges described by the 1-st argument could be transferred into
+// ranges described by the 2-nd one
+bool cean_ranges_match(
+    CeanReadRanges * read_rng1,
+    CeanReadRanges * read_rng2
+);
+
+// first argument - returned value by call to init_read_ranges_arr_desc.
+// returns true if the offset of the next range is set successfully.
+// returns false if the ranges are over.
+bool get_next_range(
+    CeanReadRanges * read_rng,
+    int64_t *offset
+);
+
+// returns number of transferred bytes
+int64_t cean_get_transf_size(CeanReadRanges * read_rng);
+
+#if OFFLOAD_DEBUG > 0
+// prints array descriptor contents to stderr
+void    __arr_desc_dump(
+    const char *spaces,
+    const char *name,
+    const arr_desc *adp,
+    bool dereference);
+#else
+#define __arr_desc_dump(\
+    spaces,\
+    name,\
+    adp,\
+    dereference)
+#endif // OFFLOAD_DEBUG
+
+#endif // CEAN_UTIL_H_INCLUDED
diff --git a/final/offload/src/coi/coi_client.cpp b/final/offload/src/coi/coi_client.cpp
new file mode 100644
index 0000000..ab8c7f5
--- /dev/null
+++ b/final/offload/src/coi/coi_client.cpp
@@ -0,0 +1,350 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// The COI host interface
+
+#include "coi_client.h"
+#include "../offload_common.h"
+
+namespace COI {
+
+// symbol version strings passed to DL_sym when resolving COI entry points
+#define COI_VERSION1    "COI_1.0"
+#define COI_VERSION2    "COI_2.0"
+
+// set to true by init() once every entry point is resolved, reset by fini()
+bool            is_available;
+// handle returned by DL_open for the COI host library; 0 when not loaded
+static void*    lib_handle;
+
+// pointers to functions from COI library
+// (each one is filled in by init() from the symbol named "COI" + its name)
+COIRESULT (*EngineGetCount)(COI_ISA_TYPE, uint32_t*);
+COIRESULT (*EngineGetHandle)(COI_ISA_TYPE, uint32_t, COIENGINE*);
+
+COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*, const void*,
+                                     uint64_t, int, const char**, uint8_t,
+                                     const char**, uint8_t, const char*,
+                                     uint64_t, const char*, const char*,
+                                     uint64_t, COIPROCESS*);
+COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t, int8_t*, uint32_t*);
+COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t, const char**,
+                                       COIFUNCTION*);
+COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS, const void*, uint64_t,
+                                          const char*, const char*,
+                                          const char*, uint64_t, uint32_t,
+                                          COILIBRARY*);
+COIRESULT (*ProcessRegisterLibraries)(uint32_t, const void**, const uint64_t*,
+                                      const char**, const uint64_t*);
+
+COIRESULT (*PipelineCreate)(COIPROCESS, COI_CPU_MASK, uint32_t, COIPIPELINE*);
+COIRESULT (*PipelineDestroy)(COIPIPELINE);
+COIRESULT (*PipelineRunFunction)(COIPIPELINE, COIFUNCTION, uint32_t,
+                                 const COIBUFFER*, const COI_ACCESS_FLAGS*,
+                                 uint32_t, const COIEVENT*, const void*,
+                                 uint16_t, void*, uint16_t, COIEVENT*);
+
+COIRESULT (*BufferCreate)(uint64_t, COI_BUFFER_TYPE, uint32_t, const void*,
+                          uint32_t, const COIPROCESS*, COIBUFFER*);
+COIRESULT (*BufferCreateFromMemory)(uint64_t, COI_BUFFER_TYPE, uint32_t,
+                                    void*, uint32_t, const COIPROCESS*,
+                                    COIBUFFER*);
+COIRESULT (*BufferDestroy)(COIBUFFER);
+COIRESULT (*BufferMap)(COIBUFFER, uint64_t, uint64_t, COI_MAP_TYPE, uint32_t,
+                       const COIEVENT*, COIEVENT*, COIMAPINSTANCE*, void**);
+COIRESULT (*BufferUnmap)(COIMAPINSTANCE, uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*, uint64_t,
+                         COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t, COI_COPY_TYPE,
+                        uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t, uint64_t,
+                        COI_COPY_TYPE, uint32_t, const COIEVENT*, COIEVENT*);
+COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*);
+COIRESULT (*BufferSetState)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE,
+                            COI_BUFFER_MOVE_FLAG, uint32_t,
+                            const   COIEVENT*, COIEVENT*);
+
+COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t, uint8_t, uint32_t*,
+                       uint32_t*);
+
+uint64_t  (*PerfGetCycleFrequency)(void);
+
+// Resolves one versioned COI entry point from the loaded library and stores
+// it in "target".  When the symbol is missing the failure is traced, fini()
+// is called and false is returned; otherwise returns true.
+template <typename FuncPtr>
+static bool load_symbol(FuncPtr &target, const char *name, const char *version)
+{
+    // The pointer type is deduced from the destination variable, so the cast
+    // always matches the declared signature of that variable.
+    target = (FuncPtr) DL_sym(lib_handle, name, version);
+    if (target == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in COI library\n", name);
+        fini();
+        return false;
+    }
+    return true;
+}
+
+// Loads the COI host library and resolves every entry point used by the
+// offload runtime.  Returns true and sets is_available on success.  Returns
+// false if the library cannot be opened or if any symbol is missing (in the
+// latter case fini() has already been called).
+bool init(void)
+{
+#ifndef TARGET_WINNT
+    const char *lib_name = "libcoi_host.so.0";
+#else // TARGET_WINNT
+    const char *lib_name = "coi_host.dll";
+#endif // TARGET_WINNT
+
+    OFFLOAD_DEBUG_TRACE(2, "Loading COI library %s ...\n", lib_name);
+    lib_handle = DL_open(lib_name);
+    if (lib_handle == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to load the library\n");
+        return false;
+    }
+
+    // Lookups run in the order written; && stops at the first missing symbol,
+    // which load_symbol() has already traced and cleaned up after.
+    // Only COIProcessLoadLibraryFromMemory is requested at COI_VERSION2.
+    const bool resolved =
+        load_symbol(EngineGetCount,
+                    "COIEngineGetCount", COI_VERSION1) &&
+        load_symbol(EngineGetHandle,
+                    "COIEngineGetHandle", COI_VERSION1) &&
+        load_symbol(ProcessCreateFromMemory,
+                    "COIProcessCreateFromMemory", COI_VERSION1) &&
+        load_symbol(ProcessDestroy,
+                    "COIProcessDestroy", COI_VERSION1) &&
+        load_symbol(ProcessGetFunctionHandles,
+                    "COIProcessGetFunctionHandles", COI_VERSION1) &&
+        load_symbol(ProcessLoadLibraryFromMemory,
+                    "COIProcessLoadLibraryFromMemory", COI_VERSION2) &&
+        load_symbol(ProcessRegisterLibraries,
+                    "COIProcessRegisterLibraries", COI_VERSION1) &&
+        load_symbol(PipelineCreate,
+                    "COIPipelineCreate", COI_VERSION1) &&
+        load_symbol(PipelineDestroy,
+                    "COIPipelineDestroy", COI_VERSION1) &&
+        load_symbol(PipelineRunFunction,
+                    "COIPipelineRunFunction", COI_VERSION1) &&
+        load_symbol(BufferCreate,
+                    "COIBufferCreate", COI_VERSION1) &&
+        load_symbol(BufferCreateFromMemory,
+                    "COIBufferCreateFromMemory", COI_VERSION1) &&
+        load_symbol(BufferDestroy,
+                    "COIBufferDestroy", COI_VERSION1) &&
+        load_symbol(BufferMap,
+                    "COIBufferMap", COI_VERSION1) &&
+        load_symbol(BufferUnmap,
+                    "COIBufferUnmap", COI_VERSION1) &&
+        load_symbol(BufferWrite,
+                    "COIBufferWrite", COI_VERSION1) &&
+        load_symbol(BufferRead,
+                    "COIBufferRead", COI_VERSION1) &&
+        load_symbol(BufferCopy,
+                    "COIBufferCopy", COI_VERSION1) &&
+        load_symbol(BufferGetSinkAddress,
+                    "COIBufferGetSinkAddress", COI_VERSION1) &&
+        load_symbol(BufferSetState,
+                    "COIBufferSetState", COI_VERSION1) &&
+        load_symbol(EventWait,
+                    "COIEventWait", COI_VERSION1) &&
+        load_symbol(PerfGetCycleFrequency,
+                    "COIPerfGetCycleFrequency", COI_VERSION1);
+    if (!resolved) {
+        return false;
+    }
+
+    is_available = true;
+
+    return true;
+}
+
+// Marks COI as unavailable and drops the library handle, if one is held.
+void fini(void)
+{
+    is_available = false;
+
+    // nothing to release when the library was never loaded
+    if (lib_handle == 0) {
+        return;
+    }
+
+#ifndef TARGET_WINNT
+    // the handle is only explicitly closed on non-Windows targets
+    DL_close(lib_handle);
+#endif // TARGET_WINNT
+    lib_handle = 0;
+}
+
+} // namespace COI
diff --git a/final/offload/src/coi/coi_client.h b/final/offload/src/coi/coi_client.h
new file mode 100644
index 0000000..4775a8b
--- /dev/null
+++ b/final/offload/src/coi/coi_client.h
@@ -0,0 +1,118 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// The interface between offload library and the COI API on the host
+
+#ifndef COI_CLIENT_H_INCLUDED
+#define COI_CLIENT_H_INCLUDED
+
+#include <common/COIPerf_common.h>
+#include <source/COIEngine_source.h>
+#include <source/COIProcess_source.h>
+#include <source/COIPipeline_source.h>
+#include <source/COIBuffer_source.h>
+#include <source/COIEvent_source.h>
+
+#include <string.h>
+
+#include "../liboffload_error_codes.h"
+#include "../offload_util.h"
+
+#define MIC_ENGINES_MAX     128
+
+#if MIC_ENGINES_MAX < COI_MAX_ISA_MIC_DEVICES
+#error MIC_ENGINES_MAX need to be increased
+#endif
+
+// COI library interface
+namespace COI {
+
+extern bool init(void);
+extern void fini(void);
+
+extern bool is_available;
+
+// pointers to functions from COI library
+extern COIRESULT (*EngineGetCount)(COI_ISA_TYPE, uint32_t*);
+extern COIRESULT (*EngineGetHandle)(COI_ISA_TYPE, uint32_t, COIENGINE*);
+
+extern COIRESULT (*ProcessCreateFromMemory)(COIENGINE, const char*,
+                                           const void*, uint64_t, int,
+                                           const char**, uint8_t,
+                                           const char**, uint8_t,
+                                           const char*, uint64_t,
+                                           const char*,
+                                           const char*, uint64_t,
+                                           COIPROCESS*);
+extern COIRESULT (*ProcessDestroy)(COIPROCESS, int32_t, uint8_t,
+                                  int8_t*, uint32_t*);
+extern COIRESULT (*ProcessGetFunctionHandles)(COIPROCESS, uint32_t,
+                                             const char**,
+                                             COIFUNCTION*);
+extern COIRESULT (*ProcessLoadLibraryFromMemory)(COIPROCESS,
+                                                const void*,
+                                                uint64_t,
+                                                const char*,
+                                                const char*,
+                                                const char*,
+                                                uint64_t,
+                                                uint32_t,
+                                                COILIBRARY*);
+extern COIRESULT (*ProcessRegisterLibraries)(uint32_t,
+                                            const void**,
+                                            const uint64_t*,
+                                            const char**,
+                                            const uint64_t*);
+
+extern COIRESULT (*PipelineCreate)(COIPROCESS, COI_CPU_MASK, uint32_t,
+                                  COIPIPELINE*);
+extern COIRESULT (*PipelineDestroy)(COIPIPELINE);
+extern COIRESULT (*PipelineRunFunction)(COIPIPELINE, COIFUNCTION,
+                                       uint32_t, const COIBUFFER*,
+                                       const COI_ACCESS_FLAGS*,
+                                       uint32_t, const COIEVENT*,
+                                       const void*, uint16_t, void*,
+                                       uint16_t, COIEVENT*);
+
+extern COIRESULT (*BufferCreate)(uint64_t, COI_BUFFER_TYPE, uint32_t,
+                                const void*, uint32_t,
+                                const COIPROCESS*, COIBUFFER*);
+extern COIRESULT (*BufferCreateFromMemory)(uint64_t, COI_BUFFER_TYPE,
+                                          uint32_t, void*,
+                                          uint32_t, const COIPROCESS*,
+                                          COIBUFFER*);
+extern COIRESULT (*BufferDestroy)(COIBUFFER);
+extern COIRESULT (*BufferMap)(COIBUFFER, uint64_t, uint64_t,
+                             COI_MAP_TYPE, uint32_t, const COIEVENT*,
+                             COIEVENT*, COIMAPINSTANCE*, void**);
+extern COIRESULT (*BufferUnmap)(COIMAPINSTANCE, uint32_t,
+                               const COIEVENT*, COIEVENT*);
+extern COIRESULT (*BufferWrite)(COIBUFFER, uint64_t, const void*,
+                               uint64_t, COI_COPY_TYPE, uint32_t,
+                               const COIEVENT*, COIEVENT*);
+extern COIRESULT (*BufferRead)(COIBUFFER, uint64_t, void*, uint64_t,
+                              COI_COPY_TYPE, uint32_t,
+                              const COIEVENT*, COIEVENT*);
+extern COIRESULT (*BufferCopy)(COIBUFFER, COIBUFFER, uint64_t, uint64_t,
+                              uint64_t, COI_COPY_TYPE, uint32_t,
+                              const COIEVENT*, COIEVENT*);
+extern COIRESULT (*BufferGetSinkAddress)(COIBUFFER, uint64_t*);
+extern COIRESULT (*BufferSetState)(COIBUFFER, COIPROCESS, COI_BUFFER_STATE,
+                                   COI_BUFFER_MOVE_FLAG, uint32_t,
+                                   const   COIEVENT*, COIEVENT*);
+
+extern COIRESULT (*EventWait)(uint16_t, const COIEVENT*, int32_t,
+                           uint8_t, uint32_t*, uint32_t*);
+
+extern uint64_t  (*PerfGetCycleFrequency)(void);
+
+} // namespace COI
+
+#endif // COI_CLIENT_H_INCLUDED
diff --git a/final/offload/src/coi/coi_server.cpp b/final/offload/src/coi/coi_server.cpp
new file mode 100644
index 0000000..73e6c2d
--- /dev/null
+++ b/final/offload/src/coi/coi_server.cpp
@@ -0,0 +1,130 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// The COI interface on the target
+
+#include "coi_server.h"
+
+#include "../offload_target.h"
+#include "../offload_timer.h"
+#ifdef MYO_SUPPORT
+#include "../offload_myo_target.h"      // for __offload_myoLibInit/Fini
+#endif // MYO_SUPPORT
+
+// Exported (COINATIVELIBEXPORT) entry point that runs an offload on the
+// target: hands the buffers, the misc data and the return area over to
+// OffloadDescriptor::offload.  buffers_len is not used.
+COINATIVELIBEXPORT
+void server_compute(uint32_t  buffer_count,
+                    void**    buffers,
+                    uint64_t* buffers_len,
+                    void*     misc_data,
+                    uint16_t  misc_data_len,
+                    void*     return_data,
+                    uint16_t  return_data_len)
+{
+    OffloadDescriptor::offload(
+        buffer_count, buffers,
+        misc_data, misc_data_len,
+        return_data, return_data_len
+    );
+}
+
+// Exported (COINATIVELIBEXPORT) entry point that configures the target side:
+// reads the device index, device count and trace/report levels from
+// misc_data and writes this process's pid into return_data.
+COINATIVELIBEXPORT
+void server_init(uint32_t  buffer_count,
+                 void**    buffers,
+                 uint64_t* buffers_len,
+                 void*     misc_data,
+                 uint16_t  misc_data_len,
+                 void*     return_data,
+                 uint16_t  return_data_len)
+{
+    // layout of the block received through misc_data
+    struct init_data {
+        int  device_index;
+        int  devices_total;
+        int  console_level;
+        int  offload_report_level;
+    };
+    const init_data *settings = static_cast<const init_data*>(misc_data);
+
+    // device index and total number of devices
+    mic_index = settings->device_index;
+    mic_engines_total = settings->devices_total;
+
+    // trace and report levels
+    console_enabled = settings->console_level;
+    offload_report_level = settings->offload_report_level;
+
+    // hand the process id back to the caller
+    pid_t *pid_out = static_cast<pid_t*>(return_data);
+    *pid_out = getpid();
+}
+
+// Exported (COINATIVELIBEXPORT) entry point that queries the variable table:
+// stores the result of __offload_vars.table_size() in the "length" field of
+// the Params record located in return_data, passing its "nelems" field to
+// the call.
+COINATIVELIBEXPORT
+void server_var_table_size(uint32_t  buffer_count,
+                           void**    buffers,
+                           uint64_t* buffers_len,
+                           void*     misc_data,
+                           uint16_t  misc_data_len,
+                           void*     return_data,
+                           uint16_t  return_data_len)
+{
+    // record exchanged with the caller through return_data
+    struct Params {
+        int64_t nelems;
+        int64_t length;
+    };
+
+    Params *result = static_cast<Params*>(return_data);
+    result->length = __offload_vars.table_size(result->nelems);
+}
+
+// Exported (COINATIVELIBEXPORT) entry point that calls
+// __offload_vars.table_copy() with the first buffer and the int64_t value
+// that misc_data points to.
+COINATIVELIBEXPORT
+void server_var_table_copy(uint32_t  buffer_count,
+                           void**    buffers,
+                           uint64_t* buffers_len,
+                           void*     misc_data,
+                           uint16_t  misc_data_len,
+                           void*     return_data,
+                           uint16_t  return_data_len)
+{
+    void    *first_buffer = buffers[0];
+    // bound by reference so the callee sees the very same object as before
+    int64_t &table_arg = *static_cast<int64_t*>(misc_data);
+
+    __offload_vars.table_copy(first_buffer, table_arg);
+}
+
+#ifdef MYO_SUPPORT
+// temporary workaround for blocking behavior of myoiLibInit/Fini calls
+// Exported (COINATIVELIBEXPORT) entry point that calls
+// __offload_myoLibInit(); none of the arguments are used.
+COINATIVELIBEXPORT
+void server_myoinit(uint32_t  buffer_count,
+                    void**    buffers,
+                    uint64_t* buffers_len,
+                    void*     misc_data,
+                    uint16_t  misc_data_len,
+                    void*     return_data,
+                    uint16_t  return_data_len)
+{
+    __offload_myoLibInit();
+}
+
+// Exported (COINATIVELIBEXPORT) entry point that calls
+// __offload_myoLibFini(); none of the arguments are used.
+COINATIVELIBEXPORT
+void server_myofini(uint32_t  buffer_count,
+                    void**    buffers,
+                    uint64_t* buffers_len,
+                    void*     misc_data,
+                    uint16_t  misc_data_len,
+                    void*     return_data,
+                    uint16_t  return_data_len)
+{
+    __offload_myoLibFini();
+}
+#endif // MYO_SUPPORT
diff --git a/final/offload/src/coi/coi_server.h b/final/offload/src/coi/coi_server.h
new file mode 100644
index 0000000..e744d9e
--- /dev/null
+++ b/final/offload/src/coi/coi_server.h
@@ -0,0 +1,74 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+//The interface between offload library and the COI API on the target.
+
+#ifndef COI_SERVER_H_INCLUDED
+#define COI_SERVER_H_INCLUDED
+
+#include <common/COIEngine_common.h>
+#include <common/COIPerf_common.h>
+#include <sink/COIProcess_sink.h>
+#include <sink/COIPipeline_sink.h>
+#include <sink/COIBuffer_sink.h>
+#include <list>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "../liboffload_error_codes.h"
+
+// wrappers for COI API
+// Calls COIPipelineStartExecutingRunFunctions(); on any result other than
+// COI_SUCCESS reports c_pipeline_start_run_funcs (with mic_index and the
+// result code) through LIBOFFLOAD_ERROR and terminates with exit(1).
+// mic_index must be visible where the macro is expanded.
+// NOTE(review): this expands to a bare { } block rather than
+// do { } while (0), so "if (c) Macro(); else ..." would not compile --
+// confirm that no caller relies on that form.
+#define PipelineStartExecutingRunFunctions() \
+    { \
+        COIRESULT res = COIPipelineStartExecutingRunFunctions(); \
+        if (res != COI_SUCCESS) { \
+            LIBOFFLOAD_ERROR(c_pipeline_start_run_funcs, mic_index, res); \
+            exit(1); \
+        } \
+    }
+
+// Calls COIProcessWaitForShutdown(); on any result other than COI_SUCCESS
+// reports c_process_wait_shutdown (with mic_index and the result code)
+// through LIBOFFLOAD_ERROR and terminates with exit(1).
+// mic_index must be visible where the macro is expanded.
+#define ProcessWaitForShutdown() \
+    { \
+        COIRESULT res = COIProcessWaitForShutdown(); \
+        if (res != COI_SUCCESS) { \
+            LIBOFFLOAD_ERROR(c_process_wait_shutdown, mic_index, res); \
+            exit(1); \
+        } \
+    }
+
+// Calls COIBufferAddRef(buf); on any result other than COI_SUCCESS reports
+// c_buf_add_ref (with mic_index and the result code) through
+// LIBOFFLOAD_ERROR and terminates with exit(1).
+// mic_index must be visible where the macro is expanded.
+#define BufferAddRef(buf) \
+    { \
+        COIRESULT res = COIBufferAddRef(buf); \
+        if (res != COI_SUCCESS) { \
+            LIBOFFLOAD_ERROR(c_buf_add_ref, mic_index, res); \
+            exit(1); \
+        } \
+    }
+
+// Calls COIBufferReleaseRef(buf); on any result other than COI_SUCCESS
+// reports c_buf_release_ref (with mic_index and the result code) through
+// LIBOFFLOAD_ERROR and terminates with exit(1).
+// mic_index must be visible where the macro is expanded.
+#define BufferReleaseRef(buf) \
+    { \
+        COIRESULT res = COIBufferReleaseRef(buf); \
+        if (res != COI_SUCCESS) { \
+            LIBOFFLOAD_ERROR(c_buf_release_ref, mic_index, res); \
+            exit(1); \
+        } \
+    }
+
+// Calls COIEngineGetIndex(&isa_type, index), where "index" is forwarded
+// unchanged as the second argument; the ISA type written to the local
+// isa_type is discarded.  On any result other than COI_SUCCESS reports
+// c_get_engine_index (with mic_index and the result code) through
+// LIBOFFLOAD_ERROR and terminates with exit(1).
+// mic_index must be visible where the macro is expanded.
+#define EngineGetIndex(index) \
+    { \
+        COI_ISA_TYPE isa_type; \
+        COIRESULT res = COIEngineGetIndex(&isa_type, index); \
+        if (res != COI_SUCCESS) { \
+            LIBOFFLOAD_ERROR(c_get_engine_index, mic_index, res); \
+            exit(1); \
+        } \
+    }
+
+#endif // COI_SERVER_H_INCLUDED
diff --git a/final/offload/src/compiler_if_host.cpp b/final/offload/src/compiler_if_host.cpp
new file mode 100644
index 0000000..2bc430b
--- /dev/null
+++ b/final/offload/src/compiler_if_host.cpp
@@ -0,0 +1,323 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "compiler_if_host.h"
+
+#include <malloc.h>
+#ifndef TARGET_WINNT
+#include <alloca.h>
+#endif // TARGET_WINNT
+
+// Global counter on host.
+// This variable is used if P2OPT_offload_do_data_persistence == 2.
+// It identifies offload constructs contained in one procedure.
+// A call to OFFLOAD_CALL_COUNT, which increments it, is inserted at entries
+// of HOST routines with offload constructs.
+static int offload_call_count = 0;
+
+// Acquires a target for an offload and creates its OffloadDescriptor.
+// Returns the new descriptor, or NULL when the target could not be acquired
+// and the failure is not fatal.  The process exits with an error when a MIC
+// target number is below -1, or when a mandatory offload (is_optional == 0)
+// cannot get a device and no status object was supplied.
+// If "status" is given it is preset to OFFLOAD_UNAVAILABLE before anything
+// else happens.
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
+    TARGET_TYPE      target_type,
+    int              target_number,
+    int              is_optional,
+    _Offload_status* status,
+    const char*      file,
+    uint64_t         line
+)
+{
+    bool retval;
+    OFFLOAD ofld;
+
+    // initialize status
+    if (status != 0) {
+        status->result = OFFLOAD_UNAVAILABLE;
+        status->device_number = -1;
+        status->data_sent = 0;
+        status->data_received = 0;
+    }
+
+    // make sure library is initialized
+    retval = __offload_init_library();
+
+    // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
+    OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
+
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
+
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+
+    // initialize all devices if init_type is on_offload_all
+    if (retval && __offload_init_type == c_init_on_offload_all) {
+        for (int i = 0; i < mic_engines_total; i++) {
+             mic_engines[i].init();
+        }
+    }
+    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
+
+    if (target_type == TARGET_HOST) {
+        // Host always available
+        retval = true;
+    }
+    else if (target_type == TARGET_MIC) {
+        if (target_number >= -1) {
+            if (retval) {
+                if (target_number >= 0) {
+                    // User provided the device number
+                    // (wrapped into the range of available engines)
+                    target_number = target_number % mic_engines_total;
+                }
+                else {
+                    // use device 0
+                    target_number = 0;
+                }
+
+                // reserve device in ORSL
+                // (optional offloads only try; a refusal falls back to CPU)
+                if (is_optional) {
+                    if (!ORSL::try_reserve(target_number)) {
+                        target_number = -1;
+                    }
+                }
+                else {
+                    if (!ORSL::reserve(target_number)) {
+                        target_number = -1;
+                    }
+                }
+
+                // initialize device
+                if (target_number >= 0 &&
+                    __offload_init_type == c_init_on_offload) {
+                    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+                    mic_engines[target_number].init();
+                    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+                }
+            }
+            else {
+                // fallback to CPU
+                target_number = -1;
+            }
+
+            if (target_number < 0 || !retval) {
+                // a mandatory offload without a status object cannot report
+                // the failure to the caller, so it is fatal
+                if (!is_optional && status == 0) {
+                    LIBOFFLOAD_ERROR(c_device_is_not_available);
+                    exit(1);
+                }
+
+                retval = false;
+            }
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_device_number);
+            exit(1);
+        }
+    }
+
+    if (retval) {
+        ofld = new OffloadDescriptor(target_number, status,
+                                     !is_optional, false, timer_data);
+        OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
+        Offload_Report_Prolog(timer_data);
+        OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
+                              "Starting offload: target_type = %d, "
+                              "number = %d, is_optional = %d\n",
+                              target_type, target_number, is_optional);
+
+        OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
+    }
+    else {
+        ofld = NULL;
+
+        // no offload will run: close both open timers and free the timer data
+        OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
+        OFFLOAD_TIMER_STOP(timer_data, c_offload_host_total_offload);
+        offload_report_free_data(timer_data);
+    }
+
+    return ofld;
+}
+
+// OpenMP variant of target acquisition.  Uses *device_num when a pointer is
+// given and __omp_device_num otherwise, wraps the number modulo
+// mic_engines_total, reserves the device and returns a new
+// OffloadDescriptor.  Every failure (library initialization fails, negative
+// device number, reservation refused) reports an error and exits the process.
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
+    const int*  device_num,
+    const char* file,
+    uint64_t    line
+)
+{
+    int target_number;
+
+    // make sure library is initialized and at least one device is available
+    if (!__offload_init_library()) {
+        LIBOFFLOAD_ERROR(c_device_is_not_available);
+        exit(1);
+    }
+
+    // OFFLOAD_TIMER_INIT must follow call to __offload_init_library
+
+    OffloadHostTimerData * timer_data = OFFLOAD_TIMER_INIT(file, line);
+
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_total_offload);
+
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+
+    // initialize all devices if init_type is on_offload_all
+    if (__offload_init_type == c_init_on_offload_all) {
+        for (int i = 0; i < mic_engines_total; i++) {
+             mic_engines[i].init();
+        }
+    }
+
+    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_target_acquire);
+
+    // use default device number if it is not provided
+    if (device_num != 0) {
+        target_number = *device_num;
+    }
+    else {
+        target_number = __omp_device_num;
+    }
+
+    // device number should be a non-negative integer value
+    if (target_number < 0) {
+        LIBOFFLOAD_ERROR(c_omp_invalid_device_num);
+        exit(1);
+    }
+
+    // should we do this for OpenMP?
+    target_number %= mic_engines_total;
+
+    // reserve device in ORSL
+    if (!ORSL::reserve(target_number)) {
+        LIBOFFLOAD_ERROR(c_device_is_not_available);
+        exit(1);
+    }
+
+    // initialize device(s)
+    // (this timer runs while the target_acquire timer is still open)
+    OFFLOAD_TIMER_START(timer_data, c_offload_host_initialize);
+
+    if (__offload_init_type == c_init_on_offload) {
+        mic_engines[target_number].init();
+    }
+
+    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_initialize);
+
+    OFFLOAD ofld =
+        new OffloadDescriptor(target_number, 0, true, true, timer_data);
+
+    OFFLOAD_TIMER_HOST_MIC_NUM(timer_data, target_number);
+
+    Offload_Report_Prolog(timer_data);
+
+    OFFLOAD_DEBUG_TRACE_1(2, timer_data->offload_number, c_offload_start,
+                          "Starting OpenMP offload, device = %d\n",
+                          target_number);
+
+    OFFLOAD_TIMER_STOP(timer_data, c_offload_host_target_acquire);
+
+    return ofld;
+}
+
+// Runs the offload described by "ofld" and returns its result (non-zero on
+// success).  The descriptor is deleted here unless the offload succeeded
+// and a signal was supplied.
+int offload_offload_wrap(
+    OFFLOAD ofld,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void **waits,
+    const void **signal,
+    int entry_id,
+    const void *stack_addr
+)
+{
+    const bool succeeded =
+        ofld->offload(name, is_empty, vars, vars2, num_vars,
+                      waits, num_waits, signal, entry_id, stack_addr);
+    const bool has_signal = (signal != 0);
+
+    // keep the descriptor only for a successful offload that carries a signal
+    if (!(succeeded && has_signal)) {
+        delete ofld;
+    }
+
+    return succeeded;
+}
+
+// Runs an offload without an entry id or stack address: forwards all
+// arguments to offload_offload_wrap with entry_id = 0 and stack_addr = NULL
+// and returns its result.
+extern "C" int OFFLOAD_OFFLOAD1(
+    OFFLOAD ofld,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void **waits,
+    const void **signal
+)
+{
+    // entry_id is an int, so pass a plain 0 instead of the pointer constant
+    // NULL (which warns, or fails to compile where NULL is nullptr)
+    return offload_offload_wrap(ofld, name, is_empty,
+                            num_vars, vars, vars2,
+                            num_waits, waits,
+                            signal, 0, NULL);
+}
+
+// Runs an offload with an explicit entry id and stack address: forwards
+// every argument unchanged to offload_offload_wrap and returns its result.
+extern "C" int OFFLOAD_OFFLOAD2(
+    OFFLOAD ofld,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void** waits,
+    const void** signal,
+    int entry_id,
+    const void *stack_addr
+)
+{
+    const int result = offload_offload_wrap(
+        ofld, name, is_empty,
+        num_vars, vars, vars2,
+        num_waits, waits,
+        signal, entry_id, stack_addr);
+
+    return result;
+}
+
+// Variant that receives the signal by value.  It strips NULL entries from
+// the wait list, passes the signal by address (or no signal at all when it
+// is NULL) and forwards to OFFLOAD_OFFLOAD1; entry_id and stack_addr are
+// not forwarded.
+extern "C" int OFFLOAD_OFFLOAD(
+    OFFLOAD ofld,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void **waits,
+    const void *signal,
+    int entry_id,
+    const void *stack_addr
+)
+{
+    // compact the wait list into stack storage, keeping non-NULL signals only
+    const void **live_waits = 0;
+    int live_count = 0;
+    if (num_waits > 0) {
+        live_waits = (const void**) alloca(sizeof(void*) * num_waits);
+        for (int idx = 0; idx < num_waits; idx++) {
+            if (waits[idx] == 0) {
+                continue;
+            }
+            live_waits[live_count] = waits[idx];
+            live_count++;
+        }
+    }
+
+    // signal is passed by reference now; 0 stands for "no signal"
+    const void **signal_ref = 0;
+    if (signal != 0) {
+        signal_ref = &signal;
+    }
+
+    return OFFLOAD_OFFLOAD1(ofld, name, is_empty,
+                            num_vars, vars, vars2,
+                            live_count, live_waits,
+                            signal_ref);
+}
+
+// Increments the host-side offload call counter and returns the new value.
+extern "C" int OFFLOAD_CALL_COUNT()
+{
+    return ++offload_call_count;
+}
diff --git a/final/offload/src/compiler_if_host.h b/final/offload/src/compiler_if_host.h
new file mode 100644
index 0000000..4b34c51
--- /dev/null
+++ b/final/offload/src/compiler_if_host.h
@@ -0,0 +1,133 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*! \file
+    \brief The interface between compiler-generated host code and runtime library
+*/
+
+#ifndef COMPILER_IF_HOST_H_INCLUDED
+#define COMPILER_IF_HOST_H_INCLUDED
+
+#include "offload_host.h"
+
+#define OFFLOAD_TARGET_ACQUIRE          OFFLOAD_PREFIX(target_acquire)
+#define OFFLOAD_TARGET_ACQUIRE1         OFFLOAD_PREFIX(target_acquire1)
+#define OFFLOAD_OFFLOAD                 OFFLOAD_PREFIX(offload)
+#define OFFLOAD_OFFLOAD1                OFFLOAD_PREFIX(offload1)
+#define OFFLOAD_OFFLOAD2                OFFLOAD_PREFIX(offload2)
+#define OFFLOAD_CALL_COUNT              OFFLOAD_PREFIX(offload_call_count)
+
+
+/*! \fn OFFLOAD_TARGET_ACQUIRE
+    \brief Attempt to acquire the target.
+    \param target_type   The type of target.
+    \param target_number The device number.
+    \param is_optional   Whether CPU fall-back is allowed.
+    \param status        Address of variable to hold offload status.
+    \param file          Filename in which this offload occurred.
+    \param line          Line number in the file where this offload occurred.
+*/
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE(
+    TARGET_TYPE      target_type,
+    int              target_number,
+    int              is_optional,
+    _Offload_status* status,
+    const char*      file,
+    uint64_t         line
+);
+
+/*! \fn OFFLOAD_TARGET_ACQUIRE1
+    \brief Acquire the target for offload (OpenMP).
+    \param device_number Device number or null if not specified.
+    \param file          Filename in which this offload occurred
+    \param line          Line number in the file where this offload occurred.
+*/
+extern "C" OFFLOAD OFFLOAD_TARGET_ACQUIRE1(
+    const int*      device_number,
+    const char*     file,
+    uint64_t        line
+);
+
+/*! \fn OFFLOAD_OFFLOAD1
+    \brief Run function on target using interface for old data persistence.
+    \param o Offload descriptor created by OFFLOAD_TARGET_ACQUIRE.
+    \param name Name of offload entry point.
+    \param is_empty If no code to execute (e.g. offload_transfer)
+    \param num_vars Number of variable descriptors.
+    \param vars Pointer to VarDesc array.
+    \param vars2 Pointer to VarDesc2 array.
+    \param num_waits Number of "wait" values.
+    \param waits Pointer to array of wait values.
+    \param signal Pointer to signal value or NULL.
+*/
+extern "C" int OFFLOAD_OFFLOAD1(
+    OFFLOAD o,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void** waits,
+    const void** signal
+);
+
+/*! \fn OFFLOAD_OFFLOAD2
+    \brief Run function on target using interface for new data persistence.
+    \param o Offload descriptor created by OFFLOAD_TARGET_ACQUIRE.
+    \param name Name of offload entry point.
+    \param is_empty If no code to execute (e.g. offload_transfer)
+    \param num_vars Number of variable descriptors.
+    \param vars Pointer to VarDesc array.
+    \param vars2 Pointer to VarDesc2 array.
+    \param num_waits Number of "wait" values.
+    \param waits Pointer to array of wait values.
+    \param signal Pointer to signal value or NULL.
+    \param entry_id A signature for the function doing the offload.
+    \param stack_addr The stack frame address of the function doing offload.
+*/
+extern "C" int OFFLOAD_OFFLOAD2(
+    OFFLOAD o,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void** waits,
+    const void** signal,
+    int entry_id,
+    const void *stack_addr
+);
+
+// Run function on target (obsolete).
+// @param o    OFFLOAD object
+// @param name function name
+extern "C" int OFFLOAD_OFFLOAD(
+    OFFLOAD o,
+    const char *name,
+    int is_empty,
+    int num_vars,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int num_waits,
+    const void** waits,
+    const void* signal,
+    int entry_id = 0,
+    const void *stack_addr = NULL
+);
+
+// Global counter on host.
+// This variable is used if P2OPT_offload_do_data_persistence == 2.
+// It is used to identify offload constructs contained in one procedure.
+// Call to OFFLOAD_CALL_COUNT() is inserted at HOST on entry of the routine.
+extern "C" int  OFFLOAD_CALL_COUNT();
+
+#endif // COMPILER_IF_HOST_H_INCLUDED
diff --git a/final/offload/src/compiler_if_target.cpp b/final/offload/src/compiler_if_target.cpp
new file mode 100644
index 0000000..1af82b8
--- /dev/null
+++ b/final/offload/src/compiler_if_target.cpp
@@ -0,0 +1,44 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "compiler_if_target.h"
+
+// Traces the call, passes the VarDesc/VarDesc2 arrays and their count to the
+// descriptor's merge_var_descs(), then calls scatter_copyin_data() on the
+// same descriptor.
+extern "C" void OFFLOAD_TARGET_ENTER(
+    OFFLOAD ofld,
+    int vars_total,
+    VarDesc *vars,
+    VarDesc2 *vars2
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p, %d, %p, %p)\n", __func__, ofld,
+                        vars_total, vars, vars2);
+    // Note the argument order: the descriptor arrays come first, the count last.
+    ofld->merge_var_descs(vars, vars2, vars_total);
+    ofld->scatter_copyin_data();
+}
+
+// Traces the call and then calls gather_copyout_data() on the descriptor.
+extern "C" void OFFLOAD_TARGET_LEAVE(
+    OFFLOAD ofld
+)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ofld);
+    ofld->gather_copyout_data();
+}
+
+// Calls __offload_target_init(), then PipelineStartExecutingRunFunctions()
+// and ProcessWaitForShutdown(), and finally traces an exit message.
+extern "C" void OFFLOAD_TARGET_MAIN(void)
+{
+    // initialize target part
+    __offload_target_init();
+
+    // pass control to COI
+    PipelineStartExecutingRunFunctions();
+    ProcessWaitForShutdown();
+
+    // Reached only after ProcessWaitForShutdown() has returned.
+    OFFLOAD_DEBUG_TRACE(2, "Exiting main...\n");
+}
diff --git a/final/offload/src/compiler_if_target.h b/final/offload/src/compiler_if_target.h
new file mode 100644
index 0000000..49d2c1c
--- /dev/null
+++ b/final/offload/src/compiler_if_target.h
@@ -0,0 +1,50 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*! \file
+    \brief The interface between compiler-generated target code and runtime library
+*/
+
+#ifndef COMPILER_IF_TARGET_H_INCLUDED
+#define COMPILER_IF_TARGET_H_INCLUDED
+
+#include "offload_target.h"
+
+#define OFFLOAD_TARGET_ENTER            OFFLOAD_PREFIX(target_enter)
+#define OFFLOAD_TARGET_LEAVE            OFFLOAD_PREFIX(target_leave)
+#define OFFLOAD_TARGET_MAIN             OFFLOAD_PREFIX(target_main)
+
+/*! \fn OFFLOAD_TARGET_ENTER
+    \brief Fill in variable addresses using VarDesc array.
+    \brief Then call back the runtime library to fetch data.
+    \param ofld         Offload descriptor created by runtime.
+    \param var_desc_num Number of variable descriptors.
+    \param var_desc     Pointer to VarDesc array.
+    \param var_desc2    Pointer to VarDesc2 array.
+*/
+extern "C" void OFFLOAD_TARGET_ENTER(
+    OFFLOAD ofld,
+    int var_desc_num,
+    VarDesc *var_desc,
+    VarDesc2 *var_desc2
+);
+
+/*! \fn OFFLOAD_TARGET_LEAVE
+    \brief Call back the runtime library to gather outputs using VarDesc array.
+    \param ofld Offload descriptor created by OFFLOAD_TARGET_ACQUIRE.
+*/
+extern "C" void OFFLOAD_TARGET_LEAVE(
+    OFFLOAD ofld
+);
+
+// Entry point for the target application.
+extern "C" void OFFLOAD_TARGET_MAIN(void);
+
+#endif // COMPILER_IF_TARGET_H_INCLUDED
diff --git a/final/offload/src/dv_util.cpp b/final/offload/src/dv_util.cpp
new file mode 100644
index 0000000..4ad7271
--- /dev/null
+++ b/final/offload/src/dv_util.cpp
@@ -0,0 +1,131 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_common.h"
+
+// Reports whether the array described by a dope vector is laid out
+// contiguously.  A set contiguous flag settles the answer.  Otherwise the
+// first dimension's multiplier must equal Len and every later multiplier
+// must equal Extent * Mult of the dimension before it.
+bool __dv_is_contiguous(const ArrDesc *dvp)
+{
+    if ((dvp->Flags & ArrDescFlagsContiguous) != 0) {
+        return true;
+    }
+    if (dvp->Rank == 0) {
+        return true;
+    }
+
+    const DimDesc *dims = dvp->Dim;
+    if (dims[0].Mult != dvp->Len) {
+        return false;
+    }
+    for (int d = 1; d < dvp->Rank; ++d) {
+        // Multiplier this dimension has when the previous one is packed.
+        const dv_size packed_mult = dims[d-1].Extent * dims[d-1].Mult;
+        if (dims[d].Mult != packed_mult) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// True when the ArrDescFlagsDefined bit is set in the dope vector's flags.
+bool __dv_is_allocated(const ArrDesc *dvp)
+{
+    const dv_size defined_bit = dvp->Flags & ArrDescFlagsDefined;
+    return defined_bit != 0;
+}
+
+// Computes Len plus, for every dimension, (Extent - 1) * Mult.  With no
+// dimensions the result is just Len.
+uint64_t __dv_data_length(const ArrDesc *dvp)
+{
+    uint64_t total = dvp->Len;
+    for (int d = 0; d < dvp->Rank; ++d) {
+        const DimDesc &dim = dvp->Dim[d];
+        total += (dim.Extent - 1) * dim.Mult;
+    }
+    return total;
+}
+
+// Scales count by the first dimension's multiplier; for a rank-0 descriptor
+// count is returned unchanged.
+uint64_t __dv_data_length(const ArrDesc *dvp, int64_t count)
+{
+    return (dvp->Rank == 0) ? count : count * dvp->Dim[0].Mult;
+}
+
+// Create CeanReadRanges data for reading contiguous ranges of
+// noncontiguous array defined by the argument
+//
+// Returns NULL when the dope vector has rank 0; otherwise returns a block
+// obtained from malloc() (releasing it is presumably the caller's job --
+// TODO confirm against callers).
+// NOTE(review): the malloc() result is dereferenced without a NULL check.
+CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp)
+{
+    int64_t         len;
+    int             count;
+    int             rank = dvp->Rank;
+    CeanReadRanges *res = NULL;
+
+    if (rank != 0) {
+        // i ends up as the number of leading dimensions that are packed
+        // back to back; len accumulates the size of one contiguous range.
+        int i = 0;
+        len = dvp->Len;
+        if (dvp->Dim[0].Mult == len) {
+            for (i = 1; i < rank; i++) {
+                len *= dvp->Dim[i-1].Extent;
+                if (dvp->Dim[i].Mult != len) {
+                    break;
+                }
+            }
+        }
+        // NOTE(review): if the loop above finishes without a break, i == rank
+        // and len does not include the last dimension's extent; presumably
+        // this function is only called for noncontiguous arrays -- confirm.
+
+        // Room for the header plus one CeanReadDim per remaining dimension.
+        res = (CeanReadRanges *)malloc(
+            sizeof(CeanReadRanges) + (rank - i) * sizeof(CeanReadDim));
+        res -> last_noncont_ind = rank - i - 1;
+        count = 1;
+        // Remaining dimensions are stored in reverse order: dimension i goes
+        // to slot rank - i - 1.  count is the product of the extents of the
+        // remaining dimensions processed so far.
+        for (; i < rank; i++) {
+            res->Dim[rank - i - 1].count = count;
+            res->Dim[rank - i - 1].size = dvp->Dim[i].Mult;
+            count *= dvp->Dim[i].Extent;
+        }
+        res -> range_max_number = count;
+        res -> range_size = len;
+        res -> ptr = (void*)dvp->Base;
+        res -> current_number = 0;
+        res -> init_offset = 0;
+    }
+    return res;
+}
+
+#if OFFLOAD_DEBUG > 0
+// Debug helper: traces the dope vector's address, then (for a non-null dvp)
+// its header fields and one line per dimension, all via OFFLOAD_TRACE(3, ...).
+// NOTE(review): the fields are dv_size (int64_t) but are printed with
+// %lx / %ld, which presumably assumes a 64-bit long -- confirm for all
+// supported targets.
+void __dv_desc_dump(const char *name, const ArrDesc *dvp)
+{
+    OFFLOAD_TRACE(3, "%s DV %p\n", name, dvp);
+
+    if (dvp != 0) {
+        OFFLOAD_TRACE(3,
+                      "    dv->Base   = 0x%lx\n"
+                      "    dv->Len    = 0x%lx\n"
+                      "    dv->Offset = 0x%lx\n"
+                      "    dv->Flags  = 0x%lx\n"
+                      "    dv->Rank   = 0x%lx\n"
+                      "    dv->Resrvd = 0x%lx\n",
+                      dvp->Base,
+                      dvp->Len,
+                      dvp->Offset,
+                      dvp->Flags,
+                      dvp->Rank,
+                      dvp->Reserved);
+
+        // One trace line per dimension, indexed from 0 to Rank - 1.
+        for (int i = 0 ; i < dvp->Rank; i++) {
+            OFFLOAD_TRACE(3,
+                          "    (%d) Extent=%ld, Multiplier=%ld, LowerBound=%ld\n",
+                          i,
+                          dvp->Dim[i].Extent,
+                          dvp->Dim[i].Mult,
+                          dvp->Dim[i].LowerBound);
+        }
+    }
+}
+#endif // OFFLOAD_DEBUG > 0
diff --git a/final/offload/src/dv_util.h b/final/offload/src/dv_util.h
new file mode 100644
index 0000000..fdfa77d
--- /dev/null
+++ b/final/offload/src/dv_util.h
@@ -0,0 +1,63 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef DV_UTIL_H_INCLUDED
+#define DV_UTIL_H_INCLUDED
+
+#include <stdint.h>
+
+// Dope vector declarations
+#define ArrDescMaxArrayRank         31
+
+// Dope vector flags
+#define ArrDescFlagsDefined         1
+#define ArrDescFlagsNodealloc       2
+#define ArrDescFlagsContiguous      4
+
+typedef int64_t dv_size;
+
+// Description of a single array dimension inside a dope vector (ArrDesc).
+typedef struct DimDesc {
+    dv_size        Extent;      // Number of elements in this dimension
+    dv_size        Mult;        // Multiplier for this dimension.
+                                // The number of bytes between successive
+                                // elements in this dimension.
+    dv_size        LowerBound;  // LowerBound of this dimension
+} DimDesc ;
+
+// Dope vector: a fixed header followed by room for ArrDescMaxArrayRank
+// dimension descriptors; Rank tells how many Dim entries the helpers read.
+typedef struct ArrDesc {
+    dv_size        Base;        // Base address
+    dv_size        Len;         // Length of data type, used only for
+                                // character strings.
+                                // NOTE(review): __dv_is_contiguous() and
+                                // __dv_data_length() in dv_util.cpp read Len
+                                // for every descriptor -- confirm "only".
+    dv_size        Offset;      // purpose not documented here -- TODO confirm
+    dv_size        Flags;       // Flags (ArrDescFlags* bits)
+    dv_size        Rank;        // Rank of pointer
+    dv_size        Reserved;    // reserved for openmp requests
+    DimDesc Dim[ArrDescMaxArrayRank];
+} ArrDesc ;
+
+typedef ArrDesc* pArrDesc;
+
+bool __dv_is_contiguous(const ArrDesc *dvp);
+
+bool __dv_is_allocated(const ArrDesc *dvp);
+
+uint64_t __dv_data_length(const ArrDesc *dvp);
+
+uint64_t __dv_data_length(const ArrDesc *dvp, int64_t nelems);
+
+CeanReadRanges * init_read_ranges_dv(const ArrDesc *dvp);
+
+#if OFFLOAD_DEBUG > 0
+void    __dv_desc_dump(const char *name, const ArrDesc *dvp);
+#else // OFFLOAD_DEBUG
+#define __dv_desc_dump(name, dvp)
+#endif // OFFLOAD_DEBUG
+
+#endif // DV_UTIL_H_INCLUDED
diff --git a/final/offload/src/liboffload_error.c b/final/offload/src/liboffload_error.c
new file mode 100644
index 0000000..fc15f8b
--- /dev/null
+++ b/final/offload/src/liboffload_error.c
@@ -0,0 +1,452 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#ifndef va_copy
+#define va_copy(dst, src) ((dst) = (src))
+#endif
+
+#include "liboffload_msg.h"
+
+#include "liboffload_error_codes.h"
+
+/*
+ * __liboffload_error_support: error-handling entry point.
+ *
+ * Maps the error tag to its msg_ message id and writes that message to
+ * stderr through write_message(), forwarding the caller's variadic
+ * arguments.  A tag without a case below produces no output.
+ */
+
+void __liboffload_error_support(error_types input_tag, ...)
+{
+    va_list args;
+    va_start(args, input_tag);
+
+    switch (input_tag) {
+        case c_device_is_not_available:
+            write_message(stderr, msg_c_device_is_not_available, args);
+            break;
+        case c_invalid_device_number:
+            write_message(stderr, msg_c_invalid_device_number, args);
+            break;
+        case c_send_func_ptr:
+            write_message(stderr, msg_c_send_func_ptr, args);
+            break;
+        case c_receive_func_ptr:
+            write_message(stderr, msg_c_receive_func_ptr, args);
+            break;
+        case c_offload_malloc:
+            write_message(stderr, msg_c_offload_malloc, args);
+            break;
+        case c_offload1:
+            write_message(stderr, msg_c_offload1, args);
+            break;
+        case c_unknown_var_type:
+            // Use the msg_ id like every other case; the error tag itself
+            // is not a message id.
+            write_message(stderr, msg_c_unknown_var_type, args);
+            break;
+        case c_invalid_env_var_value:
+            write_message(stderr, msg_c_invalid_env_var_value, args);
+            break;
+        case c_invalid_env_var_int_value:
+            write_message(stderr, msg_c_invalid_env_var_int_value, args);
+            break;
+        case c_invalid_env_report_value:
+            write_message(stderr, msg_c_invalid_env_report_value, args);
+            break;
+        case c_offload_signaled1:
+            write_message(stderr, msg_c_offload_signaled1, args);
+            break;
+        case c_offload_signaled2:
+            write_message(stderr, msg_c_offload_signaled2, args);
+            break;
+        case c_myowrapper_checkresult:
+            write_message(stderr, msg_c_myowrapper_checkresult, args);
+            break;
+        case c_myotarget_checkresult:
+            write_message(stderr, msg_c_myotarget_checkresult, args);
+            break;
+        case c_offload_descriptor_offload:
+            write_message(stderr, msg_c_offload_descriptor_offload, args);
+            break;
+        case c_merge_var_descs1:
+            write_message(stderr, msg_c_merge_var_descs1, args);
+            break;
+        case c_merge_var_descs2:
+            write_message(stderr, msg_c_merge_var_descs2, args);
+            break;
+        case c_mic_parse_env_var_list1:
+            write_message(stderr, msg_c_mic_parse_env_var_list1, args);
+            break;
+        case c_mic_parse_env_var_list2:
+            write_message(stderr, msg_c_mic_parse_env_var_list2, args);
+            break;
+        case c_mic_process_exit_ret:
+            write_message(stderr, msg_c_mic_process_exit_ret, args);
+            break;
+        case c_mic_process_exit_sig:
+            write_message(stderr, msg_c_mic_process_exit_sig, args);
+            break;
+        case c_mic_process_exit:
+            write_message(stderr, msg_c_mic_process_exit, args);
+            break;
+        case c_mic_init3:
+            write_message(stderr, msg_c_mic_init3, args);
+            break;
+        case c_mic_init4:
+            write_message(stderr, msg_c_mic_init4, args);
+            break;
+        case c_mic_init5:
+            write_message(stderr, msg_c_mic_init5, args);
+            break;
+        case c_mic_init6:
+            write_message(stderr, msg_c_mic_init6, args);
+            break;
+        case c_no_static_var_data:
+            write_message(stderr, msg_c_no_static_var_data, args);
+            break;
+        case c_no_ptr_data:
+            write_message(stderr, msg_c_no_ptr_data, args);
+            break;
+        case c_get_engine_handle:
+            write_message(stderr, msg_c_get_engine_handle, args);
+            break;
+        case c_get_engine_index:
+            write_message(stderr, msg_c_get_engine_index, args);
+            break;
+        case c_process_create:
+            write_message(stderr, msg_c_process_create, args);
+            break;
+        case c_process_wait_shutdown:
+            write_message(stderr, msg_c_process_wait_shutdown, args);
+            break;
+        case c_process_proxy_flush:
+            write_message(stderr, msg_c_process_proxy_flush, args);
+            break;
+        case c_process_get_func_handles:
+            write_message(stderr, msg_c_process_get_func_handles, args);
+            break;
+        case c_load_library:
+            write_message(stderr, msg_c_load_library, args);
+            break;
+        case c_coipipe_max_number:
+            write_message(stderr, msg_c_coi_pipeline_max_number, args);
+            break;
+        case c_pipeline_create:
+            write_message(stderr, msg_c_pipeline_create, args);
+            break;
+        case c_pipeline_run_func:
+            write_message(stderr, msg_c_pipeline_run_func, args);
+            break;
+        case c_pipeline_start_run_funcs:
+            write_message(stderr, msg_c_pipeline_start_run_funcs, args);
+            break;
+        case c_buf_create:
+            write_message(stderr, msg_c_buf_create, args);
+            break;
+        case c_buf_create_out_of_mem:
+            write_message(stderr, msg_c_buf_create_out_of_mem, args);
+            break;
+        case c_buf_create_from_mem:
+            write_message(stderr, msg_c_buf_create_from_mem, args);
+            break;
+        case c_buf_destroy:
+            write_message(stderr, msg_c_buf_destroy, args);
+            break;
+        case c_buf_map:
+            write_message(stderr, msg_c_buf_map, args);
+            break;
+        case c_buf_unmap:
+            write_message(stderr, msg_c_buf_unmap, args);
+            break;
+        case c_buf_read:
+            write_message(stderr, msg_c_buf_read, args);
+            break;
+        case c_buf_write:
+            write_message(stderr, msg_c_buf_write, args);
+            break;
+        case c_buf_copy:
+            write_message(stderr, msg_c_buf_copy, args);
+            break;
+        case c_buf_get_address:
+            write_message(stderr, msg_c_buf_get_address, args);
+            break;
+        case c_buf_add_ref:
+            write_message(stderr, msg_c_buf_add_ref, args);
+            break;
+        case c_buf_release_ref:
+            write_message(stderr, msg_c_buf_release_ref, args);
+            break;
+        case c_buf_set_state:
+            write_message(stderr, msg_c_buf_set_state, args);
+            break;
+        case c_event_wait:
+            write_message(stderr, msg_c_event_wait, args);
+            break;
+        case c_zero_or_neg_ptr_len:
+            write_message(stderr, msg_c_zero_or_neg_ptr_len, args);
+            break;
+        case c_zero_or_neg_transfer_size:
+            write_message(stderr, msg_c_zero_or_neg_transfer_size, args);
+            break;
+        case c_bad_ptr_mem_range:
+            write_message(stderr, msg_c_bad_ptr_mem_range, args);
+            break;
+        case c_different_src_and_dstn_sizes:
+            write_message(stderr, msg_c_different_src_and_dstn_sizes, args);
+            break;
+        case c_ranges_dont_match:
+            write_message(stderr, msg_c_ranges_dont_match, args);
+            break;
+        case c_destination_is_over:
+            write_message(stderr, msg_c_destination_is_over, args);
+            break;
+        case c_slice_of_noncont_array:
+            write_message(stderr, msg_c_slice_of_noncont_array, args);
+            break;
+        case c_non_contiguous_dope_vector:
+            write_message(stderr, msg_c_non_contiguous_dope_vector, args);
+            break;
+        case c_pointer_array_mismatch:
+            write_message(stderr, msg_c_pointer_array_mismatch, args);
+            break;
+        case c_omp_invalid_device_num_env:
+            write_message(stderr, msg_c_omp_invalid_device_num_env, args);
+            break;
+        case c_omp_invalid_device_num:
+            write_message(stderr, msg_c_omp_invalid_device_num, args);
+            break;
+        case c_unknown_binary_type:
+            write_message(stderr, msg_c_unknown_binary_type, args);
+            break;
+        case c_multiple_target_exes:
+            write_message(stderr, msg_c_multiple_target_exes, args);
+            break;
+        case c_no_target_exe:
+            write_message(stderr, msg_c_no_target_exe, args);
+            break;
+        case c_report_unknown_timer_node:
+            write_message(stderr, msg_c_report_unknown_timer_node, args);
+            break;
+        case c_report_unknown_trace_node:
+            write_message(stderr, msg_c_report_unknown_trace_node, args);
+            break;
+    }
+    va_end(args);
+}
+
+// Returns the message text for a report tag.  Each known tag is mapped to
+// its msg_ id and looked up with offload_get_message_str().  An unknown tag
+// is reported via LIBOFFLOAD_ERROR(c_report_unknown_trace_node), after which
+// abort() is called.
+char const * report_get_message_str(error_types input_tag)
+{
+    switch (input_tag) {
+        case c_report_title:
+            return (offload_get_message_str(msg_c_report_title));
+        case c_report_from_file:
+            return (offload_get_message_str(msg_c_report_from_file));
+        case c_report_offload:
+            return (offload_get_message_str(msg_c_report_offload));
+        case c_report_mic:
+            return (offload_get_message_str(msg_c_report_mic));
+        case c_report_file:
+            return (offload_get_message_str(msg_c_report_file));
+        case c_report_line:
+            return (offload_get_message_str(msg_c_report_line));
+        case c_report_host:
+            return (offload_get_message_str(msg_c_report_host));
+        case c_report_tag:
+            return (offload_get_message_str(msg_c_report_tag));
+        case c_report_cpu_time:
+            return (offload_get_message_str(msg_c_report_cpu_time));
+        case c_report_seconds:
+            return (offload_get_message_str(msg_c_report_seconds));
+        case c_report_cpu_to_mic_data:
+            return (offload_get_message_str(msg_c_report_cpu_to_mic_data));
+        case c_report_bytes:
+            return (offload_get_message_str(msg_c_report_bytes));
+        case c_report_mic_time:
+            return (offload_get_message_str(msg_c_report_mic_time));
+        case c_report_mic_to_cpu_data:
+            return (offload_get_message_str(msg_c_report_mic_to_cpu_data));
+        case c_report_compute:
+            return (offload_get_message_str(msg_c_report_compute));
+        case c_report_copyin_data:
+            return (offload_get_message_str(msg_c_report_copyin_data));
+        case c_report_copyout_data:
+            return (offload_get_message_str(msg_c_report_copyout_data));
+        case c_report_create_buf_host:
+            // Look up the msg_ id like every other case; the report tag
+            // itself is not a message id.
+            return (offload_get_message_str(msg_c_report_create_buf_host));
+        case c_report_create_buf_mic:
+            return (offload_get_message_str(msg_c_report_create_buf_mic));
+        case c_report_destroy:
+            return (offload_get_message_str(msg_c_report_destroy));
+        case c_report_gather_copyin_data:
+            return (offload_get_message_str(msg_c_report_gather_copyin_data));
+        case c_report_gather_copyout_data:
+            return (offload_get_message_str(msg_c_report_gather_copyout_data));
+        case c_report_state_signal:
+            return (offload_get_message_str(msg_c_report_state_signal));
+        case c_report_signal:
+            return (offload_get_message_str(msg_c_report_signal));
+        case c_report_wait:
+            return (offload_get_message_str(msg_c_report_wait));
+        case c_report_init:
+            return (offload_get_message_str(msg_c_report_init));
+        case c_report_init_func:
+            return (offload_get_message_str(msg_c_report_init_func));
+        case c_report_logical_card:
+            return (offload_get_message_str(msg_c_report_logical_card));
+        case c_report_mic_myo_fptr:
+            return (offload_get_message_str(msg_c_report_mic_myo_fptr));
+        case c_report_mic_myo_shared:
+            return (offload_get_message_str(msg_c_report_mic_myo_shared));
+        case c_report_myoacquire:
+            return (offload_get_message_str(msg_c_report_myoacquire));
+        case c_report_myofini:
+            return (offload_get_message_str(msg_c_report_myofini));
+        case c_report_myoinit:
+            return (offload_get_message_str(msg_c_report_myoinit));
+        case c_report_myoregister:
+            return (offload_get_message_str(msg_c_report_myoregister));
+        case c_report_myorelease:
+            return (offload_get_message_str(msg_c_report_myorelease));
+        case c_report_myosharedalignedfree:
+            return (
+                offload_get_message_str(msg_c_report_myosharedalignedfree));
+        case c_report_myosharedalignedmalloc:
+            return (
+                offload_get_message_str(msg_c_report_myosharedalignedmalloc));
+        case c_report_myosharedfree:
+            return (offload_get_message_str(msg_c_report_myosharedfree));
+        case c_report_myosharedmalloc:
+            return (offload_get_message_str(msg_c_report_myosharedmalloc));
+        case c_report_physical_card:
+            return (offload_get_message_str(msg_c_report_physical_card));
+        case c_report_receive_pointer_data:
+            return (
+                offload_get_message_str(msg_c_report_receive_pointer_data));
+        case c_report_received_pointer_data:
+            return (
+                offload_get_message_str(msg_c_report_received_pointer_data));
+        case c_report_register:
+            return (offload_get_message_str(msg_c_report_register));
+        case c_report_scatter_copyin_data:
+            return (offload_get_message_str(msg_c_report_scatter_copyin_data));
+        case c_report_scatter_copyout_data:
+            return (
+                offload_get_message_str(msg_c_report_scatter_copyout_data));
+        case c_report_send_pointer_data:
+            return (offload_get_message_str(msg_c_report_send_pointer_data));
+        case c_report_sent_pointer_data:
+            return (offload_get_message_str(msg_c_report_sent_pointer_data));
+        case c_report_start:
+            return (offload_get_message_str(msg_c_report_start));
+        case c_report_start_target_func:
+            return (offload_get_message_str(msg_c_report_start_target_func));
+        case c_report_state:
+            return (offload_get_message_str(msg_c_report_state));
+        case c_report_unregister:
+            return (offload_get_message_str(msg_c_report_unregister));
+        case c_report_var:
+            return (offload_get_message_str(msg_c_report_var));
+
+        default:
+            LIBOFFLOAD_ERROR(c_report_unknown_trace_node);
+            abort();
+    }
+}
+
+// Returns the message text naming a host-side timer stage.  The stage is
+// first translated to its msg_ id; the text is then fetched with a single
+// offload_get_message_str() call.  An unknown stage is reported via
+// LIBOFFLOAD_ERROR(c_report_unknown_timer_node), after which abort() is
+// called.
+char const * report_get_host_stage_str(int i)
+{
+    int msg_id;
+
+    switch (i) {
+        case c_offload_host_total_offload:
+            msg_id = msg_c_report_host_total_offload_time;
+            break;
+        case c_offload_host_initialize:
+            msg_id = msg_c_report_host_initialize;
+            break;
+        case c_offload_host_target_acquire:
+            msg_id = msg_c_report_host_target_acquire;
+            break;
+        case c_offload_host_wait_deps:
+            msg_id = msg_c_report_host_wait_deps;
+            break;
+        case c_offload_host_setup_buffers:
+            msg_id = msg_c_report_host_setup_buffers;
+            break;
+        case c_offload_host_alloc_buffers:
+            msg_id = msg_c_report_host_alloc_buffers;
+            break;
+        case c_offload_host_setup_misc_data:
+            msg_id = msg_c_report_host_setup_misc_data;
+            break;
+        case c_offload_host_alloc_data_buffer:
+            msg_id = msg_c_report_host_alloc_data_buffer;
+            break;
+        case c_offload_host_send_pointers:
+            msg_id = msg_c_report_host_send_pointers;
+            break;
+        case c_offload_host_gather_inputs:
+            msg_id = msg_c_report_host_gather_inputs;
+            break;
+        case c_offload_host_map_in_data_buffer:
+            msg_id = msg_c_report_host_map_in_data_buffer;
+            break;
+        case c_offload_host_unmap_in_data_buffer:
+            msg_id = msg_c_report_host_unmap_in_data_buffer;
+            break;
+        case c_offload_host_start_compute:
+            msg_id = msg_c_report_host_start_compute;
+            break;
+        case c_offload_host_wait_compute:
+            msg_id = msg_c_report_host_wait_compute;
+            break;
+        case c_offload_host_start_buffers_reads:
+            msg_id = msg_c_report_host_start_buffers_reads;
+            break;
+        case c_offload_host_scatter_outputs:
+            msg_id = msg_c_report_host_scatter_outputs;
+            break;
+        case c_offload_host_map_out_data_buffer:
+            msg_id = msg_c_report_host_map_out_data_buffer;
+            break;
+        case c_offload_host_unmap_out_data_buffer:
+            msg_id = msg_c_report_host_unmap_out_data_buffer;
+            break;
+        case c_offload_host_wait_buffers_reads:
+            msg_id = msg_c_report_host_wait_buffers_reads;
+            break;
+        case c_offload_host_destroy_buffers:
+            msg_id = msg_c_report_host_destroy_buffers;
+            break;
+        default:
+            LIBOFFLOAD_ERROR(c_report_unknown_timer_node);
+            abort();
+    }
+
+    return offload_get_message_str(msg_id);
+}
+
+// Returns the message text naming a target-side timer stage.  The stage is
+// first translated to its msg_ id; the text is then fetched with a single
+// offload_get_message_str() call.  An unknown stage is reported via
+// LIBOFFLOAD_ERROR(c_report_unknown_timer_node), after which abort() is
+// called.
+char const * report_get_target_stage_str(int i)
+{
+    int msg_id;
+
+    switch (i) {
+        case c_offload_target_total_time:
+            msg_id = msg_c_report_target_total_time;
+            break;
+        case c_offload_target_descriptor_setup:
+            msg_id = msg_c_report_target_descriptor_setup;
+            break;
+        case c_offload_target_func_lookup:
+            msg_id = msg_c_report_target_func_lookup;
+            break;
+        case c_offload_target_func_time:
+            msg_id = msg_c_report_target_func_time;
+            break;
+        case c_offload_target_scatter_inputs:
+            msg_id = msg_c_report_target_scatter_inputs;
+            break;
+        case c_offload_target_add_buffer_refs:
+            msg_id = msg_c_report_target_add_buffer_refs;
+            break;
+        case c_offload_target_compute:
+            msg_id = msg_c_report_target_compute;
+            break;
+        case c_offload_target_gather_outputs:
+            msg_id = msg_c_report_target_gather_outputs;
+            break;
+        case c_offload_target_release_buffer_refs:
+            msg_id = msg_c_report_target_release_buffer_refs;
+            break;
+        default:
+            LIBOFFLOAD_ERROR(c_report_unknown_timer_node);
+            abort();
+    }
+
+    return offload_get_message_str(msg_id);
+}
diff --git a/final/offload/src/liboffload_error_codes.h b/final/offload/src/liboffload_error_codes.h
new file mode 100644
index 0000000..982167b
--- /dev/null
+++ b/final/offload/src/liboffload_error_codes.h
@@ -0,0 +1,276 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#if !defined(LIBOFFLOAD_ERROR_CODES_H)
+#define LIBOFFLOAD_ERROR_CODES_H
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+typedef enum // Message tags accepted as input_tag by the error/report support functions declared further down in this header.
+{
+    c_device_is_not_available = 0,
+    c_invalid_device_number,
+    c_offload1,
+    c_unknown_var_type,
+    c_send_func_ptr,
+    c_receive_func_ptr,
+    c_offload_malloc,
+    c_invalid_env_var_value,
+    c_invalid_env_var_int_value,
+    c_invalid_env_report_value,
+    c_offload_signaled1,
+    c_offload_signaled2,
+    c_myotarget_checkresult,
+    c_myowrapper_checkresult,
+    c_offload_descriptor_offload,
+    c_merge_var_descs1,
+    c_merge_var_descs2,
+    c_mic_parse_env_var_list1,
+    c_mic_parse_env_var_list2,
+    c_mic_process_exit_ret,
+    c_mic_process_exit_sig,
+    c_mic_process_exit,
+    c_mic_init3,
+    c_mic_init4,
+    c_mic_init5,
+    c_mic_init6,
+    c_no_static_var_data,
+    c_no_ptr_data,
+    c_get_engine_handle,
+    c_get_engine_index,
+    c_process_create,
+    c_process_get_func_handles,
+    c_process_wait_shutdown,
+    c_process_proxy_flush,
+    c_load_library,
+    c_pipeline_create,
+    c_pipeline_run_func,
+    c_pipeline_start_run_funcs,
+    c_buf_create,
+    c_buf_create_out_of_mem,
+    c_buf_create_from_mem,
+    c_buf_destroy,
+    c_buf_map,
+    c_buf_unmap,
+    c_buf_read,
+    c_buf_write,
+    c_buf_copy,
+    c_buf_get_address,
+    c_buf_add_ref,
+    c_buf_release_ref,
+    c_buf_set_state,
+    c_event_wait,
+    c_zero_or_neg_ptr_len,
+    c_zero_or_neg_transfer_size,
+    c_bad_ptr_mem_range,
+    c_different_src_and_dstn_sizes,
+    c_ranges_dont_match,
+    c_destination_is_over,
+    c_slice_of_noncont_array,
+    c_non_contiguous_dope_vector,
+    c_pointer_array_mismatch,
+    c_omp_invalid_device_num_env,
+    c_omp_invalid_device_num,
+    c_unknown_binary_type,
+    c_multiple_target_exes,
+    c_no_target_exe,
+    c_report_host,
+    c_report_target,
+    c_report_title,
+    c_report_from_file,
+    c_report_file,
+    c_report_line,
+    c_report_tag,
+    c_report_seconds,
+    c_report_bytes,
+    c_report_mic,
+    c_report_cpu_time,
+    c_report_cpu_to_mic_data,
+    c_report_mic_time,
+    c_report_mic_to_cpu_data,
+    c_report_unknown_timer_node,
+    c_report_unknown_trace_node,
+    c_report_offload,
+    c_report_w_tag,
+    c_report_state,
+    c_report_start,
+    c_report_init,
+    c_report_logical_card,
+    c_report_physical_card,
+    c_report_register,
+    c_report_init_func,
+    c_report_create_buf_host,
+    c_report_create_buf_mic,
+    c_report_send_pointer_data,
+    c_report_sent_pointer_data,
+    c_report_gather_copyin_data,
+    c_report_copyin_data,
+    c_report_state_signal,
+    c_report_signal,
+    c_report_wait,
+    c_report_compute,
+    c_report_receive_pointer_data,
+    c_report_received_pointer_data,
+    c_report_start_target_func,
+    c_report_var,
+    c_report_scatter_copyin_data,
+    c_report_gather_copyout_data,
+    c_report_scatter_copyout_data,
+    c_report_copyout_data,
+    c_report_unregister,
+    c_report_destroy,
+    c_report_myoinit,
+    c_report_myoregister,
+    c_report_myofini,
+    c_report_mic_myo_shared,
+    c_report_mic_myo_fptr,
+    c_report_myosharedmalloc,
+    c_report_myosharedfree,
+    c_report_myosharedalignedmalloc,
+    c_report_myosharedalignedfree,
+    c_report_myoacquire,
+    c_report_myorelease,
+    c_coipipe_max_number
+} error_types; // NOTE(review): ordering differs from the msg_c_* enum in liboffload_msg.h (c_offload1 == 2 here, msg_c_offload1 == 6 there), so these values are not message-table indices.
+
+enum OffloadHostPhase { // Host-side phases of an offload whose time is tracked; values run consecutively from 0.
+    // Total time on host for entire offload
+    c_offload_host_total_offload = 0,
+
+    // Time to load target binary
+    c_offload_host_initialize,
+
+    // Time to acquire lrb availability dynamically
+    c_offload_host_target_acquire,
+
+    // Time to wait for dependencies
+    c_offload_host_wait_deps,
+
+    // Time to allocate pointer buffers, initiate writes for pointers
+    // and calculate size of copyin/copyout buffer
+    c_offload_host_setup_buffers,
+
+    // Time to allocate pointer buffers
+    c_offload_host_alloc_buffers,
+
+    // Time to initialize misc data
+    c_offload_host_setup_misc_data,
+
+    // Time to allocate copyin/copyout buffer
+    c_offload_host_alloc_data_buffer,
+
+    // Time to initiate writes from host pointers to buffers
+    c_offload_host_send_pointers,
+
+    // Time to Gather IN data of offload into buffer
+    c_offload_host_gather_inputs,
+
+    // Time to map buffer
+    c_offload_host_map_in_data_buffer,
+
+    // Time to unmap buffer
+    c_offload_host_unmap_in_data_buffer,
+
+    // Time to start remote function call that does computation on lrb
+    c_offload_host_start_compute,
+
+    // Time to wait for compute to finish
+    c_offload_host_wait_compute,
+
+    // Time to initiate reads from pointer buffers
+    c_offload_host_start_buffers_reads,
+
+    // Time to update host variables with OUT data from buffer
+    c_offload_host_scatter_outputs,
+
+    // Time to map buffer
+    c_offload_host_map_out_data_buffer,
+
+    // Time to unmap buffer
+    c_offload_host_unmap_out_data_buffer,
+
+    // Time to wait reads from buffers to finish
+    c_offload_host_wait_buffers_reads,
+
+    // Time to destroy buffers that are no longer needed
+    c_offload_host_destroy_buffers,
+
+    // LAST TIME MONITOR: sentinel, equal to the number of host phases above
+    c_offload_host_max_phase
+};
+
+enum OffloadTargetPhase { // Target-side phases of an offload whose time is tracked; values run consecutively from 0.
+    // Total time spent on the target
+    c_offload_target_total_time = 0,
+
+    // Time to initialize offload descriptor
+    c_offload_target_descriptor_setup,
+
+    // Time to find target entry point in lookup table
+    c_offload_target_func_lookup,
+
+    // Total time spent executing offload entry
+    c_offload_target_func_time,
+
+    // Time to initialize target variables with IN values from buffer
+    c_offload_target_scatter_inputs,
+
+    // Time to add buffer reference for pointer buffers
+    c_offload_target_add_buffer_refs,
+
+    // Total time on lrb for computation
+    c_offload_target_compute,
+
+    // On lrb, time to copy OUT into buffer
+    c_offload_target_gather_outputs,
+
+    // Time to release buffer references
+    c_offload_target_release_buffer_refs,
+
+    // LAST TIME MONITOR: sentinel, equal to the number of target phases above
+    c_offload_target_max_phase
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void __liboffload_error_support(error_types input_tag, ...);   // takes an error_types tag plus variadic arguments
+void __liboffload_report_support(error_types input_tag, ...);  // takes an error_types tag plus variadic arguments
+char const *offload_get_message_str(int msgCode);              // message-table string at index msgCode (defined in liboffload_msg.c)
+char const * report_get_message_str(error_types input_tag);    // string for an error_types tag
+char const * report_get_host_stage_str(int i);                 // presumably takes an OffloadHostPhase value - confirm against the definition
+char const * report_get_target_stage_str(int i);               // label for an OffloadTargetPhase value; reports an error and aborts on an unknown value
+#ifdef __cplusplus
+}
+#endif
+
+#define test_msg_cat(nm, msg) /* Print a test banner naming nm, then emit message msg. One comma expression so both calls stay guarded under an unbraced if; trailing ';' kept for existing call sites. */ \
+    (fprintf(stderr, "\t TEST for %s \n \t", nm), \
+     __liboffload_error_support(msg));
+
+#define test_msg_cat1(nm, msg, ...) /* Same as test_msg_cat, forwarding the extra message arguments. One comma expression; trailing ';' kept for existing call sites. */ \
+    (fprintf(stderr, "\t TEST for %s \n \t", nm), \
+     __liboffload_error_support(msg, __VA_ARGS__));
+
+void write_message(FILE * file, int msgCode, va_list args_p); // NOTE(review): liboffload_msg.c defines write_message(FILE *, int) without the va_list parameter - confirm which signature is intended
+
+#define LIBOFFLOAD_ERROR __liboffload_error_support // alias used at error call sites, e.g. LIBOFFLOAD_ERROR(c_report_unknown_timer_node)
+
+#ifdef TARGET_WINNT
+#define LIBOFFLOAD_ABORT /* Clear the CRT's _WRITE_ABORT_MSG behavior, then abort. One comma expression so `if (c) LIBOFFLOAD_ABORT;` guards both calls. */ \
+         (_set_abort_behavior(0, _WRITE_ABORT_MSG), \
+         abort())
+#else
+#define LIBOFFLOAD_ABORT \
+         abort()
+#endif
+
+#endif // !defined(LIBOFFLOAD_ERROR_CODES_H)
diff --git a/final/offload/src/liboffload_msg.c b/final/offload/src/liboffload_msg.c
new file mode 100644
index 0000000..b160392
--- /dev/null
+++ b/final/offload/src/liboffload_msg.c
@@ -0,0 +1,35 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+#include <stdio.h>
+
+// ===========================================================================
+// Bring in the static string table and the enumerations for indexing into
+// it.
+// ===========================================================================
+
+#include "liboffload_msg.h"
+
+# define DYNART_STDERR_PUTS(__message_text__) fputs((__message_text__),stderr) // writes the given text to stderr; not referenced by the two functions defined below
+
+// ===========================================================================
+// Now the code for accessing the message catalogs
+// ===========================================================================
+
+
+    void write_message(FILE * file, int msgCode) { // Write catalog message msgCode to file verbatim (no argument formatting) and flush. NOTE(review): liboffload_error_codes.h declares this with an extra va_list parameter - confirm.
+        fputs(MESSAGE_TABLE_NAME[ (size_t)msgCode < sizeof(MESSAGE_TABLE_NAME) / sizeof(MESSAGE_TABLE_NAME[0]) ? msgCode : 0 ], file); // out-of-range codes (negatives included) fall back to entry 0 instead of reading past the table
+        fflush(file);
+    }
+
+    char const *offload_get_message_str(int msgCode) { // Return the catalog string for msgCode; codes outside the table (negatives included) yield entry 0 instead of an out-of-bounds read.
+        return MESSAGE_TABLE_NAME[ (size_t)msgCode < sizeof(MESSAGE_TABLE_NAME) / sizeof(MESSAGE_TABLE_NAME[0]) ? msgCode : 0 ];
+    }
diff --git a/final/offload/src/liboffload_msg.h b/final/offload/src/liboffload_msg.h
new file mode 100644
index 0000000..c1445f9
--- /dev/null
+++ b/final/offload/src/liboffload_msg.h
@@ -0,0 +1,326 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+// file: liboffload_msg.h
+enum { // Message codes; each value is the index of its text in MESSAGE_TABLE_NAME defined later in this header.
+	__dummy__ = 0,
+	msg_c_device_is_not_available,
+	msg_c_invalid_device_number,
+	msg_c_send_func_ptr,
+	msg_c_receive_func_ptr,
+	msg_c_offload_malloc,
+	msg_c_offload1,
+	msg_c_unknown_var_type,
+	msg_c_invalid_env_var_value,
+	msg_c_invalid_env_var_int_value,
+	msg_c_invalid_env_report_value,
+	msg_c_offload_signaled1,
+	msg_c_offload_signaled2,
+	msg_c_myowrapper_checkresult,
+	msg_c_myotarget_checkresult,
+	msg_c_offload_descriptor_offload,
+	msg_c_merge_var_descs1,
+	msg_c_merge_var_descs2,
+	msg_c_mic_parse_env_var_list1,
+	msg_c_mic_parse_env_var_list2,
+	msg_c_mic_process_exit_ret,
+	msg_c_mic_process_exit_sig,
+	msg_c_mic_process_exit,
+	msg_c_mic_init3,
+	msg_c_mic_init4,
+	msg_c_mic_init5,
+	msg_c_mic_init6,
+	msg_c_no_static_var_data,
+	msg_c_no_ptr_data,
+	msg_c_get_engine_handle,
+	msg_c_get_engine_index,
+	msg_c_process_create,
+	msg_c_process_get_func_handles,
+	msg_c_process_wait_shutdown,
+	msg_c_process_proxy_flush,
+	msg_c_load_library,
+	msg_c_pipeline_create,
+	msg_c_pipeline_run_func,
+	msg_c_pipeline_start_run_funcs,
+	msg_c_buf_create,
+	msg_c_buf_create_out_of_mem,
+	msg_c_buf_create_from_mem,
+	msg_c_buf_destroy,
+	msg_c_buf_map,
+	msg_c_buf_unmap,
+	msg_c_buf_read,
+	msg_c_buf_write,
+	msg_c_buf_copy,
+	msg_c_buf_get_address,
+	msg_c_buf_add_ref,
+	msg_c_buf_release_ref,
+	msg_c_buf_set_state,
+	msg_c_event_wait,
+	msg_c_zero_or_neg_ptr_len,
+	msg_c_zero_or_neg_transfer_size,
+	msg_c_bad_ptr_mem_range,
+	msg_c_different_src_and_dstn_sizes,
+	msg_c_non_contiguous_dope_vector,
+	msg_c_omp_invalid_device_num_env,
+	msg_c_omp_invalid_device_num,
+	msg_c_unknown_binary_type,
+	msg_c_multiple_target_exes,
+	msg_c_no_target_exe,
+	msg_c_report_unknown_timer_node,
+	msg_c_report_unknown_trace_node,
+	msg_c_report_host,
+	msg_c_report_mic,
+	msg_c_report_title,
+	msg_c_report_seconds,
+	msg_c_report_bytes,
+	msg_c_report_cpu_time,
+	msg_c_report_mic_time,
+	msg_c_report_tag,
+	msg_c_report_from_file,
+	msg_c_report_file,
+	msg_c_report_line,
+	msg_c_report_cpu_to_mic_data,
+	msg_c_report_mic_to_cpu_data,
+	msg_c_report_offload,
+	msg_c_report_w_tag,
+	msg_c_report_state,
+	msg_c_report_start,
+	msg_c_report_init,
+	msg_c_report_logical_card,
+	msg_c_report_physical_card,
+	msg_c_report_register,
+	msg_c_report_init_func,
+	msg_c_report_create_buf_host,
+	msg_c_report_create_buf_mic,
+	msg_c_report_send_pointer_data,
+	msg_c_report_sent_pointer_data,
+	msg_c_report_gather_copyin_data,
+	msg_c_report_copyin_data,
+	msg_c_report_state_signal,
+	msg_c_report_signal,
+	msg_c_report_wait,
+	msg_c_report_compute,
+	msg_c_report_receive_pointer_data,
+	msg_c_report_received_pointer_data,
+	msg_c_report_start_target_func,
+	msg_c_report_var,
+	msg_c_report_scatter_copyin_data,
+	msg_c_report_gather_copyout_data,
+	msg_c_report_scatter_copyout_data,
+	msg_c_report_copyout_data,
+	msg_c_report_unregister,
+	msg_c_report_destroy,
+	msg_c_report_myoinit,
+	msg_c_report_myoregister,
+	msg_c_report_myofini,
+	msg_c_report_mic_myo_shared,
+	msg_c_report_mic_myo_fptr,
+	msg_c_report_myosharedmalloc,
+	msg_c_report_myosharedfree,
+	msg_c_report_myosharedalignedmalloc,
+	msg_c_report_myosharedalignedfree,
+	msg_c_report_myoacquire,
+	msg_c_report_myorelease,
+	msg_c_report_host_total_offload_time,
+	msg_c_report_host_initialize,
+	msg_c_report_host_target_acquire,
+	msg_c_report_host_wait_deps,
+	msg_c_report_host_setup_buffers,
+	msg_c_report_host_alloc_buffers,
+	msg_c_report_host_setup_misc_data,
+	msg_c_report_host_alloc_data_buffer,
+	msg_c_report_host_send_pointers,
+	msg_c_report_host_gather_inputs,
+	msg_c_report_host_map_in_data_buffer,
+	msg_c_report_host_unmap_in_data_buffer,
+	msg_c_report_host_start_compute,
+	msg_c_report_host_wait_compute,
+	msg_c_report_host_start_buffers_reads,
+	msg_c_report_host_scatter_outputs,
+	msg_c_report_host_map_out_data_buffer,
+	msg_c_report_host_unmap_out_data_buffer,
+	msg_c_report_host_wait_buffers_reads,
+	msg_c_report_host_destroy_buffers,
+	msg_c_report_target_total_time,
+	msg_c_report_target_descriptor_setup,
+	msg_c_report_target_func_lookup,
+	msg_c_report_target_func_time,
+	msg_c_report_target_scatter_inputs,
+	msg_c_report_target_add_buffer_refs,
+	msg_c_report_target_compute,
+	msg_c_report_target_gather_outputs,
+	msg_c_report_target_release_buffer_refs,
+	msg_c_coi_pipeline_max_number,
+	msg_c_ranges_dont_match,
+	msg_c_destination_is_over,
+	msg_c_slice_of_noncont_array,
+	msg_c_pointer_array_mismatch,
+	lastMsg = 152, // one past msg_c_pointer_array_mismatch (151); equals the number of entries in the message table
+	firstMsg = 1 // same value as msg_c_device_is_not_available, the first entry after __dummy__
+};
+
+
+#if !defined(MESSAGE_TABLE_NAME) // an includer may predefine MESSAGE_TABLE_NAME to choose the table's identifier
+#    define MESSAGE_TABLE_NAME __liboffload_message_table // default identifier otherwise
+#endif
+
+static char const * MESSAGE_TABLE_NAME[] = { // Message text indexed by the msg_c_* codes declared earlier in this header; several entries are printf-style format strings.
+	/*   0 __dummy__                               */ "Un-used message",
+	/*   1 msg_c_device_is_not_available           */ "offload error: cannot offload to MIC - device is not available",
+	/*   2 msg_c_invalid_device_number             */ "offload error: expected a number greater than or equal to -1",
+	/*   3 msg_c_send_func_ptr                     */ "offload error: cannot find function name for address %p",
+	/*   4 msg_c_receive_func_ptr                  */ "offload error: cannot find address of function %s",
+	/*   5 msg_c_offload_malloc                    */ "offload error: memory allocation failed (requested=%lld bytes, align %lld)",
+	/*   6 msg_c_offload1                          */ "offload error: device %d does not have a pending signal for wait(%p)",
+	/*   7 msg_c_unknown_var_type                  */ "offload error: unknown variable type %d",
+	/*   8 msg_c_invalid_env_var_value             */ "offload warning: ignoring invalid value specified for %s",
+	/*   9 msg_c_invalid_env_var_int_value         */ "offload warning: specify an integer value for %s",
+	/*  10 msg_c_invalid_env_report_value          */ "offload warning: ignoring %s setting; use a value in range 1-3",
+	/*  11 msg_c_offload_signaled1                 */ "offload error: invalid device number %d specified in _Offload_signaled",
+	/*  12 msg_c_offload_signaled2                 */ "offload error: invalid signal %p specified for _Offload_signaled",
+	/*  13 msg_c_myowrapper_checkresult            */ "offload error: %s failed with error %d",
+	/*  14 msg_c_myotarget_checkresult             */ "offload error: %s failed with error %d",
+	/*  15 msg_c_offload_descriptor_offload        */ "offload error: cannot find offload entry %s",
+	/*  16 msg_c_merge_var_descs1                  */ "offload error: unexpected number of variable descriptors",
+	/*  17 msg_c_merge_var_descs2                  */ "offload error: unexpected variable type",
+	/*  18 msg_c_mic_parse_env_var_list1           */ "offload_error: MIC environment variable must begin with an alphabetic character",
+	/*  19 msg_c_mic_parse_env_var_list2           */ "offload_error: MIC environment variable value must be specified with '='",
+	/*  20 msg_c_mic_process_exit_ret              */ "offload error: process on the device %d unexpectedly exited with code %d",
+	/*  21 msg_c_mic_process_exit_sig              */ "offload error: process on the device %d was terminated by signal %d (%s)",
+	/*  22 msg_c_mic_process_exit                  */ "offload error: process on the device %d was unexpectedly terminated",
+	/*  23 msg_c_mic_init3                         */ "offload warning: ignoring MIC_STACKSIZE setting; use a value >= 16K and a multiple of 4K",
+	/*  24 msg_c_mic_init4                         */ "offload error: thread key create failed with error %d",
+	/*  25 msg_c_mic_init5                         */ "offload warning: specify OFFLOAD_DEVICES as comma-separated physical device numbers or 'none'",
+	/*  26 msg_c_mic_init6                         */ "offload warning: OFFLOAD_DEVICES device number %d does not correspond to a physical device",
+	/*  27 msg_c_no_static_var_data                */ "offload error: cannot find data associated with statically allocated variable %p",
+	/*  28 msg_c_no_ptr_data                       */ "offload error: cannot find data associated with pointer variable %p",
+	/*  29 msg_c_get_engine_handle                 */ "offload error: cannot get device %d handle (error code %d)",
+	/*  30 msg_c_get_engine_index                  */ "offload error: cannot get physical index for logical device %d (error code %d)",
+	/*  31 msg_c_process_create                    */ "offload error: cannot start process on the device %d (error code %d)",
+	/*  32 msg_c_process_get_func_handles          */ "offload error: cannot get function handles on the device %d (error code %d)",
+	/*  33 msg_c_process_wait_shutdown             */ "offload error: wait for process shutdown failed on device %d (error code %d)",
+	/*  34 msg_c_process_proxy_flush               */ "offload error: cannot flush process output on device %d (error code %d)",
+	/*  35 msg_c_load_library                      */ "offload error: cannot load library to the device %d (error code %d)",
+	/*  36 msg_c_pipeline_create                   */ "offload error: cannot create pipeline on the device %d (error code %d)",
+	/*  37 msg_c_pipeline_run_func                 */ "offload error: cannot execute function on the device %d (error code %d)",
+	/*  38 msg_c_pipeline_start_run_funcs          */ "offload error: cannot start executing pipeline function on the device %d (error code %d)",
+	/*  39 msg_c_buf_create                        */ "offload error: cannot create buffer on device %d (error code %d)",
+	/*  40 msg_c_buf_create_out_of_mem             */ "offload error: cannot create buffer on device %d, out of memory",
+	/*  41 msg_c_buf_create_from_mem               */ "offload error: cannot create buffer from memory on device %d (error code %d)",
+	/*  42 msg_c_buf_destroy                       */ "offload error: buffer destroy failed (error code %d)",
+	/*  43 msg_c_buf_map                           */ "offload error: buffer map failed (error code %d)",
+	/*  44 msg_c_buf_unmap                         */ "offload error: buffer unmap failed (error code %d)",
+	/*  45 msg_c_buf_read                          */ "offload error: buffer read failed (error code %d)",
+	/*  46 msg_c_buf_write                         */ "offload error: buffer write failed (error code %d)",
+	/*  47 msg_c_buf_copy                          */ "offload error: buffer copy failed (error code %d)",
+	/*  48 msg_c_buf_get_address                   */ "offload error: cannot get buffer address on device %d (error code %d)",
+	/*  49 msg_c_buf_add_ref                       */ "offload error: cannot reuse buffer memory on device %d (error code %d)",
+	/*  50 msg_c_buf_release_ref                   */ "offload error: cannot release buffer memory on device %d (error code %d)",
+	/*  51 msg_c_buf_set_state                     */ "offload error: buffer set state failed (error code %d)",
+	/*  52 msg_c_event_wait                        */ "offload error: wait for event to become signaled failed (error code %d)",
+	/*  53 msg_c_zero_or_neg_ptr_len               */ "offload error: memory allocation of negative length is not supported",
+	/*  54 msg_c_zero_or_neg_transfer_size         */ "offload error: data transfer of zero or negative size is not supported",
+	/*  55 msg_c_bad_ptr_mem_range                 */ "offload error: address range partially overlaps with existing allocation",
+	/*  56 msg_c_different_src_and_dstn_sizes      */ "offload error: size of the source %d differs from size of the destination %d",
+	/*  57 msg_c_non_contiguous_dope_vector        */ "offload error: offload data transfer supports only a single contiguous memory range per variable",
+	/*  58 msg_c_omp_invalid_device_num_env        */ "offload warning: ignoring %s setting; use a non-negative integer value",
+	/*  59 msg_c_omp_invalid_device_num            */ "offload error: device number should be a non-negative integer value",
+	/*  60 msg_c_unknown_binary_type               */ "offload error: unexpected embedded target binary type, expected either an executable or shared library",
+	/*  61 msg_c_multiple_target_exes              */ "offload error: more than one target executable found",
+	/*  62 msg_c_no_target_exe                     */ "offload error: target executable is not available",
+	/*  63 msg_c_report_unknown_timer_node         */ "offload error: unknown timer node",
+	/*  64 msg_c_report_unknown_trace_node         */ "offload error: unknown trace node",
+	/*  65 msg_c_report_host                       */ "HOST",
+	/*  66 msg_c_report_mic                        */ "MIC",
+	/*  67 msg_c_report_title                      */ "timer data       (sec)",
+	/*  68 msg_c_report_seconds                    */ "(seconds)",
+	/*  69 msg_c_report_bytes                      */ "(bytes)",
+	/*  70 msg_c_report_cpu_time                   */ "CPU Time",
+	/*  71 msg_c_report_mic_time                   */ "MIC Time",
+	/*  72 msg_c_report_tag                        */ "Tag",
+	/*  73 msg_c_report_from_file                  */ "Offload from file",
+	/*  74 msg_c_report_file                       */ "File",
+	/*  75 msg_c_report_line                       */ "Line",
+	/*  76 msg_c_report_cpu_to_mic_data            */ "CPU->MIC Data",
+	/*  77 msg_c_report_mic_to_cpu_data            */ "MIC->CPU Data",
+	/*  78 msg_c_report_offload                    */ "Offload",
+	/*  79 msg_c_report_w_tag                      */ "Tag %d",
+	/*  80 msg_c_report_state                      */ "State",
+	/*  81 msg_c_report_start                      */ "Start target",
+	/*  82 msg_c_report_init                       */ "Initialize",
+	/*  83 msg_c_report_logical_card               */ "logical card",
+	/*  84 msg_c_report_physical_card              */ "physical card",
+	/*  85 msg_c_report_register                   */ "Register static data tables",
+	/*  86 msg_c_report_init_func                  */ "Setup target entry",
+	/*  87 msg_c_report_create_buf_host            */ "Create host buffer",
+	/*  88 msg_c_report_create_buf_mic             */ "Create target buffer",
+	/*  89 msg_c_report_send_pointer_data          */ "Send pointer data",
+	/*  90 msg_c_report_sent_pointer_data          */ "Host->target pointer data",
+	/*  91 msg_c_report_gather_copyin_data         */ "Gather copyin data",
+	/*  92 msg_c_report_copyin_data                */ "Host->target copyin data",
+	/*  93 msg_c_report_state_signal               */ "Signal",
+	/*  94 msg_c_report_signal                     */ "signal :",
+	/*  95 msg_c_report_wait                       */ "waits  :",
+	/*  96 msg_c_report_compute                    */ "Execute task on target",
+	/*  97 msg_c_report_receive_pointer_data       */ "Receive pointer data",
+	/*  98 msg_c_report_received_pointer_data      */ "Target->host pointer data",
+	/*  99 msg_c_report_start_target_func          */ "Start target entry",
+	/* 100 msg_c_report_var                        */ "Var",
+	/* 101 msg_c_report_scatter_copyin_data        */ "Scatter copyin data",
+	/* 102 msg_c_report_gather_copyout_data        */ "Gather copyout data",
+	/* 103 msg_c_report_scatter_copyout_data       */ "Scatter copyout data",
+	/* 104 msg_c_report_copyout_data               */ "Target->host copyout data",
+	/* 105 msg_c_report_unregister                 */ "Unregister data tables",
+	/* 106 msg_c_report_destroy                    */ "Destroy",
+	/* 107 msg_c_report_myoinit                    */ "Initialize MYO",
+	/* 108 msg_c_report_myoregister                */ "Register MYO tables",
+	/* 109 msg_c_report_myofini                    */ "Finalize MYO",
+	/* 110 msg_c_report_mic_myo_shared             */ "MIC MYO shared table register",
+	/* 111 msg_c_report_mic_myo_fptr               */ "MIC MYO fptr table register",
+	/* 112 msg_c_report_myosharedmalloc            */ "MYO shared malloc",
+	/* 113 msg_c_report_myosharedfree              */ "MYO shared free",
+	/* 114 msg_c_report_myosharedalignedmalloc     */ "MYO shared aligned malloc",
+	/* 115 msg_c_report_myosharedalignedfree       */ "MYO shared aligned free",
+	/* 116 msg_c_report_myoacquire                 */ "MYO acquire",
+	/* 117 msg_c_report_myorelease                 */ "MYO release",
+	/* 118 msg_c_report_host_total_offload_time    */ "host: total offload time",
+	/* 119 msg_c_report_host_initialize            */ "host: initialize target",
+	/* 120 msg_c_report_host_target_acquire        */ "host: acquire target",
+	/* 121 msg_c_report_host_wait_deps             */ "host: wait dependencies",
+	/* 122 msg_c_report_host_setup_buffers         */ "host: setup buffers",
+	/* 123 msg_c_report_host_alloc_buffers         */ "host: allocate buffers",
+	/* 124 msg_c_report_host_setup_misc_data       */ "host: setup misc_data",
+	/* 125 msg_c_report_host_alloc_data_buffer     */ "host: allocate buffer",
+	/* 126 msg_c_report_host_send_pointers         */ "host: send pointers",
+	/* 127 msg_c_report_host_gather_inputs         */ "host: gather inputs",
+	/* 128 msg_c_report_host_map_in_data_buffer    */ "host: map IN data buffer",
+	/* 129 msg_c_report_host_unmap_in_data_buffer  */ "host: unmap IN data buffer",
+	/* 130 msg_c_report_host_start_compute         */ "host: initiate compute",
+	/* 131 msg_c_report_host_wait_compute          */ "host: wait compute",
+	/* 132 msg_c_report_host_start_buffers_reads   */ "host: initiate pointer reads",
+	/* 133 msg_c_report_host_scatter_outputs       */ "host: scatter outputs",
+	/* 134 msg_c_report_host_map_out_data_buffer   */ "host: map OUT data buffer",
+	/* 135 msg_c_report_host_unmap_out_data_buffer */ "host: unmap OUT data buffer",
+	/* 136 msg_c_report_host_wait_buffers_reads    */ "host: wait pointer reads",
+	/* 137 msg_c_report_host_destroy_buffers       */ "host: destroy buffers",
+	/* 138 msg_c_report_target_total_time          */ "target: total time",
+	/* 139 msg_c_report_target_descriptor_setup    */ "target: setup offload descriptor",
+	/* 140 msg_c_report_target_func_lookup         */ "target: entry lookup",
+	/* 141 msg_c_report_target_func_time           */ "target: entry time",
+	/* 142 msg_c_report_target_scatter_inputs      */ "target: scatter inputs",
+	/* 143 msg_c_report_target_add_buffer_refs     */ "target: add buffer reference",
+	/* 144 msg_c_report_target_compute             */ "target: compute",
+	/* 145 msg_c_report_target_gather_outputs      */ "target: gather outputs",
+	/* 146 msg_c_report_target_release_buffer_refs */ "target: remove buffer reference",
+	/* 147 msg_c_coi_pipeline_max_number           */ "number of host threads doing offload exceeds maximum of %d",
+	/* 148 msg_c_ranges_dont_match                 */ "ranges of source and destination don't match together",
+	/* 149 msg_c_destination_is_over               */ "insufficient destination memory to transfer source",
+	/* 150 msg_c_slice_of_noncont_array            */ "a non-contiguous slice may be taken of contiguous arrays only",
+	/* 151 msg_c_pointer_array_mismatch            */ "number of %s elements is less than described by the source",
+};
diff --git a/final/offload/src/mic_lib.f90 b/final/offload/src/mic_lib.f90
new file mode 100644
index 0000000..0c2e4de
--- /dev/null
+++ b/final/offload/src/mic_lib.f90
@@ -0,0 +1,441 @@
+!
+!//===----------------------------------------------------------------------===//
+!//
+!//                     The LLVM Compiler Infrastructure
+!//
+!// This file is dual licensed under the MIT and the University of Illinois Open
+!// Source Licenses. See LICENSE.txt for details.
+!//
+!//===----------------------------------------------------------------------===//
+!
+
+
+! **********************************************************************************
+! * This file is intended to support the Intel(r) Many Integrated Core Architecture.
+! **********************************************************************************
+! free form Fortran source - should be named .f90
+! lines are longer than 72 characters
+
+module mic_lib
+use, intrinsic :: iso_c_binding
+
+integer, parameter:: target_mic=2                        ! equals TARGET_MIC in the C enum TARGET_TYPE (offload.h)
+integer, parameter:: default_target_type=target_mic      ! counterpart of DEFAULT_TARGET_TYPE in offload.h
+integer, parameter:: default_target_number=0             ! counterpart of DEFAULT_TARGET_NUMBER in offload.h
+
+enum, bind(C)                               ! same names and order as _Offload_result in offload.h
+    enumerator :: OFFLOAD_SUCCESS  = 0      ! the enumerators below carry no explicit value
+    enumerator :: OFFLOAD_DISABLED          ! offload is disabled
+    enumerator :: OFFLOAD_UNAVAILABLE       ! card is not available
+    enumerator :: OFFLOAD_OUT_OF_MEMORY     ! not enough memory on device
+    enumerator :: OFFLOAD_PROCESS_DIED      ! target process has died
+    enumerator :: OFFLOAD_ERROR             ! unspecified error
+end enum
+
+type, bind (C) :: offload_status                                    ! same member order as _Offload_status in offload.h
+    integer(kind=c_int)    ::  result          = OFFLOAD_DISABLED   ! one of the OFFLOAD_* enumerators
+    integer(kind=c_int)    ::  device_number   = -1                 ! device number
+    integer(kind=c_size_t) ::  data_sent       = 0                  ! number of bytes sent to the target
+    integer(kind=c_size_t) ::  data_received   = 0                  ! number of bytes received by host
+end type offload_status
+
+interface
+function offload_number_of_devices ()                                  & ! C: int _Offload_number_of_devices(void)
+           bind (C, name = "_Offload_number_of_devices")
+!dec$ attributes default :: offload_number_of_devices
+  import :: c_int
+  integer (kind=c_int)        :: offload_number_of_devices
+!dec$ attributes offload:mic :: offload_number_of_devices
+!dir$ attributes known_intrinsic ::  offload_number_of_devices
+end function offload_number_of_devices
+
+function offload_signaled(target_number, signal)                       & ! C: int _Offload_signaled(int index, void *signal)
+           bind (C, name = "_Offload_signaled")
+!dec$ attributes default :: offload_signaled
+  import :: c_int, c_int64_t
+  integer (kind=c_int) :: offload_signaled
+  integer (kind=c_int), value :: target_number
+  integer (kind=c_int64_t), value :: signal   ! stands in for the C void* (assumes 64-bit pointers - confirm)
+!dec$ attributes offload:mic :: offload_signaled
+end function offload_signaled
+
+subroutine offload_report(val)                                         & ! C: void _Offload_report(int val)
+           bind (C, name = "_Offload_report")
+!dec$ attributes default :: offload_report
+  import :: c_int
+  integer (kind=c_int), value :: val
+!dec$ attributes offload:mic :: offload_report
+end subroutine offload_report
+
+function offload_get_device_number()                                   & ! C: int _Offload_get_device_number(void)
+           bind (C, name = "_Offload_get_device_number")
+!dec$ attributes default :: offload_get_device_number
+  import :: c_int
+  integer (kind=c_int)        :: offload_get_device_number
+!dec$ attributes offload:mic :: offload_get_device_number
+end function offload_get_device_number
+
+function offload_get_physical_device_number()                          & ! C: int _Offload_get_physical_device_number(void)
+           bind (C, name = "_Offload_get_physical_device_number")
+!dec$ attributes default :: offload_get_physical_device_number
+  import :: c_int
+  integer (kind=c_int)        :: offload_get_physical_device_number
+!dec$ attributes offload:mic :: offload_get_physical_device_number
+end function offload_get_physical_device_number
+
+! OpenMP API wrappers (bind(C) to the omp_*_target functions declared in offload.h; every argument is passed by value)
+
+subroutine omp_set_num_threads_target (target_type,                    &
+                                       target_number,                  &
+                                       num_threads)                    &
+           bind (C, name = "omp_set_num_threads_target")
+  import :: c_int
+  integer (kind=c_int), value :: target_type, target_number, num_threads
+end subroutine omp_set_num_threads_target
+
+function omp_get_max_threads_target (target_type,                      &
+                                     target_number)                    &
+         bind (C, name = "omp_get_max_threads_target")
+  import :: c_int
+  integer (kind=c_int)        :: omp_get_max_threads_target
+  integer (kind=c_int), value :: target_type, target_number
+end function omp_get_max_threads_target
+
+function omp_get_num_procs_target (target_type,                        &
+                                   target_number)                      &
+         bind (C, name = "omp_get_num_procs_target")
+  import :: c_int
+  integer (kind=c_int)        :: omp_get_num_procs_target
+  integer (kind=c_int), value :: target_type, target_number
+end function omp_get_num_procs_target
+
+subroutine omp_set_dynamic_target (target_type,                        &
+                                   target_number,                      &
+                                   num_threads)                        &
+           bind (C, name = "omp_set_dynamic_target")
+  import :: c_int
+  integer (kind=c_int), value :: target_type, target_number, num_threads  ! presumably the dynamic on/off flag - confirm
+end subroutine omp_set_dynamic_target
+
+function omp_get_dynamic_target (target_type,                          &
+                                 target_number)                        &
+         bind (C, name = "omp_get_dynamic_target")
+  import :: c_int
+  integer (kind=c_int)        :: omp_get_dynamic_target
+  integer (kind=c_int), value :: target_type, target_number
+end function omp_get_dynamic_target
+
+subroutine omp_set_nested_target (target_type,                         &
+                                  target_number,                       &
+                                  nested)                              &
+           bind (C, name = "omp_set_nested_target")
+  import :: c_int
+  integer (kind=c_int), value :: target_type, target_number, nested
+end subroutine omp_set_nested_target
+
+function omp_get_nested_target (target_type,                           &
+                                target_number)                         &
+         bind (C, name = "omp_get_nested_target")
+  import :: c_int
+  integer (kind=c_int)        :: omp_get_nested_target
+  integer (kind=c_int), value :: target_type, target_number
+end function omp_get_nested_target
+
+subroutine omp_set_schedule_target (target_type,                       &
+                                    target_number,                     &
+                                    kind,                              &
+                                    modifier)                          &
+           bind (C, name = "omp_set_schedule_target")
+  import :: c_int
+  integer (kind=c_int), value :: target_type, target_number, kind, modifier
+end subroutine omp_set_schedule_target
+
+subroutine omp_get_schedule_target (target_type,                       &
+                                    target_number,                     &
+                                    kind,                              &
+                                    modifier)                          &
+           bind (C, name = "omp_get_schedule_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: kind, modifier  ! C side takes omp_sched_t* and int*: addresses as integers
+end subroutine omp_get_schedule_target
+
+! lock API functions ('lock' is passed by value as an address-sized integer; offload.h declares it omp_lock_target_t*)
+
+subroutine omp_init_lock_target (target_type,                          &
+                                 target_number,                        &
+                                 lock)                                 &
+           bind (C, name = "omp_init_lock_target")
+  import :: c_int, c_intptr_t
+  !dir$ attributes known_intrinsic ::  omp_init_lock_target
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end subroutine omp_init_lock_target
+
+subroutine omp_destroy_lock_target (target_type,                       &
+                                    target_number,                     &
+                                    lock)                              &
+           bind (C, name = "omp_destroy_lock_target")
+  import :: c_int, c_intptr_t
+  !dir$ attributes known_intrinsic ::  omp_destroy_lock_target
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end subroutine omp_destroy_lock_target
+
+subroutine omp_set_lock_target (target_type,                           &
+                                target_number,                         &
+                                lock)                                  &
+           bind (C, name = "omp_set_lock_target")
+  import :: c_int, c_intptr_t
+  !dir$ attributes known_intrinsic ::  omp_set_lock_target
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end subroutine omp_set_lock_target
+
+subroutine omp_unset_lock_target (target_type,                         &
+                                  target_number,                       &
+                                  lock)                                &
+           bind (C, name = "omp_unset_lock_target")
+  import :: c_int, c_intptr_t
+  !dir$ attributes known_intrinsic ::  omp_unset_lock_target
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end subroutine omp_unset_lock_target
+
+function omp_test_lock_target (target_type,                            &
+                               target_number,                          &
+                               lock)                                   &
+           bind (C, name = "omp_test_lock_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int)        :: omp_test_lock_target
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end function omp_test_lock_target
+
+! nested lock API functions ('lock' is passed by value as an address-sized integer; C type is omp_nest_lock_target_t*)
+
+subroutine omp_init_nest_lock_target (target_type,                     &
+                                      target_number,                   &
+                                      lock)                            &
+           bind (C, name = "omp_init_nest_lock_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end subroutine omp_init_nest_lock_target
+
+subroutine omp_destroy_nest_lock_target (target_type,                  &
+                                         target_number,                &
+                                         lock)                         &
+           bind (C, name = "omp_destroy_nest_lock_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end subroutine omp_destroy_nest_lock_target
+
+subroutine omp_set_nest_lock_target (target_type,                      &
+                                     target_number,                    &
+                                     lock)                             &
+           bind (C, name = "omp_set_nest_lock_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end subroutine omp_set_nest_lock_target
+
+subroutine omp_unset_nest_lock_target (target_type,                    &
+                                       target_number,                  &
+                                       lock)                           &
+           bind (C, name = "omp_unset_nest_lock_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end subroutine omp_unset_nest_lock_target
+
+function omp_test_nest_lock_target (target_type,                       &
+                                    target_number,                     &
+                                    lock)                              &
+           bind (C, name = "omp_test_nest_lock_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int)        :: omp_test_nest_lock_target
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: lock
+end function omp_test_nest_lock_target
+
+! kmp API functions (bind(C) to the kmp_*_target functions declared in offload.h)
+
+subroutine kmp_set_stacksize_target (target_type,                      &
+                                     target_number,                    &
+                                     size)                             &
+           bind (C, name = "kmp_set_stacksize_target")
+  import :: c_int
+  integer (kind=c_int), value :: target_type, target_number, size
+end subroutine kmp_set_stacksize_target
+
+function kmp_get_stacksize_target (target_type,                        &
+                                   target_number)                      &
+         bind (C, name = "kmp_get_stacksize_target")
+  import :: c_int
+  integer (kind=c_int)        :: kmp_get_stacksize_target
+  integer (kind=c_int), value :: target_type, target_number
+end function kmp_get_stacksize_target
+
+subroutine kmp_set_stacksize_s_target (target_type,                    &
+                                       target_number,                  &
+                                       size)                           &
+           bind (C, name = "kmp_set_stacksize_s_target")
+  import :: c_int
+  integer (kind=c_int), value :: target_type, target_number, size  ! NOTE(review): offload.h declares size as size_t
+end subroutine kmp_set_stacksize_s_target
+
+function kmp_get_stacksize_s_target (target_type,                      &
+                                     target_number)                    &
+         bind (C, name = "kmp_get_stacksize_s_target")
+  import :: c_int
+  integer (kind=c_int)        :: kmp_get_stacksize_s_target  ! NOTE(review): offload.h returns size_t here
+  integer (kind=c_int), value :: target_type, target_number
+end function kmp_get_stacksize_s_target
+
+subroutine kmp_set_blocktime_target (target_type,                      &
+                                     target_number,                    &
+                                     time)                             &
+           bind (C, name = "kmp_set_blocktime_target")
+  import :: c_int
+  integer (kind=c_int), value :: target_type, target_number, time
+end subroutine kmp_set_blocktime_target
+
+function kmp_get_blocktime_target (target_type,                        &
+                                   target_number)                      &
+         bind (C, name = "kmp_get_blocktime_target")
+  import :: c_int
+  integer (kind=c_int)        :: kmp_get_blocktime_target
+  integer (kind=c_int), value :: target_type, target_number
+end function kmp_get_blocktime_target
+
+subroutine kmp_set_library_serial_target (target_type,                 &
+                                          target_number)               &
+           bind (C, name = "kmp_set_library_serial_target")
+  import :: c_int
+  integer (kind=c_int), value :: target_type, target_number
+end subroutine kmp_set_library_serial_target
+
+subroutine kmp_set_library_turnaround_target (target_type,             &
+                                              target_number)           &
+           bind (C, name = "kmp_set_library_turnaround_target")
+  import :: c_int
+  integer (kind=c_int), value :: target_type, target_number
+end subroutine kmp_set_library_turnaround_target
+
+subroutine kmp_set_library_throughput_target (target_type,             &
+                                              target_number)           &
+           bind (C, name = "kmp_set_library_throughput_target")
+  import :: c_int
+  integer (kind=c_int), value :: target_type, target_number
+end subroutine kmp_set_library_throughput_target
+
+subroutine kmp_set_library_target (target_type,                        &
+                                   target_number,                      &
+                                   mode)                               &
+           bind (C, name = "kmp_set_library_target")
+  import :: c_int
+  integer (kind=c_int), value :: target_type, target_number, mode
+end subroutine kmp_set_library_target
+
+function kmp_get_library_target (target_type,                          &
+                                 target_number)                        &
+         bind (C, name = "kmp_get_library_target")
+  import :: c_int
+  integer (kind=c_int)        :: kmp_get_library_target
+  integer (kind=c_int), value :: target_type, target_number
+end function kmp_get_library_target
+
+subroutine kmp_set_defaults_target (target_type,                       &
+                                    target_number,                     &
+                                    defaults)                          &
+           bind (C, name = "kmp_set_defaults_target")
+  import :: c_int, c_char
+ character (kind=c_char) :: defaults(*)  ! character array, not by value (C: char const *defaults)
+ integer (kind=c_int), value :: target_type, target_number
+end subroutine kmp_set_defaults_target
+
+! affinity API functions ('mask' is passed by value as an address-sized integer; C type is kmp_affinity_mask_target_t*)
+
+subroutine kmp_create_affinity_mask_target (target_type,               &
+                                            target_number,             &
+                                            mask)                      &
+           bind (C, name = "kmp_create_affinity_mask_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: mask
+end subroutine kmp_create_affinity_mask_target
+
+subroutine kmp_destroy_affinity_mask_target (target_type,              &
+                                             target_number,            &
+                                             mask)                     &
+           bind (C, name = "kmp_destroy_affinity_mask_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: mask
+end subroutine kmp_destroy_affinity_mask_target
+
+function kmp_set_affinity_target (target_type,                         &
+                                  target_number,                       &
+                                  mask)                                &
+           bind (C, name = "kmp_set_affinity_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int)        :: kmp_set_affinity_target
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: mask
+end function kmp_set_affinity_target
+
+function kmp_get_affinity_target (target_type,                         &
+                                  target_number,                       &
+                                  mask)                                &
+           bind (C, name = "kmp_get_affinity_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int)        :: kmp_get_affinity_target
+  integer (kind=c_int), value :: target_type, target_number
+  integer (kind=c_intptr_t), value :: mask
+end function kmp_get_affinity_target
+
+function kmp_get_affinity_max_proc_target (target_type,                &
+                                           target_number)              &
+           bind (C, name = "kmp_get_affinity_max_proc_target")
+  import :: c_int
+  integer (kind=c_int)        :: kmp_get_affinity_max_proc_target
+  integer (kind=c_int), value :: target_type, target_number
+end function kmp_get_affinity_max_proc_target
+
+function kmp_set_affinity_mask_proc_target (target_type,               &
+                                            target_number,             &
+                                            proc,                      &
+                                            mask)                      &
+           bind (C, name = "kmp_set_affinity_mask_proc_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int)        :: kmp_set_affinity_mask_proc_target
+  integer (kind=c_int), value :: target_type, target_number, proc
+  integer (kind=c_intptr_t), value :: mask
+end function kmp_set_affinity_mask_proc_target
+
+function kmp_unset_affinity_mask_proc_target (target_type,             &
+                                              target_number,           &
+                                              proc,                    &
+                                              mask)                    &
+           bind (C, name = "kmp_unset_affinity_mask_proc_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int)        :: kmp_unset_affinity_mask_proc_target
+  integer (kind=c_int), value :: target_type, target_number, proc
+  integer (kind=c_intptr_t), value :: mask
+end function kmp_unset_affinity_mask_proc_target
+
+function kmp_get_affinity_mask_proc_target (target_type,               &
+                                            target_number,             &
+                                            proc,                      &
+                                            mask)                      &
+           bind (C, name = "kmp_get_affinity_mask_proc_target")
+  import :: c_int, c_intptr_t
+  integer (kind=c_int)        :: kmp_get_affinity_mask_proc_target
+  integer (kind=c_int), value :: target_type, target_number, proc
+  integer (kind=c_intptr_t), value :: mask
+end function kmp_get_affinity_mask_proc_target
+
+end interface
+end module mic_lib
diff --git a/final/offload/src/offload.h b/final/offload/src/offload.h
new file mode 100644
index 0000000..68914b7
--- /dev/null
+++ b/final/offload/src/offload.h
@@ -0,0 +1,474 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*
+ * Include file for Offload API.
+ */
+
+#ifndef OFFLOAD_H_INCLUDED
+#define OFFLOAD_H_INCLUDED
+
+#if defined(LINUX) || defined(FREEBSD)
+#include <bits/functexcept.h>
+#endif
+
+#include <stddef.h>
+#include <omp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define TARGET_ATTRIBUTE __declspec(target(mic))
+
+/*  The target architecture. No explicit values are given, so the
+ *  enumerators are 0, 1 and 2 in the order listed (mic_lib.f90 sets
+ *  target_mic=2 to match TARGET_MIC). */
+typedef enum TARGET_TYPE {
+    TARGET_NONE,    /* Undefined target */
+    TARGET_HOST,    /* Host used as target */
+    TARGET_MIC      /* MIC target */
+} TARGET_TYPE;
+
+/*
+ *  The default target type.
+ */
+#define DEFAULT_TARGET_TYPE TARGET_MIC
+
+/*
+ *  The default target number.
+ */
+#define DEFAULT_TARGET_NUMBER 0
+
+/*  Offload status.
+ *  OFFLOAD_STATUS_INIT(x) assigns only x.result; OFFLOAD_STATUS_INITIALIZER
+ *  sets all four members of _Offload_status in declaration order. */
+typedef enum {
+    OFFLOAD_SUCCESS = 0,
+    OFFLOAD_DISABLED,               /* offload is disabled */
+    OFFLOAD_UNAVAILABLE,            /* card is not available */
+    OFFLOAD_OUT_OF_MEMORY,          /* not enough memory on device */
+    OFFLOAD_PROCESS_DIED,           /* target process has died */
+    OFFLOAD_ERROR                   /* unspecified error */
+} _Offload_result;
+
+typedef struct {
+    _Offload_result result;         /* result, see above */
+    int             device_number;  /* device number */
+    size_t          data_sent;      /* number of bytes sent to the target */
+    size_t          data_received;  /* number of bytes received by host */
+} _Offload_status;
+
+#define OFFLOAD_STATUS_INIT(x) \
+    ((x).result = OFFLOAD_DISABLED)
+
+#define OFFLOAD_STATUS_INITIALIZER \
+    { OFFLOAD_DISABLED, -1, 0, 0 }
+
+/* Offload runtime interfaces (mic_lib.f90 binds several of these by name) */
+
+extern int _Offload_number_of_devices(void);
+extern int _Offload_get_device_number(void);
+extern int _Offload_get_physical_device_number(void);
+
+extern void* _Offload_shared_malloc(size_t size);  /* used by shared_allocator */
+extern void  _Offload_shared_free(void *ptr);      /* used by shared_allocator */
+
+extern void* _Offload_shared_aligned_malloc(size_t size, size_t align);
+extern void  _Offload_shared_aligned_free(void *ptr);
+
+extern int _Offload_signaled(int index, void *signal);
+extern void _Offload_report(int val);
+
+/* OpenMP API */
+
+extern void omp_set_default_device(int num);
+extern int  omp_get_default_device(void);
+extern int  omp_get_num_devices(void);
+
+/* OpenMP API wrappers */
+
+/*
+ * Every wrapper starts with the same two arguments:
+ *   target_type   - kind of target to address (see TARGET_TYPE)
+ *   target_number - number of the target of that kind
+ * The arguments after those belong to the call being wrapped.
+ * Each function is declared once here; mic_lib.f90 carries the matching
+ * Fortran interfaces.
+ */
+
+/*
+ * Set num_threads on target.
+ * num_threads: the value to set.
+ */
+extern void omp_set_num_threads_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int num_threads
+);
+
+/*
+ * Get max_threads from target.
+ * Returns the value as an int.
+ */
+extern int omp_get_max_threads_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/*
+ * Get num_procs from target.
+ * Returns the value as an int.
+ */
+extern int omp_get_num_procs_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/*
+ * Set dynamic on target.
+ * NOTE(review): the value is passed in a parameter named num_threads.
+ */
+extern void omp_set_dynamic_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int num_threads
+);
+
+/*
+ * Get dynamic from target.
+ * Returns the setting as an int.
+ */
+extern int omp_get_dynamic_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/*
+ * Set nested on target.
+ * nested: the value to set.
+ */
+extern void omp_set_nested_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int nested
+);
+
+/*
+ * Get nested from target.
+ * Returns the setting as an int.
+ */
+extern int omp_get_nested_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+/*
+ * Set schedule on target.
+ * kind and modifier are passed by value.
+ */
+extern void omp_set_schedule_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_sched_t kind,
+    int modifier
+);
+
+/*
+ * Get schedule from target.
+ * kind and modifier are pointers; presumably written by the call - confirm.
+ */
+extern void omp_get_schedule_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_sched_t *kind,
+    int *modifier
+);
+
+/* lock API functions; each takes the target lock by pointer */
+
+typedef struct {
+    omp_lock_t lock;
+} omp_lock_target_t;    /* wraps a single omp_lock_t */
+
+extern void omp_init_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+);
+
+extern void omp_destroy_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+);
+
+extern void omp_set_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+);
+
+extern void omp_unset_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+);
+
+extern int omp_test_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_lock_target_t *lock
+);
+
+/* nested lock API functions; each takes the target lock by pointer */
+
+typedef struct {
+    omp_nest_lock_t lock;
+} omp_nest_lock_target_t;    /* wraps a single omp_nest_lock_t */
+
+extern void omp_init_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+);
+
+extern void omp_destroy_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+);
+
+extern void omp_set_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+);
+
+extern void omp_unset_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+);
+
+extern int omp_test_nest_lock_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    omp_nest_lock_target_t *lock
+);
+
+/* kmp API functions (stack size has an int form and a size_t "_s" form) */
+
+extern void kmp_set_stacksize_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int size
+);
+
+extern int kmp_get_stacksize_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+extern void kmp_set_stacksize_s_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    size_t size     /* NOTE(review): mic_lib.f90 declares this as c_int */
+);
+
+extern size_t kmp_get_stacksize_s_target(  /* NOTE(review): c_int in mic_lib.f90 */
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+extern void kmp_set_blocktime_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int time
+);
+
+extern int kmp_get_blocktime_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+extern void kmp_set_library_serial_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+extern void kmp_set_library_turnaround_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+extern void kmp_set_library_throughput_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+extern void kmp_set_library_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int mode
+);
+
+extern int kmp_get_library_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+extern void kmp_set_defaults_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    char const *defaults
+);
+
+/* affinity API functions; masks are passed by pointer */
+
+typedef struct {
+    kmp_affinity_mask_t mask;
+} kmp_affinity_mask_target_t;    /* wraps a single kmp_affinity_mask_t */
+
+extern void kmp_create_affinity_mask_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+);
+
+extern void kmp_destroy_affinity_mask_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+);
+
+extern int kmp_set_affinity_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+);
+
+extern int kmp_get_affinity_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    kmp_affinity_mask_target_t *mask
+);
+
+extern int kmp_get_affinity_max_proc_target(
+    TARGET_TYPE target_type,
+    int target_number
+);
+
+extern int kmp_set_affinity_mask_proc_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int proc,
+    kmp_affinity_mask_target_t *mask
+);
+
+extern int kmp_unset_affinity_mask_proc_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int proc,
+    kmp_affinity_mask_target_t *mask
+);
+
+extern int kmp_get_affinity_mask_proc_target(
+    TARGET_TYPE target_type,
+    int target_number,
+    int proc,
+    kmp_affinity_mask_target_t *mask
+);
+
+#ifdef __cplusplus
+} /* extern "C" */
+
+/* Namespace for the shared_allocator. */
+namespace __offload {
+  /* This follows the specification for std::allocator. Storage comes from
+   * _Offload_shared_malloc/_Offload_shared_free and no state is kept. */
+  template <typename T>
+  class shared_allocator;
+
+  /* Specialization for shared_allocator<void>: typedefs and rebind only. */
+  template <>
+  class shared_allocator<void> {
+  public:
+    typedef void       *pointer;
+    typedef const void *const_pointer;
+    typedef void        value_type;
+    template <class U> struct rebind { typedef shared_allocator<U> other; };
+  };
+
+  /* Definition of shared_allocator<T>. */
+  template <class T>
+  class shared_allocator {
+  public:
+    typedef size_t     size_type;
+    typedef ptrdiff_t  difference_type;
+    typedef T         *pointer;
+    typedef const T   *const_pointer;
+    typedef T         &reference;
+    typedef const T   &const_reference;
+    typedef T          value_type;
+    template <class U> struct rebind { typedef shared_allocator<U> other; };
+    shared_allocator() throw() { }
+    shared_allocator(const shared_allocator&) throw() { }
+    template <class U> shared_allocator(const shared_allocator<U>&) throw() { }
+    ~shared_allocator() throw() { }
+    pointer address(reference x) const { return &x; }
+    const_pointer address(const_reference x) const { return &x; }
+    /* Storage for a number of objects; the hint is accepted but unused. */
+    pointer allocate(
+      size_type, shared_allocator<void>::const_pointer hint = 0);
+    void deallocate(pointer p, size_type n);
+    /* Largest count whose size in bytes still fits in size_type. */
+    size_type max_size() const throw() { return size_type(-1)/sizeof(T); }
+    /* Copy-construct arg into the storage at p (placement new). */
+    void construct(pointer p, const T& arg) { ::new (p) T(arg); }
+    /* Run the destructor of *p; the storage itself is not released. */
+    void destroy(pointer p) { p->~T(); }
+  };
+
+  /* allocate: storage for s objects of T from offload shared memory.
+   * Calls std::__throw_bad_alloc() when s exceeds max_size(), i.e. when
+   * s*sizeof(T) would wrap around, or when the shared heap returns null. */
+  template <class T>
+  typename shared_allocator<T>::pointer
+  shared_allocator<T>::allocate(shared_allocator<T>::size_type s,
+                                shared_allocator<void>::const_pointer) {
+    if (s > max_size()) std::__throw_bad_alloc();
+    void *ptr = _Offload_shared_malloc(s*sizeof(T));
+    if (ptr == 0) std::__throw_bad_alloc();
+    return static_cast<pointer>(ptr);
+  } /* allocate */
+
+  /* deallocate: hand p back to the shared heap; the count is ignored. */
+  template <class T>
+  void shared_allocator<T>::deallocate(pointer p,
+                                       shared_allocator<T>::size_type) {
+    _Offload_shared_free(p);
+  } /* deallocate */
+
+  template <typename _T1, typename _T2>
+  inline bool operator==(const shared_allocator<_T1> &,
+                         const shared_allocator<_T2> &) throw() {
+    return true;
+  }  /* operator== */
+
+  template <typename _T1, typename _T2>
+  inline bool operator!=(const shared_allocator<_T1> &,
+                         const shared_allocator<_T2> &) throw() {
+    return false;
+  }  /* operator!= */
+}  /* __offload */
+#endif /* __cplusplus */
+
+#endif /* OFFLOAD_H_INCLUDED */
diff --git a/final/offload/src/offload_common.cpp b/final/offload/src/offload_common.cpp
new file mode 100644
index 0000000..3681b06
--- /dev/null
+++ b/final/offload/src/offload_common.cpp
@@ -0,0 +1,170 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#if defined(LINUX) || defined(FREEBSD)
+#include <mm_malloc.h>
+#endif
+
+#include "offload_common.h"
+
+// The debug routines
+
+#if OFFLOAD_DEBUG > 0
+
+// Hex-dump `len` bytes at `data` through OFFLOAD_DEBUG_TRACE: two hex digits
+// per byte, a space after every 4 bytes, one trace line per 64 bytes.
+// Nothing is printed unless console_enabled exceeds trace_level.
+void __dump_bytes(int trace_level, const void *data, int len)
+{
+    if (console_enabled > trace_level) {
+        const uint8_t *arr = (const uint8_t*) data;
+        char buffer[4096];
+        char *bufferp = buffer;
+        int count = 0;
+        // `len-- > 0` (rather than `len--`) keeps a negative length from
+        // walking far past the end of `data`.
+        while (len-- > 0) {
+            sprintf(bufferp, "%02x", *arr++);
+            bufferp += 2;
+            count++;
+            if ((count&3) == 0) {
+                sprintf(bufferp, " ");
+                bufferp++;
+            }
+            if ((count&63) == 0) {
+                OFFLOAD_DEBUG_TRACE(trace_level, "%s\n", buffer);
+                bufferp = buffer;
+                count = 0;
+            }
+        }
+        // Flush a final, partially filled line.
+        if (count) {
+            OFFLOAD_DEBUG_TRACE(trace_level, "%s\n", buffer);
+        }
+    }
+}
+#endif // OFFLOAD_DEBUG
+
+// The Marshaller and associated routines
+
+// Copy `length` bytes from `data` into the buffer at the cursor and advance it.
+void Marshaller::send_data(const void *data, int64_t length)
+{
+    // int64_t may be `long`; cast so the argument matches %lld.
+    OFFLOAD_DEBUG_TRACE(2, "send_data(%p, %lld)\n",
+                        data, (long long) length);
+    memcpy(buffer_ptr, data, (size_t)length);
+    buffer_ptr += length;
+    // tfr_size counts the bytes moved through the marshaller.
+    tfr_size += length;
+}
+
+// Copy `length` bytes from the buffer cursor into `data` and advance the cursor.
+void Marshaller::receive_data(void *data, int64_t length)
+{
+    // int64_t may be `long`; cast so the argument matches %lld.
+    OFFLOAD_DEBUG_TRACE(2, "receive_data(%p, %lld)\n",
+                        data, (long long) length);
+    memcpy(data, buffer_ptr, (size_t)length);
+    buffer_ptr += length;
+    // tfr_size counts the bytes moved through the marshaller.
+    tfr_size += length;
+}
+
+// Send function pointer.
+// The pointer is marshalled as the NUL-terminated name that __offload_funcs
+// returns for it; a null pointer is marshalled as an empty string.
+void Marshaller::send_func_ptr(const void* data)
+{
+    // Default payload for a null pointer.
+    const char* func_name = "";
+
+    if (data != 0) {
+        func_name = __offload_funcs.find_name(data);
+        if (func_name == 0) {
+            // No name is known for this address: report it and terminate.
+#if OFFLOAD_DEBUG > 0
+            if (console_enabled > 2) {
+                __offload_funcs.dump();
+            }
+#endif // OFFLOAD_DEBUG > 0
+
+            LIBOFFLOAD_ERROR(c_send_func_ptr, data);
+            exit(1);
+        }
+    }
+
+    // Bytes to copy: the name plus its terminating NUL.
+    const size_t name_bytes = strlen(func_name) + 1;
+
+    // Append to the buffer and account for the transfer.
+    memcpy(buffer_ptr, func_name, name_bytes);
+    buffer_ptr += name_bytes;
+    tfr_size += name_bytes;
+}
+
+// Receive function pointer.
+// Reads a NUL-terminated name at the buffer cursor and stores in *data the
+// address __offload_funcs returns for it; an empty name yields a null pointer.
+void Marshaller::receive_func_ptr(const void** data)
+{
+    const char* func_name = (const char*) buffer_ptr;
+
+    if (func_name[0] == '\0') {
+        // Empty string encodes a null function pointer.
+        *data = 0;
+    }
+    else {
+        *data = __offload_funcs.find_addr(func_name);
+        if (*data == 0) {
+            // The name cannot be resolved: report it and terminate.
+#if OFFLOAD_DEBUG > 0
+            if (console_enabled > 2) {
+                __offload_funcs.dump();
+            }
+#endif // OFFLOAD_DEBUG > 0
+
+            LIBOFFLOAD_ERROR(c_receive_func_ptr, func_name);
+            exit(1);
+        }
+    }
+
+    // Consume the name together with its terminating NUL.
+    const size_t name_bytes = strlen(func_name) + 1;
+    buffer_ptr += name_bytes;
+    tfr_size += name_bytes;
+}
+
+// End of the Marshaller and associated routines
+
+// Allocate `size` bytes with _mm_malloc, using an alignment of at least
+// sizeof(void*). Reports an error and exits the process on failure.
+extern void *OFFLOAD_MALLOC(size_t size, size_t align)
+{
+    // size_t need not have the width %lld expects, so widen explicitly.
+    OFFLOAD_DEBUG_TRACE(2, "%s(%lld, %lld)\n", __func__,
+                        (long long) size, (long long) align);
+
+    // Raise the requested alignment to at least pointer size.
+    if (align < sizeof(void*)) {
+        align = sizeof(void*);
+    }
+
+    void *ptr = _mm_malloc(size, align);
+    if (ptr == NULL) {
+        // Allocation failure is fatal.
+        LIBOFFLOAD_ERROR(c_offload_malloc, size, align);
+        exit(1);
+    }
+
+    OFFLOAD_DEBUG_TRACE(2, "%s returned %p\n", __func__, ptr);
+
+    return ptr;
+}
diff --git a/final/offload/src/offload_common.h b/final/offload/src/offload_common.h
new file mode 100644
index 0000000..11cb8bb
--- /dev/null
+++ b/final/offload/src/offload_common.h
@@ -0,0 +1,444 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*! \file
+    \brief The parts of the runtime library common to host and target
+*/
+
+#ifndef OFFLOAD_COMMON_H_INCLUDED
+#define OFFLOAD_COMMON_H_INCLUDED
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+
+#include "offload.h"
+#include "offload_table.h"
+#include "offload_trace.h"
+#include "offload_timer.h"
+#include "offload_util.h"
+#include "cean_util.h"
+#include "dv_util.h"
+#include "liboffload_error_codes.h"
+
+#include <stdarg.h>
+
+// The debug routines
+
+// Host console and file logging
+extern int console_enabled;
+extern int offload_report_level;
+
+#define OFFLOAD_DO_TRACE (offload_report_level == 3)
+
+extern const char *prefix;
+extern int offload_number;
+#if !HOST_LIBRARY
+extern int mic_index;
+#endif
+
+#if HOST_LIBRARY
+void Offload_Report_Prolog(OffloadHostTimerData* timer_data);
+void Offload_Report_Epilog(OffloadHostTimerData* timer_data);
+void offload_report_free_data(OffloadHostTimerData * timer_data);
+void Offload_Timer_Print(void);
+
+#ifndef TARGET_WINNT
+#define OFFLOAD_DEBUG_INCR_OFLD_NUM() \
+        __sync_add_and_fetch(&offload_number, 1)
+#else
+#define OFFLOAD_DEBUG_INCR_OFLD_NUM() \
+        _InterlockedIncrement(reinterpret_cast<long*>(&offload_number))
+#endif
+
+#define OFFLOAD_DEBUG_PRINT_TAG_PREFIX() \
+        printf("%s:  ", prefix);
+
+#define OFFLOAD_DEBUG_PRINT_PREFIX() \
+        printf("%s:  ", prefix);
+#else
+#define OFFLOAD_DEBUG_PRINT_PREFIX() \
+        printf("%s%d:  ", prefix, mic_index);
+#endif // HOST_LIBRARY
+
+#define OFFLOAD_TRACE(trace_level, ...)  \
+    if (console_enabled >= trace_level) { \
+        OFFLOAD_DEBUG_PRINT_PREFIX(); \
+        printf(__VA_ARGS__); \
+        fflush(NULL); \
+    }
+
+#if OFFLOAD_DEBUG > 0 // debug build: the tracing macros below are active
+// Console trace; expands (via OFFLOAD_TRACE) to a bare `if` block.
+#define OFFLOAD_DEBUG_TRACE(level, ...) \
+    OFFLOAD_TRACE(level, __VA_ARGS__)
+// Stage report, printed only when OFFLOAD_DO_TRACE holds; `level` is unused.
+#define OFFLOAD_REPORT(level, offload_number, stage, ...) \
+    if (OFFLOAD_DO_TRACE) { \
+        offload_stage_print(stage, offload_number, __VA_ARGS__); \
+        fflush(NULL); \
+    }
+// Stage report when OFFLOAD_DO_TRACE holds, otherwise a plain console trace.
+#define OFFLOAD_DEBUG_TRACE_1(level, offload_number, stage, ...) \
+    if (OFFLOAD_DO_TRACE) { \
+        offload_stage_print(stage, offload_number, __VA_ARGS__); \
+        fflush(NULL); \
+    } \
+    if (!OFFLOAD_DO_TRACE) { \
+        OFFLOAD_TRACE(level, __VA_ARGS__) \
+    }
+// Forwards to __dump_bytes, declared just below.
+#define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b) \
+    __dump_bytes(level, a, b)
+// Hex dump helper; only declared when OFFLOAD_DEBUG > 0.
+extern void __dump_bytes(
+    int level,
+    const void *data,
+    int len
+);
+
+#else // OFFLOAD_DEBUG <= 0 or undefined
+// NOTE(review): OFFLOAD_DEBUG_TRACE/_TRACE_1/OFFLOAD_REPORT get no empty fallback here - confirm.
+#define OFFLOAD_DEBUG_LOG(level, ...)
+#define OFFLOAD_DEBUG_DUMP_BYTES(level, a, b)
+
+#endif // OFFLOAD_DEBUG > 0
+
+// Runtime interface
+
+#define OFFLOAD_PREFIX(a) __offload_##a
+
+#define OFFLOAD_MALLOC            OFFLOAD_PREFIX(malloc)
+#define OFFLOAD_FREE(a)           _mm_free(a)
+
+// Forward functions
+
+extern void *OFFLOAD_MALLOC(size_t size, size_t align);
+
+// The Marshaller
+
+//! Indicator for the type of entry on an offload item list.
+enum OffloadItemType {
+    c_data =   1,       //!< Plain data
+    c_data_ptr,         //!< Pointer data
+    c_func_ptr,         //!< Function pointer
+    c_void_ptr,         //!< void*
+    c_string_ptr,       //!< C string
+    c_dv,               //!< Dope vector variable
+    c_dv_data,          //!< Dope-vector data
+    c_dv_data_slice,    //!< Dope-vector data's slice
+    c_dv_ptr,           //!< Dope-vector variable pointer
+    c_dv_ptr_data,      //!< Dope-vector pointer data
+    c_dv_ptr_data_slice,//!< Dope-vector pointer data's slice
+    c_cean_var,         //!< CEAN variable
+    c_cean_var_ptr,     //!< Pointer to CEAN variable
+    c_data_ptr_array,   //!< Pointer to data pointer array
+    c_func_ptr_array,   //!< Pointer to function pointer array
+    c_void_ptr_array,   //!< Pointer to void* pointer array
+    c_string_ptr_array  //!< Pointer to char* pointer array
+};
+
+#define VAR_TYPE_IS_PTR(t) ((t) == c_string_ptr || \
+                            (t) == c_data_ptr || \
+                            (t) == c_cean_var_ptr || \
+                            (t) == c_dv_ptr)
+
+#define VAR_TYPE_IS_SCALAR(t) ((t) == c_data || \
+                               (t) == c_void_ptr || \
+                               (t) == c_cean_var || \
+                               (t) == c_dv)
+
+#define VAR_TYPE_IS_DV_DATA(t) ((t) == c_dv_data || \
+                                (t) == c_dv_ptr_data)
+
+#define VAR_TYPE_IS_DV_DATA_SLICE(t) ((t) == c_dv_data_slice || \
+                                      (t) == c_dv_ptr_data_slice)
+
+
+//! Specifies the direction in which an offloaded variable is copied.
+enum OffloadParameterType {
+    c_parameter_unknown = -1, //!< Unknown clause
+    c_parameter_nocopy,       //!< Variable listed in "nocopy" clause
+    c_parameter_in,           //!< Variable listed in "in" clause
+    c_parameter_out,          //!< Variable listed in "out" clause
+    c_parameter_inout         //!< Variable listed in "inout" clause
+};
+
+//! An Offload Variable descriptor
+struct VarDesc {
+    //! OffloadItemTypes of source and destination
+    union {
+        struct {
+            uint8_t dst : 4; //!< OffloadItemType of destination
+            uint8_t src : 4; //!< OffloadItemType of source
+        };
+        uint8_t bits;
+    } type;
+
+    //! OffloadParameterType that describes direction of data transfer
+    union {
+        struct {
+            uint8_t in  : 1; //!< Set if IN or INOUT
+            uint8_t out : 1; //!< Set if OUT or INOUT
+        };
+        uint8_t bits;
+    } direction;
+
+    uint8_t alloc_if;        //!< alloc_if modifier value
+    uint8_t free_if;         //!< free_if modifier value
+    uint32_t align;          //!< MIC alignment requested for pointer data
+    //! Not used by compiler; set to 0
+    /*! Used by runtime as offset to data from start of MIC buffer */
+    uint32_t mic_offset;
+    //! Flags describing this variable
+    union {
+        struct {
+            //! source variable has persistent storage
+            uint32_t is_static : 1;
+            //! destination variable has persistent storage
+            uint32_t is_static_dstn : 1;
+            //! has length for c_dv && c_dv_ptr
+            uint32_t has_length : 1;
+            //! persisted local scalar is in stack buffer
+            uint32_t is_stack_buf : 1;
+            //! buffer address is sent in data
+            uint32_t sink_addr : 1;
+            //! alloc displacement is sent in data
+            uint32_t alloc_disp : 1;
+            //! source data is noncontiguous
+            uint32_t is_noncont_src : 1;
+            //! destination data is noncontiguous
+            uint32_t is_noncont_dst : 1;
+        };
+        uint32_t bits;
+    } flags;
+    //! Not used by compiler; set to 0
+    /*! Used by runtime as offset to base from data stored in a buffer */
+    int64_t offset;
+    //! Element byte-size of data to be transferred
+    /*! For dope-vector, the size of the dope-vector      */
+    int64_t size;
+    union {
+        //! Set to 0 for array expressions and dope-vectors
+        /*! Set to 1 for scalars                          */
+        /*! Set to value of length modifier for pointers  */
+        int64_t count;
+        //! Displacement not used by compiler
+        int64_t disp;
+    };
+
+    //! This field not used by OpenMP 4.0
+    /*! The alloc section expression in #pragma offload   */
+    union {
+       void *alloc;
+       int64_t ptr_arr_offset;
+    };
+
+    //! This field not used by OpenMP 4.0
+    /*! The into section expression in #pragma offload    */
+    /*! For c_data_ptr_array this is the into ptr array   */
+    void *into;
+
+    //! For an ordinary variable, address of the variable
+    /*! For c_cean_var (C/C++ array expression),
+        pointer to arr_desc, which is an array descriptor. */
+    /*! For c_data_ptr_array (array of data pointers),
+        pointer to ptr_array_descriptor,
+        which is a descriptor for pointer array transfers. */
+    void *ptr;
+};
+
+//! Auxiliary struct used when -g is enabled that holds variable names
+struct VarDesc2 {
+    const char *sname; //!< Source name
+    const char *dname; //!< Destination name (when "into" is used)
+};
+
+/*! When the OffloadItemType is c_data_ptr_array
+    the ptr field of the main descriptor points to this struct.          */
+/*! The type in VarDesc1 merely says c_cean_data_ptr, but the pointer
+    type can be c_data_ptr, c_func_ptr, c_void_ptr, or c_string_ptr.
+    Therefore the actual pointer type is in the flags field of VarDesc3. */
+/*! If flag_align_is_array/flag_alloc_if_is_array/flag_free_if_is_array
+    is 0 then alignment/alloc_if/free_if are specified in VarDesc1.      */
+/*! If flag_align_is_array/flag_alloc_if_is_array/flag_free_if_is_array
+    is 1 then align_array/alloc_if_array/free_if_array specify
+    the set of alignment/alloc_if/free_if values.                        */
+/*! For the other fields, if neither the scalar nor the array flag
+    is set, then that modifier was not specified. If the bits are set
+    they specify which modifier was set and whether it was a
+    scalar or an array expression.                                       */
+struct VarDesc3
+{
+    void *ptr_array;        //!< Pointer to arr_desc of array of pointers
+    void *align_array;      //!< Scalar value or pointer to arr_desc
+    void *alloc_if_array;   //!< Scalar value or pointer to arr_desc
+    void *free_if_array;    //!< Scalar value or pointer to arr_desc
+    void *extent_start;     //!< Scalar value or pointer to arr_desc
+    void *extent_elements;  //!< Scalar value or pointer to arr_desc
+    void *into_start;       //!< Scalar value or pointer to arr_desc
+    void *into_elements;    //!< Scalar value or pointer to arr_desc
+    void *alloc_start;      //!< Scalar value or pointer to arr_desc
+    void *alloc_elements;   //!< Scalar value or pointer to arr_desc
+    /*! Flags that describe the pointer type and whether each field
+        is a scalar value or an array expression.        */
+    /*! First 6 bits are pointer array element type:
+        c_data_ptr, c_func_ptr, c_void_ptr, c_string_ptr */
+    /*! Then single bits specify:                        */
+    /*!     align_array is an array                      */
+    /*!     alloc_if_array is an array                   */
+    /*!     free_if_array is an array                    */
+    /*!     extent_start is a scalar expression          */
+    /*!     extent_start is an array expression          */
+    /*!     extent_elements is a scalar expression       */
+    /*!     extent_elements is an array expression       */
+    /*!     into_start is a scalar expression            */
+    /*!     into_start is an array expression            */
+    /*!     into_elements is a scalar expression         */
+    /*!     into_elements is an array expression         */
+    /*!     alloc_start is a scalar expression           */
+    /*!     alloc_start is an array expression           */
+    /*!     alloc_elements is a scalar expression        */
+    /*!     alloc_elements is an array expression        */
+    uint32_t array_fields;
+};
+const int flag_align_is_array = 6;
+const int flag_alloc_if_is_array = 7;
+const int flag_free_if_is_array = 8;
+const int flag_extent_start_is_scalar = 9;
+const int flag_extent_start_is_array = 10;
+const int flag_extent_elements_is_scalar = 11;
+const int flag_extent_elements_is_array = 12;
+const int flag_into_start_is_scalar = 13;
+const int flag_into_start_is_array = 14;
+const int flag_into_elements_is_scalar = 15;
+const int flag_into_elements_is_array = 16;
+const int flag_alloc_start_is_scalar = 17;
+const int flag_alloc_start_is_array = 18;
+const int flag_alloc_elements_is_scalar = 19;
+const int flag_alloc_elements_is_array = 20;
+
+// The Marshaller: sequential access to a caller-supplied byte buffer.
+class Marshaller
+{
+private:
+    // Start address of buffer (as passed to init_buffer)
+    char *buffer_start;
+
+    // Current pointer within buffer; advanced by every send_*/receive_* call
+    char *buffer_ptr;
+
+    // Physical size of data sent (including flags); set by init_buffer
+    long long buffer_size;
+
+    // User data sent/received, in bytes; accumulated by send_*/receive_*
+    long long tfr_size;
+
+public:
+    // Constructor: no buffer attached, all counters zero
+    Marshaller() :
+        buffer_start(0), buffer_ptr(0),
+        buffer_size(0), tfr_size(0)
+    {
+    }
+
+    // Return count of user data sent/received
+    long long get_tfr_size() const
+    {
+        return tfr_size;
+    }
+
+    // Return pointer to the start of the buffer
+    char *get_buffer_start() const
+    {
+        return buffer_start;
+    }
+
+    // Return the buffer size that was given to init_buffer
+    long long get_buffer_size() const
+    {
+        return buffer_size;
+    }
+
+    // Attach buffer d of size s and rewind the cursor; tfr_size is not reset
+    void init_buffer(
+        char *d,
+        long long s
+    )
+    {
+        buffer_start = buffer_ptr = d;
+        buffer_size = s;
+    }
+    // NOTE: the routines below do not compare the cursor against buffer_size.
+    // Send data: copy length bytes from data into the buffer at the cursor
+    void send_data(
+        const void *data,
+        int64_t length
+    );
+
+    // Receive data: copy length bytes from the cursor into data
+    void receive_data(
+        void *data,
+        int64_t length
+    );
+
+    // Send function pointer (written to the buffer as a name string)
+    void send_func_ptr(
+        const void* data
+    );
+
+    // Receive function pointer (resolved from a name string in the buffer)
+    void receive_func_ptr(
+        const void** data
+    );
+};
+
+// End of the Marshaller
+
+// The offloaded function descriptor.
+// Sent from host to target to specify which function to run.
+// Also, sets console and file tracing levels.
+struct FunctionDescriptor
+{
+    // Input data size.
+    long long in_datalen;
+
+    // Output data size.
+    long long out_datalen;
+
+    // Whether trace is requested on console.
+    // A value of 1 produces only function name and data sent/received.
+    // Values > 1 produce copious trace information.
+    uint8_t console_enabled;
+
+    // Flag controlling timing on the target side.
+    // Values > 0 enable timing on sink.
+    uint8_t timer_enabled;
+    // NOTE(review): presumably copies of the globals of the same names - confirm.
+    int offload_report_level;
+    int offload_number;
+
+    // number of variable descriptors
+    int vars_num;
+
+    // inout data offset if data is passed as misc/return data
+    // otherwise it should be zero.
+    int data_offset;
+
+    // The name of the offloaded function
+    char data[]; // flexible array member: must remain the last field
+};
+
+// typedef OFFLOAD.
+// Pointer to OffloadDescriptor.
+typedef struct OffloadDescriptor *OFFLOAD;
+
+#endif // OFFLOAD_COMMON_H_INCLUDED
diff --git a/final/offload/src/offload_engine.cpp b/final/offload/src/offload_engine.cpp
new file mode 100644
index 0000000..069b604
--- /dev/null
+++ b/final/offload/src/offload_engine.cpp
@@ -0,0 +1,531 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_engine.h"
+#include <signal.h>
+#include <errno.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "offload_host.h"
+#include "offload_table.h"
+
+const char* Engine::m_func_names[Engine::c_funcs_total] =
+{ // entry order presumably matches the c_func_* indices used with m_funcs - TODO confirm
+    "server_compute",
+#ifdef MYO_SUPPORT
+    "server_myoinit",
+    "server_myofini",
+#endif // MYO_SUPPORT
+    "server_init",
+    "server_var_table_size",
+    "server_var_table_copy"
+};
+
+// Symbolic representation of system signals, indexed by signal number. Fix for CQ233593
+const char* Engine::c_signal_names[Engine::c_signal_max] =
+{
+    "Unknown SIGNAL", /*  0, also used by fini_process for out-of-range numbers */
+    "SIGHUP",    /*  1, Hangup (POSIX).  */
+    "SIGINT",    /*  2, Interrupt (ANSI).  */
+    "SIGQUIT",   /*  3, Quit (POSIX).  */
+    "SIGILL",    /*  4, Illegal instruction (ANSI).  */
+    "SIGTRAP",   /*  5, Trace trap (POSIX).  */
+    "SIGABRT",   /*  6, Abort (ANSI).  */
+    "SIGBUS",    /*  7, BUS error (4.2 BSD).  */
+    "SIGFPE",    /*  8, Floating-point exception (ANSI).  */
+    "SIGKILL",   /*  9, Kill, unblockable (POSIX).  */
+    "SIGUSR1",   /* 10, User-defined signal 1 (POSIX).  */
+    "SIGSEGV",   /* 11, Segmentation violation (ANSI).  */
+    "SIGUSR2",   /* 12, User-defined signal 2 (POSIX).  */
+    "SIGPIPE",   /* 13, Broken pipe (POSIX).  */
+    "SIGALRM",   /* 14, Alarm clock (POSIX).  */
+    "SIGTERM",   /* 15, Termination (ANSI).  */
+    "SIGSTKFLT", /* 16, Stack fault.  */
+    "SIGCHLD",   /* 17, Child status has changed (POSIX).  */
+    "SIGCONT",   /* 18, Continue (POSIX).  */
+    "SIGSTOP",   /* 19, Stop, unblockable (POSIX).  */
+    "SIGTSTP",   /* 20, Keyboard stop (POSIX).  */
+    "SIGTTIN",   /* 21, Background read from tty (POSIX).  */
+    "SIGTTOU",   /* 22, Background write to tty (POSIX).  */
+    "SIGURG",    /* 23, Urgent condition on socket (4.2 BSD).  */
+    "SIGXCPU",   /* 24, CPU limit exceeded (4.2 BSD).  */
+    "SIGXFSZ",   /* 25, File size limit exceeded (4.2 BSD).  */
+    "SIGVTALRM", /* 26, Virtual alarm clock (4.2 BSD).  */
+    "SIGPROF",   /* 27, Profiling alarm clock (4.2 BSD).  */
+    "SIGWINCH",  /* 28, Window size change (4.3 BSD, Sun).  */
+    "SIGIO",     /* 29, I/O now possible (4.2 BSD).  */
+    "SIGPWR",    /* 30, Power failure restart (System V).  */
+    "SIGSYS"     /* 31, Bad system call.  */
+};
+
+// Make the engine ready for use: start the target process if there is none,
+// load pending images and (re)build the pointer table. Does nothing once
+// m_ready is set.
+void Engine::init(void)
+{
+    if (m_ready) {
+        return;
+    }
+
+    mutex_locker_t locker(m_lock);
+    // Re-test now that m_lock is held.
+    if (m_ready) {
+        return;
+    }
+
+    if (m_process == 0) {
+        init_process();
+    }
+    load_libraries();
+    init_ptr_data();
+    m_ready = true;
+}
+
+void Engine::init_process(void)
+{   // Creates the target process for this engine and initializes the device side.
+    COIENGINE engine;
+    COIRESULT res;
+    const char **environ;
+
+    // create environment for the target process (the result may be null)
+    environ = (const char**) mic_env_vars.create_environ_for_card(m_index);
+    if (environ != 0) {
+        for (const char **p = environ; *p != 0; p++) {
+            OFFLOAD_DEBUG_TRACE(3, "Env Var for card %d: %s\n", m_index, *p);
+        }
+    }
+
+    // Create execution context in the specified device
+    OFFLOAD_DEBUG_TRACE(2, "Getting device %d (engine %d) handle\n", m_index,
+                        m_physical_index);
+    res = COI::EngineGetHandle(COI_ISA_KNC, m_physical_index, &engine);
+    check_result(res, c_get_engine_handle, m_index, res);
+
+    // Target executable should be available by the time when we
+    // attempt to initialize the device
+    if (__target_exe == 0) {
+        LIBOFFLOAD_ERROR(c_no_target_exe);
+        exit(1);
+    }
+
+    OFFLOAD_DEBUG_TRACE(2,
+        "Loading target executable \"%s\" from %p, size %lld\n",
+        __target_exe->name, __target_exe->data, __target_exe->size);
+
+    res = COI::ProcessCreateFromMemory(
+        engine,                 // in_Engine
+        __target_exe->name,     // in_pBinaryName
+        __target_exe->data,     // in_pBinaryBuffer
+        __target_exe->size,     // in_BinaryBufferLength,
+        0,                      // in_Argc
+        0,                      // in_ppArgv
+        environ == 0,           // in_DupEnv: true only when no environment was created
+        environ,                // in_ppAdditionalEnv
+        mic_proxy_io,           // in_ProxyActive
+        mic_proxy_fs_root,      // in_ProxyfsRoot
+        mic_buffer_size,        // in_BufferSpace
+        mic_library_path,       // in_LibrarySearchPath
+        __target_exe->origin,   // in_FileOfOrigin
+        __target_exe->offset,   // in_FileOfOriginOffset
+        &m_process              // out_pProcess
+    );
+    check_result(res, c_process_create, m_index, res);
+
+    // get function handles for the names in m_func_names into m_funcs
+    res = COI::ProcessGetFunctionHandles(m_process, c_funcs_total,
+                                         m_func_names, m_funcs);
+    check_result(res, c_process_get_func_handles, m_index, res);
+
+    // initialize device side; the returned pid is only used for the debugger below
+    pid_t pid = init_device();
+
+    // For IDB
+    if (__dbg_is_attached) {
+        // TODO: we have in-memory executable now.
+        // Check with IDB team what should we provide them now?
+        if (strlen(__target_exe->name) < MAX_TARGET_NAME) { // otherwise the name is not copied
+            strcpy(__dbg_target_exe_name, __target_exe->name);
+        }
+        __dbg_target_so_pid = pid;
+        __dbg_target_id = m_physical_index;
+        __dbg_target_so_loaded();
+    }
+}
+
+void Engine::fini_process(bool verbose)
+{   // Destroys the target process, if any; `verbose` enables reporting of how it exited.
+    if (m_process != 0) {
+        uint32_t sig;
+        int8_t ret;
+
+        // destroy target process
+        OFFLOAD_DEBUG_TRACE(2, "Destroying process on the device %d\n",
+                            m_index);
+
+        COIRESULT res = COI::ProcessDestroy(m_process, -1, 0, &ret, &sig);
+        m_process = 0; // the handle is cleared whether or not the destroy call succeeded
+
+        if (res == COI_SUCCESS) {
+            OFFLOAD_DEBUG_TRACE(3, "Device process: signal %d, exit code %d\n",
+                                sig, ret);
+            if (verbose) {
+                if (sig != 0) {
+                    LIBOFFLOAD_ERROR(
+                        c_mic_process_exit_sig, m_index, sig,
+                        c_signal_names[sig >= c_signal_max ? 0 : sig]); // entry 0 for out-of-range values
+                }
+                else {
+                    LIBOFFLOAD_ERROR(c_mic_process_exit_ret, m_index, ret);
+                }
+            }
+
+            // for idb: notify the debugger only after a successful destroy
+            if (__dbg_is_attached) {
+                __dbg_target_so_unloaded();
+            }
+        }
+        else {
+            if (verbose) {
+                LIBOFFLOAD_ERROR(c_mic_process_exit, m_index);
+            }
+        }
+    }
+}
+
+// Load every image collected in m_images into the target process and then
+// empty the list. COI_ALREADY_EXISTS is not treated as a failure.
+void Engine::load_libraries()
+{
+    for (TargetImageList::iterator image = m_images.begin();
+         image != m_images.end(); ++image) {
+        OFFLOAD_DEBUG_TRACE(2, "Loading library \"%s\" from %p, size %llu\n",
+                            image->name, image->data, image->size);
+        // load library to the device
+        COILIBRARY lib;
+        COIRESULT res = COI::ProcessLoadLibraryFromMemory(
+            m_process,
+            image->data,
+            image->size,
+            image->name,
+            mic_library_path,
+            image->origin,
+            image->offset,
+            COI_LOADLIBRARY_V1_FLAGS,
+            &lib);
+        const bool load_ok = (res == COI_SUCCESS || res == COI_ALREADY_EXISTS);
+        if (!load_ok) {
+            check_result(res, c_load_library, m_index, res);
+        }
+    }
+    m_images.clear();
+}
+
+// Ordering predicate for std::sort over target entries: ascending by name,
+// compared with strcmp. The name field is cast to be read as a C string.
+static bool target_entry_cmp(const VarList::BufEntry &l,
+                             const VarList::BufEntry &r)
+{
+    const int order = strcmp(reinterpret_cast<const char*>(l.name),
+                             reinterpret_cast<const char*>(r.name));
+    return order < 0;
+}
+
+// Ordering predicate for std::sort: host entries ascending by name (strcmp).
+static bool host_entry_cmp(const VarTable::Entry *l,
+                           const VarTable::Entry *r)
+{
+    const int order = strcmp(l->name, r->name);
+    return order < 0;
+}
+
+void Engine::init_ptr_data(void)
+{   // Matches host and target variable tables by name and records matches in m_ptr_set.
+    COIRESULT res;
+    COIEVENT event;
+
+    // Prepare table of host entries
+    std::vector<const VarTable::Entry*> host_table(__offload_vars.begin(),
+                                                   __offload_vars.end());
+
+    // no need to do anything further if the host table is empty
+    if (host_table.size() <= 0) { // size() is unsigned, so this tests for empty
+        return;
+    }
+
+    // Get var table entries from the target.
+    // First we need to get size for the buffer to copy data
+    struct {
+        int64_t nelems;
+        int64_t length;
+    } params;
+
+    res = COI::PipelineRunFunction(get_pipeline(),
+                                   m_funcs[c_func_var_table_size],
+                                   0, 0, 0,
+                                   0, 0,
+                                   0, 0,
+                                   &params, sizeof(params),
+                                   &event);
+    check_result(res, c_pipeline_run_func, m_index, res);
+
+    res = COI::EventWait(1, &event, -1, 1, 0, 0);
+    check_result(res, c_event_wait, res);
+    // length is used as the buffer size below; zero means there is nothing to fetch
+    if (params.length == 0) {
+        return;
+    }
+
+    // create buffer for target entries and copy data to host
+    COIBUFFER buffer;
+    res = COI::BufferCreate(params.length, COI_BUFFER_NORMAL, 0, 0, 1,
+                            &m_process, &buffer);
+    check_result(res, c_buf_create, m_index, res);
+
+    COI_ACCESS_FLAGS flags = COI_SINK_WRITE;
+    res = COI::PipelineRunFunction(get_pipeline(),
+                                   m_funcs[c_func_var_table_copy],
+                                   1, &buffer, &flags,
+                                   0, 0,
+                                   &params.nelems, sizeof(params.nelems),
+                                   0, 0,
+                                   &event);
+    check_result(res, c_pipeline_run_func, m_index, res);
+
+    res = COI::EventWait(1, &event, -1, 1, 0, 0);
+    check_result(res, c_event_wait, res);
+
+    // map the buffer on the host, then patch names in target data
+    VarList::BufEntry *target_table;
+    COIMAPINSTANCE map_inst;
+    res = COI::BufferMap(buffer, 0, params.length, COI_MAP_READ_ONLY, 0, 0,
+                         0, &map_inst,
+                         reinterpret_cast<void**>(&target_table));
+    check_result(res, c_buf_map, res);
+
+    VarList::table_patch_names(target_table, params.nelems);
+
+    // and sort entries (both tables by name, so they can be merged below)
+    std::sort(target_table, target_table + params.nelems, target_entry_cmp);
+    std::sort(host_table.begin(), host_table.end(), host_entry_cmp);
+
+    // merge host and target entries and enter matching vars map
+    std::vector<const VarTable::Entry*>::const_iterator hi =
+        host_table.begin();
+    std::vector<const VarTable::Entry*>::const_iterator he =
+        host_table.end();
+    const VarList::BufEntry *ti = target_table;
+    const VarList::BufEntry *te = target_table + params.nelems;
+    // walk both sorted tables in step; only names present in both are recorded
+    while (hi != he && ti != te) {
+        int res = strcmp((*hi)->name, reinterpret_cast<const char*>(ti->name)); // shadows the COIRESULT res
+        if (res == 0) {
+            // add matching entry to var map (this `res` shadows the int above)
+            std::pair<PtrSet::iterator, bool> res =
+                m_ptr_set.insert(PtrData((*hi)->addr, (*hi)->size));
+
+            // store address for new entries (set elements are const, hence the cast)
+            if (res.second) {
+                PtrData *ptr = const_cast<PtrData*>(res.first.operator->());
+                ptr->mic_addr = ti->addr;
+                ptr->is_static = true;
+            }
+
+            hi++;
+            ti++;
+        }
+        else if (res < 0) {
+            hi++;
+        }
+        else {
+            ti++;
+        }
+    }
+
+    // cleanup: unmap and destroy the temporary buffer
+    res = COI::BufferUnmap(map_inst, 0, 0, 0);
+    check_result(res, c_buf_unmap, res);
+
+    res = COI::BufferDestroy(buffer);
+    check_result(res, c_buf_destroy, res);
+}
+
+COIRESULT Engine::compute(
+    const std::list<COIBUFFER> &buffers, // converted to an array for the COI call
+    const void*         data,            // passed through to PipelineRunFunction
+    uint16_t            data_size,       // size argument paired with data
+    void*               ret,             // passed through to PipelineRunFunction
+    uint16_t            ret_size,        // size argument paired with ret
+    uint32_t            num_deps,        // number of entries in deps
+    const COIEVENT*     deps,            // passed through to PipelineRunFunction
+    COIEVENT*           event            // passed through to PipelineRunFunction
+) /* const */
+{
+    COIBUFFER *bufs;
+    COI_ACCESS_FLAGS *flags;
+    COIRESULT res;
+
+    // convert buffers list to array (alloca storage lasts until this function returns)
+    int num_bufs = buffers.size();
+    if (num_bufs > 0) {
+        bufs = (COIBUFFER*) alloca(num_bufs * sizeof(COIBUFFER));
+        flags = (COI_ACCESS_FLAGS*) alloca(num_bufs *
+                                           sizeof(COI_ACCESS_FLAGS));
+
+        int i = 0;
+        for (std::list<COIBUFFER>::const_iterator it = buffers.begin();
+             it != buffers.end(); it++) {
+            bufs[i] = *it;
+
+            // TODO: this should be fixed (every buffer is flagged COI_SINK_WRITE)
+            flags[i++] = COI_SINK_WRITE;
+        }
+    }
+    else {
+        bufs = 0;
+        flags = 0;
+    }
+
+    // start computation; the COI result is returned to the caller unchecked
+    res = COI::PipelineRunFunction(get_pipeline(),
+                                   m_funcs[c_func_compute],
+                                   num_bufs, bufs, flags,
+                                   num_deps, deps,
+                                   data, data_size,
+                                   ret, ret_size,
+                                   event);
+    return res;
+}
+
+pid_t Engine::init_device(void)
+{
+    // argument block handed to the device-side init function
+    struct init_data {
+        int  device_index;
+        int  devices_total;
+        int  console_level;
+        int  offload_report_level;
+    } init_args;
+    COIRESULT status;
+    COIEVENT done;
+    pid_t device_pid;
+
+    OFFLOAD_DEBUG_TRACE_1(2, 0, c_offload_init,
+                          "Initializing device with logical index %d "
+                          "and physical index %d\n",
+                           m_index, m_physical_index);
+
+    init_args.offload_report_level = offload_report_level;
+    init_args.console_level = console_enabled;
+    init_args.devices_total = mic_engines_total;
+    init_args.device_index = m_index;
+
+    // run the init function; its return area receives the device pid
+    status = COI::PipelineRunFunction(get_pipeline(),
+                                      m_funcs[c_func_init],
+                                      0, 0, 0, 0, 0,
+                                      &init_args, sizeof(init_args),
+                                      &device_pid, sizeof(device_pid),
+                                      &done);
+    check_result(status, c_pipeline_run_func, m_index, status);
+
+    status = COI::EventWait(1, &done, -1, 1, 0, 0);
+    check_result(status, c_event_wait, status);
+
+    OFFLOAD_DEBUG_TRACE(2, "Device process pid is %d\n", device_pid);
+    return device_pid;
+}
+
+// Per-thread state: one pipeline slot per engine plus the thread's set of
+// automatic variables. Instances are created by Engine on first use.
+struct Thread {
+    Thread(long* addr_coipipe_counter) :
+        m_addr_coipipe_counter(addr_coipipe_counter) {
+        memset(m_pipelines, 0, sizeof(m_pipelines));  // no pipelines yet
+    }
+
+    // Decrements the counter given at construction, then destroys every
+    // pipeline that was stored through set_pipeline().
+    ~Thread() {
+#ifdef TARGET_WINNT
+        _InterlockedDecrement(m_addr_coipipe_counter);
+#else // !TARGET_WINNT
+        __sync_sub_and_fetch(m_addr_coipipe_counter, 1);
+#endif // TARGET_WINNT
+        for (int engine = 0; engine < mic_engines_total; ++engine) {
+            if (m_pipelines[engine] == 0) continue;
+            COI::PipelineDestroy(m_pipelines[engine]);
+        }
+    }
+
+    // pipeline stored for slot "index", 0 if none has been set
+    COIPIPELINE get_pipeline(int index) const { return m_pipelines[index]; }
+
+    void set_pipeline(int index, COIPIPELINE pipeline) {
+        m_pipelines[index] = pipeline;
+    }
+
+    // automatic variables tracked for this thread
+    AutoSet& get_auto_vars() { return m_auto_vars; }
+
+private:
+    long*       m_addr_coipipe_counter;
+    AutoSet     m_auto_vars;
+    COIPIPELINE m_pipelines[MIC_ENGINES_MAX];
+};
+
+COIPIPELINE Engine::get_pipeline(void)
+{
+    // Returns the calling thread's pipeline for this engine; the per-thread
+    // record and the pipeline itself are created on first use.
+    Thread* thread = (Thread*) thread_getspecific(mic_thread_key);
+    if (thread == 0) {
+        thread = new Thread(&m_proc_number);
+        thread_setspecific(mic_thread_key, thread);
+    }
+
+    COIPIPELINE pipeline = thread->get_pipeline(m_index);
+    if (pipeline == 0) {
+        COIRESULT res;
+        // Count this pipe. Both branches yield the incremented value, so
+        // the limit check below behaves the same on every platform.
+#ifndef TARGET_WINNT
+        const long proc_num = __sync_add_and_fetch(&m_proc_number, 1);
+#else // TARGET_WINNT
+        const long proc_num = _InterlockedIncrement(&m_proc_number);
+#endif // TARGET_WINNT
+        if (proc_num > COI_PIPELINE_MAX_PIPELINES) {
+            LIBOFFLOAD_ERROR(c_coipipe_max_number, COI_PIPELINE_MAX_PIPELINES);
+            LIBOFFLOAD_ABORT;
+        }
+        // create pipeline for this thread
+        res = COI::PipelineCreate(m_process, 0, mic_stack_size, &pipeline);
+        check_result(res, c_pipeline_create, m_index, res);
+        thread->set_pipeline(m_index, pipeline);
+    }
+    return pipeline;
+}
+
+AutoSet& Engine::get_auto_vars(void)
+{
+    // the per-thread record is created the first time a thread gets here
+    Thread* self = (Thread*) thread_getspecific(mic_thread_key);
+    if (!self) {
+        self = new Thread(&m_proc_number);
+        thread_setspecific(mic_thread_key, self);
+    }
+    return self->get_auto_vars();
+}
+
+void Engine::destroy_thread_data(void *data)
+{
+    delete static_cast<Thread*>(data); // "data" is treated as a Thread*
+}
diff --git a/final/offload/src/offload_engine.h b/final/offload/src/offload_engine.h
new file mode 100644
index 0000000..d1a9631
--- /dev/null
+++ b/final/offload/src/offload_engine.h
@@ -0,0 +1,482 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef OFFLOAD_ENGINE_H_INCLUDED
+#define OFFLOAD_ENGINE_H_INCLUDED
+
+#include <limits.h>
+
+#include <list>
+#include <set>
+#include <map>
+#include "offload_common.h"
+#include "coi/coi_client.h"
+
+// Address range [start(), end()) described by a start pointer and a length
+class MemRange {
+public:
+    MemRange() : m_start(0), m_length(0) {}
+    MemRange(const void *addr, uint64_t len) : m_start(addr), m_length(len) {}
+
+    const void* start() const { return m_start; }
+
+    // address one past the last byte, i.e. start() advanced by length()
+    const void* end() const {
+        const char *first = static_cast<const char*>(m_start);
+        return first + m_length;
+    }
+
+    // size of the range in bytes
+    uint64_t length() const { return m_length; }
+
+    // returns true if given range overlaps with another one
+    bool overlaps(const MemRange &o) const {
+        // A and B are apart when A ends at or before B.start, or B ends at
+        // or before A.start; every other arrangement counts as overlap.
+        const bool apart = end() <= o.start() || o.end() <= start();
+        return !apart;
+    }
+
+    // returns true if the other range lies entirely within this one
+    bool contains(const MemRange &o) const {
+        return !(o.start() < start()) && !(end() < o.end());
+    }
+
+private:
+    const void* m_start;
+    uint64_t    m_length;
+};
+
+// Data associated with a pointer variable
+// (the element type of the engine's memory association table)
+class PtrData {
+public:
+    // Fresh entry for the range [addr, addr + len): no buffers, no references
+    PtrData(const void *addr, uint64_t len) :
+        cpu_addr(addr, len), cpu_buf(0), mic_buf(0),
+        mic_addr(0), alloc_disp(0), mic_offset(0),
+        is_static(false), ref_count(0)
+    {}
+
+    // Copy constructor: duplicates every field except alloc_ptr_data_lock,
+    // which is default-constructed in the copy.
+    PtrData(const PtrData& ptr):
+        cpu_addr(ptr.cpu_addr), cpu_buf(ptr.cpu_buf), mic_buf(ptr.mic_buf),
+        mic_addr(ptr.mic_addr), alloc_disp(ptr.alloc_disp),
+        mic_offset(ptr.mic_offset),
+        is_static(ptr.is_static), ref_count(ptr.ref_count)
+    {}
+
+    // Entries are ordered by CPU start address; two entries whose ranges
+    // overlap are equivalent (neither is less than the other).
+    bool operator<(const PtrData &o) const {
+        if (!(cpu_addr.start() < o.cpu_addr.start())) {
+            return false;
+        }
+        return !cpu_addr.overlaps(o.cpu_addr);
+    }
+
+    // Adds a reference and returns the count before the increment.
+    // Static entries are not counted and always report LONG_MAX.
+    long add_reference() {
+#ifdef TARGET_WINNT
+        return is_static ? LONG_MAX : _InterlockedIncrement(&ref_count) - 1;
+#else // !TARGET_WINNT
+        return is_static ? LONG_MAX : __sync_fetch_and_add(&ref_count, 1);
+#endif // TARGET_WINNT
+    }
+
+    // Drops a reference and returns the count after the decrement.
+    // Static entries are not counted and always report LONG_MAX.
+    long remove_reference() {
+#ifdef TARGET_WINNT
+        return is_static ? LONG_MAX : _InterlockedDecrement(&ref_count);
+#else // !TARGET_WINNT
+        return is_static ? LONG_MAX : __sync_sub_and_fetch(&ref_count, 1);
+#endif // TARGET_WINNT
+    }
+
+    // Current reference count, LONG_MAX for static entries.
+    long get_reference() const {
+        if (is_static) return LONG_MAX;
+        return ref_count;
+    }
+
+public:
+    // CPU address range
+    const MemRange  cpu_addr;
+
+    // CPU and MIC buffers
+    COIBUFFER       cpu_buf;
+    COIBUFFER       mic_buf;
+
+    // placeholder for buffer address on mic
+    uint64_t        mic_addr;
+
+    uint64_t        alloc_disp;
+
+    // additional offset to pointer data on MIC for improving bandwidth for
+    // data which is not 4K aligned
+    uint32_t        mic_offset;
+
+    // if true buffers are created from static memory
+    bool            is_static;
+    mutex_t         alloc_ptr_data_lock;
+
+private:
+    // reference count for the entry
+    long            ref_count;
+};
+
+typedef std::list<PtrData*> PtrDataList;
+
+// Data associated with automatic variable
+class AutoData {
+public:
+    AutoData(const void *addr, uint64_t len) :
+        cpu_addr(addr, len), ref_count(0)
+    {}
+
+    // Ordering for AutoSet: by CPU start address, with overlapping ranges
+    // treated as equivalent (neither one is less than the other).
+    bool operator<(const AutoData &o) const {
+        return !cpu_addr.overlaps(o.cpu_addr) &&
+               cpu_addr.start() < o.cpu_addr.start();
+    }
+
+    // adds a reference; returns the count before the increment
+    long add_reference() {
+#ifdef TARGET_WINNT
+        return _InterlockedIncrement(&ref_count) - 1;
+#else // !TARGET_WINNT
+        return __sync_fetch_and_add(&ref_count, 1);
+#endif // TARGET_WINNT
+    }
+
+    // drops a reference; returns the count after the decrement
+    long remove_reference() {
+#ifdef TARGET_WINNT
+        return _InterlockedDecrement(&ref_count);
+#else // !TARGET_WINNT
+        return __sync_sub_and_fetch(&ref_count, 1);
+#endif // TARGET_WINNT
+    }
+
+    long get_reference() const { return ref_count; }
+
+public:
+    // CPU address range
+    const MemRange cpu_addr;
+
+private:
+    // reference count for the entry
+    long ref_count;
+};
+
+// Set of automatic variables
+typedef std::set<AutoData> AutoSet;
+
+// Target image data
+struct TargetImage
+{
+    // Stores the given pointers and sizes as-is; nothing is copied.
+    TargetImage(const char *_name, const void *_data, uint64_t _size,
+                const char *_origin, uint64_t _offset)
+        : name(_name)
+        , data(_data)
+        , size(_size)
+        , origin(_origin)
+        , offset(_offset)
+    {}
+
+    const char* name;   // library name
+    const void* data;   // contents of the image
+    uint64_t    size;   // size of the contents
+
+    const char* origin; // file of origin
+    uint64_t    offset; // offset within the file of origin
+};
+
+typedef std::list<TargetImage> TargetImageList;
+
+// Data associated with persistent auto objects
+struct PersistData
+{
+    // The PtrData is heap-allocated here and is not released by this struct.
+    PersistData(const void *addr, uint64_t routine_num, uint64_t size) :
+        stack_cpu_addr(addr), routine_id(routine_num),
+        stack_ptr_data(new PtrData(0, size)), cpu_stack_addr(0)
+    {}
+    // 1-st key value - beginning of the stack at CPU
+    const void *   stack_cpu_addr;
+    // 2-nd key value - identifier of routine invocation at CPU
+    uint64_t   routine_id;
+    // corresponded PtrData; only stack_ptr_data->mic_buf is used
+    PtrData * stack_ptr_data;
+    // used to get offset of the variable in stack buffer; null until set
+    char * cpu_stack_addr;
+};
+
+typedef std::list<PersistData> PersistDataList;
+
+// class representing a single engine
+struct Engine {
+    friend void __offload_init_library_once(void);
+    friend void __offload_fini_library(void);
+    // check_result() terminates the process for any res but COI_SUCCESS.
+#define check_result(res, tag, ...) \
+    { \
+        if (res == COI_PROCESS_DIED) { \
+            fini_process(true); \
+            exit(1); \
+        } \
+        if (res != COI_SUCCESS) { \
+            __liboffload_error_support(tag, __VA_ARGS__); \
+            exit(1); \
+        } \
+    }
+
+    int get_logical_index() const {
+        return m_index;
+    }
+
+    int get_physical_index() const {
+        return m_physical_index;
+    }
+
+    const COIPROCESS& get_process() const {
+        return m_process;
+    }
+
+    // initialize device
+    void init(void);
+
+    // add new library and mark the engine as not ready
+    void add_lib(const TargetImage &lib)
+    {
+        m_lock.lock();
+        m_ready = false;
+        m_images.push_back(lib);
+        m_lock.unlock();
+    }
+
+    COIRESULT compute(
+        const std::list<COIBUFFER> &buffers,
+        const void*         data,
+        uint16_t            data_size,
+        void*               ret,
+        uint16_t            ret_size,
+        uint32_t            num_deps,
+        const COIEVENT*     deps,
+        COIEVENT*           event
+    );
+
+#ifdef MYO_SUPPORT
+    // temporary workaround for blocking behavior for myoiLibInit/Fini calls
+    void init_myo(COIEVENT *event) {
+        COIRESULT res;
+        res = COI::PipelineRunFunction(get_pipeline(),
+                                       m_funcs[c_func_myo_init],
+                                       0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                       event);
+        check_result(res, c_pipeline_run_func, m_index, res);
+    }
+
+    void fini_myo(COIEVENT *event) {
+        COIRESULT res;
+        res = COI::PipelineRunFunction(get_pipeline(),
+                                       m_funcs[c_func_myo_fini],
+                                       0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                       event);
+        check_result(res, c_pipeline_run_func, m_index, res);
+    }
+#endif // MYO_SUPPORT
+
+    // Memory association table. A lookup key is a zero-length PtrData; as
+    // PtrData::operator< treats overlapping ranges as equal, such a key is
+    // expected to match the entry whose CPU range holds the address.
+    PtrData* find_ptr_data(const void *ptr) {
+        m_ptr_lock.lock();
+        PtrSet::iterator res = m_ptr_set.find(PtrData(ptr, 0));
+        m_ptr_lock.unlock();
+        if (res == m_ptr_set.end()) {
+            return 0;
+        }
+        return const_cast<PtrData*>(res.operator->());
+    }
+
+    PtrData* insert_ptr_data(const void *ptr, uint64_t len, bool &is_new) {
+        m_ptr_lock.lock();
+        std::pair<PtrSet::iterator, bool> res =
+            m_ptr_set.insert(PtrData(ptr, len));
+        PtrData* ptr_data = const_cast<PtrData*>(res.first.operator->());
+        m_ptr_lock.unlock();
+
+        is_new = res.second;
+        if (is_new) {
+            // It's necessary to lock as soon as possible.
+            // unlock must be done at call site of insert_ptr_data at
+            // branch for is_new
+            ptr_data->alloc_ptr_data_lock.lock();
+        }
+        return ptr_data;
+    }
+
+    void remove_ptr_data(const void *ptr) {
+        m_ptr_lock.lock();
+        m_ptr_set.erase(PtrData(ptr, 0));
+        m_ptr_lock.unlock();
+    }
+
+    // Automatic variables. These use the set returned by get_auto_vars()
+    // (the calling thread's set, per its declaration below) and take no
+    // lock.
+    AutoData* find_auto_data(const void *ptr) {
+        AutoSet &auto_vars = get_auto_vars();
+        AutoSet::iterator res = auto_vars.find(AutoData(ptr, 0));
+        if (res == auto_vars.end()) {
+            return 0;
+        }
+        return const_cast<AutoData*>(res.operator->());
+    }
+
+    AutoData* insert_auto_data(const void *ptr, uint64_t len) {
+        AutoSet &auto_vars = get_auto_vars();
+        std::pair<AutoSet::iterator, bool> res =
+            auto_vars.insert(AutoData(ptr, len));
+        return const_cast<AutoData*>(res.first.operator->());
+    }
+
+    void remove_auto_data(const void *ptr) {
+        get_auto_vars().erase(AutoData(ptr, 0));
+    }
+
+    // Signals: m_signal_map associates a signal address with its
+    // OffloadDescriptor; every access is made under m_signal_lock.
+    // find_signal() returns 0 if there is no entry for "signal".
+    void add_signal(const void *signal, OffloadDescriptor *desc) {
+        m_signal_lock.lock();
+        m_signal_map[signal] = desc;
+        m_signal_lock.unlock();
+    }
+
+    OffloadDescriptor* find_signal(const void *signal, bool remove) {
+        OffloadDescriptor *desc = 0;
+
+        m_signal_lock.lock();
+        {
+            SignalMap::iterator it = m_signal_map.find(signal);
+            if (it != m_signal_map.end()) {
+                desc = it->second;
+                if (remove) {
+                    m_signal_map.erase(it);
+                }
+            }
+        }
+        m_signal_lock.unlock();
+
+        return desc;
+    }
+
+    // stop device process
+    void fini_process(bool verbose);
+
+    // list of stacks active at the engine
+    PersistDataList m_persist_list;
+
+private:
+    Engine() : m_index(-1), m_physical_index(-1), m_process(0), m_ready(false),
+               m_proc_number(0)
+    {}
+
+    ~Engine() {
+        if (m_process != 0) {
+            fini_process(false);
+        }
+    }
+
+    // set indexes
+    void set_indexes(int logical_index, int physical_index) {
+        m_index = logical_index;
+        m_physical_index = physical_index;
+    }
+
+    // start process on device
+    void init_process();
+
+    void load_libraries(void);
+    void init_ptr_data(void);
+
+    // performs library initialization on the device side
+    pid_t init_device(void);
+
+private:
+    // get pipeline associated with a calling thread
+    COIPIPELINE get_pipeline(void);
+
+    // get automatic vars set associated with the calling thread
+    AutoSet& get_auto_vars(void);
+
+    // destructor for thread data
+    static void destroy_thread_data(void *data);
+
+private:
+    typedef std::set<PtrData> PtrSet;
+    typedef std::map<const void*, OffloadDescriptor*> SignalMap;
+
+    // device indexes
+    int         m_index;
+    int         m_physical_index;
+
+    // number of COI pipes created for the engine
+    long        m_proc_number;
+
+    // process handle
+    COIPROCESS  m_process;
+
+    // If false, device either has not been initialized or new libraries
+    // have been added.
+    bool        m_ready;
+    mutex_t     m_lock;
+
+    // List of libraries to be loaded
+    TargetImageList m_images;
+
+    // var table
+    PtrSet      m_ptr_set;
+    mutex_t     m_ptr_lock;
+
+    // signals
+    SignalMap m_signal_map;
+    mutex_t   m_signal_lock;
+
+    // constants for accessing device function handles
+    enum {
+        c_func_compute = 0,
+#ifdef MYO_SUPPORT
+        c_func_myo_init,
+        c_func_myo_fini,
+#endif // MYO_SUPPORT
+        c_func_init,
+        c_func_var_table_size,
+        c_func_var_table_copy,
+        c_funcs_total
+    };
+    static const char* m_func_names[c_funcs_total];
+
+    // device function handles
+    COIFUNCTION m_funcs[c_funcs_total];
+
+    // int -> name mapping for device signals
+    static const int   c_signal_max = 32;
+    static const char* c_signal_names[c_signal_max];
+};
+
+#endif // OFFLOAD_ENGINE_H_INCLUDED
diff --git a/final/offload/src/offload_env.cpp b/final/offload/src/offload_env.cpp
new file mode 100644
index 0000000..d037338
--- /dev/null
+++ b/final/offload/src/offload_env.cpp
@@ -0,0 +1,354 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_env.h"
+#include <string.h>
+#include <ctype.h>
+#include "offload_util.h"
+#include "liboffload_error_codes.h"
+
+// for environment variables valid on all cards
+const int MicEnvVar::any_card = -1;
+
+MicEnvVar::~MicEnvVar()
+{
+    // release every per-card record held in card_spec_list
+    typedef std::list<MicEnvVar::CardEnvVars*>::const_iterator CardIter;
+    const CardIter last = card_spec_list.end();
+    for (CardIter cur = card_spec_list.begin(); cur != last; ++cur) {
+        delete *cur;
+    }
+}
+
+MicEnvVar::VarValue::~VarValue()
+{
+    free(env_var_value); // only the value string is freed, env_var is not
+}
+
+MicEnvVar::CardEnvVars::~CardEnvVars()
+{
+    std::list<MicEnvVar::VarValue*>::const_iterator cur = env_vars.begin();
+    while (cur != env_vars.end()) {
+        delete *cur;   // each VarValue in the list is released here
+        ++cur;
+    }
+}
+
+// Searching for card in "card_spec_list" list with the same "number"
+
+MicEnvVar::CardEnvVars* MicEnvVar::get_card(int number)
+{
+    // any_card selects the record shared by all cards
+    if (number == any_card) {
+        return &common_vars;
+    }
+    // otherwise scan the card-specific records; NULL if there is none
+    CardEnvVars *found = NULL;
+    std::list<MicEnvVar::CardEnvVars*>::const_iterator cur;
+    for (cur = card_spec_list.begin();
+         found == NULL && cur != card_spec_list.end(); ++cur) {
+        if ((*cur)->card_number == number) found = *cur;
+    }
+    return found;
+}
+
+// Searching for environment variable in "env_vars" list with the same name
+
+MicEnvVar::VarValue* MicEnvVar::CardEnvVars::find_var(
+    char* env_var_name,
+    int env_var_name_length
+)
+{
+    // a match needs the same name length and the same characters
+    typedef std::list<MicEnvVar::VarValue*>::const_iterator VarIter;
+    for (VarIter cur = env_vars.begin(); cur != env_vars.end(); ++cur) {
+        if ((*cur)->length != env_var_name_length) continue;
+        if (strncmp((*cur)->env_var, env_var_name,
+                    env_var_name_length) == 0) {
+            return *cur;
+        }
+    }
+    return NULL;
+}
+
+void MicEnvVar::analyze_env_var(char *env_var_string)
+{
+    // Classifies one environment string and records what it defines;
+    // strings that are not MIC definitions are ignored.
+    char *name;
+    char *def;
+    int  name_length;
+    int  card;
+
+    const MicEnvVarKind kind = get_env_var_kind(env_var_string,
+                                                &card,
+                                                &name,
+                                                &name_length,
+                                                &def);
+
+    if (kind == c_mic_var || kind == c_mic_card_var) {
+        // a single definition, common or for one card
+        add_env_var(card,
+                    name,
+                    name_length,
+                    def);
+    }
+    else if (kind == c_mic_card_env) {
+        // <mic-prefix>_<card-number>_ENV: "def" is a list of definitions
+        mic_parse_env_var_list(card, def);
+    }
+
+    // c_no_mic, or any other value: nothing to record
+}
+
+void MicEnvVar::add_env_var(
+    int card_number,
+    char *env_var_name,
+    int env_var_name_length,
+    char *env_var_def
+)
+{
+    // Records the definition "env_var_def" of the variable named by the
+    // first env_var_name_length characters of env_var_name, for the given
+    // card (any_card for the common set). The first definition of a name
+    // wins. env_var_def must be heap-allocated (malloc/strdup): ownership
+    // passes to this object, which releases it with free().
+
+    // get_card() maps any_card to the common set and returns NULL only for
+    // a card number that has no record yet
+    CardEnvVars *card = get_card(card_number);
+    if (!card) {
+        // definition for new card occurred
+        card = new CardEnvVars(card_number);
+        card_spec_list.push_back(card);
+    }
+
+    if (card->find_var(env_var_name, env_var_name_length)) {
+        // already defined: drop the duplicate instead of leaking it
+        free(env_var_def);
+        return;
+    }
+    // put new env var definition in "env_vars" list
+    card->env_vars.push_back(
+        new VarValue(env_var_name, env_var_name_length, env_var_def));
+}
+
+// The routine analyses string pointed by "env_var_string" argument
+// according to the following syntax:
+//
+// Specification of prefix for MIC environment variables
+// MIC_ENV_PREFIX=<mic-prefix>
+//
+// Setting single MIC environment variable
+// <mic-prefix>_<var>=<value>
+// <mic-prefix>_<card-number>_<var>=<value>
+
+// Setting multiple MIC environment variables
+// <mic-prefix>_<card-number>_ENV=<env-vars>
+
+MicEnvVarKind MicEnvVar::get_env_var_kind(
+    char *env_var_string,
+    int *card_number,
+    char **env_var_name,
+    int *env_var_name_length,
+    char **env_var_def
+)
+{
+    // Parses "<prefix>_[<card-number>_]<var>=..." (see the syntax above).
+    // On success *env_var_def is a strdup() copy owned by the caller.
+    // ctype classifiers get unsigned char: negative values are undefined.
+    if (!prefix) return c_no_mic;
+    const int len = strlen(prefix);
+    char *c = env_var_string;
+    bool card_is_set = false;
+
+    if (strncmp(c, prefix, len) != 0 || c[len] != '_') {
+        return c_no_mic;
+    }
+    c += len + 1;
+
+    *card_number = any_card;
+    if (isdigit((unsigned char) *c)) {
+        int num = 0;
+        while (isdigit((unsigned char) *c)) {
+            num = (*c++ - '0') + (num * 10);
+        }
+        if (*c != '_') {
+            return c_no_mic;
+        }
+        c++;
+        *card_number = num;
+        card_is_set = true;
+    }
+    if (!isalpha((unsigned char) *c)) {
+        return c_no_mic;
+    }
+    *env_var_name = *env_var_def = c;
+    if (strncmp(c, "ENV=", 4) == 0) {
+        if (card_is_set) {
+            *env_var_def = strdup(c + strlen("ENV="));
+            return c_mic_card_env;
+        }
+        // without a card number "ENV" is an ordinary variable name
+        *env_var_name_length = 3;
+        *env_var_def = strdup(c);
+        return c_mic_var;
+    }
+    for (*env_var_name_length = 0;
+         isalnum((unsigned char) *c) || *c == '_'; c++) {
+        (*env_var_name_length)++;
+    }
+    if (*c != '=') {
+        return c_no_mic;
+    }
+    *env_var_def = strdup(*env_var_def);
+    return card_is_set? c_mic_card_var : c_mic_var;
+}
+
+// analysing <env-vars> in form:
+// <mic-prefix>_<card-number>_ENV=<env-vars>
+// where:
+//
+// <env-vars>:
+//                <env-var>
+//                <env-vars> | <env-var>
+//
+// <env-var>:
+//                variable=value
+//                variable="value"
+//                variable=
+
+void MicEnvVar::mic_parse_env_var_list(
+    int card_number, char *env_vars_def_list)
+{
+    // Splits "<env-var> | <env-var> | ..." and records each definition for
+    // "card_number" through add_env_var(). Variable names are passed on as
+    // pointers into env_vars_def_list (not copied); each "name=value" text
+    // is copied into its own malloc() buffer.
+    char *c = env_vars_def_list;
+
+    if (*c == '"') {
+        c++;
+    }
+    while (*c != 0) {
+        char *env_var_name = c;
+        int  env_var_name_length = 0;
+        bool var_is_quoted = false;
+
+        if (!isalpha((unsigned char) *c)) {
+            LIBOFFLOAD_ERROR(c_mic_parse_env_var_list1);
+            return;
+        }
+        while (isalnum((unsigned char) *c) || *c == '_') {
+            c++;
+            env_var_name_length++;
+        }
+        if (*c != '=') {
+            LIBOFFLOAD_ERROR(c_mic_parse_env_var_list2);
+            return;
+        }
+        c++;
+
+        if (*c == '"') {
+            var_is_quoted = true;
+            c++;
+        }
+        // Environment variable values that contain | will need to be escaped.
+        while (*c != 0 && *c != '|' &&
+               (!var_is_quoted || *c != '"'))
+        {
+            // skip escaped symbol, but never step past the terminating NUL
+            if (*c == '\\' && c[1] != 0) {
+                c++;
+            }
+            c++;
+        }
+        if (var_is_quoted) {
+            // the closing quote is absent when the string ended early
+            if (*c != 0) {
+                c++;
+            }
+            while (*c != 0 && *c != '|') {
+                c++;
+            }
+        }
+
+        // copy "name=value"; the extra byte holds the terminating NUL
+        const int sz = c - env_var_name;
+        char *env_var_def = (char*) malloc(sz + 1);
+        memcpy(env_var_def, env_var_name, sz);
+        env_var_def[sz] = 0;
+        if (*c == '|') {
+            c++;
+            while (*c == ' ') {
+                c++;
+            }
+        }
+        add_env_var(card_number, env_var_name, env_var_name_length,
+                    env_var_def);
+    }
+}
+
+// Collect all definitions for the card with number "card_num".
+// The returned result is vector of string pointers defining one
+// environment variable. The vector is terminated by NULL pointer.
+// In the beginning of the vector there are env vars defined as
+// <mic-prefix>_<card-number>_<var>=<value>
+// or
+// <mic-prefix>_<card-number>_ENV=<env-vars>
+// where <card-number> is equal to "card_num"
+// They are followed by definitions valid for any card
+// and absent in previous definitions.
+
+char** MicEnvVar::create_environ_for_card(int card_num)
+{
+    // Builds the environment for one card as a malloc()ed array of pointers
+    // to the stored definition strings, terminated by a null pointer.
+    typedef std::list<MicEnvVar::VarValue*>::const_iterator VarIter;
+
+    // nothing is produced when no prefix has been set
+    if (!prefix) {
+        return NULL;
+    }
+
+    // When there are no definitions specific to the card with number
+    // "card_num", the result is the one built for any_card.
+    CardEnvVars *own = get_card(card_num);
+    if (!own) {
+        return create_environ_for_card(any_card);
+    }
+
+    // card-specific definitions come first
+    std::list<char*> entries;
+    for (VarIter it = own->env_vars.begin();
+         it != own->env_vars.end(); ++it) {
+        entries.push_back((*it)->env_var_value);
+    }
+
+    // then the common definitions that the card does not define itself
+    if (card_num != any_card) {
+        CardEnvVars *common = get_card(any_card);
+        for (VarIter it = common->env_vars.begin();
+             it != common->env_vars.end(); ++it) {
+            VarValue *shared = *it;
+            if (own->find_var(shared->env_var, shared->length)) {
+                continue;
+            }
+            entries.push_back(shared->env_var_value);
+        }
+    }
+
+    // the strings themselves are not copied, only the pointers to them
+    const int count = entries.size();
+    char **result = (char**) malloc((count + 1) * sizeof(char*));
+    std::copy(entries.begin(), entries.end(), result);
+    result[count] = 0;
+    return result;
+}
diff --git a/final/offload/src/offload_env.h b/final/offload/src/offload_env.h
new file mode 100644
index 0000000..f035ff6
--- /dev/null
+++ b/final/offload/src/offload_env.h
@@ -0,0 +1,91 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef OFFLOAD_ENV_H_INCLUDED
+#define OFFLOAD_ENV_H_INCLUDED
+
+#include <list>
+
+// data structure and routines to parse MIC user environment and pass to MIC
+
+enum MicEnvVarKind // result of MicEnvVar::get_env_var_kind()
+{
+    c_no_mic,         // not MIC env var
+    c_mic_var,        // for <mic-prefix>_<var>=<value>
+    c_mic_card_var,   // for <mic-prefix>_<card-number>_<var>=<value>
+    c_mic_card_env    // for <mic-prefix>_<card-number>_ENV=<env-vars>
+};
+
+struct MicEnvVar {
+public:
+    MicEnvVar() : prefix(0) {}
+    ~MicEnvVar();
+
+    void analyze_env_var(char *env_var_string);
+    char** create_environ_for_card(int card_num);
+    MicEnvVarKind get_env_var_kind(
+        char *env_var_string,
+        int *card_number,
+        char **env_var_name,
+        int *env_var_name_length,
+        char **env_var_def
+    );
+    void add_env_var(
+        int card_number,
+        char *env_var_name,
+        int env_var_name_length,
+        char *env_var_def
+    );
+
+    // an empty or null "pref" clears the prefix
+    void set_prefix(const char *pref) {
+        const bool usable = pref != 0 && *pref != '\0';
+        prefix = usable ? pref : 0;
+    }
+
+    // one definition: the name is the first "length" characters of env_var
+    struct VarValue {
+    public:
+        char* env_var;
+        int   length;
+        char* env_var_value;   // released with free() by ~VarValue()
+
+        VarValue(char* var, int ln, char* value) :
+            env_var(var), length(ln), env_var_value(value)
+        {}
+        ~VarValue();
+    };
+
+    // definitions for one card, or for all cards when card_number is any_card
+    struct CardEnvVars {
+    public:
+        int card_number;
+        std::list<VarValue*> env_vars;
+
+        CardEnvVars() : card_number(any_card) {}
+        CardEnvVars(int num) : card_number(num) {}
+        ~CardEnvVars();
+
+        void add_new_env_var(int number, char *env_var, int length,
+                             char *env_var_value);
+        VarValue* find_var(char* env_var_name, int env_var_name_length);
+    };
+    static const int any_card;
+
+private:
+    void         mic_parse_env_var_list(int card_number, char *env_var_def);
+    CardEnvVars* get_card(int number);
+
+    const char *prefix;
+    std::list<CardEnvVars*> card_spec_list;
+    CardEnvVars common_vars;
+};
+
+#endif // OFFLOAD_ENV_H_INCLUDED
diff --git a/final/offload/src/offload_host.cpp b/final/offload/src/offload_host.cpp
new file mode 100644
index 0000000..38d5139
--- /dev/null
+++ b/final/offload/src/offload_host.cpp
@@ -0,0 +1,4360 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Forward declarations, as the following 2 functions are declared as friends in offload_engine.h.
+// Clang does not accept a static declaration that comes after the friend declaration.
+static void __offload_init_library_once(void);
+static void __offload_fini_library(void);
+
+#include "offload_host.h"
+#ifdef MYO_SUPPORT
+#include "offload_myo_host.h"
+#endif
+
+#include <malloc.h>
+#ifndef TARGET_WINNT
+#include <alloca.h>
+#include <elf.h>
+#endif // TARGET_WINNT
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <algorithm>
+#include <bitset>
+
+#if defined(HOST_WINNT)
+#define PATH_SEPARATOR ";"   // separator used when HOST_WINNT is defined
+#else
+#define PATH_SEPARATOR ":"   // separator used on every other host
+#endif
+// Offload number held by timer_data, or 0 if it is null (argument is evaluated twice).
+#define GET_OFFLOAD_NUMBER(timer_data) \
+    ((timer_data) ? (timer_data)->offload_number : 0)
+
+#ifdef TARGET_WINNT
+// Small subset of ELF declarations for Windows which is needed to compile
+// this file. The ELF header is used to determine which binary type the
+// target image contains - a shared library or an executable.
+
+typedef uint16_t Elf64_Half;
+typedef uint32_t Elf64_Word;
+typedef uint64_t Elf64_Addr;
+typedef uint64_t Elf64_Off;
+
+#define EI_NIDENT   16  // size of the e_ident array below
+
+#define ET_EXEC     2   // e_type value for an executable
+#define ET_DYN      3   // e_type value for a shared library
+
+typedef struct  // ELF file header (64-bit layout)
+{
+    unsigned char e_ident[EI_NIDENT];
+    Elf64_Half    e_type;  // binary type; compared against ET_EXEC / ET_DYN
+    Elf64_Half    e_machine;
+    Elf64_Word    e_version;
+    Elf64_Addr    e_entry;
+    Elf64_Off     e_phoff;
+    Elf64_Off     e_shoff;
+    Elf64_Word    e_flags;
+    Elf64_Half    e_ehsize;
+    Elf64_Half    e_phentsize;
+    Elf64_Half    e_phnum;
+    Elf64_Half    e_shentsize;
+    Elf64_Half    e_shnum;
+    Elf64_Half    e_shstrndx;
+} Elf64_Ehdr;
+#endif // TARGET_WINNT
+
+// Host console and file logging state
+const char *prefix;
+int console_enabled = 0;
+int offload_number = 0;
+
+static const char *htrace_envname = "H_TRACE";
+static const char *offload_report_envname = "OFFLOAD_REPORT";
+static const char *timer_envname = "H_TIME";  // const: it points at a string literal
+
+// Trace information: printable names used by the OFFLOAD_TRACE output
+static const char* vardesc_direction_as_string[] = {  // indexed by VarDesc direction.bits
+    "NOCOPY",
+    "IN",
+    "OUT",
+    "INOUT"
+};
+static const char* vardesc_type_as_string[] = {  // indexed by VarDesc type.src / type.dst
+    "unknown",
+    "data",
+    "data_ptr",
+    "func_ptr",
+    "void_ptr",
+    "string_ptr",
+    "dv",
+    "dv_data",
+    "dv_data_slice",
+    "dv_ptr",
+    "dv_ptr_data",
+    "dv_ptr_data_slice",
+    "cean_var",
+    "cean_var_ptr",
+    "c_data_ptr_array",
+    "c_func_ptr_array",
+    "c_void_ptr_array",
+    "c_string_ptr_array"  // NOTE(review): 18 entries; one caller indexes with (flags & 0x3f) - confirm range
+};
+
+Engine*         mic_engines = 0;
+uint32_t        mic_engines_total = 0;  // presumably the element count of mic_engines - TODO confirm
+pthread_key_t   mic_thread_key;
+MicEnvVar       mic_env_vars;
+uint64_t        cpu_frequency = 0;
+
+// MIC_STACKSIZE (default value is 12 * 1024 * 1024)
+uint32_t mic_stack_size = 12 * 1024 * 1024;
+
+// MIC_BUFFERSIZE (default value is 0)
+uint64_t mic_buffer_size = 0;
+
+// MIC_LD_LIBRARY_PATH (null until set)
+char* mic_library_path = 0;
+
+// MIC_PROXY_IO (enabled by default)
+bool mic_proxy_io = true;
+
+// MIC_PROXY_FS_ROOT (null until set)
+char* mic_proxy_fs_root = 0;
+
+// Threshold for creating buffers with large pages. A buffer is created
+// with the large pages hint if its size reaches the threshold value.
+// By default large pages are currently disabled (by setting the default
+// value of the threshold to MAX) due to HSD 4114629.
+uint64_t __offload_use_2mb_buffers = 0xffffffffffffffffULL;
+static const char *mic_use_2mb_buffers_envname  =
+    "MIC_USE_2MB_BUFFERS";
+
+static uint64_t __offload_use_async_buffer_write = 2 * 1024 * 1024;  // presumably a size threshold - TODO confirm
+static const char *mic_use_async_buffer_write_envname  =
+    "MIC_USE_ASYNC_BUFFER_WRITE";
+
+static uint64_t __offload_use_async_buffer_read = 2 * 1024 * 1024;  // presumably a size threshold - TODO confirm
+static const char *mic_use_async_buffer_read_envname  =
+    "MIC_USE_ASYNC_BUFFER_READ";
+
+// device initialization type (default: c_init_on_offload_all)
+OffloadInitType __offload_init_type = c_init_on_offload_all;
+static const char *offload_init_envname = "OFFLOAD_INIT";
+
+// active wait (enabled by default)
+static bool __offload_active_wait = true;
+static const char *offload_active_wait_envname = "OFFLOAD_ACTIVE_WAIT";
+
+// OMP_DEFAULT_DEVICE (default device number is 0)
+int __omp_device_num = 0;
+static const char *omp_device_num_envname = "OMP_DEFAULT_DEVICE";
+
+// The list of pending target libraries
+static bool            __target_libs;
+static TargetImageList __target_libs_list;
+static mutex_t         __target_libs_lock;
+static mutex_t         stack_alloc_lock;  // taken by offload_stack_memory_manager()
+
+// Target executable
+TargetImage*           __target_exe;
+
+static char * offload_get_src_base(void * ptr, uint8_t type)
+{
+    // Pointer variable: ptr addresses the pointer, whose value is the base.
+    if (VAR_TYPE_IS_PTR(type)) {
+        return *static_cast<char**>(ptr);
+    }
+    // Scalar variable: ptr is already the base.
+    if (VAR_TYPE_IS_SCALAR(type)) {
+        return static_cast<char*>(ptr);
+    }
+    // Anything that is not one of the two descriptor kinds has no base.
+    if (!(VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type))) {
+        return NULL;
+    }
+    // Descriptor kinds: find the ArrDesc, then report its Base field.
+    ArrDesc *desc;
+    if (VAR_TYPE_IS_DV_DATA_SLICE(type)) {
+        const arr_desc *slice = static_cast<const arr_desc*>(ptr);
+        if (type == c_dv_data_slice)
+            desc = reinterpret_cast<ArrDesc*>(slice->base);
+        else
+            desc = *reinterpret_cast<ArrDesc**>(slice->base);
+    }
+    else {
+        desc = (type == c_dv_data) ? static_cast<ArrDesc*>(ptr)
+                                   : *static_cast<ArrDesc**>(ptr);
+    }
+    return reinterpret_cast<char*>(desc->Base);
+}
+
+void OffloadDescriptor::report_coi_error(error_types msg, COIRESULT res)
+{   // Handles a failed COI call, then ends the host process with exit(1).
+    // special case for the 'process died' error: no message is printed here
+    if (res == COI_PROCESS_DIED) {
+        m_device.fini_process(true);
+    }
+    else {
+        switch (msg) {
+            case c_buf_create:
+                if (res == COI_OUT_OF_MEMORY) {
+                    msg = c_buf_create_out_of_mem;  // switch to the more specific message id
+                }
+                /* fallthru: printed together with the device-indexed messages below */
+
+            case c_buf_create_from_mem:
+            case c_buf_get_address:
+            case c_pipeline_create:
+            case c_pipeline_run_func:
+                LIBOFFLOAD_ERROR(msg, m_device.get_logical_index(), res);  // message takes device index and result
+                break;
+
+            case c_buf_read:
+            case c_buf_write:
+            case c_buf_copy:
+            case c_buf_map:
+            case c_buf_unmap:
+            case c_buf_destroy:
+            case c_buf_set_state:
+                LIBOFFLOAD_ERROR(msg, res);  // message takes only the result
+                break;
+
+            default:
+                break;  // other message ids print nothing
+        }
+    }
+
+    exit(1);  // reached on every path above
+}
+
+_Offload_result OffloadDescriptor::translate_coi_error(COIRESULT res) const
+{
+    // Three COI result codes have a dedicated offload status; every
+    // other code collapses to the generic OFFLOAD_ERROR.
+    if (res == COI_SUCCESS) {
+        return OFFLOAD_SUCCESS;
+    }
+    if (res == COI_PROCESS_DIED) {
+        return OFFLOAD_PROCESS_DIED;
+    }
+    if (res == COI_OUT_OF_MEMORY) {
+        return OFFLOAD_OUT_OF_MEMORY;
+    }
+
+    return OFFLOAD_ERROR;
+}
+
+bool OffloadDescriptor::alloc_ptr_data(
+    PtrData* &ptr_data,
+    void *base,
+    int64_t disp,
+    int64_t size,
+    int64_t alloc_disp,
+    int align
+)   // Finds or creates the association for 'base' and stores it in ptr_data.
+{   // Returns false when a buffer could not be created or set up.
+    // total length measured from base: displacement plus transfer size
+    int64_t length = disp + size;
+    bool is_new;
+
+    OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
+                  base, length);
+
+    // add new entry (is_new reports whether one was actually created)
+    ptr_data = m_device.insert_ptr_data(base, length, is_new);
+    if (is_new) {  // NOTE(review): every path below unlocks alloc_ptr_data_lock, so the entry presumably arrives locked - confirm
+
+        OFFLOAD_TRACE(3, "Added new association\n");
+
+        if (length > 0) {
+            OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
+            COIRESULT res;
+
+            // align should be a power of 2
+            if (align > 0 && (align & (align - 1)) == 0) {
+                // offset within mic_buffer. Can do offset optimization
+                // only when source address alignment satisfies requested
+                // alignment on the target (cq172736).
+                if ((reinterpret_cast<intptr_t>(base) & (align - 1)) == 0) {
+                    ptr_data->mic_offset = reinterpret_cast<intptr_t>(base) & 4095;  // low 12 bits of the address
+                }
+            }
+
+            // buffer size and flags
+            uint64_t buffer_size = length + ptr_data->mic_offset;
+            uint32_t buffer_flags = 0;
+
+            // create buffer with large pages if data length reaches the
+            // large page threshold
+            if (length >= __offload_use_2mb_buffers) {
+                buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
+            }
+
+            // create CPU buffer
+            OFFLOAD_DEBUG_TRACE_1(3,
+                          GET_OFFLOAD_NUMBER(get_timer_data()),
+                          c_offload_create_buf_host,
+                          "Creating buffer from source memory %p, "
+                          "length %lld\n", base, length);
+
+            // result is not checked because we can continue without cpu
+            // buffer. In this case we will use COIBufferRead/Write instead
+            // of COIBufferCopy.
+            COI::BufferCreateFromMemory(length,
+                                        COI_BUFFER_NORMAL,
+                                        0,
+                                        base,
+                                        1,
+                                        &m_device.get_process(),
+                                        &ptr_data->cpu_buf);
+
+            OFFLOAD_DEBUG_TRACE_1(3,
+                          GET_OFFLOAD_NUMBER(get_timer_data()),
+                          c_offload_create_buf_mic,
+                          "Creating buffer for sink: size %lld, offset %d, "
+                          "flags =0x%x\n", buffer_size - alloc_disp,
+                          ptr_data->mic_offset, buffer_flags);
+
+            // create MIC buffer (sized without the leading alloc_disp bytes)
+            res = COI::BufferCreate(buffer_size - alloc_disp,
+                                    COI_BUFFER_NORMAL,
+                                    buffer_flags,
+                                    0,
+                                    1,
+                                    &m_device.get_process(),
+                                    &ptr_data->mic_buf);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {  // caller supplied a status block: record the failure there
+                    m_status->result = translate_coi_error(res);
+                }
+                else if (m_is_mandatory) {  // no status block: a mandatory offload reports and exits
+                    report_coi_error(c_buf_create, res);
+                }
+                ptr_data->alloc_ptr_data_lock.unlock();
+                return false;
+            }
+
+            // make buffer valid on the device.
+            res = COI::BufferSetState(ptr_data->mic_buf,
+                                      m_device.get_process(),
+                                      COI_BUFFER_VALID,
+                                      COI_BUFFER_NO_MOVE,
+                                      0, 0, 0);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                }
+                else if (m_is_mandatory) {
+                    report_coi_error(c_buf_set_state, res);
+                }
+                ptr_data->alloc_ptr_data_lock.unlock();
+                return false;
+            }
+
+            res = COI::BufferSetState(ptr_data->mic_buf,  // and mark it invalid for COI_PROCESS_SOURCE
+                                      COI_PROCESS_SOURCE,
+                                      COI_BUFFER_INVALID,
+                                      COI_BUFFER_NO_MOVE,
+                                      0, 0, 0);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                }
+                else if (m_is_mandatory) {
+                    report_coi_error(c_buf_set_state, res);
+                }
+                ptr_data->alloc_ptr_data_lock.unlock();
+                return false;
+            }
+        }
+
+        ptr_data->alloc_disp = alloc_disp;  // remembered for range checks on later lookups
+        ptr_data->alloc_ptr_data_lock.unlock();
+    }
+    else {
+        mutex_locker_t locker(ptr_data->alloc_ptr_data_lock);
+
+        OFFLOAD_TRACE(3, "Found existing association: addr %p, length %lld, "
+                      "is_static %d\n",
+                      ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
+                      ptr_data->is_static);
+
+        // This is not a new entry. Make sure that provided address range fits
+        // into existing one.
+        MemRange addr_range(base, length - ptr_data->alloc_disp);
+        if (!ptr_data->cpu_addr.contains(addr_range)) {
+            LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
+            exit(1);
+        }
+
+        // if the entry is associated with static data it may not have buffers
+        // created because they are created on demand.
+        if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+bool OffloadDescriptor::find_ptr_data(
+    PtrData* &ptr_data,
+    void *base,
+    int64_t disp,
+    int64_t size,
+    bool report_error
+)   // Looks up the existing association for 'base'; ptr_data is 0 when none is usable.
+{   // Returns false only when creating buffers for a static entry fails.
+    // total length measured from base: displacement plus transfer size
+    int64_t length = disp + size;
+
+    OFFLOAD_TRACE(3, "Looking for association for data: addr %p, "
+                  "length %lld\n", base, length);
+
+    // find existing association in pointer table
+    ptr_data = m_device.find_ptr_data(base);
+    if (ptr_data == 0) {
+        if (report_error) {  // a missing association is fatal when report_error is set
+            LIBOFFLOAD_ERROR(c_no_ptr_data, base);
+            exit(1);
+        }
+        OFFLOAD_TRACE(3, "Association does not exist\n");
+        return true;  // not an error here: caller sees ptr_data == 0
+    }
+
+    OFFLOAD_TRACE(3, "Found association: base %p, length %lld, is_static %d\n",
+                  ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
+                  ptr_data->is_static);
+
+    // make sure that provided address range fits into existing one
+    MemRange addr_range(base, length);
+    if (!ptr_data->cpu_addr.contains(addr_range)) {
+        if (report_error) {  // a range mismatch is fatal when report_error is set
+            LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
+            exit(1);
+        }
+        OFFLOAD_TRACE(3, "Existing association partially overlaps with "
+                      "data address range\n");
+        ptr_data = 0;  // treated the same as "no association"
+        return true;
+    }
+
+    // if the entry is associated with static data it may not have buffers
+    // created because they are created on demand.
+    if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
+        return false;
+    }
+
+    return true;
+}
+
+// Creates the CPU-side and MIC-side COI buffers of ptr_data if they are still
+// missing. Returns false when creation fails and a status block is present.
+bool OffloadDescriptor::init_static_ptr_data(PtrData *ptr_data)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
+
+    if (ptr_data->cpu_buf == 0) {
+        OFFLOAD_TRACE(3, "Creating buffer from source memory %p\n",
+                      ptr_data->cpu_addr.start());
+        COIRESULT res = COI::BufferCreateFromMemory(
+            ptr_data->cpu_addr.length(),
+            COI_BUFFER_NORMAL,
+            0,
+            const_cast<void*>(ptr_data->cpu_addr.start()),
+            1, &m_device.get_process(),
+            &ptr_data->cpu_buf);
+        // with a status block the failure is recorded; otherwise report and exit
+        if (res != COI_SUCCESS) {
+            if (m_status != 0) {
+                m_status->result = translate_coi_error(res);
+                return false;
+            }
+            report_coi_error(c_buf_create_from_mem, res);
+        }
+    }
+
+    if (ptr_data->mic_buf == 0) {
+        OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n",
+                      ptr_data->mic_addr);
+        COIRESULT res = COI::BufferCreateFromMemory(
+            ptr_data->cpu_addr.length(),
+            COI_BUFFER_NORMAL,
+            COI_SINK_MEMORY,
+            reinterpret_cast<void*>(ptr_data->mic_addr),
+            1, &m_device.get_process(),
+            &ptr_data->mic_buf);
+        // same failure policy as for the CPU buffer above
+        if (res != COI_SUCCESS) {
+            if (m_status != 0) {
+                m_status->result = translate_coi_error(res);
+                return false;
+            }
+            report_coi_error(c_buf_create_from_mem, res);
+        }
+    }
+
+    return true;
+}
+
+bool OffloadDescriptor::init_mic_address(PtrData *ptr_data)
+{
+    // Nothing to do without a sink buffer, or once the address is known.
+    if (ptr_data->mic_buf == 0 || ptr_data->mic_addr != 0) {
+        return true;
+    }
+
+    const COIRESULT rc = COI::BufferGetSinkAddress(ptr_data->mic_buf,
+                                                   &ptr_data->mic_addr);
+    if (rc == COI_SUCCESS) {
+        return true;
+    }
+    // On failure: fill in the status if there is one, else exit if mandatory.
+    if (m_status != 0) m_status->result = translate_coi_error(rc);
+    else if (m_is_mandatory) report_coi_error(c_buf_get_address, rc);
+    return false;
+}
+
+// Writes 'size' zero bytes into targ_buf. Returns false when the write (or the
+// host scratch allocation) fails and a status block is present.
+bool OffloadDescriptor::nullify_target_stack(
+    COIBUFFER targ_buf,
+    uint64_t size
+)
+{
+    // calloc returns zero-filled memory, so no separate memset is needed
+    char * ptr = (char*)calloc(1, size);
+    // a failed host allocation is reported like a COI out-of-memory error
+    COIRESULT res = (ptr == 0 && size != 0) ? COI_OUT_OF_MEMORY :
+        COI::BufferWrite(
+            targ_buf,
+            0,
+            ptr,
+            size,
+            COI_COPY_UNSPECIFIED,
+            0, 0, 0);
+    free(ptr);
+    if (res != COI_SUCCESS) {
+        if (m_status != 0) {
+            m_status->result = translate_coi_error(res);
+            return false;
+        }
+        report_coi_error(c_buf_write, res);
+    }
+    return true;
+}
+
+bool OffloadDescriptor::offload_stack_memory_manager(
+    const void * stack_begin,
+    int  routine_id,
+    int  buf_size,
+    int  align,
+    bool *is_new)
+{   // Reuses or creates the persistent stack buffer for (stack_begin, routine_id).
+    mutex_locker_t locker(stack_alloc_lock);  // guards the work on m_device.m_persist_list
+
+    PersistData * new_el;
+    PersistDataList::iterator it_begin = m_device.m_persist_list.begin();
+    PersistDataList::iterator it_end;  // last entry marked obsolete so far
+    int erase = 0;  // number of entries marked obsolete
+
+    *is_new = false;
+
+    for (PersistDataList::iterator it = m_device.m_persist_list.begin();
+        it != m_device.m_persist_list.end(); it++) {
+        PersistData cur_el = *it;  // by-value copy of the current entry
+
+        if (stack_begin > it->stack_cpu_addr) {
+            // this stack data must be destroyed
+            m_destroy_stack.push_front(cur_el.stack_ptr_data);
+            it_end = it;
+            erase++;
+        }
+        else if (stack_begin == it->stack_cpu_addr) {
+            if (routine_id != it-> routine_id) {
+                // this stack data must be destroyed
+                m_destroy_stack.push_front(cur_el.stack_ptr_data);
+                it_end = it;
+                erase++;
+                break;
+            }
+            else {
+                // stack data is reused
+                m_stack_ptr_data = it->stack_ptr_data;
+                if (erase > 0) {
+                    // all obsolete stack sections must be erased from the list
+                    m_device.m_persist_list.erase(it_begin, ++it_end);
+
+                    m_in_datalen +=
+                        erase * sizeof(new_el->stack_ptr_data->mic_addr);  // sizeof only: new_el is not dereferenced
+                }
+                OFFLOAD_TRACE(3, "Reuse of stack buffer with addr %p\n",
+                                 m_stack_ptr_data->mic_addr);
+                return true;  // *is_new stays false
+            }
+        }
+        else if (stack_begin < it->stack_cpu_addr) {
+            break;
+        }
+    }
+
+    if (erase > 0) {
+        // all obsolete stack sections must be erased from the list
+        m_device.m_persist_list.erase(it_begin, ++it_end);
+        m_in_datalen += erase * sizeof(new_el->stack_ptr_data->mic_addr);  // one mic_addr-sized slot per erased entry
+    }
+    // new stack table is created
+    new_el = new PersistData(stack_begin, routine_id, buf_size);  // NOTE(review): never deleted here; the list stores a copy - confirm ownership
+    // create MIC buffer
+    COIRESULT res;
+    uint32_t buffer_flags = 0;
+
+    // create buffer with large pages if data length reaches the
+    // large page threshold
+    if (buf_size >= __offload_use_2mb_buffers) {
+        buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
+    }
+    res = COI::BufferCreate(buf_size,
+        COI_BUFFER_NORMAL,
+        buffer_flags,
+        0,
+        1,
+        &m_device.get_process(),
+        &new_el->stack_ptr_data->mic_buf);
+    if (res != COI_SUCCESS) {
+        if (m_status != 0) {  // record the failure in the caller's status block
+            m_status->result = translate_coi_error(res);
+        }
+        else if (m_is_mandatory) {  // no status block: a mandatory offload reports and exits
+            report_coi_error(c_buf_create, res);
+        }
+        return false;
+    }
+    // make buffer valid on the device.
+    res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
+        m_device.get_process(),
+        COI_BUFFER_VALID,
+        COI_BUFFER_NO_MOVE,
+        0, 0, 0);
+    if (res != COI_SUCCESS) {
+        if (m_status != 0) {
+            m_status->result = translate_coi_error(res);
+        }
+        else if (m_is_mandatory) {
+            report_coi_error(c_buf_set_state, res);
+        }
+        return false;
+    }
+    res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,  // and mark it invalid for COI_PROCESS_SOURCE
+        COI_PROCESS_SOURCE,
+        COI_BUFFER_INVALID,
+        COI_BUFFER_NO_MOVE,
+        0, 0, 0);
+    if (res != COI_SUCCESS) {
+        if (m_status != 0) {
+            m_status->result = translate_coi_error(res);
+        }
+        else if (m_is_mandatory) {
+            report_coi_error(c_buf_set_state, res);
+        }
+        return false;
+    }
+    // persistence algorithm requires the target stack to be zeroed initially
+    if (!nullify_target_stack(new_el->stack_ptr_data->mic_buf, buf_size)) {
+        return false;
+    }
+
+    m_stack_ptr_data = new_el->stack_ptr_data;
+    init_mic_address(m_stack_ptr_data);  // NOTE(review): return value is ignored - confirm this is intended
+    OFFLOAD_TRACE(3, "Allocating stack buffer with addr %p\n",
+                      m_stack_ptr_data->mic_addr);
+    m_device.m_persist_list.push_front(*new_el);  // the list receives a copy of *new_el
+    init_mic_address(new_el->stack_ptr_data);  // same PtrData as m_stack_ptr_data assigned above
+    *is_new = true;
+    return true;
+}
+
+bool OffloadDescriptor::setup_descriptors(
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int vars_total,
+    int entry_id,
+    const void *stack_addr
+)
+{
+    COIRESULT res;
+
+    OffloadTimer timer(get_timer_data(), c_offload_host_setup_buffers);
+
+    // make a copy of variable descriptors
+    m_vars_total = vars_total;
+    if (vars_total > 0) {
+        m_vars = (VarDesc*) malloc(m_vars_total * sizeof(VarDesc));
+        memcpy(m_vars, vars, m_vars_total * sizeof(VarDesc));
+        m_vars_extra = (VarExtra*) malloc(m_vars_total * sizeof(VarExtra));
+    }
+
+    // dependencies
+    m_in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * (m_vars_total  + 1));
+    if (m_vars_total > 0) {
+        m_out_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_vars_total);
+    }
+
+    // copyin/copyout data length
+    m_in_datalen = 0;
+    m_out_datalen = 0;
+
+    // First pass over variable descriptors
+    // - Calculate size of the input and output non-pointer data
+    // - Allocate buffers for input and output pointers
+    for (int i = 0; i < m_vars_total; i++) {
+        void*   alloc_base = NULL;
+        int64_t alloc_disp = 0;
+        int64_t alloc_size;
+        bool    src_is_for_mic = (m_vars[i].direction.out ||
+                                  m_vars[i].into == NULL);
+
+        const char *var_sname = "";
+        if (vars2 != NULL && i < vars_total) {
+            if (vars2[i].sname != NULL) {
+                var_sname = vars2[i].sname;
+            }
+        }
+        OFFLOAD_TRACE(2, "   VarDesc %d, var=%s, %s, %s\n",
+            i, var_sname,
+            vardesc_direction_as_string[m_vars[i].direction.bits],
+            vardesc_type_as_string[m_vars[i].type.src]);
+        if (vars2 != NULL && i < vars_total && vars2[i].dname != NULL) {
+            OFFLOAD_TRACE(2, "              into=%s, %s\n", vars2[i].dname,
+                vardesc_type_as_string[m_vars[i].type.dst]);
+        }
+        OFFLOAD_TRACE(2,
+            "              type_src=%d, type_dstn=%d, direction=%d, "
+            "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
+            "offset=%lld, size=%lld, count/disp=%lld, ptr=%p, into=%p\n",
+            m_vars[i].type.src,
+            m_vars[i].type.dst,
+            m_vars[i].direction.bits,
+            m_vars[i].alloc_if,
+            m_vars[i].free_if,
+            m_vars[i].align,
+            m_vars[i].mic_offset,
+            m_vars[i].flags.bits,
+            m_vars[i].offset,
+            m_vars[i].size,
+            m_vars[i].count,
+            m_vars[i].ptr,
+            m_vars[i].into);
+
+        if (m_vars[i].alloc != NULL) {
+            // array descriptor
+            const arr_desc *ap =
+                static_cast<const arr_desc*>(m_vars[i].alloc);
+
+            // debug dump
+            __arr_desc_dump("    ", "ALLOC", ap, 0);
+
+            __arr_data_offset_and_length(ap, alloc_disp, alloc_size);
+
+            alloc_base = reinterpret_cast<void*>(ap->base);
+        }
+
+        m_vars_extra[i].cpu_disp = 0;
+        m_vars_extra[i].cpu_offset = 0;
+        m_vars_extra[i].src_data = 0;
+        m_vars_extra[i].read_rng_src = 0;
+        m_vars_extra[i].read_rng_dst = 0;
+        // flag is_arr_ptr_el is 1 only for var_descs generated
+        // for c_data_ptr_array type
+        if (i < vars_total) {
+            m_vars_extra[i].is_arr_ptr_el = 0;
+        }
+
+        switch (m_vars[i].type.src) {
+            case c_data_ptr_array:
+                {
+                    const arr_desc *ap;
+                    const VarDesc3 *vd3 =
+                        static_cast<const VarDesc3*>(m_vars[i].ptr);
+                    int flags = vd3->array_fields;
+                    OFFLOAD_TRACE(2,
+                        "              pointer array flags = %04x\n", flags);
+                    OFFLOAD_TRACE(2,
+                        "              pointer array type is %s\n",
+                        vardesc_type_as_string[flags & 0x3f]);
+                    ap = static_cast<const arr_desc*>(vd3->ptr_array);
+                    __arr_desc_dump("              ", "ptr array", ap, 0);
+                    if (m_vars[i].into) {
+                        ap = static_cast<const arr_desc*>(m_vars[i].into);
+                        __arr_desc_dump(
+                            "              ", "into array", ap, 0);
+                    }
+                    if ((flags & (1<<flag_align_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->align_array);
+                        __arr_desc_dump(
+                            "              ", "align array", ap, 0);
+                    }
+                    if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
+                        __arr_desc_dump(
+                            "              ", "alloc_if array", ap, 0);
+                    }
+                    if ((flags & (1<<flag_free_if_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->free_if_array);
+                        __arr_desc_dump(
+                            "              ", "free_if array", ap, 0);
+                    }
+                    if ((flags & (1<<flag_extent_start_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->extent_start);
+                        __arr_desc_dump(
+                            "              ", "extent_start array", ap, 0);
+                    } else if ((flags &
+                        (1<<flag_extent_start_is_scalar)) != 0) {
+                        OFFLOAD_TRACE(2,
+                            "              extent_start scalar = %d\n",
+                            (int64_t)vd3->extent_start);
+                    }
+                    if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>
+                            (vd3->extent_elements);
+                        __arr_desc_dump(
+                            "              ", "extent_elements array", ap, 0);
+                    } else if ((flags &
+                        (1<<flag_extent_elements_is_scalar)) != 0) {
+                        OFFLOAD_TRACE(2,
+                            "              extent_elements scalar = %d\n",
+                            (int64_t)vd3->extent_elements);
+                    }
+                    if ((flags & (1<<flag_into_start_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->into_start);
+                        __arr_desc_dump(
+                            "              ", "into_start array", ap, 0);
+                    } else if ((flags &
+                        (1<<flag_into_start_is_scalar)) != 0) {
+                        OFFLOAD_TRACE(2,
+                            "              into_start scalar = %d\n",
+                            (int64_t)vd3->into_start);
+                    }
+                    if ((flags & (1<<flag_into_elements_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->into_elements);
+                        __arr_desc_dump(
+                            "              ", "into_elements array", ap, 0);
+                    } else if ((flags &
+                        (1<<flag_into_elements_is_scalar)) != 0) {
+                        OFFLOAD_TRACE(2,
+                            "              into_elements scalar = %d\n",
+                            (int64_t)vd3->into_elements);
+                    }
+                    if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->alloc_start);
+                        __arr_desc_dump(
+                            "              ", "alloc_start array", ap, 0);
+                    } else if ((flags &
+                        (1<<flag_alloc_start_is_scalar)) != 0) {
+                        OFFLOAD_TRACE(2,
+                            "              alloc_start scalar = %d\n",
+                            (int64_t)vd3->alloc_start);
+                    }
+                    if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
+                        ap = static_cast<const arr_desc*>(vd3->alloc_elements);
+                        __arr_desc_dump(
+                            "              ", "alloc_elements array", ap, 0);
+                    } else if ((flags &
+                        (1<<flag_alloc_elements_is_scalar)) != 0) {
+                        OFFLOAD_TRACE(2,
+                            "              alloc_elements scalar = %d\n",
+                            (int64_t)vd3->alloc_elements);
+                    }
+                }
+                if (!gen_var_descs_for_pointer_array(i)) {
+                    return false;
+                }
+                break;
+
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+                // In all uses later
+                // VarDesc.size will have the length of the data to be
+                // transferred
+                // VarDesc.disp will have an offset from base
+                if (m_vars[i].type.src == c_cean_var) {
+                    // array descriptor
+                    const arr_desc *ap =
+                        static_cast<const arr_desc*>(m_vars[i].ptr);
+
+                    // debug dump
+                    __arr_desc_dump("", "IN/OUT", ap, 0);
+
+                    // offset and length are derived from the array descriptor
+                    __arr_data_offset_and_length(ap, m_vars[i].disp,
+                                                 m_vars[i].size);
+                    if (!is_arr_desc_contiguous(ap)) {
+                        m_vars[i].flags.is_noncont_src = 1;
+                        m_vars_extra[i].read_rng_src =
+                            init_read_ranges_arr_desc(ap);
+                    }
+                    // all necessary information about length and offset is
+                    // transferred in var descriptor. There is no need to send
+                    // array descriptor to the target side.
+                    m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
+                }
+                else {
+                    m_vars[i].size *= m_vars[i].count;
+                    m_vars[i].disp = 0;
+                }
+
+                if (m_vars[i].direction.bits) {
+                    // make sure that transfer size > 0
+                    if (m_vars[i].size <= 0) {
+                        LIBOFFLOAD_ERROR(c_zero_or_neg_transfer_size);
+                        exit(1);
+                    }
+
+                    if (m_vars[i].flags.is_static) {
+                        PtrData *ptr_data;
+
+                        // find data associated with variable
+                        if (!find_ptr_data(ptr_data,
+                                           m_vars[i].ptr,
+                                           m_vars[i].disp,
+                                           m_vars[i].size,
+                                           false)) {
+                            return false;
+                        }
+
+                        if (ptr_data != 0) {
+                            // offset to base from the beginning of the buffer
+                            // memory
+                            m_vars[i].offset =
+                                (char*) m_vars[i].ptr -
+                                (char*) ptr_data->cpu_addr.start();
+                        }
+                        else {
+                            m_vars[i].flags.is_static = false;
+                            if (m_vars[i].into == NULL) {
+                                m_vars[i].flags.is_static_dstn = false;
+                            }
+                        }
+                        m_vars_extra[i].src_data = ptr_data;
+                    }
+
+                    if (m_is_openmp) {
+                        if (m_vars[i].flags.is_static) {
+                            // Static data is transferred only by omp target
+                            // update construct which passes zeros for
+                            // alloc_if and free_if.
+                            if (m_vars[i].alloc_if || m_vars[i].free_if) {
+                                m_vars[i].direction.bits = c_parameter_nocopy;
+                            }
+                        }
+                        else {
+                            AutoData *auto_data;
+                            if (m_vars[i].alloc_if) {
+                                auto_data = m_device.insert_auto_data(
+                                    m_vars[i].ptr, m_vars[i].size);
+                                auto_data->add_reference();
+                            }
+                            else {
+                                // TODO: what should be done if var is not in
+                                // the table?
+                                auto_data = m_device.find_auto_data(
+                                    m_vars[i].ptr);
+                            }
+
+                            // For automatic variables data is transferred
+                            // only if alloc_if == 0 && free_if == 0
+                            // or reference count is 1
+                            if ((m_vars[i].alloc_if || m_vars[i].free_if) &&
+                                auto_data != 0 &&
+                                auto_data->get_reference() != 1) {
+                                m_vars[i].direction.bits = c_parameter_nocopy;
+                            }
+
+                            // save data for later use
+                            m_vars_extra[i].auto_data = auto_data;
+                        }
+                    }
+
+                    if (m_vars[i].direction.in &&
+                        !m_vars[i].flags.is_static) {
+                        m_in_datalen += m_vars[i].size;
+
+                        // for non-static target destination defined as CEAN
+                        // expression we pass to target its size and dist
+                        if (m_vars[i].into == NULL &&
+                            m_vars[i].type.src == c_cean_var) {
+                            m_in_datalen += 2 * sizeof(uint64_t);
+                        }
+                        m_need_runfunction = true;
+                    }
+                    if (m_vars[i].direction.out &&
+                        !m_vars[i].flags.is_static) {
+                        m_out_datalen += m_vars[i].size;
+                        m_need_runfunction = true;
+                    }
+                }
+                break;
+
+            case c_dv:
+                if (m_vars[i].direction.bits ||
+                    m_vars[i].alloc_if ||
+                    m_vars[i].free_if) {
+                    ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].ptr);
+
+                    // debug dump
+                    __dv_desc_dump("IN/OUT", dvp);
+
+                    // send dope vector contents excluding base
+                    m_in_datalen += m_vars[i].size - sizeof(uint64_t);
+                    m_need_runfunction = true;
+                }
+                break;
+
+            case c_string_ptr:
+                if ((m_vars[i].direction.bits ||
+                     m_vars[i].alloc_if ||
+                     m_vars[i].free_if) &&
+                    m_vars[i].size == 0) {
+                    m_vars[i].size = 1;
+                    m_vars[i].count =
+                        strlen(*static_cast<char**>(m_vars[i].ptr)) + 1;
+                }
+                /* fallthru */
+
+            case c_data_ptr:
+                if (m_vars[i].flags.is_stack_buf &&
+                    !m_vars[i].direction.bits &&
+                    m_vars[i].alloc_if) {
+                    // this var_desc is for stack buffer
+                    bool is_new;
+
+                    if (!offload_stack_memory_manager(
+                            stack_addr, entry_id,
+                            m_vars[i].count, m_vars[i].align, &is_new)) {
+                        return false;
+                    }
+                    if (is_new) {
+                        m_compute_buffers.push_back(
+                            m_stack_ptr_data->mic_buf);
+                        m_device.m_persist_list.front().cpu_stack_addr =
+                            static_cast<char*>(m_vars[i].ptr);
+                    }
+                    else {
+                        m_vars[i].flags.sink_addr = 1;
+                        m_in_datalen += sizeof(m_stack_ptr_data->mic_addr);
+                    }
+                    m_vars[i].size = m_destroy_stack.size();
+                    m_vars_extra[i].src_data = m_stack_ptr_data;
+                    // need to add reference for buffer
+                    m_need_runfunction = true;
+                    break;
+                }
+                /* fallthru */
+
+            case c_cean_var_ptr:
+            case c_dv_ptr:
+                if (m_vars[i].type.src == c_cean_var_ptr) {
+                    // array descriptor
+                    const arr_desc *ap =
+                        static_cast<const arr_desc*>(m_vars[i].ptr);
+
+                    // debug dump
+                    __arr_desc_dump("", "IN/OUT", ap, 1);
+
+                    // offset and length are derived from the array descriptor
+                    __arr_data_offset_and_length(ap, m_vars[i].disp,
+                                                 m_vars[i].size);
+
+                    if (!is_arr_desc_contiguous(ap)) {
+                        m_vars[i].flags.is_noncont_src = 1;
+                        m_vars_extra[i].read_rng_src =
+                            init_read_ranges_arr_desc(ap);
+                    }
+                    // all necessary information about length and offset is
+                    // transferred in var descriptor. There is no need to send
+                    // array descriptor to the target side.
+                    m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
+                }
+                else if (m_vars[i].type.src == c_dv_ptr) {
+                    // need to send DV to the device unless it is 'nocopy'
+                    if (m_vars[i].direction.bits ||
+                        m_vars[i].alloc_if ||
+                        m_vars[i].free_if) {
+                        ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].ptr);
+
+                        // debug dump
+                        __dv_desc_dump("IN/OUT", dvp);
+
+                        m_vars[i].direction.bits = c_parameter_in;
+                    }
+
+                    // no displacement
+                    m_vars[i].disp = 0;
+                }
+                else {
+                    // c_data_ptr or c_string_ptr
+                    m_vars[i].size *= m_vars[i].count;
+                    m_vars[i].disp = 0;
+                }
+
+                if (m_vars[i].direction.bits ||
+                    m_vars[i].alloc_if ||
+                    m_vars[i].free_if) {
+                    PtrData *ptr_data;
+
+                    // check that buffer length >= 0
+                    if (m_vars[i].alloc_if &&
+                        m_vars[i].disp + m_vars[i].size < 0) {
+                        LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
+                        exit(1);
+                    }
+
+                    // base address
+                    void *base = *static_cast<void**>(m_vars[i].ptr);
+
+                    // allocate buffer if we have no INTO and don't need
+                    // allocation for the ptr at target
+                    if (src_is_for_mic) {
+                        if (m_vars[i].flags.is_stack_buf) {
+                            // for stack persistent objects ptr data is created
+                            // by var_desc with number 0.
+                            // Its ptr_data is stored at m_stack_ptr_data
+                            ptr_data = m_stack_ptr_data;
+                            m_vars[i].flags.sink_addr = 1;
+                        }
+                        else if (m_vars[i].alloc_if) {
+                            // add new entry
+                            if (!alloc_ptr_data(
+                                    ptr_data,
+                                    base,
+                                    (alloc_base != NULL) ?
+                                        alloc_disp : m_vars[i].disp,
+                                    (alloc_base != NULL) ?
+                                        alloc_size : m_vars[i].size,
+                                    alloc_disp,
+                                    (alloc_base != NULL) ?
+                                        0 : m_vars[i].align)) {
+                                return false;
+                            }
+
+                            if (ptr_data->add_reference() == 0 &&
+                                ptr_data->mic_buf != 0) {
+                                // add buffer to the list of buffers that
+                                // are passed to dispatch call
+                                m_compute_buffers.push_back(
+                                    ptr_data->mic_buf);
+                            }
+                            else {
+                                // will send buffer address to device
+                                m_vars[i].flags.sink_addr = 1;
+                            }
+
+                            if (!ptr_data->is_static) {
+                                // need to add reference for buffer
+                                m_need_runfunction = true;
+                            }
+                        }
+                        else {
+                            bool error_if_not_found = true;
+                            if (m_is_openmp) {
+                                // For omp target update variable is ignored
+                                // if it does not exist.
+                                if (!m_vars[i].alloc_if &&
+                                    !m_vars[i].free_if) {
+                                    error_if_not_found = false;
+                                }
+                            }
+
+                            // use existing association from pointer table
+                            if (!find_ptr_data(ptr_data,
+                                               base,
+                                               m_vars[i].disp,
+                                               m_vars[i].size,
+                                               error_if_not_found)) {
+                                return false;
+                            }
+
+                            if (m_is_openmp) {
+                                // make var nocopy if it does not exist
+                                if (ptr_data == 0) {
+                                    m_vars[i].direction.bits =
+                                        c_parameter_nocopy;
+                                }
+                            }
+
+                            if (ptr_data != 0) {
+                                m_vars[i].flags.sink_addr = 1;
+                            }
+                        }
+
+                        if (ptr_data != 0) {
+                            if (m_is_openmp) {
+                                // data is transferred only if
+                                // alloc_if == 0 && free_if == 0
+                                // or reference count is 1
+                                if ((m_vars[i].alloc_if ||
+                                     m_vars[i].free_if) &&
+                                    ptr_data->get_reference() != 1) {
+                                    m_vars[i].direction.bits =
+                                        c_parameter_nocopy;
+                                }
+                            }
+
+                            if (ptr_data->alloc_disp != 0) {
+                                m_vars[i].flags.alloc_disp = 1;
+                                m_in_datalen += sizeof(alloc_disp);
+                            }
+
+                            if (m_vars[i].flags.sink_addr) {
+                                // get buffer's address on the sink
+                                if (!init_mic_address(ptr_data)) {
+                                    return false;
+                                }
+
+                                m_in_datalen += sizeof(ptr_data->mic_addr);
+                            }
+
+                            if (!ptr_data->is_static && m_vars[i].free_if) {
+                                // need to decrement buffer reference on target
+                                m_need_runfunction = true;
+                            }
+
+                            // offset to base from the beginning of the buffer
+                            // memory
+                            m_vars[i].offset = (char*) base -
+                                (char*) ptr_data->cpu_addr.start();
+
+                            // copy other pointer properties to var descriptor
+                            m_vars[i].mic_offset = ptr_data->mic_offset;
+                            m_vars[i].flags.is_static = ptr_data->is_static;
+                        }
+                    }
+                    else {
+                        if (!find_ptr_data(ptr_data,
+                                           base,
+                                           m_vars[i].disp,
+                                           m_vars[i].size,
+                                           false)) {
+                            return false;
+                        }
+                        if (ptr_data) {
+                            m_vars[i].offset =
+                                (char*) base -
+                                (char*) ptr_data->cpu_addr.start();
+                        }
+                    }
+
+                    // save pointer data
+                    m_vars_extra[i].src_data = ptr_data;
+                }
+                break;
+
+            case c_func_ptr:
+                if (m_vars[i].direction.in) {
+                    m_in_datalen += __offload_funcs.max_name_length();
+                }
+                if (m_vars[i].direction.out) {
+                    m_out_datalen += __offload_funcs.max_name_length();
+                }
+                m_need_runfunction = true;
+                break;
+
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                ArrDesc *dvp;
+                if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
+                    const arr_desc *ap;
+                    ap = static_cast<const arr_desc*>(m_vars[i].ptr);
+
+                    dvp = (m_vars[i].type.src == c_dv_data_slice) ?
+                          reinterpret_cast<ArrDesc*>(ap->base) :
+                          *reinterpret_cast<ArrDesc**>(ap->base);
+                }
+                else {
+                    dvp = (m_vars[i].type.src == c_dv_data) ?
+                          static_cast<ArrDesc*>(m_vars[i].ptr) :
+                          *static_cast<ArrDesc**>(m_vars[i].ptr);
+                }
+
+                // if allocatable dope vector isn't allocated don't
+                // transfer its data
+                if (!__dv_is_allocated(dvp)) {
+                    m_vars[i].direction.bits = c_parameter_nocopy;
+                    m_vars[i].alloc_if = 0;
+                    m_vars[i].free_if = 0;
+                }
+                if (m_vars[i].direction.bits ||
+                    m_vars[i].alloc_if ||
+                    m_vars[i].free_if) {
+                    const arr_desc *ap;
+
+                    if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
+                        ap = static_cast<const arr_desc*>(m_vars[i].ptr);
+
+                        // debug dump
+                        __arr_desc_dump("", "IN/OUT", ap, 0);
+                    }
+                    if (!__dv_is_contiguous(dvp)) {
+                        m_vars[i].flags.is_noncont_src = 1;
+                        m_vars_extra[i].read_rng_src =
+                            init_read_ranges_dv(dvp);
+                    }
+
+                    // size and displacement
+                    if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
+                        // offset and length are derived from the
+                        // array descriptor
+                        __arr_data_offset_and_length(ap,
+                                                     m_vars[i].disp,
+                                                     m_vars[i].size);
+                        if (m_vars[i].direction.bits) {
+                            if (!is_arr_desc_contiguous(ap)) {
+                                if (m_vars[i].flags.is_noncont_src) {
+                                    LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
+                                    return false;
+                                }
+                                m_vars[i].flags.is_noncont_src = 1;
+                                m_vars_extra[i].read_rng_src =
+                                    init_read_ranges_arr_desc(ap);
+                            }
+                        }
+                    }
+                    else {
+                        if (m_vars[i].flags.has_length) {
+                            m_vars[i].size =
+                                __dv_data_length(dvp, m_vars[i].count);
+                        }
+                        else {
+                            m_vars[i].size = __dv_data_length(dvp);
+                        }
+                        m_vars[i].disp = 0;
+                    }
+
+                    // check that length >= 0
+                    if (m_vars[i].alloc_if &&
+                        (m_vars[i].disp + m_vars[i].size < 0)) {
+                        LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
+                        exit(1);
+                    }
+
+                    // base address
+                    void *base = reinterpret_cast<void*>(dvp->Base);
+                    PtrData *ptr_data;
+
+                    // allocate buffer if we have no INTO and don't need
+                    // allocation for the ptr at target
+                    if (src_is_for_mic) {
+                        if (m_vars[i].alloc_if) {
+                            // add new entry
+                            if (!alloc_ptr_data(
+                                    ptr_data,
+                                    base,
+                                    (alloc_base != NULL) ?
+                                        alloc_disp : m_vars[i].disp,
+                                    (alloc_base != NULL) ?
+                                        alloc_size : m_vars[i].size,
+                                    alloc_disp,
+                                    (alloc_base != NULL) ?
+                                        0 : m_vars[i].align)) {
+                                return false;
+                            }
+
+                            if (ptr_data->add_reference() == 0 &&
+                                ptr_data->mic_buf != 0) {
+                                // add buffer to the list of buffers
+                                // that are passed to dispatch call
+                                m_compute_buffers.push_back(
+                                    ptr_data->mic_buf);
+                            }
+                            else {
+                                // will send buffer address to device
+                                m_vars[i].flags.sink_addr = 1;
+                            }
+
+                            if (!ptr_data->is_static) {
+                                // need to add reference for buffer
+                                m_need_runfunction = true;
+                            }
+                        }
+                        else {
+                            bool error_if_not_found = true;
+                            if (m_is_openmp) {
+                                // For omp target update variable is ignored
+                                // if it does not exist.
+                                if (!m_vars[i].alloc_if &&
+                                    !m_vars[i].free_if) {
+                                    error_if_not_found = false;
+                                }
+                            }
+
+                            // use existing association from pointer table
+                            if (!find_ptr_data(ptr_data,
+                                               base,
+                                               m_vars[i].disp,
+                                               m_vars[i].size,
+                                               error_if_not_found)) {
+                                return false;
+                            }
+
+                            if (m_is_openmp) {
+                                // make var nocopy if it does not exist
+                                if (ptr_data == 0) {
+                                    m_vars[i].direction.bits =
+                                        c_parameter_nocopy;
+                                }
+                            }
+
+                            if (ptr_data != 0) {
+                                // need to update base in dope vector on device
+                                m_vars[i].flags.sink_addr = 1;
+                            }
+                        }
+
+                        if (ptr_data != 0) {
+                            if (m_is_openmp) {
+                                // data is transferred only if
+                                // alloc_if == 0 && free_if == 0
+                                // or reference count is 1
+                                if ((m_vars[i].alloc_if ||
+                                     m_vars[i].free_if) &&
+                                    ptr_data->get_reference() != 1) {
+                                    m_vars[i].direction.bits =
+                                        c_parameter_nocopy;
+                                }
+                            }
+
+                            if (ptr_data->alloc_disp != 0) {
+                                m_vars[i].flags.alloc_disp = 1;
+                                m_in_datalen += sizeof(alloc_disp);
+                            }
+
+                            if (m_vars[i].flags.sink_addr) {
+                                // get buffer's address on the sink
+                                if (!init_mic_address(ptr_data)) {
+                                    return false;
+                                }
+
+                                m_in_datalen += sizeof(ptr_data->mic_addr);
+                            }
+
+                            if (!ptr_data->is_static && m_vars[i].free_if) {
+                                // need to decrement buffer reference on target
+                                m_need_runfunction = true;
+                            }
+
+                            // offset to base from the beginning of the buffer
+                            // memory
+                            m_vars[i].offset =
+                                (char*) base -
+                                (char*) ptr_data->cpu_addr.start();
+
+                            // copy other pointer properties to var descriptor
+                            m_vars[i].mic_offset = ptr_data->mic_offset;
+                            m_vars[i].flags.is_static = ptr_data->is_static;
+                        }
+                    }
+                    else { // !src_is_for_mic
+                        if (!find_ptr_data(ptr_data,
+                                           base,
+                                           m_vars[i].disp,
+                                           m_vars[i].size,
+                                           false)) {
+                            return false;
+                        }
+                        m_vars[i].offset = !ptr_data ? 0 :
+                                (char*) base -
+                                (char*) ptr_data->cpu_addr.start();
+                    }
+
+                    // save pointer data
+                    m_vars_extra[i].src_data = ptr_data;
+                }
+                break;
+
+            default:
+                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
+                LIBOFFLOAD_ABORT;
+        }
+        if (m_vars[i].type.src == c_data_ptr_array) {
+            continue;
+        }
+
+        if (src_is_for_mic && m_vars[i].flags.is_stack_buf) {
+            m_vars[i].offset = static_cast<char*>(m_vars[i].ptr) -
+                m_device.m_persist_list.front().cpu_stack_addr;
+        }
+        // if source is used at CPU save its offset and disp
+        if (m_vars[i].into == NULL || m_vars[i].direction.in) {
+            m_vars_extra[i].cpu_offset = m_vars[i].offset;
+            m_vars_extra[i].cpu_disp   = m_vars[i].disp;
+        }
+
+        // If "into" is defined we need to do similar work for it
+        if (!m_vars[i].into) {
+            continue;
+        }
+
+        int64_t into_disp =0, into_offset = 0;
+
+        switch (m_vars[i].type.dst) {
+            case c_data_ptr_array:
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var: {
+                int64_t size = m_vars[i].size;
+
+                if (m_vars[i].type.dst == c_cean_var) {
+                    // array descriptor
+                    const arr_desc *ap =
+                        static_cast<const arr_desc*>(m_vars[i].into);
+
+                    // debug dump
+                    __arr_desc_dump("    ", "INTO", ap, 0);
+
+                    // offset and length are derived from the array descriptor
+                    __arr_data_offset_and_length(ap, into_disp, size);
+
+                    if (!is_arr_desc_contiguous(ap)) {
+                        m_vars[i].flags.is_noncont_dst = 1;
+                        m_vars_extra[i].read_rng_dst =
+                            init_read_ranges_arr_desc(ap);
+                        if (!cean_ranges_match(
+                            m_vars_extra[i].read_rng_src,
+                            m_vars_extra[i].read_rng_dst)) {
+                            LIBOFFLOAD_ERROR(c_ranges_dont_match);
+                            exit(1);
+                        }
+                    }
+                    m_vars[i].into = reinterpret_cast<void*>(ap->base);
+                }
+
+                int64_t size_src = m_vars_extra[i].read_rng_src ?
+                    cean_get_transf_size(m_vars_extra[i].read_rng_src) :
+                    m_vars[i].size;
+                int64_t size_dst = m_vars_extra[i].read_rng_dst ?
+                    cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
+                    size;
+                // The "into" size is expected to be no smaller
+                // than the src size
+                if (size_src > size_dst) {
+                    LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
+                                     size_src, size_dst);
+                    exit(1);
+                }
+
+                if (m_vars[i].direction.bits) {
+                    if (m_vars[i].flags.is_static_dstn) {
+                        PtrData *ptr_data;
+
+                        // find data associated with variable
+                        if (!find_ptr_data(ptr_data, m_vars[i].into,
+                                           into_disp, size, false)) {
+                            return false;
+                        }
+                        if (ptr_data != 0) {
+                            // offset to base from the beginning of the buffer
+                            // memory
+                            into_offset =
+                                (char*) m_vars[i].into -
+                                (char*) ptr_data->cpu_addr.start();
+                        }
+                        else {
+                            m_vars[i].flags.is_static_dstn = false;
+                        }
+                        m_vars_extra[i].dst_data = ptr_data;
+                    }
+                }
+
+                if (m_vars[i].direction.in &&
+                    !m_vars[i].flags.is_static_dstn) {
+                    m_in_datalen += m_vars[i].size;
+
+                    // for non-static target destination defined as CEAN
+                    // expression we pass to target its size and dist
+                    if (m_vars[i].type.dst == c_cean_var) {
+                        m_in_datalen += 2 * sizeof(uint64_t);
+                    }
+                    m_need_runfunction = true;
+                }
+                break;
+            }
+
+            case c_dv:
+                if (m_vars[i].direction.bits ||
+                    m_vars[i].alloc_if ||
+                    m_vars[i].free_if) {
+                    ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].into);
+
+                    // debug dump
+                    __dv_desc_dump("INTO", dvp);
+
+                    // send dope vector contents excluding base
+                    m_in_datalen += m_vars[i].size - sizeof(uint64_t);
+                    m_need_runfunction = true;
+                }
+                break;
+
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_ptr: {
+                int64_t size = m_vars[i].size;
+
+                if (m_vars[i].type.dst == c_cean_var_ptr) {
+                    // array descriptor
+                    const arr_desc *ap =
+                        static_cast<const arr_desc*>(m_vars[i].into);
+
+                    // debug dump
+                    __arr_desc_dump("    ", "INTO", ap, 1);
+
+                    // offset and length are derived from the array descriptor
+                    __arr_data_offset_and_length(ap, into_disp, size);
+
+                    if (!is_arr_desc_contiguous(ap)) {
+                        m_vars[i].flags.is_noncont_src = 1;
+                        m_vars_extra[i].read_rng_dst =
+                            init_read_ranges_arr_desc(ap);
+                        if (!cean_ranges_match(
+                            m_vars_extra[i].read_rng_src,
+                            m_vars_extra[i].read_rng_dst)) {
+                            LIBOFFLOAD_ERROR(c_ranges_dont_match);
+                        }
+                    }
+                    m_vars[i].into = reinterpret_cast<char**>(ap->base);
+                }
+                else if (m_vars[i].type.dst == c_dv_ptr) {
+                    // need to send DV to the device unless it is 'nocopy'
+                    if (m_vars[i].direction.bits ||
+                        m_vars[i].alloc_if ||
+                        m_vars[i].free_if) {
+                        ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].into);
+
+                        // debug dump
+                        __dv_desc_dump("INTO", dvp);
+
+                        m_vars[i].direction.bits = c_parameter_in;
+                    }
+                }
+
+                int64_t size_src = m_vars_extra[i].read_rng_src ?
+                    cean_get_transf_size(m_vars_extra[i].read_rng_src) :
+                    m_vars[i].size;
+                int64_t size_dst = m_vars_extra[i].read_rng_dst ?
+                    cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
+                    size;
+                // It's supposed that "into" size must be not less than
+                // src size
+                if (size_src > size_dst) {
+                    LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
+                                     size_src, size_dst);
+                    exit(1);
+                }
+
+                if (m_vars[i].direction.bits) {
+                    PtrData *ptr_data;
+
+                    // base address
+                    void *base = *static_cast<void**>(m_vars[i].into);
+
+                    if (m_vars[i].direction.in) {
+                        // allocate buffer
+                        if (m_vars[i].flags.is_stack_buf) {
+                            // for stack persistent objects ptr data is created
+                            // by var_desc with number 0.
+                            // Its ptr_data is stored at m_stack_ptr_data
+                            ptr_data = m_stack_ptr_data;
+                            m_vars[i].flags.sink_addr = 1;
+                        }
+                        else if (m_vars[i].alloc_if) {
+                            // add new entry
+                            if (!alloc_ptr_data(
+                                    ptr_data,
+                                    base,
+                                    (alloc_base != NULL) ?
+                                        alloc_disp : into_disp,
+                                    (alloc_base != NULL) ?
+                                        alloc_size : size,
+                                    alloc_disp,
+                                    (alloc_base != NULL) ?
+                                        0 : m_vars[i].align)) {
+                                return false;
+                            }
+
+                            if (ptr_data->add_reference() == 0 &&
+                                ptr_data->mic_buf != 0) {
+                                // add buffer to the list of buffers that
+                                // are passed to dispatch call
+                                m_compute_buffers.push_back(
+                                    ptr_data->mic_buf);
+                            }
+                            else {
+                                // will send buffer address to device
+                                m_vars[i].flags.sink_addr = 1;
+                            }
+
+                            if (!ptr_data->is_static) {
+                                // need to add reference for buffer
+                                m_need_runfunction = true;
+                            }
+                        }
+                        else {
+                            // use existing association from pointer table
+                            if (!find_ptr_data(ptr_data, base, into_disp, size)) {
+                                return false;
+                            }
+                            m_vars[i].flags.sink_addr = 1;
+                        }
+
+                        if (ptr_data->alloc_disp != 0) {
+                            m_vars[i].flags.alloc_disp = 1;
+                            m_in_datalen += sizeof(alloc_disp);
+                        }
+
+                        if (m_vars[i].flags.sink_addr) {
+                            // get buffers's address on the sink
+                            if (!init_mic_address(ptr_data)) {
+                                return false;
+                            }
+
+                            m_in_datalen += sizeof(ptr_data->mic_addr);
+                        }
+
+                        if (!ptr_data->is_static && m_vars[i].free_if) {
+                            // need to decrement buffer reference on target
+                            m_need_runfunction = true;
+                        }
+
+                        // copy other pointer properties to var descriptor
+                        m_vars[i].mic_offset = ptr_data->mic_offset;
+                        m_vars[i].flags.is_static_dstn = ptr_data->is_static;
+                    }
+                    else {
+                        if (!find_ptr_data(ptr_data,
+                                           base,
+                                           into_disp,
+                                           m_vars[i].size,
+                                           false)) {
+                            return false;
+                        }
+                    }
+                    if (ptr_data) {
+                        into_offset = ptr_data ?
+                            (char*) base -
+                            (char*) ptr_data->cpu_addr.start() :
+                            0;
+                    }
+                    // save pointer data
+                    m_vars_extra[i].dst_data = ptr_data;
+                }
+                break;
+            }
+
+            case c_func_ptr:
+                break;
+
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                if (m_vars[i].direction.bits ||
+                    m_vars[i].alloc_if ||
+                    m_vars[i].free_if) {
+                    const arr_desc *ap;
+                    ArrDesc *dvp;
+                    PtrData *ptr_data;
+                    int64_t disp;
+                    int64_t size;
+
+                    if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
+                        ap = static_cast<const arr_desc*>(m_vars[i].into);
+
+                        // debug dump
+                        __arr_desc_dump("    ", "INTO", ap, 0);
+
+                        dvp = (m_vars[i].type.dst == c_dv_data_slice) ?
+                              reinterpret_cast<ArrDesc*>(ap->base) :
+                              *reinterpret_cast<ArrDesc**>(ap->base);
+                    }
+                    else {
+                        dvp = (m_vars[i].type.dst == c_dv_data) ?
+                              static_cast<ArrDesc*>(m_vars[i].into) :
+                              *static_cast<ArrDesc**>(m_vars[i].into);
+                    }
+                    if (!__dv_is_contiguous(dvp)) {
+                        m_vars[i].flags.is_noncont_dst = 1;
+                        m_vars_extra[i].read_rng_dst =
+                            init_read_ranges_dv(dvp);
+                    }
+                    // size and displacement
+                    if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
+                        // offset and length are derived from the array
+                        // descriptor
+                        __arr_data_offset_and_length(ap, into_disp, size);
+                        if (m_vars[i].direction.bits) {
+                            if (!is_arr_desc_contiguous(ap)) {
+                                if (m_vars[i].flags.is_noncont_dst) {
+                                    LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
+                                    return false;
+                                }
+                                m_vars[i].flags.is_noncont_dst = 1;
+                                m_vars_extra[i].read_rng_dst =
+                                    init_read_ranges_arr_desc(ap);
+                                if (!cean_ranges_match(
+                                    m_vars_extra[i].read_rng_src,
+                                    m_vars_extra[i].read_rng_dst)) {
+                                    LIBOFFLOAD_ERROR(c_ranges_dont_match);
+                                }
+                            }
+                        }
+                    }
+                    else {
+                        if (m_vars[i].flags.has_length) {
+                            size = __dv_data_length(dvp, m_vars[i].count);
+                        }
+                        else {
+                            size = __dv_data_length(dvp);
+                        }
+                        disp = 0;
+                    }
+
+                    int64_t size_src =
+                        m_vars_extra[i].read_rng_src ?
+                        cean_get_transf_size(m_vars_extra[i].read_rng_src) :
+                        m_vars[i].size;
+                    int64_t size_dst =
+                        m_vars_extra[i].read_rng_dst ?
+                        cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
+                        size;
+                    // It's supposed that "into" size must be not less
+                    // than src size
+                    if (size_src > size_dst) {
+                        LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
+                            size_src, size_dst);
+                        exit(1);
+                    }
+
+                    // base address
+                    void *base = reinterpret_cast<void*>(dvp->Base);
+
+                    // allocate buffer
+                    if (m_vars[i].direction.in) {
+                        if (m_vars[i].alloc_if) {
+                            // add new entry
+                            if (!alloc_ptr_data(
+                                    ptr_data,
+                                    base,
+                                    (alloc_base != NULL) ?
+                                        alloc_disp : into_disp,
+                                    (alloc_base != NULL) ?
+                                        alloc_size : size,
+                                    alloc_disp,
+                                    (alloc_base != NULL) ?
+                                        0 : m_vars[i].align)) {
+                                return false;
+                            }
+                            if (ptr_data->add_reference() == 0 &&
+                                ptr_data->mic_buf !=0) {
+                                // add buffer to the list of buffers
+                                // that are passed to dispatch call
+                                m_compute_buffers.push_back(
+                                    ptr_data->mic_buf);
+                            }
+                            else {
+                                // will send buffer address to device
+                                m_vars[i].flags.sink_addr = 1;
+                            }
+
+                            if (!ptr_data->is_static) {
+                                // need to add reference for buffer
+                                m_need_runfunction = true;
+                            }
+                        }
+                        else {
+                            // use existing association from pointer table
+                            if (!find_ptr_data(ptr_data, base, into_disp, size)) {
+                                return false;
+                            }
+
+                            // need to update base in dope vector on device
+                            m_vars[i].flags.sink_addr = 1;
+                        }
+
+                        if (ptr_data->alloc_disp != 0) {
+                            m_vars[i].flags.alloc_disp = 1;
+                            m_in_datalen += sizeof(alloc_disp);
+                        }
+
+                        if (m_vars[i].flags.sink_addr) {
+                            // get buffers's address on the sink
+                            if (!init_mic_address(ptr_data)) {
+                                return false;
+                            }
+                            m_in_datalen += sizeof(ptr_data->mic_addr);
+                        }
+
+                        if (!ptr_data->is_static && m_vars[i].free_if) {
+                            // need to decrement buffer reference on target
+                            m_need_runfunction = true;
+                        }
+
+                        // offset to base from the beginning of the buffer
+                        // memory
+                        into_offset =
+                            (char*) base - (char*) ptr_data->cpu_addr.start();
+
+                        // copy other pointer properties to var descriptor
+                        m_vars[i].mic_offset = ptr_data->mic_offset;
+                        m_vars[i].flags.is_static_dstn = ptr_data->is_static;
+                    }
+                    else { // src_is_for_mic
+                        if (!find_ptr_data(ptr_data,
+                                           base,
+                                           into_disp,
+                                           size,
+                                           false)) {
+                            return false;
+                        }
+                        into_offset = !ptr_data ?
+                            0 :
+                            (char*) base - (char*) ptr_data->cpu_addr.start();
+                    }
+
+                    // save pointer data
+                    m_vars_extra[i].dst_data = ptr_data;
+                }
+                break;
+
+            default:
+                LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
+                LIBOFFLOAD_ABORT;
+        }
+        // if into is used at CPU save its offset and disp
+        if (m_vars[i].direction.out) {
+            m_vars_extra[i].cpu_offset = into_offset;
+            m_vars_extra[i].cpu_disp   = into_disp;
+        }
+        else {
+            if (m_vars[i].flags.is_stack_buf) {
+                into_offset = static_cast<char*>(m_vars[i].into) -
+                    m_device.m_persist_list.front().cpu_stack_addr;
+            }
+            m_vars[i].offset = into_offset;
+            m_vars[i].disp   = into_disp;
+        }
+    }
+
+    return true;
+}
+
+// Size the run-function payload and build the function descriptor for 'name'.
+// Returns false only if creating the in/out buffer fails and m_status is set.
+bool OffloadDescriptor::setup_misc_data(const char *name)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_setup_misc_data);
+
+    // we can skip run function call together with wait if offloaded
+    // region is empty and there is no user defined non-pointer IN/OUT data
+    if (m_need_runfunction) {
+        // variable descriptors are sent as input data
+        m_in_datalen += m_vars_total * sizeof(VarDesc);
+        // timer data is sent as a part of the output data
+        m_out_datalen += OFFLOAD_TIMER_DATALEN();
+        // max from input data and output data length
+        uint64_t data_len = m_in_datalen > m_out_datalen ? m_in_datalen :
+                                                           m_out_datalen;
+
+        // Misc data has the following layout
+        //     <Function Descriptor>
+        //     <Function Name>
+        //     <In/Out Data>            (optional)
+        //
+        // We can transfer copyin/copyout data in misc/return data which can
+        // be passed to run function call if its size does not exceed
+        // COI_PIPELINE_MAX_IN_MISC_DATA_LEN. Otherwise we have to allocate
+        // buffer for it.
+        m_func_desc_size = sizeof(FunctionDescriptor) + strlen(name) + 1;
+        m_func_desc_size = (m_func_desc_size + 7) & ~7; // round up to 8
+
+        int misc_data_offset = 0;
+        int misc_data_size = 0;
+        if (data_len > 0) {
+            if (m_func_desc_size +
+                m_in_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN &&
+                m_out_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN) {
+                // use misc/return data for copyin/copyout
+                misc_data_offset = m_func_desc_size;
+                misc_data_size = data_len;
+            }
+            else {
+                OffloadTimer timer_buf(get_timer_data(),
+                                       c_offload_host_alloc_data_buffer);
+                // send/receive data using buffer
+                COIRESULT res = COI::BufferCreate(data_len,
+                                                  COI_BUFFER_NORMAL,
+                                                  0, 0,
+                                                  1, &m_device.get_process(),
+                                                  &m_inout_buf);
+                if (res != COI_SUCCESS) {
+                    if (m_status != 0) {
+                        m_status->result = translate_coi_error(res);
+                        return false;
+                    }
+                    report_coi_error(c_buf_create, res);
+                }
+                // the buffer is passed to the dispatch and destroyed afterwards
+                m_compute_buffers.push_back(m_inout_buf);
+                m_destroy_buffers.push_back(m_inout_buf);
+            }
+        }
+
+        // initialize function descriptor
+        m_func_desc = (FunctionDescriptor*) malloc(m_func_desc_size +
+                                                   misc_data_size);
+        if (m_func_desc == 0) {
+            LIBOFFLOAD_ABORT;   // allocation failed: never write through NULL
+        }
+        m_func_desc->console_enabled = console_enabled;
+        m_func_desc->timer_enabled =
+            timer_enabled || (offload_report_level && offload_report_enabled);
+        m_func_desc->offload_report_level = offload_report_level;
+        m_func_desc->offload_number = GET_OFFLOAD_NUMBER(get_timer_data());
+        m_func_desc->in_datalen = m_in_datalen;
+        m_func_desc->out_datalen = m_out_datalen;
+        m_func_desc->vars_num = m_vars_total;
+        m_func_desc->data_offset = misc_data_offset;
+        // append entry name
+        strcpy(m_func_desc->data, name);
+    }
+
+    return true;
+}
+
+// Finish every earlier offload named by the 'waits' tags; false if any failed.
+bool OffloadDescriptor::wait_dependencies(
+    const void **waits,
+    int num_waits
+)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_wait_deps);
+    bool all_ok = true;
+
+    for (int idx = 0; idx < num_waits; ++idx) {
+        const void *tag = waits[idx];
+
+        // descriptor the device has on record for this tag (abort if none)
+        OffloadDescriptor *pending = m_device.find_signal(tag, true);
+        if (pending == 0) {
+            LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(), tag);
+            LIBOFFLOAD_ABORT;
+        }
+
+        // the finish step always runs; a failure is remembered, not fatal
+        all_ok = pending->offload_finish() && all_ok;
+
+        pending->cleanup();
+        delete pending;
+    }
+    return all_ok;
+}
+
+// Run one offload of entry point 'name' on this descriptor's device.
+//
+//   name        entry name; traced here and handed to setup_misc_data()
+//   is_empty    when true, m_need_runfunction starts out cleared
+//   vars, vars2, vars_total, entry_id, stack_addr
+//               forwarded unchanged to setup_descriptors()
+//   waits, num_waits
+//               dependency tags forwarded to wait_dependencies()
+//   signal      when non-null, the descriptor is registered under *signal
+//               (m_device.add_signal) once the receives are initiated and
+//               the call returns true without finishing or cleaning up
+//
+// Returns false, after calling cleanup(), as soon as any stage fails.
+// Without a signal the call also runs offload_finish() and cleanup().
+// In that case the return value is the result of offload_finish().
+bool OffloadDescriptor::offload(
+    const char *name,
+    bool is_empty,
+    VarDesc *vars,
+    VarDesc2 *vars2,
+    int vars_total,
+    const void **waits,
+    int num_waits,
+    const void **signal,
+    int entry_id,
+    const void *stack_addr
+)
+{
+    const bool is_async = (signal != 0);
+
+    // trace/report the request; only the signal part of the text differs
+    // NOTE(review): 'signal' (a pointer) is passed next to a "%d" text
+    // below; kept exactly as is - confirm how OFFLOAD_REPORT consumes it.
+    if (is_async) {
+        OFFLOAD_DEBUG_TRACE_1(1,
+                      GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_init_func,
+                      "Offload function %s, is_empty=%d, #varDescs=%d, "
+                      "#waits=%d, signal=%p\n",
+                      name, is_empty, vars_total, num_waits,
+                      *signal);
+
+        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_signal,
+                      "%d\n", signal);
+    }
+    else {
+        OFFLOAD_DEBUG_TRACE_1(1,
+                      GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_init_func,
+                      "Offload function %s, is_empty=%d, #varDescs=%d, "
+                      "#waits=%d, signal=none\n",
+                      name, is_empty, vars_total, num_waits);
+
+        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_sent_pointer_data,
+                      "#Wait : %d \n", num_waits);
+
+        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_signal,
+                      "none %d\n", 0);
+    }
+    OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
+                      c_offload_wait,
+                      "#Wait : %d  %p\n", num_waits, waits);
+
+    if (m_status != 0) {
+        m_status->result = OFFLOAD_SUCCESS;
+        m_status->device_number = m_device.get_logical_index();
+    }
+
+    m_need_runfunction = !is_empty;
+
+    // The stages below run strictly in the order written; && evaluates left
+    // to right and stops at the first stage that reports failure.
+    const bool started =
+        // wait for dependencies to finish
+        wait_dependencies(waits, num_waits) &&
+        // setup buffers
+        setup_descriptors(vars, vars2, vars_total, entry_id, stack_addr) &&
+        // initiate send for pointers. Want to do it as early as possible.
+        send_pointer_data(is_async) &&
+        // setup misc data for run function
+        setup_misc_data(name) &&
+        // gather copyin data into buffer
+        gather_copyin_data() &&
+        // Start the computation
+        compute() &&
+        // initiate receive for pointers
+        receive_pointer_data(is_async);
+
+    if (!started) {
+        cleanup();
+        return false;
+    }
+
+    // if there is a signal save descriptor for the later use.
+    if (is_async) {
+        m_device.add_signal(*signal, this);
+        return true;
+    }
+
+    // wait for the offload to finish; cleanup() runs whether or not it
+    // succeeded, and the result of offload_finish() is what gets returned
+    const bool finished = offload_finish();
+    cleanup();
+    return finished;
+}
+
+// Finish a started offload: wait for the compute events, scatter copy-out
+// data, wait for the receive events and destroy the queued buffers.
+// Returns false if scatter fails or a COI call fails while m_status is set.
+bool OffloadDescriptor::offload_finish()
+{
+    COIRESULT rc;
+
+    // wait for compute dependencies to become signaled
+    if (m_in_deps_total > 0) {
+        OffloadTimer timer(get_timer_data(), c_offload_host_wait_compute);
+        if (!__offload_active_wait) {
+            rc = COI::EventWait(m_in_deps_total, m_in_deps, -1, 1, 0, 0);
+        }
+        else {
+            // keep CPU busy: poll again for as long as the wait times out
+            rc = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
+            while (rc == COI_TIME_OUT_REACHED) {
+                rc = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
+            }
+        }
+
+        if (rc != COI_SUCCESS) {
+            if (m_status != 0) {
+                m_status->result = translate_coi_error(rc);
+                return false;
+            }
+            report_coi_error(c_event_wait, rc);
+        }
+    }
+
+    // scatter copyout data received from target
+    if (!scatter_copyout_data()) {
+        return false;
+    }
+    // wait for receive dependencies to become signaled
+    if (m_out_deps_total > 0) {
+        OffloadTimer timer(get_timer_data(), c_offload_host_wait_buffers_reads);
+        if (!__offload_active_wait) {
+            rc = COI::EventWait(m_out_deps_total, m_out_deps, -1, 1, 0, 0);
+        }
+        else {
+            // keep CPU busy
+            rc = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
+            while (rc == COI_TIME_OUT_REACHED) {
+                rc = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
+            }
+        }
+
+        if (rc != COI_SUCCESS) {
+            if (m_status != 0) {
+                m_status->result = translate_coi_error(rc);
+                return false;
+            }
+            report_coi_error(c_event_wait, rc);
+        }
+    }
+
+    // destroy buffers
+    {
+        OffloadTimer timer(get_timer_data(), c_offload_host_destroy_buffers);
+        BufferList::const_iterator buf = m_destroy_buffers.begin();
+        for (; buf != m_destroy_buffers.end(); ++buf) {
+            rc = COI::BufferDestroy(*buf);
+            if (rc != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(rc);
+                    return false;
+                }
+                report_coi_error(c_buf_destroy, rc);
+            }
+        }
+    }
+
+    return true;
+}
+
+void OffloadDescriptor::cleanup()
+{
+    // End-of-offload bookkeeping, in this order: release device in orsl ...
+    ORSL::release(m_device.get_logical_index());
+    // ... stop the c_offload_host_total_offload timer of this descriptor ...
+    OFFLOAD_TIMER_STOP(get_timer_data(), c_offload_host_total_offload);
+    // ... and report stuff (presumably the closing part of the report).
+    // Callers in view: offload() on non-signal exits, wait_dependencies().
+    Offload_Report_Epilog(get_timer_data());
+}
+
+// True when each non-empty event list polls as COI_SUCCESS (timeout arg 0).
+bool OffloadDescriptor::is_signaled()
+{
+    COIRESULT rc;
+    bool done = true;
+
+    // check compute and receive dependencies; both lists are always polled
+    if (m_in_deps_total > 0) {
+        rc = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
+        done = (rc == COI_SUCCESS);
+    }
+    if (m_out_deps_total > 0) {
+        rc = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
+        done = done && (rc == COI_SUCCESS);
+    }
+    return done;
+}
+
+// Send the data of variable i to the device when its source, its destination
+// or both are noncontiguous, one contiguous chunk per loop pass. It is
+// guaranteed that the destination is long enough for the transferred data.
+bool OffloadDescriptor::send_noncontiguous_pointer_data(
+    int i,                  // index into m_vars and m_vars_extra
+    PtrData* src_data,      // source pointer data; may be null (see below)
+    PtrData* dst_data,      // destination pointer data; supplies mic_buf
+    COIEVENT *event         // handed unchanged to every COI transfer call
+    )
+{
+    int64_t offset_src, offset_dst;          // where the current chunk starts
+    int64_t length_src, length_dst;          // size of one contiguous range
+    int64_t length_src_cur, length_dst_cur;  // what is left of current range
+    int64_t send_size, data_sent = 0;        // chunk size; progress marker
+    COIRESULT res;
+    bool dst_is_empty = true;   // true: a new destination range is needed
+    bool src_is_empty = true;   // true: a new source range is needed
+
+    // Set length_src and length_dst: read-range size if present, else var size
+    length_src = (m_vars_extra[i].read_rng_src) ?
+        m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
+    length_dst = !m_vars[i].into ? length_src :
+                     (m_vars_extra[i].read_rng_dst) ?
+                     m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
+    send_size = (length_src < length_dst) ? length_src : length_dst;
+
+    // walk the contiguous ranges in order, work out the matching
+    // destination offset for each chunk and send it
+    do {
+        if (src_is_empty) {
+            if (m_vars_extra[i].read_rng_src) {
+                if (!get_next_range(m_vars_extra[i].read_rng_src,
+                         &offset_src)) {
+                    // source ranges are over - nothing to send
+                    break;
+                }
+            }
+            else if (data_sent == 0) {   // no source read ranges, first pass
+                offset_src = m_vars_extra[i].cpu_disp;
+            }
+            else {                       // no source read ranges, all consumed
+                break;
+            }
+            length_src_cur = length_src;
+        }
+        else {
+            // if source is contiguous or its contiguous range is greater
+            // than destination one
+            offset_src += send_size;
+        }
+        length_src_cur -= send_size;
+        src_is_empty = length_src_cur == 0;
+
+        if (dst_is_empty) {
+            if (m_vars[i].into) {
+                if (m_vars_extra[i].read_rng_dst) {
+                    if (!get_next_range(m_vars_extra[i].read_rng_dst,
+                             &offset_dst)) {
+                        // destination ranges are over
+                        LIBOFFLOAD_ERROR(c_destination_is_over);
+                        return false;
+                    }
+                }
+                // into is contiguous.
+                else {
+                    offset_dst = m_vars[i].disp;
+                }
+                length_dst_cur = length_dst;
+            }
+            // same as source
+            else {
+                offset_dst = offset_src;
+                length_dst_cur = length_src;
+            }
+        }
+        else {
+            // if destination is contiguous or its contiguous range is greater
+            // than source one
+            offset_dst += send_size;
+        }
+        length_dst_cur -= send_size;
+        dst_is_empty = length_dst_cur == 0;
+
+        if (src_data != 0 && src_data->cpu_buf != 0) {
+            res = COI::BufferCopy(
+                dst_data->mic_buf,   // NOTE(review): dst_data not null-checked
+                src_data->cpu_buf,
+                m_vars[i].mic_offset - dst_data->alloc_disp +
+                m_vars[i].offset + offset_dst,
+                m_vars_extra[i].cpu_offset + offset_src,
+                send_size,
+                COI_COPY_UNSPECIFIED,
+                0, 0,
+                event);              // same event object on every pass
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+                report_coi_error(c_buf_copy, res);
+            }
+        }
+        else {
+            char *base = offload_get_src_base(m_vars[i].ptr,
+                m_vars[i].type.src);
+
+            res = COI::BufferWrite(
+                dst_data->mic_buf,
+                m_vars[i].mic_offset - dst_data->alloc_disp +
+                m_vars[i].offset + offset_dst,
+                base + offset_src,
+                send_size,
+                COI_COPY_UNSPECIFIED,
+                0, 0,
+                event);              // same event object on every pass
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+                report_coi_error(c_buf_write, res);
+            }
+        }
+        data_sent += length_src; // NOTE(review): not send_size - confirm intent
+    }
+    while (true);                // left only through break or return above
+    return true;
+}
+
+// Starts the host-to-target transfers for all "in" variables whose data is
+// written straight into a target buffer (static destinations, pointers and
+// dope-vector data); adds the byte total to m_status->data_sent if it is set.
+// is_async: request a completion event for every transfer; otherwise only
+//           transfers of at least __offload_use_async_buffer_write bytes do.
+// Returns false when a transfer fails and the failure is reported to caller.
+bool OffloadDescriptor::send_pointer_data(bool is_async)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_send_pointers);
+
+    uint64_t ptr_sent = 0;   // bytes handed over to COI so far
+    COIRESULT res;
+    // Initiate send for pointer data, dispatching on the destination type
+    for (int i = 0; i < m_vars_total; i++) {
+        switch (m_vars[i].type.dst) {
+            case c_data_ptr_array:
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+                if (m_vars[i].direction.in &&
+                    m_vars[i].flags.is_static_dstn) {
+                    COIEVENT *event =
+                        (is_async ||
+                         m_vars[i].size >= __offload_use_async_buffer_write) ?
+                        &m_in_deps[m_in_deps_total++] : 0;  // 0: no event
+                    PtrData* dst_data = m_vars[i].into ?
+                                            m_vars_extra[i].dst_data :
+                                            m_vars_extra[i].src_data;
+                    PtrData* src_data =
+                        (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
+                         (VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
+                          m_vars[i].flags.is_static)) ?
+                           m_vars_extra[i].src_data : 0;
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        if (!send_noncontiguous_pointer_data(
+                                i, src_data, dst_data, event)) {
+                            return false;
+                        }
+                    }
+                    else if (src_data != 0 && src_data->cpu_buf != 0) {
+                        // host side has a COI buffer: buffer-to-buffer copy
+                        res = COI::BufferCopy(
+                            dst_data->mic_buf,
+                            src_data->cpu_buf,
+                            m_vars[i].mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        // no host COI buffer: write from plain host memory
+                        char *base = offload_get_src_base(m_vars[i].ptr,
+                                                          m_vars[i].type.src);
+                        res = COI::BufferWrite(
+                            dst_data->mic_buf,
+                            m_vars[i].mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_write, res);
+                        }
+                    }
+                    ptr_sent += m_vars[i].size;
+                }
+                break;
+            // Pointer-typed destinations; zero-sized transfers are skipped.
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_ptr:
+                if (m_vars[i].direction.in && m_vars[i].size > 0) {
+                    COIEVENT *event =
+                        (is_async ||
+                         m_vars[i].size >= __offload_use_async_buffer_write) ?
+                        &m_in_deps[m_in_deps_total++] : 0;
+                    PtrData* dst_data = m_vars[i].into ?
+                                            m_vars_extra[i].dst_data :
+                                            m_vars_extra[i].src_data;
+                    PtrData* src_data =
+                        (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
+                         (VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
+                          m_vars[i].flags.is_static)) ?
+                            m_vars_extra[i].src_data : 0;
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        // a failed noncontiguous send must not be ignored
+                        if (!send_noncontiguous_pointer_data(
+                                i, src_data, dst_data, event)) {
+                            return false;
+                        }
+                    }
+                    else if (src_data != 0 && src_data->cpu_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_data->mic_buf,
+                            src_data->cpu_buf,
+                            m_vars[i].mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        char *base = offload_get_src_base(m_vars[i].ptr,
+                                                          m_vars[i].type.src);
+                        res = COI::BufferWrite(
+                            dst_data->mic_buf,
+                            m_vars[i].mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_write, res);
+                        }
+                    }
+                    ptr_sent += m_vars[i].size;
+                }
+                break;
+            // Dope-vector data: target offset is taken from PtrData::mic_offset.
+            case c_dv_data:
+            case c_dv_ptr_data:
+                if (m_vars[i].direction.in && m_vars[i].size > 0) {
+                    PtrData *ptr_data = m_vars[i].into ?
+                                        m_vars_extra[i].dst_data :
+                                        m_vars_extra[i].src_data;
+                    PtrData* src_data = m_vars_extra[i].src_data;
+                    COIEVENT *event =
+                        (is_async ||
+                         m_vars[i].size >= __offload_use_async_buffer_write) ?
+                        &m_in_deps[m_in_deps_total++] : 0;
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        if (!send_noncontiguous_pointer_data(
+                                i, src_data, ptr_data, event)) {
+                            return false;
+                        }
+                    }
+                    else if (src_data && src_data->cpu_buf != 0) {
+                        res = COI::BufferCopy(
+                            ptr_data->mic_buf,
+                            src_data->cpu_buf,
+                            m_vars[i].offset + ptr_data->mic_offset -
+                            ptr_data->alloc_disp + m_vars[i].disp,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        char *base = offload_get_src_base(m_vars[i].ptr,
+                                                          m_vars[i].type.src);
+                        res = COI::BufferWrite(
+                            ptr_data->mic_buf,
+                            ptr_data->mic_offset - ptr_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_write, res);
+                        }
+                    }
+                    ptr_sent += m_vars[i].size;
+                }
+                break;
+            // Dope-vector data slices.
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+                if (m_vars[i].direction.in && m_vars[i].size > 0) {
+                    PtrData *dst_data = m_vars[i].into ?
+                                        m_vars_extra[i].dst_data :
+                                        m_vars_extra[i].src_data;
+                    PtrData* src_data =
+                        (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
+                         VAR_TYPE_IS_DV_DATA(m_vars[i].type.src) ||
+                         VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) ||
+                         (VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
+                          m_vars[i].flags.is_static)) ?
+                            m_vars_extra[i].src_data : 0;
+                    COIEVENT *event =
+                        (is_async ||
+                         m_vars[i].size >= __offload_use_async_buffer_write) ?
+                        &m_in_deps[m_in_deps_total++] : 0;
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        if (!send_noncontiguous_pointer_data(
+                                i, src_data, dst_data, event)) {
+                            return false;
+                        }
+                    }
+                    else if (src_data && src_data->cpu_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_data->mic_buf,
+                            src_data->cpu_buf,
+                            m_vars[i].offset - dst_data->alloc_disp +
+                            dst_data->mic_offset + m_vars[i].disp,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        char *base = offload_get_src_base(m_vars[i].ptr,
+                                                          m_vars[i].type.src);
+                        res = COI::BufferWrite(
+                            dst_data->mic_buf,
+                            dst_data->mic_offset - dst_data->alloc_disp +
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            0, 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_write, res);
+                        }
+                    }
+                    ptr_sent += m_vars[i].size;
+                }
+                break;
+
+            default:
+                break;
+        }
+
+        // alloc field isn't used at target.
+        // We can reuse it for offset of array pointers.
+        if (m_vars_extra[i].is_arr_ptr_el) {
+            m_vars[i].ptr_arr_offset = m_vars_extra[i].ptr_arr_offset;
+        }
+    }
+
+    if (m_status) {
+        m_status->data_sent += ptr_sent;
+    }
+    OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), ptr_sent);
+    OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
+                  c_offload_sent_pointer_data,
+                  "Total pointer data sent to target: [%lld] bytes\n",
+                  ptr_sent);
+
+    return true;
+}
+
+bool OffloadDescriptor::gather_copyin_data()
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_gather_inputs);
+    // Serializes the variable descriptors and by-value "in" data for the target.
+    if (m_need_runfunction && m_in_datalen > 0) {
+        COIMAPINSTANCE map_inst;
+        char *data;
+        // Pick the destination area: the mapped in/out COI buffer if there is
+        // one, otherwise the space at data_offset inside the function descriptor
+        if (m_inout_buf != 0) {
+            OffloadTimer timer_map(get_timer_data(),
+                                   c_offload_host_map_in_data_buffer);
+
+            COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_in_datalen,
+                                           COI_MAP_WRITE_ENTIRE_BUFFER,
+                                           0, 0, 0, &map_inst,
+                                           reinterpret_cast<void**>(&data));
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+                report_coi_error(c_buf_map, res);
+            }
+        }
+        else {
+            data = (char*) m_func_desc + m_func_desc->data_offset;
+        }
+
+        // the raw VarDesc array goes first, ahead of the marshalled stream
+        memcpy(data, m_vars, m_vars_total * sizeof(VarDesc));
+        data += m_vars_total * sizeof(VarDesc);
+
+        // init marshaller on the space that follows the descriptors
+        m_in.init_buffer(data, m_in_datalen);
+
+        // Gather copy data into buffer, one variable at a time
+        for (int i = 0; i < m_vars_total; i++) {
+            bool src_is_for_mic = (m_vars[i].direction.out ||
+                                   m_vars[i].into == NULL);
+            PtrData* ptr_data = src_is_for_mic ?
+                                m_vars_extra[i].src_data :
+                                m_vars_extra[i].dst_data;
+            if (m_vars[i].flags.alloc_disp) {
+                m_in.send_data(&ptr_data->alloc_disp,
+                               sizeof(ptr_data->alloc_disp));
+            }
+
+            // send sink address to the target
+            if (m_vars[i].flags.sink_addr) {
+                m_in.send_data(&ptr_data->mic_addr,
+                               sizeof(ptr_data->mic_addr));
+            }
+            // the remaining payload depends on the destination type
+            switch (m_vars[i].type.dst) {
+                case c_data_ptr_array:
+                    break;
+                case c_data:
+                case c_void_ptr:
+                case c_cean_var:
+                    if (m_vars[i].direction.in &&
+                        !m_vars[i].flags.is_static_dstn) {
+                        // non-static destination: the value travels by copy
+                        char *ptr = offload_get_src_base(m_vars[i].ptr,
+                                                         m_vars[i].type.src);
+                        if (m_vars[i].type.dst == c_cean_var) {
+                            // offset and length are derived from the array
+                            // descriptor
+                            int64_t size = m_vars[i].size;
+                            int64_t disp = m_vars[i].disp;
+                            m_in.send_data(reinterpret_cast<char*>(&size),
+                                           sizeof(int64_t));
+                            m_in.send_data(reinterpret_cast<char*>(&disp),
+                                           sizeof(int64_t));
+                        }
+
+                        m_in.send_data(ptr + m_vars_extra[i].cpu_disp,
+                                       m_vars[i].size);
+                    }
+                    break;
+
+                case c_dv:
+                    if (m_vars[i].direction.bits ||
+                        m_vars[i].alloc_if ||
+                        m_vars[i].free_if) {
+                        // send dope vector excluding base
+                        char *ptr = static_cast<char*>(m_vars[i].ptr);
+                        m_in.send_data(ptr + sizeof(uint64_t),
+                                       m_vars[i].size - sizeof(uint64_t));
+                    }
+                    break;
+
+                case c_data_ptr:
+                    // send to target addresses of obsolete
+                    // stacks to be released
+                    if (m_vars[i].flags.is_stack_buf &&
+                        !m_vars[i].direction.bits &&
+                        m_vars[i].alloc_if &&
+                        m_vars[i].size != 0) {
+                        for (PtrDataList::iterator it =
+                            m_destroy_stack.begin();
+                            it != m_destroy_stack.end(); it++) {
+                            PtrData * ptr_data = *it;  // shadows outer ptr_data
+                            m_in.send_data(&(ptr_data->mic_addr),
+                                sizeof(ptr_data->mic_addr));
+                        }
+                    }
+                    break;
+                case c_func_ptr:
+                    if (m_vars[i].direction.in) {
+                        m_in.send_func_ptr(*((const void**) m_vars[i].ptr));
+                    }
+                    break;
+
+                default:
+                    break;
+            }
+        }
+
+        if (m_status) {
+            m_status->data_sent += m_in.get_tfr_size();
+        }
+        // NOTE(review): presumes data_offset == 0 iff the buffer was mapped above
+        if (m_func_desc->data_offset == 0) {
+            OffloadTimer timer_unmap(get_timer_data(),
+                                     c_offload_host_unmap_in_data_buffer);
+            COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+                report_coi_error(c_buf_unmap, res);
+            }
+        }
+    }
+
+    OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), m_in.get_tfr_size());
+    OFFLOAD_DEBUG_TRACE_1(1,
+                  GET_OFFLOAD_NUMBER(get_timer_data()), c_offload_copyin_data,
+                  "Total copyin data sent to target: [%lld] bytes\n",
+                  m_in.get_tfr_size());
+    // false is returned above only after a COI error was stored in m_status
+    return true;
+}
+
+bool OffloadDescriptor::compute()
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_start_compute);
+    // Offloads that carry no run-function have nothing to dispatch.
+    if (!m_need_runfunction) {
+        return true;
+    }
+
+    OFFLOAD_DEBUG_TRACE_1(2, GET_OFFLOAD_NUMBER(get_timer_data()),
+                          c_offload_compute, "Compute task on MIC\n");
+    // The function descriptor is always passed along as the misc data block.
+    void* misc_data = m_func_desc;
+    int   misc_size = m_func_desc_size;
+    void* ret_data = 0;
+    int   ret_size = 0;
+
+    // Nonzero data_offset: inputs extend misc, outputs return at that offset.
+    const bool has_data_offset = (m_func_desc->data_offset != 0);
+    if (has_data_offset) {
+        misc_size += m_in_datalen;
+    }
+    if (has_data_offset && m_out_datalen > 0) {
+        ret_data = (char*) m_func_desc + m_func_desc->data_offset;
+        ret_size = m_out_datalen;
+    }
+
+    COIEVENT done;
+    COIRESULT res = m_device.compute(m_compute_buffers, misc_data, misc_size,
+                                     ret_data, ret_size, m_in_deps_total,
+                                     m_in_deps_total > 0 ? m_in_deps : 0,
+                                     &done);
+    if (res != COI_SUCCESS) {
+        if (m_status == 0) {
+            report_coi_error(c_pipeline_run_func, res);
+        }
+        else {
+            m_status->result = translate_coi_error(res);
+            return false;
+        }
+    }
+    // The compute event replaces all earlier input dependencies.
+    m_in_deps[0] = done;
+    m_in_deps_total = 1;
+    return true;
+}
+
+// Receive pointer data from the target when source and/or destination are
+// noncontiguous, one contiguous chunk per loop iteration. Returns false if
+// the destination ranges run out or a COI error was stored in m_status.
+bool OffloadDescriptor::receive_noncontiguous_pointer_data(
+    int i,              // index of the variable in m_vars / m_vars_extra
+    char* base,         // host base address, used only when dst_buf is 0
+    COIBUFFER dst_buf,  // host-side COI buffer to copy into, or 0
+    COIEVENT *event     // handed unchanged to every COI copy/read call
+)
+{
+    int64_t offset_src, offset_dst;
+    int64_t length_src, length_dst;
+    int64_t length_src_cur, length_dst_cur;
+    int64_t receive_size, data_received = 0;
+    COIRESULT res;
+    bool dst_is_empty = true;   // true => fetch a new destination range
+    bool src_is_empty = true;   // true => fetch a new source range
+
+    // Range lengths; a side lacking a range descriptor uses m_vars[i].size
+    length_src = (m_vars_extra[i].read_rng_src) ?
+        m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
+    length_dst = !m_vars[i].into ? length_src :   // no "into": same as source
+                     (m_vars_extra[i].read_rng_dst) ?
+                     m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
+    receive_size = (length_src < length_dst) ? length_src : length_dst;
+    // receive_size (the smaller range length) is the size of every transfer.
+    // consequently get contiguous ranges,
+    // define corresponded destination offset and receive data
+    do {
+        // get source offset
+        if (src_is_empty) {
+            if (m_vars_extra[i].read_rng_src) {
+                if (!get_next_range(m_vars_extra[i].read_rng_src,
+                         &offset_src)) {
+                    // source ranges are over - nothing more to receive
+                    break;
+                }
+            }
+            else if (data_received == 0) {   // contiguous source, first pass
+                offset_src = 0;
+            }
+            else {
+                break;                       // contiguous source is consumed
+            }
+            length_src_cur = length_src;
+        }
+        else {
+            // if source is contiguous or its contiguous range is greater
+            // than destination one
+            offset_src += receive_size;
+        }
+        length_src_cur -= receive_size;
+        src_is_empty = length_src_cur == 0;
+
+        // get destination offset
+        if (dst_is_empty) {
+            if (m_vars[i].into) {
+                if (m_vars_extra[i].read_rng_dst) {
+                    if (!get_next_range(m_vars_extra[i].read_rng_dst,
+                             &offset_dst)) {
+                        // destination ranges are over
+                        LIBOFFLOAD_ERROR(c_destination_is_over);
+                        return false;
+                    }
+                }
+                // destination is contiguous.
+                else {
+                    offset_dst = m_vars_extra[i].cpu_disp;
+                }
+                length_dst_cur = length_dst;
+            }
+            // same as source
+            else {
+                offset_dst = offset_src;
+                length_dst_cur = length_src;
+            }
+        }
+        else {
+            // if destination is contiguous or its contiguous range is greater
+            // than source one
+            offset_dst += receive_size;
+        }
+        length_dst_cur -= receive_size;
+        dst_is_empty = length_dst_cur == 0;
+        // Host COI buffer given: buffer-to-buffer copy; else read into base.
+        if (dst_buf != 0) {
+            res = COI::BufferCopy(
+                dst_buf,
+                m_vars_extra[i].src_data->mic_buf,
+                m_vars_extra[i].cpu_offset + offset_dst,
+                m_vars[i].offset + offset_src +
+                m_vars[i].mic_offset -
+                m_vars_extra[i].src_data->alloc_disp,
+                receive_size,
+                COI_COPY_UNSPECIFIED,
+                m_in_deps_total,
+                m_in_deps_total > 0 ? m_in_deps : 0,
+                event);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+                report_coi_error(c_buf_copy, res);
+            }
+        }
+        else {
+            res = COI::BufferRead(
+                m_vars_extra[i].src_data->mic_buf,
+                m_vars[i].offset + offset_src +
+                m_vars[i].mic_offset -
+                m_vars_extra[i].src_data->alloc_disp,
+                base + offset_dst,
+                receive_size,
+                COI_COPY_UNSPECIFIED,
+                m_in_deps_total,
+                m_in_deps_total > 0 ? m_in_deps : 0,
+                event);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+                report_coi_error(c_buf_read, res);
+            }
+        }
+        data_received += receive_size;
+    }
+    while (true);   // left only through the break / return statements above
+    return true;
+}
+
+bool OffloadDescriptor::receive_pointer_data(bool is_async)
+{
+    OffloadTimer timer(get_timer_data(), c_offload_host_start_buffers_reads);
+
+    uint64_t ptr_received = 0;
+    COIRESULT res;
+
+    for (int i = 0; i < m_vars_total; i++) {
+        switch (m_vars[i].type.src) {
+            case c_data_ptr_array:
+                break;
+            case c_data:
+            case c_void_ptr:
+            case c_cean_var:
+                if (m_vars[i].direction.out &&
+                    m_vars[i].flags.is_static) {
+                    COIEVENT *event =
+                        (is_async ||
+                         m_in_deps_total > 0 ||
+                         m_vars[i].size >= __offload_use_async_buffer_read) ?
+                        &m_out_deps[m_out_deps_total++] : 0;
+                    PtrData *ptr_data = NULL;
+                    COIBUFFER dst_buf = NULL; // buffer at host
+                    char *base;
+
+                    if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
+                        ptr_data = m_vars[i].into ?
+                                   m_vars_extra[i].dst_data :
+                                   m_vars_extra[i].src_data;
+                    }
+                    else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
+                        if (m_vars[i].flags.is_static_dstn) {
+                            ptr_data = m_vars[i].into ?
+                                       m_vars_extra[i].dst_data :
+                                       m_vars_extra[i].src_data;
+                        }
+                    }
+                    dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
+                    if (dst_buf == NULL) {
+                        base = offload_get_src_base(
+                            m_vars[i].into ?
+                            static_cast<char*>(m_vars[i].into) :
+                            static_cast<char*>(m_vars[i].ptr),
+                            m_vars[i].type.dst);
+                    }
+
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        receive_noncontiguous_pointer_data(
+                            i, base, dst_buf, event);
+                    }
+                    else if (dst_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_buf,
+                            m_vars_extra[i].src_data->mic_buf,
+                            m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].offset + m_vars[i].disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            m_in_deps_total,
+                            m_in_deps_total > 0 ? m_in_deps : 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                       res = COI::BufferRead(
+                            m_vars_extra[i].src_data->mic_buf,
+                            m_vars[i].offset + m_vars[i].disp,
+                            base + m_vars_extra[i].cpu_offset +
+                            m_vars_extra[i].cpu_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            m_in_deps_total,
+                            m_in_deps_total > 0 ? m_in_deps : 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_read, res);
+                        }
+                    }
+                    ptr_received += m_vars[i].size;
+                }
+                break;
+
+            case c_string_ptr:
+            case c_data_ptr:
+            case c_cean_var_ptr:
+            case c_dv_data:
+            case c_dv_ptr_data:
+            case c_dv_data_slice:
+            case c_dv_ptr_data_slice:
+            case c_dv_ptr: {
+                COIBUFFER dst_buf = NULL; // buffer on host
+                if (m_vars[i].direction.out && m_vars[i].size > 0) {
+                    COIEVENT *event =
+                        (is_async ||
+                         m_in_deps_total > 0 ||
+                         m_vars[i].size >= __offload_use_async_buffer_read) ?
+                        &m_out_deps[m_out_deps_total++] : 0;
+
+                    uint64_t dst_offset = 0;
+                    char *base = static_cast<char*>(m_vars[i].ptr);
+
+                    if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
+                        PtrData *ptr_data = m_vars[i].into ?
+                                            m_vars_extra[i].dst_data :
+                                            m_vars_extra[i].src_data;
+                        dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
+                        if (dst_buf == NULL) {
+                            base = m_vars[i].into ?
+                                   *static_cast<char**>(m_vars[i].into) :
+                                   *static_cast<char**>(m_vars[i].ptr);
+                        }
+                        dst_offset = m_vars_extra[i].cpu_offset +
+                                     m_vars_extra[i].cpu_disp;
+                    }
+                    else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
+                        if (m_vars[i].flags.is_static_dstn) {
+                            dst_buf = m_vars[i].into ?
+                                        m_vars_extra[i].dst_data->cpu_buf :
+                                        m_vars_extra[i].src_data->cpu_buf;
+                        }
+                        if (dst_buf == NULL) {
+                            base = offload_get_src_base(
+                                m_vars[i].into ?
+                                static_cast<char*>(m_vars[i].into) :
+                                static_cast<char*>(m_vars[i].ptr),
+                                m_vars[i].type.dst);
+                        }
+                        dst_offset = m_vars_extra[i].cpu_offset +
+                                     m_vars_extra[i].cpu_disp;
+                    }
+                    else if (VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst) ||
+                             VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
+                        PtrData *ptr_data = m_vars[i].into != 0 ?
+                                            m_vars_extra[i].dst_data :
+                                            m_vars_extra[i].src_data;
+                        dst_buf = ptr_data != 0 ? ptr_data->cpu_buf : 0;
+                        if (dst_buf == NULL) {
+                            base = offload_get_src_base(
+                                m_vars[i].into ?
+                                static_cast<char*>(m_vars[i].into) :
+                                static_cast<char*>(m_vars[i].ptr),
+                                m_vars[i].type.dst);
+
+                        }
+                        dst_offset = m_vars_extra[i].cpu_offset +
+                                     m_vars_extra[i].cpu_disp;
+                    }
+
+                    if (m_vars[i].flags.is_noncont_src ||
+                        m_vars[i].flags.is_noncont_dst) {
+                        receive_noncontiguous_pointer_data(
+                            i, base, dst_buf, event);
+                    }
+                    else if (dst_buf != 0) {
+                        res = COI::BufferCopy(
+                            dst_buf,
+                            m_vars_extra[i].src_data->mic_buf,
+                            dst_offset,
+                            m_vars[i].offset + m_vars[i].disp +
+                                m_vars[i].mic_offset -
+                                m_vars_extra[i].src_data->alloc_disp,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            m_in_deps_total,
+                            m_in_deps_total > 0 ? m_in_deps : 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_copy, res);
+                        }
+                    }
+                    else {
+                        res = COI::BufferRead(
+                            m_vars_extra[i].src_data->mic_buf,
+                            m_vars[i].offset + m_vars[i].disp +
+                                m_vars[i].mic_offset -
+                                m_vars_extra[i].src_data->alloc_disp,
+                            base + dst_offset,
+                            m_vars[i].size,
+                            COI_COPY_UNSPECIFIED,
+                            m_in_deps_total,
+                            m_in_deps_total > 0 ? m_in_deps : 0,
+                            event);
+                        if (res != COI_SUCCESS) {
+                            if (m_status != 0) {
+                                m_status->result = translate_coi_error(res);
+                                return false;
+                            }
+                            report_coi_error(c_buf_read, res);
+                        }
+                    }
+                    ptr_received += m_vars[i].size;
+                }
+                break;
+            }
+
+            default:
+                break;
+        }
+
+        // destroy buffers for obsolete stacks
+        if (m_destroy_stack.size() != 0) {
+            for (PtrDataList::iterator it = m_destroy_stack.begin();
+                it != m_destroy_stack.end(); it++) {
+                PtrData *ptr_data = *it;
+                m_destroy_buffers.push_back(ptr_data->mic_buf);
+                OFFLOAD_TRACE(3, "Removing stack buffer with addr %p\n",
+                                  ptr_data->mic_addr);
+            }
+            m_destroy_stack.clear();
+        }
+        if (m_vars[i].free_if) {
+            // remove association for automatic variables
+            if (m_is_openmp && !m_vars[i].flags.is_static &&
+                (m_vars[i].type.src == c_data ||
+                 m_vars[i].type.src == c_void_ptr ||
+                 m_vars[i].type.src == c_cean_var)) {
+                AutoData *auto_data = m_vars_extra[i].auto_data;
+                if (auto_data != 0 && auto_data->remove_reference() == 0) {
+                    m_device.remove_auto_data(auto_data->cpu_addr.start());
+                }
+            }
+
+            // destroy buffers
+            if (m_vars[i].direction.out || m_vars[i].into == NULL) {
+                if (!VAR_TYPE_IS_PTR(m_vars[i].type.src) &&
+                    !VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) &&
+                    !VAR_TYPE_IS_DV_DATA(m_vars[i].type.src)) {
+                    continue;
+                }
+
+                PtrData *ptr_data = m_vars_extra[i].src_data;
+                if (ptr_data->remove_reference() == 0) {
+                    // destroy buffers
+                    if (ptr_data->cpu_buf != 0) {
+                        m_destroy_buffers.push_back(ptr_data->cpu_buf);
+                    }
+                    if (ptr_data->mic_buf != 0) {
+                        m_destroy_buffers.push_back(ptr_data->mic_buf);
+                    }
+                    OFFLOAD_TRACE(3, "Removing association for addr %p\n",
+                                  ptr_data->cpu_addr.start());
+
+                    // remove association from map
+                    m_device.remove_ptr_data(ptr_data->cpu_addr.start());
+                }
+            }
+            else if (VAR_TYPE_IS_PTR(m_vars[i].type.dst) ||
+                     VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst) ||
+                     VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst)) {
+                PtrData *ptr_data = m_vars_extra[i].dst_data;
+                if (ptr_data->remove_reference() == 0) {
+                    // destroy buffers
+                    if (ptr_data->cpu_buf != 0) {
+                        m_destroy_buffers.push_back(ptr_data->cpu_buf);
+                    }
+                    if (ptr_data->mic_buf != 0) {
+                        m_destroy_buffers.push_back(ptr_data->mic_buf);
+                    }
+                    OFFLOAD_TRACE(3, "Removing association for addr %p\n",
+                                  ptr_data->cpu_addr.start());
+
+                    // remove association from map
+                    m_device.remove_ptr_data(ptr_data->cpu_addr.start());
+                }
+            }
+        }
+    }
+
+    if (m_status) {
+        m_status->data_received += ptr_received;
+    }
+
+    OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), ptr_received);
+    OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
+                  c_offload_received_pointer_data,
+                  "Total pointer data received from target: [%lld] bytes\n",
+                  ptr_received);
+
+    return true;
+}
+
+bool OffloadDescriptor::scatter_copyout_data()
+{
+    // Unmarshal the "out" scalars and function pointers that came back in
+    // the output data block into their host locations. Returns false only
+    // when a COI map/unmap call fails while m_status is set; otherwise the
+    // failure goes to report_coi_error().
+    OffloadTimer timer(get_timer_data(), c_offload_host_scatter_outputs);
+
+    if (m_need_runfunction && m_out_datalen > 0) {
+        COIMAPINSTANCE map_inst;
+        char *data;
+
+        // locate the output data block
+        if (m_func_desc->data_offset != 0) {
+            // it starts data_offset bytes from the function descriptor
+            data = (char*) m_func_desc + m_func_desc->data_offset;
+        }
+        else {
+            // otherwise map the first m_out_datalen bytes of m_inout_buf
+            OffloadTimer timer_map(get_timer_data(),
+                                   c_offload_host_map_out_data_buffer);
+
+            COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_out_datalen,
+                                           COI_MAP_READ_ONLY, 0, 0, 0,
+                                           &map_inst,
+                                           reinterpret_cast<void**>(&data));
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+                report_coi_error(c_buf_map, res);
+            }
+        }
+
+        // the block begins with timing data; step past it once consumed
+        OFFLOAD_TIMER_TARGET_DATA(get_timer_data(), data);
+        data += OFFLOAD_TIMER_DATALEN();
+
+        // attach the output marshaller to what follows
+        m_out.init_buffer(data, m_out_datalen);
+
+        for (int i = 0; i < m_vars_total; i++) {
+            // only variables with the "out" direction are read back here
+            if (!m_vars[i].direction.out) {
+                continue;
+            }
+
+            const int src_type = m_vars[i].type.src;
+            if (src_type == c_func_ptr) {
+                // function pointers use their own receive routine
+                m_out.receive_func_ptr((const void**) m_vars[i].ptr);
+            }
+            else if ((src_type == c_data ||
+                      src_type == c_void_ptr ||
+                      src_type == c_cean_var) &&
+                     !m_vars[i].flags.is_static) {
+                // non-static data lands in "into" when one was given,
+                // otherwise in the variable itself
+                char *dst_base;
+                if (m_vars[i].into) {
+                    dst_base = offload_get_src_base(
+                        static_cast<char*>(m_vars[i].into),
+                        m_vars[i].type.dst);
+                }
+                else {
+                    dst_base = static_cast<char*>(m_vars[i].ptr);
+                }
+                m_out.receive_data(dst_base + m_vars_extra[i].cpu_disp,
+                                   m_vars[i].size);
+            }
+        }
+
+        // add the marshaller's transfer size to the status totals
+        if (m_status) {
+            m_status->data_received += m_out.get_tfr_size();
+        }
+
+        // undo the mapping made above, if there was one
+        if (m_func_desc->data_offset == 0) {
+            OffloadTimer timer_unmap(get_timer_data(),
+                                     c_offload_host_unmap_out_data_buffer);
+
+            COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0);
+            if (res != COI_SUCCESS) {
+                if (m_status != 0) {
+                    m_status->result = translate_coi_error(res);
+                    return false;
+                }
+                report_coi_error(c_buf_unmap, res);
+            }
+        }
+    }
+
+    // these run even when the block above was skipped
+    OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), m_out.get_tfr_size());
+    OFFLOAD_TRACE(1, "Total copyout data received from target: [%lld] bytes\n",
+                  m_out.get_tfr_size());
+
+    return true;
+}
+
+void get_arr_desc_numbers(
+    const arr_desc *ap,
+    int64_t el_size,
+    int64_t &offset,
+    int64_t &size,
+    int     &el_number,
+    CeanReadRanges* &ptr_ranges
+)
+{
+    if (is_arr_desc_contiguous(ap)) {  // one offset/length pair describes it
+        ptr_ranges = NULL;
+        __arr_data_offset_and_length(ap, offset, size);
+        el_number = size / el_size;
+        return;
+    }
+    // non-contiguous: build read ranges; "offset" is not written on this path
+    CeanReadRanges *ranges = init_read_ranges_arr_desc(ap);
+    el_number = (ranges->range_size / el_size) * ranges->range_max_number;
+    size = ranges->range_size;
+    ptr_ranges = ranges;
+}
+
+arr_desc * make_arr_desc(
+    void*   ptr_val,              // address stored as the descriptor base
+    int64_t extent_start_val,     // lower index of the single dimension
+    int64_t extent_elements_val,  // element count: upper = start + count - 1
+    int64_t size                  // stored as dim[0].size
+)
+{   // Builds a malloc'ed rank-1, unit-stride descriptor for ptr_val.
+    arr_desc *res = (arr_desc *)malloc(sizeof(arr_desc));
+    if (res == NULL) { LIBOFFLOAD_ABORT; }  // never write through a NULL result
+    res->base = reinterpret_cast<int64_t>(ptr_val);
+    res->rank = 1;
+    res->dim[0].size = size;
+    res->dim[0].lindex = 0;
+    res->dim[0].lower = extent_start_val;
+    res->dim[0].upper = extent_elements_val + extent_start_val - 1;
+    res->dim[0].stride = 1;
+    return res;
+}
+
+bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
+{
+    int             pointers_number;   // pointers described by vd3->ptr_array
+    int             tmp_val;           // element count of the clause checked
+    int             new_index = m_vars_total;  // first appended var_desc
+    const arr_desc *ap;
+    const VarDesc3 *vd3 = static_cast<const VarDesc3*>(m_vars[i].ptr);
+    int             flags = vd3->array_fields;
+    bool            src_is_for_mic = (m_vars[i].direction.out ||
+                                      m_vars[i].into == NULL);
+    // one reader per clause; each yields a fixed value or per-pointer values
+    ReadArrElements<void *>  ptr;
+    ReadArrElements<void *>  into;
+    ReadArrElements<int64_t> ext_start;
+    ReadArrElements<int64_t> ext_elements;
+    ReadArrElements<int64_t> align;
+    ReadArrElements<int64_t> alloc_if;
+    ReadArrElements<int64_t> free_if;
+    ReadArrElements<int64_t> into_start;
+    ReadArrElements<int64_t> into_elem;
+    ReadArrElements<int64_t> alloc_start;
+    ReadArrElements<int64_t> alloc_elem;
+    // Expands pointer-array descriptor m_vars[i] into one var_desc per
+    // pointer, appended to m_vars; returns false on a clause size mismatch.
+    ap = static_cast<const arr_desc*>(vd3->ptr_array);
+
+    // "pointers_number" for total number of transferred pointers.
+    // For each of them we create new var_desc and put it at the bottom
+    // of the var_desc's array
+    get_arr_desc_numbers(ap, sizeof(void *), ptr.offset, ptr.size,
+        pointers_number, ptr.ranges);
+    ptr.base = reinterpret_cast<char*>(ap->base);
+
+    // 2. prepare memory for new var_descs
+    m_vars_total += pointers_number;
+    m_vars       = (VarDesc*)realloc(m_vars, m_vars_total * sizeof(VarDesc));
+    m_vars_extra =
+        (VarExtra*)realloc(m_vars_extra, m_vars_total * sizeof(VarExtra));
+    m_in_deps    =
+        (COIEVENT*)realloc(m_in_deps, sizeof(COIEVENT) * (m_vars_total + 1));
+    m_out_deps   =
+        (COIEVENT*)realloc(m_out_deps, sizeof(COIEVENT) * m_vars_total);
+    // NOTE(review): the realloc results above are used without NULL checks.
+    // 3. Prepare for reading new var_desc's fields
+    //    EXTENT START
+    if ((flags & (1<<flag_extent_start_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->extent_start);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, ext_start.offset,
+            ext_start.size, tmp_val, ext_start.ranges);
+        ext_start.base = reinterpret_cast<char*>(ap->base);
+        ext_start.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {  // fewer clause values than pointers
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
+            return false;  // NOTE(review): m_vars_total was already enlarged
+        }
+    }
+    else if ((flags & (1<<flag_extent_start_is_scalar)) != 0) {
+        ext_start.val = (int64_t)vd3->extent_start;  // field holds the value
+    }
+    else {
+        ext_start.val = 0;
+    }
+
+    //    EXTENT ELEMENTS NUMBER
+    if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->extent_elements);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
+            ext_elements.offset, ext_elements.size,
+            tmp_val, ext_elements.ranges);
+        ext_elements.base = reinterpret_cast<char*>(ap->base);
+        ext_elements.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_extent_elements_is_scalar)) != 0) {
+        ext_elements.val = (int64_t)vd3->extent_elements;
+    }
+    else {
+        ext_elements.val = m_vars[i].count;
+    }
+
+    //    ALLOC_IF
+    if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_if.offset,
+            alloc_if.size, tmp_val, alloc_if.ranges);
+        alloc_if.base = reinterpret_cast<char*>(ap->base);
+        alloc_if.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
+            return false;
+        }
+    }
+    else {
+        alloc_if.val = m_vars[i].count;  // NOTE(review): from "count" - confirm
+    }
+
+    //    FREE_IF
+    if ((flags & (1<<flag_free_if_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->free_if_array);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, free_if.offset,
+            free_if.size, tmp_val, free_if.ranges);
+        free_if.base = reinterpret_cast<char*>(ap->base);
+        free_if.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
+            return false;
+        }
+    }
+    else {
+        free_if.val = m_vars[i].count;  // NOTE(review): from "count" - confirm
+    }
+
+    //    ALIGN
+
+    if ((flags & (1<<flag_align_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->align_array);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, align.offset,
+            align.size, tmp_val, align.ranges);
+        align.base = reinterpret_cast<char*>(ap->base);
+        align.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
+            return false;
+        }
+    }
+    else {
+        align.val = m_vars[i].align;
+    }
+
+    // 3.1 INTO
+
+    if (m_vars[i].into) {
+        ap = static_cast<const arr_desc*>(m_vars[i].into);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into.offset,
+            into.size, tmp_val, into.ranges);
+        into.base = reinterpret_cast<char*>(ap->base);
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
+            return false;
+        }
+    }
+
+    // 3.2 INTO_START
+
+    if ((flags & (1<<flag_into_start_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->into_start);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_start.offset,
+            into_start.size, tmp_val, into_start.ranges);
+        into_start.base = reinterpret_cast<char*>(ap->base);
+        into_start.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_into_start_is_scalar)) != 0) {
+        into_start.val = (int64_t)vd3->into_start;
+    }
+    else {
+        into_start.val = 0;
+    }
+
+    // 3.3 INTO_ELEMENTS
+
+    if ((flags & (1<<flag_into_elements_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->into_elements);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_elem.offset,
+            into_elem.size, tmp_val, into_elem.ranges);
+        into_elem.base = reinterpret_cast<char*>(ap->base);
+        into_elem.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_into_elements_is_scalar)) != 0) {
+        into_elem.val = (int64_t)vd3->into_elements;
+    }
+    else {
+        into_elem.val = m_vars[i].count;
+    }
+
+    //    alloc_start
+
+    if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->alloc_start);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
+            alloc_start.offset, alloc_start.size, tmp_val,
+            alloc_start.ranges);
+        alloc_start.base = reinterpret_cast<char*>(ap->base);
+        alloc_start.el_size = ap->dim[ap->rank - 1].size;
+
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_alloc_start_is_scalar)) != 0) {
+        alloc_start.val = (int64_t)vd3->alloc_start;
+    }
+    else {
+        alloc_start.val = 0;
+    }
+
+    //    alloc_elem
+
+    if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
+        ap = static_cast<const arr_desc*>(vd3->alloc_elements);
+        get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_elem.offset,
+            alloc_elem.size, tmp_val, alloc_elem.ranges);
+        alloc_elem.base = reinterpret_cast<char*>(ap->base);
+        alloc_elem.el_size = ap->dim[ap->rank - 1].size;
+        if (tmp_val < pointers_number) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch,
+                             "alloc_extent elements");
+            return false;
+        }
+    }
+    else if ((flags & (1<<flag_alloc_elements_is_scalar)) != 0) {
+        alloc_elem.val = (int64_t)vd3->alloc_elements;
+    }
+    else {
+        alloc_elem.val = 0;
+    }
+
+    for (int k = 0; k < pointers_number; k++) {  // one new var_desc each
+        int type = flags & 0x3f;  // low six bits carry the array's type
+        int type_src, type_dst;
+        //  Get new values
+        // type_src, type_dst: the element type matching the array type
+        type_src = type_dst = (type == c_data_ptr_array) ?
+                              c_data_ptr   : (type == c_func_ptr_array) ?
+                              c_func_ptr   : (type == c_void_ptr_array) ?
+                              c_void_ptr   : (type == c_string_ptr_array) ?
+                              c_string_ptr : 0;
+
+        // Get ptr val
+        if (!ptr.read_next(true)) {
+            break;  // the pointer reader reports no further element
+        }
+        else {
+            ptr.val = (void*)(ptr.base + ptr.offset);  // address of the slot
+        }
+
+        // !!! If we got error at phase of reading - it's an internal
+        // !!! error, as we must detect mismatch before
+
+        // Get into val
+        if (m_vars[i].into) {
+            if (!into.read_next(true)) {
+                LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
+                LIBOFFLOAD_ABORT;
+            }
+            else {
+                into.val = (void*)(into.base + into.offset);
+            }
+        }
+
+        // Get other components of the clause; the read_next argument is
+        // non-zero exactly when that clause was supplied as an array
+        if (!ext_start.read_next(flags & (1<<flag_extent_start_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!ext_elements.read_next(
+                flags & (1<<flag_extent_elements_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!alloc_if.read_next(flags & (1<<flag_alloc_if_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!free_if.read_next(flags & (1<<flag_free_if_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!align.read_next(flags & (1<<flag_align_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!into_start.read_next(flags & (1<<flag_into_start_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!into_elem.read_next(flags & (1<<flag_into_elements_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!alloc_start.read_next(flags & (1<<flag_alloc_start_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
+            LIBOFFLOAD_ABORT;
+        }
+        if (!alloc_elem.read_next(
+                 flags & (1<<flag_alloc_elements_is_array))) {
+            LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent elements");
+            LIBOFFLOAD_ABORT;
+        }
+        // direction, flags and size are copied from the array's own entry
+        m_vars[new_index + k].direction.bits = m_vars[i].direction.bits;
+        m_vars[new_index + k].alloc_if = alloc_if.val;
+        m_vars[new_index + k].free_if = free_if.val;
+        m_vars[new_index + k].align = align.val;
+        m_vars[new_index + k].mic_offset = 0;
+        m_vars[new_index + k].flags.bits = m_vars[i].flags.bits;
+        m_vars[new_index + k].offset = 0;
+        m_vars[new_index + k].size = m_vars[i].size;
+
+        if (ext_start.val == 0) {  // zero start: pass the pointer and a count
+            m_vars[new_index + k].count = ext_elements.val;
+            m_vars[new_index + k].ptr = ptr.val;
+            if (type_src == c_string_ptr) {
+                m_vars[new_index + k].size = 0;
+            }
+        }
+        else {  // non-zero start: describe the section with an arr_desc
+            m_vars[new_index + k].count = 0;
+            m_vars[new_index + k].ptr =
+                static_cast<void*>(make_arr_desc(
+                ptr.val,
+                ext_start.val,
+                ext_elements.val,
+                m_vars[i].size));
+            // NOTE(review): "c_string_ptr ?" below is not compared to type_src
+            type_src = type_src == c_data_ptr ? c_cean_var_ptr :
+                                   c_string_ptr ? c_cean_var_ptr :
+                                   type_src;  // confirm intent
+            if (!m_vars[i].into) {
+                type_dst = type_src;
+            }
+        }
+
+        if (m_vars[i].into && into_elem.val != 0) {
+            m_vars[new_index + k].into =
+                static_cast<void*>(make_arr_desc(
+                into.val,
+                into_start.val,
+                into_elem.val,
+                m_vars[i].size));
+            type_dst = (type == c_data_ptr_array) ? c_cean_var_ptr :
+                       (type == c_string_ptr_array) ? c_cean_var_ptr :
+                        type_src;
+        }
+        else {
+            m_vars[new_index + k].into = NULL;
+        }
+
+        if (alloc_elem.val != 0) {
+            m_vars[new_index + k].alloc =
+                static_cast<void*>(make_arr_desc(
+                ptr.val,
+                alloc_start.val,
+                alloc_elem.val,
+                m_vars[i].size));
+        }
+        else {
+            m_vars[new_index + k].alloc = NULL;
+        }
+
+        m_vars[new_index + k].type.src = type_src;
+        m_vars[new_index + k].type.dst = type_dst;
+
+        m_vars_extra[new_index + k].is_arr_ptr_el = 1;
+        m_vars_extra[new_index + k].ptr_arr_offset =
+            src_is_for_mic ? ptr.offset : into.offset;
+    }
+    // count and alloc fields are useless at target. They can be reused
+    // for pointer arrays.
+    m_vars[i].count = pointers_number;
+    m_vars[i].ptr_arr_offset = new_index;  // index of the first new var_desc
+    return true;
+}
+
+static void __offload_fini_library(void)
+{
+    OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ...\n");
+
+    // engine-related cleanup happens only when engines were counted
+    if (mic_engines_total > 0) {
+        delete[] mic_engines;
+
+        // release the strdup'ed path strings and clear the pointers
+        if (mic_library_path != NULL) {
+            free(mic_library_path);
+            mic_library_path = NULL;
+        }
+        if (mic_proxy_fs_root != NULL) {
+            free(mic_proxy_fs_root);
+            mic_proxy_fs_root = NULL;
+        }
+
+        thread_key_delete(mic_thread_key);  // destroy thread key
+    }
+
+    if (COI::is_available) {  // unload COI library
+        COI::fini();
+    }
+
+    OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ... done\n");
+}
+
+static void __offload_init_library_once(void)
+{
+    COIRESULT res;
+    uint32_t num_devices;
+    std::bitset<MIC_ENGINES_MAX> devices;
+
+    prefix = report_get_message_str(c_report_host);
+
+    // initialize trace
+    const char *env_var = getenv(htrace_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t new_val;
+        if (__offload_parse_int_string(env_var, new_val)) {
+            console_enabled = new_val & 0x0f;
+        }
+    }
+
+    env_var = getenv(offload_report_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t env_val;
+        if (__offload_parse_int_string(env_var, env_val)) {
+            if (env_val == OFFLOAD_REPORT_1 ||
+                env_val == OFFLOAD_REPORT_2 ||
+                env_val == OFFLOAD_REPORT_3) {
+                offload_report_level = env_val;
+            }
+            else {
+                LIBOFFLOAD_ERROR(c_invalid_env_report_value,
+                                 offload_report_envname);
+            }
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
+                             offload_report_envname);
+        }
+    }
+    else if (!offload_report_level) {
+        env_var = getenv(timer_envname);
+        if (env_var != 0 && *env_var != '\0') {
+            timer_enabled = atoi(env_var);
+        }
+    }
+
+    // initialize COI
+    if (!COI::init()) {
+        return;
+    }
+
+    // get number of devices installed in the system
+    res = COI::EngineGetCount(COI_ISA_KNC, &num_devices);
+    if (res != COI_SUCCESS) {
+        return;
+    }
+
+    if (num_devices > MIC_ENGINES_MAX) {
+        num_devices = MIC_ENGINES_MAX;
+    }
+
+    // fill in the list of devices that can be used for offloading
+    env_var = getenv("OFFLOAD_DEVICES");
+    if (env_var != 0) {
+        if (strcasecmp(env_var, "none") != 0) {
+            // value is composed of comma separated physical device indexes
+            char *buf = strdup(env_var);
+            char *str, *ptr;
+            for (str = strtok_r(buf, ",", &ptr); str != 0;
+                 str = strtok_r(0, ",", &ptr)) {
+                // convert string to an int
+                int64_t num;
+                if (!__offload_parse_int_string(str, num)) {
+                    LIBOFFLOAD_ERROR(c_mic_init5);
+
+                    // fallback to using all installed devices
+                    devices.reset();
+                    for (int i = 0; i < num_devices; i++) {
+                        devices.set(i);
+                    }
+                    break;
+                }
+                if (num < 0 || num >= num_devices) {
+                    LIBOFFLOAD_ERROR(c_mic_init6, num);
+                    continue;
+                }
+                devices.set(num);
+            }
+            free(buf);
+        }
+    }
+    else {
+        // use all available devices
+        for (int i = 0; i < num_devices; i++) {
+            COIENGINE engine;
+            res = COI::EngineGetHandle(COI_ISA_KNC, i, &engine);
+            if (res == COI_SUCCESS) {
+                devices.set(i);
+            }
+        }
+    }
+
+    mic_engines_total = devices.count();
+
+    // no need to continue if there are no devices to offload to
+    if (mic_engines_total <= 0) {
+        return;
+    }
+
+    // initialize indexes for available devices
+    mic_engines = new Engine[mic_engines_total];
+    for (int p_idx = 0, l_idx = 0; p_idx < num_devices; p_idx++) {
+        if (devices[p_idx]) {
+            mic_engines[l_idx].set_indexes(l_idx, p_idx);
+            l_idx++;
+        }
+    }
+
+    // library search path for device binaries
+    env_var = getenv("MIC_LD_LIBRARY_PATH");
+    if (env_var != 0) {
+        mic_library_path = strdup(env_var);
+    }
+
+    // memory size reserved for COI buffers
+    env_var = getenv("MIC_BUFFERSIZE");
+    if (env_var != 0) {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size)) {
+            mic_buffer_size = new_size;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_BUFFERSIZE");
+        }
+    }
+
+    // determine stacksize for the pipeline on the device
+    env_var = getenv("MIC_STACKSIZE");
+    if (env_var != 0 && *env_var != '\0') {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size) &&
+            (new_size >= 16384) && ((new_size & 4095) == 0)) {
+            mic_stack_size = new_size;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_mic_init3);
+        }
+    }
+
+    // proxy I/O
+    env_var = getenv("MIC_PROXY_IO");
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t new_val;
+        if (__offload_parse_int_string(env_var, new_val)) {
+            mic_proxy_io = new_val;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, "MIC_PROXY_IO");
+        }
+    }
+    env_var = getenv("MIC_PROXY_FS_ROOT");
+    if (env_var != 0 && *env_var != '\0') {
+        mic_proxy_fs_root = strdup(env_var);
+    }
+
+    // Prepare environment for the target process using the following
+    // rules
+    // - If MIC_ENV_PREFIX is set then any environment variable on the
+    //   host which has that prefix are copied to the device without
+    //   the prefix.
+    //   All other host environment variables are ignored.
+    // - If MIC_ENV_PREFIX is not set or if MIC_ENV_PREFIX="" then host
+    //   environment is duplicated.
+    env_var = getenv("MIC_ENV_PREFIX");
+    if (env_var != 0 && *env_var != '\0') {
+        mic_env_vars.set_prefix(env_var);
+
+        int len = strlen(env_var);
+        for (int i = 0; environ[i] != 0; i++) {
+            if (strncmp(environ[i], env_var, len) == 0 &&
+                strncmp(environ[i], "MIC_LD_LIBRARY_PATH", 19) != 0 &&
+                environ[i][len] != '=') {
+                mic_env_vars.analyze_env_var(environ[i]);
+            }
+        }
+    }
+
+    // create key for thread data
+    if (thread_key_create(&mic_thread_key, Engine::destroy_thread_data)) {
+        LIBOFFLOAD_ERROR(c_mic_init4, errno);
+        return;
+    }
+
+    // cpu frequency
+    cpu_frequency = COI::PerfGetCycleFrequency();
+
+    env_var = getenv(mic_use_2mb_buffers_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size)) {
+            __offload_use_2mb_buffers = new_size;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_value,
+                             mic_use_2mb_buffers_envname);
+        }
+    }
+
+    env_var = getenv(mic_use_async_buffer_write_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size)) {
+            __offload_use_async_buffer_write = new_size;
+        }
+    }
+
+    env_var = getenv(mic_use_async_buffer_read_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        uint64_t new_size;
+        if (__offload_parse_size_string(env_var, new_size)) {
+            __offload_use_async_buffer_read = new_size;
+        }
+    }
+
+    // mic initialization type
+    env_var = getenv(offload_init_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        if (strcmp(env_var, "on_offload") == 0) {
+            __offload_init_type = c_init_on_offload;
+        }
+        else if (strcmp(env_var, "on_offload_all") == 0) {
+            __offload_init_type = c_init_on_offload_all;
+        }
+#ifndef TARGET_WINNT
+        else if (strcmp(env_var, "on_start") == 0) {
+            __offload_init_type = c_init_on_start;
+        }
+#endif // TARGET_WINNT
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_value, offload_init_envname);
+        }
+    }
+
+    // active wait
+    env_var = getenv(offload_active_wait_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t new_val;
+        if (__offload_parse_int_string(env_var, new_val)) {
+            __offload_active_wait = new_val;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
+                             offload_active_wait_envname);
+        }
+    }
+
+    // omp device num
+    env_var = getenv(omp_device_num_envname);
+    if (env_var != 0 && *env_var != '\0') {
+        int64_t new_val;
+        if (__offload_parse_int_string(env_var, new_val) && new_val >= 0) {
+            __omp_device_num = new_val;
+        }
+        else {
+            LIBOFFLOAD_ERROR(c_omp_invalid_device_num_env,
+                             omp_device_num_envname);
+        }
+    }
+
+    // init ORSL
+    ORSL::init();
+}
+
+extern int __offload_init_library(void)
+{
+    // Runs the one-time library initialization, then registers any target
+    // libraries queued by __offload_register_image. Returns nonzero when
+    // offload is available (COI is available and at least one device exists).
+    static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
+    __offload_run_once(&ctrl, __offload_init_library_once);
+    const bool is_available = COI::is_available && (mic_engines_total > 0);
+
+    // register pending libraries if there are any
+    if (is_available && __target_libs) {
+        mutex_locker_t locker(__target_libs_lock);
+
+        for (TargetImageList::iterator it = __target_libs_list.begin();
+             it != __target_libs_list.end(); ++it) {
+            // Register library in COI
+            COI::ProcessRegisterLibraries(1, &it->data, &it->size,
+                                          &it->origin, &it->offset);
+
+            // add lib to all engines; the index type matches the unsigned
+            // mic_engines_total to avoid a signed/unsigned comparison
+            for (uint32_t i = 0; i < mic_engines_total; ++i) {
+                mic_engines[i].add_lib(*it);
+            }
+        }
+        __target_libs = false;
+        __target_libs_list.clear();
+    }
+
+    return is_available;
+}
+
+extern "C" void __offload_register_image(const void *target_image)
+{
+    // Registers a target image: an executable triggers library initialization,
+    // a shared library is queued until __offload_init_library processes it.
+    const struct Image *image = static_cast<const struct Image*>(target_image);
+    const char *name = image->data;
+    const void *data = image->data + strlen(image->data) + 1;
+    uint64_t    size = image->size;
+    const char *origin = 0;
+    uint64_t    offset = 0;
+
+    // our actions depend on the image type
+    const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
+    switch (hdr->e_type) {
+        case ET_EXEC:
+            // Each offload application is supposed to have only one target
+            // image representing target executable.
+            // No thread synchronization is required here as the initialization
+            // code is always executed in a single thread.
+            if (__target_exe != 0) {
+                LIBOFFLOAD_ERROR(c_multiple_target_exes);
+                exit(1);
+            }
+            __target_exe = new TargetImage(name, data, size, origin, offset);
+
+            // Registration code for execs is always called from the context
+            // of main and thus we can safely call any function here,
+            // including LoadLibrary API on windows. This is the place where
+            // we do the offload library initialization.
+            if (__offload_init_library()) {
+                // initialize engine if init_type is on_start
+                if (__offload_init_type == c_init_on_start) {
+                    for (uint32_t i = 0; i < mic_engines_total; ++i) {
+                        mic_engines[i].init();
+                    }
+                }
+            }
+            break;
+
+        case ET_DYN: {
+            // Registration code for libraries is called from the DllMain
+            // context (on windows) and thus we cannot do anything useful
+            // here. So we just add it to the list of pending libraries for
+            // the later use.
+            mutex_locker_t locker(__target_libs_lock); // unlocks on scope exit
+            __target_libs = true;
+            __target_libs_list.push_back(TargetImage(name, data, size,
+                                                     origin, offset));
+            break;
+        }
+
+        default:
+            // something is definitely wrong, issue an error and exit
+            LIBOFFLOAD_ERROR(c_unknown_binary_type);
+            exit(1);
+    }
+}
+
+extern "C" void __offload_unregister_image(const void *target_image)
+{
+    // Performs library cleanup when the image being unregistered is the
+    // target executable; other image types are ignored. The file-scope
+    // struct Image is used instead of redeclaring an identical local type.
+    //
+    // Target image is packed as follows:
+    //      8 bytes                - size of the target binary
+    //      null-terminated string - binary name
+    //      <size> bytes           - binary contents
+    const struct Image *image = static_cast<const struct Image*>(target_image);
+
+    // skip the null-terminated name to reach the binary contents
+    const void *data = image->data + strlen(image->data) + 1;
+
+    // our actions depend on the image type
+    const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
+    if (hdr->e_type == ET_EXEC) {
+        // We are executing exec's destructors.
+        // It is time to do a library cleanup.
+        if (timer_enabled) {
+            Offload_Timer_Print();
+        }
+
+#ifdef MYO_SUPPORT
+        __offload_myoFini();
+#endif // MYO_SUPPORT
+
+        __offload_fini_library();
+    }
+}
+
+// Runtime trace interface for user programs
+
+void __offload_console_trace(int level)
+{
+    console_enabled = level; // store the requested level in console_enabled
+}
+
+// User-visible offload API
+
+int _Offload_number_of_devices(void)
+{
+    __offload_init_library(); // make sure the one-time initialization has run
+    return mic_engines_total; // current value of the engine count
+}
+
+int _Offload_get_device_number(void)
+{
+    return -1; // this implementation always returns -1
+}
+
+int _Offload_get_physical_device_number(void)
+{
+    return -1; // this implementation always returns -1
+}
+
+int _Offload_signaled(int index, void *signal)
+{
+    __offload_init_library();
+
+    // reject a negative index and the case where there are no engines
+    if (index < 0 || mic_engines_total <= 0) {
+        LIBOFFLOAD_ERROR(c_offload_signaled1, index);
+        LIBOFFLOAD_ABORT;
+    }
+
+    // find associated async task; index is reduced modulo the engine count
+    OffloadDescriptor *task =
+        mic_engines[index % mic_engines_total].find_signal(signal, false);
+    if (task == 0) {
+        LIBOFFLOAD_ERROR(c_offload_signaled2, signal);
+        LIBOFFLOAD_ABORT;
+    }
+
+    return task->is_signaled(); // result of the task's own is_signaled() query
+}
+
+void _Offload_report(int val)
+{
+    // Only OFFLOAD_REPORT_ON / OFFLOAD_REPORT_OFF are accepted; other values
+    // leave the current report setting unchanged.
+    if (val != OFFLOAD_REPORT_ON && val != OFFLOAD_REPORT_OFF) return;
+    offload_report_enabled = val;
+}
+
+// IDB support
+int   __dbg_is_attached = 0;    // set to non-zero by the host-side debugger
+int   __dbg_target_id = -1;     // 0-based device number; starts at -1
+pid_t __dbg_target_so_pid = -1; // process id; starts at -1
+char  __dbg_target_exe_name[MAX_TARGET_NAME] = {0}; // path of hosting process image
+const int __dbg_api_major_version = 1; // major version of debugger support API
+const int __dbg_api_minor_version = 0; // minor version of debugger support API
+
+void __dbg_target_so_loaded()
+{ // empty body: nothing is done here when this hook is called
+}
+void __dbg_target_so_unloaded()
+{ // empty body: nothing is done here when this hook is called
+}
diff --git a/final/offload/src/offload_host.h b/final/offload/src/offload_host.h
new file mode 100644
index 0000000..ea23996
--- /dev/null
+++ b/final/offload/src/offload_host.h
@@ -0,0 +1,343 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+/*! \file
+    \brief The parts of the runtime library used only on the host
+*/
+
+#ifndef OFFLOAD_HOST_H_INCLUDED
+#define OFFLOAD_HOST_H_INCLUDED
+
+#ifndef TARGET_WINNT
+#include <unistd.h>
+#endif // TARGET_WINNT
+#include "offload_common.h"
+#include "offload_util.h"
+#include "offload_engine.h"
+#include "offload_env.h"
+#include "offload_orsl.h"
+#include "coi/coi_client.h"
+
+// MIC engines.
+extern Engine*  mic_engines;
+extern uint32_t mic_engines_total;
+
+//! The target image is packed as follows.
+/*!      1. 8 bytes containing the size of the target binary          */
+/*!      2. a null-terminated string which is the binary name         */
+/*!      3. <size> number of bytes that are the contents of the image */
+/*!      The address of symbol __offload_target_image
+             is the address of this structure.                        */
+struct Image {
+     int64_t size; //!< Size in bytes of the target binary name and contents
+     char data[];  //!< Null-terminated binary name followed by the image contents
+};
+
+// The offload descriptor: state of one offload bound to one Engine (m_device).
+class OffloadDescriptor
+{
+public:
+    OffloadDescriptor(
+        int index,
+        _Offload_status *status,
+        bool is_mandatory,
+        bool is_openmp,
+        OffloadHostTimerData * timer_data
+    ) :
+        m_device(mic_engines[index % mic_engines_total]), // index wraps around
+        m_is_mandatory(is_mandatory),
+        m_is_openmp(is_openmp),
+        m_inout_buf(0),
+        m_func_desc(0),
+        m_func_desc_size(0),
+        m_in_deps(0),
+        m_in_deps_total(0),
+        m_out_deps(0),
+        m_out_deps_total(0),
+        m_vars(0),
+        m_vars_extra(0),
+        m_status(status),
+        m_timer_data(timer_data)
+    {} // members not listed above are not initialized by this constructor
+
+    ~OffloadDescriptor()
+    {
+        if (m_in_deps != 0) {
+            free(m_in_deps);
+        }
+        if (m_out_deps != 0) {
+            free(m_out_deps);
+        }
+        if (m_func_desc != 0) {
+            free(m_func_desc);
+        }
+        if (m_vars != 0) {
+            free(m_vars);
+            free(m_vars_extra); // released only together with m_vars
+        }
+    }
+
+    bool offload(const char *name, bool is_empty,
+                 VarDesc *vars, VarDesc2 *vars2, int vars_total,
+                 const void **waits, int num_waits, const void **signal,
+                 int entry_id, const void *stack_addr);
+    bool offload_finish();
+
+    bool is_signaled();
+
+    OffloadHostTimerData* get_timer_data() const {
+        return m_timer_data;
+    }
+
+private:
+    bool wait_dependencies(const void **waits, int num_waits);
+    bool setup_descriptors(VarDesc *vars, VarDesc2 *vars2, int vars_total,
+                           int entry_id, const void *stack_addr);
+    bool setup_misc_data(const char *name);
+    bool send_pointer_data(bool is_async);
+    bool send_noncontiguous_pointer_data(
+        int i,
+        PtrData* src_buf,
+        PtrData* dst_buf,
+        COIEVENT *event);
+    bool receive_noncontiguous_pointer_data(
+        int i,
+        char* src_data,
+        COIBUFFER dst_buf,
+        COIEVENT *event);
+
+    bool gather_copyin_data();
+
+    bool compute();
+
+    bool receive_pointer_data(bool is_async);
+    bool scatter_copyout_data();
+
+    void cleanup();
+
+    bool find_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
+                       int64_t length, bool error_does_not_exist = true);
+    bool alloc_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
+                        int64_t length, int64_t alloc_disp, int align);
+    bool init_static_ptr_data(PtrData *ptr_data);
+    bool init_mic_address(PtrData *ptr_data);
+    bool offload_stack_memory_manager(const void * stack_begin, int routine_id,
+                                      int buf_size, int align, bool *is_new);
+    bool nullify_target_stack(COIBUFFER targ_buf, uint64_t size);
+
+    bool gen_var_descs_for_pointer_array(int i);
+
+    void report_coi_error(error_types msg, COIRESULT res);
+    _Offload_result translate_coi_error(COIRESULT res) const;
+
+private:
+    typedef std::list<COIBUFFER> BufferList;
+
+    // extra data associated with each variable descriptor
+    struct VarExtra {
+        PtrData* src_data;
+        PtrData* dst_data;
+        AutoData* auto_data;
+        int64_t cpu_disp;
+        int64_t cpu_offset;
+        CeanReadRanges *read_rng_src;
+        CeanReadRanges *read_rng_dst;
+        int64_t ptr_arr_offset;
+        bool is_arr_ptr_el;
+    };
+
+    template<typename T> class ReadArrElements {
+    public:
+        ReadArrElements(): // val, size and length_cur are not initialized here
+            ranges(NULL),
+            el_size(sizeof(T)),
+            offset(0),
+            count(0),
+            is_empty(true),
+            base(NULL)
+        {}
+
+        bool read_next(bool flag)
+        {
+            if (flag != 0) { // when flag is false nothing is read
+                if (is_empty) {
+                    if (ranges) {
+                        if (!get_next_range(ranges, &offset)) {
+                            // ranges are over
+                            return false;
+                        }
+                    }
+                    // all contiguous elements are over
+                    else if (count != 0) {
+                        return false;
+                    }
+
+                    length_cur = size; // start a new run of size bytes
+                }
+                else {
+                    offset += el_size; // advance within the current run
+                }
+                val = (T)get_el_value(base, offset, el_size);
+                length_cur -= el_size;
+                count++;
+                is_empty = length_cur == 0; // current run fully consumed
+            }
+            return true;
+        }
+    public:
+        CeanReadRanges * ranges;
+        T       val;
+        int     el_size;
+        int64_t size,
+                offset,
+                length_cur;
+        bool    is_empty;
+        int     count;
+        char   *base;
+    };
+
+    // ptr_data for persistent auto objects
+    PtrData*    m_stack_ptr_data;
+    PtrDataList m_destroy_stack;
+
+    // Engine
+    Engine& m_device;
+
+    // if true offload is mandatory
+    bool m_is_mandatory;
+
+    // if true offload has openmp origin
+    const bool m_is_openmp;
+
+    // The Marshaller for the inputs of the offloaded region.
+    Marshaller m_in;
+
+    // The Marshaller for the outputs of the offloaded region.
+    Marshaller m_out;
+
+    // List of buffers that are passed to dispatch call
+    BufferList m_compute_buffers;
+
+    // List of buffers that need to be destroyed at the end of offload
+    BufferList m_destroy_buffers;
+
+    // Variable descriptors
+    VarDesc*  m_vars;
+    VarExtra* m_vars_extra;
+    int       m_vars_total;
+
+    // Pointer to a user-specified status variable
+    _Offload_status *m_status;
+
+    // Function descriptor
+    FunctionDescriptor* m_func_desc;
+    uint32_t            m_func_desc_size;
+
+    // Buffer for transferring copyin/copyout data
+    COIBUFFER m_inout_buf;
+
+    // Dependencies
+    COIEVENT *m_in_deps;
+    uint32_t  m_in_deps_total;
+    COIEVENT *m_out_deps;
+    uint32_t  m_out_deps_total;
+
+    // Timer data
+    OffloadHostTimerData *m_timer_data;
+
+    // copyin/copyout data length
+    uint64_t m_in_datalen;
+    uint64_t m_out_datalen;
+
+    // a boolean value calculated in setup_descriptors. If true we need to do
+    // a run function on the target. Otherwise it may be optimized away.
+    bool m_need_runfunction;
+};
+
+// Initialization types for MIC (the value stored in __offload_init_type)
+enum OffloadInitType {
+    c_init_on_start,         // all devices before entering main
+    c_init_on_offload,       // single device before starting the first offload
+    c_init_on_offload_all    // all devices before starting the first offload
+};
+
+// Initializes library and registers specified offload image.
+extern "C" void __offload_register_image(const void* image);
+extern "C" void __offload_unregister_image(const void* image);
+
+// Initializes offload runtime library.
+extern int __offload_init_library(void);
+
+// thread data for associating pipelines with threads
+extern pthread_key_t mic_thread_key;
+
+// Environment variables for devices
+extern MicEnvVar mic_env_vars;
+
+// CPU frequency
+extern uint64_t cpu_frequency;
+
+// LD_LIBRARY_PATH for MIC libraries
+extern char* mic_library_path;
+
+// stack size for target
+extern uint32_t mic_stack_size;
+
+// Preallocated memory size for buffers on MIC
+extern uint64_t mic_buffer_size;
+
+// Setting controlling inout proxy
+extern bool  mic_proxy_io;
+extern char* mic_proxy_fs_root;
+
+// Threshold for creating buffers with large pages
+extern uint64_t __offload_use_2mb_buffers;
+
+// offload initialization type
+extern OffloadInitType __offload_init_type;
+
+// Device number to offload to when device is not explicitly specified.
+extern int __omp_device_num;
+
+// target executable
+extern TargetImage* __target_exe;
+
+// IDB support
+
+// Called by the offload runtime after initialization of offload infrastructure
+// has been completed.
+extern "C" void  __dbg_target_so_loaded();
+
+// Called by the offload runtime when the offload infrastructure is about to be
+// shut down, currently at application exit.
+extern "C" void  __dbg_target_so_unloaded();
+
+// Null-terminated string containing path to the process image of the hosting
+// application (offload_main)
+#define MAX_TARGET_NAME 512
+extern "C" char  __dbg_target_exe_name[MAX_TARGET_NAME];
+
+// Integer specifying the process id
+extern "C" pid_t __dbg_target_so_pid;
+
+// Integer specifying the 0-based device number
+extern "C" int   __dbg_target_id;
+
+// Set to non-zero by the host-side debugger to enable offload debugging
+// support
+extern "C" int   __dbg_is_attached;
+
+// Major version of the debugger support API
+extern "C" const int __dbg_api_major_version;
+
+// Minor version of the debugger support API
+extern "C" const int __dbg_api_minor_version;
+
+#endif // OFFLOAD_HOST_H_INCLUDED
diff --git a/final/offload/src/offload_myo_host.cpp b/final/offload/src/offload_myo_host.cpp
new file mode 100644
index 0000000..2e1c186
--- /dev/null
+++ b/final/offload/src/offload_myo_host.cpp
@@ -0,0 +1,805 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "offload_myo_host.h"
+#include <errno.h>
+#include <malloc.h>
+#include "offload_host.h"
+
+#if defined(LINUX) || defined(FREEBSD)
+#include <mm_malloc.h>
+#endif
+
+#define MYO_VERSION1    "MYO_1.0"
+
+extern "C" void __cilkrts_cilk_for_32(void*, void*, uint32_t, int32_t);
+extern "C" void __cilkrts_cilk_for_64(void*, void*, uint64_t, int32_t);
+
+#ifndef TARGET_WINNT
+#pragma weak __cilkrts_cilk_for_32
+#pragma weak __cilkrts_cilk_for_64
+#endif // TARGET_WINNT
+
+#ifdef TARGET_WINNT
+#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(-1)
+#else // TARGET_WINNT
+#define MYO_TABLE_END_MARKER() reinterpret_cast<const char*>(0)
+#endif // TARGET_WINNT
+
+class MyoWrapper { // wraps entry points resolved from the MYO client library
+public:
+    MyoWrapper() : m_lib_handle(0), m_is_available(false)
+    {}
+
+    bool is_available() const {
+        return m_is_available;
+    }
+
+    bool LoadLibrary(void);
+
+    // unloads the library (currently a no-op: the close call is commented out)
+    void UnloadLibrary(void) {
+//        if (m_lib_handle != 0) {
+//            DL_close(m_lib_handle);
+//            m_lib_handle = 0;
+//        }
+    }
+    // Wrappers for MYO client functions. The function pointers they call are
+    // assigned by LoadLibrary; size_t arguments are cast for the %lld traces.
+    void LibInit(void *arg, void *func) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoinit,
+                                 "%s(%p, %p)\n", __func__, arg, func);
+        CheckResult(__func__, m_lib_init(arg, func));
+    }
+
+    void LibFini(void) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myofini, "%s()\n", __func__);
+        m_lib_fini();
+    }
+
+    void* SharedMalloc(size_t size) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedmalloc,
+                                 "%s(%lld)\n", __func__, (long long) size);
+        return m_shared_malloc(size);
+    }
+
+    void SharedFree(void *ptr) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedfree,
+                                 "%s(%p)\n", __func__, ptr);
+        m_shared_free(ptr);
+    }
+
+    void* SharedAlignedMalloc(size_t size, size_t align) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedmalloc,
+            "%s(%lld, %lld)\n", __func__, (long long) size, (long long) align);
+        return m_shared_aligned_malloc(size, align);
+    }
+
+    void SharedAlignedFree(void *ptr) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myosharedalignedfree,
+                              "%s(%p)\n", __func__, ptr);
+        m_shared_aligned_free(ptr);
+    }
+
+    void Acquire(void) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoacquire,
+                              "%s()\n", __func__);
+        CheckResult(__func__, m_acquire());
+    }
+
+    void Release(void) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myorelease,
+                            "%s()\n", __func__);
+        CheckResult(__func__, m_release());
+    }
+
+    void HostVarTablePropagate(void *table, int num_entries) const {
+        OFFLOAD_DEBUG_TRACE(4, "%s(%p, %d)\n", __func__, table, num_entries);
+        CheckResult(__func__, m_host_var_table_propagate(table, num_entries));
+    }
+
+    void HostFptrTableRegister(void *table, int num_entries,
+                               int ordered) const {
+        OFFLOAD_DEBUG_TRACE_1(4, 0, c_offload_myoregister,
+                            "%s(%p, %d, %d)\n", __func__, table,
+                            num_entries, ordered);
+        CheckResult(__func__,
+                    m_host_fptr_table_register(table, num_entries, ordered));
+    }
+
+    void RemoteThunkCall(void *thunk, void *args, int device) const {
+        OFFLOAD_DEBUG_TRACE(4, "%s(%p, %p, %d)\n", __func__, thunk, args,
+                            device);
+        CheckResult(__func__, m_remote_thunk_call(thunk, args, device));
+    }
+
+    MyoiRFuncCallHandle RemoteCall(char *func, void *args, int device) const {
+        OFFLOAD_DEBUG_TRACE(4, "%s(%s, %p, %d)\n", __func__, func, args,
+                            device);
+        return m_remote_call(func, args, device);
+    }
+
+    void GetResult(MyoiRFuncCallHandle handle) const {
+        OFFLOAD_DEBUG_TRACE(4, "%s(%p)\n", __func__, handle);
+        CheckResult(__func__, m_get_result(handle));
+    }
+
+private:
+    void CheckResult(const char *func, MyoError error) const {
+        if (error != MYO_SUCCESS) { // any MYO failure is reported, then exit(1)
+            LIBOFFLOAD_ERROR(c_myowrapper_checkresult, func, error);
+            exit(1);
+        }
+    }
+
+private:
+    void* m_lib_handle;
+    bool  m_is_available;
+
+    // pointers to functions from myo library
+    MyoError (*m_lib_init)(void*, void*);
+    void     (*m_lib_fini)(void);
+    void*    (*m_shared_malloc)(size_t);
+    void     (*m_shared_free)(void*);
+    void*    (*m_shared_aligned_malloc)(size_t, size_t);
+    void     (*m_shared_aligned_free)(void*);
+    MyoError (*m_acquire)(void);
+    MyoError (*m_release)(void);
+    MyoError (*m_host_var_table_propagate)(void*, int);
+    MyoError (*m_host_fptr_table_register)(void*, int, int);
+    MyoError (*m_remote_thunk_call)(void*, void*, int);
+    MyoiRFuncCallHandle (*m_remote_call)(char*, void*, int);
+    MyoError (*m_get_result)(MyoiRFuncCallHandle);
+};
+
+bool MyoWrapper::LoadLibrary(void)
+{ // Opens the MYO client library and resolves each entry point; true on success
+#ifndef TARGET_WINNT
+    const char *lib_name = "libmyo-client.so";
+#else // TARGET_WINNT
+    const char *lib_name = "myo-client.dll";
+#endif // TARGET_WINNT
+
+    OFFLOAD_DEBUG_TRACE(2, "Loading MYO library %s ...\n", lib_name);
+
+    m_lib_handle = DL_open(lib_name);
+    if (m_lib_handle == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to load the library. errno = %d\n",
+                            errno); // NOTE(review): assumes DL_open sets errno - confirm
+        return false;
+    }
+
+    m_lib_init = (MyoError (*)(void*, void*))
+        DL_sym(m_lib_handle, "myoiLibInit", MYO_VERSION1);
+    if (m_lib_init == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoiLibInit");
+        UnloadLibrary(); // currently a no-op (its body is commented out)
+        return false;
+    }
+
+    m_lib_fini = (void (*)(void))
+        DL_sym(m_lib_handle, "myoiLibFini", MYO_VERSION1);
+    if (m_lib_fini == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoiLibFini");
+        UnloadLibrary();
+        return false;
+    }
+
+    m_shared_malloc = (void* (*)(size_t))
+        DL_sym(m_lib_handle, "myoSharedMalloc", MYO_VERSION1);
+    if (m_shared_malloc == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoSharedMalloc");
+        UnloadLibrary();
+        return false;
+    }
+
+    m_shared_free = (void (*)(void*))
+        DL_sym(m_lib_handle, "myoSharedFree", MYO_VERSION1);
+    if (m_shared_free == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoSharedFree");
+        UnloadLibrary();
+        return false;
+    }
+
+    m_shared_aligned_malloc = (void* (*)(size_t, size_t))
+        DL_sym(m_lib_handle, "myoSharedAlignedMalloc", MYO_VERSION1);
+    if (m_shared_aligned_malloc == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoSharedAlignedMalloc");
+        UnloadLibrary();
+        return false;
+    }
+
+    m_shared_aligned_free = (void (*)(void*))
+        DL_sym(m_lib_handle, "myoSharedAlignedFree", MYO_VERSION1);
+    if (m_shared_aligned_free == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoSharedAlignedFree");
+        UnloadLibrary();
+        return false;
+    }
+
+    m_acquire = (MyoError (*)(void))
+        DL_sym(m_lib_handle, "myoAcquire", MYO_VERSION1);
+    if (m_acquire == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoAcquire");
+        UnloadLibrary();
+        return false;
+    }
+
+    m_release = (MyoError (*)(void))
+        DL_sym(m_lib_handle, "myoRelease", MYO_VERSION1);
+    if (m_release == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoRelease");
+        UnloadLibrary();
+        return false;
+    }
+
+    m_host_var_table_propagate = (MyoError (*)(void*, int))
+        DL_sym(m_lib_handle, "myoiHostVarTablePropagate", MYO_VERSION1);
+    if (m_host_var_table_propagate == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoiHostVarTablePropagate");
+        UnloadLibrary();
+        return false;
+    }
+
+    m_host_fptr_table_register = (MyoError (*)(void*, int, int))
+        DL_sym(m_lib_handle, "myoiHostFptrTableRegister", MYO_VERSION1);
+    if (m_host_fptr_table_register == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoiHostFptrTableRegister");
+        UnloadLibrary();
+        return false;
+    }
+
+    m_remote_thunk_call = (MyoError (*)(void*, void*, int))
+        DL_sym(m_lib_handle, "myoiRemoteThunkCall", MYO_VERSION1);
+    if (m_remote_thunk_call == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoiRemoteThunkCall");
+        UnloadLibrary();
+        return false;
+    }
+
+    m_remote_call = (MyoiRFuncCallHandle (*)(char*, void*, int))
+        DL_sym(m_lib_handle, "myoiRemoteCall", MYO_VERSION1);
+    if (m_remote_call == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoiRemoteCall");
+        UnloadLibrary();
+        return false;
+    }
+
+    m_get_result = (MyoError (*)(MyoiRFuncCallHandle))
+        DL_sym(m_lib_handle, "myoiGetResult", MYO_VERSION1);
+    if (m_get_result == 0) {
+        OFFLOAD_DEBUG_TRACE(2, "Failed to find %s in MYO library\n",
+                            "myoiGetResult");
+        UnloadLibrary();
+        return false;
+    }
+
+    OFFLOAD_DEBUG_TRACE(2, "The library was successfully loaded\n");
+
+    m_is_available = true; // set only after every symbol above was resolved
+
+    return true;
+}
+
+static bool myo_is_available;  // set to true at the end of __offload_myoInit_once
+static MyoWrapper myo_wrapper; // wrapper through which the MYO library is called
+
+struct MyoTable // a shared variable table together with its entry count
+{
+    MyoTable(SharedTableEntry *tab, int len) : var_tab(tab), var_tab_len(len)
+    {}
+
+    SharedTableEntry*   var_tab;     // first entry of the table
+    int                 var_tab_len; // length passed to HostVarTablePropagate
+};
+
+typedef std::list<MyoTable> MyoTableList;
+static MyoTableList __myo_table_list;     // tables propagated by __offload_myoInit
+static mutex_t      __myo_table_lock;     // held while that list is processed
+static bool         __myo_tables = false; // true while tables are pending
+
+static void __offload_myo_shared_table_register(SharedTableEntry *entry);
+static void __offload_myo_shared_init_table_register(InitTableEntry* entry);
+static void __offload_myo_fptr_table_register(FptrTableEntry *entry);
+
+static void __offload_myoLoadLibrary_once(void)
+{
+    if (__offload_init_library()) { // MYO is loaded only when offload is available
+        myo_wrapper.LoadLibrary();  // result is read later via is_available()
+    }
+}
+
+static bool __offload_myoLoadLibrary(void)
+{
+    static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
+    __offload_run_once(&ctrl, __offload_myoLoadLibrary_once); // load attempt runs once
+
+    return myo_wrapper.is_available(); // true only if LoadLibrary succeeded
+}
+
+static void __offload_myoInit_once(void)
+{
+    // One-time MYO start-up: initialize every engine, start MYO on each
+    // device, initialize the host MYO runtime and wait for the devices.
+    if (!__offload_myoLoadLibrary()) { return; }
+
+    // initialize all devices (unsigned index matches mic_engines_total)
+    for (uint32_t i = 0; i < mic_engines_total; ++i) {
+        mic_engines[i].init();
+    }
+
+    // load and initialize MYO library
+    OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ...\n");
+
+    COIEVENT events[MIC_ENGINES_MAX];
+    MyoiUserParams params[MIC_ENGINES_MAX+1];
+
+    // load target library to all devices
+    for (uint32_t i = 0; i < mic_engines_total; ++i) {
+        mic_engines[i].init_myo(&events[i]);
+
+        params[i].type = MYOI_USERPARAMS_DEVID;
+        params[i].nodeid = mic_engines[i].get_physical_index() + 1;
+    }
+
+    params[mic_engines_total].type = MYOI_USERPARAMS_LAST_MSG;
+
+    // initialize myo runtime on host
+    myo_wrapper.LibInit(params, 0);
+
+    // wait for the target init calls to finish
+    COIRESULT res;
+    res = COI::EventWait(mic_engines_total, events, -1, 1, 0, 0);
+    if (res != COI_SUCCESS) {
+        LIBOFFLOAD_ERROR(c_event_wait, res);
+        exit(1);
+    }
+
+    myo_is_available = true;
+
+    OFFLOAD_DEBUG_TRACE(2, "Initializing MYO library ... done\n");
+}
+
+static bool __offload_myoInit(void)
+{
+    static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
+    __offload_run_once(&ctrl, __offload_myoInit_once);
+
+    // register pending shared var tables, if MYO came up and any are queued
+    if (myo_is_available && __myo_tables) {
+        mutex_locker_t locker(__myo_table_lock);
+
+        if (__myo_tables) { // tested again now that the lock is held
+            //  Register tables with MYO so it can propagate to target.
+            for(MyoTableList::const_iterator it = __myo_table_list.begin();
+                it != __myo_table_list.end(); ++it) {
+#ifdef TARGET_WINNT
+                for (SharedTableEntry *entry = it->var_tab;
+                     entry->varName != MYO_TABLE_END_MARKER(); entry++) {
+                    if (entry->varName == 0) {
+                        continue; // entries without a name are skipped
+                    }
+                    myo_wrapper.HostVarTablePropagate(entry, 1);
+                }
+#else // TARGET_WINNT
+                myo_wrapper.HostVarTablePropagate(it->var_tab,
+                                                  it->var_tab_len);
+#endif // TARGET_WINNT
+            }
+
+            __myo_table_list.clear();
+            __myo_tables = false;
+        }
+    }
+
+    return myo_is_available; // true once __offload_myoInit_once has completed
+}
+
+// Tells whether a shared-variable table holds at least one entry to register.
+static bool shared_table_entries(SharedTableEntry *entry)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    while (entry->varName != MYO_TABLE_END_MARKER()) {
+#ifdef TARGET_WINNT
+        // Entries with a null name do not count; keep looking.
+        if (entry->varName != 0) {
+            return true;
+        }
+        entry++;
+#else // TARGET_WINNT
+        return true;
+#endif // TARGET_WINNT
+    }
+    return false;
+}
+
+// Tells whether a function-pointer table holds at least one entry to register.
+static bool fptr_table_entries(FptrTableEntry *entry)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    while (entry->funcName != MYO_TABLE_END_MARKER()) {
+#ifdef TARGET_WINNT
+        // Entries with a null name do not count; keep looking.
+        if (entry->funcName != 0) {
+            return true;
+        }
+        entry++;
+#else // TARGET_WINNT
+        return true;
+#endif // TARGET_WINNT
+    }
+    return false;
+}
+
+// Registers the MYO tables, provided the shared or fptr table has entries.
+extern "C" void __offload_myoRegisterTables(
+    InitTableEntry* init_table,
+    SharedTableEntry *shared_table,
+    FptrTableEntry *fptr_table
+)
+{
+    // MYO is needed only if at least one of the two tables is not empty.
+    const bool have_entries = shared_table_entries(shared_table) ||
+                              fptr_table_entries(fptr_table);
+    if (!have_entries) {
+        return;
+    }
+    __offload_myoLoadLibrary();   // make sure myo library is loaded
+    __offload_myo_shared_table_register(shared_table);
+    __offload_myo_fptr_table_register(fptr_table);
+    __offload_myo_shared_init_table_register(init_table);
+}
+
+// Finalizes MYO on the devices and the host; no-op unless myo_is_available.
+void __offload_myoFini(void)
+{
+    if (!myo_is_available) {
+        return;
+    }
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+    // Kick off the myoiLibFini calls on all devices, one event per device.
+    COIEVENT fini_events[MIC_ENGINES_MAX];
+    for (int dev = 0; dev < mic_engines_total; ++dev) {
+        mic_engines[dev].fini_myo(&fini_events[dev]);
+    }
+
+    // Clean up the MYO runtime on the host.
+    myo_wrapper.LibFini();
+
+    // Wait for the target fini calls to finish.
+    const COIRESULT status =
+        COI::EventWait(mic_engines_total, fini_events, -1, 1, 0, 0);
+    if (status != COI_SUCCESS) {
+        LIBOFFLOAD_ERROR(c_event_wait, status);
+        exit(1);
+    }
+}
+
+// Invokes the sharedAddr function of each shared-variable table entry and,
+// if any entry was processed, queues the table on __myo_table_list.
+static void __offload_myo_shared_table_register(SharedTableEntry *entry)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    SharedTableEntry *const table_head = entry;
+    int registered = 0;
+
+    while (entry->varName != MYO_TABLE_END_MARKER()) {
+        SharedTableEntry *const cur = entry++;
+#ifdef TARGET_WINNT
+        if (cur->varName == 0) {
+            OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedTable entry\n");
+            continue;
+        }
+#endif // TARGET_WINNT
+        OFFLOAD_DEBUG_TRACE(4, "registering MyoSharedTable entry for %s @%p\n",
+                            cur->varName, cur);
+
+        // sharedAddr holds the function that creates the shared memory.
+        typedef void (*CreateFn)(void);
+        reinterpret_cast<CreateFn>(cur->sharedAddr)();
+        ++registered;
+    }
+
+    // Add the table to the list if it is not empty.
+    if (registered > 0) {
+        mutex_locker_t locker(__myo_table_lock);
+        __myo_table_list.push_back(MyoTable(table_head, registered));
+        __myo_tables = true;
+    }
+}
+
+// Calls func() for each init-table entry (Windows skips null-named entries).
+static void __offload_myo_shared_init_table_register(InitTableEntry* entry)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+#ifdef TARGET_WINNT
+    while (entry->funcName != MYO_TABLE_END_MARKER()) {
+        if (entry->funcName != 0) {
+            entry->func();
+        }
+        else {
+            OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoSharedInit entry\n");
+        }
+        entry++;
+    }
+#else // TARGET_WINNT
+    // The table ends at the first entry whose func is null.
+    for (InitTableEntry *cur = entry; cur->func != 0; ++cur) {
+        cur->func();
+    }
+#endif // TARGET_WINNT
+}
+
+// Walks a function-pointer table and registers it with MYO. When MYO is not
+// available, funcAddr is stored at each entry's localThunkAddr instead.
+static void __offload_myo_fptr_table_register(FptrTableEntry *entry)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, entry);
+
+    FptrTableEntry *const table_head = entry;
+    int registered = 0;
+
+    while (entry->funcName != MYO_TABLE_END_MARKER()) {
+        FptrTableEntry *const cur = entry++;
+#ifdef TARGET_WINNT
+        if (cur->funcName == 0) {
+            OFFLOAD_DEBUG_TRACE(4, "skip registering a NULL MyoFptrTable entry\n");
+            continue;
+        }
+#endif // TARGET_WINNT
+        // No MYO: write the function address into the local thunk slot.
+        if (!myo_wrapper.is_available()) {
+            *(static_cast<void**>(cur->localThunkAddr)) = cur->funcAddr;
+        }
+        OFFLOAD_DEBUG_TRACE(4, "registering MyoFptrTable entry for %s @%p\n",
+                            cur->funcName, cur);
+#ifdef TARGET_WINNT
+        // Windows build: entries are registered one at a time.
+        if (myo_wrapper.is_available()) {
+            myo_wrapper.HostFptrTableRegister(cur, 1, false);
+        }
+#endif // TARGET_WINNT
+        ++registered;
+    }
+
+#ifndef TARGET_WINNT
+    // Other builds: the whole table is registered with one call.
+    if (myo_wrapper.is_available() && registered > 0) {
+        myo_wrapper.HostFptrTableRegister(table_head, registered, false);
+    }
+#endif // TARGET_WINNT
+}
+
+// Chooses the device an offload should run on and reserves it through ORSL.
+//
+//   target_number >= 0  explicit request, taken modulo mic_engines_total
+//   target_number == -1 device 0 is tried; failure to get it is fatal
+//   target_number == -2 device 0 is tried; failure falls back to the CPU
+//   target_number <  -2 invalid: an error is reported and the process exits
+//
+// Returns the reserved device index, or -1 for the CPU fallback.
+extern "C" int __offload_myoIsAvailable(int target_number)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%d)\n", __func__, target_number);
+
+    if (target_number < -2) {
+        LIBOFFLOAD_ERROR(c_invalid_device_number);
+        exit(1);
+    }
+
+    const bool is_default_number = (target_number == -2);
+    int device = -1;   // stays -1 (CPU) unless a reservation succeeds
+
+    if (__offload_myoInit()) {
+        // User-provided numbers wrap around; otherwise device 0 is used.
+        int wanted = 0;
+        if (target_number >= 0) {
+            wanted = target_number % mic_engines_total;
+        }
+
+        // reserve device in ORSL and make sure it is initialized
+        if (ORSL::reserve(wanted)) {
+            device = wanted;
+            mic_engines[device].init();
+        }
+    }
+
+    // Only the default request may come back without a device.
+    if (device < 0 && !is_default_number) {
+        LIBOFFLOAD_ERROR(c_device_is_not_available);
+        exit(1);
+    }
+
+    return device;
+}
+
+// Forwards thunk/arg/target_number to myo_wrapper.RemoteThunkCall() and then
+// releases the ORSL reservation for target_number.
+extern "C" void __offload_myoiRemoteIThunkCall(void *thunk, void *arg,
+                                               int target_number)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p, %p, %d)\n",
+                        __func__, thunk, arg, target_number);
+
+    // The remote call is bracketed by Release()/Acquire() on the MYO wrapper.
+    myo_wrapper.Release();
+    myo_wrapper.RemoteThunkCall(thunk, arg, target_number);
+    myo_wrapper.Acquire();
+
+    ORSL::release(target_number);
+}
+
+// Allocates size bytes with myo_wrapper.SharedMalloc(), or malloc() without MYO.
+extern "C" void* _Offload_shared_malloc(size_t size)
+{
+    // Cast so the size_t argument matches the "%lld" conversion.
+    OFFLOAD_DEBUG_TRACE(3, "%s(%lld)\n", __func__,
+                        static_cast<long long>(size));
+    if (__offload_myoLoadLibrary()) {
+        return myo_wrapper.SharedMalloc(size);
+    }
+    return malloc(size);
+}
+
+// Frees ptr with myo_wrapper.SharedFree(), or free() when MYO is not loaded.
+extern "C" void _Offload_shared_free(void *ptr)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
+
+    if (!__offload_myoLoadLibrary()) {
+        free(ptr);
+        return;
+    }
+    myo_wrapper.SharedFree(ptr);
+}
+
+// MYO aligned allocation; host fallback is _mm_malloc() with >= pointer align.
+extern "C" void* _Offload_shared_aligned_malloc(size_t size, size_t align)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%lld, %lld)\n", __func__,
+                        static_cast<long long>(size),   // size_t -> "%lld"
+                        static_cast<long long>(align)); // size_t -> "%lld"
+    if (__offload_myoLoadLibrary()) {
+        return myo_wrapper.SharedAlignedMalloc(size, align);
+    }
+    if (align < sizeof(void*)) {
+        align = sizeof(void*);
+    }
+    return _mm_malloc(size, align);
+}
+
+// Frees ptr with myo_wrapper.SharedAlignedFree(), or _mm_free() without MYO.
+extern "C" void _Offload_shared_aligned_free(void *ptr)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s(%p)\n", __func__, ptr);
+
+    if (!__offload_myoLoadLibrary()) {
+        _mm_free(ptr);
+        return;
+    }
+    myo_wrapper.SharedAlignedFree(ptr);
+}
+
+// Offload entry point for a cilk_for loop with unsigned int iteration counts:
+// issues the remote call when __offload_myoIsAvailable() yields a device, else
+// passes the loop to __cilkrts_cilk_for_32() on the host.
+// NOTE(review): the _Offload_shared_malloc() result is used without a check.
+extern "C" void __intel_cilk_for_32_offload(
+    int size,
+    void (*copy_constructor)(void*, void*),
+    int target_number,
+    void *raddr,
+    void *closure_object,
+    unsigned int iters,
+    unsigned int grain_size)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    target_number = __offload_myoIsAvailable(target_number);
+    if (target_number < 0) {
+        __cilkrts_cilk_for_32(raddr, closure_object, iters, grain_size);
+        return;
+    }
+
+    // Argument block: fixed fields followed by the closure bytes.
+    struct S {
+        void *M1;
+        unsigned int M2;
+        unsigned int M3;
+        char closure[];
+    };
+    struct S *args =
+        (struct S*) _Offload_shared_malloc(sizeof(struct S) + size);
+    args->M1 = raddr;
+    args->M2 = iters;
+    args->M3 = grain_size;
+
+    // Plain byte copy unless the caller supplied a copy constructor.
+    if (copy_constructor != 0) {
+        copy_constructor(args->closure, closure_object);
+    }
+    else {
+        memcpy(args->closure, closure_object, size);
+    }
+
+    myo_wrapper.Release();
+    myo_wrapper.GetResult(myo_wrapper.RemoteCall(
+        "__intel_cilk_for_32_offload", args, target_number));
+    myo_wrapper.Acquire();
+
+    _Offload_shared_free(args);
+    ORSL::release(target_number);
+}
+
+// Offload entry point for a cilk_for loop with uint64_t iteration counts:
+// issues the remote call when __offload_myoIsAvailable() yields a device, else
+// passes the loop to __cilkrts_cilk_for_64() on the host.
+// NOTE(review): the _Offload_shared_malloc() result is used without a check.
+extern "C" void __intel_cilk_for_64_offload(
+    int size,
+    void (*copy_constructor)(void*, void*),
+    int target_number,
+    void *raddr,
+    void *closure_object,
+    uint64_t iters,
+    uint64_t grain_size)
+{
+    OFFLOAD_DEBUG_TRACE(3, "%s\n", __func__);
+
+    target_number = __offload_myoIsAvailable(target_number);
+    if (target_number < 0) {
+        __cilkrts_cilk_for_64(raddr, closure_object, iters, grain_size);
+        return;
+    }
+
+    // Argument block: fixed fields followed by the closure bytes.
+    struct S {
+        void *M1;
+        uint64_t M2;
+        uint64_t M3;
+        char closure[];
+    };
+    struct S *args =
+        (struct S*) _Offload_shared_malloc(sizeof(struct S) + size);
+    args->M1 = raddr;
+    args->M2 = iters;
+    args->M3 = grain_size;
+
+    // Plain byte copy unless the caller supplied a copy constructor.
+    if (copy_constructor != 0) {
+        copy_constructor(args->closure, closure_object);
+    }
+    else {
+        memcpy(args->closure, closure_object, size);
+    }
+
+    myo_wrapper.Release();
+    myo_wrapper.GetResult(myo_wrapper.RemoteCall(
+        "__intel_cilk_for_64_offload", args, target_number));
+    myo_wrapper.Acquire();
+
+    _Offload_shared_free(args);
+    ORSL::release(target_number);
+}
diff --git a/final/offload/src/offload_myo_host.h b/final/offload/src/offload_myo_host.h
new file mode 100644
index 000000