Fix runtime crash inside __kmpc_init_allocator

It seems that the load of traits.addr should be passed to the runtime call.
Currently, a load of that load gets passed, which causes the runtime to fail.

To fix this, skip the call to EmitLoadOfScalar that emits the extra load.

Differential Revision: https://reviews.llvm.org/D151576

GitOrigin-RevId: a419ec4f256d279c91746a3962dd6dd2da45c304
diff --git a/libomptarget/test/mapping/target_uses_allocator.c b/libomptarget/test/mapping/target_uses_allocator.c
new file mode 100755
index 0000000..ce33e92
--- /dev/null
+++ b/libomptarget/test/mapping/target_uses_allocator.c
@@ -0,0 +1,56 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+
+#include <omp.h>
+#include <stdio.h>
+
+#define N 1024
+
+int test_omp_aligned_alloc_on_device() {
+  int errors = 0;
+
+  omp_memspace_handle_t memspace = omp_default_mem_space;
+  omp_alloctrait_t traits[2] = {{omp_atk_alignment, 64}, {omp_atk_access, 64}};
+  omp_allocator_handle_t alloc =
+      omp_init_allocator(omp_default_mem_space, 1, traits);
+
+#pragma omp target map(tofrom : errors) uses_allocators(alloc(traits))
+  {
+    int *x;
+    int not_correct_array_values = 0;
+
+    x = (int *)omp_aligned_alloc(64, N * sizeof(int), alloc);
+    if (x == NULL) {
+      errors++;
+    } else {
+#pragma omp parallel for simd simdlen(16) aligned(x : 64)
+      for (int i = 0; i < N; i++) {
+        x[i] = i;
+      }
+
+#pragma omp parallel for simd simdlen(16) aligned(x : 64)
+      for (int i = 0; i < N; i++) {
+        if (x[i] != i) {
+#pragma omp atomic write
+          not_correct_array_values = 1;
+        }
+      }
+      if (not_correct_array_values) {
+        errors++;
+      }
+      omp_free(x, alloc);
+    }
+  }
+
+  omp_destroy_allocator(alloc);
+
+  return errors;
+}
+
+// Runs the allocator test and prints the verdict that FileCheck matches.
+int main() {
+  if (test_omp_aligned_alloc_on_device())
+    printf("FAILED\n");
+  else
+    // CHECK: PASSED
+    printf("PASSED\n");
+}