New package pocl (#3413)

* New package pocl * pocl: Update dependencies, add self-test * pocl: Don't require LLVM shared libraries LLVM build fails with shared libraries. * Add patch * Update * Update * Make build and install work; install test still fails * Split pocl into pocl proper and pocl-test * Add debug output * pocl: Update to 0.14-rc * pocl: Correct flake8 error * pocl: Heed code review recommendations * pocl: Add newline at end of file * pocl: Correct flake8 error I don’t want to use an even longer line by putting the whole variant declaration onto a single line, nor do I think that having an overlong line and adding `# noqa` at the end is more readable than splitting a string over three lines. * pocl: Correct dependency type for libtool
2017-03-12 13:52:18 -04:00
parent 0b948da74c
commit 9121599145
5 changed files with 431 additions and 0 deletions
--- a/var/spack/repos/builtin/packages/pocl/example1.c
+++ b/var/spack/repos/builtin/packages/pocl/example1.c
@@ -0,0 +1,232 @@
+/* example1 - Simple example from OpenCL specification.
+
+   Copyright (c) 2011 Universidad Rey Juan Carlos
+
+   Permission is hereby granted, free of charge, to any person
+   obtaining a copy of this software and associated documentation
+   files (the "Software"), to deal in the Software without
+   restriction, including without limitation the rights to use, copy,
+   modify, merge, publish, distribute, sublicense, and/or sell copies
+   of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+#include <CL/opencl.h>
+#include <poclu.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define N 128
+
+void delete_memobjs(cl_mem *memobjs, int n) {
+  for (int i = 0; i < n; ++i)
+    clReleaseMemObject(memobjs[i]);
+}
+
+int exec_dot_product_kernel(const char *program_source, int n, cl_float4 *srcA,
+                            cl_float4 *srcB, cl_float *dst) {
+  cl_context context = poclu_create_any_context();
+  if (context == (cl_context)0)
+    return -1;
+
+  // get the list of GPU devices associated with context
+  size_t cb;
+  clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb);
+  cl_device_id *devices = malloc(cb);
+  clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL);
+
+  // create a command-queue
+  cl_command_queue cmd_queue =
+      clCreateCommandQueue(context, devices[0], 0, NULL);
+  if (cmd_queue == 0) {
+    clReleaseContext(context);
+    free(devices);
+    return -1;
+  }
+
+  // don't know why this is necessary
+  for (int i = 0; i < n; ++i) {
+    poclu_bswap_cl_float_array(devices[0], &srcA[i], 4);
+    poclu_bswap_cl_float_array(devices[0], &srcB[i], 4);
+  }
+
+  // allocate the buffer memory objects
+  cl_mem memobjs[3];
+
+  memobjs[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                              sizeof(cl_float4) * n, srcA, NULL);
+  if (memobjs[0] == 0) {
+    clReleaseCommandQueue(cmd_queue);
+    clReleaseContext(context);
+    return -1;
+  }
+
+  memobjs[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                              sizeof(cl_float4) * n, srcB, NULL);
+  if (memobjs[1] == 0) {
+    delete_memobjs(memobjs, 1);
+    clReleaseCommandQueue(cmd_queue);
+    clReleaseContext(context);
+    return -1;
+  }
+
+  memobjs[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * n,
+                              NULL, NULL);
+  if (memobjs[2] == 0) {
+    delete_memobjs(memobjs, 2);
+    clReleaseCommandQueue(cmd_queue);
+    clReleaseContext(context);
+    return -1;
+  }
+
+  // create the program
+  cl_program program =
+      clCreateProgramWithSource(context, 1, &program_source, NULL, NULL);
+  if (program == 0) {
+    delete_memobjs(memobjs, 3);
+    clReleaseCommandQueue(cmd_queue);
+    clReleaseContext(context);
+    return -1;
+  }
+
+  // build the program
+  cl_int err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
+  if (err != CL_SUCCESS) {
+    delete_memobjs(memobjs, 3);
+    clReleaseProgram(program);
+    clReleaseCommandQueue(cmd_queue);
+    clReleaseContext(context);
+    return -1;
+  }
+
+  // create the kernel
+  cl_kernel kernel = clCreateKernel(program, "dot_product", NULL);
+  if (kernel == 0) {
+    delete_memobjs(memobjs, 3);
+    clReleaseProgram(program);
+    clReleaseCommandQueue(cmd_queue);
+    clReleaseContext(context);
+    return -1;
+  }
+
+  // set the args values
+  err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobjs[0]);
+  err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&memobjs[1]);
+  err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&memobjs[2]);
+
+  if (err != CL_SUCCESS) {
+    delete_memobjs(memobjs, 3);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    clReleaseCommandQueue(cmd_queue);
+    clReleaseContext(context);
+    return -1;
+  }
+
+  // set work-item dimensions
+  size_t global_work_size[1];
+  global_work_size[0] = n;
+  size_t local_work_size[1];
+  local_work_size[0] = 128;
+
+  // execute kernel
+  err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, global_work_size,
+                               local_work_size, 0, NULL, NULL);
+  if (err != CL_SUCCESS) {
+    delete_memobjs(memobjs, 3);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    clReleaseCommandQueue(cmd_queue);
+    clReleaseContext(context);
+    return -1;
+  }
+
+  // read output image
+  err = clEnqueueReadBuffer(cmd_queue, memobjs[2], CL_TRUE, 0,
+                            n * sizeof(cl_float), dst, 0, NULL, NULL);
+  if (err != CL_SUCCESS) {
+    delete_memobjs(memobjs, 3);
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    clReleaseCommandQueue(cmd_queue);
+    clReleaseContext(context);
+    return -1;
+  }
+
+  for (int i = 0; i < n; ++i) {
+    poclu_bswap_cl_float_array(devices[0], &dst[i], 1);
+    poclu_bswap_cl_float_array(devices[0], &srcA[i], 4);
+    poclu_bswap_cl_float_array(devices[0], &srcB[i], 4);
+  }
+
+  free(devices);
+
+  // release kernel, program, and memory objects
+  delete_memobjs(memobjs, 3);
+  clReleaseKernel(kernel);
+  clReleaseProgram(program);
+  clReleaseCommandQueue(cmd_queue);
+  clReleaseContext(context);
+
+  // success
+  return 0;
+}
+
+int main(void) {
+  const char *source = "__kernel void dot_product(\n"
+                       "  __global const float4 *a,\n"
+                       "  __global const float4 *b,\n"
+                       "  __global float *c)\n"
+                       "{\n"
+                       "  int gid = get_global_id(0);\n"
+                       "  float4 prod = a[gid] * b[gid];\n"
+                       "  c[gid] = prod.x + prod.y + prod.z + prod.w;\n"
+                       "}\n";
+
+  cl_float4 *srcA = malloc(N * sizeof(cl_float4));
+  cl_float4 *srcB = malloc(N * sizeof(cl_float4));
+  cl_float *dst = malloc(N * sizeof(cl_float));
+
+  for (int i = 0; i < N; ++i) {
+    srcA[i].s[0] = (cl_float)i;
+    srcA[i].s[1] = (cl_float)i;
+    srcA[i].s[2] = (cl_float)i;
+    srcA[i].s[3] = (cl_float)i;
+    srcB[i].s[0] = (cl_float)i;
+    srcB[i].s[1] = (cl_float)i;
+    srcB[i].s[2] = (cl_float)i;
+    srcB[i].s[3] = (cl_float)i;
+  }
+
+  if (exec_dot_product_kernel(source, N, srcA, srcB, dst)) {
+    printf("Error running the tests\n");
+    return -1;
+  }
+
+  for (int i = 0; i < 4; ++i) {
+    printf("(%f, %f, %f, %f) . (%f, %f, %f, %f) = %f\n", srcA[i].s[0],
+           srcA[i].s[1], srcA[i].s[2], srcA[i].s[3], srcB[i].s[0], srcB[i].s[1],
+           srcB[i].s[2], srcB[i].s[3], dst[i]);
+    if (srcA[i].s[0] * srcB[i].s[0] + srcA[i].s[1] * srcB[i].s[1] +
+            srcA[i].s[2] * srcB[i].s[2] + srcA[i].s[3] * srcB[i].s[3] !=
+        dst[i]) {
+      printf("FAIL\n");
+      return -1;
+    }
+  }
+
+  printf("OK\n");
+  return 0;
+}
--- a/var/spack/repos/builtin/packages/pocl/example1.out
+++ b/var/spack/repos/builtin/packages/pocl/example1.out
@@ -0,0 +1,5 @@
+(0.000000, 0.000000, 0.000000, 0.000000) . (0.000000, 0.000000, 0.000000, 0.000000) = 0.000000
+(1.000000, 1.000000, 1.000000, 1.000000) . (1.000000, 1.000000, 1.000000, 1.000000) = 4.000000
+(2.000000, 2.000000, 2.000000, 2.000000) . (2.000000, 2.000000, 2.000000, 2.000000) = 16.000000
+(3.000000, 3.000000, 3.000000, 3.000000) . (3.000000, 3.000000, 3.000000, 3.000000) = 36.000000
+OK
--- a/var/spack/repos/builtin/packages/pocl/package.py
+++ b/var/spack/repos/builtin/packages/pocl/package.py
@@ -0,0 +1,108 @@
+##############################################################################
+# Copyright (c) 2013-2016, Lawrence Livermore National Security, LLC.
+# Produced at the Lawrence Livermore National Laboratory.
+#
+# This file is part of Spack.
+# Created by Todd Gamblin, tgamblin@llnl.gov, All rights reserved.
+# LLNL-CODE-647188
+#
+# For details, see https://github.com/llnl/spack
+# Please also see the LICENSE file for our notice and the LGPL.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License (as
+# published by the Free Software Foundation) version 2.1, February 1999.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the terms and
+# conditions of the GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+##############################################################################
+
+from spack import *
+from spack.package_test import *
+import os
+
+
+class Pocl(CMakePackage):
+    """Portable Computing Language (pocl) is an open source implementation
+    of the OpenCL standard which can be easily adapted for new targets
+    and devices, both for homogeneous CPU and heterogeneous
+    GPUs/accelerators."""
+
+    homepage = "http://portablecl.org"
+    url = "http://portablecl.org/downloads/pocl-0.13.tar.gz"
+
+    version("master", git="https://github.com/pocl/pocl.git")
+    version("0.14-rc",
+            git="https://github.com/pocl/pocl.git", branch="release_0_14")
+    version("0.13", "344480864d4269f2f63f1509395898bd")
+    version("0.12", "e197ba3aa01a35f40581c48e053330dd")
+    version("0.11", "9be0640cde2983062c47393d9e8e8fe7")
+    version("0.10", "0096be4f595c7b5cbfa42430c8b3af6a")
+
+    # This is Github's pocl/pocl#373
+    patch("uint.patch", when="@:0.13")
+    patch("vecmathlib.patch", when="@:0.13")
+
+    depends_on("cmake @2.8.12:", type="build")
+    depends_on("hwloc")
+    depends_on("libtool", type=("build", "run"))
+    # We don't request LLVM's shared libraries because these are not
+    # enabled by default, and also because they fail to build for us
+    # (see #1616)
+    depends_on("llvm +clang")
+    depends_on("pkg-config", type="build")
+
+    # These are the supported LLVM versions
+    depends_on("llvm @3.7:3.9", when="@master")
+    depends_on("llvm @3.7:3.9", when="@0.14")
+    depends_on("llvm @3.7:3.8", when="@0.13")
+    depends_on("llvm @3.2:3.7", when="@0.12")
+    depends_on("llvm @3.2:3.6", when="@0.11")
+    depends_on("llvm @3.2:3.5", when="@0.10")
+
+    variant("distro", default=False,
+            description=("Support several CPU architectures, "
+                         "suitable e.g. in a build "
+                         "that will be made available for download"))
+    variant("icd", default=False,
+            description="Support a system-wide ICD loader")
+
+    def cmake_args(self):
+        spec = self.spec
+        args = ["-DINSTALL_OPENCL_HEADERS=ON"]
+        if "~shared" in spec["llvm"]:
+            args += ["-DSTATIC_LLVM"]
+        if "+distro" in spec:
+            args += ["-DKERNELLIB_HOST_CPU_VARIANTS=distro"]
+        args += ["-DENABLE_ICD=%s" % ("ON" if "+icd" in spec else "OFF")]
+        return args
+
+    @run_after('install')
+    def symlink_opencl(self):
+        with working_dir(self.build_directory):
+            os.symlink("OpenCL", join_path(self.prefix.include, "CL"))
+
+    @run_after('install')
+    def check_install(self):
+        # Build and run a small program to test the installed OpenCL library
+        spec = self.spec
+        print("Checking pocl installation...")
+        checkdir = "spack-check"
+        with working_dir(checkdir, create=True):
+            source = join_path(os.path.dirname(self.module.__file__),
+                               "example1.c")
+            cflags = spec["pocl"].cppflags.split()
+            # ldflags = spec["pocl"].libs.ld_flags.split()
+            ldflags = ["-L%s" % spec["pocl"].prefix.lib,
+                       "-lOpenCL", "-lpoclu"]
+            output = compile_c_and_execute(source, cflags, ldflags)
+            compare_output_file(
+                output,
+                join_path(os.path.dirname(self.module.__file__),
+                          "example1.out"))
--- a/var/spack/repos/builtin/packages/pocl/uint.patch
+++ b/var/spack/repos/builtin/packages/pocl/uint.patch
@@ -0,0 +1,11 @@
+--- a/lib/CL/clCreateSubDevices.c
+++ b/lib/CL/clCreateSubDevices.c
+@@ -46,7 +46,7 @@
+    cl_device_id *new_devs = NULL;
+    // number of elements in (copies of) properties, including terminating null
+    cl_uint num_props = 0;
+-   uint i;
+   cl_uint i;
+ 
+    POCL_GOTO_ERROR_COND((in_device == NULL), CL_INVALID_DEVICE);
+    POCL_GOTO_ERROR_COND((properties == NULL), CL_INVALID_VALUE);
--- a/var/spack/repos/builtin/packages/pocl/vecmathlib.patch
+++ b/var/spack/repos/builtin/packages/pocl/vecmathlib.patch
@@ -0,0 +1,75 @@
+diff --git a/lib/kernel/vecmathlib/vec_sse_double1.h b/lib/kernel/vecmathlib/vec_sse_double1.h
+index d727de8..dc582b3 100644
+--- a/lib/kernel/vecmathlib/vec_sse_double1.h
+++ b/lib/kernel/vecmathlib/vec_sse_double1.h
+@@ -397,8 +397,8 @@ public:
+     }
+     return r;
+   }
+-  boolvec_t isfinite() const { return vml_std::isfinite(v); }
+-  boolvec_t isinf() const { return vml_std::isinf(v); }
+  boolvec_t isfinite() const { return bool(vml_std::isfinite(v)); }
+  boolvec_t isinf() const { return bool(vml_std::isinf(v)); }
+   boolvec_t isnan() const {
+     // This is wrong:
+     // return _mm_ucomineq_sd(from_double(v), from_double(v));
+@@ -407,9 +407,9 @@ public:
+     // __asm__("ucomisd %[v],%[v]; setp %[r]": [r]"=q"(r): [v]"x"(v));
+     // return boolvec_t::scalar_t(r);
+     // This works as well:
+-    return vml_std::isnan(v);
+    return bool(vml_std::isnan(v));
+   }
+-  boolvec_t isnormal() const { return vml_std::isnormal(v); }
+  boolvec_t isnormal() const { return bool(vml_std::isnormal(v)); }
+   realvec_t ldexp(int_t n) const { return vml_std::ldexp(v, n); }
+   realvec_t ldexp(intvec_t n) const { return vml_std::ldexp(v, n); }
+   realvec_t log() const { return MF::vml_log(*this); }
+@@ -433,7 +433,7 @@ public:
+   }
+   realvec_t round() const { return MF::vml_round(*this); }
+   realvec_t rsqrt() const { return MF::vml_rsqrt(*this); }
+-  boolvec_t signbit() const { return vml_std::signbit(v); }
+  boolvec_t signbit() const { return bool(vml_std::signbit(v)); }
+   realvec_t sin() const { return MF::vml_sin(*this); }
+   realvec_t sinh() const { return MF::vml_sinh(*this); }
+   realvec_t sqrt() const {
+diff --git a/lib/kernel/vecmathlib/vec_sse_float1.h b/lib/kernel/vecmathlib/vec_sse_float1.h
+index a84a046..4868b2c 100644
+--- a/lib/kernel/vecmathlib/vec_sse_float1.h
+++ b/lib/kernel/vecmathlib/vec_sse_float1.h
+@@ -394,8 +394,8 @@ public:
+     }
+     return r;
+   }
+-  boolvec_t isfinite() const { return vml_std::isfinite(v); }
+-  boolvec_t isinf() const { return vml_std::isinf(v); }
+  boolvec_t isfinite() const { return bool(vml_std::isfinite(v)); }
+  boolvec_t isinf() const { return bool(vml_std::isinf(v)); }
+   boolvec_t isnan() const {
+ #if defined VML_HAVE_NAN
+     // This is wrong:
+@@ -405,12 +405,12 @@ public:
+     // __asm__("ucomiss %[v],%[v]; setp %[r]": [r]"=q"(r): [v]"x"(v));
+     // return boolvec_t::scalar_t(r);
+     // This works as well:
+-    return vml_std::isnan(v);
+    return bool(vml_std::isnan(v));
+ #else
+     return BV(false);
+ #endif
+   }
+-  boolvec_t isnormal() const { return vml_std::isnormal(v); }
+  boolvec_t isnormal() const { return bool(vml_std::isnormal(v)); }
+   realvec_t ldexp(int_t n) const { return vml_std::ldexp(v, n); }
+   realvec_t ldexp(intvec_t n) const { return vml_std::ldexp(v, n); }
+   realvec_t log() const { return MF::vml_log(*this); }
+@@ -434,7 +434,7 @@ public:
+   }
+   realvec_t round() const { return MF::vml_round(*this); }
+   realvec_t rsqrt() const { return MF::vml_rsqrt(*this); }
+-  boolvec_t signbit() const { return vml_std::signbit(v); }
+  boolvec_t signbit() const { return bool(vml_std::signbit(v)); }
+   realvec_t sin() const { return MF::vml_sin(*this); }
+   realvec_t sinh() const { return MF::vml_sinh(*this); }
+   realvec_t sqrt() const { return to_float(_mm_sqrt_ss(from_float(v))); }