Merge branch 'gallium-userbuf'

author Marek Olšák <maraeo@gmail.com>

Fri, 11 May 2012 14:38:13 +0000 (16:38 +0200)

committer Marek Olšák <maraeo@gmail.com>

Fri, 11 May 2012 14:38:13 +0000 (16:38 +0200)
author Marek Olšák <maraeo@gmail.com>
Fri, 11 May 2012 14:38:13 +0000 (16:38 +0200)
committer Marek Olšák <maraeo@gmail.com>
Fri, 11 May 2012 14:38:13 +0000 (16:38 +0200)
diff --git a/bin/.gitignore b/bin/.gitignore

index 04c0a1c16bb44a32d75553ecc6aa0d963e6cdda0..2ee67a666a1f42627d3150e1109e00ad08b98310 100644 (file)
--- a/bin/.gitignore
+++ b/bin/.gitignore
@@ -4,3 +4,4 @@ install-sh
  /depcomp
  /missing
  ylwrap
+compile
diff --git a/configs/autoconf.in b/configs/autoconf.in

index eb6713ddf086544bb7e182c8c4363aae7fbbbb86..3c8f4c1eef920d24140babbbde62b9c145149e4e 100644 (file)
--- a/configs/autoconf.in
+++ b/configs/autoconf.in
@@ -219,3 +219,6 @@ MESA_LLVM = @MESA_LLVM@
  LLVM_VERSION = @LLVM_VERSION@
  
  HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
+
+GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
+GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
diff --git a/configs/default b/configs/default

index cdaeec8b478eda30e5255ac45ab9ddd571450023..40fa5e31ea214ce7f535a9647b3c10f9df5b4900 100644 (file)
--- a/configs/default
+++ b/configs/default
@@ -9,7 +9,7 @@ CONFIG_NAME = default
  
  # Version info
  MESA_MAJOR=8
-MESA_MINOR=0
+MESA_MINOR=1
  MESA_TINY=0
  MESA_VERSION = $(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY)
  
diff --git a/configure.ac b/configure.ac

index 3bc59ca3e7893914609fe4495e4fe6ce56d08f0a..2d1265a7b638d94091f8ab499a3a1239e7b8ee00 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -109,11 +109,11 @@ if test "x$GCC" = xyes -a "x$acv_mesa_CLANG" = xno; then
  
      GCC_VERSION=`$CC -dumpversion`
      if test $? -eq 0; then
-        major=`echo $GCC_VERSION | cut -d. -f1`
-        minor=`echo $GCC_VERSION | cut -d. -f2`
+        GCC_VERSION_MAJOR=`echo $GCC_VERSION | cut -d. -f1`
+        GCC_VERSION_MINOR=`echo $GCC_VERSION | cut -d. -f2`
      fi
  
-    if test $major -lt 3 -o $major -eq 3 -a $minor -lt 3 ; then
+    if test $GCC_VERSION_MAJOR -lt 3 -o $GCC_VERSION_MAJOR -eq 3 -a $GCC_VERSION_MINOR -lt 3 ; then
          AC_MSG_RESULT([no])
          AC_MSG_ERROR([If using GCC, version 3.3.0 or later is required.])
      else
@@ -616,7 +616,11 @@ AC_ARG_ENABLE([va],
           [enable va library @<:@default=auto@:>@])],
     [enable_va="$enableval"],
     [enable_va=auto])
-
+AC_ARG_ENABLE([opencl],
+   [AS_HELP_STRING([--enable-opencl],
+         [enable OpenCL library @<:@default=no@:>@])],
+   [enable_opencl="$enableval"],
+   [enable_opencl=no])
  AC_ARG_ENABLE([xlib_glx],
      [AS_HELP_STRING([--enable-xlib-glx],
          [make GLX library Xlib-based instead of DRI-based @<:@default=disable@:>@])],
@@ -643,6 +647,12 @@ AC_ARG_ENABLE([r600-llvm-compiler],
      [enable_r600_llvm="$enableval"],
      [enable_r600_llvm=no])
  
+AC_ARG_ENABLE([gallium_tests],
+    [AS_HELP_STRING([--enable-gallium-tests],
+        [Enable optional Gallium tests @<:@default=disable@:>@])],
+    [enable_gallium_tests="$enableval"],
+    [enable_gallium_tests=no])
+
  # Option for Gallium drivers
  GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast"
  
@@ -670,7 +680,8 @@ if test "x$enable_opengl" = xno -a \
          "x$enable_d3d1x" = xno -a \
          "x$enable_xvmc" = xno -a \
          "x$enable_vdpau" = xno -a \
-        "x$enable_va" = xno; then
+        "x$enable_va" = xno -a \
+        "x$enable_opencl" = xno; then
      AC_MSG_ERROR([at least one API should be enabled])
  fi
  
@@ -1477,6 +1488,7 @@ if test "x$enable_gallium_gbm" = xyes; then
      GALLIUM_STATE_TRACKERS_DIRS="gbm $GALLIUM_STATE_TRACKERS_DIRS"
      GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS gbm"
      HAVE_ST_GBM="yes"
+    enable_gallium_loader=yes
  fi
  
  dnl
@@ -1596,6 +1608,24 @@ if test "x$enable_va" = xyes; then
      HAVE_ST_VA="yes"
  fi
  
+dnl
+dnl OpenCL configuration
+dnl
+
+if test "x$enable_opencl" = xyes; then
+    if test "x$with_gallium_drivers" = x; then
+        AC_MSG_ERROR([cannot enable OpenCL without Gallium])
+    fi
+
+    if test $GCC_VERSION_MAJOR -lt 4 -o $GCC_VERSION_MAJOR -eq 4 -a $GCC_VERSION_MINOR -lt 6; then
+        AC_MSG_ERROR([gcc >= 4.6 is required to build clover])
+    fi
+
+    GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS clover"
+    GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS opencl"
+    enable_gallium_loader=yes
+fi
+
  dnl
  dnl GLU configuration
  dnl
@@ -1828,6 +1858,14 @@ AC_ARG_WITH([xvmc-libdir],
      [XVMC_LIB_INSTALL_DIR='${libdir}'])
  AC_SUBST([XVMC_LIB_INSTALL_DIR])
  
+dnl
+dnl Gallium Tests
+dnl
+if test "x$enable_gallium_tests" = xyes; then
+    SRC_DIRS="$SRC_DIRS gallium/tests/trivial"
+    enable_gallium_loader=yes
+fi
+
  dnl Directory for VDPAU libs
  AC_ARG_WITH([vdpau-libdir],
      [AS_HELP_STRING([--with-vdpau-libdir=DIR],
@@ -1844,6 +1882,14 @@ AC_ARG_WITH([va-libdir],
      [VA_LIB_INSTALL_DIR='${libdir}/va'])
  AC_SUBST([VA_LIB_INSTALL_DIR])
  
+dnl Directory for OpenCL libs
+AC_ARG_WITH([opencl-libdir],
+    [AS_HELP_STRING([--with-opencl-libdir=DIR],
+        [directory for the OpenCL libraries @<:@default=${libdir}/opencl@:>@])],
+    [OPENCL_LIB_INSTALL_DIR="$withval"],
+    [OPENCL_LIB_INSTALL_DIR='${libdir}/opencl'])
+AC_SUBST([OPENCL_LIB_INSTALL_DIR])
+
  dnl
  dnl Gallium helper functions
  dnl
@@ -1884,6 +1930,17 @@ gallium_require_llvm() {
      fi
  }
  
+gallium_require_drm_loader() {
+    if test "x$enable_gallium_loader" = xyes; then
+        PKG_CHECK_MODULES([LIBUDEV], [libudev], [],
+                          AC_MSG_ERROR([Gallium drm loader requires libudev]))
+        if test "x$have_libdrm" != xyes; then
+            AC_MSG_ERROR([Gallium drm loader requires libdrm >= $LIBDRM_REQUIRED])
+        fi
+        enable_gallium_drm_loader=yes
+    fi
+}
+
  dnl Gallium drivers
  dnl Duplicates in GALLIUM_DRIVERS_DIRS are removed by sorting it after this block
  if test "x$with_gallium_drivers" != x; then
@@ -1911,6 +1968,7 @@ if test "x$with_gallium_drivers" != x; then
              ;;
          xr600)
              PKG_CHECK_MODULES([RADEON], [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
+            gallium_require_drm_loader
              GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
              if test "x$enable_r600_llvm" = xyes; then
                  if test "x$LLVM_VERSION" != "x3.1"; then
@@ -1931,6 +1989,7 @@ if test "x$with_gallium_drivers" != x; then
              ;;
          xnouveau)
              PKG_CHECK_MODULES([NOUVEAU], [libdrm_nouveau >= $LIBDRM_NOUVEAU_REQUIRED])
+            gallium_require_drm_loader
              GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nv30 nv50 nvc0"
              gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau" "" "xvmc-nouveau" "vdpau-nouveau"
              ;;
@@ -1967,6 +2026,25 @@ if test "x$with_gallium_drivers" != x; then
      done
  fi
  
+if test "x$enable_gallium_loader" = xyes; then
+    GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/null"
+    GALLIUM_PIPE_LOADER_DEFINES="-DHAVE_PIPE_LOADER_SW"
+    GALLIUM_PIPE_LOADER_LIBS="\$(TOP)/src/gallium/auxiliary/pipe-loader/libpipe_loader.a"
+    GALLIUM_PIPE_LOADER_LIBS="$GALLIUM_PIPE_LOADER_LIBS \$(TOP)/src/gallium/winsys/sw/null/libws_null.a"
+
+    if test "x$HAVE_WINSYS_XLIB" = xyes; then
+        GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_XLIB"
+        GALLIUM_PIPE_LOADER_LIBS="$GALLIUM_PIPE_LOADER_LIBS \$(TOP)/src/gallium/winsys/sw/xlib/libws_xlib.a"
+    fi
+
+    if test "x$enable_gallium_drm_loader" = xyes; then
+        GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRM"
+    fi
+
+    AC_SUBST([GALLIUM_PIPE_LOADER_DEFINES])
+    AC_SUBST([GALLIUM_PIPE_LOADER_LIBS])
+fi
+
  dnl Tell Automake which drivers to build
  for driver in $GALLIUM_DRIVERS_DIRS; do
      case "x$driver" in
@@ -1990,6 +2068,8 @@ AM_CONDITIONAL(HAVE_IDENTITY_GALLIUM, test x$HAVE_IDENTITY_GALLIUM = xyes)
  AM_CONDITIONAL(HAVE_NOOP_GALLIUM, test x$HAVE_NOOP_GALLIUM = xyes)
  AM_CONDITIONAL(NEED_RADEON_GALLIUM, test x$NEED_RADEON_GALLIUM = xyes)
  AM_CONDITIONAL(USE_R600_LLVM_COMPILER, test x$USE_R600_LLVM_COMPILER = xyes)
+AM_CONDITIONAL(HAVE_LOADER_GALLIUM, test x$enable_gallium_loader = xyes)
+AM_CONDITIONAL(HAVE_DRM_LOADER_GALLIUM, test x$enable_gallium_drm_loader = xyes)
  AC_SUBST([GALLIUM_MAKE_DIRS])
  
  dnl prepend CORE_DIRS to SRC_DIRS
@@ -2005,9 +2085,12 @@ CXXFLAGS="$CXXFLAGS $USER_CXXFLAGS"
  
  dnl Substitute the config
  AC_CONFIG_FILES([configs/autoconf
+               src/gallium/auxiliary/pipe-loader/Makefile
+               src/gallium/state_trackers/clover/Makefile
                 src/gallium/drivers/Makefile
                 src/gallium/drivers/r300/Makefile
                 src/gallium/drivers/r600/Makefile
+               src/gallium/targets/opencl/Makefile
                 src/gbm/Makefile
                 src/gbm/main/gbm.pc
                 src/egl/drivers/Makefile
diff --git a/docs/GL3.txt b/docs/GL3.txt

index 0a860bb3c1d63f209055c075c53d926c02c3c815..5fc6c69ba4b95d02d805d23a09ce7b5be976257d 100644 (file)
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -88,7 +88,7 @@ GL 4.0:
  
  GLSL 4.0                                             not started
  GL_ARB_texture_query_lod                             not started
-GL_ARB_draw_buffers_blend                            DONE (r600, softpipe)
+GL_ARB_draw_buffers_blend                            DONE (i965, r600, softpipe)
  GL_ARB_draw_indirect                                 not started
  GL_ARB_gpu_shader_fp64                               not started
  GL_ARB_sample_shading                                not started
diff --git a/docs/viewperf.html b/docs/viewperf.html

index 2796250462056258859b6a24016b1f77b4f3db7f..af351bc422539a1a74898c9518c765df04084522 100644 (file)
--- a/docs/viewperf.html
+++ b/docs/viewperf.html
@@ -84,6 +84,11 @@ If the Mesa driver doesn't support this extension the rendering will
  be incorrect and the test will fail.
  </p>
  
+<p>
+Also, the line drawings in test 2 seem to appear in a random
+color.  This is probably due to some uninitialized state somewhere.
+</p>
+
  
  
  <h2>sw-02 test 6</h2>
diff --git a/include/CL/cl.h b/include/CL/cl.h

new file mode 100644 (file)

index 0000000..4f21afe
--- /dev/null
+++ b/include/CL/cl.h
@@ -0,0 +1,998 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+/* $Revision: 11985 $ on $Date: 2010-07-15 11:16:06 -0700 (Thu, 15 Jul 2010) $ */
+
+#ifndef __OPENCL_CL_H
+#define __OPENCL_CL_H
+
+#ifdef __APPLE__
+#include <OpenCL/cl_platform.h>
+#else
+#include <CL/cl_platform.h>
+#endif 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************/
+
+typedef struct _cl_platform_id *    cl_platform_id;
+typedef struct _cl_device_id *      cl_device_id;
+typedef struct _cl_context *        cl_context;
+typedef struct _cl_command_queue *  cl_command_queue;
+typedef struct _cl_mem *            cl_mem;
+typedef struct _cl_program *        cl_program;
+typedef struct _cl_kernel *         cl_kernel;
+typedef struct _cl_event *          cl_event;
+typedef struct _cl_sampler *        cl_sampler;
+
+typedef cl_uint             cl_bool;                     /* WARNING!  Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ 
+typedef cl_ulong            cl_bitfield;
+typedef cl_bitfield         cl_device_type;
+typedef cl_uint             cl_platform_info;
+typedef cl_uint             cl_device_info;
+typedef cl_bitfield         cl_device_fp_config;
+typedef cl_uint             cl_device_mem_cache_type;
+typedef cl_uint             cl_device_local_mem_type;
+typedef cl_bitfield         cl_device_exec_capabilities;
+typedef cl_bitfield         cl_command_queue_properties;
+
+typedef intptr_t                       cl_context_properties;
+typedef cl_uint             cl_context_info;
+typedef cl_uint             cl_command_queue_info;
+typedef cl_uint             cl_channel_order;
+typedef cl_uint             cl_channel_type;
+typedef cl_bitfield         cl_mem_flags;
+typedef cl_uint             cl_mem_object_type;
+typedef cl_uint             cl_mem_info;
+typedef cl_uint             cl_image_info;
+typedef cl_uint             cl_buffer_create_type;
+typedef cl_uint             cl_addressing_mode;
+typedef cl_uint             cl_filter_mode;
+typedef cl_uint             cl_sampler_info;
+typedef cl_bitfield         cl_map_flags;
+typedef cl_uint             cl_program_info;
+typedef cl_uint             cl_program_build_info;
+typedef cl_int              cl_build_status;
+typedef cl_uint             cl_kernel_info;
+typedef cl_uint             cl_kernel_work_group_info;
+typedef cl_uint             cl_event_info;
+typedef cl_uint             cl_command_type;
+typedef cl_uint             cl_profiling_info;
+
+typedef struct _cl_image_format {
+    cl_channel_order        image_channel_order;
+    cl_channel_type         image_channel_data_type;
+} cl_image_format;
+
+
+typedef struct _cl_buffer_region {
+    size_t                  origin;
+    size_t                  size;
+} cl_buffer_region;
+
+/******************************************************************************/
+
+/* Error Codes */
+#define CL_SUCCESS                                  0
+#define CL_DEVICE_NOT_FOUND                         -1
+#define CL_DEVICE_NOT_AVAILABLE                     -2
+#define CL_COMPILER_NOT_AVAILABLE                   -3
+#define CL_MEM_OBJECT_ALLOCATION_FAILURE            -4
+#define CL_OUT_OF_RESOURCES                         -5
+#define CL_OUT_OF_HOST_MEMORY                       -6
+#define CL_PROFILING_INFO_NOT_AVAILABLE             -7
+#define CL_MEM_COPY_OVERLAP                         -8
+#define CL_IMAGE_FORMAT_MISMATCH                    -9
+#define CL_IMAGE_FORMAT_NOT_SUPPORTED               -10
+#define CL_BUILD_PROGRAM_FAILURE                    -11
+#define CL_MAP_FAILURE                              -12
+#define CL_MISALIGNED_SUB_BUFFER_OFFSET             -13
+#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
+
+#define CL_INVALID_VALUE                            -30
+#define CL_INVALID_DEVICE_TYPE                      -31
+#define CL_INVALID_PLATFORM                         -32
+#define CL_INVALID_DEVICE                           -33
+#define CL_INVALID_CONTEXT                          -34
+#define CL_INVALID_QUEUE_PROPERTIES                 -35
+#define CL_INVALID_COMMAND_QUEUE                    -36
+#define CL_INVALID_HOST_PTR                         -37
+#define CL_INVALID_MEM_OBJECT                       -38
+#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR          -39
+#define CL_INVALID_IMAGE_SIZE                       -40
+#define CL_INVALID_SAMPLER                          -41
+#define CL_INVALID_BINARY                           -42
+#define CL_INVALID_BUILD_OPTIONS                    -43
+#define CL_INVALID_PROGRAM                          -44
+#define CL_INVALID_PROGRAM_EXECUTABLE               -45
+#define CL_INVALID_KERNEL_NAME                      -46
+#define CL_INVALID_KERNEL_DEFINITION                -47
+#define CL_INVALID_KERNEL                           -48
+#define CL_INVALID_ARG_INDEX                        -49
+#define CL_INVALID_ARG_VALUE                        -50
+#define CL_INVALID_ARG_SIZE                         -51
+#define CL_INVALID_KERNEL_ARGS                      -52
+#define CL_INVALID_WORK_DIMENSION                   -53
+#define CL_INVALID_WORK_GROUP_SIZE                  -54
+#define CL_INVALID_WORK_ITEM_SIZE                   -55
+#define CL_INVALID_GLOBAL_OFFSET                    -56
+#define CL_INVALID_EVENT_WAIT_LIST                  -57
+#define CL_INVALID_EVENT                            -58
+#define CL_INVALID_OPERATION                        -59
+#define CL_INVALID_GL_OBJECT                        -60
+#define CL_INVALID_BUFFER_SIZE                      -61
+#define CL_INVALID_MIP_LEVEL                        -62
+#define CL_INVALID_GLOBAL_WORK_SIZE                 -63
+#define CL_INVALID_PROPERTY                         -64
+
+/* OpenCL Version */
+#define CL_VERSION_1_0                              1
+#define CL_VERSION_1_1                              1
+
+/* cl_bool */
+#define CL_FALSE                                    0
+#define CL_TRUE                                     1
+
+/* cl_platform_info */
+#define CL_PLATFORM_PROFILE                         0x0900
+#define CL_PLATFORM_VERSION                         0x0901
+#define CL_PLATFORM_NAME                            0x0902
+#define CL_PLATFORM_VENDOR                          0x0903
+#define CL_PLATFORM_EXTENSIONS                      0x0904
+
+/* cl_device_type - bitfield */
+#define CL_DEVICE_TYPE_DEFAULT                      (1 << 0)
+#define CL_DEVICE_TYPE_CPU                          (1 << 1)
+#define CL_DEVICE_TYPE_GPU                          (1 << 2)
+#define CL_DEVICE_TYPE_ACCELERATOR                  (1 << 3)
+#define CL_DEVICE_TYPE_ALL                          0xFFFFFFFF
+
+/* cl_device_info */
+#define CL_DEVICE_TYPE                              0x1000
+#define CL_DEVICE_VENDOR_ID                         0x1001
+#define CL_DEVICE_MAX_COMPUTE_UNITS                 0x1002
+#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS          0x1003
+#define CL_DEVICE_MAX_WORK_GROUP_SIZE               0x1004
+#define CL_DEVICE_MAX_WORK_ITEM_SIZES               0x1005
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR       0x1006
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT      0x1007
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT        0x1008
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG       0x1009
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT      0x100A
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE     0x100B
+#define CL_DEVICE_MAX_CLOCK_FREQUENCY               0x100C
+#define CL_DEVICE_ADDRESS_BITS                      0x100D
+#define CL_DEVICE_MAX_READ_IMAGE_ARGS               0x100E
+#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS              0x100F
+#define CL_DEVICE_MAX_MEM_ALLOC_SIZE                0x1010
+#define CL_DEVICE_IMAGE2D_MAX_WIDTH                 0x1011
+#define CL_DEVICE_IMAGE2D_MAX_HEIGHT                0x1012
+#define CL_DEVICE_IMAGE3D_MAX_WIDTH                 0x1013
+#define CL_DEVICE_IMAGE3D_MAX_HEIGHT                0x1014
+#define CL_DEVICE_IMAGE3D_MAX_DEPTH                 0x1015
+#define CL_DEVICE_IMAGE_SUPPORT                     0x1016
+#define CL_DEVICE_MAX_PARAMETER_SIZE                0x1017
+#define CL_DEVICE_MAX_SAMPLERS                      0x1018
+#define CL_DEVICE_MEM_BASE_ADDR_ALIGN               0x1019
+#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE          0x101A
+#define CL_DEVICE_SINGLE_FP_CONFIG                  0x101B
+#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE             0x101C
+#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE         0x101D
+#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE             0x101E
+#define CL_DEVICE_GLOBAL_MEM_SIZE                   0x101F
+#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE          0x1020
+#define CL_DEVICE_MAX_CONSTANT_ARGS                 0x1021
+#define CL_DEVICE_LOCAL_MEM_TYPE                    0x1022
+#define CL_DEVICE_LOCAL_MEM_SIZE                    0x1023
+#define CL_DEVICE_ERROR_CORRECTION_SUPPORT          0x1024
+#define CL_DEVICE_PROFILING_TIMER_RESOLUTION        0x1025
+#define CL_DEVICE_ENDIAN_LITTLE                     0x1026
+#define CL_DEVICE_AVAILABLE                         0x1027
+#define CL_DEVICE_COMPILER_AVAILABLE                0x1028
+#define CL_DEVICE_EXECUTION_CAPABILITIES            0x1029
+#define CL_DEVICE_QUEUE_PROPERTIES                  0x102A
+#define CL_DEVICE_NAME                              0x102B
+#define CL_DEVICE_VENDOR                            0x102C
+#define CL_DRIVER_VERSION                           0x102D
+#define CL_DEVICE_PROFILE                           0x102E
+#define CL_DEVICE_VERSION                           0x102F
+#define CL_DEVICE_EXTENSIONS                        0x1030
+#define CL_DEVICE_PLATFORM                          0x1031
+/* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */
+/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF       0x1034
+#define CL_DEVICE_HOST_UNIFIED_MEMORY               0x1035
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR          0x1036
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT         0x1037
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT           0x1038
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG          0x1039
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT         0x103A
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE        0x103B
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF          0x103C
+#define CL_DEVICE_OPENCL_C_VERSION                  0x103D
+
+/* cl_device_fp_config - bitfield */
+#define CL_FP_DENORM                                (1 << 0)
+#define CL_FP_INF_NAN                               (1 << 1)
+#define CL_FP_ROUND_TO_NEAREST                      (1 << 2)
+#define CL_FP_ROUND_TO_ZERO                         (1 << 3)
+#define CL_FP_ROUND_TO_INF                          (1 << 4)
+#define CL_FP_FMA                                   (1 << 5)
+#define CL_FP_SOFT_FLOAT                            (1 << 6)
+
+/* cl_device_mem_cache_type */
+#define CL_NONE                                     0x0
+#define CL_READ_ONLY_CACHE                          0x1
+#define CL_READ_WRITE_CACHE                         0x2
+
+/* cl_device_local_mem_type */
+#define CL_LOCAL                                    0x1
+#define CL_GLOBAL                                   0x2
+
+/* cl_device_exec_capabilities - bitfield */
+#define CL_EXEC_KERNEL                              (1 << 0)
+#define CL_EXEC_NATIVE_KERNEL                       (1 << 1)
+
+/* cl_command_queue_properties - bitfield */
+#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE      (1 << 0)
+#define CL_QUEUE_PROFILING_ENABLE                   (1 << 1)
+
+/* cl_context_info  */
+#define CL_CONTEXT_REFERENCE_COUNT                  0x1080
+#define CL_CONTEXT_DEVICES                          0x1081
+#define CL_CONTEXT_PROPERTIES                       0x1082
+#define CL_CONTEXT_NUM_DEVICES                      0x1083
+
+/* cl_context_info + cl_context_properties */
+#define CL_CONTEXT_PLATFORM                         0x1084
+
+/* cl_command_queue_info */
+#define CL_QUEUE_CONTEXT                            0x1090
+#define CL_QUEUE_DEVICE                             0x1091
+#define CL_QUEUE_REFERENCE_COUNT                    0x1092
+#define CL_QUEUE_PROPERTIES                         0x1093
+
+/* cl_mem_flags - bitfield */
+#define CL_MEM_READ_WRITE                           (1 << 0)
+#define CL_MEM_WRITE_ONLY                           (1 << 1)
+#define CL_MEM_READ_ONLY                            (1 << 2)
+#define CL_MEM_USE_HOST_PTR                         (1 << 3)
+#define CL_MEM_ALLOC_HOST_PTR                       (1 << 4)
+#define CL_MEM_COPY_HOST_PTR                        (1 << 5)
+
+/* cl_channel_order */
+#define CL_R                                        0x10B0
+#define CL_A                                        0x10B1
+#define CL_RG                                       0x10B2
+#define CL_RA                                       0x10B3
+#define CL_RGB                                      0x10B4
+#define CL_RGBA                                     0x10B5
+#define CL_BGRA                                     0x10B6
+#define CL_ARGB                                     0x10B7
+#define CL_INTENSITY                                0x10B8
+#define CL_LUMINANCE                                0x10B9
+#define CL_Rx                                       0x10BA
+#define CL_RGx                                      0x10BB
+#define CL_RGBx                                     0x10BC
+
+/* cl_channel_type */
+#define CL_SNORM_INT8                               0x10D0
+#define CL_SNORM_INT16                              0x10D1
+#define CL_UNORM_INT8                               0x10D2
+#define CL_UNORM_INT16                              0x10D3
+#define CL_UNORM_SHORT_565                          0x10D4
+#define CL_UNORM_SHORT_555                          0x10D5
+#define CL_UNORM_INT_101010                         0x10D6
+#define CL_SIGNED_INT8                              0x10D7
+#define CL_SIGNED_INT16                             0x10D8
+#define CL_SIGNED_INT32                             0x10D9
+#define CL_UNSIGNED_INT8                            0x10DA
+#define CL_UNSIGNED_INT16                           0x10DB
+#define CL_UNSIGNED_INT32                           0x10DC
+#define CL_HALF_FLOAT                               0x10DD
+#define CL_FLOAT                                    0x10DE
+
+/* cl_mem_object_type */
+#define CL_MEM_OBJECT_BUFFER                        0x10F0
+#define CL_MEM_OBJECT_IMAGE2D                       0x10F1
+#define CL_MEM_OBJECT_IMAGE3D                       0x10F2
+
+/* cl_mem_info */
+#define CL_MEM_TYPE                                 0x1100
+#define CL_MEM_FLAGS                                0x1101
+#define CL_MEM_SIZE                                 0x1102
+#define CL_MEM_HOST_PTR                             0x1103
+#define CL_MEM_MAP_COUNT                            0x1104
+#define CL_MEM_REFERENCE_COUNT                      0x1105
+#define CL_MEM_CONTEXT                              0x1106
+#define CL_MEM_ASSOCIATED_MEMOBJECT                 0x1107
+#define CL_MEM_OFFSET                               0x1108
+
+/* cl_image_info */
+#define CL_IMAGE_FORMAT                             0x1110
+#define CL_IMAGE_ELEMENT_SIZE                       0x1111
+#define CL_IMAGE_ROW_PITCH                          0x1112
+#define CL_IMAGE_SLICE_PITCH                        0x1113
+#define CL_IMAGE_WIDTH                              0x1114
+#define CL_IMAGE_HEIGHT                             0x1115
+#define CL_IMAGE_DEPTH                              0x1116
+
+/* cl_addressing_mode */
+#define CL_ADDRESS_NONE                             0x1130
+#define CL_ADDRESS_CLAMP_TO_EDGE                    0x1131
+#define CL_ADDRESS_CLAMP                            0x1132
+#define CL_ADDRESS_REPEAT                           0x1133
+#define CL_ADDRESS_MIRRORED_REPEAT                  0x1134
+
+/* cl_filter_mode */
+#define CL_FILTER_NEAREST                           0x1140
+#define CL_FILTER_LINEAR                            0x1141
+
+/* cl_sampler_info */
+#define CL_SAMPLER_REFERENCE_COUNT                  0x1150
+#define CL_SAMPLER_CONTEXT                          0x1151
+#define CL_SAMPLER_NORMALIZED_COORDS                0x1152
+#define CL_SAMPLER_ADDRESSING_MODE                  0x1153
+#define CL_SAMPLER_FILTER_MODE                      0x1154
+
+/* cl_map_flags - bitfield */
+#define CL_MAP_READ                                 (1 << 0)
+#define CL_MAP_WRITE                                (1 << 1)
+
+/* cl_program_info */
+#define CL_PROGRAM_REFERENCE_COUNT                  0x1160
+#define CL_PROGRAM_CONTEXT                          0x1161
+#define CL_PROGRAM_NUM_DEVICES                      0x1162
+#define CL_PROGRAM_DEVICES                          0x1163
+#define CL_PROGRAM_SOURCE                           0x1164
+#define CL_PROGRAM_BINARY_SIZES                     0x1165
+#define CL_PROGRAM_BINARIES                         0x1166
+
+/* cl_program_build_info */
+#define CL_PROGRAM_BUILD_STATUS                     0x1181
+#define CL_PROGRAM_BUILD_OPTIONS                    0x1182
+#define CL_PROGRAM_BUILD_LOG                        0x1183
+
+/* cl_build_status */
+#define CL_BUILD_SUCCESS                            0
+#define CL_BUILD_NONE                               -1
+#define CL_BUILD_ERROR                              -2
+#define CL_BUILD_IN_PROGRESS                        -3
+
+/* cl_kernel_info */
+#define CL_KERNEL_FUNCTION_NAME                     0x1190
+#define CL_KERNEL_NUM_ARGS                          0x1191
+#define CL_KERNEL_REFERENCE_COUNT                   0x1192
+#define CL_KERNEL_CONTEXT                           0x1193
+#define CL_KERNEL_PROGRAM                           0x1194
+
+/* cl_kernel_work_group_info */
+#define CL_KERNEL_WORK_GROUP_SIZE                   0x11B0
+#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE           0x11B1
+#define CL_KERNEL_LOCAL_MEM_SIZE                    0x11B2
+#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
+#define CL_KERNEL_PRIVATE_MEM_SIZE                  0x11B4
+
+/* cl_event_info  */
+#define CL_EVENT_COMMAND_QUEUE                      0x11D0
+#define CL_EVENT_COMMAND_TYPE                       0x11D1
+#define CL_EVENT_REFERENCE_COUNT                    0x11D2
+#define CL_EVENT_COMMAND_EXECUTION_STATUS           0x11D3
+#define CL_EVENT_CONTEXT                            0x11D4
+
+/* cl_command_type */
+#define CL_COMMAND_NDRANGE_KERNEL                   0x11F0
+#define CL_COMMAND_TASK                             0x11F1
+#define CL_COMMAND_NATIVE_KERNEL                    0x11F2
+#define CL_COMMAND_READ_BUFFER                      0x11F3
+#define CL_COMMAND_WRITE_BUFFER                     0x11F4
+#define CL_COMMAND_COPY_BUFFER                      0x11F5
+#define CL_COMMAND_READ_IMAGE                       0x11F6
+#define CL_COMMAND_WRITE_IMAGE                      0x11F7
+#define CL_COMMAND_COPY_IMAGE                       0x11F8
+#define CL_COMMAND_COPY_IMAGE_TO_BUFFER             0x11F9
+#define CL_COMMAND_COPY_BUFFER_TO_IMAGE             0x11FA
+#define CL_COMMAND_MAP_BUFFER                       0x11FB
+#define CL_COMMAND_MAP_IMAGE                        0x11FC
+#define CL_COMMAND_UNMAP_MEM_OBJECT                 0x11FD
+#define CL_COMMAND_MARKER                           0x11FE
+#define CL_COMMAND_ACQUIRE_GL_OBJECTS               0x11FF
+#define CL_COMMAND_RELEASE_GL_OBJECTS               0x1200
+#define CL_COMMAND_READ_BUFFER_RECT                 0x1201
+#define CL_COMMAND_WRITE_BUFFER_RECT                0x1202
+#define CL_COMMAND_COPY_BUFFER_RECT                 0x1203
+#define CL_COMMAND_USER                             0x1204
+
+/* command execution status */
+#define CL_COMPLETE                                 0x0
+#define CL_RUNNING                                  0x1
+#define CL_SUBMITTED                                0x2
+#define CL_QUEUED                                   0x3
+  
+/* cl_buffer_create_type  */
+#define CL_BUFFER_CREATE_TYPE_REGION                0x1220
+
+/* cl_profiling_info  */
+#define CL_PROFILING_COMMAND_QUEUED                 0x1280
+#define CL_PROFILING_COMMAND_SUBMIT                 0x1281
+#define CL_PROFILING_COMMAND_START                  0x1282
+#define CL_PROFILING_COMMAND_END                    0x1283
+
+/********************************************************************************************************/
+
+/* Platform APIs (parameters are left unnamed; the trailing comment on each one gives its name) */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetPlatformIDs(cl_uint          /* num_entries */,
+                 cl_platform_id * /* platforms */,
+                 cl_uint *        /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL 
+clGetPlatformInfo(cl_platform_id   /* platform */, 
+                  cl_platform_info /* param_name */,
+                  size_t           /* param_value_size */, 
+                  void *           /* param_value */,
+                  size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Device APIs: clGetDeviceIDs takes a cl_platform_id, clGetDeviceInfo takes a cl_device_id */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDs(cl_platform_id   /* platform */,
+               cl_device_type   /* device_type */, 
+               cl_uint          /* num_entries */, 
+               cl_device_id *   /* devices */, 
+               cl_uint *        /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceInfo(cl_device_id    /* device */,
+                cl_device_info  /* param_name */, 
+                size_t          /* param_value_size */, 
+                void *          /* param_value */,
+                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Context APIs: both create calls take a CL_CALLBACK pfn_notify, a user_data pointer and a cl_int * errcode_ret */
+extern CL_API_ENTRY cl_context CL_API_CALL
+clCreateContext(const cl_context_properties * /* properties */,
+                cl_uint                       /* num_devices */,
+                const cl_device_id *          /* devices */,
+                void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
+                void *                        /* user_data */,
+                cl_int *                      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_context CL_API_CALL
+clCreateContextFromType(const cl_context_properties * /* properties */,
+                        cl_device_type                /* device_type */,
+                        void (CL_CALLBACK *     /* pfn_notify */ )(const char *, const void *, size_t, void *),
+                        void *                        /* user_data */,
+                        cl_int *                      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetContextInfo(cl_context         /* context */, 
+                 cl_context_info    /* param_name */, 
+                 size_t             /* param_value_size */, 
+                 void *             /* param_value */, 
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Command Queue APIs (clSetCommandQueueProperty is declared only when CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined) */
+extern CL_API_ENTRY cl_command_queue CL_API_CALL
+clCreateCommandQueue(cl_context                     /* context */, 
+                     cl_device_id                   /* device */, 
+                     cl_command_queue_properties    /* properties */,
+                     cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetCommandQueueInfo(cl_command_queue      /* command_queue */,
+                      cl_command_queue_info /* param_name */,
+                      size_t                /* param_value_size */,
+                      void *                /* param_value */,
+                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
+#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
+/* 
+ *  WARNING:
+ *     This API introduces mutable state into the OpenCL implementation. It has been REMOVED
+ *  to better facilitate thread safety.  The 1.0 API is not thread safe. It is not tested by the
+ *  OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
+ *  It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
+ *
+ *  Software developers previously relying on this API are instructed to set the command queue 
+ *  properties when creating the queue, instead. 
+ */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetCommandQueueProperty(cl_command_queue              /* command_queue */,
+                          cl_command_queue_properties   /* properties */, 
+                          cl_bool                        /* enable */,
+                          cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED;
+#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
+
+/* Memory Object APIs (clCreateSubBuffer and clSetMemObjectDestructorCallback are tagged CL_API_SUFFIX__VERSION_1_1) */
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateBuffer(cl_context   /* context */,
+               cl_mem_flags /* flags */,
+               size_t       /* size */,
+               void *       /* host_ptr */,
+               cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateSubBuffer(cl_mem                   /* buffer */,
+                  cl_mem_flags             /* flags */,
+                  cl_buffer_create_type    /* buffer_create_type */,
+                  const void *             /* buffer_create_info */,
+                  cl_int *                 /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateImage2D(cl_context              /* context */,
+                cl_mem_flags            /* flags */,
+                const cl_image_format * /* image_format */,
+                size_t                  /* image_width */,
+                size_t                  /* image_height */,
+                size_t                  /* image_row_pitch */, 
+                void *                  /* host_ptr */,
+                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+                        
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateImage3D(cl_context              /* context */,
+                cl_mem_flags            /* flags */,
+                const cl_image_format * /* image_format */,
+                size_t                  /* image_width */, 
+                size_t                  /* image_height */,
+                size_t                  /* image_depth */, 
+                size_t                  /* image_row_pitch */, 
+                size_t                  /* image_slice_pitch */, 
+                void *                  /* host_ptr */,
+                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+                        
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSupportedImageFormats(cl_context           /* context */,
+                           cl_mem_flags         /* flags */,
+                           cl_mem_object_type   /* image_type */,
+                           cl_uint              /* num_entries */,
+                           cl_image_format *    /* image_formats */,
+                           cl_uint *            /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
+                                    
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetMemObjectInfo(cl_mem           /* memobj */,
+                   cl_mem_info      /* param_name */, 
+                   size_t           /* param_value_size */,
+                   void *           /* param_value */,
+                   size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetImageInfo(cl_mem           /* image */,
+               cl_image_info    /* param_name */, 
+               size_t           /* param_value_size */,
+               void *           /* param_value */,
+               size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetMemObjectDestructorCallback(  cl_mem /* memobj */, 
+                                    void (CL_CALLBACK * /* pfn_notify */)( cl_mem /* memobj */, void* /* user_data */), 
+                                    void * /* user_data */ )             CL_API_SUFFIX__VERSION_1_1;  
+
+/* Sampler APIs: clCreateSampler returns a cl_sampler; the remaining entry points return cl_int */
+extern CL_API_ENTRY cl_sampler CL_API_CALL
+clCreateSampler(cl_context          /* context */,
+                cl_bool             /* normalized_coords */, 
+                cl_addressing_mode  /* addressing_mode */, 
+                cl_filter_mode      /* filter_mode */,
+                cl_int *            /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSamplerInfo(cl_sampler         /* sampler */,
+                 cl_sampler_info    /* param_name */,
+                 size_t             /* param_value_size */,
+                 void *             /* param_value */,
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+                            
+/* Program Object APIs: the two clCreateProgramWith* calls return a cl_program; the rest return cl_int */
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithSource(cl_context        /* context */,
+                          cl_uint           /* count */,
+                          const char **     /* strings */,
+                          const size_t *    /* lengths */,
+                          cl_int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithBinary(cl_context                     /* context */,
+                          cl_uint                        /* num_devices */,
+                          const cl_device_id *           /* device_list */,
+                          const size_t *                 /* lengths */,
+                          const unsigned char **         /* binaries */,
+                          cl_int *                       /* binary_status */,
+                          cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clBuildProgram(cl_program           /* program */,
+               cl_uint              /* num_devices */,
+               const cl_device_id * /* device_list */,
+               const char *         /* options */, 
+               void (CL_CALLBACK *  /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
+               void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetProgramInfo(cl_program         /* program */,
+                 cl_program_info    /* param_name */,
+                 size_t             /* param_value_size */,
+                 void *             /* param_value */,
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetProgramBuildInfo(cl_program            /* program */,
+                      cl_device_id          /* device */,
+                      cl_program_build_info /* param_name */,
+                      size_t                /* param_value_size */,
+                      void *                /* param_value */,
+                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+                            
+/* Kernel Object APIs: clCreateKernel returns a cl_kernel; every other entry point here returns cl_int */
+extern CL_API_ENTRY cl_kernel CL_API_CALL
+clCreateKernel(cl_program      /* program */,
+               const char *    /* kernel_name */,
+               cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCreateKernelsInProgram(cl_program     /* program */,
+                         cl_uint        /* num_kernels */,
+                         cl_kernel *    /* kernels */,
+                         cl_uint *      /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainKernel(cl_kernel    /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseKernel(cl_kernel   /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetKernelArg(cl_kernel    /* kernel */,
+               cl_uint      /* arg_index */,
+               size_t       /* arg_size */,
+               const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelInfo(cl_kernel       /* kernel */,
+                cl_kernel_info  /* param_name */,
+                size_t          /* param_value_size */,
+                void *          /* param_value */,
+                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelWorkGroupInfo(cl_kernel                  /* kernel */,
+                         cl_device_id               /* device */,
+                         cl_kernel_work_group_info  /* param_name */,
+                         size_t                     /* param_value_size */,
+                         void *                     /* param_value */,
+                         size_t *                   /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Event Object APIs (clCreateUserEvent, clSetUserEventStatus and clSetEventCallback are tagged CL_API_SUFFIX__VERSION_1_1) */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clWaitForEvents(cl_uint             /* num_events */,
+                const cl_event *    /* event_list */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetEventInfo(cl_event         /* event */,
+               cl_event_info    /* param_name */,
+               size_t           /* param_value_size */,
+               void *           /* param_value */,
+               size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+                            
+extern CL_API_ENTRY cl_event CL_API_CALL
+clCreateUserEvent(cl_context    /* context */,
+                  cl_int *      /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;               
+                            
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetUserEventStatus(cl_event   /* event */,
+                     cl_int     /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
+                     
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetEventCallback( cl_event    /* event */,
+                    cl_int      /* command_exec_callback_type */,
+                    void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
+                    void *      /* user_data */) CL_API_SUFFIX__VERSION_1_1;
+
+/* Profiling APIs: one entry point, taking a cl_event and a cl_profiling_info param_name */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetEventProfilingInfo(cl_event            /* event */,
+                        cl_profiling_info   /* param_name */,
+                        size_t              /* param_value_size */,
+                        void *              /* param_value */,
+                        size_t *            /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+                                
+/* Flush and Finish APIs: each takes only the cl_command_queue and returns cl_int */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Enqueued Commands APIs (the *Rect variants are tagged CL_API_SUFFIX__VERSION_1_1; clEnqueueMap* return void *) */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadBuffer(cl_command_queue    /* command_queue */,
+                    cl_mem              /* buffer */,
+                    cl_bool             /* blocking_read */,
+                    size_t              /* offset */,
+                    size_t              /* cb */, 
+                    void *              /* ptr */,
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+                            
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadBufferRect(cl_command_queue    /* command_queue */,
+                        cl_mem              /* buffer */,
+                        cl_bool             /* blocking_read */,
+                        const size_t *      /* buffer_origin */,
+                        const size_t *      /* host_origin */, 
+                        const size_t *      /* region */,
+                        size_t              /* buffer_row_pitch */,
+                        size_t              /* buffer_slice_pitch */,
+                        size_t              /* host_row_pitch */,
+                        size_t              /* host_slice_pitch */,                        
+                        void *              /* ptr */,
+                        cl_uint             /* num_events_in_wait_list */,
+                        const cl_event *    /* event_wait_list */,
+                        cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
+                            
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteBuffer(cl_command_queue   /* command_queue */, 
+                     cl_mem             /* buffer */, 
+                     cl_bool            /* blocking_write */, 
+                     size_t             /* offset */, 
+                     size_t             /* cb */, 
+                     const void *       /* ptr */, 
+                     cl_uint            /* num_events_in_wait_list */, 
+                     const cl_event *   /* event_wait_list */, 
+                     cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_0;
+                            
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteBufferRect(cl_command_queue    /* command_queue */,
+                         cl_mem              /* buffer */,
+                         cl_bool             /* blocking_write */,
+                         const size_t *      /* buffer_origin */,
+                         const size_t *      /* host_origin */, 
+                         const size_t *      /* region */,
+                         size_t              /* buffer_row_pitch */,
+                         size_t              /* buffer_slice_pitch */,
+                         size_t              /* host_row_pitch */,
+                         size_t              /* host_slice_pitch */,                        
+                         const void *        /* ptr */,
+                         cl_uint             /* num_events_in_wait_list */,
+                         const cl_event *    /* event_wait_list */,
+                         cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
+                            
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBuffer(cl_command_queue    /* command_queue */, 
+                    cl_mem              /* src_buffer */,
+                    cl_mem              /* dst_buffer */, 
+                    size_t              /* src_offset */,
+                    size_t              /* dst_offset */,
+                    size_t              /* cb */, 
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+                            
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferRect(cl_command_queue    /* command_queue */, 
+                        cl_mem              /* src_buffer */,
+                        cl_mem              /* dst_buffer */, 
+                        const size_t *      /* src_origin */,
+                        const size_t *      /* dst_origin */,
+                        const size_t *      /* region */, 
+                        size_t              /* src_row_pitch */,
+                        size_t              /* src_slice_pitch */,
+                        size_t              /* dst_row_pitch */,
+                        size_t              /* dst_slice_pitch */,
+                        cl_uint             /* num_events_in_wait_list */,
+                        const cl_event *    /* event_wait_list */,
+                        cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_1;
+                            
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadImage(cl_command_queue     /* command_queue */,
+                   cl_mem               /* image */,
+                   cl_bool              /* blocking_read */, 
+                   const size_t *       /* origin[3] */,
+                   const size_t *       /* region[3] */,
+                   size_t               /* row_pitch */,
+                   size_t               /* slice_pitch */, 
+                   void *               /* ptr */,
+                   cl_uint              /* num_events_in_wait_list */,
+                   const cl_event *     /* event_wait_list */,
+                   cl_event *           /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteImage(cl_command_queue    /* command_queue */,
+                    cl_mem              /* image */,
+                    cl_bool             /* blocking_write */, 
+                    const size_t *      /* origin[3] */,
+                    const size_t *      /* region[3] */,
+                    size_t              /* input_row_pitch */,
+                    size_t              /* input_slice_pitch */, 
+                    const void *        /* ptr */,
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImage(cl_command_queue     /* command_queue */,
+                   cl_mem               /* src_image */,
+                   cl_mem               /* dst_image */, 
+                   const size_t *       /* src_origin[3] */,
+                   const size_t *       /* dst_origin[3] */,
+                   const size_t *       /* region[3] */, 
+                   cl_uint              /* num_events_in_wait_list */,
+                   const cl_event *     /* event_wait_list */,
+                   cl_event *           /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */,
+                           cl_mem           /* src_image */,
+                           cl_mem           /* dst_buffer */, 
+                           const size_t *   /* src_origin[3] */,
+                           const size_t *   /* region[3] */, 
+                           size_t           /* dst_offset */,
+                           cl_uint          /* num_events_in_wait_list */,
+                           const cl_event * /* event_wait_list */,
+                           cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */,
+                           cl_mem           /* src_buffer */,
+                           cl_mem           /* dst_image */, 
+                           size_t           /* src_offset */,
+                           const size_t *   /* dst_origin[3] */,
+                           const size_t *   /* region[3] */, 
+                           cl_uint          /* num_events_in_wait_list */,
+                           const cl_event * /* event_wait_list */,
+                           cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapBuffer(cl_command_queue /* command_queue */,
+                   cl_mem           /* buffer */,
+                   cl_bool          /* blocking_map */, 
+                   cl_map_flags     /* map_flags */,
+                   size_t           /* offset */,
+                   size_t           /* cb */,
+                   cl_uint          /* num_events_in_wait_list */,
+                   const cl_event * /* event_wait_list */,
+                   cl_event *       /* event */,
+                   cl_int *         /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapImage(cl_command_queue  /* command_queue */,
+                  cl_mem            /* image */, 
+                  cl_bool           /* blocking_map */, 
+                  cl_map_flags      /* map_flags */, 
+                  const size_t *    /* origin[3] */,
+                  const size_t *    /* region[3] */,
+                  size_t *          /* image_row_pitch */,
+                  size_t *          /* image_slice_pitch */,
+                  cl_uint           /* num_events_in_wait_list */,
+                  const cl_event *  /* event_wait_list */,
+                  cl_event *        /* event */,
+                  cl_int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueUnmapMemObject(cl_command_queue /* command_queue */,
+                        cl_mem           /* memobj */,
+                        void *           /* mapped_ptr */,
+                        cl_uint          /* num_events_in_wait_list */,
+                        const cl_event *  /* event_wait_list */,
+                        cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
+                       cl_kernel        /* kernel */,
+                       cl_uint          /* work_dim */,
+                       const size_t *   /* global_work_offset */,
+                       const size_t *   /* global_work_size */,
+                       const size_t *   /* local_work_size */,
+                       cl_uint          /* num_events_in_wait_list */,
+                       const cl_event * /* event_wait_list */,
+                       cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueTask(cl_command_queue  /* command_queue */,
+              cl_kernel         /* kernel */,
+              cl_uint           /* num_events_in_wait_list */,
+              const cl_event *  /* event_wait_list */,
+              cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNativeKernel(cl_command_queue  /* command_queue */,
+                      void (*           /* user_func; declared without CL_CALLBACK */)(void *),
+                      void *            /* args */,
+                      size_t            /* cb_args */, 
+                      cl_uint           /* num_mem_objects */,
+                      const cl_mem *    /* mem_list */,
+                      const void **     /* args_mem_loc */,
+                      cl_uint           /* num_events_in_wait_list */,
+                      const cl_event *  /* event_wait_list */,
+                      cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMarker(cl_command_queue    /* command_queue */,
+                cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
+                       cl_uint          /* num_events */,
+                       const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Extension function access
+ *
+ * Returns the extension function address for the given function name,
+ * or NULL if a valid function cannot be found.  The client must
+ * check to make sure the address is not NULL, before using or 
+ * calling the returned function address.
+ */
+extern CL_API_ENTRY void * CL_API_CALL clGetExtensionFunctionAddress(const char * /* func_name */) CL_API_SUFFIX__VERSION_1_0;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* __OPENCL_CL_H */
+
diff --git a/include/CL/cl.hpp b/include/CL/cl.hpp

new file mode 100644 (file)

index 0000000..99b86a6
--- /dev/null
+++ b/include/CL/cl.hpp
@@ -0,0 +1,4011 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+/*! \file
+ *
+ *   \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33)    
+ *   \author Benedict R. Gaster and Laurent Morichetti
+ *   
+ *   Additions and fixes from Brian Cole, March 3rd 2010.
+ *   
+ *   \version 1.1
+ *   \date June 2010
+ *
+ *   Optional extension support
+ *
+ *         cl
+ *         cl_ext_device_fission
+ *                             #define USE_CL_DEVICE_FISSION
+ */
+
+/*! \mainpage
+ * \section intro Introduction
+ * For many large applications C++ is the language of choice and so it seems
+ * reasonable to define C++ bindings for OpenCL.
+ *
+ *
+ * The interface is contained within a single C++ header file \em cl.hpp and all
+ * definitions are contained within the namespace \em cl. There is no additional
+ * requirement to include \em cl.h and to use either the C++ or original C
+ * bindings it is enough to simply include \em cl.hpp.
+ *
+ * The bindings themselves are lightweight and correspond closely to the
+ * underlying C API. Using the C++ bindings introduces no additional execution
+ * overhead.
+ *
+ * For detailed documentation on the bindings see:
+ *
+ * The OpenCL C++ Wrapper API 1.1 (revision 04)
+ *  http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.1.pdf
+ *
+ * \section example Example
+ *
+ * The following example shows a general use case for the C++
+ * bindings, including support for the optional exception feature and
+ * also the supplied vector and string classes, see following sections for
+ * descriptions of these features.
+ *
+ * \code
+ * #define __CL_ENABLE_EXCEPTIONS
+ * 
+ * #if defined(__APPLE__) || defined(__MACOSX)
+ * #include <OpenCL/cl.hpp>
+ * #else
+ * #include <CL/cl.hpp>
+ * #endif
+ * #include <cstdio>
+ * #include <cstdlib>
+ * #include <iostream>
+ * 
+ *  const char * helloStr  = "__kernel void "
+ *                           "hello(void) "
+ *                           "{ "
+ *                           "  "
+ *                           "} ";
+ * 
+ *  int
+ *  main(void)
+ *  {
+ *     cl_int err = CL_SUCCESS;
+ *     try {
+ *
+ *       std::vector<cl::Platform> platforms;
+ *       cl::Platform::get(&platforms);
+ *       if (platforms.size() == 0) {
+ *           std::cout << "Platform size 0\n";
+ *           return -1;
+ *       }
+ *
+ *       cl_context_properties properties[] = 
+ *          { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0};
+ *       cl::Context context(CL_DEVICE_TYPE_CPU, properties); 
+ * 
+ *       std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
+ * 
+ *       cl::Program::Sources source(1,
+ *           std::make_pair(helloStr,strlen(helloStr)));
+ *       cl::Program program_ = cl::Program(context, source);
+ *       program_.build(devices);
+ * 
+ *       cl::Kernel kernel(program_, "hello", &err);
+ * 
+ *       cl::Event event;
+ *       cl::CommandQueue queue(context, devices[0], 0, &err);
+ *       queue.enqueueNDRangeKernel(
+ *           kernel, 
+ *           cl::NullRange, 
+ *           cl::NDRange(4,4),
+ *           cl::NullRange,
+ *           NULL,
+ *           &event); 
+ * 
+ *       event.wait();
+ *     }
+ *     catch (cl::Error err) {
+ *        std::cerr 
+ *           << "ERROR: "
+ *           << err.what()
+ *           << "("
+ *           << err.err()
+ *           << ")"
+ *           << std::endl;
+ *     }
+ * 
+ *    return EXIT_SUCCESS;
+ *  }
+ * 
+ * \endcode
+ *
+ */
+#ifndef CL_HPP_
+#define CL_HPP_
+
+#ifdef _WIN32
+#include <windows.h>
+#include <malloc.h>
+#if defined(USE_DX_INTEROP)
+#include <CL/cl_d3d10.h>
+#endif
+#endif // _WIN32
+
+// 
+#if defined(USE_CL_DEVICE_FISSION)
+#include <CL/cl_ext.h>
+#endif
+
+#if defined(__APPLE__) || defined(__MACOSX)
+#include <OpenGL/OpenGL.h>
+#include <OpenCL/opencl.h>
+#else
+#include <GL/gl.h>
+#include <CL/opencl.h>
+#endif // !__APPLE__
+
+#if !defined(CL_CALLBACK)
+#define CL_CALLBACK
+#endif //CL_CALLBACK
+
+#include <utility>
+
+#if !defined(__NO_STD_VECTOR)
+#include <vector>
+#endif
+
+#if !defined(__NO_STD_STRING)
+#include <string>
+#endif 
+
+#if defined(linux) || defined(__APPLE__) || defined(__MACOSX)
+# include <alloca.h>
+#endif // linux
+
+#include <cstring>
+
+/*! \namespace cl
+ *
+ * \brief The OpenCL C++ bindings are defined within this namespace.
+ *
+ */
+namespace cl {
+
+/*
+ * Lazily resolves an extension entry point: when pfn_<name> is still null it
+ * is looked up with clGetExtensionFunctionAddress("<name>") and cast to
+ * PFN_<name>. A pfn_<name> variable and a PFN_<name> type must be in scope
+ * at the expansion site.
+ * NOTE(review): the inner "if(!pfn_##name)" body is empty, so a failed lookup
+ * is not reported here; pfn_<name> simply stays null.
+ */
+#define __INIT_CL_EXT_FCN_PTR(name) \
+    if(!pfn_##name) { \
+        pfn_##name = (PFN_##name) \
+            clGetExtensionFunctionAddress(#name); \
+        if(!pfn_##name) { \
+        } \
+    }
+
+class Program;
+class Device;
+class Context;
+class CommandQueue;
+class Memory;
+
+#if defined(__CL_ENABLE_EXCEPTIONS)
+#include <exception>
+/*! \class Error
+ * \brief Exception class
+ */
+class Error : public std::exception
+{
+private:
+    cl_int err_;
+    const char * errStr_;
+public:
+    /*! Create a new CL error exception for a given error code
+     *  and corresponding message.
+     */
+    Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr)
+    {}
+
+    ~Error() throw() {}
+
+    /*! \brief Get error string associated with exception
+     *
+     * \return A memory pointer to the error message string.
+     */
+    virtual const char * what() const throw ()
+    {
+        if (errStr_ == NULL) {
+            return "empty";
+        }
+        else {
+            return errStr_;
+        }
+    }
+
+    /*! \brief Get error code associated with exception
+     *
+     *  \return The error code.
+     */
+    const cl_int err(void) const { return err_; }
+};
+
+#define __ERR_STR(x) #x
+#else
+#define __ERR_STR(x) NULL
+#endif // __CL_ENABLE_EXCEPTIONS
+
+//! \cond DOXYGEN_DETAIL
+// Default error strings, one per wrapped OpenCL entry point. Each expands
+// through __ERR_STR, i.e. to the stringified API name when exceptions are
+// enabled and to NULL otherwise. Define __CL_USER_OVERRIDE_ERROR_STRINGS to
+// supply your own set.
+// The macro names spelled "__ENQEUE_..." are kept as they are because they
+// are part of the overridable interface; only the reported API names are
+// corrected (clGetDeviceInfo, clEnqueueUnmapMemObject).
+#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
+#define __GET_DEVICE_INFO_ERR               __ERR_STR(clGetDeviceInfo)
+#define __GET_PLATFORM_INFO_ERR             __ERR_STR(clGetPlatformInfo)
+#define __GET_DEVICE_IDS_ERR                __ERR_STR(clGetDeviceIDs)
+#define __GET_PLATFORM_IDS_ERR              __ERR_STR(clGetPlatformIDs)
+#define __GET_CONTEXT_INFO_ERR              __ERR_STR(clGetContextInfo)
+#define __GET_EVENT_INFO_ERR                __ERR_STR(clGetEventInfo)
+#define __GET_EVENT_PROFILE_INFO_ERR        __ERR_STR(clGetEventProfileInfo)
+#define __GET_MEM_OBJECT_INFO_ERR           __ERR_STR(clGetMemObjectInfo)
+#define __GET_IMAGE_INFO_ERR                __ERR_STR(clGetImageInfo)
+#define __GET_SAMPLER_INFO_ERR              __ERR_STR(clGetSamplerInfo)
+#define __GET_KERNEL_INFO_ERR               __ERR_STR(clGetKernelInfo)
+#define __GET_KERNEL_WORK_GROUP_INFO_ERR    __ERR_STR(clGetKernelWorkGroupInfo)
+#define __GET_PROGRAM_INFO_ERR              __ERR_STR(clGetProgramInfo)
+#define __GET_PROGRAM_BUILD_INFO_ERR        __ERR_STR(clGetProgramBuildInfo)
+#define __GET_COMMAND_QUEUE_INFO_ERR        __ERR_STR(clGetCommandQueueInfo)
+
+#define __CREATE_CONTEXT_FROM_TYPE_ERR      __ERR_STR(clCreateContextFromType)
+#define __GET_SUPPORTED_IMAGE_FORMATS_ERR   __ERR_STR(clGetSupportedImageFormats)
+
+#define __CREATE_BUFFER_ERR                 __ERR_STR(clCreateBuffer)
+#define __CREATE_SUBBUFFER_ERR              __ERR_STR(clCreateSubBuffer)
+#define __CREATE_GL_BUFFER_ERR              __ERR_STR(clCreateFromGLBuffer)
+#define __GET_GL_OBJECT_INFO_ERR            __ERR_STR(clGetGLObjectInfo)
+#define __CREATE_IMAGE2D_ERR                __ERR_STR(clCreateImage2D)
+#define __CREATE_IMAGE3D_ERR                __ERR_STR(clCreateImage3D)
+#define __CREATE_SAMPLER_ERR                __ERR_STR(clCreateSampler)
+#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback)
+
+#define __CREATE_USER_EVENT_ERR             __ERR_STR(clCreateUserEvent)
+#define __SET_USER_EVENT_STATUS_ERR         __ERR_STR(clSetUserEventStatus)
+#define __SET_EVENT_CALLBACK_ERR            __ERR_STR(clSetEventCallback)
+#define __WAIT_FOR_EVENTS_ERR               __ERR_STR(clWaitForEvents)
+
+#define __CREATE_KERNEL_ERR                 __ERR_STR(clCreateKernel)
+#define __SET_KERNEL_ARGS_ERR               __ERR_STR(clSetKernelArg)
+#define __CREATE_PROGRAM_WITH_SOURCE_ERR    __ERR_STR(clCreateProgramWithSource)
+#define __CREATE_PROGRAM_WITH_BINARY_ERR    __ERR_STR(clCreateProgramWithBinary)
+#define __BUILD_PROGRAM_ERR                 __ERR_STR(clBuildProgram)
+#define __CREATE_KERNELS_IN_PROGRAM_ERR     __ERR_STR(clCreateKernelsInProgram)
+
+#define __CREATE_COMMAND_QUEUE_ERR          __ERR_STR(clCreateCommandQueue)
+#define __SET_COMMAND_QUEUE_PROPERTY_ERR    __ERR_STR(clSetCommandQueueProperty)
+#define __ENQUEUE_READ_BUFFER_ERR           __ERR_STR(clEnqueueReadBuffer)
+#define __ENQUEUE_READ_BUFFER_RECT_ERR      __ERR_STR(clEnqueueReadBufferRect)
+#define __ENQUEUE_WRITE_BUFFER_ERR          __ERR_STR(clEnqueueWriteBuffer)
+#define __ENQUEUE_WRITE_BUFFER_RECT_ERR     __ERR_STR(clEnqueueWriteBufferRect)
+#define __ENQEUE_COPY_BUFFER_ERR            __ERR_STR(clEnqueueCopyBuffer)
+#define __ENQEUE_COPY_BUFFER_RECT_ERR       __ERR_STR(clEnqueueCopyBufferRect)
+#define __ENQUEUE_READ_IMAGE_ERR            __ERR_STR(clEnqueueReadImage)
+#define __ENQUEUE_WRITE_IMAGE_ERR           __ERR_STR(clEnqueueWriteImage)
+#define __ENQUEUE_COPY_IMAGE_ERR            __ERR_STR(clEnqueueCopyImage)
+#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR  __ERR_STR(clEnqueueCopyImageToBuffer)
+#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR  __ERR_STR(clEnqueueCopyBufferToImage)
+#define __ENQUEUE_MAP_BUFFER_ERR            __ERR_STR(clEnqueueMapBuffer)
+#define __ENQUEUE_MAP_IMAGE_ERR             __ERR_STR(clEnqueueMapImage)
+#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR      __ERR_STR(clEnqueueUnmapMemObject)
+#define __ENQUEUE_NDRANGE_KERNEL_ERR        __ERR_STR(clEnqueueNDRangeKernel)
+#define __ENQUEUE_TASK_ERR                  __ERR_STR(clEnqueueTask)
+#define __ENQUEUE_NATIVE_KERNEL             __ERR_STR(clEnqueueNativeKernel)
+#define __ENQUEUE_MARKER_ERR                __ERR_STR(clEnqueueMarker)
+#define __ENQUEUE_WAIT_FOR_EVENTS_ERR       __ERR_STR(clEnqueueWaitForEvents)
+#define __ENQUEUE_BARRIER_ERR               __ERR_STR(clEnqueueBarrier)
+
+#define __ENQUEUE_ACQUIRE_GL_ERR            __ERR_STR(clEnqueueAcquireGLObjects)
+#define __ENQUEUE_RELEASE_GL_ERR            __ERR_STR(clEnqueueReleaseGLObjects)
+
+#define __UNLOAD_COMPILER_ERR               __ERR_STR(clUnloadCompiler)
+
+#define __FLUSH_ERR                         __ERR_STR(clFlush)
+#define __FINISH_ERR                        __ERR_STR(clFinish)
+
+#define __CREATE_SUB_DEVICES                __ERR_STR(clCreateSubDevicesEXT)
+#endif // __CL_USER_OVERRIDE_ERROR_STRINGS
+//! \endcond
+
+/*! \class string
+ * \brief Simple string class, that provides a limited subset of std::string
+ * functionality but avoids many of the issues that come with that class.
+ */
+class string
+{
+private:
+    ::size_t size_;
+    char * str_;
+public:
+    string(void) : size_(0), str_(NULL)
+    {
+    }
+
+    string(char * str, ::size_t size) :
+        size_(size),
+        str_(NULL)
+    {
+        str_ = new char[size_+1];
+        if (str_ != NULL) {
+            memcpy(str_, str, size_  * sizeof(char));
+            str_[size_] = '\0';
+        }
+        else {
+            size_ = 0;
+        }
+    }
+
+    string(char * str) :
+        str_(NULL)
+    {
+        size_= ::strlen(str);
+        str_ = new char[size_ + 1];
+        if (str_ != NULL) {
+            memcpy(str_, str, (size_ + 1) * sizeof(char));
+        }
+        else {
+            size_ = 0;
+        }
+    }
+
+    string& operator=(const string& rhs)
+    {
+        if (this == &rhs) {
+            return *this;
+        }
+
+        if (rhs.size_ == 0 || rhs.str_ == NULL) {
+            size_ = 0;
+            str_  = NULL;
+        } 
+        else {
+            size_ = rhs.size_;
+            str_ = new char[size_ + 1];
+            if (str_ != NULL) {
+                memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char));
+            }
+            else {
+                size_ = 0;
+            }
+        }
+
+        return *this;
+    }
+
+    string(const string& rhs)
+    {
+        *this = rhs;
+    }
+
+    ~string()
+    {
+        if (str_ != NULL) {
+            delete[] str_;
+        }
+    }
+
+    ::size_t size(void) const   { return size_; }
+    ::size_t length(void) const { return size(); }
+
+    const char * c_str(void) const { return (str_) ? str_ : "";}
+};
+
+#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING)
+#include <string>
+typedef std::string STRING_CLASS;
+#elif !defined(__USE_DEV_STRING) 
+typedef cl::string STRING_CLASS;
+#endif
+
+#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR)
+#include <vector>
+#define VECTOR_CLASS std::vector
+#elif !defined(__USE_DEV_VECTOR) 
+#define VECTOR_CLASS cl::vector 
+#endif
+
+#if !defined(__MAX_DEFAULT_VECTOR_SIZE)
+#define __MAX_DEFAULT_VECTOR_SIZE 10
+#endif
+
+/*! \class vector
+ * \brief Fixed sized vector implementation that mirroring 
+ * std::vector functionality.
+ */
+template <typename T, unsigned int N = __MAX_DEFAULT_VECTOR_SIZE>
+class vector
+{
+private:
+    T data_[N];
+    unsigned int size_;
+    bool empty_;
+public:
+    vector() : 
+        size_(-1),
+        empty_(true)
+    {}
+
+    ~vector() {}
+
+    unsigned int size(void) const
+    {
+        return size_ + 1;
+    }
+
+    void clear()
+    {
+        size_ = -1;
+        empty_ = true;
+    }
+
+    void push_back (const T& x)
+    { 
+        if (size() < N) {
+            size_++;  
+            data_[size_] = x;
+            empty_ = false;
+        }
+    }
+
+    void pop_back(void)
+    {
+        if (!empty_) {
+            data_[size_].~T();
+            size_--;
+            if (size_ == -1) {
+                empty_ = true;
+            }
+        }
+    }
+  
+    vector(const vector<T, N>& vec) : 
+        size_(vec.size_),
+        empty_(vec.empty_)
+    {
+        if (!empty_) {
+            memcpy(&data_[0], &vec.data_[0], size() * sizeof(T));
+        }
+    } 
+
+    vector(unsigned int size, const T& val = T()) :
+        size_(-1),
+        empty_(true)
+    {
+        for (unsigned int i = 0; i < size; i++) {
+            push_back(val);
+        }
+    }
+
+    vector<T, N>& operator=(const vector<T, N>& rhs)
+    {
+        if (this == &rhs) {
+            return *this;
+        }
+
+        size_  = rhs.size_;
+        empty_ = rhs.empty_;
+
+        if (!empty_) { 
+            memcpy(&data_[0], &rhs.data_[0], size() * sizeof(T));
+        }
+    
+        return *this;
+    }
+
+    bool operator==(vector<T,N> &vec)
+    {
+        if (empty_ && vec.empty_) {
+            return true;
+        }
+
+        if (size() != vec.size()) {
+            return false;
+        }
+
+        return memcmp(&data_[0], &vec.data_[0], size() * sizeof(T)) == 0 ? true : false;
+    }
+  
+    operator T* ()             { return data_; }
+    operator const T* () const { return data_; }
+   
+    bool empty (void) const
+    {
+        return empty_;
+    }
+  
+    unsigned int max_size (void) const
+    {
+        return N;
+    }
+
+    unsigned int capacity () const
+    {
+        return sizeof(T) * N;
+    }
+
+    T& operator[](int index)
+    {
+        return data_[index];
+    }
+  
+    T operator[](int index) const
+    {
+        return data_[index];
+    }
+  
+    template<class I>
+    void assign(I start, I end)
+    {
+        clear();   
+        while(start < end) {
+            push_back(*start);
+            start++;
+        }
+    }
+
+    /*! \class iterator
+     * \brief Iterator class for vectors
+     */
+    class iterator
+    {
+    private:
+        vector<T,N> vec_;
+        int index_;
+        bool initialized_;
+    public:
+        iterator(void) : 
+            index_(-1),
+            initialized_(false)
+        {
+            index_ = -1;
+            initialized_ = false;
+        }
+
+        ~iterator(void) {}
+
+        static iterator begin(vector<T,N> &vec)
+        {
+            iterator i;
+
+            if (!vec.empty()) {
+                i.index_ = 0;
+            }
+
+            i.vec_ = vec;
+            i.initialized_ = true;
+            return i;
+        }
+
+        static iterator end(vector<T,N> &vec)
+        {
+            iterator i;
+
+            if (!vec.empty()) {
+                i.index_ = vec.size();
+            }
+            i.vec_ = vec;
+            i.initialized_ = true;
+            return i;
+        }
+    
+        bool operator==(iterator i)
+        {
+            return ((vec_ == i.vec_) && 
+                    (index_ == i.index_) && 
+                    (initialized_ == i.initialized_));
+        }
+
+        bool operator!=(iterator i)
+        {
+            return (!(*this==i));
+        }
+
+        void operator++()
+        {
+            index_++;
+        }
+
+        void operator++(int x)
+        {
+            index_ += x;
+        }
+
+        void operator--()
+        {
+            index_--;
+        }
+
+        void operator--(int x)
+        {
+            index_ -= x;
+        }
+
+        T operator *()
+        {
+            return vec_[index_];
+        }
+    };
+
+    iterator begin(void)
+    {
+        return iterator::begin(*this);
+    }
+
+    iterator end(void)
+    {
+        return iterator::end(*this);
+    }
+
+    T& front(void)
+    {
+        return data_[0];
+    }
+
+    T& back(void)
+    {
+        return data_[size_];
+    }
+
+    const T& front(void) const
+    {
+        return data_[0];
+    }
+
+    const T& back(void) const
+    {
+        return data_[size_];
+    }
+};  
+    
+/*!
+ * \brief size_t class used to interface between C++ and
+ * OpenCL C calls that require arrays of size_t values, whose
+ * size is known statically.
+ *
+ * It is a cl::vector of ::size_t with room for N elements and adds no
+ * members of its own.
+ */
+template <int N>
+struct size_t : public cl::vector< ::size_t, N> { };
+
+namespace detail {
+
+// GetInfo help struct
+//
+// Generic case: the result is written straight into *param. The functor f
+// is called as f(name, sizeof(T), param, NULL) and its status is returned
+// unchanged.
+template <typename Functor, typename T>
+struct GetInfoHelper
+{
+    static cl_int
+    get(Functor f, cl_uint name, T* param)
+    {
+        return f(name, sizeof(T), param, NULL);
+    }
+};
+
+// Specialized GetInfoHelper for VECTOR_CLASS params
+//
+// Two-step query: the first call asks only for the required byte count, the
+// second fills a temporary buffer of that size, and the buffer is then
+// copied into *param as required / sizeof(T) elements.
+// Returns the first non-CL_SUCCESS status from f, otherwise CL_SUCCESS;
+// *param is left untouched on failure.
+template <typename Func, typename T>
+struct GetInfoHelper<Func, VECTOR_CLASS<T> >
+{
+    static cl_int get(Func f, cl_uint name, VECTOR_CLASS<T>* param)
+    {
+        ::size_t required;
+        cl_int err = f(name, 0, NULL, &required);
+        if (err != CL_SUCCESS) {
+            return err;
+        }
+
+        // The temporary comes from alloca and is never freed explicitly.
+        // NOTE(review): the size reported by f is used unchecked.
+        T* value = (T*) alloca(required);
+        err = f(name, required, value, NULL);
+        if (err != CL_SUCCESS) {
+            return err;
+        }
+
+        param->assign(&value[0], &value[required/sizeof(T)]);
+        return CL_SUCCESS;
+    }
+};
+
+// Specialized for getInfo<CL_PROGRAM_BINARIES>
+template <typename Func>
+struct GetInfoHelper<Func, VECTOR_CLASS<char *> >
+{
+    static cl_int
+    get(Func f, cl_uint name, VECTOR_CLASS<char *>* param)
+    {
+      cl_uint err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL);
+      if (err != CL_SUCCESS) {
+        return err;
+      }
+      
+      return CL_SUCCESS;
+    }
+};
+
+// Specialized GetInfoHelper for STRING_CLASS params
+//
+// Two-step query: the first call asks only for the required byte count, the
+// second fills a temporary char buffer of that size, which is then assigned
+// to *param.
+// Returns the first non-CL_SUCCESS status from f, otherwise CL_SUCCESS;
+// *param is left untouched on failure.
+template <typename Func>
+struct GetInfoHelper<Func, STRING_CLASS>
+{
+    static cl_int get(Func f, cl_uint name, STRING_CLASS* param)
+    {
+        ::size_t required;
+        cl_int err = f(name, 0, NULL, &required);
+        if (err != CL_SUCCESS) {
+            return err;
+        }
+
+        // The temporary comes from alloca and is never freed explicitly.
+        char* value = (char*) alloca(required);
+        err = f(name, required, value, NULL);
+        if (err != CL_SUCCESS) {
+            return err;
+        }
+
+        // NOTE(review): the buffer is assigned as a C string, so this
+        // assumes f wrote a NUL terminator within `required` bytes -- confirm.
+        *param = value;
+        return CL_SUCCESS;
+    }
+};
+
+#define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \
+namespace detail { \
+template <typename Func> \
+struct GetInfoHelper<Func, CPP_TYPE> \
+{ \
+    static cl_int get(Func f, cl_uint name, CPP_TYPE* param) \
+    { \
+      cl_uint err = f(name, sizeof(CPP_TYPE), param, NULL); \
+      if (err != CL_SUCCESS) { \
+        return err; \
+      } \
+      \
+      return ReferenceHandler<CPP_TYPE::cl_type>::retain((*param)()); \
+    } \
+}; \
+} 
+
+
+#define __PARAM_NAME_INFO_1_0(F) \
+    F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \
+    F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \
+    F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \
+    F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \
+    F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \
+    \
+    F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \
+    F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \
+    F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \
+    F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_bitfield) \
+    F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \
+    F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \
+    F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \
+    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \
+    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \
+    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \
+    F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \
+    F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \
+    F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \
+    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \
+    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\
+    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \
+    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \
+    F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \
+    F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \
+    F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \
+    F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \
+    F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \
+    F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \
+    F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \
+    F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \
+    F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \
+    F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \
+    F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \
+    F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \
+    F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \
+    F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \
+    F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \
+    F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \
+    F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \
+    \
+    F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \
+    F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS<Device>) \
+    F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS<cl_context_properties>) \
+    \
+    F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \
+    F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \
+    F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \
+    F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \
+    \
+    F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \
+    F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \
+    F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \
+    F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \
+    \
+    F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \
+    F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \
+    F(cl_mem_info, CL_MEM_SIZE, ::size_t) \
+    F(cl_mem_info, CL_MEM_HOST_PTR, void*) \
+    F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \
+    F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \
+    F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \
+    \
+    F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \
+    F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \
+    F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \
+    F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \
+    F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \
+    F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \
+    F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \
+    \
+    F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \
+    F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \
+    F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \
+    F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \
+    F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \
+    \
+    F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \
+    F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \
+    F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \
+    F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS<cl_device_id>) \
+    F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \
+    F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \
+    F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS<char *>) \
+    \
+    F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \
+    F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \
+    F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \
+    \
+    F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \
+    F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \
+    F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \
+    F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \
+    F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \
+    \
+    F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \
+    F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \
+    F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \
+    \
+    F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \
+    F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \
+    F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \
+    F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties)
+
+#if defined(CL_VERSION_1_1)
+/*
+ * X-macro table of the info queries that are only listed when
+ * CL_VERSION_1_1 is defined. Each entry is
+ * F(<info enum type>, <parameter name>, <C++ result type>).
+ */
+#define __PARAM_NAME_INFO_1_1(F) \
+    F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \
+    F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \
+    F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \
+    F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \
+    \
+    F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \
+    F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \
+    \
+    F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \
+    F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \
+    \
+    F(cl_event_info, CL_EVENT_CONTEXT, cl::Context)
+#endif // CL_VERSION_1_1
+
+#if defined(USE_CL_DEVICE_FISSION)
+/*
+ * X-macro table of the device info queries that are only listed when
+ * USE_CL_DEVICE_FISSION is defined. Each entry is
+ * F(<info enum type>, <parameter name>, <C++ result type>).
+ */
+#define __PARAM_NAME_DEVICE_FISSION(F) \
+    F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \
+       F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
+       F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
+       F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \
+       F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS<cl_device_partition_property_ext>)
+#endif // USE_CL_DEVICE_FISSION
+
+// Primary template, intentionally empty: only the specializations generated
+// by __DECLARE_PARAM_TRAITS provide `value` and `param_type`.
+template <typename enum_type, cl_int Name>
+struct param_traits {};
+
+/*
+ * Expands one (token, param_name, T) table entry into
+ *  - a forward declaration `struct token;` in the current namespace, used
+ *    only as a tag type, and
+ *  - a param_traits<detail::token, param_name> specialization exposing
+ *    `value` (the parameter name) and `param_type` (T).
+ */
+#define __DECLARE_PARAM_TRAITS(token, param_name, T) \
+struct token;                                        \
+template<>                                           \
+struct param_traits<detail:: token,param_name>       \
+{                                                    \
+    enum { value = param_name };                     \
+    typedef T param_type;                            \
+};
+
+__PARAM_NAME_INFO_1_0(__DECLARE_PARAM_TRAITS);
+#if defined(CL_VERSION_1_1)
+__PARAM_NAME_INFO_1_1(__DECLARE_PARAM_TRAITS);
+#endif // CL_VERSION_1_1
+
+#if defined(USE_CL_DEVICE_FISSION)
+__PARAM_NAME_DEVICE_FISSION(__DECLARE_PARAM_TRAITS);
+#endif // USE_CL_DEVICE_FISSION
+
+#undef __DECLARE_PARAM_TRAITS
+
+// Convenience functions
+
+/*! Run the info query \a name through the callable \a f and store the result
+ *  in \a param. The GetInfoHelper specialization selected by T decides how
+ *  the value is fetched; its status code is returned.
+ */
+template <typename Func, typename T>
+inline cl_int
+getInfo(Func f, cl_uint name, T* param)
+{
+    typedef GetInfoHelper<Func, T> Helper;
+
+    return Helper::get(f, name, param);
+}
+
+// Binds one leading argument to an info function: calling the functor with
+// (param, size, value, size_ret) forwards to
+// f_(arg0_, param, size, value, size_ret).
+// arg0_ is held by reference, so the bound object must outlive the functor.
+template <typename Func, typename Arg0>
+struct GetInfoFunctor0
+{
+    Func f_; const Arg0& arg0_;
+    cl_int operator ()(
+        cl_uint param, ::size_t size, void* value, ::size_t* size_ret)
+    { return f_(arg0_, param, size, value, size_ret); }
+};
+
+// Binds two leading arguments to an info function: calling the functor with
+// (param, size, value, size_ret) forwards to
+// f_(arg0_, arg1_, param, size, value, size_ret).
+// Both arguments are held by reference, so the bound objects must outlive
+// the functor.
+template <typename Func, typename Arg0, typename Arg1>
+struct GetInfoFunctor1
+{
+    Func f_; const Arg0& arg0_; const Arg1& arg1_;
+    cl_int operator ()(
+        cl_uint param, ::size_t size, void* value, ::size_t* size_ret)
+    { return f_(arg0_, arg1_, param, size, value, size_ret); }
+};
+
+/*! Run the info query \a name on \a f with \a arg0 bound as its first
+ *  argument and store the result in \a param. Returns the status code of the
+ *  GetInfoHelper specialization selected by T.
+ */
+template <typename Func, typename Arg0, typename T>
+inline cl_int
+getInfo(Func f, const Arg0& arg0, cl_uint name, T* param)
+{
+    typedef GetInfoFunctor0<Func, Arg0> BoundFunc;
+
+    BoundFunc bound = { f, arg0 };
+    return GetInfoHelper<BoundFunc, T>::get(bound, name, param);
+}
+
+// Queries parameter `name` for the pair (arg0, arg1) through `f`. Both
+// objects are bound into a GetInfoFunctor1 and the query is run by
+// GetInfoHelper.
+template <typename Func, typename Arg0, typename Arg1, typename T>
+inline cl_int
+getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param)
+{
+    typedef GetInfoFunctor1<Func, Arg0, Arg1> Functor;
+    Functor bound = { f, arg0, arg1 };
+    return GetInfoHelper<Functor, T>::get(bound, name, param);
+}
+
+// Primary template: deliberately provides no retain()/release(). Each
+// supported OpenCL handle type has its own explicit specialization.
+template <typename T>
+struct ReferenceHandler
+{
+};
+
+// Reference handling for cl_device_id: neither operation calls into
+// OpenCL; both simply report CL_INVALID_DEVICE.
+template <>
+struct ReferenceHandler<cl_device_id>
+{
+    // cl_device_id does not have retain().
+    static cl_int retain(cl_device_id)
+    {
+        return CL_INVALID_DEVICE;
+    }
+
+    // cl_device_id does not have release().
+    static cl_int release(cl_device_id)
+    {
+        return CL_INVALID_DEVICE;
+    }
+};
+
+// Reference handling for cl_platform_id: neither operation calls into
+// OpenCL; both simply report CL_INVALID_PLATFORM.
+template <>
+struct ReferenceHandler<cl_platform_id>
+{
+    // cl_platform_id does not have retain().
+    static cl_int retain(cl_platform_id)
+    {
+        return CL_INVALID_PLATFORM;
+    }
+
+    // cl_platform_id does not have release().
+    static cl_int release(cl_platform_id)
+    {
+        return CL_INVALID_PLATFORM;
+    }
+};
+
+// Reference handling for cl_context: forwards to the C API.
+template <>
+struct ReferenceHandler<cl_context>
+{
+    static cl_int retain(cl_context ctx)
+    {
+        return ::clRetainContext(ctx);
+    }
+
+    static cl_int release(cl_context ctx)
+    {
+        return ::clReleaseContext(ctx);
+    }
+};
+
+// Reference handling for cl_command_queue: forwards to the C API.
+template <>
+struct ReferenceHandler<cl_command_queue>
+{
+    static cl_int retain(cl_command_queue q)
+    {
+        return ::clRetainCommandQueue(q);
+    }
+
+    static cl_int release(cl_command_queue q)
+    {
+        return ::clReleaseCommandQueue(q);
+    }
+};
+
+// Reference handling for cl_mem: forwards to the C API.
+template <>
+struct ReferenceHandler<cl_mem>
+{
+    static cl_int retain(cl_mem mem)
+    {
+        return ::clRetainMemObject(mem);
+    }
+
+    static cl_int release(cl_mem mem)
+    {
+        return ::clReleaseMemObject(mem);
+    }
+};
+
+// Reference handling for cl_sampler: forwards to the C API.
+template <>
+struct ReferenceHandler<cl_sampler>
+{
+    static cl_int retain(cl_sampler smp)
+    {
+        return ::clRetainSampler(smp);
+    }
+
+    static cl_int release(cl_sampler smp)
+    {
+        return ::clReleaseSampler(smp);
+    }
+};
+
+// Reference handling for cl_program: forwards to the C API.
+template <>
+struct ReferenceHandler<cl_program>
+{
+    static cl_int retain(cl_program prog)
+    {
+        return ::clRetainProgram(prog);
+    }
+
+    static cl_int release(cl_program prog)
+    {
+        return ::clReleaseProgram(prog);
+    }
+};
+
+// Reference handling for cl_kernel: forwards to the C API.
+template <>
+struct ReferenceHandler<cl_kernel>
+{
+    static cl_int retain(cl_kernel k)
+    {
+        return ::clRetainKernel(k);
+    }
+
+    static cl_int release(cl_kernel k)
+    {
+        return ::clReleaseKernel(k);
+    }
+};
+
+// Reference handling for cl_event: forwards to the C API.
+template <>
+struct ReferenceHandler<cl_event>
+{
+    static cl_int retain(cl_event ev)
+    {
+        return ::clRetainEvent(ev);
+    }
+
+    static cl_int release(cl_event ev)
+    {
+        return ::clReleaseEvent(ev);
+    }
+};
+
+template <typename T>
+class Wrapper
+{
+public:
+    typedef T cl_type;
+
+protected:
+    cl_type object_;
+
+public:
+    Wrapper() : object_(NULL) { }
+
+    ~Wrapper()
+    {
+        if (object_ != NULL) { release(); }
+    }
+
+    Wrapper(const Wrapper<cl_type>& rhs)
+    {
+        object_ = rhs.object_;
+        if (object_ != NULL) { retain(); }
+    }
+
+    Wrapper<cl_type>& operator = (const Wrapper<cl_type>& rhs)
+    {
+        if (object_ != NULL) { release(); }
+        object_ = rhs.object_;
+        if (object_ != NULL) { retain(); }
+        return *this;
+    }
+
+    cl_type operator ()() const { return object_; }
+
+    cl_type& operator ()() { return object_; }
+
+protected:
+
+    cl_int retain() const
+    {
+        return ReferenceHandler<cl_type>::retain(object_);
+    }
+
+    cl_int release() const
+    {
+        return ReferenceHandler<cl_type>::release(object_);
+    }
+};
+
+#if defined(__CL_ENABLE_EXCEPTIONS)
+static inline cl_int errHandler (
+    cl_int err,
+    const char * errStr = NULL) throw(Error)
+{
+    if (err != CL_SUCCESS) {
+        throw Error(err, errStr);
+    }
+    return err;
+}
+#else
+static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
+{
+    return err;
+}
+#endif // __CL_ENABLE_EXCEPTIONS
+
+} // namespace detail
+//! \endcond
+
+/*! \struct ImageFormat
+ * \brief ImageFormat interface for cl_image_format.
+ */
+struct ImageFormat : public cl_image_format
+{
+    ImageFormat(){}
+
+    ImageFormat(cl_channel_order order, cl_channel_type type)
+    {
+        image_channel_order = order;
+        image_channel_data_type = type;
+    }
+
+    ImageFormat& operator = (const ImageFormat& rhs)
+    {
+        if (this != &rhs) {
+            this->image_channel_data_type = rhs.image_channel_data_type;
+            this->image_channel_order     = rhs.image_channel_order;
+        }
+        return *this;
+    }
+};
+
+/*! \class Device
+ * \brief Device interface for cl_device_id.
+ */
+class Device : public detail::Wrapper<cl_device_id>
+{
+public:
+    //! Wraps an existing handle; the constructor only stores it.
+    Device(cl_device_id device) { object_ = device; }
+
+    //! Creates a wrapper that holds no device.
+    Device() : detail::Wrapper<cl_type>() { }
+
+    //! Copy construction is delegated to the Wrapper base.
+    Device(const Device& device) : detail::Wrapper<cl_type>(device) { }
+
+    //! Assignment is delegated to the Wrapper base; self-assignment is skipped.
+    Device& operator = (const Device& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    //! Queries \a name through ::clGetDeviceInfo and stores the result in
+    //! \a param. The status is passed through detail::errHandler.
+    template <typename T>
+    cl_int getInfo(cl_device_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetDeviceInfo, object_, name, param),
+            __GET_DEVICE_INFO_ERR);
+    }
+
+    //! Typed query: the return type is looked up in detail::param_traits for
+    //! the compile-time constant \a name. If \a err is non-NULL it receives
+    //! the status of the underlying query.
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_device_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_device_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+#if defined(USE_CL_DEVICE_FISSION)
+       //! Partitions this device via the clCreateSubDevicesEXT extension and
+       //! stores the resulting sub-devices in \a devices.
+       //! NOTE(review): \a devices is dereferenced unconditionally, so it
+       //! must not be NULL.
+       cl_int createSubDevices(
+               const cl_device_partition_property_ext * properties,
+               VECTOR_CLASS<Device>* devices)
+       {
+               typedef CL_API_ENTRY cl_int 
+                       ( CL_API_CALL * PFN_clCreateSubDevicesEXT)(
+                               cl_device_id /*in_device*/,
+                const cl_device_partition_property_ext * /* properties */,
+                cl_uint /*num_entries*/,
+                cl_device_id * /*out_devices*/,
+                cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+               // Presumably __INIT_CL_EXT_FCN_PTR resolves the extension entry
+               // point into pfn_clCreateSubDevicesEXT -- the macro is defined
+               // outside this view; confirm.
+               static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL;
+               __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT);
+
+               // First call: ask only for the number of sub-devices.
+               cl_uint n = 0;
+        cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __CREATE_SUB_DEVICES);
+        }
+
+        // Second call: fetch the n handles into a stack buffer.
+        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+        err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __CREATE_SUB_DEVICES);
+        }
+
+        // Each raw handle is converted to a Device by the assign().
+        devices->assign(&ids[0], &ids[n]);
+        return CL_SUCCESS;
+       }
+#endif
+};
+
+/*! \class Platform
+ *  \brief Platform interface.
+ */
+class Platform : public detail::Wrapper<cl_platform_id>
+{
+public:
+    static const Platform null();
+
+    Platform(cl_platform_id platform) { object_ = platform; }
+
+    Platform() : detail::Wrapper<cl_type>()  { }
+
+    Platform(const Platform& platform) : detail::Wrapper<cl_type>(platform) { }
+
+    Platform& operator = (const Platform& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetPlatformInfo, object_, name, param),
+            __GET_PLATFORM_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_platform_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_platform_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    cl_int getDevices(
+        cl_device_type type,
+        VECTOR_CLASS<Device>* devices) const
+    {
+        cl_uint n = 0;
+        cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+        }
+
+        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+        err = ::clGetDeviceIDs(object_, type, n, ids, NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+        }
+
+        devices->assign(&ids[0], &ids[n]);
+        return CL_SUCCESS;
+    }
+
+#if defined(USE_DX_INTEROP)
+   /*! \brief Get the list of available D3D10 devices.
+     *
+     *  \param d3d_device_source.
+     *
+     *  \param d3d_object.
+     *
+     *  \param d3d_device_set.
+     *
+     *  \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device
+     *  values returned in devices can be used to identify a specific OpenCL
+     *  device. If \a devices argument is NULL, this argument is ignored.
+     *
+     *  \return One of the following values:
+     *    - CL_SUCCESS if the function is executed successfully.
+     *
+     *  The application can query specific capabilities of the OpenCL device(s)
+     *  returned by cl::getDevices. This can be used by the application to
+     *  determine which device(s) to use.
+     *
+     * \note In the case that exceptions are enabled and a return value
+     * other than CL_SUCCESS is generated, then cl::Error exception is
+     * generated.
+     */
+    cl_int getDevices(
+        cl_d3d10_device_source_khr d3d_device_source,
+        void *                     d3d_object,
+        cl_d3d10_device_set_khr    d3d_device_set,
+        VECTOR_CLASS<Device>* devices) const
+    {
+        typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)(
+            cl_platform_id platform, 
+            cl_d3d10_device_source_khr d3d_device_source, 
+            void * d3d_object,
+            cl_d3d10_device_set_khr d3d_device_set,
+            cl_uint num_entries,
+            cl_device_id * devices,
+            cl_uint* num_devices);
+
+        static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL;
+        __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR);
+
+        cl_uint n = 0;
+        cl_int err = pfn_clGetDeviceIDsFromD3D10KHR(
+            object_, 
+            d3d_device_source, 
+            d3d_object,
+            d3d_device_set, 
+            0, 
+            NULL, 
+            &n);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+        }
+
+        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+        err = pfn_clGetDeviceIDsFromD3D10KHR(
+            object_, 
+            d3d_device_source, 
+            d3d_object,
+            d3d_device_set,
+            n, 
+            ids, 
+            NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+        }
+
+        devices->assign(&ids[0], &ids[n]);
+        return CL_SUCCESS;
+    }
+#endif
+
+    static cl_int get(
+        VECTOR_CLASS<Platform>* platforms)
+    {
+        cl_uint n = 0;
+        cl_int err = ::clGetPlatformIDs(0, NULL, &n);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+        }
+
+        cl_platform_id* ids = (cl_platform_id*) alloca(
+            n * sizeof(cl_platform_id));
+        err = ::clGetPlatformIDs(n, ids, NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+        }
+
+        platforms->assign(&ids[0], &ids[n]);
+        return CL_SUCCESS;
+    }
+};
+
+// Thin forwarding wrapper around ::clUnloadCompiler(); returns its status.
+static inline cl_int
+UnloadCompiler()
+{
+    const cl_int status = ::clUnloadCompiler();
+    return status;
+}
+
+class Context : public detail::Wrapper<cl_context>
+{
+public:
+    Context(
+        const VECTOR_CLASS<Device>& devices,
+        cl_context_properties* properties = NULL,
+        void (CL_CALLBACK * notifyFptr)(
+            const char *,
+            const void *,
+            ::size_t,
+            void *) = NULL,
+        void* data = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateContext(
+            properties, (cl_uint) devices.size(),
+            (cl_device_id*) &devices.front(),
+            notifyFptr, data, &error);
+
+        detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Context(
+        cl_device_type type,
+        cl_context_properties* properties = NULL,
+        void (CL_CALLBACK * notifyFptr)(
+            const char *,
+            const void *,
+            ::size_t,
+            void *) = NULL,
+        void* data = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateContextFromType(
+            properties, type, notifyFptr, data, &error);
+
+        detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Context() : detail::Wrapper<cl_type>() { }
+
+    Context(const Context& context) : detail::Wrapper<cl_type>(context) { }
+
+    Context& operator = (const Context& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    template <typename T>
+    cl_int getInfo(cl_context_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetContextInfo, object_, name, param),
+            __GET_CONTEXT_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_context_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_context_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    cl_int getSupportedImageFormats(
+        cl_mem_flags flags,
+        cl_mem_object_type type,
+        VECTOR_CLASS<ImageFormat>* formats) const
+    {
+        cl_uint numEntries;
+        cl_int err = ::clGetSupportedImageFormats(
+           object_, 
+           flags,
+           type, 
+           0, 
+           NULL, 
+           &numEntries);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR);
+        }
+
+        ImageFormat* value = (ImageFormat*)
+            alloca(numEntries * sizeof(ImageFormat));
+        err = ::clGetSupportedImageFormats(
+            object_, 
+            flags, 
+            type, 
+            numEntries,
+            (cl_image_format*) value, 
+            NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR);
+        }
+
+        formats->assign(&value[0], &value[numEntries]);
+        return CL_SUCCESS;
+    }
+};
+
+// NOTE(review): presumably generates the GetInfoHelper specialization for
+// cl::Context that retains handles returned by info queries -- the macro is
+// defined outside this view; confirm.
+__GET_INFO_HELPER_WITH_RETAIN(cl::Context)
+
+/*! \class Event
+ * \brief Event interface for cl_event.
+ */
+class Event : public detail::Wrapper<cl_event>
+{
+public:
+    Event() : detail::Wrapper<cl_type>() { }
+
+    Event(const Event& event) : detail::Wrapper<cl_type>(event) { }
+
+    Event& operator = (const Event& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    template <typename T>
+    cl_int getInfo(cl_event_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetEventInfo, object_, name, param),
+            __GET_EVENT_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_event_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_event_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    template <typename T>
+    cl_int getProfilingInfo(cl_profiling_info name, T* param) const
+    {
+        return detail::errHandler(detail::getInfo(
+            &::clGetEventProfilingInfo, object_, name, param),
+            __GET_EVENT_PROFILE_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_profiling_info, name>::param_type
+    getProfilingInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_profiling_info, name>::param_type param;
+        cl_int result = getProfilingInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    cl_int wait() const
+    {
+        return detail::errHandler(
+            ::clWaitForEvents(1, &object_),
+            __WAIT_FOR_EVENTS_ERR);
+    }
+
+#if defined(CL_VERSION_1_1)
+    cl_int setCallback(
+        cl_int type,
+        void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *),             
+        void * user_data = NULL)
+    {
+        return detail::errHandler(
+            ::clSetEventCallback(
+                object_,
+                type,
+                pfn_notify,
+                user_data), 
+            __SET_EVENT_CALLBACK_ERR);
+    }
+#endif
+
+    static cl_int
+    waitForEvents(const VECTOR_CLASS<Event>& events)
+    {
+        return detail::errHandler(
+            ::clWaitForEvents(
+                (cl_uint) events.size(), (cl_event*)&events.front()),
+            __WAIT_FOR_EVENTS_ERR);
+    }
+};
+
+// NOTE(review): presumably generates the GetInfoHelper specialization for
+// cl::Event that retains handles returned by info queries -- the macro is
+// defined outside this view; confirm.
+__GET_INFO_HELPER_WITH_RETAIN(cl::Event)
+
+#if defined(CL_VERSION_1_1)
+/*! \class UserEvent
+ * \brief User event interface for cl_event.
+ */
+class UserEvent : public Event
+{
+public:
+    UserEvent(
+        const Context& context,
+        cl_int * err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateUserEvent(
+            context(),
+            &error);
+
+        detail::errHandler(error, __CREATE_USER_EVENT_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    UserEvent() : Event() { }
+
+    UserEvent(const UserEvent& event) : Event(event) { }
+
+    UserEvent& operator = (const UserEvent& rhs)
+    {
+        if (this != &rhs) {
+            Event::operator=(rhs);
+        }
+        return *this;
+    }
+
+    cl_int setStatus(cl_int status)
+    {
+        return detail::errHandler(
+            ::clSetUserEventStatus(object_,status), 
+            __SET_USER_EVENT_STATUS_ERR);
+    }
+};
+#endif
+
+// Waits for every event in `events` (::clWaitForEvents) and passes the
+// status through detail::errHandler.
+// An empty vector is forwarded as (0, NULL) so that the OpenCL runtime
+// reports the error; calling front() on an empty vector would be undefined
+// behaviour.
+inline static cl_int
+WaitForEvents(const VECTOR_CLASS<Event>& events)
+{
+    const cl_uint count = (cl_uint) events.size();
+    cl_event* list = NULL;
+    if (count != 0) {
+        list = (cl_event*)&events.front();
+    }
+    return detail::errHandler(
+        ::clWaitForEvents(count, list),
+        __WAIT_FOR_EVENTS_ERR);
+}
+
+/*! \class Memory
+ * \brief Memory interface for cl_mem.
+ */
+class Memory : public detail::Wrapper<cl_mem>
+{
+public:
+    Memory() : detail::Wrapper<cl_type>() { }
+
+    Memory(const Memory& memory) : detail::Wrapper<cl_type>(memory) { }
+
+    Memory& operator = (const Memory& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    template <typename T>
+    cl_int getInfo(cl_mem_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetMemObjectInfo, object_, name, param),
+            __GET_MEM_OBJECT_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_mem_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_mem_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+#if defined(CL_VERSION_1_1)
+    cl_int setDestructorCallback(
+        void (CL_CALLBACK * pfn_notify)(cl_mem, void *),               
+        void * user_data = NULL)
+    {
+        return detail::errHandler(
+            ::clSetMemObjectDestructorCallback(
+                object_,
+                pfn_notify,
+                user_data), 
+            __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR);
+    }
+#endif
+
+};
+
+// NOTE(review): presumably generates the GetInfoHelper specialization for
+// cl::Memory that retains handles returned by info queries -- the macro is
+// defined outside this view; confirm.
+__GET_INFO_HELPER_WITH_RETAIN(cl::Memory)
+
+/*! \class Buffer
+ * \brief Memory buffer interface.
+ */
+class Buffer : public Memory
+{
+public:
+    Buffer(
+        const Context& context,
+        cl_mem_flags flags,
+        ::size_t size,
+        void* host_ptr = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error);
+
+        detail::errHandler(error, __CREATE_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Buffer() : Memory() { }
+
+    Buffer(const Buffer& buffer) : Memory(buffer) { }
+
+    Buffer& operator = (const Buffer& rhs)
+    {
+        if (this != &rhs) {
+            Memory::operator=(rhs);
+        }
+        return *this;
+    }
+
+#if defined(CL_VERSION_1_1)
+    Buffer createSubBuffer(
+        cl_mem_flags flags,
+        cl_buffer_create_type buffer_create_type,
+        const void * buffer_create_info,
+        cl_int * err = NULL)
+    {
+        Buffer result;
+        cl_int error;
+        result.object_ = ::clCreateSubBuffer(
+            object_, 
+            flags, 
+            buffer_create_type, 
+            buffer_create_info, 
+            &error);
+
+        detail::errHandler(error, __CREATE_SUBBUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+
+        return result;
+       }               
+#endif
+};
+
+#if defined (USE_DX_INTEROP)
+// Buffer created from a Direct3D 10 buffer object through the
+// clCreateFromD3D10BufferKHR extension entry point.
+class BufferD3D10 : public Buffer
+{
+public:
+    // Signature of the extension entry point.
+    typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)(
+    cl_context context, cl_mem_flags flags, ID3D10Buffer*  buffer,
+    cl_int* errcode_ret);
+
+    // Creates the buffer from bufobj in context. If err is non-NULL it
+    // receives the creation status.
+    BufferD3D10(
+        const Context& context,
+        cl_mem_flags flags,
+        ID3D10Buffer* bufobj,
+        cl_int * err = NULL)
+    {
+        // Presumably __INIT_CL_EXT_FCN_PTR resolves the extension entry
+        // point into pfn_clCreateFromD3D10BufferKHR -- the macro is defined
+        // outside this view; confirm.
+        static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL;
+        __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR);
+
+        cl_int error;
+        object_ = pfn_clCreateFromD3D10BufferKHR(
+            context(),
+            flags,
+            bufobj,
+            &error);
+
+        // NOTE(review): the GL error string macro is used for a D3D10 call;
+        // looks like a copy/paste leftover -- confirm against the macro list.
+        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    // Creates a wrapper that holds no buffer.
+    BufferD3D10() : Buffer() { }
+
+    // Copy construction is delegated to Buffer.
+    BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { }
+
+    // Assignment is delegated to Buffer; self-assignment is skipped.
+    BufferD3D10& operator = (const BufferD3D10& rhs)
+    {
+        if (this != &rhs) {
+            Buffer::operator=(rhs);
+        }
+        return *this;
+    }
+};
+#endif
+
+/*! \class BufferGL
+ * \brief Memory buffer interface for GL interop.
+ */
+class BufferGL : public Buffer
+{
+public:
+    BufferGL(
+        const Context& context,
+        cl_mem_flags flags,
+        GLuint bufobj,
+        cl_int * err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateFromGLBuffer(
+            context(),
+            flags,
+            bufobj,
+            &error);
+
+        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    BufferGL() : Buffer() { }
+
+    BufferGL(const BufferGL& buffer) : Buffer(buffer) { }
+
+    BufferGL& operator = (const BufferGL& rhs)
+    {
+        if (this != &rhs) {
+            Buffer::operator=(rhs);
+        }
+        return *this;
+    }
+
+    cl_int getObjectInfo(
+        cl_gl_object_type *type,
+        GLuint * gl_object_name)
+    {
+        return detail::errHandler(
+            ::clGetGLObjectInfo(object_,type,gl_object_name),
+            __GET_GL_OBJECT_INFO_ERR);
+    }
+};
+
+/*! \class BufferRenderGL
+ * \brief Memory buffer interface for GL interop with renderbuffer.
+ */
+class BufferRenderGL : public Buffer
+{
+public:
+    BufferRenderGL(
+        const Context& context,
+        cl_mem_flags flags,
+        GLuint bufobj,
+        cl_int * err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateFromGLRenderbuffer(
+            context(),
+            flags,
+            bufobj,
+            &error);
+
+        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    BufferRenderGL() : Buffer() { }
+
+    BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { }
+
+    BufferRenderGL& operator = (const BufferRenderGL& rhs)
+    {
+        if (this != &rhs) {
+            Buffer::operator=(rhs);
+        }
+        return *this;
+    }
+
+    cl_int getObjectInfo(
+        cl_gl_object_type *type,
+        GLuint * gl_object_name)
+    {
+        return detail::errHandler(
+            ::clGetGLObjectInfo(object_,type,gl_object_name),
+            __GET_GL_OBJECT_INFO_ERR);
+    }
+};
+
+/*! \class Image
+ * \brief Base class  interface for all images.
+ */
+class Image : public Memory
+{
+protected:
+    Image() : Memory() { }
+
+    Image(const Image& image) : Memory(image) { }
+
+    Image& operator = (const Image& rhs)
+    {
+        if (this != &rhs) {
+            Memory::operator=(rhs);
+        }
+        return *this;
+    }
+public:
+    template <typename T>
+    cl_int getImageInfo(cl_image_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetImageInfo, object_, name, param),
+            __GET_IMAGE_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_image_info, name>::param_type
+    getImageInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_image_info, name>::param_type param;
+        cl_int result = getImageInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+};
+
+/*! \class Image2D
+ * \brief Image interface for 2D images.
+ */
+class Image2D : public Image
+{
+public:
+    Image2D(
+        const Context& context,
+        cl_mem_flags flags,
+        ImageFormat format,
+        ::size_t width,
+        ::size_t height,
+        ::size_t row_pitch = 0,
+        void* host_ptr = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateImage2D(
+            context(), flags,&format, width, height, row_pitch, host_ptr, &error);
+
+        detail::errHandler(error, __CREATE_IMAGE2D_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Image2D() { }
+
+    Image2D(const Image2D& image2D) : Image(image2D) { }
+
+    Image2D& operator = (const Image2D& rhs)
+    {
+        if (this != &rhs) {
+            Image::operator=(rhs);
+        }
+        return *this;
+    }
+};
+
+/*! \class Image2DGL
+ * \brief 2D image interface for GL interop.
+ */
+class Image2DGL : public Image2D
+{
+public:
+    Image2DGL(
+        const Context& context,
+        cl_mem_flags flags,
+        GLenum target,
+        GLint  miplevel,
+        GLuint texobj,
+        cl_int * err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateFromGLTexture2D(
+            context(),
+            flags,
+            target,
+            miplevel,
+            texobj,
+            &error);
+
+        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Image2DGL() : Image2D() { }
+
+    Image2DGL(const Image2DGL& image) : Image2D(image) { }
+
+    Image2DGL& operator = (const Image2DGL& rhs)
+    {
+        if (this != &rhs) {
+            Image2D::operator=(rhs);
+        }
+        return *this;
+    }
+};
+
+/*! \class Image3D
+ * \brief Image interface for 3D images.
+ */
+class Image3D : public Image
+{
+public:
+    Image3D(
+        const Context& context,
+        cl_mem_flags flags,
+        ImageFormat format,
+        ::size_t width,
+        ::size_t height,
+        ::size_t depth,
+        ::size_t row_pitch = 0,
+        ::size_t slice_pitch = 0,
+        void* host_ptr = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateImage3D(
+            context(), flags, &format, width, height, depth, row_pitch,
+            slice_pitch, host_ptr, &error);
+
+        detail::errHandler(error, __CREATE_IMAGE3D_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Image3D() { }
+
+    Image3D(const Image3D& image3D) : Image(image3D) { }
+
+    Image3D& operator = (const Image3D& rhs)
+    {
+        if (this != &rhs) {
+            Image::operator=(rhs);
+        }
+        return *this;
+    }
+};
+
+/*! \class Image3DGL
+ * \brief 3D image interface for GL interop.
+ */
+class Image3DGL : public Image3D
+{
+public:
+    Image3DGL(
+        const Context& context,
+        cl_mem_flags flags,
+        GLenum target,
+        GLint  miplevel,
+        GLuint texobj,
+        cl_int * err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateFromGLTexture3D(
+            context(),
+            flags,
+            target,
+            miplevel,
+            texobj,
+            &error);
+
+        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Image3DGL() : Image3D() { }
+
+    Image3DGL(const Image3DGL& image) : Image3D(image) { }
+
+    Image3DGL& operator = (const Image3DGL& rhs)
+    {
+        if (this != &rhs) {
+            Image3D::operator=(rhs);
+        }
+        return *this;
+    }
+};
+
+/*! \class Sampler
+ * \brief Sampler interface for cl_sampler.
+ */
+class Sampler : public detail::Wrapper<cl_sampler>
+{
+public:
+    Sampler() { }
+
+    Sampler(
+        const Context& context,
+        cl_bool normalized_coords,
+        cl_addressing_mode addressing_mode,
+        cl_filter_mode filter_mode,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateSampler(
+            context(), 
+            normalized_coords,
+            addressing_mode,
+            filter_mode,
+            &error);
+
+        detail::errHandler(error, __CREATE_SAMPLER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Sampler(const Sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }
+
+    Sampler& operator = (const Sampler& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    template <typename T>
+    cl_int getInfo(cl_sampler_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetSamplerInfo, object_, name, param),
+            __GET_SAMPLER_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_sampler_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_sampler_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+};
+
+__GET_INFO_HELPER_WITH_RETAIN(cl::Sampler)
+
+class Program;
+class CommandQueue;
+class Kernel;
+
+/*! \class NDRange
+ * \brief NDRange interface
+ */
+class NDRange
+{
+private:
+    size_t<3> sizes_;
+    cl_uint dimensions_;
+
+public:
+    NDRange()
+        : dimensions_(0)
+    { }
+
+    NDRange(::size_t size0)
+        : dimensions_(1)
+    {
+        sizes_.push_back(size0);
+    }
+
+    NDRange(::size_t size0, ::size_t size1)
+        : dimensions_(2)
+    {
+        sizes_.push_back(size0);
+        sizes_.push_back(size1);
+    }
+
+    NDRange(::size_t size0, ::size_t size1, ::size_t size2)
+        : dimensions_(3)
+    {
+        sizes_.push_back(size0);
+        sizes_.push_back(size1);
+        sizes_.push_back(size2);
+    }
+
+    operator const ::size_t*() const { return (const ::size_t*) sizes_; }
+    ::size_t dimensions() const { return dimensions_; }
+};
+
+static const NDRange NullRange;
+
+/*!
+ * \struct LocalSpaceArg
+ * \brief Local address wrapper for use with Kernel::setArg
+ *
+ * detail::KernelArgumentHandler<LocalSpaceArg> reports \c size_ as the
+ * argument size and NULL as the argument pointer.
+ */
+struct LocalSpaceArg
+{
+    ::size_t size_; //!< size handed to ::clSetKernelArg by Kernel::setArg
+};
+
+namespace detail {
+
+/*! \brief Supplies the (size, pointer) pair that Kernel::setArg passes to
+ *  ::clSetKernelArg for an argument of type \a T.
+ *
+ *  The generic version reports sizeof(T) and the address of the value.
+ */
+template <typename T>
+struct KernelArgumentHandler
+{
+    static ::size_t size(const T&) { return sizeof(T); }
+    static T* ptr(T& value) { return &value; }
+};
+
+/*! \brief Specialization for LocalSpaceArg: the size comes from the
+ *  argument's \c size_ member and the pointer is always NULL.
+ */
+template <>
+struct KernelArgumentHandler<LocalSpaceArg>
+{
+    static ::size_t size(const LocalSpaceArg& value) { return value.size_; }
+    static void* ptr(LocalSpaceArg&) { return NULL; }
+};
+
+} 
+//! \endcond
+
+/*! \brief Returns a LocalSpaceArg whose \c size_ member is \a size. */
+inline LocalSpaceArg
+__local(::size_t size)
+{
+    LocalSpaceArg arg;
+    arg.size_ = size;
+    return arg;
+}
+
+class KernelFunctor;
+
+/*! \class Kernel
+ * \brief Kernel interface that implements cl_kernel
+ */
+class Kernel : public detail::Wrapper<cl_kernel>
+{
+public:
+    inline Kernel(const Program& program, const char* name, cl_int* err = NULL);
+
+    Kernel() { }
+
+    Kernel(const Kernel& kernel) : detail::Wrapper<cl_type>(kernel) { }
+
+    Kernel& operator = (const Kernel& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    template <typename T>
+    cl_int getInfo(cl_kernel_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetKernelInfo, object_, name, param),
+            __GET_KERNEL_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_kernel_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_kernel_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    template <typename T>
+    cl_int getWorkGroupInfo(
+        const Device& device, cl_kernel_work_group_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(
+                &::clGetKernelWorkGroupInfo, object_, device(), name, param),
+                __GET_KERNEL_WORK_GROUP_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_kernel_work_group_info, name>::param_type
+        getWorkGroupInfo(const Device& device, cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+        detail::cl_kernel_work_group_info, name>::param_type param;
+        cl_int result = getWorkGroupInfo(device, name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    template <typename T>
+    cl_int setArg(cl_uint index, T value)
+    {
+        return detail::errHandler(
+            ::clSetKernelArg(
+                object_,
+                index,
+                detail::KernelArgumentHandler<T>::size(value),
+                detail::KernelArgumentHandler<T>::ptr(value)),
+            __SET_KERNEL_ARGS_ERR);
+    }
+
+    cl_int setArg(cl_uint index, ::size_t size, void* argPtr)
+    {
+        return detail::errHandler(
+            ::clSetKernelArg(object_, index, size, argPtr),
+            __SET_KERNEL_ARGS_ERR);
+    }
+
+    KernelFunctor bind(
+        const CommandQueue& queue,
+        const NDRange& offset,
+        const NDRange& global,
+        const NDRange& local);
+
+    KernelFunctor bind(
+        const CommandQueue& queue,
+        const NDRange& global,
+        const NDRange& local);
+};
+
+__GET_INFO_HELPER_WITH_RETAIN(cl::Kernel)
+
+/*! \class Program
+ * \brief Program interface that implements cl_program.
+ */
+class Program : public detail::Wrapper<cl_program>
+{
+public:
+    typedef VECTOR_CLASS<std::pair<const void*, ::size_t> > Binaries;
+    typedef VECTOR_CLASS<std::pair<const char*, ::size_t> > Sources;
+
+    Program(
+        const Context& context,
+        const Sources& sources,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+
+        const ::size_t n = (::size_t)sources.size();
+        ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t));
+        const char** strings = (const char**) alloca(n * sizeof(const char*));
+
+        for (::size_t i = 0; i < n; ++i) {
+            strings[i] = sources[(int)i].first;
+            lengths[i] = sources[(int)i].second;
+        }
+
+        object_ = ::clCreateProgramWithSource(
+            context(), (cl_uint)n, strings, lengths, &error);
+
+        detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Program(
+        const Context& context,
+        const VECTOR_CLASS<Device>& devices,
+        const Binaries& binaries,
+        VECTOR_CLASS<cl_int>* binaryStatus = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        const ::size_t n = binaries.size();
+        ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t));
+        const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*));
+
+        for (::size_t i = 0; i < n; ++i) {
+            images[i] = (const unsigned char*)binaries[(int)i].first;
+            lengths[i] = binaries[(int)i].second;
+        }
+
+        object_ = ::clCreateProgramWithBinary(
+            context(), (cl_uint) devices.size(),
+            (cl_device_id*)&devices.front(),
+            lengths, images, binaryStatus != NULL
+               ? (cl_int*) &binaryStatus->front()
+               : NULL, &error);
+
+        detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Program() { }
+
+    Program(const Program& program) : detail::Wrapper<cl_type>(program) { }
+
+    Program& operator = (const Program& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    cl_int build(
+        const VECTOR_CLASS<Device>& devices,
+        const char* options = NULL,
+        void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
+        void* data = NULL) const
+    {
+        return detail::errHandler(
+            ::clBuildProgram(
+                object_,
+                (cl_uint)
+                devices.size(),
+                (cl_device_id*)&devices.front(),
+                options,
+                notifyFptr,
+                data),
+                __BUILD_PROGRAM_ERR);
+    }
+
+    template <typename T>
+    cl_int getInfo(cl_program_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetProgramInfo, object_, name, param),
+            __GET_PROGRAM_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_program_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_program_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    template <typename T>
+    cl_int getBuildInfo(
+        const Device& device, cl_program_build_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(
+                &::clGetProgramBuildInfo, object_, device(), name, param),
+                __GET_PROGRAM_BUILD_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_program_build_info, name>::param_type
+    getBuildInfo(const Device& device, cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_program_build_info, name>::param_type param;
+        cl_int result = getBuildInfo(device, name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    cl_int createKernels(VECTOR_CLASS<Kernel>* kernels)
+    {
+        cl_uint numKernels;
+        cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR);
+        }
+
+        Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel));
+        err = ::clCreateKernelsInProgram(
+            object_, numKernels, (cl_kernel*) value, NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR);
+        }
+
+        kernels->assign(&value[0], &value[numKernels]);
+        return CL_SUCCESS;
+    }
+};
+
+__GET_INFO_HELPER_WITH_RETAIN(cl::Program)
+
+/*! \brief Creates the kernel called \a name from \a program through
+ *  ::clCreateKernel.
+ *
+ * The status is always handed to detail::errHandler and, when \a err is
+ * non-NULL, stored in \a *err.
+ */
+inline Kernel::Kernel(const Program& program, const char* name, cl_int* err)
+{
+    cl_int status;
+    object_ = ::clCreateKernel(program(), name, &status);
+
+    detail::errHandler(status, __CREATE_KERNEL_ERR);
+    if (err != NULL) {
+        *err = status;
+    }
+}
+
+/*! \class CommandQueue
+ * \brief CommandQueue interface for cl_command_queue.
+ */
+class CommandQueue : public detail::Wrapper<cl_command_queue>
+{
+public:
+    CommandQueue(
+        const Context& context,
+        const Device& device,
+        cl_command_queue_properties properties = 0,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateCommandQueue(
+            context(), device(), properties, &error);
+
+        detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    CommandQueue() { }
+
+    CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper<cl_type>(commandQueue) { }
+
+    CommandQueue& operator = (const CommandQueue& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    template <typename T>
+    cl_int getInfo(cl_command_queue_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(
+                &::clGetCommandQueueInfo, object_, name, param),
+                __GET_COMMAND_QUEUE_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_command_queue_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_command_queue_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    cl_int enqueueReadBuffer(
+        const Buffer& buffer,
+        cl_bool blocking,
+        ::size_t offset,
+        ::size_t size,
+        void* ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueReadBuffer(
+                object_, buffer(), blocking, offset, size,
+                ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+            __ENQUEUE_READ_BUFFER_ERR);
+    }
+
+    cl_int enqueueWriteBuffer(
+        const Buffer& buffer,
+        cl_bool blocking,
+        ::size_t offset,
+        ::size_t size,
+        const void* ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueWriteBuffer(
+                object_, buffer(), blocking, offset, size,
+                ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+                __ENQUEUE_WRITE_BUFFER_ERR);
+    }
+
+    cl_int enqueueCopyBuffer(
+        const Buffer& src,
+        const Buffer& dst,
+        ::size_t src_offset,
+        ::size_t dst_offset,
+        ::size_t size,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueCopyBuffer(
+                object_, src(), dst(), src_offset, dst_offset, size,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+            __ENQEUE_COPY_BUFFER_ERR);
+    }
+
+#if defined(CL_VERSION_1_1)
+    cl_int enqueueReadBufferRect(
+        const Buffer& buffer,
+        cl_bool blocking,
+        const size_t<3>& buffer_offset,
+        const size_t<3>& host_offset,
+        const size_t<3>& region,
+        ::size_t buffer_row_pitch,
+        ::size_t buffer_slice_pitch,
+        ::size_t host_row_pitch,
+        ::size_t host_slice_pitch,
+        void *ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueReadBufferRect(
+                object_, 
+                buffer(), 
+                blocking, 
+                (const ::size_t *)buffer_offset,
+                (const ::size_t *)host_offset,
+                (const ::size_t *)region,
+                buffer_row_pitch,
+                buffer_slice_pitch,
+                host_row_pitch,
+                host_slice_pitch,
+                ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+                __ENQUEUE_READ_BUFFER_RECT_ERR);
+    }
+
+
+    cl_int enqueueWriteBufferRect(
+        const Buffer& buffer,
+        cl_bool blocking,
+        const size_t<3>& buffer_offset,
+        const size_t<3>& host_offset,
+        const size_t<3>& region,
+        ::size_t buffer_row_pitch,
+        ::size_t buffer_slice_pitch,
+        ::size_t host_row_pitch,
+        ::size_t host_slice_pitch,
+        void *ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueWriteBufferRect(
+                object_, 
+                buffer(), 
+                blocking, 
+                (const ::size_t *)buffer_offset,
+                (const ::size_t *)host_offset,
+                (const ::size_t *)region,
+                buffer_row_pitch,
+                buffer_slice_pitch,
+                host_row_pitch,
+                host_slice_pitch,
+                ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+                __ENQUEUE_WRITE_BUFFER_RECT_ERR);
+    }
+
+    cl_int enqueueCopyBufferRect(
+        const Buffer& src,
+        const Buffer& dst,
+        const size_t<3>& src_origin,
+        const size_t<3>& dst_origin,
+        const size_t<3>& region,
+        ::size_t src_row_pitch,
+        ::size_t src_slice_pitch,
+        ::size_t dst_row_pitch,
+        ::size_t dst_slice_pitch,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueCopyBufferRect(
+                object_, 
+                src(), 
+                dst(), 
+                (const ::size_t *)src_origin, 
+                (const ::size_t *)dst_origin, 
+                (const ::size_t *)region,
+                src_row_pitch,
+                src_slice_pitch,
+                dst_row_pitch,
+                dst_slice_pitch,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+            __ENQEUE_COPY_BUFFER_RECT_ERR);
+    }
+#endif
+
+    cl_int enqueueReadImage(
+        const Image& image,
+        cl_bool blocking,
+        const size_t<3>& origin,
+        const size_t<3>& region,
+        ::size_t row_pitch,
+        ::size_t slice_pitch,
+        void* ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueReadImage(
+                object_, image(), blocking, (const ::size_t *) origin,
+                (const ::size_t *) region, row_pitch, slice_pitch, ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+            __ENQUEUE_READ_IMAGE_ERR);
+    }
+
+    cl_int enqueueWriteImage(
+        const Image& image,
+        cl_bool blocking,
+        const size_t<3>& origin,
+        const size_t<3>& region,
+        ::size_t row_pitch,
+        ::size_t slice_pitch,
+        void* ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueWriteImage(
+                object_, image(), blocking, (const ::size_t *) origin,
+                (const ::size_t *) region, row_pitch, slice_pitch, ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+            __ENQUEUE_WRITE_IMAGE_ERR);
+    }
+
+    cl_int enqueueCopyImage(
+        const Image& src,
+        const Image& dst,
+        const size_t<3>& src_origin,
+        const size_t<3>& dst_origin,
+        const size_t<3>& region,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueCopyImage(
+                object_, src(), dst(), (const ::size_t *) src_origin,
+                (const ::size_t *)dst_origin, (const ::size_t *) region,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+            __ENQUEUE_COPY_IMAGE_ERR);
+    }
+
+    cl_int enqueueCopyImageToBuffer(
+        const Image& src,
+        const Buffer& dst,
+        const size_t<3>& src_origin,
+        const size_t<3>& region,
+        ::size_t dst_offset,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueCopyImageToBuffer(
+                object_, src(), dst(), (const ::size_t *) src_origin,
+                (const ::size_t *) region, dst_offset,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+            __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR);
+    }
+
+    cl_int enqueueCopyBufferToImage(
+        const Buffer& src,
+        const Image& dst,
+        ::size_t src_offset,
+        const size_t<3>& dst_origin,
+        const size_t<3>& region,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueCopyBufferToImage(
+                object_, src(), dst(), src_offset,
+                (const ::size_t *) dst_origin, (const ::size_t *) region,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+            __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR);
+    }
+
+    void* enqueueMapBuffer(
+        const Buffer& buffer,
+        cl_bool blocking,
+        cl_map_flags flags,
+        ::size_t offset,
+        ::size_t size,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL,
+        cl_int* err = NULL) const
+    {
+        cl_int error;
+        void * result = ::clEnqueueMapBuffer(
+            object_, buffer(), blocking, flags, offset, size,
+            (events != NULL) ? (cl_uint) events->size() : 0,
+            (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+            (cl_event*) event,
+            &error);
+
+        detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+        return result;
+    }
+
+    void* enqueueMapImage(
+        const Image& buffer,
+        cl_bool blocking,
+        cl_map_flags flags,
+        const size_t<3>& origin,
+        const size_t<3>& region,
+        ::size_t * row_pitch,
+        ::size_t * slice_pitch,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL,
+        cl_int* err = NULL) const
+    {
+        cl_int error;
+        void * result = ::clEnqueueMapImage(
+            object_, buffer(), blocking, flags,
+            (const ::size_t *) origin, (const ::size_t *) region,
+            row_pitch, slice_pitch,
+            (events != NULL) ? (cl_uint) events->size() : 0,
+            (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+            (cl_event*) event,
+            &error);
+
+        detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR);
+        if (err != NULL) {
+              *err = error;
+        }
+        return result;
+    }
+
+    cl_int enqueueUnmapMemObject(
+        const Memory& memory,
+        void* mapped_ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueUnmapMemObject(
+                object_, memory(), mapped_ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+            __ENQUEUE_UNMAP_MEM_OBJECT_ERR);
+    }
+
+    cl_int enqueueNDRangeKernel(
+        const Kernel& kernel,
+        const NDRange& offset,
+        const NDRange& global,
+        const NDRange& local,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueNDRangeKernel(
+                object_, kernel(), (cl_uint) global.dimensions(),
+                offset.dimensions() != 0 ? (const ::size_t*) offset : NULL,
+                (const ::size_t*) global,
+                local.dimensions() != 0 ? (const ::size_t*) local : NULL,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+            __ENQUEUE_NDRANGE_KERNEL_ERR);
+    }
+
+    cl_int enqueueTask(
+        const Kernel& kernel,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueTask(
+                object_, kernel(),
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+            __ENQUEUE_TASK_ERR);
+    }
+
+    cl_int enqueueNativeKernel(
+        void (*userFptr)(void *),
+        std::pair<void*, ::size_t> args,
+        const VECTOR_CLASS<Memory>* mem_objects = NULL,
+        const VECTOR_CLASS<const void*>* mem_locs = NULL,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) 
+            ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem))
+            : NULL;
+
+        if (mems != NULL) {
+            for (unsigned int i = 0; i < mem_objects->size(); i++) {
+                mems[i] = ((*mem_objects)[i])();
+            }
+        }
+
+        return detail::errHandler(
+            ::clEnqueueNativeKernel(
+                object_, userFptr, args.first, args.second,
+                (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+                mems,
+                (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+            __ENQUEUE_NATIVE_KERNEL);
+    }
+
+    cl_int enqueueMarker(Event* event = NULL) const
+    {
+        return detail::errHandler(
+            ::clEnqueueMarker(object_, (cl_event*) event),
+            __ENQUEUE_MARKER_ERR);
+    }
+
+    cl_int enqueueWaitForEvents(const VECTOR_CLASS<Event>& events) const
+    {
+        return detail::errHandler(
+            ::clEnqueueWaitForEvents(
+                object_,
+                (cl_uint) events.size(),
+                (const cl_event*) &events.front()),
+            __ENQUEUE_WAIT_FOR_EVENTS_ERR);
+    }
+
+    cl_int enqueueAcquireGLObjects(
+         const VECTOR_CLASS<Memory>* mem_objects = NULL,
+         const VECTOR_CLASS<Event>* events = NULL,
+         Event* event = NULL) const
+     {
+         return detail::errHandler(
+             ::clEnqueueAcquireGLObjects(
+                 object_,
+                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+                 (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+                 (events != NULL) ? (cl_uint) events->size() : 0,
+                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                 (cl_event*) event),
+             __ENQUEUE_ACQUIRE_GL_ERR);
+     }
+
+    cl_int enqueueReleaseGLObjects(
+         const VECTOR_CLASS<Memory>* mem_objects = NULL,
+         const VECTOR_CLASS<Event>* events = NULL,
+         Event* event = NULL) const
+     {
+         return detail::errHandler(
+             ::clEnqueueReleaseGLObjects(
+                 object_,
+                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+                 (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+                 (events != NULL) ? (cl_uint) events->size() : 0,
+                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                 (cl_event*) event),
+             __ENQUEUE_RELEASE_GL_ERR);
+     }
+
+#if defined (USE_DX_INTEROP)
+typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)(
+    cl_command_queue command_queue, cl_uint num_objects,
+    const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list, cl_event* event);
+typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)(
+    cl_command_queue command_queue, cl_uint num_objects,
+    const cl_mem* mem_objects,  cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list, cl_event* event);
+
+    cl_int enqueueAcquireD3D10Objects(
+         const VECTOR_CLASS<Memory>* mem_objects = NULL,
+         const VECTOR_CLASS<Event>* events = NULL,
+         Event* event = NULL) const
+     {
+         static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL;
+         __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR);
+               
+         return detail::errHandler(
+             pfn_clEnqueueAcquireD3D10ObjectsKHR(
+                 object_,
+                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+                 (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+                 (events != NULL) ? (cl_uint) events->size() : 0,
+                 (events != NULL) ? (cl_event*) &events->front() : NULL,
+                 (cl_event*) event),
+             __ENQUEUE_ACQUIRE_GL_ERR);
+     }
+
+    cl_int enqueueReleaseD3D10Objects(
+         const VECTOR_CLASS<Memory>* mem_objects = NULL,
+         const VECTOR_CLASS<Event>* events = NULL,
+         Event* event = NULL) const
+    {
+        static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL;
+        __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR);
+
+        return detail::errHandler(
+            pfn_clEnqueueReleaseD3D10ObjectsKHR(
+                object_,
+                (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+                (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL) ? (cl_event*) &events->front() : NULL,
+                (cl_event*) event),
+            __ENQUEUE_RELEASE_GL_ERR);
+    }
+#endif
+
+    cl_int enqueueBarrier() const
+    {
+        return detail::errHandler(
+            ::clEnqueueBarrier(object_),
+            __ENQUEUE_BARRIER_ERR);
+    }
+
+    cl_int flush() const
+    {
+        return detail::errHandler(::clFlush(object_), __FLUSH_ERR);
+    }
+
+    cl_int finish() const
+    {
+        return detail::errHandler(::clFinish(object_), __FINISH_ERR);
+    }
+};
+
+__GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue)
+
+/*! \class KernelFunctor
+ * \brief Kernel functor interface
+ *
+ * \note Currently only functors of zero to ten arguments are supported. It
+ * is straightforward to add more and a more general solution, similar to
+ * Boost.Lambda could be followed if required in the future.
+ */
+class KernelFunctor
+{
+private:
+    Kernel kernel_;
+    CommandQueue queue_;
+    NDRange offset_;
+    NDRange global_;
+    NDRange local_;
+
+    cl_int err_;
+public:
+    KernelFunctor() { }
+
+    KernelFunctor(
+        const Kernel& kernel,
+        const CommandQueue& queue,
+        const NDRange& offset,
+        const NDRange& global,
+        const NDRange& local) :
+            kernel_(kernel),
+            queue_(queue),
+            offset_(offset),
+            global_(global),
+            local_(local),
+            err_(CL_SUCCESS)
+    {}
+
+    KernelFunctor& operator=(const KernelFunctor& rhs);
+
+    KernelFunctor(const KernelFunctor& rhs);
+
+    cl_int getError() { return err_; }
+
+    inline Event operator()(const VECTOR_CLASS<Event>* events = NULL);
+
+    template<typename A1>
+    inline Event operator()(
+        const A1& a1, 
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3,
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3, class A4>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4,
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3, class A4, class A5>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5,
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3, class A4, class A5, class A6>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3, class A4,
+             class A5, class A6, class A7>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6, 
+        const A7& a7,
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6, 
+        const A7& a7, 
+        const A8& a8,
+        const VECTOR_CLASS<Event>* events = NULL);
+
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6, 
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9,
+        const VECTOR_CLASS<Event>* events = NULL);
+    
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9, class A10>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9, 
+        const A10& a10,
+        const VECTOR_CLASS<Event>* events = NULL);
+    
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9, class A10,
+             class A11>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9, 
+        const A10& a10, 
+        const A11& a11,
+        const VECTOR_CLASS<Event>* events = NULL);
+    
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9, class A10,
+             class A11, class A12>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9, 
+        const A10& a10, 
+        const A11& a11, 
+        const A12& a12,
+        const VECTOR_CLASS<Event>* events = NULL);
+    
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9, class A10,
+             class A11, class A12, class A13>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9, 
+        const A10& a10, 
+        const A11& a11, 
+        const A12& a12, 
+        const A13& a13,
+        const VECTOR_CLASS<Event>* events = NULL);
+    
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9, class A10,
+             class A11, class A12, class A13, class A14>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9, 
+        const A10& a10, 
+        const A11& a11,
+        const A12& a12, 
+        const A13& a13, 
+        const A14& a14,
+        const VECTOR_CLASS<Event>* events = NULL);
+    
+    template<class A1, class A2, class A3, class A4, class A5,
+             class A6, class A7, class A8, class A9, class A10,
+             class A11, class A12, class A13, class A14, class A15>
+    inline Event operator()(
+        const A1& a1, 
+        const A2& a2, 
+        const A3& a3, 
+        const A4& a4, 
+        const A5& a5, 
+        const A6& a6,
+        const A7& a7, 
+        const A8& a8, 
+        const A9& a9, 
+        const A10& a10, 
+        const A11& a11,
+        const A12& a12, 
+        const A13& a13, 
+        const A14& a14, 
+        const A15& a15,
+        const VECTOR_CLASS<Event>* events = NULL);
+};
+
+inline KernelFunctor Kernel::bind(
+    const CommandQueue& queue,
+    const NDRange& offset,
+    const NDRange& global,
+    const NDRange& local)
+{   // Returns a functor built from this kernel plus the given queue/ranges.
+    return KernelFunctor(*this,queue,offset,global,local);
+}
+
+inline KernelFunctor Kernel::bind(
+    const CommandQueue& queue,
+    const NDRange& global,
+    const NDRange& local)
+{   // Same as the five-argument bind, with NullRange used as the offset.
+    return KernelFunctor(*this,queue,NullRange,global,local);
+}
+
+inline KernelFunctor& KernelFunctor::operator=(const KernelFunctor& rhs)
+{
+    if (this == &rhs) {
+        return *this;
+    }
+    // Memberwise copy of every data member, including the stored status.
+    kernel_ = rhs.kernel_;
+    queue_  = rhs.queue_;
+    offset_ = rhs.offset_;
+    global_ = rhs.global_;
+    local_  = rhs.local_;
+    err_    = rhs.err_;  // was omitted, leaving getError() stale after assignment
+    return *this;
+}
+
+inline KernelFunctor::KernelFunctor(const KernelFunctor& rhs) :
+    kernel_(rhs.kernel_),
+    queue_(rhs.queue_),
+    offset_(rhs.offset_),
+    global_(rhs.global_),
+    local_(rhs.local_),
+    err_(rhs.err_)  // was left uninitialized, so getError() on a copy was undefined
+{}
+
+Event KernelFunctor::operator()(const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+    // Enqueue with the kernel's current arguments; the status is kept in err_.
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        events,  // caller's wait list (may be NULL); previously ignored
+        &event);
+
+    return event;
+}
+
+template<typename A1>
+Event KernelFunctor::operator()(
+    const A1& a1,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+    // Set the positional arguments, then enqueue; the status is kept in err_.
+    kernel_.setArg(0,a1);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        events,  // caller's wait list (may be NULL); previously ignored
+        &event);
+
+    return event;
+}
+
+template<typename A1, typename A2>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+template<typename A1, typename A2, typename A3>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+template<typename A1, typename A2, typename A3, typename A4>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+template<typename A1, typename A2, typename A3, typename A4, typename A5>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+template<typename A1, typename A2, typename A3, typename A4, typename A5,
+         typename A6>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+template<typename A1, typename A2, typename A3, typename A4,
+         typename A5, typename A6, typename A7>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6, 
+    const A7& a7,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+template<typename A1, typename A2, typename A3, typename A4, typename A5,
+         typename A6, typename A7, typename A8>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6, 
+    const A7& a7, 
+    const A8& a8,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+template<typename A1, typename A2, typename A3, typename A4, typename A5,
+         typename A6, typename A7, typename A8, typename A9>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5,
+    const A6& a6, 
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+template<typename A1, typename A2, typename A3, typename A4, typename A5,
+         typename A6, typename A7, typename A8, typename A9, typename A10>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6,
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9, 
+    const A10& a10,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+    kernel_.setArg(9,a10);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+template<class A1, class A2, class A3, class A4, class A5,
+         class A6, class A7, class A8, class A9, class A10,
+         class A11>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6,
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9, 
+    const A10& a10, 
+    const A11& a11,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+    kernel_.setArg(9,a10);
+    kernel_.setArg(10,a11);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+template<class A1, class A2, class A3, class A4, class A5,
+         class A6, class A7, class A8, class A9, class A10,
+         class A11, class A12>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6,
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9, 
+    const A10& a10, 
+    const A11& a11, 
+    const A12& a12,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+    kernel_.setArg(9,a10);
+    kernel_.setArg(10,a11);
+    kernel_.setArg(11,a12);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+template<class A1, class A2, class A3, class A4, class A5,
+         class A6, class A7, class A8, class A9, class A10,
+         class A11, class A12, class A13>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6,
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9, 
+    const A10& a10, 
+    const A11& a11, 
+    const A12& a12, 
+    const A13& a13,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+    
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+    kernel_.setArg(9,a10);
+    kernel_.setArg(10,a11);
+    kernel_.setArg(11,a12);
+    kernel_.setArg(12,a13);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+template<class A1, class A2, class A3, class A4, class A5,
+         class A6, class A7, class A8, class A9, class A10,
+         class A11, class A12, class A13, class A14>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5, 
+    const A6& a6,
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9, 
+    const A10& a10, 
+    const A11& a11,
+    const A12& a12, 
+    const A13& a13, 
+    const A14& a14,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+    
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+    kernel_.setArg(9,a10);
+    kernel_.setArg(10,a11);
+    kernel_.setArg(11,a12);
+    kernel_.setArg(12,a13);
+    kernel_.setArg(13,a14);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+template<class A1, class A2, class A3, class A4, class A5,
+         class A6, class A7, class A8, class A9, class A10,
+         class A11, class A12, class A13, class A14, class A15>
+Event KernelFunctor::operator()(
+    const A1& a1, 
+    const A2& a2, 
+    const A3& a3, 
+    const A4& a4, 
+    const A5& a5,
+    const A6& a6, 
+    const A7& a7, 
+    const A8& a8, 
+    const A9& a9, 
+    const A10& a10, 
+    const A11& a11,
+    const A12& a12, 
+    const A13& a13, 
+    const A14& a14, 
+    const A15& a15,
+    const VECTOR_CLASS<Event>* events)
+{
+    Event event;
+    
+    kernel_.setArg(0,a1);
+    kernel_.setArg(1,a2);
+    kernel_.setArg(2,a3);
+    kernel_.setArg(3,a4);
+    kernel_.setArg(4,a5);
+    kernel_.setArg(5,a6);
+    kernel_.setArg(6,a7);
+    kernel_.setArg(7,a8);
+    kernel_.setArg(8,a9);
+    kernel_.setArg(9,a10);
+    kernel_.setArg(10,a11);
+    kernel_.setArg(11,a12);
+    kernel_.setArg(12,a13);
+    kernel_.setArg(13,a14);
+    kernel_.setArg(14,a15);
+
+    err_ = queue_.enqueueNDRangeKernel(
+        kernel_,
+        offset_,
+        global_,
+        local_,
+        NULL,    // bgaster_fixme - do we want to allow wait event lists?
+        &event);
+
+    return event;
+}
+
+#undef __ERR_STR
+#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
+#undef __GET_DEVICE_INFO_ERR
+#undef __GET_PLATFORM_INFO_ERR
+#undef __GET_DEVICE_IDS_ERR
+#undef __GET_CONTEXT_INFO_ERR
+#undef __GET_EVENT_INFO_ERR
+#undef __GET_EVENT_PROFILE_INFO_ERR
+#undef __GET_MEM_OBJECT_INFO_ERR
+#undef __GET_IMAGE_INFO_ERR
+#undef __GET_SAMPLER_INFO_ERR
+#undef __GET_KERNEL_INFO_ERR
+#undef __GET_KERNEL_WORK_GROUP_INFO_ERR
+#undef __GET_PROGRAM_INFO_ERR
+#undef __GET_PROGRAM_BUILD_INFO_ERR
+#undef __GET_COMMAND_QUEUE_INFO_ERR
+
+#undef __CREATE_CONTEXT_FROM_TYPE_ERR
+#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR
+
+#undef __CREATE_BUFFER_ERR
+#undef __CREATE_SUBBUFFER_ERR
+#undef __CREATE_IMAGE2D_ERR
+#undef __CREATE_IMAGE3D_ERR
+#undef __CREATE_SAMPLER_ERR
+#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR
+
+#undef __CREATE_USER_EVENT_ERR
+#undef __SET_USER_EVENT_STATUS_ERR
+#undef __SET_EVENT_CALLBACK_ERR
+
+#undef __WAIT_FOR_EVENTS_ERR
+
+#undef __CREATE_KERNEL_ERR
+#undef __SET_KERNEL_ARGS_ERR
+#undef __CREATE_PROGRAM_WITH_SOURCE_ERR
+#undef __CREATE_PROGRAM_WITH_BINARY_ERR
+#undef __BUILD_PROGRAM_ERR
+#undef __CREATE_KERNELS_IN_PROGRAM_ERR
+
+#undef __CREATE_COMMAND_QUEUE_ERR
+#undef __SET_COMMAND_QUEUE_PROPERTY_ERR
+#undef __ENQUEUE_READ_BUFFER_ERR
+#undef __ENQUEUE_WRITE_BUFFER_ERR
+#undef __ENQUEUE_READ_BUFFER_RECT_ERR
+#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR
+#undef __ENQEUE_COPY_BUFFER_ERR
+#undef __ENQEUE_COPY_BUFFER_RECT_ERR
+#undef __ENQUEUE_READ_IMAGE_ERR
+#undef __ENQUEUE_WRITE_IMAGE_ERR
+#undef __ENQUEUE_COPY_IMAGE_ERR
+#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR
+#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR
+#undef __ENQUEUE_MAP_BUFFER_ERR
+#undef __ENQUEUE_MAP_IMAGE_ERR
+#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR
+#undef __ENQUEUE_NDRANGE_KERNEL_ERR
+#undef __ENQUEUE_TASK_ERR
+#undef __ENQUEUE_NATIVE_KERNEL
+
+#undef __UNLOAD_COMPILER_ERR
+#endif //__CL_USER_OVERRIDE_ERROR_STRINGS
+
+#undef __GET_INFO_HELPER_WITH_RETAIN
+
+// Extensions
+#undef __INIT_CL_EXT_FCN_PTR
+#undef __CREATE_SUB_DEVICES
+
+#if defined(USE_CL_DEVICE_FISSION)
+#undef __PARAM_NAME_DEVICE_FISSION
+#endif // USE_CL_DEVICE_FISSION
+
+} // namespace cl
+
+#endif // CL_HPP_
diff --git a/include/CL/cl_ext.h b/include/CL/cl_ext.h

new file mode 100644 (file)

index 0000000..4e92c7e
--- /dev/null
+++ b/include/CL/cl_ext.h
@@ -0,0 +1,213 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
+
+/* cl_ext.h contains OpenCL extensions which don't have external */
+/* (OpenGL, D3D) dependencies.                                   */
+
+#ifndef __CL_EXT_H
+#define __CL_EXT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __APPLE__
+       #include <OpenCL/cl.h>
+    #include <AvailabilityMacros.h>
+#else
+       #include <CL/cl.h>
+#endif
+
+/* cl_khr_fp64 extension - no extension #define since it has no functions  */
+#define CL_DEVICE_DOUBLE_FP_CONFIG                  0x1032
+
+/* cl_khr_fp16 extension - no extension #define since it has no functions  */
+#define CL_DEVICE_HALF_FP_CONFIG                    0x1033
+
+/* Memory object destruction
+ *
+ * Apple extension for managing externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
+ *
+ * Registers a user callback function that will be called when the memory object is deleted and its resources 
+ * freed. Each call to clSetMemObjectDestructorAPPLE registers the specified user callback function on a callback
+ * stack associated with memobj. The registered user callback functions are called in the reverse order in 
+ * which they were registered. The user callback functions are called and then the memory object is deleted 
+ * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be 
+ * notified when the memory referenced by host_ptr, specified when the memory object is created and used as 
+ * the storage bits for the memory object, can be reused or freed.
+ *
+ * The application may not call CL APIs with the cl_mem object passed to the pfn_notify.
+ *
+ * Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
+ * before using.
+ */
+#define cl_APPLE_SetMemObjectDestructor 1
+cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE(  cl_mem /* memobj */, 
+                                        void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), 
+                                        void * /*user_data */ )             CL_EXT_SUFFIX__VERSION_1_0;  
+
+
+/* Context Logging Functions
+ *
+ * The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
+ * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
+ * before using.
+ *
+ * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger
+ */
+#define cl_APPLE_ContextLoggingFunctions 1
+extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE(  const char * /* errstr */, 
+                                            const void * /* private_info */, 
+                                            size_t       /* cb */, 
+                                            void *       /* user_data */ )  CL_EXT_SUFFIX__VERSION_1_0;
+
+/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
+extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE(   const char * /* errstr */, 
+                                          const void * /* private_info */, 
+                                          size_t       /* cb */, 
+                                          void *       /* user_data */ )    CL_EXT_SUFFIX__VERSION_1_0;
+
+/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
+extern void CL_API_ENTRY clLogMessagesToStderrAPPLE(   const char * /* errstr */, 
+                                          const void * /* private_info */, 
+                                          size_t       /* cb */, 
+                                          void *       /* user_data */ )    CL_EXT_SUFFIX__VERSION_1_0;
+
+
+/************************ 
+* cl_khr_icd extension *                                                  
+************************/
+#define cl_khr_icd 1
+
+/* cl_platform_info                                                        */
+#define CL_PLATFORM_ICD_SUFFIX_KHR                  0x0920
+
+/* Additional Error Codes                                                  */
+#define CL_PLATFORM_NOT_FOUND_KHR                   -1001
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clIcdGetPlatformIDsKHR(cl_uint          /* num_entries */,
+                       cl_platform_id * /* platforms */,
+                       cl_uint *        /* num_platforms */);
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
+    cl_uint          /* num_entries */,
+    cl_platform_id * /* platforms */,
+    cl_uint *        /* num_platforms */);
+
+
+/******************************************
+* cl_nv_device_attribute_query extension *
+******************************************/
+/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
+#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV       0x4000
+#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV       0x4001
+#define CL_DEVICE_REGISTERS_PER_BLOCK_NV            0x4002
+#define CL_DEVICE_WARP_SIZE_NV                      0x4003
+#define CL_DEVICE_GPU_OVERLAP_NV                    0x4004
+#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV            0x4005
+#define CL_DEVICE_INTEGRATED_MEMORY_NV              0x4006
+
+
+/*********************************
+* cl_amd_device_attribute_query *
+*********************************/
+#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD        0x4036
+
+
+#ifdef CL_VERSION_1_1
+   /***********************************
+    * cl_ext_device_fission extension *
+    ***********************************/
+    #define cl_ext_device_fission   1
+    
+    extern CL_API_ENTRY cl_int CL_API_CALL
+    clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; 
+    
+    typedef CL_API_ENTRY cl_int 
+    (CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+    extern CL_API_ENTRY cl_int CL_API_CALL
+    clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; 
+    
+    typedef CL_API_ENTRY cl_int 
+    (CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+    typedef cl_ulong  cl_device_partition_property_ext;
+    extern CL_API_ENTRY cl_int CL_API_CALL
+    clCreateSubDevicesEXT(  cl_device_id /*in_device*/,
+                            const cl_device_partition_property_ext * /* properties */,
+                            cl_uint /*num_entries*/,
+                            cl_device_id * /*out_devices*/,
+                            cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+    typedef CL_API_ENTRY cl_int 
+    ( CL_API_CALL * clCreateSubDevicesEXT_fn)(  cl_device_id /*in_device*/,
+                                                const cl_device_partition_property_ext * /* properties */,
+                                                cl_uint /*num_entries*/,
+                                                cl_device_id * /*out_devices*/,
+                                                cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+    /* cl_device_partition_property_ext */
+    #define CL_DEVICE_PARTITION_EQUALLY_EXT             0x4050
+    #define CL_DEVICE_PARTITION_BY_COUNTS_EXT           0x4051
+    #define CL_DEVICE_PARTITION_BY_NAMES_EXT            0x4052
+    #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT  0x4053
+    
+    /* clGetDeviceInfo selectors */
+    #define CL_DEVICE_PARENT_DEVICE_EXT                 0x4054
+    #define CL_DEVICE_PARTITION_TYPES_EXT               0x4055
+    #define CL_DEVICE_AFFINITY_DOMAINS_EXT              0x4056
+    #define CL_DEVICE_REFERENCE_COUNT_EXT               0x4057
+    #define CL_DEVICE_PARTITION_STYLE_EXT               0x4058
+    
+    /* error codes */
+    #define CL_DEVICE_PARTITION_FAILED_EXT              -1057
+    #define CL_INVALID_PARTITION_COUNT_EXT              -1058
+    #define CL_INVALID_PARTITION_NAME_EXT               -1059
+    
+    /* CL_AFFINITY_DOMAINs */
+    #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT             0x1
+    #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT             0x2
+    #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT             0x3
+    #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT             0x4
+    #define CL_AFFINITY_DOMAIN_NUMA_EXT                 0x10
+    #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT     0x100
+    
+    /* cl_device_partition_property_ext list terminators */
+    #define CL_PROPERTIES_LIST_END_EXT                  ((cl_device_partition_property_ext) 0)
+    #define CL_PARTITION_BY_COUNTS_LIST_END_EXT         ((cl_device_partition_property_ext) 0)
+    #define CL_PARTITION_BY_NAMES_LIST_END_EXT          ((cl_device_partition_property_ext) 0 - 1)
+
+
+
+#endif /* CL_VERSION_1_1 */
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* __CL_EXT_H */
diff --git a/include/CL/cl_gl.h b/include/CL/cl_gl.h

new file mode 100644 (file)

index 0000000..3b4fe06
--- /dev/null
+++ b/include/CL/cl_gl.h
@@ -0,0 +1,155 @@
+/**********************************************************************************
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ **********************************************************************************/
+
+/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
+
+/*
+ * cl_gl.h contains Khronos-approved (KHR) OpenCL extensions which have
+ * OpenGL dependencies. The application is responsible for #including
+ * OpenGL or OpenGL ES headers before #including cl_gl.h.
+ */
+
+#ifndef __OPENCL_CL_GL_H
+#define __OPENCL_CL_GL_H
+
+#ifdef __APPLE__
+#include <OpenCL/cl.h>
+#include <OpenGL/CGLDevice.h>
+#else
+#include <CL/cl.h>
+#endif 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* cl_uint aliases used as selector types; the constant groups that follow list the
+ * values defined for cl_gl_object_type and cl_gl_texture_info. */
+typedef cl_uint     cl_gl_object_type;
+typedef cl_uint     cl_gl_texture_info;
+typedef cl_uint     cl_gl_platform_info;
+/* Pointer to struct __GLsync, which is left incomplete (no definition in this header). */
+typedef struct __GLsync *cl_GLsync;
+
+/* cl_gl_object_type */
+#define CL_GL_OBJECT_BUFFER             0x2000
+#define CL_GL_OBJECT_TEXTURE2D          0x2001
+#define CL_GL_OBJECT_TEXTURE3D          0x2002
+#define CL_GL_OBJECT_RENDERBUFFER       0x2003
+
+/* cl_gl_texture_info */
+#define CL_GL_TEXTURE_TARGET            0x2004
+#define CL_GL_MIPMAP_LEVEL              0x2005
+
+/*
+ * Declares clCreateFromGLBuffer: takes a context, memory flags and a GL buffer
+ * object name (bufobj) and returns a cl_mem.
+ * errcode_ret is typed cl_int * (not plain int *) so that it matches the other
+ * clCreateFromGL* prototypes in this header.
+ */
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLBuffer(cl_context     /* context */,
+                     cl_mem_flags   /* flags */,
+                     cl_GLuint      /* bufobj */,
+                     cl_int *       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Declares clCreateFromGLTexture2D: takes a context, memory flags, a GL target enum,
+ * a mip level and a GL texture name; returns a cl_mem. errcode_ret is a cl_int *
+ * (presumably receives the status code - confirm against the specification). */
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLTexture2D(cl_context      /* context */,
+                        cl_mem_flags    /* flags */,
+                        cl_GLenum       /* target */,
+                        cl_GLint        /* miplevel */,
+                        cl_GLuint       /* texture */,
+                        cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Declares clCreateFromGLTexture3D: same parameter list and return type as
+ * clCreateFromGLTexture2D (context, flags, target, miplevel, texture, errcode_ret). */
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLTexture3D(cl_context      /* context */,
+                        cl_mem_flags    /* flags */,
+                        cl_GLenum       /* target */,
+                        cl_GLint        /* miplevel */,
+                        cl_GLuint       /* texture */,
+                        cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Declares clCreateFromGLRenderbuffer: takes a context, memory flags and a GL
+ * renderbuffer name; returns a cl_mem and has a cl_int * errcode_ret parameter. */
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLRenderbuffer(cl_context   /* context */,
+                           cl_mem_flags /* flags */,
+                           cl_GLuint    /* renderbuffer */,
+                           cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Declares clGetGLObjectInfo: takes a cl_mem plus two pointer parameters, one to a
+ * cl_gl_object_type and one to a cl_GLuint object name; returns a cl_int. */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLObjectInfo(cl_mem                /* memobj */,
+                  cl_gl_object_type *   /* gl_object_type */,
+                  cl_GLuint *              /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
+                  
+/* Declares clGetGLTextureInfo: a query taking a cl_mem, a cl_gl_texture_info
+ * selector (param_name), a value buffer with its size, and a size_t * for the
+ * returned size; returns a cl_int. */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLTextureInfo(cl_mem               /* memobj */,
+                   cl_gl_texture_info   /* param_name */,
+                   size_t               /* param_value_size */,
+                   void *               /* param_value */,
+                   size_t *             /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Declares clEnqueueAcquireGLObjects: takes a command queue, num_objects with a
+ * const cl_mem * list, num_events_in_wait_list with a const cl_event * list, and a
+ * cl_event * parameter; returns a cl_int. */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireGLObjects(cl_command_queue      /* command_queue */,
+                          cl_uint               /* num_objects */,
+                          const cl_mem *        /* mem_objects */,
+                          cl_uint               /* num_events_in_wait_list */,
+                          const cl_event *      /* event_wait_list */,
+                          cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Declares clEnqueueReleaseGLObjects: same parameter list and return type as
+ * clEnqueueAcquireGLObjects. */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseGLObjects(cl_command_queue      /* command_queue */,
+                          cl_uint               /* num_objects */,
+                          const cl_mem *        /* mem_objects */,
+                          cl_uint               /* num_events_in_wait_list */,
+                          const cl_event *      /* event_wait_list */,
+                          cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+/* cl_khr_gl_sharing extension  */
+
+#define cl_khr_gl_sharing 1
+
+typedef cl_uint     cl_gl_context_info;
+
+/* Additional Error Codes  */
+#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR  -1000
+
+/* cl_gl_context_info  */
+#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR    0x2006
+#define CL_DEVICES_FOR_GL_CONTEXT_KHR           0x2007
+
+/* Additional cl_context_properties  */
+#define CL_GL_CONTEXT_KHR                       0x2008
+#define CL_EGL_DISPLAY_KHR                      0x2009
+#define CL_GLX_DISPLAY_KHR                      0x200A
+#define CL_WGL_HDC_KHR                          0x200B
+#define CL_CGL_SHAREGROUP_KHR                   0x200C
+
+/* Declares clGetGLContextInfoKHR: a query taking a const cl_context_properties *
+ * list, a cl_gl_context_info selector (param_name), a value buffer with its size,
+ * and a size_t * for the returned size; returns a cl_int. */
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
+                      cl_gl_context_info            /* param_name */,
+                      size_t                        /* param_value_size */,
+                      void *                        /* param_value */,
+                      size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+/* Function-pointer type with the same return type and parameter types as the
+ * clGetGLContextInfoKHR prototype declared in this header. */
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
+    const cl_context_properties * properties,
+    cl_gl_context_info            param_name,
+    size_t                        param_value_size,
+    void *                        param_value,
+    size_t *                      param_value_size_ret);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* __OPENCL_CL_GL_H  */
diff --git a/include/CL/cl_gl_ext.h b/include/CL/cl_gl_ext.h

new file mode 100644 (file)

index 0000000..26e4782
--- /dev/null
+++ b/include/CL/cl_gl_ext.h
@@ -0,0 +1,69 @@
+/**********************************************************************************
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ **********************************************************************************/
+
+/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
+
+/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have           */
+/* OpenGL dependencies.                                                         */
+
+#ifndef __OPENCL_CL_GL_EXT_H
+#define __OPENCL_CL_GL_EXT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __APPLE__
+    #include <OpenCL/cl_gl.h>
+#else
+    #include <CL/cl_gl.h>
+#endif
+
+/*
+ * For each extension, follow this template
+ *  cl_VEN_extname extension  */
+/* #define cl_VEN_extname 1
+ * ... define new types, if any
+ * ... define new tokens, if any
+ * ... define new APIs, if any
+ *
+ *  If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
+ *  This allows us to avoid having to decide whether to include GL headers or GLES here.
+ */
+
+/* 
+ *  cl_khr_gl_event  extension
+ *  See section 9.9 in the OpenCL 1.1 spec for more information
+ */
+#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR     0x200D
+
+/* Declares clCreateEventFromGLsyncKHR: takes a context and a cl_GLsync and returns
+ * a cl_event; has a cl_int * errcode_ret parameter. */
+extern CL_API_ENTRY cl_event CL_API_CALL
+clCreateEventFromGLsyncKHR(cl_context           /* context */,
+                           cl_GLsync            /* sync */,
+                           cl_int *             /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __OPENCL_CL_GL_EXT_H  */
diff --git a/include/CL/cl_platform.h b/include/CL/cl_platform.h

new file mode 100644 (file)

index 0000000..043b048
--- /dev/null
+++ b/include/CL/cl_platform.h
@@ -0,0 +1,1198 @@
+/**********************************************************************************
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ **********************************************************************************/
+
+/* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */
+
+#ifndef __CL_PLATFORM_H
+#define __CL_PLATFORM_H
+
+#ifdef __APPLE__
+    /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
+    #include <AvailabilityMacros.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Calling-convention macros: on _WIN32 both CL_API_CALL and CL_CALLBACK expand to
+ * __stdcall; elsewhere they expand to nothing. CL_API_ENTRY is empty in both cases. */
+#if defined(_WIN32)
+    #define CL_API_ENTRY
+    #define CL_API_CALL     __stdcall
+    #define CL_CALLBACK     __stdcall
+#else
+    #define CL_API_ENTRY
+    #define CL_API_CALL
+    #define CL_CALLBACK
+#endif
+
+/* Suffix macros appended to prototypes. On __APPLE__ they expand to the weak_import
+ * attribute and/or AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER; on every other
+ * platform all of them expand to nothing. */
+#ifdef __APPLE__
+    #define CL_EXTENSION_WEAK_LINK                  __attribute__((weak_import))       
+    #define CL_API_SUFFIX__VERSION_1_0              AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+    #define CL_EXT_SUFFIX__VERSION_1_0              CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+    #define CL_API_SUFFIX__VERSION_1_1              CL_EXTENSION_WEAK_LINK
+    #define CL_EXT_SUFFIX__VERSION_1_1              CL_EXTENSION_WEAK_LINK
+    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED   CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+#else
+    #define CL_EXTENSION_WEAK_LINK                         
+    #define CL_API_SUFFIX__VERSION_1_0
+    #define CL_EXT_SUFFIX__VERSION_1_0
+    #define CL_API_SUFFIX__VERSION_1_1
+    #define CL_EXT_SUFFIX__VERSION_1_1
+    #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
+#endif
+
+#if (defined (_WIN32) && defined(_MSC_VER))
+
+/* scalar types  */
+typedef signed   __int8         cl_char;
+typedef unsigned __int8         cl_uchar;
+typedef signed   __int16        cl_short;
+typedef unsigned __int16        cl_ushort;
+typedef signed   __int32        cl_int;
+typedef unsigned __int32        cl_uint;
+typedef signed   __int64        cl_long;
+typedef unsigned __int64        cl_ulong;
+
+typedef unsigned __int16        cl_half;
+typedef float                   cl_float;
+typedef double                  cl_double;
+
+/* Macro names and corresponding values defined by OpenCL */
+#define CL_CHAR_BIT         8
+#define CL_SCHAR_MAX        127
+#define CL_SCHAR_MIN        (-127-1)
+#define CL_CHAR_MAX         CL_SCHAR_MAX
+#define CL_CHAR_MIN         CL_SCHAR_MIN
+#define CL_UCHAR_MAX        255
+#define CL_SHRT_MAX         32767
+#define CL_SHRT_MIN         (-32767-1)
+#define CL_USHRT_MAX        65535
+#define CL_INT_MAX          2147483647
+#define CL_INT_MIN          (-2147483647-1)
+#define CL_UINT_MAX         0xffffffffU
+#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+
+/* Single-precision limits for the _MSC_VER branch, written as decimal literals.
+ * CL_FLT_EPSILON is 2^-23 spelled in decimal: hexadecimal floating constants are a
+ * C99 feature, and CL_FLT_MAX / CL_FLT_MIN in this branch already avoid them. */
+#define CL_FLT_DIG          6
+#define CL_FLT_MANT_DIG     24
+#define CL_FLT_MAX_10_EXP   +38
+#define CL_FLT_MAX_EXP      +128
+#define CL_FLT_MIN_10_EXP   -37
+#define CL_FLT_MIN_EXP      -125
+#define CL_FLT_RADIX        2
+#define CL_FLT_MAX          340282346638528859811704183484516925440.0f
+#define CL_FLT_MIN          1.175494350822287507969e-38f
+#define CL_FLT_EPSILON      1.1920928955078125e-7f
+
+#define CL_DBL_DIG          15
+#define CL_DBL_MANT_DIG     53
+#define CL_DBL_MAX_10_EXP   +308
+#define CL_DBL_MAX_EXP      +1024
+#define CL_DBL_MIN_10_EXP   -307
+#define CL_DBL_MIN_EXP      -1021
+#define CL_DBL_RADIX        2
+#define CL_DBL_MAX          179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
+#define CL_DBL_MIN          2.225073858507201383090e-308
+#define CL_DBL_EPSILON      2.220446049250313080847e-16
+
+#define  CL_M_E             2.718281828459045090796
+#define  CL_M_LOG2E         1.442695040888963387005
+#define  CL_M_LOG10E        0.434294481903251816668
+#define  CL_M_LN2           0.693147180559945286227
+#define  CL_M_LN10          2.302585092994045901094
+#define  CL_M_PI            3.141592653589793115998
+#define  CL_M_PI_2          1.570796326794896557999
+#define  CL_M_PI_4          0.785398163397448278999
+#define  CL_M_1_PI          0.318309886183790691216
+#define  CL_M_2_PI          0.636619772367581382433
+#define  CL_M_2_SQRTPI      1.128379167095512558561
+#define  CL_M_SQRT2         1.414213562373095145475
+#define  CL_M_SQRT1_2       0.707106781186547572737
+
+#define  CL_M_E_F           2.71828174591064f
+#define  CL_M_LOG2E_F       1.44269502162933f
+#define  CL_M_LOG10E_F      0.43429449200630f
+#define  CL_M_LN2_F         0.69314718246460f
+#define  CL_M_LN10_F        2.30258512496948f
+#define  CL_M_PI_F          3.14159274101257f
+#define  CL_M_PI_2_F        1.57079637050629f
+#define  CL_M_PI_4_F        0.78539818525314f
+#define  CL_M_1_PI_F        0.31830987334251f
+#define  CL_M_2_PI_F        0.63661974668503f
+#define  CL_M_2_SQRTPI_F    1.12837922573090f
+#define  CL_M_SQRT2_F       1.41421353816986f
+#define  CL_M_SQRT1_2_F     0.70710676908493f
+
+#define CL_NAN              (CL_INFINITY - CL_INFINITY)
+#define CL_HUGE_VALF        ((cl_float) 1e50)
+#define CL_HUGE_VAL         ((cl_double) 1e500)
+#define CL_MAXFLOAT         CL_FLT_MAX
+#define CL_INFINITY         CL_HUGE_VALF
+
+#else
+
+#include <stdint.h>
+
+/* scalar types  */
+typedef int8_t          cl_char;
+typedef uint8_t         cl_uchar;
+typedef int16_t         cl_short    __attribute__((aligned(2)));
+typedef uint16_t        cl_ushort   __attribute__((aligned(2)));
+typedef int32_t         cl_int      __attribute__((aligned(4)));
+typedef uint32_t        cl_uint     __attribute__((aligned(4)));
+typedef int64_t         cl_long     __attribute__((aligned(8)));
+typedef uint64_t        cl_ulong    __attribute__((aligned(8)));
+
+typedef uint16_t        cl_half     __attribute__((aligned(2)));
+typedef float           cl_float    __attribute__((aligned(4)));
+typedef double          cl_double   __attribute__((aligned(8)));
+
+/* Macro names and corresponding values defined by OpenCL */
+#define CL_CHAR_BIT         8
+#define CL_SCHAR_MAX        127
+#define CL_SCHAR_MIN        (-127-1)
+#define CL_CHAR_MAX         CL_SCHAR_MAX
+#define CL_CHAR_MIN         CL_SCHAR_MIN
+#define CL_UCHAR_MAX        255
+#define CL_SHRT_MAX         32767
+#define CL_SHRT_MIN         (-32767-1)
+#define CL_USHRT_MAX        65535
+#define CL_INT_MAX          2147483647
+#define CL_INT_MIN          (-2147483647-1)
+#define CL_UINT_MAX         0xffffffffU
+#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+
+#define CL_FLT_DIG          6
+#define CL_FLT_MANT_DIG     24
+#define CL_FLT_MAX_10_EXP   +38
+#define CL_FLT_MAX_EXP      +128
+#define CL_FLT_MIN_10_EXP   -37
+#define CL_FLT_MIN_EXP      -125
+#define CL_FLT_RADIX        2
+#define CL_FLT_MAX          0x1.fffffep127f
+#define CL_FLT_MIN          0x1.0p-126f
+#define CL_FLT_EPSILON      0x1.0p-23f
+
+#define CL_DBL_DIG          15
+#define CL_DBL_MANT_DIG     53
+#define CL_DBL_MAX_10_EXP   +308
+#define CL_DBL_MAX_EXP      +1024
+#define CL_DBL_MIN_10_EXP   -307
+#define CL_DBL_MIN_EXP      -1021
+#define CL_DBL_RADIX        2
+#define CL_DBL_MAX          0x1.fffffffffffffp1023
+#define CL_DBL_MIN          0x1.0p-1022
+#define CL_DBL_EPSILON      0x1.0p-52
+
+#define  CL_M_E             2.718281828459045090796
+#define  CL_M_LOG2E         1.442695040888963387005
+#define  CL_M_LOG10E        0.434294481903251816668
+#define  CL_M_LN2           0.693147180559945286227
+#define  CL_M_LN10          2.302585092994045901094
+#define  CL_M_PI            3.141592653589793115998
+#define  CL_M_PI_2          1.570796326794896557999
+#define  CL_M_PI_4          0.785398163397448278999
+#define  CL_M_1_PI          0.318309886183790691216
+#define  CL_M_2_PI          0.636619772367581382433
+#define  CL_M_2_SQRTPI      1.128379167095512558561
+#define  CL_M_SQRT2         1.414213562373095145475
+#define  CL_M_SQRT1_2       0.707106781186547572737
+
+#define  CL_M_E_F           2.71828174591064f
+#define  CL_M_LOG2E_F       1.44269502162933f
+#define  CL_M_LOG10E_F      0.43429449200630f
+#define  CL_M_LN2_F         0.69314718246460f
+#define  CL_M_LN10_F        2.30258512496948f
+#define  CL_M_PI_F          3.14159274101257f
+#define  CL_M_PI_2_F        1.57079637050629f
+#define  CL_M_PI_4_F        0.78539818525314f
+#define  CL_M_1_PI_F        0.31830987334251f
+#define  CL_M_2_PI_F        0.63661974668503f
+#define  CL_M_2_SQRTPI_F    1.12837922573090f
+#define  CL_M_SQRT2_F       1.41421353816986f
+#define  CL_M_SQRT1_2_F     0.70710676908493f
+
+#if defined( __GNUC__ )
+   #define CL_HUGE_VALF     __builtin_huge_valf()
+   #define CL_HUGE_VAL      __builtin_huge_val()
+   #define CL_NAN           __builtin_nanf( "" )
+#else
+   #define CL_HUGE_VALF     ((cl_float) 1e50)
+   #define CL_HUGE_VAL      ((cl_double) 1e500)
+   float nanf( const char * );
+   #define CL_NAN           nanf( "" )  
+#endif
+#define CL_MAXFLOAT         CL_FLT_MAX
+#define CL_INFINITY         CL_HUGE_VALF
+
+#endif
+
+#include <stddef.h>
+
+/* Mirror types to GL types. Mirror types allow us to avoid deciding which headers to load based on whether we are using GL or GLES here. */
+typedef unsigned int cl_GLuint;
+typedef int          cl_GLint;
+typedef unsigned int cl_GLenum;
+
+/*
+ * Vector types 
+ *
+ *  Note:   OpenCL requires that all types be naturally aligned. 
+ *          This means that vector types must be naturally aligned.
+ *          For example, a vector of four floats must be aligned to
+ *          a 16 byte boundary (calculated as 4 * the natural 4-byte 
+ *          alignment of the float).  The alignment qualifiers here
+ *          will only function properly if your compiler supports them
+ *          and if you don't actively work to defeat them.  For example,
+ *          in order for a cl_float4 to be 16 byte aligned in a struct,
+ *          the start of the struct must itself be 16-byte aligned. 
+ *
+ *          Maintaining proper alignment is the user's responsibility.
+ */
+
+/* Define basic vector types */
+/* AltiVec (__VEC__): typedefs the `vector` types for char/short/int (signed and
+ * unsigned) and float, and sets the matching __CL_*__ flags. Those flags gate the
+ * native-vector members of the cl_*n unions (e.g. __CL_CHAR16__ gates cl_char16.v16). */
+#if defined( __VEC__ )
+   #include <altivec.h>   /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
+   typedef vector unsigned char     __cl_uchar16;
+   typedef vector signed char       __cl_char16;
+   typedef vector unsigned short    __cl_ushort8;
+   typedef vector signed short      __cl_short8;
+   typedef vector unsigned int      __cl_uint4;
+   typedef vector signed int        __cl_int4;
+   typedef vector float             __cl_float4;
+   #define  __CL_UCHAR16__  1
+   #define  __CL_CHAR16__   1
+   #define  __CL_USHORT8__  1
+   #define  __CL_SHORT8__   1
+   #define  __CL_UINT4__    1
+   #define  __CL_INT4__     1
+   #define  __CL_FLOAT4__   1
+#endif
+
+/* SSE (__SSE__): includes <intrin.h> on __MINGW64__ and <xmmintrin.h> otherwise,
+ * defines __cl_float4 as a 16-byte GNU vector of float (or as __m128 when not
+ * __GNUC__), and sets __CL_FLOAT4__. */
+#if defined( __SSE__ )
+    #if defined( __MINGW64__ )
+        #include <intrin.h>
+    #else
+        #include <xmmintrin.h>
+    #endif
+    #if defined( __GNUC__ )
+        typedef float __cl_float4   __attribute__((vector_size(16)));
+    #else
+        typedef __m128 __cl_float4;
+    #endif
+    #define __CL_FLOAT4__   1
+#endif
+
+/* SSE2 (__SSE2__): includes <intrin.h> on __MINGW64__ and <emmintrin.h> otherwise,
+ * then defines the 16-byte integer and double vector types. Under __GNUC__ they are
+ * vector_size(16) typedefs of the cl_* scalars; otherwise every integer type maps to
+ * __m128i and __cl_double2 to __m128d. The matching __CL_*__ flags are set last. */
+#if defined( __SSE2__ )
+    #if defined( __MINGW64__ )
+        #include <intrin.h>
+    #else
+        #include <emmintrin.h>
+    #endif
+    #if defined( __GNUC__ )
+        typedef cl_uchar    __cl_uchar16    __attribute__((vector_size(16)));
+        typedef cl_char     __cl_char16     __attribute__((vector_size(16)));
+        typedef cl_ushort   __cl_ushort8    __attribute__((vector_size(16)));
+        typedef cl_short    __cl_short8     __attribute__((vector_size(16)));
+        typedef cl_uint     __cl_uint4      __attribute__((vector_size(16)));
+        typedef cl_int      __cl_int4       __attribute__((vector_size(16)));
+        typedef cl_ulong    __cl_ulong2     __attribute__((vector_size(16)));
+        typedef cl_long     __cl_long2      __attribute__((vector_size(16)));
+        typedef cl_double   __cl_double2    __attribute__((vector_size(16)));
+    #else
+        typedef __m128i __cl_uchar16;
+        typedef __m128i __cl_char16;
+        typedef __m128i __cl_ushort8;
+        typedef __m128i __cl_short8;
+        typedef __m128i __cl_uint4;
+        typedef __m128i __cl_int4;
+        typedef __m128i __cl_ulong2;
+        typedef __m128i __cl_long2;
+        typedef __m128d __cl_double2;
+    #endif
+    #define __CL_UCHAR16__  1
+    #define __CL_CHAR16__   1
+    #define __CL_USHORT8__  1
+    #define __CL_SHORT8__   1
+    #define __CL_INT4__     1
+    #define __CL_UINT4__    1
+    #define __CL_ULONG2__   1
+    #define __CL_LONG2__    1
+    #define __CL_DOUBLE2__  1
+#endif
+
+/* MMX (__MMX__): includes <mmintrin.h> and defines the 8-byte vector types. Under
+ * __GNUC__ they are vector_size(8) typedefs of the cl_* scalars; otherwise every
+ * type (including __cl_float2) maps to __m64. The matching __CL_*__ flags are set last. */
+#if defined( __MMX__ )
+    #include <mmintrin.h>
+    #if defined( __GNUC__ )
+        typedef cl_uchar    __cl_uchar8     __attribute__((vector_size(8)));
+        typedef cl_char     __cl_char8      __attribute__((vector_size(8)));
+        typedef cl_ushort   __cl_ushort4    __attribute__((vector_size(8)));
+        typedef cl_short    __cl_short4     __attribute__((vector_size(8)));
+        typedef cl_uint     __cl_uint2      __attribute__((vector_size(8)));
+        typedef cl_int      __cl_int2       __attribute__((vector_size(8)));
+        typedef cl_ulong    __cl_ulong1     __attribute__((vector_size(8)));
+        typedef cl_long     __cl_long1      __attribute__((vector_size(8)));
+        typedef cl_float    __cl_float2     __attribute__((vector_size(8)));
+    #else
+        typedef __m64       __cl_uchar8;
+        typedef __m64       __cl_char8;
+        typedef __m64       __cl_ushort4;
+        typedef __m64       __cl_short4;
+        typedef __m64       __cl_uint2;
+        typedef __m64       __cl_int2;
+        typedef __m64       __cl_ulong1;
+        typedef __m64       __cl_long1;
+        typedef __m64       __cl_float2;
+    #endif
+    #define __CL_UCHAR8__   1
+    #define __CL_CHAR8__    1
+    #define __CL_USHORT4__  1
+    #define __CL_SHORT4__   1
+    #define __CL_INT2__     1
+    #define __CL_UINT2__    1
+    #define __CL_ULONG1__   1
+    #define __CL_LONG1__    1
+    #define __CL_FLOAT2__   1
+#endif
+
+/* AVX (__AVX__): includes <intrin.h> on __MINGW64__ and <immintrin.h> otherwise,
+ * defines the 32-byte __cl_float8 / __cl_double4 types (GNU vector_size(32) typedefs,
+ * or __m256 / __m256d when not __GNUC__), and sets __CL_FLOAT8__ and __CL_DOUBLE4__. */
+#if defined( __AVX__ )
+    #if defined( __MINGW64__ )
+        #include <intrin.h>
+    #else
+        #include <immintrin.h> 
+    #endif
+    #if defined( __GNUC__ )
+        typedef cl_float    __cl_float8     __attribute__((vector_size(32)));
+        typedef cl_double   __cl_double4    __attribute__((vector_size(32)));
+    #else
+        typedef __m256      __cl_float8;
+        typedef __m256d     __cl_double4;
+    #endif
+    #define __CL_FLOAT8__   1
+    #define __CL_DOUBLE4__  1
+#endif
+
+/*
+ * Define alignment keys.
+ * CL_ALIGNED(_x) expands to the GNU aligned attribute under __GNUC__ and to nothing
+ * elsewhere; compilers that are neither GNU nor MSVC-on-Windows also get a #warning.
+ * _MSC_VER is tested with defined() so an undefined macro is never evaluated in #elif.
+ */
+#if defined( __GNUC__ )
+    #define CL_ALIGNED(_x)          __attribute__ ((aligned(_x)))
+#elif defined( _WIN32) && defined( _MSC_VER )
+    /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements     */
+    /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx                                                 */
+    /* #include <crtdefs.h>                                                                                             */
+    /* #define CL_ALIGNED(_x)          _CRT_ALIGN(_x)                                                                   */
+    #define CL_ALIGNED(_x)
+#else
+   #warning  Need to implement some method to align data here
+   #define  CL_ALIGNED(_x)
+#endif
+
+/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+    /* .xyzw and .s0123...{f|F} are supported */
+    #define CL_HAS_NAMED_VECTOR_FIELDS 1
+    /* .hi and .lo are supported */
+    #define CL_HAS_HI_LO_VECTOR_FIELDS 1
+#endif
+
+/* Define cl_vector types */
+
+/* ---- cl_charn ---- */
+/* Two cl_char elements. s[] is always present; the x/y, s0/s1 and lo/hi views exist
+ * only under GNU C without __STRICT_ANSI__, and v2 only if __CL_CHAR2__ is defined. */
+typedef union
+{
+    cl_char  CL_ALIGNED(2) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_char  x, y; };
+   __extension__ struct{ cl_char  s0, s1; };
+   __extension__ struct{ cl_char  lo, hi; };
+#endif
+#if defined( __CL_CHAR2__) 
+    __cl_char2     v2;
+#endif
+}cl_char2;
+
+/* Four cl_char elements. s[] is always present; x..w, s0..s3 and the cl_char2
+ * lo/hi halves exist only under GNU C without __STRICT_ANSI__. v2[] and v4 are
+ * added only when __CL_CHAR2__ / __CL_CHAR4__ are defined. */
+typedef union
+{
+    cl_char  CL_ALIGNED(4) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_char  x, y, z, w; };
+   __extension__ struct{ cl_char  s0, s1, s2, s3; };
+   __extension__ struct{ cl_char2 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__) 
+    __cl_char2     v2[2];
+#endif
+#if defined( __CL_CHAR4__) 
+    __cl_char4     v4;
+#endif
+}cl_char4;
+
+/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */
+typedef  cl_char4  cl_char3;
+
+/* Eight cl_char elements. s[] is always present. Under GNU C without
+ * __STRICT_ANSI__ there are also s0..s7, cl_char4 lo/hi halves, and an x/y/z/w view
+ * that names only the first four elements. v2[], v4[] and v8 depend on the
+ * corresponding __CL_CHARn__ flags. */
+typedef union
+{
+    cl_char   CL_ALIGNED(8) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_char  x, y, z, w; };
+   __extension__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_char4 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__) 
+    __cl_char2     v2[4];
+#endif
+#if defined( __CL_CHAR4__) 
+    __cl_char4     v4[2];
+#endif
+#if defined( __CL_CHAR8__ )
+    __cl_char8     v8;
+#endif
+}cl_char8;
+
+/* Sixteen cl_char elements. s[] is always present. Under GNU C without
+ * __STRICT_ANSI__ there are also s0..sF and cl_char8 lo/hi halves. In the x/y/z/w
+ * struct, __spacer4..__spacer9 fill positions 4-9 so the lower-case sa..sf names
+ * fall on elements 10-15. v2[]..v16 depend on the __CL_CHARn__ flags. */
+typedef union
+{
+    cl_char  CL_ALIGNED(16) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_char  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_char  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_char8 lo, hi; };
+#endif
+#if defined( __CL_CHAR2__) 
+    __cl_char2     v2[8];
+#endif
+#if defined( __CL_CHAR4__) 
+    __cl_char4     v4[4];
+#endif
+#if defined( __CL_CHAR8__ )
+    __cl_char8     v8[2];
+#endif
+#if defined( __CL_CHAR16__ )
+    __cl_char16    v16;
+#endif
+}cl_char16;
+
+
+/* ---- cl_ucharn ---- */
+/* Two cl_uchar elements. s[] is always present; the x/y, s0/s1 and lo/hi views exist
+ * only under GNU C without __STRICT_ANSI__. v2 is gated on __CL_UCHAR2__, spelled in
+ * upper case like every other __CL_*__ flag in this header (macro names are
+ * case-sensitive). */
+typedef union
+{
+    cl_uchar  CL_ALIGNED(2) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uchar  x, y; };
+   __extension__ struct{ cl_uchar  s0, s1; };
+   __extension__ struct{ cl_uchar  lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__) 
+    __cl_uchar2     v2;
+#endif
+}cl_uchar2;
+
+/* Four cl_uchar elements. s[] is always present; x..w, s0..s3 and the cl_uchar2
+ * lo/hi halves exist only under GNU C without __STRICT_ANSI__. v2[] and v4 are
+ * added only when __CL_UCHAR2__ / __CL_UCHAR4__ are defined. */
+typedef union
+{
+    cl_uchar  CL_ALIGNED(4) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uchar  x, y, z, w; };
+   __extension__ struct{ cl_uchar  s0, s1, s2, s3; };
+   __extension__ struct{ cl_uchar2 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__) 
+    __cl_uchar2     v2[2];
+#endif
+#if defined( __CL_UCHAR4__) 
+    __cl_uchar4     v4;
+#endif
+}cl_uchar4;
+
+/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */
+typedef  cl_uchar4  cl_uchar3;
+
+/* Eight cl_uchar elements. s[] is always present. Under GNU C without
+ * __STRICT_ANSI__ there are also s0..s7, cl_uchar4 lo/hi halves, and an x/y/z/w view
+ * that names only the first four elements. v2[], v4[] and v8 depend on the
+ * corresponding __CL_UCHARn__ flags. */
+typedef union
+{
+    cl_uchar   CL_ALIGNED(8) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uchar  x, y, z, w; };
+   __extension__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_uchar4 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__) 
+    __cl_uchar2     v2[4];
+#endif
+#if defined( __CL_UCHAR4__) 
+    __cl_uchar4     v4[2];
+#endif
+#if defined( __CL_UCHAR8__ )
+    __cl_uchar8     v8;
+#endif
+}cl_uchar8;
+
+/* Sixteen cl_uchar elements. s[] is always present. Under GNU C without
+ * __STRICT_ANSI__ there are also s0..sF and cl_uchar8 lo/hi halves. In the x/y/z/w
+ * struct, __spacer4..__spacer9 fill positions 4-9 so the lower-case sa..sf names
+ * fall on elements 10-15. v2[]..v16 depend on the __CL_UCHARn__ flags. */
+typedef union
+{
+    cl_uchar  CL_ALIGNED(16) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uchar  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_uchar  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_uchar8 lo, hi; };
+#endif
+#if defined( __CL_UCHAR2__) 
+    __cl_uchar2     v2[8];
+#endif
+#if defined( __CL_UCHAR4__) 
+    __cl_uchar4     v4[4];
+#endif
+#if defined( __CL_UCHAR8__ )
+    __cl_uchar8     v8[2];
+#endif
+#if defined( __CL_UCHAR16__ )
+    __cl_uchar16    v16;
+#endif
+}cl_uchar16;
+
+
+/* ---- cl_shortn ---- */
+/* Two cl_short elements; the s[] member requests 4-byte alignment via CL_ALIGNED.
+ * The x/y, s0/s1 and lo/hi views exist only under GNU C without __STRICT_ANSI__,
+ * and v2 only if __CL_SHORT2__ is defined. */
+typedef union
+{
+    cl_short  CL_ALIGNED(4) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_short  x, y; };
+   __extension__ struct{ cl_short  s0, s1; };
+   __extension__ struct{ cl_short  lo, hi; };
+#endif
+#if defined( __CL_SHORT2__) 
+    __cl_short2     v2;
+#endif
+}cl_short2;
+
+/* Four cl_short elements; the s[] member requests 8-byte alignment via CL_ALIGNED.
+ * x..w, s0..s3 and the cl_short2 lo/hi halves exist only under GNU C without
+ * __STRICT_ANSI__. v2[] and v4 depend on __CL_SHORT2__ / __CL_SHORT4__. */
+typedef union
+{
+    cl_short  CL_ALIGNED(8) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_short  x, y, z, w; };
+   __extension__ struct{ cl_short  s0, s1, s2, s3; };
+   __extension__ struct{ cl_short2 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__) 
+    __cl_short2     v2[2];
+#endif
+#if defined( __CL_SHORT4__) 
+    __cl_short4     v4;
+#endif
+}cl_short4;
+
+/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */
+typedef  cl_short4  cl_short3;
+
+typedef union
+{
+    cl_short   CL_ALIGNED(16) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_short  x, y, z, w; };
+   __extension__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_short4 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__) 
+    __cl_short2     v2[4];
+#endif
+#if defined( __CL_SHORT4__) 
+    __cl_short4     v4[2];
+#endif
+#if defined( __CL_SHORT8__ )
+    __cl_short8     v8;
+#endif
+}cl_short8;
+
+typedef union
+{
+    cl_short  CL_ALIGNED(32) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_short  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_short  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_short8 lo, hi; };
+#endif
+#if defined( __CL_SHORT2__) 
+    __cl_short2     v2[8];
+#endif
+#if defined( __CL_SHORT4__) 
+    __cl_short4     v4[4];
+#endif
+#if defined( __CL_SHORT8__ )
+    __cl_short8     v8[2];
+#endif
+#if defined( __CL_SHORT16__ )
+    __cl_short16    v16;
+#endif
+}cl_short16;
+
+
+/* ---- cl_ushortn ---- */
+typedef union
+{
+    cl_ushort  CL_ALIGNED(4) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ushort  x, y; };
+   __extension__ struct{ cl_ushort  s0, s1; };
+   __extension__ struct{ cl_ushort  lo, hi; };
+#endif
+#if defined( __CL_USHORT2__) 
+    __cl_ushort2     v2;
+#endif
+}cl_ushort2;
+
+typedef union
+{
+    cl_ushort  CL_ALIGNED(8) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ushort  x, y, z, w; };
+   __extension__ struct{ cl_ushort  s0, s1, s2, s3; };
+   __extension__ struct{ cl_ushort2 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__) 
+    __cl_ushort2     v2[2];
+#endif
+#if defined( __CL_USHORT4__) 
+    __cl_ushort4     v4;
+#endif
+}cl_ushort4;
+
+/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */
+typedef  cl_ushort4  cl_ushort3;
+
+typedef union
+{
+    cl_ushort   CL_ALIGNED(16) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ushort  x, y, z, w; };
+   __extension__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_ushort4 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__) 
+    __cl_ushort2     v2[4];
+#endif
+#if defined( __CL_USHORT4__) 
+    __cl_ushort4     v4[2];
+#endif
+#if defined( __CL_USHORT8__ )
+    __cl_ushort8     v8;
+#endif
+}cl_ushort8;
+
+typedef union
+{
+    cl_ushort  CL_ALIGNED(32) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ushort  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_ushort  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_ushort8 lo, hi; };
+#endif
+#if defined( __CL_USHORT2__) 
+    __cl_ushort2     v2[8];
+#endif
+#if defined( __CL_USHORT4__) 
+    __cl_ushort4     v4[4];
+#endif
+#if defined( __CL_USHORT8__ )
+    __cl_ushort8     v8[2];
+#endif
+#if defined( __CL_USHORT16__ )
+    __cl_ushort16    v16;
+#endif
+}cl_ushort16;
+
+/* ---- cl_intn ---- */
+typedef union
+{
+    cl_int  CL_ALIGNED(8) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_int  x, y; };
+   __extension__ struct{ cl_int  s0, s1; };
+   __extension__ struct{ cl_int  lo, hi; };
+#endif
+#if defined( __CL_INT2__) 
+    __cl_int2     v2;
+#endif
+}cl_int2;
+
+typedef union
+{
+    cl_int  CL_ALIGNED(16) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_int  x, y, z, w; };
+   __extension__ struct{ cl_int  s0, s1, s2, s3; };
+   __extension__ struct{ cl_int2 lo, hi; };
+#endif
+#if defined( __CL_INT2__) 
+    __cl_int2     v2[2];
+#endif
+#if defined( __CL_INT4__) 
+    __cl_int4     v4;
+#endif
+}cl_int4;
+
+/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */
+typedef  cl_int4  cl_int3;
+
+typedef union
+{
+    cl_int   CL_ALIGNED(32) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_int  x, y, z, w; };
+   __extension__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_int4 lo, hi; };
+#endif
+#if defined( __CL_INT2__) 
+    __cl_int2     v2[4];
+#endif
+#if defined( __CL_INT4__) 
+    __cl_int4     v4[2];
+#endif
+#if defined( __CL_INT8__ )
+    __cl_int8     v8;
+#endif
+}cl_int8;
+
+typedef union
+{
+    cl_int  CL_ALIGNED(64) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_int  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_int  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_int8 lo, hi; };
+#endif
+#if defined( __CL_INT2__) 
+    __cl_int2     v2[8];
+#endif
+#if defined( __CL_INT4__) 
+    __cl_int4     v4[4];
+#endif
+#if defined( __CL_INT8__ )
+    __cl_int8     v8[2];
+#endif
+#if defined( __CL_INT16__ )
+    __cl_int16    v16;
+#endif
+}cl_int16;
+
+
+/* ---- cl_uintn ---- */
+typedef union
+{
+    cl_uint  CL_ALIGNED(8) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uint  x, y; };
+   __extension__ struct{ cl_uint  s0, s1; };
+   __extension__ struct{ cl_uint  lo, hi; };
+#endif
+#if defined( __CL_UINT2__) 
+    __cl_uint2     v2;
+#endif
+}cl_uint2;
+
+typedef union
+{
+    cl_uint  CL_ALIGNED(16) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uint  x, y, z, w; };
+   __extension__ struct{ cl_uint  s0, s1, s2, s3; };
+   __extension__ struct{ cl_uint2 lo, hi; };
+#endif
+#if defined( __CL_UINT2__) 
+    __cl_uint2     v2[2];
+#endif
+#if defined( __CL_UINT4__) 
+    __cl_uint4     v4;
+#endif
+}cl_uint4;
+
+/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */
+typedef  cl_uint4  cl_uint3;
+
+typedef union
+{
+    cl_uint   CL_ALIGNED(32) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uint  x, y, z, w; };
+   __extension__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_uint4 lo, hi; };
+#endif
+#if defined( __CL_UINT2__) 
+    __cl_uint2     v2[4];
+#endif
+#if defined( __CL_UINT4__) 
+    __cl_uint4     v4[2];
+#endif
+#if defined( __CL_UINT8__ )
+    __cl_uint8     v8;
+#endif
+}cl_uint8;
+
+typedef union
+{
+    cl_uint  CL_ALIGNED(64) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_uint  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_uint  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_uint8 lo, hi; };
+#endif
+#if defined( __CL_UINT2__) 
+    __cl_uint2     v2[8];
+#endif
+#if defined( __CL_UINT4__) 
+    __cl_uint4     v4[4];
+#endif
+#if defined( __CL_UINT8__ )
+    __cl_uint8     v8[2];
+#endif
+#if defined( __CL_UINT16__ )
+    __cl_uint16    v16;
+#endif
+}cl_uint16;
+
+/* ---- cl_longn ---- */
+typedef union
+{
+    cl_long  CL_ALIGNED(16) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_long  x, y; };
+   __extension__ struct{ cl_long  s0, s1; };
+   __extension__ struct{ cl_long  lo, hi; };
+#endif
+#if defined( __CL_LONG2__) 
+    __cl_long2     v2;
+#endif
+}cl_long2;
+
+typedef union
+{
+    cl_long  CL_ALIGNED(32) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_long  x, y, z, w; };
+   __extension__ struct{ cl_long  s0, s1, s2, s3; };
+   __extension__ struct{ cl_long2 lo, hi; };
+#endif
+#if defined( __CL_LONG2__) 
+    __cl_long2     v2[2];
+#endif
+#if defined( __CL_LONG4__) 
+    __cl_long4     v4;
+#endif
+}cl_long4;
+
+/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */
+typedef  cl_long4  cl_long3;
+
+typedef union
+{
+    cl_long   CL_ALIGNED(64) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_long  x, y, z, w; };
+   __extension__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_long4 lo, hi; };
+#endif
+#if defined( __CL_LONG2__) 
+    __cl_long2     v2[4];
+#endif
+#if defined( __CL_LONG4__) 
+    __cl_long4     v4[2];
+#endif
+#if defined( __CL_LONG8__ )
+    __cl_long8     v8;
+#endif
+}cl_long8;
+
+typedef union
+{
+    cl_long  CL_ALIGNED(128) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_long  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_long  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_long8 lo, hi; };
+#endif
+#if defined( __CL_LONG2__) 
+    __cl_long2     v2[8];
+#endif
+#if defined( __CL_LONG4__) 
+    __cl_long4     v4[4];
+#endif
+#if defined( __CL_LONG8__ )
+    __cl_long8     v8[2];
+#endif
+#if defined( __CL_LONG16__ )
+    __cl_long16    v16;
+#endif
+}cl_long16;
+
+
+/* ---- cl_ulongn ---- */
+typedef union
+{
+    cl_ulong  CL_ALIGNED(16) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ulong  x, y; };
+   __extension__ struct{ cl_ulong  s0, s1; };
+   __extension__ struct{ cl_ulong  lo, hi; };
+#endif
+#if defined( __CL_ULONG2__) 
+    __cl_ulong2     v2;
+#endif
+}cl_ulong2;
+
+typedef union
+{
+    cl_ulong  CL_ALIGNED(32) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ulong  x, y, z, w; };
+   __extension__ struct{ cl_ulong  s0, s1, s2, s3; };
+   __extension__ struct{ cl_ulong2 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__) 
+    __cl_ulong2     v2[2];
+#endif
+#if defined( __CL_ULONG4__) 
+    __cl_ulong4     v4;
+#endif
+}cl_ulong4;
+
+/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */
+typedef  cl_ulong4  cl_ulong3;
+
+typedef union
+{
+    cl_ulong   CL_ALIGNED(64) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ulong  x, y, z, w; };
+   __extension__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_ulong4 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__) 
+    __cl_ulong2     v2[4];
+#endif
+#if defined( __CL_ULONG4__) 
+    __cl_ulong4     v4[2];
+#endif
+#if defined( __CL_ULONG8__ )
+    __cl_ulong8     v8;
+#endif
+}cl_ulong8;
+
+typedef union
+{
+    cl_ulong  CL_ALIGNED(128) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_ulong  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_ulong  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_ulong8 lo, hi; };
+#endif
+#if defined( __CL_ULONG2__) 
+    __cl_ulong2     v2[8];
+#endif
+#if defined( __CL_ULONG4__) 
+    __cl_ulong4     v4[4];
+#endif
+#if defined( __CL_ULONG8__ )
+    __cl_ulong8     v8[2];
+#endif
+#if defined( __CL_ULONG16__ )
+    __cl_ulong16    v16;
+#endif
+}cl_ulong16;
+
+
+/* --- cl_floatn ---- */
+
+typedef union
+{
+    cl_float  CL_ALIGNED(8) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_float  x, y; };
+   __extension__ struct{ cl_float  s0, s1; };
+   __extension__ struct{ cl_float  lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__) 
+    __cl_float2     v2;
+#endif
+}cl_float2;
+
+typedef union
+{
+    cl_float  CL_ALIGNED(16) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_float   x, y, z, w; };
+   __extension__ struct{ cl_float   s0, s1, s2, s3; };
+   __extension__ struct{ cl_float2  lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__) 
+    __cl_float2     v2[2];
+#endif
+#if defined( __CL_FLOAT4__) 
+    __cl_float4     v4;
+#endif
+}cl_float4;
+
+/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */
+typedef  cl_float4  cl_float3;
+
+typedef union
+{
+    cl_float   CL_ALIGNED(32) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_float   x, y, z, w; };
+   __extension__ struct{ cl_float   s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_float4  lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__) 
+    __cl_float2     v2[4];
+#endif
+#if defined( __CL_FLOAT4__) 
+    __cl_float4     v4[2];
+#endif
+#if defined( __CL_FLOAT8__ )
+    __cl_float8     v8;
+#endif
+}cl_float8;
+
+typedef union
+{
+    cl_float  CL_ALIGNED(64) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_float  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_float  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_float8 lo, hi; };
+#endif
+#if defined( __CL_FLOAT2__) 
+    __cl_float2     v2[8];
+#endif
+#if defined( __CL_FLOAT4__) 
+    __cl_float4     v4[4];
+#endif
+#if defined( __CL_FLOAT8__ )
+    __cl_float8     v8[2];
+#endif
+#if defined( __CL_FLOAT16__ )
+    __cl_float16    v16;
+#endif
+}cl_float16;
+
+/* --- cl_doublen ---- */
+
+typedef union
+{
+    cl_double  CL_ALIGNED(16) s[2];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_double  x, y; };
+   __extension__ struct{ cl_double s0, s1; };
+   __extension__ struct{ cl_double lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__) 
+    __cl_double2     v2;
+#endif
+}cl_double2;
+
+typedef union
+{
+    cl_double  CL_ALIGNED(32) s[4];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_double  x, y, z, w; };
+   __extension__ struct{ cl_double  s0, s1, s2, s3; };
+   __extension__ struct{ cl_double2 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__) 
+    __cl_double2     v2[2];
+#endif
+#if defined( __CL_DOUBLE4__) 
+    __cl_double4     v4;
+#endif
+}cl_double4;
+
+/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */
+typedef  cl_double4  cl_double3;
+
+typedef union
+{
+    cl_double   CL_ALIGNED(64) s[8];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_double  x, y, z, w; };
+   __extension__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7; };
+   __extension__ struct{ cl_double4 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__) 
+    __cl_double2     v2[4];
+#endif
+#if defined( __CL_DOUBLE4__) 
+    __cl_double4     v4[2];
+#endif
+#if defined( __CL_DOUBLE8__ )
+    __cl_double8     v8;
+#endif
+}cl_double8;
+
+typedef union
+{
+    cl_double  CL_ALIGNED(128) s[16];
+#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+   __extension__ struct{ cl_double  x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
+   __extension__ struct{ cl_double  s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
+   __extension__ struct{ cl_double8 lo, hi; };
+#endif
+#if defined( __CL_DOUBLE2__) 
+    __cl_double2     v2[8];
+#endif
+#if defined( __CL_DOUBLE4__) 
+    __cl_double4     v4[4];
+#endif
+#if defined( __CL_DOUBLE8__ )
+    __cl_double8     v8[2];
+#endif
+#if defined( __CL_DOUBLE16__ )
+    __cl_double16    v16;
+#endif
+}cl_double16;
+
+/* Macro to facilitate debugging 
+ * Usage:
+ *   Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. 
+ *   The first line ends with:   CL_PROGRAM_STRING_DEBUG_INFO "\
+ *   Each line thereafter of OpenCL C source must end with: \n\
+ *   The last line ends in ";
+ *
+ *   Example:
+ *
+ *   const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
+ *   kernel void foo( int a, float * b )             \n\
+ *   {                                               \n\
+ *      // my comment                                \n\
+ *      *b[ get_global_id(0)] = a;                   \n\
+ *   }                                               \n\
+ *   ";
+ *
+ * This should correctly set up the line, (column) and file information for your source 
+ * string so you can do source level debugging.
+ */
+#define  __CL_STRINGIFY( _x )               # _x
+#define  _CL_STRINGIFY( _x )                __CL_STRINGIFY( _x )
+#define  CL_PROGRAM_STRING_DEBUG_INFO       "#line "  _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" 
+  
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* __CL_PLATFORM_H  */
diff --git a/include/CL/opencl.h b/include/CL/opencl.h

new file mode 100644 (file)

index 0000000..26a6389
--- /dev/null
+++ b/include/CL/opencl.h
@@ -0,0 +1,54 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2010 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
+
+#ifndef __OPENCL_H
+#define __OPENCL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __APPLE__
+
+#include <OpenCL/cl.h>
+#include <OpenCL/cl_gl.h>
+#include <OpenCL/cl_gl_ext.h>
+#include <OpenCL/cl_ext.h>
+
+#else
+
+#include <CL/cl.h>
+#include <CL/cl_gl.h>
+#include <CL/cl_gl_ext.h>
+#include <CL/cl_ext.h>
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* __OPENCL_H   */
+
diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h

index da8366652fe2c505044015faae791fc90aeec59e..e37917eda991a07510ca50a42060a68e46dcbc45 100644 (file)
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -894,7 +894,7 @@ struct __DRIdri2ExtensionRec {
   * extensions.
   */
  #define __DRI_IMAGE "DRI_IMAGE"
-#define __DRI_IMAGE_VERSION 3
+#define __DRI_IMAGE_VERSION 4
  
  /**
   * These formats correspond to the similarly named MESA_FORMAT_*
@@ -906,10 +906,12 @@ struct __DRIdri2ExtensionRec {
  #define __DRI_IMAGE_FORMAT_XRGB8888     0x1002
  #define __DRI_IMAGE_FORMAT_ARGB8888     0x1003
  #define __DRI_IMAGE_FORMAT_ABGR8888     0x1004
+#define __DRI_IMAGE_FORMAT_XBGR8888     0x1005
  
  #define __DRI_IMAGE_USE_SHARE          0x0001
  #define __DRI_IMAGE_USE_SCANOUT                0x0002
  #define __DRI_IMAGE_USE_CURSOR         0x0004
+#define __DRI_IMAGE_USE_WRITE          0x0008
  
  /**
   * queryImage attributes
@@ -954,6 +956,13 @@ struct __DRIimageExtensionRec {
      * \since 2
      */
     GLboolean (*validateUsage)(__DRIimage *image, unsigned int use);
+
+   /**
+    * Write data into image.
+    *
+    * \since 4
+    */
+   int (*write)(__DRIimage *image, const void *buf, size_t count);
  };
  
  
diff --git a/src/SConscript b/src/SConscript

index ba6be0be1a0a1b916c53ec08669f9b6a8c07f221..777ad23f2c904104b1d0cc4133914bd55f8708ea 100644 (file)
--- a/src/SConscript
+++ b/src/SConscript
@@ -24,7 +24,7 @@ SConscript('mapi/vgapi/SConscript')
  if not env['embedded']:
      if env['platform'] not in ['windows', 'darwin', 'haiku']:
          SConscript('glx/SConscript')
-    if env['platform'] not in ['darwin', 'haiku']:
+    if env['platform'] not in ['darwin', 'haiku', 'sunos']:
          SConscript('egl/main/SConscript')
      if env['platform'] not in ['darwin']:
          SConscript('glu/sgi/SConscript')
diff --git a/src/egl/drivers/dri2/Makefile.am b/src/egl/drivers/dri2/Makefile.am

index e4d4abb966d2d65f9871a0c41b209f64c328b8dc..49ec06bbec7bd019233a2bb0c81e812828e6424e 100644 (file)
--- a/src/egl/drivers/dri2/Makefile.am
+++ b/src/egl/drivers/dri2/Makefile.am
@@ -26,6 +26,7 @@ AM_CFLAGS = \
         -I$(top_srcdir)/src/gbm/backends/dri \
         -I$(top_srcdir)/src/egl/wayland/wayland-egl \
         -I$(top_srcdir)/src/egl/wayland/wayland-drm \
+       -I$(top_builddir)/src/egl/wayland/wayland-drm \
         $(DEFINES) \
         $(LIBDRM_CFLAGS) \
         $(LIBUDEV_CFLAGS) \
diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c

index 1998941f82928953a4ce389e9cb10b59572df131..66142cdee5f895ccdf7229229198922ad8f7cc07 100644 (file)
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -311,6 +311,8 @@ dri2_create_image_android_native_buffer(_EGLDisplay *disp,
        format = __DRI_IMAGE_FORMAT_ABGR8888;
        break;
     case HAL_PIXEL_FORMAT_RGBX_8888:
+      format = __DRI_IMAGE_FORMAT_XBGR8888;
+      break;
     case HAL_PIXEL_FORMAT_RGB_888:
     case HAL_PIXEL_FORMAT_RGBA_5551:
     case HAL_PIXEL_FORMAT_RGBA_4444:
diff --git a/src/gallium/SConscript b/src/gallium/SConscript

index da2e4dd5dedf5bedd11e1e13a6c1d76e80a74e9c..b61eba0390e19acfdbbdc7261b939206e483d941 100644 (file)
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -33,8 +33,11 @@ if env['drm']:
          SConscript([
              'drivers/r300/SConscript',
              'drivers/r600/SConscript',
-            'drivers/radeonsi/SConscript',
          ])
+        if env['llvm']:
+            SConscript([
+                'drivers/radeonsi/SConscript',
+            ])
      # XXX: nouveau drivers have a tight dependency on libdrm, so to enable
      # we need some version logic before we enable them. Also, ATM there is
      # no nouveau target in scons
@@ -55,7 +58,7 @@ SConscript('winsys/sw/null/SConscript')
  
  if not env['embedded']:
      SConscript('state_trackers/vega/SConscript')
-    if env['platform'] not in ['darwin', 'haiku']:
+    if env['platform'] not in ['darwin', 'haiku', 'sunos']:
          SConscript('state_trackers/egl/SConscript')
  
      if env['x11']:
@@ -121,7 +124,7 @@ SConscript([
  ])
  
  if not env['embedded']:
-    if env['platform'] not in ['darwin', 'haiku']:
+    if env['platform'] not in ['darwin', 'haiku', 'sunos']:
          SConscript([
              'targets/egl-static/SConscript'
          ])
@@ -153,8 +156,11 @@ if not env['embedded']:
              SConscript([
                  'targets/dri-r300/SConscript',
                  'targets/dri-r600/SConscript',
-                'targets/dri-radeonsi/SConscript',
              ])
+            if env['llvm']:
+                SConscript([
+                    'targets/dri-radeonsi/SConscript',
+                ])
  
      if env['xorg'] and env['drm']:
          SConscript([
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile

index 896c058fde9f1bed2264a80b84773f64bf9b4bd0..a70ae7384fbf6c2c7e9e33fd2dcaea56cfdcf406 100644 (file)
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -18,6 +18,14 @@ endif
  
  include ../Makefile.template
  
+default install clean: %: subdirs-%
+
+subdirs-%:
+       @for dir in $(SUBDIRS) ; do \
+               if [ -d $$dir ] ; then \
+                       (cd $$dir && $(MAKE) $*) || exit 1; \
+               fi \
+       done
  
  indices/u_indices_gen.c: indices/u_indices_gen.py
         $(PYTHON2) $< > $@
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources

index 44cf2d326de70702f82ee49f2f3c55a4ce5cb4b9..277428b38be5d7638618a0302e161bb5d90a4b4f 100644 (file)
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -1,3 +1,5 @@
+SUBDIRS := pipe-loader
+
  C_SOURCES := \
         cso_cache/cso_cache.c \
         cso_cache/cso_context.c \
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c

index ea0a4fb3f2508122db3df95216619b9c2266f55d..d6b981195b220f02884fa8511e3e96522861922a 100644 (file)
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -237,12 +237,13 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
        decl = tgsi_default_full_declaration();
        decl.Declaration.File = TGSI_FILE_INPUT;
        /* XXX this could be linear... */
-      decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+      decl.Declaration.Interpolate = 1;
        decl.Declaration.Semantic = 1;
        decl.Semantic.Name = TGSI_SEMANTIC_GENERIC;
        decl.Semantic.Index = aactx->maxGeneric + 1;
        decl.Range.First = 
        decl.Range.Last = aactx->maxInput + 1;
+      decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
        ctx->emit_declaration(ctx, &decl);
  
        /* declare new sampler */
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c

index a900dd3ab5411587ecf5a1ea429e8f8a7b368d61..ec703d0b3949656b6737641ef41479b69a08180d 100644 (file)
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -201,12 +201,13 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
        decl = tgsi_default_full_declaration();
        decl.Declaration.File = TGSI_FILE_INPUT;
        /* XXX this could be linear... */
-      decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
+      decl.Declaration.Interpolate = 1;
        decl.Declaration.Semantic = 1;
        decl.Semantic.Name = TGSI_SEMANTIC_GENERIC;
        decl.Semantic.Index = aactx->maxGeneric + 1;
        decl.Range.First = 
        decl.Range.Last = texInput;
+      decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
        ctx->emit_declaration(ctx, &decl);
  
        /* declare new temp regs */
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c

index cfb6ef4453a816b54da8ba5fd6909af521cc45e2..842f6eeba22435ddadf9391d4bb8c2b2d5aa10a2 100644 (file)
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -234,12 +234,13 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
           /* declare new position input reg */
           decl = tgsi_default_full_declaration();
           decl.Declaration.File = TGSI_FILE_INPUT;
-         decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */
+         decl.Declaration.Interpolate = 1;
           decl.Declaration.Semantic = 1;
           decl.Semantic.Name = TGSI_SEMANTIC_POSITION;
           decl.Semantic.Index = 0;
           decl.Range.First = 
              decl.Range.Last = wincoordInput;
+         decl.Interp.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */
           ctx->emit_declaration(ctx, &decl);
        }
  
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c

index 4ba4aa545966e588ca5435f0421ee17a884164dc..3a74790e89f580c79b6ecb5d615ddd90d680ec32 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -926,7 +926,7 @@ lp_build_sgn(struct lp_build_context *bld,
     }
     else
     {
-      LLVMValueRef minus_one = lp_build_const_vec(bld->gallivm, type, -1.0);
+      LLVMValueRef minus_one = lp_build_const_int_vec(bld->gallivm, type, -1.0);
        cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
        res = lp_build_select(bld, cond, bld->one, minus_one);
     }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.c b/src/gallium/auxiliary/gallivm/lp_bld_assert.c

index 9de5e8e7b51459377ce252f11d4f689bd5751071..37c142bd2aed55b67f37072f9f6383be008700a9 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_assert.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.c
@@ -29,6 +29,7 @@
  #include "util/u_memory.h"
  #include "lp_bld_assert.h"
  #include "lp_bld_init.h"
+#include "lp_bld_const.h"
  #include "lp_bld_printf.h"
  
  
@@ -55,48 +56,37 @@ lp_assert(int condition, const char *msg)
   * \param condition should be an 'i1' or 'i32' value
   * \param msg  a string to print if the assertion fails.
   */
-LLVMValueRef
+void
  lp_build_assert(struct gallivm_state *gallivm,
                  LLVMValueRef condition,
                  const char *msg)
  {
     LLVMBuilderRef builder = gallivm->builder;
     LLVMContextRef context = gallivm->context;
-   LLVMModuleRef module = gallivm->module;
     LLVMTypeRef arg_types[2];
-   LLVMValueRef msg_string, assert_func, params[2], r;
+   LLVMTypeRef ret_type;
+   LLVMValueRef function;
+   LLVMValueRef args[2];
+   LLVMValueRef msg_string;
  
-   msg_string = lp_build_const_string_variable(module, context,
-                                               msg, strlen(msg) + 1);
+   msg_string = lp_build_const_string(gallivm, msg);
  
+   ret_type = LLVMVoidTypeInContext(context);
     arg_types[0] = LLVMInt32TypeInContext(context);
     arg_types[1] = LLVMPointerType(LLVMInt8TypeInContext(context), 0);
  
-   /* lookup the lp_assert function */
-   assert_func = LLVMGetNamedFunction(module, "lp_assert");
-
-   /* Create the assertion function if not found */
-   if (!assert_func) {
-      LLVMTypeRef func_type =
-         LLVMFunctionType(LLVMVoidTypeInContext(context), arg_types, 2, 0);
-
-      assert_func = LLVMAddFunction(module, "lp_assert", func_type);
-      LLVMSetFunctionCallConv(assert_func, LLVMCCallConv);
-      LLVMSetLinkage(assert_func, LLVMExternalLinkage);
-      LLVMAddGlobalMapping(gallivm->engine, assert_func,
-                           func_to_pointer((func_pointer)lp_assert));
-   }
-   assert(assert_func);
+   function = lp_build_const_func_pointer(gallivm,
+                                          func_to_pointer((func_pointer)lp_assert),
+                                          ret_type, arg_types, Elements(arg_types),
+                                          "assert");
  
     /* build function call param list */
-   params[0] = LLVMBuildZExt(builder, condition, arg_types[0], "");
-   params[1] = LLVMBuildBitCast(builder, msg_string, arg_types[1], "");
+   args[0] = LLVMBuildZExt(builder, condition, arg_types[0], "");
+   args[1] = msg_string;
  
     /* check arg types */
-   assert(LLVMTypeOf(params[0]) == arg_types[0]);
-   assert(LLVMTypeOf(params[1]) == arg_types[1]);
-
-   r = LLVMBuildCall(builder, assert_func, params, 2, "");
+   assert(LLVMTypeOf(args[0]) == arg_types[0]);
+   assert(LLVMTypeOf(args[1]) == arg_types[1]);
  
-   return r;
+   LLVMBuildCall(builder, function, args, Elements(args), "");
  }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_assert.h b/src/gallium/auxiliary/gallivm/lp_bld_assert.h

index 1d2baab30a21e857c74ac8ce44c2e9b5540e6ddd..e377b59bbedd5debb2811c216805e453b2e5bf6b 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_assert.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_assert.h
@@ -33,7 +33,7 @@
  #include "lp_bld_init.h"
  
  
-LLVMValueRef
+void
  lp_build_assert(struct gallivm_state *gallivm,
                  LLVMValueRef condition,
                  const char *msg);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.c b/src/gallium/auxiliary/gallivm/lp_bld_const.c

index 6d8b7c26fc8344f37e588dc9d3133e4bce26fb9e..f0611b158d92b837b0b98fd2d79aa653530591e4 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -409,3 +409,69 @@ lp_build_const_mask_aos(struct gallivm_state *gallivm,
  
     return LLVMConstVector(masks, type.length);
  }
+
+
+/**
+ * Performs lp_build_const_mask_aos, but first swizzles the mask
+ */
+LLVMValueRef
+lp_build_const_mask_aos_swizzled(struct gallivm_state *gallivm,
+                        struct lp_type type,
+                        unsigned mask,
+                        const unsigned char *swizzle)
+{
+   mask =
+           ((mask & (1 << swizzle[0])) >> swizzle[0])
+        | (((mask & (1 << swizzle[1])) >> swizzle[1]) << 1)
+        | (((mask & (1 << swizzle[2])) >> swizzle[2]) << 2)
+        | (((mask & (1 << swizzle[3])) >> swizzle[3]) << 3);
+
+   return lp_build_const_mask_aos(gallivm, type, mask);
+}
+
+
+/**
+ * Build a zero-terminated constant string.
+ */
+LLVMValueRef
+lp_build_const_string(struct gallivm_state *gallivm,
+                      const char *str)
+{
+   unsigned len = strlen(str) + 1;
+   LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm->context);
+   LLVMValueRef string = LLVMAddGlobal(gallivm->module, LLVMArrayType(i8, len), "");
+   LLVMSetGlobalConstant(string, TRUE);
+   LLVMSetLinkage(string, LLVMInternalLinkage);
+   LLVMSetInitializer(string, LLVMConstStringInContext(gallivm->context, str, len, TRUE));
+   string = LLVMConstBitCast(string, LLVMPointerType(i8, 0));
+   return string;
+}
+
+
+/**
+ * Build a callable function pointer.
+ *
+ * We use this casting instead of LLVMAddGlobalMapping()
+ * to work around a bug in LLVM 2.6, and for efficiency/simplicity.
+ */
+LLVMValueRef
+lp_build_const_func_pointer(struct gallivm_state *gallivm,
+                            const void *ptr,
+                            LLVMTypeRef ret_type,
+                            LLVMTypeRef *arg_types,
+                            unsigned num_args,
+                            const char *name)
+{
+   LLVMTypeRef function_type;
+   LLVMValueRef function;
+
+   function_type = LLVMFunctionType(ret_type, arg_types, num_args, 0);
+
+   function = lp_build_const_int_pointer(gallivm, ptr);
+
+   function = LLVMBuildBitCast(gallivm->builder, function,
+                               LLVMPointerType(function_type, 0),
+                               name);
+
+   return function;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h

index 69718eb4b3d1116ec1da2e6ca0aeef5d302cb3db..2205616274f2d48d67e5b50af9bd84bfced9f6ef 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h
@@ -111,6 +111,13 @@ lp_build_const_mask_aos(struct gallivm_state *gallivm,
                          unsigned mask);
  
  
+LLVMValueRef
+lp_build_const_mask_aos_swizzled(struct gallivm_state *gallivm,
+                        struct lp_type type,
+                        unsigned mask,
+                        const unsigned char *swizzle);
+
+
  static INLINE LLVMValueRef
  lp_build_const_int32(struct gallivm_state *gallivm, int i)
  {
@@ -142,5 +149,18 @@ lp_build_const_int_pointer(struct gallivm_state *gallivm, const void *ptr)
  }
  
  
+LLVMValueRef
+lp_build_const_string(struct gallivm_state *gallivm,
+                      const char *str);
+
+
+LLVMValueRef
+lp_build_const_func_pointer(struct gallivm_state *gallivm,
+                            const void *ptr,
+                            LLVMTypeRef ret_type,
+                            LLVMTypeRef *arg_types,
+                            unsigned num_args,
+                            const char *name);
+
  
  #endif /* !LP_BLD_CONST_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.c b/src/gallium/auxiliary/gallivm/lp_bld_flow.c

index a9c9c7af10c2d046fe28124b853c40b06af4d047..d2b3713ed2d18e13aad04213c18e4a5b7c5142f5 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -265,6 +265,73 @@ lp_build_loop_end(struct lp_build_loop_state *state,
     lp_build_loop_end_cond(state, end, step, LLVMIntNE);
  }
  
+/**
+ * Creates a C-style for loop.
+ * In contrast to lp_build_loop, this checks the condition on entry,
+ * e.g. for (i = start; i cmp_op end; i += step)
+ * \param state      the for loop state, initialized here
+ * \param gallivm    the gallivm state
+ * \param start      starting value of iterator
+ * \param cmp_op     comparison operator used for comparing current value with end value
+ * \param end        value used to compare against iterator
+ * \param step       value added to iterator at end of each loop
+ */
+void
+lp_build_for_loop_begin(struct lp_build_for_loop_state *state,
+                        struct gallivm_state *gallivm,
+                        LLVMValueRef start,
+                        LLVMIntPredicate cmp_op,
+                        LLVMValueRef end,
+                        LLVMValueRef step)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+
+   assert(LLVMTypeOf(start) == LLVMTypeOf(end));
+   assert(LLVMTypeOf(start) == LLVMTypeOf(step));
+
+   state->begin = lp_build_insert_new_block(gallivm, "loop_begin");
+   state->step  = step;
+   state->counter_var = lp_build_alloca(gallivm, LLVMTypeOf(start), "loop_counter");
+   state->gallivm = gallivm;
+   state->cond = cmp_op;
+   state->end = end;
+
+   LLVMBuildStore(builder, start, state->counter_var);
+   LLVMBuildBr(builder, state->begin);
+
+   LLVMPositionBuilderAtEnd(builder, state->begin);
+   state->counter = LLVMBuildLoad(builder, state->counter_var, "");
+
+   state->body = lp_build_insert_new_block(gallivm, "loop_body");
+   LLVMPositionBuilderAtEnd(builder, state->body);
+}
+
+/**
+ * End the for loop.
+ */
+void
+lp_build_for_loop_end(struct lp_build_for_loop_state *state)
+{
+   LLVMValueRef next, cond;
+   LLVMBuilderRef builder = state->gallivm->builder;
+
+   next = LLVMBuildAdd(builder, state->counter, state->step, "");
+   LLVMBuildStore(builder, next, state->counter_var);
+   LLVMBuildBr(builder, state->begin);
+
+   state->exit = lp_build_insert_new_block(state->gallivm, "loop_exit");
+
+   /*
+    * We build the comparison for the begin block here; if we build it
+    * earlier, the output LLVM IR is not human readable, as the code
+    * produced is not in the standard begin -> body -> end order.
+    */
+   LLVMPositionBuilderAtEnd(builder, state->begin);
+   cond = LLVMBuildICmp(builder, state->cond, state->counter, state->end, "");
+   LLVMBuildCondBr(builder, cond, state->body, state->exit);
+
+   LLVMPositionBuilderAtEnd(builder, state->exit);
+}
  
  
  /*
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h

index 3cd5a9f42a54bdb14bc6782490e88205b16bdfd5..0da849bfe0cc061bf75fc14df1232efd1f4fe189 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h
@@ -131,6 +131,33 @@ lp_build_loop_end_cond(struct lp_build_loop_state *state,
                         LLVMIntPredicate cond);
  
  
+/**
+ * Implementation of simple C-style for loops
+ */
+struct lp_build_for_loop_state
+{
+   LLVMBasicBlockRef begin;
+   LLVMBasicBlockRef body;
+   LLVMBasicBlockRef exit;
+   LLVMValueRef counter_var;
+   LLVMValueRef counter;
+   LLVMValueRef step;
+   LLVMIntPredicate cond;
+   LLVMValueRef end;
+   struct gallivm_state *gallivm;
+};
+
+void
+lp_build_for_loop_begin(struct lp_build_for_loop_state *state,
+                        struct gallivm_state *gallivm,
+                        LLVMValueRef start,
+                        LLVMIntPredicate llvm_cond,
+                        LLVMValueRef end,
+                        LLVMValueRef step);
+
+void
+lp_build_for_loop_end(struct lp_build_for_loop_state *state);
+
  
  /**
   * if/else/endif.
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c

index 82ab19eda14ba199389ac7c040c357a9b1ac31ed..e4b8da6bcfd043d5513769f031b9ccc2292a6dc9 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -643,28 +643,18 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
            */
           LLVMTypeRef ret_type;
           LLVMTypeRef arg_types[4];
-         LLVMTypeRef function_type;
  
           ret_type = LLVMVoidTypeInContext(gallivm->context);
           arg_types[0] = pf32t;
           arg_types[1] = pi8t;
           arg_types[2] = i32t;
           arg_types[3] = i32t;
-         function_type = LLVMFunctionType(ret_type, arg_types,
-                                          Elements(arg_types), 0);
  
-         /* Note: we're using this casting here instead of LLVMAddGlobalMapping()
-          * to work around a bug in LLVM 2.6, and for efficiency/simplicity.
-          */
-
-         /* make const pointer for the C fetch_rgba_float function */
-         function = lp_build_const_int_pointer(gallivm,
-            func_to_pointer((func_pointer) format_desc->fetch_rgba_float));
-
-         /* cast the callee pointer to the function's type */
-         function = LLVMBuildBitCast(builder, function,
-                                     LLVMPointerType(function_type, 0),
-                                     "cast callee");
+         function = lp_build_const_func_pointer(gallivm,
+                                                func_to_pointer((func_pointer) format_desc->fetch_rgba_float),
+                                                ret_type,
+                                                arg_types, Elements(arg_types),
+                                                format_desc->short_name);
        }
  
        tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h

index f68bf75a8515df54506f97430b8ff914c84bacb1..5fc0f996c646357c04a163aba0b1178cafb4bd49 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h
@@ -81,5 +81,12 @@ extern LLVMValueRef
  lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
                         const char *Name);
  
+void
+lp_set_load_alignment(LLVMValueRef Inst,
+                       unsigned Align);
+
+void
+lp_set_store_alignment(LLVMValueRef Inst,
+                      unsigned Align);
  
  #endif /* !LP_BLD_INIT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp

index 68f8808f3effae419e6d18102bc9259350282f29..6c4586c421244414d0d9b0bf2c10dd480d99157e 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -165,3 +165,18 @@ lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
     return llvm::wrap(llvm::unwrap(B)->CreateLoad(llvm::unwrap(PointerVal), true, Name));
  }
  
+extern "C"
+void
+lp_set_load_alignment(LLVMValueRef Inst,
+                       unsigned Align)
+{
+   llvm::unwrap<llvm::LoadInst>(Inst)->setAlignment(Align);
+}
+
+extern "C"
+void
+lp_set_store_alignment(LLVMValueRef Inst,
+                      unsigned Align)
+{
+   llvm::unwrap<llvm::StoreInst>(Inst)->setAlignment(Align);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.c b/src/gallium/auxiliary/gallivm/lp_bld_printf.c

index 56ff42695880381fbe728107a03ca585acf28b08..5e359ceaa2053ae306967621d7da321eb7c62987 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_printf.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.c
@@ -66,19 +66,6 @@ lp_get_printf_arg_count(const char *fmt)
     return count;
  }
  
-LLVMValueRef 
-lp_build_const_string_variable(LLVMModuleRef module,
-                               LLVMContextRef context,
-                               const char *str, int len)
-{
-   LLVMValueRef string = LLVMAddGlobal(module, LLVMArrayType(LLVMInt8TypeInContext(context), len + 1), "");
-   LLVMSetGlobalConstant(string, TRUE);
-   LLVMSetLinkage(string, LLVMInternalLinkage);
-   LLVMSetInitializer(string, LLVMConstStringInContext(context, str, len + 1, TRUE));
-   return string;
-}
- 
-
  /**
   * lp_build_printf.
   *
@@ -94,24 +81,22 @@ lp_build_printf(struct gallivm_state *gallivm, const char *fmt, ...)
     int argcount = lp_get_printf_arg_count(fmt);
     LLVMBuilderRef builder = gallivm->builder;
     LLVMContextRef context = gallivm->context;
-   LLVMModuleRef module = gallivm->module;
     LLVMValueRef params[50];
-   LLVMValueRef fmtarg = lp_build_const_string_variable(module, context,
-                                                        fmt, strlen(fmt) + 1);
-   LLVMValueRef int0 = lp_build_const_int32(gallivm, 0);
-   LLVMValueRef index[2];
-   LLVMValueRef func_printf = LLVMGetNamedFunction(module, "printf");
+   LLVMValueRef fmtarg = lp_build_const_string(gallivm, fmt);
+   LLVMTypeRef printf_type;
+   LLVMValueRef func_printf;
  
     assert(Elements(params) >= argcount + 1);
  
-   index[0] = index[1] = int0;
+   printf_type = LLVMFunctionType(LLVMIntTypeInContext(context, 32), NULL, 0, 1);
  
-   if (!func_printf) {
-      LLVMTypeRef printf_type = LLVMFunctionType(LLVMIntTypeInContext(context, 32), NULL, 0, 1);
-      func_printf = LLVMAddFunction(module, "printf", printf_type);
-   }
+   func_printf = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer)debug_printf));
+
+   func_printf = LLVMBuildBitCast(gallivm->builder, func_printf,
+                                  LLVMPointerType(printf_type, 0),
+                                  "debug_printf");
  
-   params[0] = LLVMBuildGEP(builder, fmtarg, index, 2, "");
+   params[0] = fmtarg;
  
     va_start(arglist, fmt);
     for (i = 1; i <= argcount; i++) {
@@ -170,3 +155,30 @@ lp_build_print_ivec4(struct gallivm_state *gallivm,
     util_snprintf(format, sizeof(format), "%s %%i %%i %%i %%i\n", msg);
     return lp_build_printf(gallivm, format, x, y, z, w);
  }
+
+
+/**
+ * Print a uint8[16] vector.
+ */
+LLVMValueRef
+lp_build_print_uvec16(struct gallivm_state *gallivm,
+                    const char *msg, LLVMValueRef vec)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   char format[1000];
+   LLVMValueRef args[16];
+   int i;
+
+   for (i = 0; i < 16; ++i) {
+      args[i] = LLVMBuildExtractElement(builder, vec, lp_build_const_int32(gallivm, i), "");
+   }
+
+   util_snprintf(format, sizeof(format), "%s %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u %%u\n", msg);
+
+   return lp_build_printf(
+            gallivm, format,
+            args[ 0], args[ 1], args[ 2], args[ 3],
+            args[ 4], args[ 5], args[ 6], args[ 7],
+            args[ 8], args[ 9], args[10], args[11],
+            args[12], args[13], args[14], args[15]);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_printf.h b/src/gallium/auxiliary/gallivm/lp_bld_printf.h

index 79db74d88861b16e406b1b88dc08451c20cf335b..7a2b26d41f42132db40c9cf1c6f9ac6afabfe3e3 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_printf.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_printf.h
@@ -34,12 +34,9 @@
  #include "lp_bld_init.h"
  
  
-LLVMValueRef lp_build_const_string_variable(LLVMModuleRef module,
-                                            LLVMContextRef context,
-                                            const char *str, int len);
-
-LLVMValueRef lp_build_printf(struct gallivm_state *gallivm,
-                             const char *fmt, ...);
+LLVMValueRef
+lp_build_printf(struct gallivm_state *gallivm,
+                const char *fmt, ...);
  
  LLVMValueRef
  lp_build_print_vec4(struct gallivm_state *gallivm,
@@ -49,5 +46,9 @@ LLVMValueRef
  lp_build_print_ivec4(struct gallivm_state *gallivm,
                       const char *msg, LLVMValueRef vec);
  
+LLVMValueRef
+lp_build_print_uvec16(struct gallivm_state *gallivm,
+                     const char *msg, LLVMValueRef vec);
+
  #endif
  
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c

index 07f4f3bf6b855ce2fab67a00a102312d5bffd9ac..c6d4f1bcc28c8f91c5952818a12e3ff88105b600 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -349,7 +349,7 @@ lp_build_brilinear_lod(struct lp_build_context *bld,
   * Combined log2 and brilinear lod computation.
   *
   * It's in all identical to calling lp_build_fast_log2() and
- * lp_build_brilinear_lod() above, but by combining we can compute the interger
+ * lp_build_brilinear_lod() above, but by combining we can compute the integer
   * and fractional part independently.
   */
  static void
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.c b/src/gallium/auxiliary/gallivm/lp_bld_struct.c

index 0dc2f24d10a2eb0695ede785e6a77de495f21450..cc248d15e97f362d50adbf1166bf226c062efd99 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_struct.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.c
@@ -146,6 +146,25 @@ lp_build_pointer_get(LLVMBuilderRef builder,
  }
  
  
+LLVMValueRef
+lp_build_pointer_get_unaligned(LLVMBuilderRef builder,
+                               LLVMValueRef ptr,
+                               LLVMValueRef index,
+                               unsigned alignment)
+{
+   LLVMValueRef element_ptr;
+   LLVMValueRef res;
+   assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
+   element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
+   res = LLVMBuildLoad(builder, element_ptr, "");
+   lp_set_load_alignment(res, alignment);
+#ifdef DEBUG
+   lp_build_name(res, "%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index));
+#endif
+   return res;
+}
+
+
  void
  lp_build_pointer_set(LLVMBuilderRef builder,
                       LLVMValueRef ptr,
@@ -156,3 +175,18 @@ lp_build_pointer_set(LLVMBuilderRef builder,
     element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
     LLVMBuildStore(builder, value, element_ptr);
  }
+
+
+void
+lp_build_pointer_set_unaligned(LLVMBuilderRef builder,
+                               LLVMValueRef ptr,
+                               LLVMValueRef index,
+                               LLVMValueRef value,
+                               unsigned alignment)
+{
+   LLVMValueRef element_ptr;
+   LLVMValueRef instr;
+   element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
+   instr = LLVMBuildStore(builder, value, element_ptr);
+   lp_set_store_alignment(instr, alignment);
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_struct.h b/src/gallium/auxiliary/gallivm/lp_bld_struct.h

index 11605c685f09d5b50b5a77879014ee72dc2f837f..6b7b4f2a6bf7d6949b0f001da9412fec27025ed0 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_struct.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_struct.h
@@ -104,6 +104,18 @@ lp_build_pointer_get(LLVMBuilderRef builder,
                     LLVMValueRef ptr,
                     LLVMValueRef index);
  
+/**
+ * Get the value of an array element, with explicit alignment.
+ *
+ * If the element size is different from the alignment, this will
+ * cause LLVM to emit an unaligned load.
+ */
+LLVMValueRef
+lp_build_pointer_get_unaligned(LLVMBuilderRef builder,
+                               LLVMValueRef ptr,
+                               LLVMValueRef index,
+                               unsigned alignment);
+
  /**
   * Set the value of an array element.
   */
@@ -113,4 +125,17 @@ lp_build_pointer_set(LLVMBuilderRef builder,
                       LLVMValueRef index,
                       LLVMValueRef value);
  
+/**
+ * Set the value of an array element, with explicit alignment.
+ *
+ * If the element size is different from the alignment, this will
+ * cause LLVM to emit an unaligned store.
+ */
+void
+lp_build_pointer_set_unaligned(LLVMBuilderRef builder,
+                               LLVMValueRef ptr,
+                               LLVMValueRef index,
+                               LLVMValueRef value,
+                               unsigned alignment);
+
  #endif /* !LP_BLD_STRUCT_H */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c

index 45bbf81eb2969a7af792984349df85ef1dc5f71e..680c85f843c8082b66cbd6eb5a34813d2dcc6e90 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -215,8 +215,6 @@ lp_build_tgsi_inst_llvm(
     case TGSI_OPCODE_PUSHA:
     case TGSI_OPCODE_POPA:
     case TGSI_OPCODE_SAD:
-   case TGSI_OPCODE_TXF:
-   case TGSI_OPCODE_TXQ:
        /* deprecated? */
        assert(0);
        return FALSE;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h

index 888221d4d64801593f010cd4a0caacaf8dcce755..773c679a4d838d94848a70395e89fa20bd87ac77 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -121,6 +121,11 @@ struct lp_tgsi_info
      */
     unsigned indirect_textures:1;
  
+   /*
+    * Whether any immediate values are outside the range [0, 1].
+    */
+   unsigned unclamped_immediates:1;
+
     /*
      * Texture opcode description. Aimed at detecting and described direct
      * texture opcodes.
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c

index 7f011563264a2e5bf8e213ed2d4eb2c28f44445d..d278444ce9005188a644e2023967f6ddd5666e9d 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -1007,6 +1007,17 @@ i2f_emit_cpu(
                                                                emit_data->args[0]);
  }
  
+/* TGSI_OPCODE_IABS (CPU Only) */
+static void
+iabs_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->int_bld,
+                                                       emit_data->args[0]);
+}
+
  /* TGSI_OPCODE_IDIV (CPU Only) */
  static void
  idiv_emit_cpu(
@@ -1101,6 +1112,18 @@ islt_emit_cpu(
     iset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
  }
  
+
+/* TGSI_OPCODE_ISSG (CPU Only) */
+static void
+issg_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_sgn(&bld_base->int_bld,
+                                                       emit_data->args[0]);
+}
+
  /* TGSI_OPCODE_LG2 (CPU Only) */
  static void
  lg2_emit_cpu(
@@ -1541,6 +1564,7 @@ lp_set_default_actions_cpu(
     bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
  
     bld_base->op_actions[TGSI_OPCODE_I2F].emit = i2f_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_IABS].emit = iabs_emit_cpu;
     bld_base->op_actions[TGSI_OPCODE_IDIV].emit = idiv_emit_cpu;
     bld_base->op_actions[TGSI_OPCODE_INEG].emit = ineg_emit_cpu;
     bld_base->op_actions[TGSI_OPCODE_IMAX].emit = imax_emit_cpu;
@@ -1548,6 +1572,7 @@ lp_set_default_actions_cpu(
     bld_base->op_actions[TGSI_OPCODE_ISGE].emit = isge_emit_cpu;
     bld_base->op_actions[TGSI_OPCODE_ISHR].emit = ishr_emit_cpu;
     bld_base->op_actions[TGSI_OPCODE_ISLT].emit = islt_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_ISSG].emit = issg_emit_cpu;
  
     bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
     bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c

index 80c148124ee2daa9a96b899e1438e8cc13df37ae..24bc13a9be87471d68f6d4cfdfe7861164d04638 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -325,8 +325,10 @@ lp_emit_store_aos(
     if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
        LLVMValueRef writemask;
  
-      writemask = lp_build_const_mask_aos(bld->bld_base.base.gallivm, bld->bld_base.base.type,
-                                          reg->Register.WriteMask);
+      writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
+                                                   bld->bld_base.base.type,
+                                                   reg->Register.WriteMask,
+                                                   bld->swizzles);
  
        if (mask) {
           mask = LLVMBuildAnd(builder, mask, writemask, "");
@@ -1089,6 +1091,7 @@ lp_build_tgsi_aos(struct gallivm_state *gallivm,
        debug_printf("2222222222222222222222222222 \n");
     }
     tgsi_parse_free(&parse);
+   FREE(bld.bld_base.instructions);
  
     if (0) {
        LLVMModuleRef module = LLVMGetGlobalParent(
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c

index 3373ed4426d4ee10ff0e42af48f742c4a10e562b..ab393ed942a022c9025b49d2a6bc5e7bf2c107a1 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
@@ -442,8 +442,12 @@ lp_build_tgsi_info(const struct tgsi_token *tokens,
              assert(size <= 4);
              if (ctx.num_imms < Elements(ctx.imm)) {
                 for (chan = 0; chan < size; ++chan) {
-                  ctx.imm[ctx.num_imms][chan] =
-                        parse.FullToken.FullImmediate.u[chan].Float;
+                  float value = parse.FullToken.FullImmediate.u[chan].Float;
+                  ctx.imm[ctx.num_imms][chan] = value;
+
+                  if (value < 0.0f || value > 1.0f) {
+                     info->unclamped_immediates = TRUE;
+                  }
                 }
                 ++ctx.num_imms;
              }
diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h

index 3e1c273027b9ede09aca0af2d963f529c9dc684c..3773be945dd7b064b7e25b4cd9702a113137c43b 100644 (file)
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -44,6 +44,7 @@
  
  #include <pthread.h> /* POSIX threads headers */
  #include <stdio.h> /* for perror() */
+#include <signal.h>
  
  
  /* pipe_thread
diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am b/src/gallium/auxiliary/pipe-loader/Makefile.am

new file mode 100644 (file)

index 0000000..c63dce3
--- /dev/null
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -0,0 +1,38 @@
+AUTOMAKE_OPTIONS = subdir-objects
+
+AM_CPPFLAGS = $(DEFINES) \
+       $(GALLIUM_PIPE_LOADER_DEFINES) \
+       -I$(top_srcdir)/include \
+       -I$(top_srcdir)/src/gallium/include \
+       -I$(top_srcdir)/src/gallium/auxiliary \
+       -I$(top_srcdir)/src/gallium/winsys
+
+AM_CFLAGS = $(PIC_FLAGS)
+
+noinst_LTLIBRARIES =
+
+if HAVE_LOADER_GALLIUM
+noinst_LTLIBRARIES += libpipe_loader.la
+
+libpipe_loader_la_SOURCES = \
+       pipe_loader.h \
+       pipe_loader_priv.h \
+       pipe_loader.c \
+       pipe_loader_sw.c
+
+if HAVE_DRM_LOADER_GALLIUM
+libpipe_loader_la_SOURCES += pipe_loader_drm.c
+AM_CFLAGS += $(LIBDRM_CFLAGS)
+endif
+
+# Provide compatibility with scripts for the old Mesa build system for
+# a while by putting a link to the library in the current directory.
+all-local: libpipe_loader.la
+       ln -f .libs/libpipe_loader.a .
+
+clean-local:
+       rm -f libpipe_loader.a
+endif
+
+# FIXME: Remove when the rest of Gallium is converted to automake.
+default: all
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.c b/src/gallium/auxiliary/pipe-loader/pipe_loader.c

new file mode 100644 (file)

index 0000000..6a10ac3
--- /dev/null
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.c
@@ -0,0 +1,102 @@
+/**************************************************************************
+ *
+ * Copyright 2012 Francisco Jerez
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe_loader_priv.h"
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+#include "util/u_dl.h"
+
+#define MODULE_PREFIX "pipe_"
+
+static int (*backends[])(struct pipe_loader_device **, int) = {
+#ifdef HAVE_PIPE_LOADER_DRM
+   &pipe_loader_drm_probe,
+#endif
+   &pipe_loader_sw_probe
+};
+
+int
+pipe_loader_probe(struct pipe_loader_device **devs, int ndev)
+{
+   int i, n = 0;
+
+   for (i = 0; i < Elements(backends); i++)
+      n += backends[i](&devs[n], MAX2(0, ndev - n));
+
+   return n;
+}
+
+void
+pipe_loader_release(struct pipe_loader_device **devs, int ndev)
+{
+   int i;
+
+   for (i = 0; i < ndev; i++)
+      devs[i]->ops->release(&devs[i]);
+}
+
+struct pipe_screen *
+pipe_loader_create_screen(struct pipe_loader_device *dev,
+                          const char *library_paths)
+{
+   return dev->ops->create_screen(dev, library_paths);
+}
+
+struct util_dl_library *
+pipe_loader_find_module(struct pipe_loader_device *dev,
+                        const char *library_paths)
+{
+   struct util_dl_library *lib;
+   const char *next;
+   char path[PATH_MAX];
+   int len, ret;
+
+   for (next = library_paths; *next; library_paths = next + 1) {
+      next = util_strchrnul(library_paths, ':');
+      len = next - library_paths;
+
+      if (len)
+         ret = util_snprintf(path, sizeof(path), "%.*s/%s%s%s",
+                             len, library_paths,
+                             MODULE_PREFIX, dev->driver_name, UTIL_DL_EXT);
+      else
+         ret = util_snprintf(path, sizeof(path), "%s%s%s",
+                             MODULE_PREFIX, dev->driver_name, UTIL_DL_EXT);
+
+      if (ret > 0 && ret < sizeof(path)) {
+         lib = util_dl_open(path);
+         if (lib) {
+            debug_printf("loaded %s\n", path);
+            return lib;
+         }
+      }
+   }
+
+   return NULL;
+}
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/src/gallium/auxiliary/pipe-loader/pipe_loader.h

new file mode 100644 (file)

index 0000000..e419694
--- /dev/null
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
@@ -0,0 +1,144 @@
+/**************************************************************************
+ *
+ * Copyright 2012 Francisco Jerez
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file Library that provides device enumeration and creation of
+ * winsys/pipe_screen instances.
+ */
+
+#ifndef PIPE_LOADER_H
+#define PIPE_LOADER_H
+
+#include "pipe/p_compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct pipe_screen;
+
+/**
+ * Kind of device described by a pipe_loader_device; it discriminates the
+ * anonymous union inside that structure.
+ */
+enum pipe_loader_device_type {
+   PIPE_LOADER_DEVICE_SOFTWARE,
+   PIPE_LOADER_DEVICE_PCI,
+   NUM_PIPE_LOADER_DEVICE_TYPES /* number of device types listed above */
+};
+
+/**
+ * A device known to the pipe loader.
+ *
+ * Instances are created by the backend probe functions and destroyed
+ * through pipe_loader_release().
+ */
+struct pipe_loader_device {
+   enum pipe_loader_device_type type; /**< Selects the valid union member */
+
+   union {
+      struct {
+         int vendor_id; /**< PCI vendor ID */
+         int chip_id;   /**< PCI device (chip) ID */
+      } pci;
+   }; /**< Discriminated by \a type */
+
+   const char *driver_name;           /**< Name of the driver for the device */
+   const struct pipe_loader_ops *ops; /**< Backend callbacks for the device */
+};
+
+/**
+ * Get a list of known devices.
+ *
+ * \param devs Array that will be filled with pointers to the devices
+ *             available in the system.
+ * \param ndev Maximum number of devices to return.
+ * \return Number of devices available in the system.
+ */
+int
+pipe_loader_probe(struct pipe_loader_device **devs, int ndev);
+
+/**
+ * Create a pipe_screen for the specified device.
+ *
+ * \param dev Device the screen will be created for.
+ * \param library_paths Colon-separated list of filesystem paths that
+ *                      will be used to look for the pipe driver
+ *                      module that handles this device.
+ */
+struct pipe_screen *
+pipe_loader_create_screen(struct pipe_loader_device *dev,
+                          const char *library_paths);
+
+/**
+ * Release resources allocated for a list of devices.
+ *
+ * Should be called when the specified devices are no longer in use to
+ * release any resources allocated by pipe_loader_probe.
+ *
+ * \param devs Devices to release.
+ * \param ndev Number of devices to release.
+ */
+void
+pipe_loader_release(struct pipe_loader_device **devs, int ndev);
+
+#ifdef HAVE_PIPE_LOADER_SW
+
+/**
+ * Get a list of known software devices.
+ *
+ * This function is platform-specific.
+ *
+ * \sa pipe_loader_probe
+ */
+int
+pipe_loader_sw_probe(struct pipe_loader_device **devs, int ndev);
+
+#endif
+
+#ifdef HAVE_PIPE_LOADER_DRM
+
+/**
+ * Get a list of known DRM devices.
+ *
+ * This function is platform-specific.
+ *
+ * \sa pipe_loader_probe
+ */
+int
+pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev);
+
+/**
+ * Initialize a DRM device in an already opened fd.
+ *
+ * This function is platform-specific.
+ *
+ * \sa pipe_loader_probe
+ */
+boolean
+pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PIPE_LOADER_H */
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c

new file mode 100644 (file)

index 0000000..7a7e994
--- /dev/null
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -0,0 +1,218 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Intel Corporation
+ * Copyright 2012 Francisco Jerez
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Kristian Høgsberg <krh@bitplanet.net>
+ *    Benjamin Franzke <benjaminfranzke@googlemail.com>
+ *
+ **************************************************************************/
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <libudev.h>
+#include <xf86drm.h>
+
+#include "state_tracker/drm_driver.h"
+#include "pipe_loader_priv.h"
+
+#include "util/u_memory.h"
+#include "util/u_dl.h"
+#include "util/u_debug.h"
+
+#define DRIVER_MAP_GALLIUM_ONLY
+#include "pci_ids/pci_id_driver_map.h"
+
+/**
+ * DRM backend's private view of a device.
+ *
+ * \a base must stay the first member: the cast macro below converts a
+ * pointer to the base structure straight into a pointer to this one.
+ */
+struct pipe_loader_drm_device {
+   struct pipe_loader_device base;
+   struct util_dl_library *lib; /* driver module, loaded on first create_screen */
+   int fd;                      /* DRM file descriptor, closed on release */
+};
+
+/* Downcast from the generic device to the DRM-specific structure. */
+#define pipe_loader_drm_device(dev) ((struct pipe_loader_drm_device *)dev)
+
+/**
+ * Look up the PCI vendor/chip IDs of the device behind ddev->fd through
+ * udev and store them in ddev->base.pci.
+ *
+ * The IDs are parsed from the "PCI_ID" property of the parent of the udev
+ * device that matches the character device number of the fd.
+ *
+ * \return TRUE if both IDs were found and parsed, FALSE otherwise (a debug
+ *         message is printed in that case).
+ */
+static boolean
+find_drm_pci_id(struct pipe_loader_drm_device *ddev)
+{
+   struct udev *udev = NULL;
+   struct udev_device *parent, *device = NULL;
+   struct stat stat;
+   const char *pci_id;
+   boolean found = FALSE;
+
+   if (fstat(ddev->fd, &stat) < 0)
+      goto out;
+
+   udev = udev_new();
+   if (!udev)
+      goto out;
+
+   device = udev_device_new_from_devnum(udev, 'c', stat.st_rdev);
+   if (!device)
+      goto out;
+
+   /* The parent is not referenced separately; it is owned by `device`. */
+   parent = udev_device_get_parent(device);
+   if (!parent)
+      goto out;
+
+   /* Parse before dropping the device: the string belongs to it. */
+   pci_id = udev_device_get_property_value(parent, "PCI_ID");
+   if (pci_id &&
+       sscanf(pci_id, "%x:%x", &ddev->base.pci.vendor_id,
+              &ddev->base.pci.chip_id) == 2)
+      found = TRUE;
+
+  out:
+   /* Shared by the success and failure paths so that the udev objects are
+    * released in both cases. */
+   if (device)
+      udev_device_unref(device);
+   if (udev)
+      udev_unref(udev);
+
+   if (!found)
+      debug_printf("pci id for fd %d not found\n", ddev->fd);
+   return found;
+}
+
+/**
+ * Pick the driver name for the PCI IDs stored in \a ddev by scanning
+ * driver_map.
+ *
+ * A map entry matches when its vendor ID equals the device's and either its
+ * chip count is -1 or one of its listed chip IDs equals the device's.
+ * The first matching entry wins.
+ *
+ * \return TRUE and sets ddev->base.driver_name on a match; FALSE, leaving
+ *         driver_name untouched, otherwise.
+ */
+static boolean
+find_drm_driver_name(struct pipe_loader_drm_device *ddev)
+{
+   struct pipe_loader_device *dev = &ddev->base;
+   const char *name = NULL;
+   int entry, chip;
+
+   /* The table is terminated by an entry without a driver name. */
+   for (entry = 0; !name && driver_map[entry].driver; entry++) {
+      if (driver_map[entry].vendor_id != dev->pci.vendor_id)
+         continue;
+
+      if (driver_map[entry].num_chips_ids == -1) {
+         /* Entry matches on the vendor ID alone. */
+         name = driver_map[entry].driver;
+         continue;
+      }
+
+      for (chip = 0; chip < driver_map[entry].num_chips_ids; chip++) {
+         if (driver_map[entry].chip_ids[chip] == dev->pci.chip_id) {
+            name = driver_map[entry].driver;
+            break;
+         }
+      }
+   }
+
+   if (!name)
+      return FALSE;
+
+   dev->driver_name = name;
+   debug_printf("driver for %04x:%04x: %s\n", dev->pci.vendor_id,
+                dev->pci.chip_id, dev->driver_name);
+   return TRUE;
+}
+
+static struct pipe_loader_ops pipe_loader_drm_ops;
+
+/**
+ * Create a loader device for an already opened DRM file descriptor.
+ *
+ * \param dev Receives the new device on success; left untouched on failure.
+ * \param fd  DRM file descriptor.  On success it is stored in the device
+ *            and closed when the device is released; on failure it is not
+ *            closed here and stays with the caller.
+ * \return TRUE on success; FALSE if allocation fails, the PCI ID cannot be
+ *         determined, or no driver is known for it.
+ */
+boolean
+pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd)
+{
+   struct pipe_loader_drm_device *ddev = CALLOC_STRUCT(pipe_loader_drm_device);
+
+   /* Out of memory: report failure instead of dereferencing NULL. */
+   if (!ddev)
+      return FALSE;
+
+   ddev->base.type = PIPE_LOADER_DEVICE_PCI;
+   ddev->base.ops = &pipe_loader_drm_ops;
+   ddev->fd = fd;
+
+   if (!find_drm_pci_id(ddev))
+      goto fail;
+
+   if (!find_drm_driver_name(ddev))
+      goto fail;
+
+   *dev = &ddev->base;
+   return TRUE;
+
+  fail:
+   FREE(ddev);
+   return FALSE;
+}
+
+/**
+ * Open the DRM device node with the given minor number for reading and
+ * writing.
+ *
+ * \return The descriptor returned by open(), negative on failure.
+ */
+static int
+open_drm_minor(int minor)
+{
+   char node[PATH_MAX];
+   int fd;
+
+   /* DRM_DEV_NAME is the format string taking the directory and minor. */
+   snprintf(node, sizeof(node), DRM_DEV_NAME, DRM_DIR_NAME, minor);
+   fd = open(node, O_RDWR, 0);
+
+   return fd;
+}
+
+/**
+ * Enumerate the DRM devices that have a known gallium driver.
+ *
+ * Every DRM minor that can be opened is probed.  Only minors for which a
+ * device could actually be created are counted, so the first
+ * MIN2(ndev, return value) entries of \a devs are always valid devices.
+ *
+ * \param devs Array filled with the devices found.
+ * \param ndev Capacity of \a devs.
+ * \return Number of usable DRM devices found, which may exceed \a ndev.
+ */
+int
+pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev)
+{
+   int i, j, fd;
+
+   for (i = 0, j = 0; i < DRM_MAX_MINOR; i++) {
+      struct pipe_loader_device *dev;
+
+      fd = open_drm_minor(i);
+      if (fd < 0)
+         continue;
+
+      /* A failed probe leaves the fd with us and must not be counted;
+       * otherwise the caller would see an unset slot in devs. */
+      if (!pipe_loader_drm_probe_fd(&dev, fd)) {
+         close(fd);
+         continue;
+      }
+
+      if (j < ndev) {
+         devs[j] = dev;
+      } else {
+         /* No room left: count the device but give it back right away.
+          * The release hook also closes the fd. */
+         dev->ops->release(&dev);
+      }
+
+      j++;
+   }
+
+   return j;
+}
+
+/**
+ * Release hook of the DRM backend: closes the driver module if one was
+ * loaded, closes the DRM fd, frees the device and clears the caller's
+ * pointer.
+ */
+static void
+pipe_loader_drm_release(struct pipe_loader_device **dev)
+{
+   struct pipe_loader_drm_device *drm = pipe_loader_drm_device(*dev);
+   struct util_dl_library *module = drm->lib;
+   const int drm_fd = drm->fd;
+
+   if (module != NULL)
+      util_dl_close(module);
+
+   close(drm_fd);
+   FREE(drm);
+   *dev = NULL;
+}
+
+/**
+ * create_screen hook of the DRM backend.
+ *
+ * Loads the driver module on first use (it is kept in the device for later
+ * calls), looks up its "driver_descriptor" symbol and asks it to create a
+ * screen on the device's fd.
+ *
+ * \return The new screen, or NULL if the module cannot be found, lacks the
+ *         descriptor, or the descriptor names a different driver.
+ */
+static struct pipe_screen *
+pipe_loader_drm_create_screen(struct pipe_loader_device *dev,
+                              const char *library_paths)
+{
+   struct pipe_loader_drm_device *drm = pipe_loader_drm_device(dev);
+   const struct drm_driver_descriptor *desc;
+
+   if (drm->lib == NULL) {
+      drm->lib = pipe_loader_find_module(dev, library_paths);
+      if (drm->lib == NULL)
+         return NULL;
+   }
+
+   desc = (const struct drm_driver_descriptor *)
+      util_dl_get_proc_address(drm->lib, "driver_descriptor");
+   if (desc == NULL)
+      return NULL;
+
+   /* The module must describe the driver we were looking for. */
+   if (strcmp(desc->name, drm->base.driver_name) != 0)
+      return NULL;
+
+   return desc->create_screen(drm->fd);
+}
+
+/* Callback table installed in every device created by the DRM backend. */
+static struct pipe_loader_ops pipe_loader_drm_ops = {
+   .create_screen = pipe_loader_drm_create_screen,
+   .release = pipe_loader_drm_release
+};
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h b/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h

new file mode 100644 (file)

index 0000000..0be833a
--- /dev/null
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h
@@ -0,0 +1,47 @@
+/**************************************************************************
+ *
+ * Copyright 2012 Francisco Jerez
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef PIPE_LOADER_PRIV_H
+#define PIPE_LOADER_PRIV_H
+
+#include "pipe_loader.h"
+
+/**
+ * Per-backend callbacks attached to each pipe_loader_device.
+ */
+struct pipe_loader_ops {
+   /** Create a pipe_screen for \a dev; see pipe_loader_create_screen(). */
+   struct pipe_screen *(*create_screen)(struct pipe_loader_device *dev,
+                                        const char *library_paths);
+
+   /** Destroy the device; takes the address of the caller's pointer. */
+   void (*release)(struct pipe_loader_device **dev);
+};
+
+/**
+ * Open the pipe driver module that handles a specified device.
+ */
+struct util_dl_library *
+pipe_loader_find_module(struct pipe_loader_device *dev,
+                        const char *library_paths);
+
+#endif /* PIPE_LOADER_PRIV_H */
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c

new file mode 100644 (file)

index 0000000..c2b78c6
--- /dev/null
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -0,0 +1,107 @@
+/**************************************************************************
+ *
+ * Copyright 2012 Francisco Jerez
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe_loader_priv.h"
+
+#include "util/u_memory.h"
+#include "util/u_dl.h"
+#include "sw/null/null_sw_winsys.h"
+#include "target-helpers/inline_sw_helper.h"
+#include "state_tracker/xlib_sw_winsys.h"
+
+/**
+ * Software backend's private view of a device.
+ *
+ * \a base must stay the first member: the cast macro below converts a
+ * pointer to the base structure straight into a pointer to this one.
+ */
+struct pipe_loader_sw_device {
+   struct pipe_loader_device base;
+   struct util_dl_library *lib; /* driver module, loaded on first create_screen */
+   struct sw_winsys *ws;        /* winsys created at probe time */
+};
+
+/* Downcast from the generic device to the software-specific structure. */
+#define pipe_loader_sw_device(dev) ((struct pipe_loader_sw_device *)dev)
+
+static struct pipe_loader_ops pipe_loader_sw_ops;
+
+/**
+ * Winsys constructors, one software device per entry.  The Xlib entry is
+ * only present when HAVE_WINSYS_XLIB is defined; the null winsys entry is
+ * always present.
+ */
+static struct sw_winsys *(*backends[])() = {
+#ifdef HAVE_WINSYS_XLIB
+   x11_sw_create,
+#endif
+   null_sw_create
+};
+
+/**
+ * Create one software device per winsys backend.
+ *
+ * Devices are only instantiated for the first \a ndev backends; the
+ * remaining ones are counted but not created.
+ *
+ * \param devs Array filled with the devices created.
+ * \param ndev Capacity of \a devs.
+ * \return Number of winsys backends, or the number of devices created so
+ *         far if a device allocation fails.
+ */
+int
+pipe_loader_sw_probe(struct pipe_loader_device **devs, int ndev)
+{
+   int i;
+
+   for (i = 0; i < Elements(backends); i++) {
+      if (i < ndev) {
+         struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+
+         /* Out of memory: stop here so that the returned count only
+          * covers slots of devs that hold a valid device. */
+         if (!sdev)
+            break;
+
+         sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
+         sdev->base.driver_name = "swrast";
+         sdev->base.ops = &pipe_loader_sw_ops;
+         sdev->ws = backends[i]();
+         devs[i] = &sdev->base;
+      }
+   }
+
+   return i;
+}
+
+/**
+ * Release hook of the software backend: closes the driver module if one
+ * was loaded, frees the device and clears the caller's pointer.
+ */
+static void
+pipe_loader_sw_release(struct pipe_loader_device **dev)
+{
+   struct pipe_loader_sw_device *sw = pipe_loader_sw_device(*dev);
+   struct util_dl_library *module = sw->lib;
+
+   if (module != NULL)
+      util_dl_close(module);
+
+   FREE(sw);
+   *dev = NULL;
+}
+
+/**
+ * create_screen hook of the software backend.
+ *
+ * Loads the driver module on first use (it is kept in the device for later
+ * calls), looks up its "swrast_create_screen" symbol and calls it with the
+ * winsys created at probe time.
+ *
+ * \return The new screen, or NULL if the module or the symbol cannot be
+ *         found.
+ */
+static struct pipe_screen *
+pipe_loader_sw_create_screen(struct pipe_loader_device *dev,
+                             const char *library_paths)
+{
+   struct pipe_loader_sw_device *sw = pipe_loader_sw_device(dev);
+   struct pipe_screen *(*create)(struct sw_winsys *);
+
+   if (sw->lib == NULL) {
+      sw->lib = pipe_loader_find_module(dev, library_paths);
+      if (sw->lib == NULL)
+         return NULL;
+   }
+
+   create = (void *)util_dl_get_proc_address(sw->lib, "swrast_create_screen");
+
+   return create ? create(sw->ws) : NULL;
+}
+
+/* Callback table installed in every device created by the software backend. */
+static struct pipe_loader_ops pipe_loader_sw_ops = {
+   .create_screen = pipe_loader_sw_create_screen,
+   .release = pipe_loader_sw_release
+};
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c

index 6ec2b0d8f216207f7a7d80dcae81ff8daa02da52..1c24b9bdbeda163a12b3ae4f5787ac72a9c0df32 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -104,12 +104,11 @@ tgsi_default_declaration( void )
     declaration.NrTokens = 1;
     declaration.File = TGSI_FILE_NULL;
     declaration.UsageMask = TGSI_WRITEMASK_XYZW;
-   declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT;
+   declaration.Interpolate = 0;
     declaration.Dimension = 0;
     declaration.Semantic = 0;
-   declaration.Centroid = 0;
     declaration.Invariant = 0;
-   declaration.CylindricalWrap = 0;
+   declaration.Local = 0;
  
     return declaration;
  }
@@ -121,9 +120,8 @@ tgsi_build_declaration(
     unsigned interpolate,
     unsigned dimension,
     unsigned semantic,
-   unsigned centroid,
     unsigned invariant,
-   unsigned cylindrical_wrap,
+   unsigned local,
     struct tgsi_header *header )
  {
     struct tgsi_declaration declaration;
@@ -137,9 +135,8 @@ tgsi_build_declaration(
     declaration.Interpolate = interpolate;
     declaration.Dimension = dimension;
     declaration.Semantic = semantic;
-   declaration.Centroid = centroid;
     declaration.Invariant = invariant;
-   declaration.CylindricalWrap = cylindrical_wrap;
+   declaration.Local = local;
  
     header_bodysize_grow( header );
  
@@ -194,6 +191,36 @@ tgsi_build_declaration_dimension(unsigned index_2d,
     return dd;
  }
  
+/**
+ * Return an interpolation token with its defaults: constant
+ * interpolation, no centroid sampling and no cylindrical wrap.
+ */
+static struct tgsi_declaration_interp
+tgsi_default_declaration_interp( void )
+{
+   struct tgsi_declaration_interp interp;
+
+   interp.CylindricalWrap = 0;
+   interp.Centroid = 0;
+   interp.Interpolate = TGSI_INTERPOLATE_CONSTANT;
+
+   return interp;
+}
+
+/**
+ * Build an interpolation token from the given values and account for it
+ * in the enclosing declaration and header through declaration_grow().
+ */
+static struct tgsi_declaration_interp
+tgsi_build_declaration_interp(unsigned interpolate,
+                              unsigned centroid,
+                              unsigned cylindrical_wrap,
+                              struct tgsi_declaration *declaration,
+                              struct tgsi_header *header)
+{
+   struct tgsi_declaration_interp interp;
+
+   /* Start from the defaults like the sibling builders do; every field
+    * set there is overwritten below. */
+   interp = tgsi_default_declaration_interp();
+   interp.Interpolate = interpolate;
+   interp.Centroid = centroid;
+   interp.CylindricalWrap = cylindrical_wrap;
+
+   declaration_grow(declaration, header);
+
+   return interp;
+}
+
  static struct tgsi_declaration_semantic
  tgsi_default_declaration_semantic( void )
  {
@@ -227,42 +254,72 @@ tgsi_build_declaration_semantic(
     return ds;
  }
  
-
  static struct tgsi_declaration_resource
  tgsi_default_declaration_resource(void)
  {
-   struct tgsi_declaration_resource declaration_resource;
+   struct tgsi_declaration_resource dr;
  
-   declaration_resource.Resource = TGSI_TEXTURE_UNKNOWN;
-   declaration_resource.ReturnTypeX = PIPE_TYPE_UNORM;
-   declaration_resource.ReturnTypeY = PIPE_TYPE_UNORM;
-   declaration_resource.ReturnTypeZ = PIPE_TYPE_UNORM;
-   declaration_resource.ReturnTypeW = PIPE_TYPE_UNORM;
+   dr.Resource = TGSI_TEXTURE_BUFFER;
+   dr.Raw = 0;
+   dr.Writable = 0;
  
-   return declaration_resource;
+   return dr;
  }
  
  static struct tgsi_declaration_resource
  tgsi_build_declaration_resource(unsigned texture,
-                                unsigned return_type_x,
-                                unsigned return_type_y,
-                                unsigned return_type_z,
-                                unsigned return_type_w,
+                                unsigned raw,
+                                unsigned writable,
                                  struct tgsi_declaration *declaration,
                                  struct tgsi_header *header)
  {
-   struct tgsi_declaration_resource declaration_resource;
+   struct tgsi_declaration_resource dr;
+
+   dr = tgsi_default_declaration_resource();
+   dr.Resource = texture;
+   dr.Raw = raw;
+   dr.Writable = writable;
+
+   declaration_grow(declaration, header);
+
+   return dr;
+}
+
+static struct tgsi_declaration_sampler_view
+tgsi_default_declaration_sampler_view(void)
+{
+   struct tgsi_declaration_sampler_view dsv;
  
-   declaration_resource = tgsi_default_declaration_resource();
-   declaration_resource.Resource = texture;
-   declaration_resource.ReturnTypeX = return_type_x;
-   declaration_resource.ReturnTypeY = return_type_y;
-   declaration_resource.ReturnTypeZ = return_type_z;
-   declaration_resource.ReturnTypeW = return_type_w;
+   dsv.Resource = TGSI_TEXTURE_BUFFER;
+   dsv.ReturnTypeX = PIPE_TYPE_UNORM;
+   dsv.ReturnTypeY = PIPE_TYPE_UNORM;
+   dsv.ReturnTypeZ = PIPE_TYPE_UNORM;
+   dsv.ReturnTypeW = PIPE_TYPE_UNORM;
+
+   return dsv;
+}
+
+static struct tgsi_declaration_sampler_view
+tgsi_build_declaration_sampler_view(unsigned texture,
+                                    unsigned return_type_x,
+                                    unsigned return_type_y,
+                                    unsigned return_type_z,
+                                    unsigned return_type_w,
+                                    struct tgsi_declaration *declaration,
+                                    struct tgsi_header *header)
+{
+   struct tgsi_declaration_sampler_view dsv;
+
+   dsv = tgsi_default_declaration_sampler_view();
+   dsv.Resource = texture;
+   dsv.ReturnTypeX = return_type_x;
+   dsv.ReturnTypeY = return_type_y;
+   dsv.ReturnTypeZ = return_type_z;
+   dsv.ReturnTypeW = return_type_w;
  
     declaration_grow(declaration, header);
  
-   return declaration_resource;
+   return dsv;
  }
  
  
@@ -274,8 +331,10 @@ tgsi_default_full_declaration( void )
     full_declaration.Declaration  = tgsi_default_declaration();
     full_declaration.Range = tgsi_default_declaration_range();
     full_declaration.Semantic = tgsi_default_declaration_semantic();
+   full_declaration.Interp = tgsi_default_declaration_interp();
     full_declaration.ImmediateData.u = NULL;
     full_declaration.Resource = tgsi_default_declaration_resource();
+   full_declaration.SamplerView = tgsi_default_declaration_sampler_view();
  
     return full_declaration;
  }
@@ -302,9 +361,8 @@ tgsi_build_full_declaration(
        full_decl->Declaration.Interpolate,
        full_decl->Declaration.Dimension,
        full_decl->Declaration.Semantic,
-      full_decl->Declaration.Centroid,
        full_decl->Declaration.Invariant,
-      full_decl->Declaration.CylindricalWrap,
+      full_decl->Declaration.Local,
        header );
  
     if (maxsize <= size)
@@ -332,6 +390,22 @@ tgsi_build_full_declaration(
                                               header);
     }
  
+   if (full_decl->Declaration.Interpolate) {
+      struct tgsi_declaration_interp *di;
+
+      if (maxsize <= size) {
+         return 0;
+      }
+      di = (struct tgsi_declaration_interp *)&tokens[size];
+      size++;
+
+      *di = tgsi_build_declaration_interp(full_decl->Interp.Interpolate,
+                                          full_decl->Interp.Centroid,
+                                          full_decl->Interp.CylindricalWrap,
+                                          declaration,
+                                          header);
+   }
+
     if( full_decl->Declaration.Semantic ) {
        struct tgsi_declaration_semantic *ds;
  
@@ -375,14 +449,31 @@ tgsi_build_full_declaration(
        size++;
  
        *dr = tgsi_build_declaration_resource(full_decl->Resource.Resource,
-                                            full_decl->Resource.ReturnTypeX,
-                                            full_decl->Resource.ReturnTypeY,
-                                            full_decl->Resource.ReturnTypeZ,
-                                            full_decl->Resource.ReturnTypeW,
+                                            full_decl->Resource.Raw,
+                                            full_decl->Resource.Writable,
                                              declaration,
                                              header);
     }
  
+   if (full_decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
+      struct tgsi_declaration_sampler_view *dsv;
+
+      if (maxsize <= size) {
+         return  0;
+      }
+      dsv = (struct tgsi_declaration_sampler_view *)&tokens[size];
+      size++;
+
+      *dsv = tgsi_build_declaration_sampler_view(
+         full_decl->SamplerView.Resource,
+         full_decl->SamplerView.ReturnTypeX,
+         full_decl->SamplerView.ReturnTypeY,
+         full_decl->SamplerView.ReturnTypeZ,
+         full_decl->SamplerView.ReturnTypeW,
+         declaration,
+         header);
+   }
+
     return size;
  }
  
@@ -405,11 +496,13 @@ tgsi_default_immediate( void )
  
  static struct tgsi_immediate
  tgsi_build_immediate(
-   struct tgsi_header *header )
+   struct tgsi_header *header,
+   unsigned type )
  {
     struct tgsi_immediate immediate;
  
     immediate = tgsi_default_immediate();
+   immediate.DataType = type;
  
     header_bodysize_grow( header );
  
@@ -442,21 +535,6 @@ immediate_grow(
     header_bodysize_grow( header );
  }
  
-static union tgsi_immediate_data
-tgsi_build_immediate_float32(
-   float value,
-   struct tgsi_immediate *immediate,
-   struct tgsi_header *header )
-{
-   union tgsi_immediate_data immediate_data;
-
-   immediate_data.Float = value;
-
-   immediate_grow( immediate, header );
-
-   return immediate_data;
-}
-
  unsigned
  tgsi_build_full_immediate(
     const struct tgsi_full_immediate *full_imm,
@@ -472,7 +550,7 @@ tgsi_build_full_immediate(
     immediate = (struct tgsi_immediate *) &tokens[size];
     size++;
  
-   *immediate = tgsi_build_immediate( header );
+   *immediate = tgsi_build_immediate( header, full_imm->Immediate.DataType );
  
     assert( full_imm->Immediate.NrTokens <= 4 + 1 );
  
@@ -481,13 +559,12 @@ tgsi_build_full_immediate(
  
        if( maxsize <= size )
           return  0;
+
        data = (union tgsi_immediate_data *) &tokens[size];
-      size++;
+      *data = full_imm->u[i];
  
-      *data = tgsi_build_immediate_float32(
-         full_imm->u[i].Float,
-         immediate,
-         header );
+      immediate_grow( immediate, header );
+      size++;
     }
  
     return size;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c

index 635212b7e867dba0d96e66c2095846c8d0e34327..383c54590afca5de65b8a578e74dd58117223791 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -271,6 +271,9 @@ iter_declaration(
        ctx,
        decl->Declaration.UsageMask );
  
+   if (decl->Declaration.Local)
+      TXT( ", LOCAL" );
+
     if (decl->Declaration.Semantic) {
        TXT( ", " );
        ENM( decl->Semantic.Name, tgsi_semantic_names );
@@ -285,53 +288,64 @@ iter_declaration(
     if (decl->Declaration.File == TGSI_FILE_RESOURCE) {
        TXT(", ");
        ENM(decl->Resource.Resource, tgsi_texture_names);
+      if (decl->Resource.Writable)
+         TXT(", WR");
+      if (decl->Resource.Raw)
+         TXT(", RAW");
+   }
+
+   if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
        TXT(", ");
-      if ((decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeY) &&
-          (decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeZ) &&
-          (decl->Resource.ReturnTypeX == decl->Resource.ReturnTypeW)) {
-         ENM(decl->Resource.ReturnTypeX, tgsi_type_names);
+      ENM(decl->SamplerView.Resource, tgsi_texture_names);
+      TXT(", ");
+      if ((decl->SamplerView.ReturnTypeX == decl->SamplerView.ReturnTypeY) &&
+          (decl->SamplerView.ReturnTypeX == decl->SamplerView.ReturnTypeZ) &&
+          (decl->SamplerView.ReturnTypeX == decl->SamplerView.ReturnTypeW)) {
+         ENM(decl->SamplerView.ReturnTypeX, tgsi_type_names);
        } else {
-         ENM(decl->Resource.ReturnTypeX, tgsi_type_names);
+         ENM(decl->SamplerView.ReturnTypeX, tgsi_type_names);
           TXT(", ");
-         ENM(decl->Resource.ReturnTypeY, tgsi_type_names);
+         ENM(decl->SamplerView.ReturnTypeY, tgsi_type_names);
           TXT(", ");
-         ENM(decl->Resource.ReturnTypeZ, tgsi_type_names);
+         ENM(decl->SamplerView.ReturnTypeZ, tgsi_type_names);
           TXT(", ");
-         ENM(decl->Resource.ReturnTypeW, tgsi_type_names);
+         ENM(decl->SamplerView.ReturnTypeW, tgsi_type_names);
        }
-
     }
  
-   if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT &&
-       decl->Declaration.File == TGSI_FILE_INPUT)
-   {
-      TXT( ", " );
-      ENM( decl->Declaration.Interpolate, tgsi_interpolate_names );
-   }
+   if (decl->Declaration.Interpolate) {
+      if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT &&
+          decl->Declaration.File == TGSI_FILE_INPUT)
+      {
+         TXT( ", " );
+         ENM( decl->Interp.Interpolate, tgsi_interpolate_names );
+      }
+
+      if (decl->Interp.Centroid) {
+         TXT( ", CENTROID" );
+      }
  
-   if (decl->Declaration.Centroid) {
-      TXT( ", CENTROID" );
+      if (decl->Interp.CylindricalWrap) {
+         TXT(", CYLWRAP_");
+         if (decl->Interp.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_X) {
+            CHR('X');
+         }
+         if (decl->Interp.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_Y) {
+            CHR('Y');
+         }
+         if (decl->Interp.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_Z) {
+            CHR('Z');
+         }
+         if (decl->Interp.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_W) {
+            CHR('W');
+         }
+      }
     }
  
     if (decl->Declaration.Invariant) {
        TXT( ", INVARIANT" );
     }
  
-   if (decl->Declaration.CylindricalWrap) {
-      TXT(", CYLWRAP_");
-      if (decl->Declaration.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_X) {
-         CHR('X');
-      }
-      if (decl->Declaration.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_Y) {
-         CHR('Y');
-      }
-      if (decl->Declaration.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_Z) {
-         CHR('Z');
-      }
-      if (decl->Declaration.CylindricalWrap & TGSI_CYLINDRICAL_WRAP_W) {
-         CHR('W');
-      }
-   }
  
     if (decl->Declaration.File == TGSI_FILE_IMMEDIATE_ARRAY) {
        unsigned i;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c

index c4ad34b1e6150b22bd493c91b11ccb7c42b86cf4..5e23f5da65b718edf8865f0a9ef111e97b6824bc 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2121,7 +2121,7 @@ exec_sample(struct tgsi_exec_machine *mach,
        control = tgsi_sampler_lod_bias;
     }
  
-   switch (mach->Resources[resource_unit].Resource) {
+   switch (mach->SamplerViews[resource_unit].Resource) {
     case TGSI_TEXTURE_1D:
     case TGSI_TEXTURE_SHADOW1D:
        FETCH(&r[0], 0, TGSI_CHAN_X);
@@ -2215,7 +2215,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,
      * XXX: This is fake SAMPLE_D -- the derivatives are not taken into account, yet.
      */
  
-   switch (mach->Resources[resource_unit].Resource) {
+   switch (mach->SamplerViews[resource_unit].Resource) {
     case TGSI_TEXTURE_1D:
     case TGSI_TEXTURE_SHADOW1D:
  
@@ -2338,8 +2338,8 @@ static void
  exec_declaration(struct tgsi_exec_machine *mach,
                   const struct tgsi_full_declaration *decl)
  {
-   if (decl->Declaration.File == TGSI_FILE_RESOURCE) {
-      mach->Resources[decl->Range.First] = decl->Resource;
+   if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
+      mach->SamplerViews[decl->Range.First] = decl->SamplerView;
        return;
     }
  
@@ -2371,7 +2371,7 @@ exec_declaration(struct tgsi_exec_machine *mach,
              eval_coef_func eval;
              uint i, j;
  
-            switch (decl->Declaration.Interpolate) {
+            switch (decl->Interp.Interpolate) {
              case TGSI_INTERPOLATE_CONSTANT:
                 eval = eval_constant_coef;
                 break;
@@ -4154,11 +4154,11 @@ exec_instruction(
        exec_endswitch(mach);
        break;
  
-   case TGSI_OPCODE_LOAD:
+   case TGSI_OPCODE_SAMPLE_I:
        assert(0);
        break;
  
-   case TGSI_OPCODE_LOAD_MS:
+   case TGSI_OPCODE_SAMPLE_I_MS:
        assert(0);
        break;
  
@@ -4190,7 +4190,7 @@ exec_instruction(
        assert(0);
        break;
  
-   case TGSI_OPCODE_RESINFO:
+   case TGSI_OPCODE_SVIEWINFO:
        assert(0);
        break;
  
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h

index d9e93ce138d1321a3e0ad50c9f04ba8d66895f29..0ecb4e952bb48a0f127eb3c45fc9c21421595112 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -331,7 +331,8 @@ struct tgsi_exec_machine
     struct tgsi_full_declaration *Declarations;
     uint NumDeclarations;
  
-   struct tgsi_declaration_resource Resources[PIPE_MAX_SHADER_RESOURCES];
+   struct tgsi_declaration_sampler_view
+      SamplerViews[PIPE_MAX_SHADER_SAMPLER_VIEWS];
  
     boolean UsedGeometryShader;
  };
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c

index 81df96b3c7a0799d069d660e98c5a8f48839170a..8bf9aeb428493ff004f8eef72f23f190450b454f 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -183,22 +183,39 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
     { 0, 0, 0, 0, 0, 0, NONE, "DEFAULT", TGSI_OPCODE_DEFAULT },
     { 0, 0, 0, 0, 0, 0, NONE, "ENDSWITCH", TGSI_OPCODE_ENDSWITCH },
  
-   { 1, 2, 0, 0, 0, 0, OTHR, "LOAD",        TGSI_OPCODE_LOAD },
-   { 1, 2, 0, 0, 0, 0, OTHR, "LOAD_MS",     TGSI_OPCODE_LOAD_MS },
     { 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE",      TGSI_OPCODE_SAMPLE },
+   { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_I",    TGSI_OPCODE_SAMPLE_I },
+   { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_I_MS", TGSI_OPCODE_SAMPLE_I_MS },
     { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_B",    TGSI_OPCODE_SAMPLE_B },
     { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C",    TGSI_OPCODE_SAMPLE_C },
     { 1, 4, 0, 0, 0, 0, OTHR, "SAMPLE_C_LZ", TGSI_OPCODE_SAMPLE_C_LZ },
     { 1, 5, 0, 0, 0, 0, OTHR, "SAMPLE_D",    TGSI_OPCODE_SAMPLE_D },
     { 1, 3, 0, 0, 0, 0, OTHR, "SAMPLE_L",    TGSI_OPCODE_SAMPLE_L },
     { 1, 3, 0, 0, 0, 0, OTHR, "GATHER4",     TGSI_OPCODE_GATHER4 },
-   { 1, 2, 0, 0, 0, 0, OTHR, "RESINFO",     TGSI_OPCODE_RESINFO },
+   { 1, 2, 0, 0, 0, 0, OTHR, "SVIEWINFO",   TGSI_OPCODE_SVIEWINFO },
     { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_POS",  TGSI_OPCODE_SAMPLE_POS },
     { 1, 2, 0, 0, 0, 0, OTHR, "SAMPLE_INFO", TGSI_OPCODE_SAMPLE_INFO },
     { 1, 1, 0, 0, 0, 0, COMP, "UARL", TGSI_OPCODE_UARL },
     { 1, 3, 0, 0, 0, 0, COMP, "UCMP", TGSI_OPCODE_UCMP },
     { 1, 1, 0, 0, 0, 0, COMP, "IABS", TGSI_OPCODE_IABS },
     { 1, 1, 0, 0, 0, 0, COMP, "ISSG", TGSI_OPCODE_ISSG },
+   { 1, 2, 0, 0, 0, 0, OTHR, "LOAD", TGSI_OPCODE_LOAD },
+   { 1, 2, 0, 0, 0, 0, OTHR, "STORE", TGSI_OPCODE_STORE },
+   { 1, 0, 0, 0, 0, 0, OTHR, "MFENCE", TGSI_OPCODE_MFENCE },
+   { 1, 0, 0, 0, 0, 0, OTHR, "LFENCE", TGSI_OPCODE_LFENCE },
+   { 1, 0, 0, 0, 0, 0, OTHR, "SFENCE", TGSI_OPCODE_SFENCE },
+   { 0, 0, 0, 0, 0, 0, OTHR, "BARRIER", TGSI_OPCODE_BARRIER },
+
+   { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUADD", TGSI_OPCODE_ATOMUADD },
+   { 1, 3, 0, 0, 0, 0, OTHR, "ATOMXCHG", TGSI_OPCODE_ATOMXCHG },
+   { 1, 4, 0, 0, 0, 0, OTHR, "ATOMCAS", TGSI_OPCODE_ATOMCAS },
+   { 1, 3, 0, 0, 0, 0, OTHR, "ATOMAND", TGSI_OPCODE_ATOMAND },
+   { 1, 3, 0, 0, 0, 0, OTHR, "ATOMOR", TGSI_OPCODE_ATOMOR },
+   { 1, 3, 0, 0, 0, 0, OTHR, "ATOMXOR", TGSI_OPCODE_ATOMXOR },
+   { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMIN", TGSI_OPCODE_ATOMUMIN },
+   { 1, 3, 0, 0, 0, 0, OTHR, "ATOMUMAX", TGSI_OPCODE_ATOMUMAX },
+   { 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMIN", TGSI_OPCODE_ATOMIMIN },
+   { 1, 3, 0, 0, 0, 0, OTHR, "ATOMIMAX", TGSI_OPCODE_ATOMIMAX }
  };
  
  const struct tgsi_opcode_info *
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h

index b5d4504425ba9d64b9b67f98fa8f847ffb5072f3..96b864f50d0335998cd0994ef85fe538da7c6267 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -168,16 +168,16 @@ OP01(CASE)
  OP00(DEFAULT)
  OP00(ENDSWITCH)
  
-OP12(LOAD)
-OP12(LOAD_MS)
  OP13(SAMPLE)
+OP12(SAMPLE_I)
+OP12(SAMPLE_I_MS)
  OP14(SAMPLE_B)
  OP14(SAMPLE_C)
  OP14(SAMPLE_C_LZ)
  OP15(SAMPLE_D)
  OP13(SAMPLE_L)
  OP13(GATHER4)
-OP12(RESINFO)
+OP12(SVIEWINFO)
  OP13(SAMPLE_POS)
  OP12(SAMPLE_INFO)
  
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c

index e1902eb1862570f122fbba3673b3fe8c50c21b73..45c5c41ec82d7632b7c1109a302fedafb59f7c0b 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -113,6 +113,10 @@ tgsi_parse_token(
           next_token(ctx, &decl->Dim);
        }
  
+      if( decl->Declaration.Interpolate ) {
+         next_token( ctx, &decl->Interp );
+      }
+
        if( decl->Declaration.Semantic ) {
           next_token( ctx, &decl->Semantic );
        }
@@ -132,6 +136,10 @@ tgsi_parse_token(
           next_token(ctx, &decl->Resource);
        }
  
+      if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
+         next_token(ctx, &decl->SamplerView);
+      }
+
        break;
     }
  
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h

index f7b7e6edc98f4a5eca26888a35d634b83324161e..3f8bf99e3c169d2d3b6a9161756ff83189ce728e 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -67,9 +67,11 @@ struct tgsi_full_declaration
     struct tgsi_declaration Declaration;
     struct tgsi_declaration_range Range;
     struct tgsi_declaration_dimension Dim;
+   struct tgsi_declaration_interp Interp;
     struct tgsi_declaration_semantic Semantic;
     struct tgsi_immediate_array_data ImmediateData;
     struct tgsi_declaration_resource Resource;
+   struct tgsi_declaration_sampler_view SamplerView;
  };
  
  struct tgsi_full_immediate
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c

index 1e00e2e54ed50a50e7474f363e3038c6f3b4799c..ce728ecee066106620ae07dd2b35e55bd4a60680 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -1170,7 +1170,7 @@ emit_declaration(
        for( i = first; i <= last; i++ ) {
           for( j = 0; j < NUM_CHANNELS; j++ ) {
              if( mask & (1 << j) ) {
-               switch( decl->Declaration.Interpolate ) {
+               switch( decl->Interp.Interpolate ) {
                 case TGSI_INTERPOLATE_CONSTANT:
                    emit_coef_a0( func, 0, i, j );
                    emit_inputs( func, 0, i, j );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c

index e4e9c032e0276f394cbfe3dbb849a8e052572a5d..df299baa9c13581464ad7346b2244e48af7c97be 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -68,7 +68,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
     procType = parse.FullHeader.Processor.Processor;
     assert(procType == TGSI_PROCESSOR_FRAGMENT ||
            procType == TGSI_PROCESSOR_VERTEX ||
-          procType == TGSI_PROCESSOR_GEOMETRY);
+          procType == TGSI_PROCESSOR_GEOMETRY ||
+          procType == TGSI_PROCESSOR_COMPUTE);
  
  
     /**
@@ -157,9 +158,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                 if (file == TGSI_FILE_INPUT) {
                    info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name;
                    info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index;
-                  info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate;
-                  info->input_centroid[reg] = (ubyte)fulldecl->Declaration.Centroid;
-                  info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Declaration.CylindricalWrap;
+                  info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate;
+                  info->input_centroid[reg] = (ubyte)fulldecl->Interp.Centroid;
+                  info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
                    info->num_inputs++;
  
                    if (procType == TGSI_PROCESSOR_FRAGMENT &&
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c

index 520452ce1b88af17471364c15721c03e59ed5262..b5fd1fc0a650d090dd7349e1505fb9ae8f354f28 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -32,11 +32,12 @@
  #include "tgsi_strings.h"
  
  
-const char *tgsi_processor_type_names[3] =
+const char *tgsi_processor_type_names[4] =
  {
     "FRAG",
     "VERT",
-   "GEOM"
+   "GEOM",
+   "COMP"
  };
  
  const char *tgsi_file_names[TGSI_FILE_COUNT] =
@@ -53,7 +54,8 @@ const char *tgsi_file_names[TGSI_FILE_COUNT] =
     "SV",
     "IMMX",
     "TEMPX",
-   "RES"
+   "RES",
+   "SVIEW"
  };
  
  const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
@@ -72,12 +74,16 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
     "VERTEXID",
     "STENCIL",
     "CLIPDIST",
-   "CLIPVERTEX"
+   "CLIPVERTEX",
+   "GRID_SIZE",
+   "BLOCK_ID",
+   "BLOCK_SIZE",
+   "THREAD_ID"
  };
  
  const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] =
  {
-   "UNKNOWN",
+   "BUFFER",
     "1D",
     "2D",
     "3D",
@@ -90,7 +96,8 @@ const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] =
     "2DARRAY",
     "SHADOW1DARRAY",
     "SHADOW2DARRAY",
-   "SHADOWCUBE"
+   "SHADOWCUBE",
+   "UNKNOWN"
  };
  
  const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.h b/src/gallium/auxiliary/tgsi/tgsi_strings.h

index 0946a58f135e400ca84972ee000a118c8fd988d3..5c57e229c28e4b8a9583f8b7fc4c910228fde1da 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.h
@@ -38,7 +38,7 @@ extern "C" {
  #endif
  
  
-extern const char *tgsi_processor_type_names[3];
+extern const char *tgsi_processor_type_names[4];
  
  extern const char *tgsi_file_names[TGSI_FILE_COUNT];
  
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c

index 4b3d22c3072262d3148a6d0ae6ac5aa25b6cce98..279a046e202c6d6d8b2b132abc49157477a21d11 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -132,6 +132,23 @@ static boolean parse_uint( const char **pcur, uint *val )
     return FALSE;
  }
  
+/* Parse an integer with an optional leading '+' or '-' using parse_uint.
+ * *pcur and *val are written only on success (TRUE); else returns FALSE. */
+static boolean parse_int( const char **pcur, int *val )
+{
+   const char *cur = *pcur;
+   boolean negate = (*cur == '-');
+   uint magnitude;
+   if (*cur == '+' || *cur == '-')
+      cur++;
+   if (!parse_uint(&cur, &magnitude))
+      return FALSE;
+   /* Negate as unsigned: no (uint *) cast of val, no signed overflow. */
+   *val = (int)(negate ? 0u - magnitude : magnitude);
+   *pcur = cur;
+   return TRUE;
+}
+
  static boolean parse_identifier( const char **pcur, char *ret )
  {
     const char *cur = *pcur;
@@ -238,6 +255,8 @@ static boolean parse_header( struct translate_ctx *ctx )
        processor = TGSI_PROCESSOR_VERTEX;
     else if (str_match_no_case( &ctx->cur, "GEOM" ))
        processor = TGSI_PROCESSOR_GEOMETRY;
+   else if (str_match_no_case( &ctx->cur, "COMP" ))
+      processor = TGSI_PROCESSOR_COMPUTE;
     else {
        report_error( ctx, "Unknown header" );
        return FALSE;
@@ -447,24 +466,10 @@ parse_register_bracket(
           eat_opt_white(&ctx->cur);
        }
  
-      if (*ctx->cur == '+' || *ctx->cur == '-') {
-         boolean negate;
-
-         negate = *ctx->cur == '-';
-         ctx->cur++;
-         eat_opt_white( &ctx->cur );
-         if (!parse_uint( &ctx->cur, &uindex )) {
-            report_error( ctx, "Expected literal unsigned integer" );
-            return FALSE;
-         }
-         if (negate)
-            brackets->index = -(int) uindex;
-         else
-            brackets->index = (int) uindex;
-      }
-      else {
+      if (*ctx->cur == '+' || *ctx->cur == '-')
+         parse_int( &ctx->cur, &brackets->index );
+      else
           brackets->index = 0;
-      }
     }
     else {
        if (!parse_uint( &ctx->cur, &uindex )) {
@@ -819,6 +824,7 @@ parse_instruction(
     uint saturate = TGSI_SAT_NONE;
     const struct tgsi_opcode_info *info;
     struct tgsi_full_instruction inst;
+   const char *cur;
     uint advance;
  
     inst = tgsi_default_full_instruction();
@@ -864,7 +870,7 @@ parse_instruction(
      */
     eat_opt_white( &ctx->cur );
     for (i = 0; i < TGSI_OPCODE_LAST; i++) {
-      const char *cur = ctx->cur;
+      cur = ctx->cur;
  
        info = tgsi_get_opcode_info( i );
        if (match_inst_mnemonic(&cur, info)) {
@@ -938,22 +944,20 @@ parse_instruction(
        }
     }
  
-   if (info->is_branch) {
+   cur = ctx->cur;
+   eat_opt_white( &cur );
+   if (info->is_branch && *cur == ':') {
        uint target;
  
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur != ':') {
-         report_error( ctx, "Expected `:'" );
-         return FALSE;
-      }
-      ctx->cur++;
-      eat_opt_white( &ctx->cur );
-      if (!parse_uint( &ctx->cur, &target )) {
+      cur++;
+      eat_opt_white( &cur );
+      if (!parse_uint( &cur, &target )) {
           report_error( ctx, "Expected a label" );
           return FALSE;
        }
        inst.Instruction.Label = 1;
        inst.Label.Label = target;
+      ctx->cur = cur;
     }
  
     advance = tgsi_build_full_instruction(
@@ -970,10 +974,11 @@ parse_instruction(
  
  /* parses a 4-touple of the form {x, y, z, w}
   * where x, y, z, w are numbers */
-static boolean parse_immediate_data(struct translate_ctx *ctx,
-                                    float *values)
+static boolean parse_immediate_data(struct translate_ctx *ctx, unsigned type,
+                                    union tgsi_immediate_data *values)
  {
     unsigned i;
+   int ret;
  
     eat_opt_white( &ctx->cur );
     if (*ctx->cur != '{') {
@@ -991,8 +996,21 @@ static boolean parse_immediate_data(struct translate_ctx *ctx,
           ctx->cur++;
           eat_opt_white( &ctx->cur );
        }
-      if (!parse_float( &ctx->cur, &values[i] )) {
-         report_error( ctx, "Expected literal floating point" );
+
+      switch (type) {
+      case TGSI_IMM_FLOAT32:
+         ret = parse_float(&ctx->cur, &values[i].Float);
+         break;
+      case TGSI_IMM_UINT32:
+         ret = parse_uint(&ctx->cur, &values[i].Uint);
+         break;
+      case TGSI_IMM_INT32:
+         ret = parse_int(&ctx->cur, &values[i].Int);
+         break;
+      }
+
+      if (!ret) {
+         report_error( ctx, "Expected immediate constant" );
           return FALSE;
        }
     }
@@ -1013,7 +1031,7 @@ static boolean parse_declaration( struct translate_ctx *ctx )
     struct parsed_dcl_bracket brackets[2];
     int num_brackets;
     uint writemask;
-   const char *cur;
+   const char *cur, *cur2;
     uint advance;
     boolean is_vs_input;
     boolean is_imm_array;
@@ -1066,6 +1084,42 @@ static boolean parse_declaration( struct translate_ctx *ctx )
              report_error(ctx, "Expected texture target");
              return FALSE;
           }
+
+         cur2 = cur;
+         eat_opt_white(&cur2);
+         while (*cur2 == ',') {
+            cur2++;
+            eat_opt_white(&cur2);
+            if (str_match_no_case(&cur2, "RAW") &&
+                !is_digit_alpha_underscore(cur2)) {
+               decl.Resource.Raw = 1;
+
+            } else if (str_match_no_case(&cur2, "WR") &&
+                !is_digit_alpha_underscore(cur2)) {
+               decl.Resource.Writable = 1;
+
+            } else {
+               break;
+            }
+            cur = cur2;
+            eat_opt_white(&cur2);
+         }
+
+         ctx->cur = cur;
+
+      } else if (file == TGSI_FILE_SAMPLER_VIEW) {
+         for (i = 0; i < TGSI_TEXTURE_COUNT; i++) {
+            if (str_match_no_case(&cur, tgsi_texture_names[i])) {
+               if (!is_digit_alpha_underscore(cur)) {
+                  decl.SamplerView.Resource = i;
+                  break;
+               }
+            }
+         }
+         if (i == TGSI_TEXTURE_COUNT) {
+            report_error(ctx, "Expected texture target");
+            return FALSE;
+         }
           eat_opt_white( &cur );
           if (*cur != ',') {
              report_error( ctx, "Expected `,'" );
@@ -1079,16 +1133,16 @@ static boolean parse_declaration( struct translate_ctx *ctx )
                    if (!is_digit_alpha_underscore(cur)) {
                       switch (j) {
                       case 0:
-                        decl.Resource.ReturnTypeX = i;
+                        decl.SamplerView.ReturnTypeX = i;
                          break;
                       case 1:
-                        decl.Resource.ReturnTypeY = i;
+                        decl.SamplerView.ReturnTypeY = i;
                          break;
                       case 2:
-                        decl.Resource.ReturnTypeZ = i;
+                        decl.SamplerView.ReturnTypeZ = i;
                          break;
                       case 3:
-                        decl.Resource.ReturnTypeW = i;
+                        decl.SamplerView.ReturnTypeW = i;
                          break;
                       default:
                          assert(0);
@@ -1104,7 +1158,7 @@ static boolean parse_declaration( struct translate_ctx *ctx )
                 }
                 break;
              } else {
-               const char *cur2 = cur;
+               cur2 = cur;
                 eat_opt_white( &cur2 );
                 if (*cur2 == ',') {
                    cur2++;
@@ -1116,51 +1170,64 @@ static boolean parse_declaration( struct translate_ctx *ctx )
              }
           }
           if (j < 4) {
-            decl.Resource.ReturnTypeY =
-               decl.Resource.ReturnTypeZ =
-               decl.Resource.ReturnTypeW =
-               decl.Resource.ReturnTypeX;
+            decl.SamplerView.ReturnTypeY =
+               decl.SamplerView.ReturnTypeZ =
+               decl.SamplerView.ReturnTypeW =
+               decl.SamplerView.ReturnTypeX;
           }
           ctx->cur = cur;
        } else {
-         for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) {
-            if (str_match_no_case( &cur, tgsi_semantic_names[i] )) {
-               const char *cur2 = cur;
-               uint index;
+         if (str_match_no_case(&cur, "LOCAL") &&
+             !is_digit_alpha_underscore(cur)) {
+            decl.Declaration.Local = 1;
+            ctx->cur = cur;
+         }
  
-               if (is_digit_alpha_underscore( cur ))
-                  continue;
-               eat_opt_white( &cur2 );
-               if (*cur2 == '[') {
-                  cur2++;
-                  eat_opt_white( &cur2 );
-                  if (!parse_uint( &cur2, &index )) {
-                     report_error( ctx, "Expected literal integer" );
-                     return FALSE;
-                  }
+         cur = ctx->cur;
+         eat_opt_white( &cur );
+         if (*cur == ',') {
+            cur++;
+            eat_opt_white( &cur );
+
+            for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) {
+               if (str_match_no_case( &cur, tgsi_semantic_names[i] )) {
+                  uint index;
+
+                  if (is_digit_alpha_underscore( cur ))
+                     continue;
+                  cur2 = cur;
                    eat_opt_white( &cur2 );
-                  if (*cur2 != ']') {
-                     report_error( ctx, "Expected `]'" );
-                     return FALSE;
-                  }
-                  cur2++;
+                  if (*cur2 == '[') {
+                     cur2++;
+                     eat_opt_white( &cur2 );
+                     if (!parse_uint( &cur2, &index )) {
+                        report_error( ctx, "Expected literal integer" );
+                        return FALSE;
+                     }
+                     eat_opt_white( &cur2 );
+                     if (*cur2 != ']') {
+                        report_error( ctx, "Expected `]'" );
+                        return FALSE;
+                     }
+                     cur2++;
  
-                  decl.Semantic.Index = index;
+                     decl.Semantic.Index = index;
  
-                  cur = cur2;
-               }
+                     cur = cur2;
+                  }
  
-               decl.Declaration.Semantic = 1;
-               decl.Semantic.Name = i;
+                  decl.Declaration.Semantic = 1;
+                  decl.Semantic.Name = i;
  
-               ctx->cur = cur;
-               break;
+                  ctx->cur = cur;
+                  break;
+               }
              }
           }
        }
     } else if (is_imm_array) {
        unsigned i;
-      float *vals_itr;
+      union tgsi_immediate_data *vals_itr;
        /* we have our immediate data */
        if (*cur != '{') {
           report_error( ctx, "Immediate array without data" );
@@ -1172,9 +1239,9 @@ static boolean parse_declaration( struct translate_ctx *ctx )
        decl.ImmediateData.u =
           MALLOC(sizeof(union tgsi_immediate_data) * 4 *
                  (decl.Range.Last + 1));
-      vals_itr = (float*)decl.ImmediateData.u;
+      vals_itr = decl.ImmediateData.u;
        for (i = 0; i <= decl.Range.Last; ++i) {
-         if (!parse_immediate_data(ctx, vals_itr)) {
+         if (!parse_immediate_data(ctx, TGSI_IMM_FLOAT32, vals_itr)) {
              FREE(decl.ImmediateData.u);
              return FALSE;
           }
@@ -1209,7 +1276,8 @@ static boolean parse_declaration( struct translate_ctx *ctx )
           if (str_match_no_case( &cur, tgsi_interpolate_names[i] )) {
              if (is_digit_alpha_underscore( cur ))
                 continue;
-            decl.Declaration.Interpolate = i;
+            decl.Declaration.Interpolate = 1;
+            decl.Interp.Interpolate = i;
  
              ctx->cur = cur;
              break;
@@ -1240,28 +1308,27 @@ static boolean parse_declaration( struct translate_ctx *ctx )
  static boolean parse_immediate( struct translate_ctx *ctx )
  {
     struct tgsi_full_immediate imm;
-   float values[4];
     uint advance;
+   int type;
  
     if (!eat_white( &ctx->cur )) {
        report_error( ctx, "Syntax error" );
        return FALSE;
     }
-   if (!str_match_no_case( &ctx->cur, "FLT32" ) ||
-       is_digit_alpha_underscore( ctx->cur )) {
-      report_error( ctx, "Expected `FLT32'" );
+   for (type = 0; type < Elements(tgsi_immediate_type_names); ++type) {
+      if (str_match_no_case(&ctx->cur, tgsi_immediate_type_names[type]) &&
+          !is_digit_alpha_underscore(ctx->cur))
+         break;
+   }
+   if (type == Elements(tgsi_immediate_type_names)) {
+      report_error( ctx, "Expected immediate type" );
        return FALSE;
     }
  
-   parse_immediate_data(ctx, values);
-
     imm = tgsi_default_full_immediate();
     imm.Immediate.NrTokens += 4;
-   imm.Immediate.DataType = TGSI_IMM_FLOAT32;
-   imm.u[0].Float = values[0];
-   imm.u[1].Float = values[1];
-   imm.u[2].Float = values[2];
-   imm.u[3].Float = values[3];
+   imm.Immediate.DataType = type;
+   parse_immediate_data(ctx, type, imm.u);
  
     advance = tgsi_build_full_immediate(
        &imm,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c

index 0f9aa3ab43a8ae0e0e6e8d7b478c78cc15b1226f..e427585db194b558dfb11b689593668645e9daac 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -36,6 +36,7 @@
  #include "util/u_debug.h"
  #include "util/u_memory.h"
  #include "util/u_math.h"
+#include "util/u_bitmask.h"
  
  union tgsi_any_token {
     struct tgsi_header header;
@@ -46,8 +47,9 @@ union tgsi_any_token {
     struct tgsi_declaration decl;
     struct tgsi_declaration_range decl_range;
     struct tgsi_declaration_dimension decl_dim;
+   struct tgsi_declaration_interp decl_interp;
     struct tgsi_declaration_semantic decl_semantic;
-   struct tgsi_declaration_resource decl_resource;
+   struct tgsi_declaration_sampler_view decl_sampler_view;
     struct tgsi_immediate imm;
     union  tgsi_immediate_data imm_data;
     struct tgsi_instruction insn;
@@ -74,7 +76,6 @@ struct ureg_tokens {
  #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS
  #define UREG_MAX_CONSTANT_RANGE 32
  #define UREG_MAX_IMMEDIATE 256
-#define UREG_MAX_TEMP 256
  #define UREG_MAX_ADDR 2
  #define UREG_MAX_PRED 1
  
@@ -147,10 +148,11 @@ struct ureg_program
        unsigned return_type_y;
        unsigned return_type_z;
        unsigned return_type_w;
-   } resource[PIPE_MAX_SHADER_RESOURCES];
-   unsigned nr_resources;
+   } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   unsigned nr_sampler_views;
  
-   unsigned temps_active[UREG_MAX_TEMP / 32];
+   struct util_bitmask *free_temps;
+   struct util_bitmask *local_temps;
     unsigned nr_temps;
  
     struct const_decl const_decls;
@@ -529,43 +531,48 @@ out:
     return ureg_src_register(TGSI_FILE_CONSTANT, index);
  }
  
-
-/* Allocate a new temporary.  Temporaries greater than UREG_MAX_TEMP
- * are legal, but will not be released.
- */
-struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
+static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
+                                        boolean local )
  {
     unsigned i;
  
-   for (i = 0; i < UREG_MAX_TEMP; i += 32) {
-      int bit = ffs(~ureg->temps_active[i/32]);
-      if (bit != 0) {
-         i += bit - 1;
-         goto out;
-      }
+   /* Look for a released temporary whose local flag matches `local`.
+    */
+   for (i = util_bitmask_get_first_index(ureg->free_temps);
+        i != UTIL_BITMASK_INVALID_INDEX;
+        i = util_bitmask_get_next_index(ureg->free_temps, i + 1)) {
+      if (util_bitmask_get(ureg->local_temps, i) == local)
+         break;
     }
  
-   /* No reusable temps, so allocate a new one:
+   /* Or, if no such temporary was found, allocate a new index.
      */
-   i = ureg->nr_temps++;
+   if (i == UTIL_BITMASK_INVALID_INDEX)
+      i = ureg->nr_temps++;
  
-out:
-   if (i < UREG_MAX_TEMP)
-      ureg->temps_active[i/32] |= 1 << (i % 32);
+   util_bitmask_clear(ureg->free_temps, i); /* i is in use from here on */
  
-   if (i >= ureg->nr_temps)
-      ureg->nr_temps = i + 1;
+   if (local)
+      util_bitmask_set(ureg->local_temps, i);
  
     return ureg_dst_register( TGSI_FILE_TEMPORARY, i );
  }
  
+struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
+{
+   return alloc_temporary(ureg, FALSE); /* not flagged in local_temps */
+}
+
+struct ureg_dst ureg_DECL_local_temporary( struct ureg_program *ureg )
+{
+   return alloc_temporary(ureg, TRUE); /* flagged in ureg->local_temps */
+}
  
  void ureg_release_temporary( struct ureg_program *ureg,
                               struct ureg_dst tmp )
  {
     if(tmp.File == TGSI_FILE_TEMPORARY)
-      if (tmp.Index < UREG_MAX_TEMP)
-         ureg->temps_active[tmp.Index/32] &= ~(1 << (tmp.Index % 32));
+      util_bitmask_set(ureg->free_temps, tmp.Index); /* reusable again */
  }
  
  
@@ -615,34 +622,34 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
  }
  
  /*
- * Allocate a new shader resource.
+ * Allocate a new shader sampler view.
   */
  struct ureg_src
-ureg_DECL_resource(struct ureg_program *ureg,
-                   unsigned index,
-                   unsigned target,
-                   unsigned return_type_x,
-                   unsigned return_type_y,
-                   unsigned return_type_z,
-                   unsigned return_type_w)
+ureg_DECL_sampler_view(struct ureg_program *ureg,
+                       unsigned index,
+                       unsigned target,
+                       unsigned return_type_x,
+                       unsigned return_type_y,
+                       unsigned return_type_z,
+                       unsigned return_type_w)
  {
-   struct ureg_src reg = ureg_src_register(TGSI_FILE_RESOURCE, index);
+   struct ureg_src reg = ureg_src_register(TGSI_FILE_SAMPLER_VIEW, index);
     uint i;
  
-   for (i = 0; i < ureg->nr_resources; i++) {
-      if (ureg->resource[i].index == index) {
+   for (i = 0; i < ureg->nr_sampler_views; i++) {
+      if (ureg->sampler_view[i].index == index) {
           return reg;
        }
     }
  
-   if (i < PIPE_MAX_SHADER_RESOURCES) {
-      ureg->resource[i].index = index;
-      ureg->resource[i].target = target;
-      ureg->resource[i].return_type_x = return_type_x;
-      ureg->resource[i].return_type_y = return_type_y;
-      ureg->resource[i].return_type_z = return_type_z;
-      ureg->resource[i].return_type_w = return_type_w;
-      ureg->nr_resources++;
+   if (i < PIPE_MAX_SHADER_SAMPLER_VIEWS) {
+      ureg->sampler_view[i].index = index;
+      ureg->sampler_view[i].target = target;
+      ureg->sampler_view[i].return_type_x = return_type_x;
+      ureg->sampler_view[i].return_type_y = return_type_y;
+      ureg->sampler_view[i].return_type_z = return_type_z;
+      ureg->sampler_view[i].return_type_w = return_type_w;
+      ureg->nr_sampler_views++;
        return reg;
     }
  
@@ -891,7 +898,7 @@ ureg_emit_dst( struct ureg_program *ureg,
     assert(dst.File != TGSI_FILE_CONSTANT);
     assert(dst.File != TGSI_FILE_INPUT);
     assert(dst.File != TGSI_FILE_SAMPLER);
-   assert(dst.File != TGSI_FILE_RESOURCE);
+   assert(dst.File != TGSI_FILE_SAMPLER_VIEW);
     assert(dst.File != TGSI_FILE_IMMEDIATE);
     assert(dst.File < TGSI_FILE_COUNT);
  
@@ -1229,28 +1236,50 @@ emit_decl_fs(struct ureg_program *ureg,
               unsigned cylindrical_wrap,
               unsigned centroid)
  {
-   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
+   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 4);
  
     out[0].value = 0;
     out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
-   out[0].decl.NrTokens = 3;
+   out[0].decl.NrTokens = 4;
     out[0].decl.File = file;
     out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
-   out[0].decl.Interpolate = interpolate;
+   out[0].decl.Interpolate = 1;
     out[0].decl.Semantic = 1;
-   out[0].decl.CylindricalWrap = cylindrical_wrap;
-   out[0].decl.Centroid = centroid;
  
     out[1].value = 0;
     out[1].decl_range.First = index;
     out[1].decl_range.Last = index;
  
     out[2].value = 0;
-   out[2].decl_semantic.Name = semantic_name;
-   out[2].decl_semantic.Index = semantic_index;
+   out[2].decl_interp.Interpolate = interpolate;
+   out[2].decl_interp.CylindricalWrap = cylindrical_wrap;
+   out[2].decl_interp.Centroid = centroid;
+
+   out[3].value = 0;
+   out[3].decl_semantic.Name = semantic_name;
+   out[3].decl_semantic.Index = semantic_index;
  }
  
  
+/* Emit a two-token declaration for one register; Local comes from `local`. */
+static void emit_decl( struct ureg_program *ureg,
+                       unsigned file,
+                       unsigned index,
+                       boolean local )
+{
+   union tgsi_any_token *tok = get_tokens( ureg, DOMAIN_DECL, 2 );
+   tok->value = 0;
+   tok->decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+   tok->decl.NrTokens = 2;
+   tok->decl.File = file;
+   tok->decl.UsageMask = TGSI_WRITEMASK_XYZW;
+   tok->decl.Local = local;
+   tok++;                        /* second token: First == Last == index */
+   tok->value = 0;
+   tok->decl_range.First = index;
+   tok->decl_range.Last = index;
+}
+
  static void emit_decl_range( struct ureg_program *ureg,
                               unsigned file,
                               unsigned first,
@@ -1263,7 +1292,6 @@ static void emit_decl_range( struct ureg_program *ureg,
     out[0].decl.NrTokens = 2;
     out[0].decl.File = file;
     out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
-   out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
     out[0].decl.Semantic = 0;
  
     out[1].value = 0;
@@ -1285,7 +1313,6 @@ emit_decl_range2D(struct ureg_program *ureg,
     out[0].decl.NrTokens = 3;
     out[0].decl.File = file;
     out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
-   out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
     out[0].decl.Dimension = 1;
  
     out[1].value = 0;
@@ -1297,33 +1324,32 @@ emit_decl_range2D(struct ureg_program *ureg,
  }
  
  static void
-emit_decl_resource(struct ureg_program *ureg,
-                   unsigned index,
-                   unsigned target,
-                   unsigned return_type_x,
-                   unsigned return_type_y,
-                   unsigned return_type_z,
-                   unsigned return_type_w )
+emit_decl_sampler_view(struct ureg_program *ureg,
+                       unsigned index,
+                       unsigned target,
+                       unsigned return_type_x,
+                       unsigned return_type_y,
+                       unsigned return_type_z,
+                       unsigned return_type_w )
  {
     union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
  
     out[0].value = 0;
     out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
     out[0].decl.NrTokens = 3;
-   out[0].decl.File = TGSI_FILE_RESOURCE;
+   out[0].decl.File = TGSI_FILE_SAMPLER_VIEW;
     out[0].decl.UsageMask = 0xf;
-   out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
  
     out[1].value = 0;
     out[1].decl_range.First = index;
     out[1].decl_range.Last = index;
  
     out[2].value = 0;
-   out[2].decl_resource.Resource    = target;
-   out[2].decl_resource.ReturnTypeX = return_type_x;
-   out[2].decl_resource.ReturnTypeY = return_type_y;
-   out[2].decl_resource.ReturnTypeZ = return_type_z;
-   out[2].decl_resource.ReturnTypeW = return_type_w;
+   out[2].decl_sampler_view.Resource    = target;
+   out[2].decl_sampler_view.ReturnTypeX = return_type_x;
+   out[2].decl_sampler_view.ReturnTypeY = return_type_y;
+   out[2].decl_sampler_view.ReturnTypeZ = return_type_z;
+   out[2].decl_sampler_view.ReturnTypeW = return_type_w;
  }
  
  static void
@@ -1473,14 +1499,14 @@ static void emit_decls( struct ureg_program *ureg )
                         ureg->sampler[i].Index, 1 );
     }
  
-   for (i = 0; i < ureg->nr_resources; i++) {
-      emit_decl_resource(ureg,
-                         ureg->resource[i].index,
-                         ureg->resource[i].target,
-                         ureg->resource[i].return_type_x,
-                         ureg->resource[i].return_type_y,
-                         ureg->resource[i].return_type_z,
-                         ureg->resource[i].return_type_w);
+   for (i = 0; i < ureg->nr_sampler_views; i++) {
+      emit_decl_sampler_view(ureg,
+                             ureg->sampler_view[i].index,
+                             ureg->sampler_view[i].target,
+                             ureg->sampler_view[i].return_type_x,
+                             ureg->sampler_view[i].return_type_y,
+                             ureg->sampler_view[i].return_type_z,
+                             ureg->sampler_view[i].return_type_w);
     }
  
     if (ureg->const_decls.nr_constant_ranges) {
@@ -1508,10 +1534,9 @@ static void emit_decls( struct ureg_program *ureg )
        }
     }
  
-   if (ureg->nr_temps) {
-      emit_decl_range( ureg,
-                       TGSI_FILE_TEMPORARY,
-                       0, ureg->nr_temps );
+   for (i = 0; i < ureg->nr_temps; i++) {
+      emit_decl( ureg, TGSI_FILE_TEMPORARY, i,
+                 util_bitmask_get(ureg->local_temps, i) );
     }
  
     if (ureg->nr_addrs) {
@@ -1668,7 +1693,21 @@ struct ureg_program *ureg_create( unsigned processor )
     ureg->property_gs_input_prim = ~0;
     ureg->property_gs_output_prim = ~0;
     ureg->property_gs_max_vertices = ~0;
+
+   ureg->free_temps = util_bitmask_create();
+   if (ureg->free_temps == NULL)
+      goto fail;
+
+   ureg->local_temps = util_bitmask_create();
+   if (ureg->local_temps == NULL)
+      goto fail;
+
     return ureg;
+
+fail:
+   FREE(ureg->free_temps);
+   FREE(ureg);
+   return NULL;
  }
  
  
@@ -1681,6 +1720,9 @@ void ureg_destroy( struct ureg_program *ureg )
            ureg->domain[i].tokens != error_tokens)
           FREE(ureg->domain[i].tokens);
     }
-   
+
+   util_bitmask_destroy(ureg->free_temps);
+   util_bitmask_destroy(ureg->local_temps);
+
     FREE(ureg);
  }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h

index 07ab8cba0baf42fcb611c51298dc083b6aaeee11..e6131f25aae664f610fae5ee8117e656acf8777a 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -272,6 +272,14 @@ ureg_DECL_constant( struct ureg_program *,
  struct ureg_dst
  ureg_DECL_temporary( struct ureg_program * );
  
+/**
+ * Emit a temporary with the LOCAL declaration flag set.  For use when
+ * the register value is not required to be preserved across
+ * subroutine boundaries.
+ */
+struct ureg_dst
+ureg_DECL_local_temporary( struct ureg_program * );
+
  void 
  ureg_release_temporary( struct ureg_program *ureg,
                          struct ureg_dst tmp );
@@ -292,13 +300,13 @@ ureg_DECL_sampler( struct ureg_program *,
                     unsigned index );
  
  struct ureg_src
-ureg_DECL_resource(struct ureg_program *,
-                   unsigned index,
-                   unsigned target,
-                   unsigned return_type_x,
-                   unsigned return_type_y,
-                   unsigned return_type_z,
-                   unsigned return_type_w );
+ureg_DECL_sampler_view(struct ureg_program *,
+                       unsigned index,
+                       unsigned target,
+                       unsigned return_type_x,
+                       unsigned return_type_y,
+                       unsigned return_type_z,
+                       unsigned return_type_w );
  
  
  static INLINE struct ureg_src
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h

index bd4e51d279f3c5762742838dc5c5a6a1aefbc846..1718fb5e240b6b4dc868a8d1b6caceff5ae9596d 100644 (file)
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -549,6 +549,19 @@ util_format_colormask(const struct util_format_description *desc)
  }
  
  
+/**
+ * Checks if color mask covers every channel for the specified format
+ *
+ * @param desc       a format description to check colormask with
+ * @param colormask  a bit mask for channels, matches format of PIPE_MASK_RGBA
+ */
+static INLINE boolean
+util_format_colormask_full(const struct util_format_description *desc, unsigned colormask)
+{
+   return (~colormask & util_format_colormask(desc)) == 0;
+}
+
+
  boolean
  util_format_is_float(enum pipe_format format);
  
diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c

index ac0df8c1a9c8df09cccd17c5202e1a9287e58199..3a91b1da138fb75afdd2996be1ebd7a1060e7ece 100644 (file)
--- a/src/gallium/auxiliary/util/u_pstipple.c
+++ b/src/gallium/auxiliary/util/u_pstipple.c
@@ -298,12 +298,13 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
           /* declare new position input reg */
           decl = tgsi_default_full_declaration();
           decl.Declaration.File = TGSI_FILE_INPUT;
-         decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR;
+         decl.Declaration.Interpolate = 1;
           decl.Declaration.Semantic = 1;
           decl.Semantic.Name = TGSI_SEMANTIC_POSITION;
           decl.Semantic.Index = 0;
           decl.Range.First = 
              decl.Range.Last = wincoordInput;
+         decl.Interp.Interpolate = TGSI_INTERPOLATE_LINEAR;
           ctx->emit_declaration(ctx, &decl);
        }
  
diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h

index cc7992d73910ec03a50012f2d302efa3e014e9cd..ed15981f1a5b61afe62532bda3a9c27460e3bbbc 100644 (file)
--- a/src/gallium/auxiliary/util/u_string.h
+++ b/src/gallium/auxiliary/util/u_string.h
@@ -48,6 +48,21 @@
  extern "C" {
  #endif
  
+#ifdef _GNU_SOURCE
+
+#define util_strchrnul strchrnul
+
+#else
+
+static INLINE char *
+util_strchrnul(const char *s, char c)
+{
+   for (; *s && *s != c; ++s);
+
+   return (char *)s;
+}
+
+#endif
  
  #ifdef WIN32
  
@@ -72,12 +87,9 @@ util_sprintf(char *str, const char *format, ...)
  static INLINE char *
  util_strchr(const char *s, char c)
  {
-   while(*s) {
-      if(*s == c)
-        return (char *)s;
-      ++s;
-   }
-   return NULL;
+   char *p = util_strchrnul(s, c);
+
+   return *p ? p : NULL;
  }
  
  static INLINE char*
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst

index b2872cd282f39f40219050b0d8fe9bcb8bacd84e..d17ea4289da0cdfd4d0e96401d66adf511f2890e 100644 (file)
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -120,6 +120,23 @@ to the array index which is used for sampling.
  * ``sampler_view_destroy`` destroys a sampler view and releases its reference
    to associated texture.
  
+Shader Resources
+^^^^^^^^^^^^^^^^
+
+Shader resources are textures or buffers that may be read or written
+from a shader without an associated sampler.  This means that they
+have no support for floating point coordinates, address wrap modes or
+filtering.
+
+Shader resources are specified for all the shader stages at once using
+the ``set_shader_resources`` method.  When binding texture resources,
+the ``level``, ``first_layer`` and ``last_layer`` pipe_surface fields
+specify the mipmap level and the range of layers the texture will be
+constrained to.  In the case of buffers, ``first_element`` and
+``last_element`` specify the range within the buffer that will be used
+by the shader resource.  Writes to a shader resource are only allowed
+when the ``writable`` flag is set.
+
  Surfaces
  ^^^^^^^^
  
@@ -542,3 +559,44 @@ These flags control the behavior of a transfer object.
  ``PIPE_TRANSFER_FLUSH_EXPLICIT``
    Written ranges will be notified later with :ref:`transfer_flush_region`.
    Cannot be used with ``PIPE_TRANSFER_READ``.
+
+
+Compute kernel execution
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+A compute program can be defined, bound or destroyed using
+``create_compute_state``, ``bind_compute_state`` or
+``destroy_compute_state`` respectively.
+
+Any of the subroutines contained within the compute program can be
+executed on the device using the ``launch_grid`` method.  This method
+will execute as many instances of the program as elements in the
+specified N-dimensional grid, hopefully in parallel.
+
+The compute program has access to four special resources:
+
+* ``GLOBAL`` represents a memory space shared among all the threads
+  running on the device.  An arbitrary buffer created with the
+  ``PIPE_BIND_GLOBAL`` flag can be mapped into it using the
+  ``set_global_binding`` method.
+
+* ``LOCAL`` represents a memory space shared among all the threads
+  running in the same working group.  The initial contents of this
+  resource are undefined.
+
+* ``PRIVATE`` represents a memory space local to a single thread.
+  The initial contents of this resource are undefined.
+
+* ``INPUT`` represents a read-only memory space that can be
+  initialized at ``launch_grid`` time.
+
+These resources use a byte-based addressing scheme, and they can be
+accessed from the compute program by means of the LOAD/STORE TGSI
+opcodes.  Additional resources to be accessed using the same opcodes
+may be specified by the user with the ``set_compute_resources``
+method.
+
+In addition, normal texture sampling is allowed from the compute
+program: ``bind_compute_sampler_states`` may be used to set up texture
+samplers for the compute stage and ``set_compute_sampler_views`` may
+be used to bind a number of sampler views to it.
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst

index f6c6f3fd119e69580f10037d308b4f3ef2008f64..ff63ce83beab003853ebf4622e5198012f396918 100644 (file)
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -110,6 +110,8 @@ The integer capabilities:
  * ``PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY``: This CAP describes
    a hw limitation.  If true, pipe_vertex_element::src_offset must always be
    aligned to 4.  If false, there are no restrictions on src_offset.
+* ``PIPE_CAP_COMPUTE``: Whether the implementation supports the
+  compute entry points defined in pipe_context and pipe_screen.
  * ``PIPE_CAP_USER_INDEX_BUFFERS``: Whether user index buffers are supported.
    If not, the state tracker must upload all indices which are not in hw
    resources.
@@ -192,8 +194,33 @@ to be 0.
    If unsupported, only float opcodes are supported.
  * ``PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS``: THe maximum number of texture
    samplers.
+* ``PIPE_SHADER_CAP_PREFERRED_IR``: Preferred representation of the
+  program.  It should be one of the ``pipe_shader_ir`` enum values.
  
  
+.. _pipe_compute_cap:
+
+PIPE_COMPUTE_CAP_*
+^^^^^^^^^^^^^^^^^^
+
+Compute-specific capabilities. They can be queried using
+pipe_screen::get_compute_param.
+
+* ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions
+  for grid and block coordinates.  Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block
+  units.  Value type: ``uint64_t []``.
+* ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread
+  units.  Value type: ``uint64_t []``.
+* ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL
+  resource.  Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL
+  resource.  Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE
+  resource.  Value type: ``uint64_t``.
+* ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT
+  resource.  Value type: ``uint64_t``.
+
  .. _pipe_bind:
  
  PIPE_BIND_*
@@ -231,6 +258,12 @@ resources might be created and handled quite differently.
  * ``PIPE_BIND_SCANOUT``: A front color buffer or scanout buffer.
  * ``PIPE_BIND_SHARED``: A sharable buffer that can be given to another
    process.
+* ``PIPE_BIND_GLOBAL``: A buffer that can be mapped into the global
+  address space of a compute program.
+* ``PIPE_BIND_SHADER_RESOURCE``: A buffer or texture that can be
+  bound to the graphics pipeline as a shader resource.
+* ``PIPE_BIND_COMPUTE_RESOURCE``: A buffer or texture that can be
+  bound to the compute program as a shader resource.
  
  .. _pipe_usage:
  
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst

index 48e68968346f41888fae373554b1a86b164a2718..548a9a398556d79368c910a67be8f0b90b7736f8 100644 (file)
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -1312,28 +1312,36 @@ This opcode is the inverse of :opcode:`DFRACEXP`.
     dst.zw = \sqrt{src.zw}
  
  
-.. _resourceopcodes:
+.. _samplingopcodes:
  
-Resource Access Opcodes
-^^^^^^^^^^^^^^^^^^^^^^^^
+Resource Sampling Opcodes
+^^^^^^^^^^^^^^^^^^^^^^^^^
  
  Those opcodes follow very closely semantics of the respective Direct3D
  instructions. If in doubt double check Direct3D documentation.
  
-.. opcode:: LOAD - Simplified alternative to the "SAMPLE" instruction.
-               Using the provided integer address, LOAD fetches data
-               from the specified buffer/texture without any filtering.
+.. opcode:: SAMPLE - Using provided address, sample data from the
+               specified texture using the filtering mode identified
+               by the given sampler. The source data may come from
+               any resource type other than buffers.
+               SAMPLE dst, address, sampler_view, sampler
+               e.g.
+               SAMPLE TEMP[0], TEMP[1], SVIEW[0], SAMP[0]
+
+.. opcode:: SAMPLE_I - Simplified alternative to the SAMPLE instruction.
+               Using the provided integer address, SAMPLE_I fetches data
+               from the specified sampler view without any filtering.
                 The source data may come from any resource type other
                 than CUBE.
-               LOAD dst, address, resource
+               SAMPLE_I dst, address, sampler_view
                 e.g.
-               LOAD TEMP[0], TEMP[1], RES[0]
+               SAMPLE_I TEMP[0], TEMP[1], SVIEW[0]
                 The 'address' is specified as unsigned integers. If the
                 'address' is out of range [0...(# texels - 1)] the
                 result of the fetch is always 0 in all components.
                 As such the instruction doesn't honor address wrap
                 modes, in cases where that behavior is desirable
-               'sample' instruction should be used.
+               'SAMPLE' instruction should be used.
                 address.w always provides an unsigned integer mipmap
                 level. If the value is out of the range then the
                 instruction always returns 0 in all components.
@@ -1348,7 +1356,7 @@ instructions. If in doubt double check Direct3D documentation.
                 For 2D texture arrays address.z provides the array
                 index, otherwise it exhibits the same behavior as in
                 the case for 1D texture arrays.
-               The exeact semantics of the source address are presented
+               The exact semantics of the source address are presented
                 in the table below:
                 resource type         X     Y     Z       W
                 -------------         ------------------------
@@ -1364,25 +1372,16 @@ instructions. If in doubt double check Direct3D documentation.
                 Where 'mpl' is a mipmap level and 'idx' is the
                 array index.
  
-
-.. opcode:: LOAD_MS - Just like LOAD but allows fetch data from
+.. opcode:: SAMPLE_I_MS - Just like SAMPLE_I but allows fetching data from
                 multi-sampled surfaces.
  
-.. opcode:: SAMPLE - Using provided address, sample data from the
-               specified texture using the filtering mode identified
-               by the gven sampler. The source data may come from
-               any resource type other than buffers.
-               SAMPLE dst, address, resource, sampler
-               e.g.
-               SAMPLE TEMP[0], TEMP[1], RES[0], SAMP[0]
-
  .. opcode:: SAMPLE_B - Just like the SAMPLE instruction with the
                 exception that an additiona bias is applied to the
                 level of detail computed as part of the instruction
                 execution.
-               SAMPLE_B dst, address, resource, sampler, lod_bias
+               SAMPLE_B dst, address, sampler_view, sampler, lod_bias
                 e.g.
-               SAMPLE_B TEMP[0], TEMP[1], RES[0], SAMP[0], TEMP[2].x
+               SAMPLE_B TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2].x
  
  .. opcode:: SAMPLE_C - Similar to the SAMPLE instruction but it
                 performs a comparison filter. The operands to SAMPLE_C
@@ -1394,33 +1393,32 @@ instructions. If in doubt double check Direct3D documentation.
                 reference value against the red component value for the
                 surce resource at each texel that the currently configured
                 texture filter covers based on the provided coordinates.
-               SAMPLE_C dst, address, resource.r, sampler, ref_value
+               SAMPLE_C dst, address, sampler_view.r, sampler, ref_value
                 e.g.
-               SAMPLE_C TEMP[0], TEMP[1], RES[0].r, SAMP[0], TEMP[2].x
+               SAMPLE_C TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x
  
  .. opcode:: SAMPLE_C_LZ - Same as SAMPLE_C, but LOD is 0 and derivatives
                 are ignored. The LZ stands for level-zero.
-               SAMPLE_C_LZ dst, address, resource.r, sampler, ref_value
+               SAMPLE_C_LZ dst, address, sampler_view.r, sampler, ref_value
                 e.g.
-               SAMPLE_C_LZ TEMP[0], TEMP[1], RES[0].r, SAMP[0], TEMP[2].x
+               SAMPLE_C_LZ TEMP[0], TEMP[1], SVIEW[0].r, SAMP[0], TEMP[2].x
  
  
  .. opcode:: SAMPLE_D - SAMPLE_D is identical to the SAMPLE opcode except
                 that the derivatives for the source address in the x
                 direction and the y direction are provided by extra
                 parameters.
-               SAMPLE_D dst, address, resource, sampler, der_x, der_y
+               SAMPLE_D dst, address, sampler_view, sampler, der_x, der_y
                 e.g.
-               SAMPLE_D TEMP[0], TEMP[1], RES[0], SAMP[0], TEMP[2], TEMP[3]
+               SAMPLE_D TEMP[0], TEMP[1], SVIEW[0], SAMP[0], TEMP[2], TEMP[3]
  
  .. opcode:: SAMPLE_L - SAMPLE_L is identical to the SAMPLE opcode except
                 that the LOD is provided directly as a scalar value,
                 representing no anisotropy. Source addresses A channel
                 is used as the LOD.
-               SAMPLE_L dst, address, resource, sampler
+               SAMPLE_L dst, address, sampler_view, sampler
                 e.g.
-               SAMPLE_L TEMP[0], TEMP[1], RES[0], SAMP[0]
-
+               SAMPLE_L TEMP[0], TEMP[1], SVIEW[0], SAMP[0]
  
  .. opcode:: GATHER4 - Gathers the four texels to be used in a bi-linear
                 filtering operation and packs them into a single register.
@@ -1435,18 +1433,18 @@ instructions. If in doubt double check Direct3D documentation.
                 the magnitude of the deltas are half a texel.
  
  
-.. opcode:: RESINFO - query the dimensions of a given input buffer.
+.. opcode:: SVIEWINFO - query the dimensions of a given sampler view.
                 dst receives width, height, depth or array size and
                 number of mipmap levels. The dst can have a writemask
                 which will specify what info is the caller interested
                 in.
-               RESINFO dst, src_mip_level, resource
+               SVIEWINFO dst, src_mip_level, sampler_view
                 e.g.
-               RESINFO TEMP[0], TEMP[1].x, RES[0]
+               SVIEWINFO TEMP[0], TEMP[1].x, SVIEW[0]
                 src_mip_level is an unsigned integer scalar. If it's
                 out of range then returns 0 for width, height and
                 depth/array size but the total number of mipmap is
-               still returned correctly for the given resource.
+               still returned correctly for the given sampler view.
                 The returned width, height and depth values are for
                 the mipmap level selected by the src_mip_level and
                 are in the number of texels.
@@ -1463,6 +1461,272 @@ instructions. If in doubt double check Direct3D documentation.
                 not a render target, the result is 0.
  
  
+.. _resourceopcodes:
+
+Resource Access Opcodes
+^^^^^^^^^^^^^^^^^^^^^^^
+
+.. opcode:: LOAD - Fetch data from a shader resource
+
+               Syntax: ``LOAD dst, resource, address``
+
+               Example: ``LOAD TEMP[0], RES[0], TEMP[1]``
+
+               Using the provided integer address, LOAD fetches data
+               from the specified buffer or texture without any
+               filtering.
+
+               The 'address' is specified as a vector of unsigned
+               integers.  If the 'address' is out of range the result
+               is unspecified.
+
+               Only the first mipmap level of a resource can be read
+               from using this instruction.
+
+               For 1D or 2D texture arrays, the array index is
+               provided as an unsigned integer in address.y or
+               address.z, respectively.  address.yz are ignored for
+               buffers and 1D textures.  address.z is ignored for 1D
+               texture arrays and 2D textures.  address.w is always
+               ignored.
+
+.. opcode:: STORE - Write data to a shader resource
+
+               Syntax: ``STORE resource, address, src``
+
+               Example: ``STORE RES[0], TEMP[0], TEMP[1]``
+
+               Using the provided integer address, STORE writes data
+               to the specified buffer or texture.
+
+               The 'address' is specified as a vector of unsigned
+               integers.  If the 'address' is out of range the result
+               is unspecified.
+
+               Only the first mipmap level of a resource can be
+               written to using this instruction.
+
+               For 1D or 2D texture arrays, the array index is
+               provided as an unsigned integer in address.y or
+               address.z, respectively.  address.yz are ignored for
+               buffers and 1D textures.  address.z is ignored for 1D
+               texture arrays and 2D textures.  address.w is always
+               ignored.
+
+
+.. _threadsyncopcodes:
+
+Inter-thread synchronization opcodes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+These opcodes are intended for communication between threads running
+within the same compute grid.  For now they're only valid in compute
+programs.
+
+.. opcode:: MFENCE - Memory fence
+
+  Syntax: ``MFENCE resource``
+
+  Example: ``MFENCE RES[0]``
+
+  This opcode forces strong ordering between any memory access
+  operations that affect the specified resource.  This means that
+  previous loads and stores (and only those) will be performed and
+  visible to other threads before the program execution continues.
+
+
+.. opcode:: LFENCE - Load memory fence
+
+  Syntax: ``LFENCE resource``
+
+  Example: ``LFENCE RES[0]``
+
+  Similar to MFENCE, but it only affects the ordering of memory loads.
+
+
+.. opcode:: SFENCE - Store memory fence
+
+  Syntax: ``SFENCE resource``
+
+  Example: ``SFENCE RES[0]``
+
+  Similar to MFENCE, but it only affects the ordering of memory stores.
+
+
+.. opcode:: BARRIER - Thread group barrier
+
+  Syntax: ``BARRIER``
+
+  This opcode suspends the execution of the current thread until all
+  the remaining threads in the working group reach the same point of
+  the program.  Results are unspecified if any of the remaining
+  threads terminates or never reaches an executed BARRIER instruction.
+
+
+.. _atomopcodes:
+
+Atomic opcodes
+^^^^^^^^^^^^^^
+
+These opcodes provide atomic variants of some common arithmetic and
+logical operations.  In this context atomicity means that another
+concurrent memory access operation that affects the same memory
+location is guaranteed to be performed strictly before or after the
+entire execution of the atomic operation.
+
+For the moment they're only valid in compute programs.
+
+.. opcode:: ATOMUADD - Atomic integer addition
+
+  Syntax: ``ATOMUADD dst, resource, offset, src``
+
+  Example: ``ATOMUADD TEMP[0], RES[0], TEMP[1], TEMP[2]``
+
+  The following operation is performed atomically on each component:
+
+.. math::
+
+  dst_i = resource[offset]_i
+
+  resource[offset]_i = dst_i + src_i
+
+
+.. opcode:: ATOMXCHG - Atomic exchange
+
+  Syntax: ``ATOMXCHG dst, resource, offset, src``
+
+  Example: ``ATOMXCHG TEMP[0], RES[0], TEMP[1], TEMP[2]``
+
+  The following operation is performed atomically on each component:
+
+.. math::
+
+  dst_i = resource[offset]_i
+
+  resource[offset]_i = src_i
+
+
+.. opcode:: ATOMCAS - Atomic compare-and-exchange
+
+  Syntax: ``ATOMCAS dst, resource, offset, cmp, src``
+
+  Example: ``ATOMCAS TEMP[0], RES[0], TEMP[1], TEMP[2], TEMP[3]``
+
+  The following operation is performed atomically on each component:
+
+.. math::
+
+  dst_i = resource[offset]_i
+
+  resource[offset]_i = (dst_i == cmp_i ? src_i : dst_i)
+
+
+.. opcode:: ATOMAND - Atomic bitwise And
+
+  Syntax: ``ATOMAND dst, resource, offset, src``
+
+  Example: ``ATOMAND TEMP[0], RES[0], TEMP[1], TEMP[2]``
+
+  The following operation is performed atomically on each component:
+
+.. math::
+
+  dst_i = resource[offset]_i
+
+  resource[offset]_i = dst_i \& src_i
+
+
+.. opcode:: ATOMOR - Atomic bitwise Or
+
+  Syntax: ``ATOMOR dst, resource, offset, src``
+
+  Example: ``ATOMOR TEMP[0], RES[0], TEMP[1], TEMP[2]``
+
+  The following operation is performed atomically on each component:
+
+.. math::
+
+  dst_i = resource[offset]_i
+
+  resource[offset]_i = dst_i | src_i
+
+
+.. opcode:: ATOMXOR - Atomic bitwise Xor
+
+  Syntax: ``ATOMXOR dst, resource, offset, src``
+
+  Example: ``ATOMXOR TEMP[0], RES[0], TEMP[1], TEMP[2]``
+
+  The following operation is performed atomically on each component:
+
+.. math::
+
+  dst_i = resource[offset]_i
+
+  resource[offset]_i = dst_i \oplus src_i
+
+
+.. opcode:: ATOMUMIN - Atomic unsigned minimum
+
+  Syntax: ``ATOMUMIN dst, resource, offset, src``
+
+  Example: ``ATOMUMIN TEMP[0], RES[0], TEMP[1], TEMP[2]``
+
+  The following operation is performed atomically on each component:
+
+.. math::
+
+  dst_i = resource[offset]_i
+
+  resource[offset]_i = (dst_i < src_i ? dst_i : src_i)
+
+
+.. opcode:: ATOMUMAX - Atomic unsigned maximum
+
+  Syntax: ``ATOMUMAX dst, resource, offset, src``
+
+  Example: ``ATOMUMAX TEMP[0], RES[0], TEMP[1], TEMP[2]``
+
+  The following operation is performed atomically on each component:
+
+.. math::
+
+  dst_i = resource[offset]_i
+
+  resource[offset]_i = (dst_i > src_i ? dst_i : src_i)
+
+
+.. opcode:: ATOMIMIN - Atomic signed minimum
+
+  Syntax: ``ATOMIMIN dst, resource, offset, src``
+
+  Example: ``ATOMIMIN TEMP[0], RES[0], TEMP[1], TEMP[2]``
+
+  The following operation is performed atomically on each component:
+
+.. math::
+
+  dst_i = resource[offset]_i
+
+  resource[offset]_i = (dst_i < src_i ? dst_i : src_i)
+
+
+.. opcode:: ATOMIMAX - Atomic signed maximum
+
+  Syntax: ``ATOMIMAX dst, resource, offset, src``
+
+  Example: ``ATOMIMAX TEMP[0], RES[0], TEMP[1], TEMP[2]``
+
+  The following operation is performed atomically on each component:
+
+.. math::
+
+  dst_i = resource[offset]_i
+
+  resource[offset]_i = (dst_i > src_i ? dst_i : src_i)
+
+
+
  Explanation of symbols used
  ------------------------------
  
@@ -1531,19 +1795,17 @@ of TGSI_FILE.
  UsageMask field specifies which of the register components can be accessed
  and is one of TGSI_WRITEMASK.
  
-Interpolate field is only valid for fragment shader INPUT register files.
-It specifes the way input is being interpolated by the rasteriser and is one
-of TGSI_INTERPOLATE.
+The Local flag specifies that a given value isn't intended for
+subroutine parameter passing and, as a result, the implementation
+isn't required to give any guarantees of it being preserved across
+subroutine boundaries.  As it's merely a compiler hint, the
+implementation is free to ignore it.
  
  If Dimension flag is set to 1, a Declaration Dimension token follows.
  
  If Semantic flag is set to 1, a Declaration Semantic token follows.
  
-CylindricalWrap bitfield is only valid for fragment shader INPUT register
-files. It specifies which register components should be subject to cylindrical
-wrapping when interpolating by the rasteriser. If TGSI_CYLINDRICAL_WRAP_X
-is set to 1, the X component should be interpolated according to cylindrical
-wrapping rules.
+If Interpolate flag is set to 1, a Declaration Interpolate token follows.
  
  If file is TGSI_FILE_RESOURCE, a Declaration Resource token follows.
  
@@ -1690,12 +1952,42 @@ is a writable stencil reference value. Only the Y component is writable.
  This allows the fragment shader to change the fragments stencilref value.
  
  
-Declaration Resource
+Declaration Interpolate
+^^^^^^^^^^^^^^^^^^^^^^^
+
+This token is only valid for fragment shader INPUT declarations.
+
+The Interpolate field specifies the way input is being interpolated by
+the rasteriser and is one of TGSI_INTERPOLATE_*.
+
+The CylindricalWrap bitfield specifies which register components
+should be subject to cylindrical wrapping when interpolating by the
+rasteriser. If TGSI_CYLINDRICAL_WRAP_X is set to 1, the X component
+should be interpolated according to cylindrical wrapping rules.
+
+
+Declaration Sampler View
  ^^^^^^^^^^^^^^^^^^^^^^^^
  
+   Follows Declaration token if file is TGSI_FILE_SAMPLER_VIEW.
+
+   DCL SVIEW[#], resource, type(s)
+
+   Declares a shader input sampler view and assigns it to a SVIEW[#]
+   register.
+
+   resource can be one of BUFFER, 1D, 2D, 3D, 1DArray and 2DArray.
+
+   type must be 1 or 4 entries (if specifying on a per-component
+   level) out of UNORM, SNORM, SINT, UINT and FLOAT.
+
+
+Declaration Resource
+^^^^^^^^^^^^^^^^^^^^
+
     Follows Declaration token if file is TGSI_FILE_RESOURCE.
  
-   DCL RES[#], resource, type(s)
+   DCL RES[#], resource [, WR] [, RAW]
  
     Declares a shader input resource and assigns it to a RES[#]
     register.
@@ -1703,8 +1995,21 @@ Declaration Resource
     resource can be one of BUFFER, 1D, 2D, 3D, CUBE, 1DArray and
     2DArray.
  
-   type must be 1 or 4 entries (if specifying on a per-component
-   level) out of UNORM, SNORM, SINT, UINT and FLOAT.
+   If the RAW keyword is not specified, the texture data will be
+   subject to conversion, swizzling and scaling as required to yield
+   the specified data type from the physical data format of the bound
+   resource.
+
+   If the RAW keyword is specified, no channel conversion will be
+   performed: the values read for each of the channels (X,Y,Z,W) will
+   correspond to consecutive words in the same order and format
+   they're found in memory.  No element-to-address conversion will be
+   performed either: the value of the provided X coordinate will be
+   interpreted in byte units instead of texel units.  The result of
+   accessing a misaligned address is undefined.
+
+   Usage of the STORE opcode is only allowed if the WR (writable) flag
+   is set.
  
  
  Properties
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c

index 94c0c69fcae09356cb372cae1ff4259291306897..def9a03d3773e99d34c1063da8f9a84d3cd6f4c3 100644 (file)
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -512,6 +512,22 @@ i915_translate_instruction(struct i915_fp_compile *p,
        emit_simple_arith(p, inst, A0_ADD, 2, fs);
        break;
  
+   case TGSI_OPCODE_CEIL:
+      src0 = src_vector(p, &inst->Src[0], fs);
+      tmp = i915_get_utemp(p);
+      flags = get_result_flags(inst);
+      i915_emit_arith(p,
+                      A0_FLR,
+                      tmp,
+                      flags & A0_DEST_CHANNEL_ALL, 0,
+                      negate(src0, 1, 1, 1, 1), 0, 0);
+      i915_emit_arith(p,
+                      A0_MOV,
+                      get_result_vector(p, &inst->Dst[0]),
+                      flags, 0,
+                      negate(tmp, 1, 1, 1, 1), 0, 0);
+      break;
+
     case TGSI_OPCODE_CMP:
        src0 = src_vector(p, &inst->Src[0], fs);
        src1 = src_vector(p, &inst->Src[1], fs);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h

index f82ae30bb7dc8a277f5a48b792075b03c9471b0a..c0c95a27129f270c4a0eced1561dd4aa2b753183 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
@@ -64,12 +64,14 @@ lp_build_blend_func(struct lp_build_context *bld,
  LLVMValueRef
  lp_build_blend_aos(struct gallivm_state *gallivm,
                     const struct pipe_blend_state *blend,
+                   const enum pipe_format *cbuf_format,
                     struct lp_type type,
                     unsigned rt,
                     LLVMValueRef src,
                     LLVMValueRef dst,
+                   LLVMValueRef mask,
                     LLVMValueRef const_,
-                   unsigned alpha_swizzle);
+                   const unsigned char swizzle[4]);
  
  
  void
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c

index c342346a36ec33238877f4582b7243f5ed33fa25..59d5f545966086e32f458776e1284add3b9a7978 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -45,12 +45,14 @@
  
  #include "pipe/p_state.h"
  #include "util/u_debug.h"
+#include "util/u_format.h"
  
  #include "gallivm/lp_bld_type.h"
  #include "gallivm/lp_bld_const.h"
  #include "gallivm/lp_bld_arit.h"
  #include "gallivm/lp_bld_logic.h"
  #include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_bitarit.h"
  #include "gallivm/lp_bld_debug.h"
  
  #include "lp_bld_blend.h"
@@ -300,25 +302,39 @@ lp_build_blend_func(struct lp_build_context *bld,
  }
  
  
+/**
+ * Performs blending of src and dst pixels
+ *
+ * @param blend         the blend state of the shader variant
+ * @param cbuf_format   format of the colour buffer
+ * @param type          data type of the pixel vector
+ * @param rt            rt number
+ * @param src           blend src
+ * @param dst           blend dst
+ * @param mask          optional mask to apply to the blending result
+ * @param const_        const blend color
+ * @param swizzle       swizzle values for RGBA
+ *
+ * @return the result of blending src and dst
+ */
  LLVMValueRef
  lp_build_blend_aos(struct gallivm_state *gallivm,
                     const struct pipe_blend_state *blend,
+                   const enum pipe_format *cbuf_format,
                     struct lp_type type,
                     unsigned rt,
                     LLVMValueRef src,
                     LLVMValueRef dst,
+                   LLVMValueRef mask,
                     LLVMValueRef const_,
-                   unsigned alpha_swizzle)
+                   const unsigned char swizzle[4])
  {
     struct lp_build_blend_aos_context bld;
     LLVMValueRef src_term;
     LLVMValueRef dst_term;
-
-   /* FIXME: color masking not implemented yet */
-   assert(blend->rt[rt].colormask == 0xf);
-
-   if(!blend->rt[rt].blend_enable)
-      return src;
+   LLVMValueRef result;
+   unsigned alpha_swizzle = swizzle[3];
+   boolean fullcolormask;
  
     /* Setup build context */
     memset(&bld, 0, sizeof bld);
@@ -327,30 +343,59 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
     bld.dst = dst;
     bld.const_ = const_;
  
-   /* TODO: There are still a few optimization opportunities here. For certain
-    * combinations it is possible to reorder the operations and therefore saving
-    * some instructions. */
+   if (!blend->rt[rt].blend_enable) {
+      result = src;
+   } else {
+
+      /* TODO: There are still a few optimization opportunities here. For certain
+       * combinations it is possible to reorder the operations and therefore saving
+       * some instructions. */
+
+      src_term = lp_build_blend_factor(&bld, src, blend->rt[rt].rgb_src_factor,
+                                       blend->rt[rt].alpha_src_factor, alpha_swizzle);
+      dst_term = lp_build_blend_factor(&bld, dst, blend->rt[rt].rgb_dst_factor,
+                                       blend->rt[rt].alpha_dst_factor, alpha_swizzle);
+
+      lp_build_name(src_term, "src_term");
+      lp_build_name(dst_term, "dst_term");
  
-   src_term = lp_build_blend_factor(&bld, src, blend->rt[rt].rgb_src_factor,
-                                    blend->rt[rt].alpha_src_factor, alpha_swizzle);
-   dst_term = lp_build_blend_factor(&bld, dst, blend->rt[rt].rgb_dst_factor,
-                                    blend->rt[rt].alpha_dst_factor, alpha_swizzle);
+      if(blend->rt[rt].rgb_func == blend->rt[rt].alpha_func) {
+         result = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term);
+      }
+      else {
+         /* Separate RGB / A functions */
+
+         LLVMValueRef rgb;
+         LLVMValueRef alpha;
  
-   lp_build_name(src_term, "src_term");
-   lp_build_name(dst_term, "dst_term");
+         rgb   = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func,   src_term, dst_term);
+         alpha = lp_build_blend_func(&bld.base, blend->rt[rt].alpha_func, src_term, dst_term);
  
-   if(blend->rt[rt].rgb_func == blend->rt[rt].alpha_func) {
-      return lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term);
+         result = lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
+      }
     }
-   else {
-      /* Seperate RGB / A functions */
  
-      LLVMValueRef rgb;
-      LLVMValueRef alpha;
+   /* Check if color mask is necessary */
+   fullcolormask = util_format_colormask_full(util_format_description(cbuf_format[rt]), blend->rt[rt].colormask);
+
+   if (!fullcolormask) {
+      LLVMValueRef color_mask;
  
-      rgb   = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func,   src_term, dst_term);
-      alpha = lp_build_blend_func(&bld.base, blend->rt[rt].alpha_func, src_term, dst_term);
+      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, blend->rt[rt].colormask, swizzle);
+      lp_build_name(color_mask, "color_mask");
  
-      return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
+      /* Combine with input mask if necessary */
+      if (mask) {
+         mask = lp_build_and(&bld.base, color_mask, mask);
+      } else {
+         mask = color_mask;
+      }
+   }
+
+   /* Apply mask, if one exists */
+   if (mask) {
+      result = lp_build_select(&bld.base, mask, result, dst);
     }
+
+   return result;
  }
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c

index 71d0ddf5e7529bbed16e6462afaf33536d89ff3f..230b80a945fdd07044c6c06847457dc19d3ae7ec 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -290,6 +290,10 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
     c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y)));
     rej4 = _mm_slli_epi32(rej4, 2);
  
+   /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */
+   c = _mm_sub_epi32(c, _mm_set1_epi32(1));
+   rej4 = _mm_add_epi32(rej4, _mm_set1_epi32(1));
+
     dcdx2 = _mm_add_epi32(dcdx, dcdx);
     dcdx3 = _mm_add_epi32(dcdx2, dcdx);
  
@@ -383,7 +387,7 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
     __m128i span_1;                /* 0,dcdx,2dcdx,3dcdx for plane 1 */
     __m128i span_2;                /* 0,dcdx,2dcdx,3dcdx for plane 2 */
     __m128i unused;
-   
+
     transpose4_epi32(&p0, &p1, &p2, &zero,
                      &c, &dcdx, &dcdy, &unused);
  
@@ -394,6 +398,9 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
     c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x)));
     c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y)));
  
+   /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */
+   c = _mm_sub_epi32(c, _mm_set1_epi32(1));
+
     dcdx2 = _mm_add_epi32(dcdx, dcdx);
     dcdx3 = _mm_add_epi32(dcdx2, dcdx);
  
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c

index b50c354fa9bffb0223f12b9f8152f8e03a2a6dfb..26d35debdaf3f74e7fdeb9fb00e5a9e2208e2021 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -271,15 +271,13 @@ do_triangle_ccw(struct lp_setup_context *setup,
         */
        int adj = (setup->pixel_offset != 0) ? 1 : 0;
  
-      bbox.x0 = (MIN3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
-      bbox.x1 = (MAX3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
-      bbox.y0 = (MIN3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
-      bbox.y1 = (MAX3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+      /* Inclusive x0, exclusive x1 */
+      bbox.x0 = MIN3(x[0], x[1], x[2]) >> FIXED_ORDER;
+      bbox.x1 = (MAX3(x[0], x[1], x[2]) - 1) >> FIXED_ORDER;
  
-      /* Inclusive coordinates:
-       */
-      bbox.x1--;
-      bbox.y1--;
+      /* Inclusive / exclusive depending upon adj (bottom-left or top-right) */
+      bbox.y0 = (MIN3(y[0], y[1], y[2]) + adj) >> FIXED_ORDER;
+      bbox.y1 = (MAX3(y[0], y[1], y[2]) - 1 + adj) >> FIXED_ORDER;
     }
  
     if (bbox.x1 < bbox.x0 ||
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c

index ec94190649c36cfeb14967cabe5b8bb63f8cf5af..2d2391e908c5ec98e2d876245ffec6734a710c04 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -924,6 +924,7 @@ generate_variant(struct llvmpipe_context *lp,
                   const struct lp_fragment_shader_variant_key *key)
  {
     struct lp_fragment_shader_variant *variant;
+   const struct util_format_description *cbuf0_format_desc;
     boolean fullcolormask;
  
     variant = CALLOC_STRUCT(lp_fragment_shader_variant);
@@ -942,12 +943,8 @@ generate_variant(struct llvmpipe_context *lp,
      */
     fullcolormask = FALSE;
     if (key->nr_cbufs == 1) {
-      const struct util_format_description *format_desc;
-      format_desc = util_format_description(key->cbuf_format[0]);
-      if ((~key->blend.rt[0].colormask &
-           util_format_colormask(format_desc)) == 0) {
-         fullcolormask = TRUE;
-      }
+      cbuf0_format_desc = util_format_description(key->cbuf_format[0]);
+      fullcolormask = util_format_colormask_full(cbuf0_format_desc, key->blend.rt[0].colormask);
     }
  
     variant->opaque =
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c

index b3ca134131d36529d2842fe414e61cbd9977c05d..51324cbb6a3189f9c9ba735243e9f9ffb602eec1 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -179,7 +179,9 @@ add_blend_test(struct gallivm_state *gallivm,
     LLVMValueRef res_ptr;
     LLVMBasicBlockRef block;
     LLVMBuilderRef builder;
+   const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM;
     const unsigned rt = 0;
+   const unsigned char swizzle[4] = { 0, 1, 2, 3 };
  
     vec_type = lp_build_vec_type(gallivm, type);
  
@@ -205,7 +207,7 @@ add_blend_test(struct gallivm_state *gallivm,
        dst = LLVMBuildLoad(builder, dst_ptr, "dst");
        con = LLVMBuildLoad(builder, const_ptr, "const");
  
-      res = lp_build_blend_aos(gallivm, blend, type, rt, src, dst, con, 3);
+      res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, dst, NULL, con, swizzle);
  
        lp_build_name(res, "res");
  
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c

index ff199debd7445238efb8ae863acf3b884ba2f8b8..936e2bf246ace722d4faf2117e5117785de02fb9 100644 (file)
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -523,8 +523,10 @@ nouveau_scratch_runout_release(struct nouveau_context *nv)
  {
     if (!nv->scratch.nr_runout)
        return;
-   while (nv->scratch.nr_runout--)
+   do {
+      --nv->scratch.nr_runout;
        nouveau_bo_ref(NULL, &nv->scratch.runout[nv->scratch.nr_runout]);
+   } while (nv->scratch.nr_runout);
  
     FREE(nv->scratch.runout);
     nv->scratch.end = 0;
diff --git a/src/gallium/drivers/nv30/nvfx_fragprog.c b/src/gallium/drivers/nv30/nvfx_fragprog.c

index e562b454f92696465cd4cfc0b8c308b2b9560b1e..592ad21c6c89154cd72a00e0be93b2e18b37de8f 100644 (file)
--- a/src/gallium/drivers/nv30/nvfx_fragprog.c
+++ b/src/gallium/drivers/nv30/nvfx_fragprog.c
@@ -535,6 +535,11 @@ nvfx_fragprog_parse_instruction(struct nv30_context* nvfx, struct nvfx_fpc *fpc,
     case TGSI_OPCODE_ADD:
        nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none));
        break;
+   case TGSI_OPCODE_CEIL:
+      tmp = nvfx_src(temp(fpc));
+      nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, neg(src[0]), none, none));
+      nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, neg(tmp), none, none));
+      break;
     case TGSI_OPCODE_CMP:
        insn = arith(0, MOV, none.reg, mask, src[0], none, none);
        insn.cc_update = 1;
diff --git a/src/gallium/drivers/nv30/nvfx_vertprog.c b/src/gallium/drivers/nv30/nvfx_vertprog.c

index d7eb9fb0a6399741ced2ee4d34d2e9bbea7c4b35..82972b3943cf9ac0784b663d9a382c5272b84211 100644 (file)
--- a/src/gallium/drivers/nv30/nvfx_vertprog.c
+++ b/src/gallium/drivers/nv30/nvfx_vertprog.c
@@ -550,6 +550,11 @@ nvfx_vertprog_parse_instruction(struct nv30_context *nv30, struct nvfx_vpc *vpc,
     case TGSI_OPCODE_ARL:
        nvfx_vp_emit(vpc, arith(0, VEC, ARL, dst, mask, src[0], none, none));
        break;
+   case TGSI_OPCODE_CEIL:
+      tmp = nvfx_src(temp(vpc));
+      nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, neg(src[0]), none, none));
+      nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none));
+      break;
     case TGSI_OPCODE_CMP:
        insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none);
        insn.cc_update = 1;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp

index f7dac25c116325a4f0f0877d801d0f68b772407c..f713e6391c6325ed00a89148541607186da0409e 100644 (file)
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
@@ -278,6 +278,31 @@ BuildUtil::mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc)
     return mkOp2(OP_UNION, typeOfSize(dst->reg.size), dst, def0, def1);
  }
  
+Instruction *
+BuildUtil::mkSplit(Value *h[2], uint8_t halfSize, Value *val)
+{
+   Instruction *insn = NULL;
+
+   const DataType fTy = typeOfSize(halfSize * 2);
+
+   if (val->reg.file == FILE_IMMEDIATE)
+      val = mkMov(getSSA(halfSize * 2), val, fTy)->getDef(0);
+
+   if (isMemoryFile(val->reg.file)) {
+      h[0] = cloneShallow(getFunction(), val);
+      h[1] = cloneShallow(getFunction(), val);
+      h[0]->reg.size = halfSize;
+      h[1]->reg.size = halfSize;
+      h[1]->reg.data.offset += halfSize;
+   } else {
+      h[0] = getSSA(halfSize, val->reg.file);
+      h[1] = getSSA(halfSize, val->reg.file);
+      insn = mkOp1(OP_SPLIT, fTy, h[0], val);
+      insn->setDef(1, h[1]);
+   }
+   return insn;
+}
+
  FlowInstruction *
  BuildUtil::mkFlow(operation op, void *targ, CondCode cc, Value *pred)
  {
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h

index 9ee04dbcd128335f7a0eae497380246be23a6aef..dd7e491cb5c2384fe624598c83b92bd3759ebe5e 100644 (file)
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
@@ -81,6 +81,8 @@ public:
  
     Instruction *mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc);
  
+   Instruction *mkSplit(Value *half[2], uint8_t halfSize, Value *);
+
     void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2);
  
     ImmediateValue *mkImm(float);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp

index 82e23602ca0749fb14c50acce5f4ab5fff795cb5..16f191da159884a27eba6b558d3c2bf7656ab448 100644 (file)
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
@@ -347,6 +347,7 @@ static nv50_ir::TexTarget translateTexture(uint tex)
     NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);
     NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW);
     NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);
+   NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER);
  
     case TGSI_TEXTURE_UNKNOWN:
     default:
@@ -548,7 +549,7 @@ static nv50_ir::operation translateOpcode(uint opcode)
     NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
     NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
     NV50_IR_OPCODE_CASE(GATHER4, TXG);
-   NV50_IR_OPCODE_CASE(RESINFO, TXQ);
+   NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);
  
     NV50_IR_OPCODE_CASE(END, EXIT);
  
@@ -597,8 +598,8 @@ public:
  
     int clipVertexOutput;
  
-   uint8_t *resourceTargets; // TGSI_TEXTURE_*
-   unsigned resourceCount;
+   uint8_t *samplerViewTargets; // TGSI_TEXTURE_*
+   unsigned samplerViewCount;
  
  private:
     int inferSysValDirection(unsigned sn) const;
@@ -617,7 +618,7 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
     if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
        tgsi_dump(tokens, 0);
  
-   resourceTargets = NULL;
+   samplerViewTargets = NULL;
  
     mainTempsInLMem = FALSE;
  }
@@ -632,8 +633,8 @@ Source::~Source()
     if (info->immd.type)
        FREE(info->immd.type);
  
-   if (resourceTargets)
-      delete[] resourceTargets;
+   if (samplerViewTargets)
+      delete[] samplerViewTargets;
  }
  
  bool Source::scanSource()
@@ -650,8 +651,8 @@ bool Source::scanSource()
  
     clipVertexOutput = -1;
  
-   resourceCount = scan.file_max[TGSI_FILE_RESOURCE] + 1;
-   resourceTargets = new uint8_t[resourceCount];
+   samplerViewCount = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
+   samplerViewTargets = new uint8_t[samplerViewCount];
  
     info->immd.bufSize = 0;
     tempArrayCount = 0;
@@ -805,7 +806,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
              info->in[i].si = si;
              if (info->type == PIPE_SHADER_FRAGMENT) {
                 // translate interpolation mode
-               switch (decl->Declaration.Interpolate) {
+               switch (decl->Interp.Interpolate) {
                 case TGSI_INTERPOLATE_CONSTANT:
                    info->in[i].flat = 1;
                    break;
@@ -818,7 +819,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
                 default:
                    break;
                 }
-               if (decl->Declaration.Centroid)
+               if (decl->Interp.Centroid)
                    info->in[i].centroid = 1;
              }
           }
@@ -874,9 +875,9 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
           info->sv[i].input = inferSysValDirection(sn);
        }
        break;
-   case TGSI_FILE_RESOURCE:
+   case TGSI_FILE_SAMPLER_VIEW:
        for (i = first; i <= last; ++i)
-         resourceTargets[i] = decl->Resource.Resource;
+         samplerViewTargets[i] = decl->SamplerView.Resource;
        break;
     case TGSI_FILE_IMMEDIATE_ARRAY:
     {
@@ -1000,13 +1001,15 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
  nv50_ir::TexInstruction::Target
  Instruction::getTexture(const tgsi::Source *code, int s) const
  {
-   if (insn->Instruction.Texture) {
-      return translateTexture(insn->Texture.Texture);
-   } else {
+   switch (getSrc(s).getFile()) {
+   case TGSI_FILE_SAMPLER_VIEW: {
        // XXX: indirect access
        unsigned int r = getSrc(s).getIndex(0);
-      assert(r < code->resourceCount);
-      return translateTexture(code->resourceTargets[r]);
+      assert(r < code->samplerViewCount);
+      return translateTexture(code->samplerViewTargets[r]);
+   }
+   default:
+      return translateTexture(insn->Texture.Texture);
     }
  }
  
@@ -2042,7 +2045,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
        handleTXF(dst0, 1);
        break;
     case TGSI_OPCODE_TXQ:
-   case TGSI_OPCODE_RESINFO:
+   case TGSI_OPCODE_SVIEWINFO:
        handleTXQ(dst0, TXQ_DIMS);
        break;
     case TGSI_OPCODE_F2I:
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp

index 27373b4cc47ca56a9b3c27a59e3986b1a55fa05d..16bba0e172392e6e930fa542aaa7ca717b52723b 100644 (file)
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp
@@ -57,15 +57,17 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
  
     Instruction *i[9];
  
-   Value *a[2] = { bld->getSSA(halfSize), bld->getSSA(halfSize) };
-   Value *b[2] = { bld->getSSA(halfSize), bld->getSSA(halfSize) };
+   bld->setPosition(mul, true);
+
+   Value *a[2], *b[2];
     Value *c[2];
     Value *t[4];
     for (int j = 0; j < 4; ++j)
        t[j] = bld->getSSA(fullSize);
  
-   (i[0] = bld->mkOp1(OP_SPLIT, fTy, a[0], mul->getSrc(0)))->setDef(1, a[1]);
-   (i[1] = bld->mkOp1(OP_SPLIT, fTy, b[0], mul->getSrc(1)))->setDef(1, b[1]);
+   // split sources into halves
+   i[0] = bld->mkSplit(a, halfSize, mul->getSrc(0));
+   i[1] = bld->mkSplit(b, halfSize, mul->getSrc(1));
  
     i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]);
     i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
@@ -96,7 +98,8 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
     delete_Instruction(bld->getProgram(), mul);
  
     for (int j = 2; j <= (highResult ? 5 : 4); ++j)
-      i[j]->sType = hTy;
+      if (i[j])
+         i[j]->sType = hTy;
  
     return true;
  }
@@ -518,7 +521,6 @@ private:
  
     bool handleEXPORT(Instruction *);
  
-   bool handleMUL(Instruction *);
     bool handleDIV(Instruction *);
     bool handleSQRT(Instruction *);
     bool handlePOW(Instruction *);
@@ -587,7 +589,8 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i)
     if (i->tex.target.isArray()) {
        Value *layer = i->getSrc(arg - 1);
        LValue *src = new_LValue(func, FILE_GPR);
-      bld.mkCvt(OP_CVT, TYPE_U16, src, TYPE_F32, layer);
+      bld.mkCvt(OP_CVT, TYPE_U32, src, TYPE_F32, layer);
+      bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511));
        i->setSrc(arg - 1, src);
  
        if (i->tex.target.isCube()) {
@@ -940,14 +943,6 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i)
     return true;
  }
  
-bool
-NV50LoweringPreSSA::handleMUL(Instruction *i)
-{
-   if (!isFloatType(i->dType) && typeSizeof(i->sType) > 2)
-      return expandIntegerMUL(&bld, i);
-   return true;
-}
-
  bool
  NV50LoweringPreSSA::handleDIV(Instruction *i)
  {
@@ -1068,8 +1063,6 @@ NV50LoweringPreSSA::visit(Instruction *i)
        return handleSELP(i);
     case OP_POW:
        return handlePOW(i);
-   case OP_MUL:
-      return handleMUL(i);
     case OP_DIV:
        return handleDIV(i);
     case OP_SQRT:
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp

index db5195cd5828696861b554c9bd21bd9afcea5e79..10382d9cac616715f31c9a3f4d5b5c6821d38e48 100644 (file)
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp
@@ -564,7 +564,7 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
        insn = mul2->getSrc(t)->getInsn();
        if (!mul2->src(t).mod && insn->op == OP_MUL && insn->dType == TYPE_F32)
           mul1 = insn;
-      if (mul1) {
+      if (mul1 && !mul1->saturate) {
           int s1;
  
           if (mul1->src(s1 = 0).getImmediate(imm1) ||
@@ -584,10 +584,11 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
              if (f < 0)
                 mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG);
           }
+         mul1->saturate = mul2->saturate;
           return;
        }
     }
-   if (mul2->getDef(0)->refCount() == 1) {
+   if (mul2->getDef(0)->refCount() == 1 && !mul2->saturate) {
        // b = mul a, imm
        // d = mul b, c   -> d = mul_x_imm a, c
        int s2, t2;
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp

index 77edaa6067a450b163311a8d7a75001cc4fd2696..726331e91e7426be1b7395286505eb9fd153a3a1 100644 (file)
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp
@@ -1819,8 +1819,8 @@ RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex)
     int n = tex->srcCount(0xff, true);
     if (n > 4) {
        condenseSrcs(tex, 0, 3);
-      if (n > 5)
-         condenseSrcs(tex, 4, n - 1);
+      if (n > 5) // NOTE: first call modified positions already
+         condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1));
     } else
     if (n > 1) {
        condenseSrcs(tex, 0, n - 1);
@@ -1850,8 +1850,8 @@ RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex)
  
     if (s > 1)
        condenseSrcs(tex, 0, s - 1);
-   if (n > 1)
-      condenseSrcs(tex, s, s + (n - 1));
+   if (n > 1) // NOTE: first call modified positions already
+      condenseSrcs(tex, 1, n);
  
     condenseDefs(tex);
  }
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp

index 5e541e514cb62ceff26b5b7a7835341e7bc5dd15..8b11c6a2fdda82dcd995a7a81ab6abf4aba1bbf7 100644 (file)
--- a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp
@@ -310,7 +310,22 @@ TargetNV50::insnCanLoad(const Instruction *i, int s,
        return false;
     }
  
-   if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * typeSizeof(ld->dType)))
+   uint8_t ldSize;
+
+   if ((i->op == OP_MUL || i->op == OP_MAD) && !isFloatType(i->dType)) {
+      // 32-bit MUL will be split into 16-bit MULs
+      if (ld->src(0).isIndirect(0))
+         return false;
+      if (sf == FILE_IMMEDIATE)
+         return false;
+      ldSize = 2;
+   } else {
+      ldSize = typeSizeof(ld->dType);
+   }
+
+   if (ldSize < 4 && sf == FILE_SHADER_INPUT) // no < 4-byte aligned a[] access
+      return false;
+   if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * ldSize))
        return false;
  
     if (ld->src(0).isIndirect(0)) {
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h

index 1cee0e06c02e2a85a509384636fbb5f291e6f0f4..44a0ba0f5616d31aaf3727b70cf79b58100c2048 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -48,6 +48,7 @@
  #define NV50_NEW_CONSTBUF     (1 << 18)
  #define NV50_NEW_TEXTURES     (1 << 19)
  #define NV50_NEW_SAMPLERS     (1 << 20)
+#define NV50_NEW_STRMOUT      (1 << 21)
  #define NV50_NEW_CONTEXT      (1 << 31)
  
  #define NV50_BIND_FB          0
@@ -56,9 +57,10 @@
  #define NV50_BIND_INDEX       3
  #define NV50_BIND_TEXTURES    4
  #define NV50_BIND_CB(s, i)   (5 + 16 * (s) + (i))
-#define NV50_BIND_SCREEN     53
-#define NV50_BIND_TLS        54
-#define NV50_BIND_COUNT      55
+#define NV50_BIND_SO         53
+#define NV50_BIND_SCREEN     54
+#define NV50_BIND_TLS        55
+#define NV50_BIND_COUNT      56
  #define NV50_BIND_2D          0
  #define NV50_BIND_M2MF        0
  #define NV50_BIND_FENCE       1
@@ -92,11 +94,13 @@ struct nv50_context {
        boolean point_sprite;
        boolean rt_serialize;
        boolean flushed;
+      boolean rasterizer_discard;
        uint8_t tls_required;
        uint8_t num_vtxbufs;
        uint8_t num_vtxelts;
        uint8_t num_textures[3];
        uint8_t num_samplers[3];
+      uint8_t prim_size;
        uint16_t scissor;
     } state;
  
@@ -126,6 +130,10 @@ struct nv50_context {
     struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
     unsigned num_samplers[3];
  
+   uint8_t num_so_targets;
+   uint8_t so_targets_dirty;
+   struct pipe_stream_output_target *so_target[4];
+
     struct pipe_framebuffer_state framebuffer;
     struct pipe_blend_color blend_colour;
     struct pipe_stencil_ref stencil_ref;
@@ -168,6 +176,14 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
  
  /* nv50_query.c */
  void nv50_init_query_functions(struct nv50_context *);
+void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
+                               struct pipe_query *, unsigned result_offset);
+void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
+void nva0_so_target_save_offset(struct pipe_context *,
+                                struct pipe_stream_output_target *,
+                                unsigned index, boolean seralize);
+
+#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
  
  /* nv50_shader_state.c */
  void nv50_vertprog_validate(struct nv50_context *);
@@ -177,6 +193,7 @@ void nv50_fp_linkage_validate(struct nv50_context *);
  void nv50_gp_linkage_validate(struct nv50_context *);
  void nv50_constbufs_validate(struct nv50_context *);
  void nv50_validate_derived_rs(struct nv50_context *);
+void nv50_stream_output_validate(struct nv50_context *);
  
  /* nv50_state.c */
  extern void nv50_init_state_functions(struct nv50_context *);
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c

index 1b2e2934b7946c7b7a5b783ad6f0365d5950aef4..ca40ac2dd43772c05241c8d13227c2e5112e3917 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -68,6 +68,17 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
           break;
        }
     }
+
+   /*
+    * Corner case: VP has no inputs, but we will still need to submit data to
+    * draw it. HW will shout at us and won't draw anything if we don't enable
+    * any input, so let's just pretend it's the first one.
+    */
+   if (prog->vp.attrs[0] == 0 &&
+       prog->vp.attrs[1] == 0 &&
+       prog->vp.attrs[2] == 0)
+      prog->vp.attrs[0] |= 0xf;
+
     /* VertexID before InstanceID */
     if (info->io.vertexId < info->numSysVals)
        info->sv[info->io.vertexId].slot[0] = n++;
@@ -235,6 +246,59 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
     }
  }
  
+/*
+ * Build the stream output (transform feedback) state for a program.
+ *
+ * info: translated program info; info->out[].slot[] supplies the output slot
+ *       of each register component.
+ * pso:  gallium stream output description taken from the shader state.
+ *
+ * Returns a newly allocated nv50_stream_output_state, or NULL if the
+ * allocation fails.
+ */
+static struct nv50_stream_output_state *
+nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
+                                  const struct pipe_stream_output_info *pso)
+{
+   struct nv50_stream_output_state *so;
+   unsigned b, i, c;
+   unsigned base[4];
+
+   so = MALLOC_STRUCT(nv50_stream_output_state);
+   if (!so)
+      return NULL;
+   /* 0xff marks map entries that no output is written to. */
+   memset(so->map, 0xff, sizeof(so->map));
+
+   /* Per buffer, find the highest component offset any output ends at. */
+   for (b = 0; b < 4; ++b)
+      so->num_attribs[b] = 0;
+   for (i = 0; i < pso->num_outputs; ++i) {
+      unsigned end =  pso->output[i].dst_offset + pso->output[i].num_components;
+      b = pso->output[i].output_buffer;
+      assert(b < 4);
+      so->num_attribs[b] = MAX2(so->num_attribs[b], end);
+   }
+
+   /* Start out assuming a single interleaved buffer. */
+   so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED;
+
+   /* NOTE(review): the "* 4" presumably converts 32-bit components to bytes
+    * - confirm against the unit of pso->stride.
+    */
+   so->stride[0] = pso->stride[0] * 4;
+   base[0] = 0;
+   for (b = 1; b < 4; ++b) {
+      assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]);
+      so->stride[b] = so->num_attribs[b] * 4;
+      /* Any use of buffer b > 0 replaces INTERLEAVED by a SEPARATE count. */
+      if (so->num_attribs[b])
+         so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT;
+      /* Each buffer's region of the map starts on a multiple of 4 entries. */
+      base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4);
+   }
+   /* Only the interleaved layout carries the stride in the control word. */
+   if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) {
+      assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX);
+      so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT;
+   }
+
+   /* The map ends where the last buffer's region ends. */
+   so->map_size = base[3] + so->num_attribs[3];
+
+   /* map[buffer region + destination offset + c] = slot of that output
+    * component.
+    */
+   for (i = 0; i < pso->num_outputs; ++i) {
+      const unsigned s = pso->output[i].start_component;
+      const unsigned p = pso->output[i].dst_offset;
+      const unsigned r = pso->output[i].register_index;
+      b = pso->output[i].output_buffer;
+
+      for (c = 0; c < pso->output[i].num_components; ++c)
+         so->map[base[b] + p + c] = info->out[r].slot[s + c];
+   }
+
+   return so;
+}
+
  boolean
  nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
  {
@@ -293,6 +357,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
           prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
     }
  
+   if (prog->pipe.stream_output.num_outputs)
+      prog->so = nv50_program_create_strmout_state(info,
+                                                   &prog->pipe.stream_output);
+
  out:
     FREE(info);
     return !ret;
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h

index 92361ad9946f61a99686be634fb7fe423048b38b..f56268b5439a281b01014dc1b9248dcf8c8ce19b 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -42,6 +42,15 @@ struct nv50_varying {
     ubyte si; /* semantic index */
  };
  
+/* Per-program stream output layout, built by
+ * nv50_program_create_strmout_state().
+ */
+struct nv50_stream_output_state
+{
+   uint32_t ctrl; /* base value written to STRMOUT_BUFFERS_CTRL */
+   uint16_t stride[4]; /* per buffer: 4 * number of components */
+   uint8_t num_attribs[4]; /* per buffer: end of the highest written component */
+   uint8_t map_size; /* number of map entries in use */
+   uint8_t map[128]; /* output slot per stream output position, 0xff = none */
+};
+
  struct nv50_program {
     struct pipe_shader_state pipe;
  
@@ -88,6 +97,8 @@ struct nv50_program {
     void *fixups; /* relocation records */
  
     struct nouveau_heap *mem;
+
+   struct nv50_stream_output_state *so;
  };
  
  boolean nv50_program_translate(struct nv50_program *, uint16_t chipset);
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c

index 04e32b7e8b9d55ad53b945ca060797ff088fddb6..3abe189e7b522038bbba9a01466f40abbc3c3ce6 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_push.c
+++ b/src/gallium/drivers/nv50/nv50_push.c
@@ -210,7 +210,8 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
  {
     struct push_context ctx;
     unsigned i, index_size;
-   unsigned inst = info->instance_count;
+   unsigned inst_count = info->instance_count;
+   unsigned vert_count = info->count;
     boolean apply_bias = info->indexed && info->index_bias;
  
     ctx.push = nv50->base.pushbuf;
@@ -242,6 +243,17 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
        ctx.primitive_restart = info->primitive_restart;
        ctx.restart_index = info->restart_index;
     } else {
+      if (unlikely(info->count_from_stream_output)) {
+         struct pipe_context *pipe = &nv50->base.pipe;
+         struct nv50_so_target *targ;
+         targ = nv50_so_target(info->count_from_stream_output);
+         if (!targ->pq) {
+            NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
+            return;
+         }
+         pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
+         vert_count /= targ->stride;
+      }
        ctx.idxbuf = NULL;
        index_size = 0;
        ctx.primitive_restart = FALSE;
@@ -262,21 +274,21 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
     }
     nv50->state.prim_restart = info->primitive_restart;
  
-   while (inst--) {
+   while (inst_count--) {
        BEGIN_NV04(ctx.push, NV50_3D(VERTEX_BEGIN_GL), 1);
        PUSH_DATA (ctx.push, ctx.prim);
        switch (index_size) {
        case 0:
-         emit_vertices_seq(&ctx, info->start, info->count);
+         emit_vertices_seq(&ctx, info->start, vert_count);
           break;
        case 1:
-         emit_vertices_i08(&ctx, info->start, info->count);
+         emit_vertices_i08(&ctx, info->start, vert_count);
           break;
        case 2:
-         emit_vertices_i16(&ctx, info->start, info->count);
+         emit_vertices_i16(&ctx, info->start, vert_count);
           break;
        case 4:
-         emit_vertices_i32(&ctx, info->start, info->count);
+         emit_vertices_i32(&ctx, info->start, vert_count);
           break;
        default:
           assert(0);
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c

index 5275e74964a3a8cb94e345b9f1613b1cbe75968d..8e62c5f11bc3ffdef4f3baacea2618af95669e51 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -36,7 +36,8 @@
  
  struct nv50_query {
     uint32_t *data;
-   uint32_t type;
+   uint16_t type;
+   uint16_t index;
     uint32_t sequence;
     struct nouveau_bo *bo;
     uint32_t base;
@@ -170,21 +171,15 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
        BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
        PUSH_DATA (push, 1);
        break;
-   case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
-      PUSH_SPACE(push, 2);
-      BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
-      PUSH_DATA (push, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+      nv50_query_get(push, q, 0x10, 0x06805002);
        break;
     case PIPE_QUERY_PRIMITIVES_EMITTED:
-      PUSH_SPACE(push, 2);
-      BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
-      PUSH_DATA (push, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK);
+      nv50_query_get(push, q, 0x10, 0x05805002);
        break;
     case PIPE_QUERY_SO_STATISTICS:
-      PUSH_SPACE(push, 3);
-      BEGIN_NI04(push, NV50_3D(COUNTER_RESET), 2);
-      PUSH_DATA (push, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK);
-      PUSH_DATA (push, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+      nv50_query_get(push, q, 0x20, 0x05805002);
+      nv50_query_get(push, q, 0x30, 0x06805002);
        break;
     case PIPE_QUERY_TIMESTAMP_DISJOINT:
     case PIPE_QUERY_TIME_ELAPSED:
@@ -227,6 +222,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
     case PIPE_QUERY_GPU_FINISHED:
        nv50_query_get(push, q, 0, 0x1000f010);
        break;
+   case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+      nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
+      break;
     default:
        assert(0);
        break;
@@ -247,6 +245,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
     struct nv50_context *nv50 = nv50_context(pipe);
     struct nv50_query *q = nv50_query(pq);
     uint64_t *res64 = (uint64_t *)result;
+   uint32_t *res32 = (uint32_t *)result;
     boolean *res8 = (boolean *)result;
     uint64_t *data64 = (uint64_t *)q->data;
  
@@ -275,11 +274,11 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
        break;
     case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
     case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
-      res64[0] = data64[0];
+      res64[0] = data64[0] - data64[2];
        break;
     case PIPE_QUERY_SO_STATISTICS:
-      res64[0] = data64[0];
-      res64[1] = data64[1];
+      res64[0] = data64[0] - data64[4];
+      res64[1] = data64[2] - data64[6];
        break;
     case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
        res64[0] = 1000000000;
@@ -288,6 +287,9 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
     case PIPE_QUERY_TIME_ELAPSED:
        res64[0] = data64[1] - data64[3];
        break;
+   case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+      res32[0] = q->data[1];
+      break;
     default:
        return FALSE;
     }
@@ -295,6 +297,21 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
     return TRUE;
  }
  
+/*
+ * Emit a semaphore acquire on the query's buffer object, at the query's
+ * offset, using the query's sequence number as the value to compare against
+ * (ACQUIRE_EQUAL trigger).
+ */
+void
+nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
+{
+   struct nv50_query *q = nv50_query(pq);
+   unsigned offset = q->offset;
+
+   /* Method header plus the 4 data words pushed below. */
+   PUSH_SPACE(push, 5);
+   PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+   BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
+   /* Address (high word, then low word), value, trigger. */
+   PUSH_DATAh(push, q->bo->offset + offset);
+   PUSH_DATA (push, q->bo->offset + offset);
+   PUSH_DATA (push, q->sequence);
+   PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
+}
+
  static void
  nv50_render_condition(struct pipe_context *pipe,
                        struct pipe_query *pq, uint mode)
@@ -324,6 +341,38 @@ nv50_render_condition(struct pipe_context *pipe,
     PUSH_DATA (push, NV50_3D_COND_MODE_RES_NON_ZERO);
  }
  
+/*
+ * Queue 4 bytes of the query's buffer object, starting at
+ * q->offset + result_offset, as pushbuf data.
+ *
+ * The callers in this change invoke it right after a BEGIN_NV04 with a count
+ * of 1, in the place where a PUSH_DATA would otherwise supply the method's
+ * argument.
+ */
+void
+nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
+                          struct pipe_query *pq, unsigned result_offset)
+{
+   struct nv50_query *q = nv50_query(pq);
+
+   /* XXX: does this exist ? */
+   /* As written the flag evaluates to 0, so it does not alter the length. */
+#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
+
+   /* Make room for one additional push entry (no dwords, no relocs). */
+   nouveau_pushbuf_space(push, 0, 0, 1);
+   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
+                        NV50_IB_ENTRY_1_NO_PREFETCH);
+}
+
+/*
+ * Emit the buffer offset query of a stream output target.
+ *
+ * index:     stream output buffer slot; it is stored in the query, where
+ *            nv50_query_end() uses it to select the buffer.
+ * serialize: if set, NV50_GRAPH_SERIALIZE is emitted before the query.
+ *
+ * targ->pq is dereferenced unconditionally, so the target must have a query.
+ */
+void
+nva0_so_target_save_offset(struct pipe_context *pipe,
+                           struct pipe_stream_output_target *ptarg,
+                           unsigned index, boolean serialize)
+{
+   struct nv50_so_target *targ = nv50_so_target(ptarg);
+
+   if (serialize) {
+      struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
+      PUSH_SPACE(push, 2);
+      BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+      PUSH_DATA (push, 0);
+   }
+
+   nv50_query(targ->pq)->index = index;
+   nv50_query_end(pipe, targ->pq);
+}
+
  void
  nv50_init_query_functions(struct nv50_context *nv50)
  {
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c

index a6dfbedf299a6c6f031b0caf5b641847876c689c..c96e028b2a2677fcc2126459ace0f45920c487d5 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -73,6 +73,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
  static int
  nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
  {
+   const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
+
     switch (param) {
     case PIPE_CAP_MAX_COMBINED_SAMPLERS:
        return 64;
@@ -82,8 +84,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        return 12;
     case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
        return 14;
-   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: /* shader support missing */
-      return 0;
+   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+      return 512;
     case PIPE_CAP_MIN_TEXEL_OFFSET:
        return -8;
     case PIPE_CAP_MAX_TEXEL_OFFSET:
@@ -95,7 +97,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
     case PIPE_CAP_ANISOTROPIC_FILTER:
     case PIPE_CAP_SCALED_RESOLVE:
        return 1;
-   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
     case PIPE_CAP_SEAMLESS_CUBE_MAP:
        return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS;
     case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
@@ -121,11 +122,12 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
     case PIPE_CAP_OCCLUSION_QUERY:
        return 1;
     case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
-      return 0;
+      return 4;
     case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
-      return 128;
     case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
-      return 32;
+      return 64;
+   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+      return (class_3d >= NVA0_3D_CLASS) ? 1 : 0;
     case PIPE_CAP_BLEND_EQUATION_SEPARATE:
     case PIPE_CAP_INDEP_BLEND_ENABLE:
        return 1;
diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c

index aef3f129c81de4f72a78bae953f6b010dcf8cec6..d070f07bbbc912fc408b507bcea41205127d88a8 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nv50/nv50_shader_state.c
@@ -207,6 +207,8 @@ nv50_gmtyprog_validate(struct nv50_context *nv50)
        PUSH_DATA (push, gp->gp.vert_count);
        BEGIN_NV04(push, NV50_3D(GP_START_ID), 1);
        PUSH_DATA (push, gp->code_base);
+
+      nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */
     }
     nv50_program_update_context_state(nv50, gp, 2);
  
@@ -278,6 +280,12 @@ nv50_validate_derived_rs(struct nv50_context *nv50)
  
     nv50_sprite_coords_validate(nv50);
  
+   if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) {
+      nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard;
+      BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
+      PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);
+   }
+
     if (nv50->dirty & NV50_NEW_FRAGPROG)
        return;
     psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
@@ -343,6 +351,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
     uint32_t colors = fp->fp.colors;
     uint32_t lin[4];
     uint8_t map[64];
+   uint8_t so_map[64];
  
     if (!(nv50->dirty & (NV50_NEW_VERTPROG |
                          NV50_NEW_FRAGPROG |
@@ -411,6 +420,30 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
     if (nv50->rast->pipe.clamp_vertex_color)
        colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;
  
+   if (unlikely(vp->so)) {
+      /* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP
+       * gets written.
+       *
+       * TODO:
+       * Inverting vp->so->map (output -> offset) would probably speed this up.
+       */
+      memset(so_map, 0, sizeof(so_map));
+      for (i = 0; i < vp->so->map_size; ++i) {
+         if (vp->so->map[i] == 0xff)
+            continue;
+         for (c = 0; c < m; ++c)
+            if (map[c] == vp->so->map[i] && !so_map[c])
+               break;
+         if (c == m) {
+            c = m;
+            map[m++] = vp->so->map[i];
+         }
+         so_map[c] = 0x80 | i;
+      }
+      for (c = m; c & 3; ++c)
+         so_map[c] = 0;
+   }
+
     n = (m + 3) / 4;
     assert(m <= 64);
  
@@ -451,6 +484,11 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
  
     BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1);
     PUSH_DATA (push, nv50->gmtyprog ? 1 : 0);
+
+   if (vp->so) {
+      BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n);
+      PUSH_DATAp(push, so_map, n);
+   }
  }
  
  static int
@@ -509,3 +547,75 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
     BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
     PUSH_DATAp(push, map, n);
  }
+
+/*
+ * Validate stream output (transform feedback) state.
+ *
+ * Uses the stream output layout of the geometry program if one is bound,
+ * otherwise that of the vertex program. If that program has no stream output
+ * state, or no targets are bound, stream output is disabled. Otherwise the
+ * buffer control word and every bound target are programmed and stream
+ * output is enabled.
+ *
+ * NVA0+ additionally programs the buffer size and the start offset (0 for a
+ * clean target, else the value saved by the target's offset query). Older
+ * chipsets get a primitive limit derived from the smallest buffer instead.
+ */
+void
+nv50_stream_output_validate(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_stream_output_state *so;
+   uint32_t ctrl;
+   unsigned i;
+   unsigned prims = ~0;
+
+   so = nv50->gmtyprog ? nv50->gmtyprog->so : nv50->vertprog->so;
+
+   if (!so || !nv50->num_so_targets) {
+      BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
+      PUSH_DATA (push, 0);
+      if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
+         BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
+         PUSH_DATA (push, 0);
+      }
+      BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
+      PUSH_DATA (push, 1);
+      return;
+   }
+
+   ctrl = so->ctrl;
+   if (nv50->screen->base.class_3d >= NVA0_3D_CLASS)
+      ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET;
+
+   BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
+   PUSH_DATA (push, ctrl);
+
+   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);
+
+   /* NOTE(review): so_target[i] is dereferenced without a NULL check although
+    * nv50_set_stream_output_targets() tolerates NULL entries - confirm that
+    * NULL targets are never bound.
+    */
+   for (i = 0; i < nv50->num_so_targets; ++i) {
+      struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
+      struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
+
+      const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;
+
+      /* Record the stride the target is written with. nv50_push_vbo() divides
+       * the queried offset by it for count_from_stream_output draws, and
+       * nothing else initializes the field.
+       */
+      targ->stride = so->stride[i];
+
+      if (n == 4 && !targ->clean)
+         nv84_query_fifo_wait(push, targ->pq);
+      BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
+      PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
+      PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
+      PUSH_DATA (push, so->num_attribs[i]);
+      if (n == 4) {
+         PUSH_DATA(push, targ->pipe.buffer_size);
+
+         BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
+         if (!targ->clean) {
+            /* Resume: feed the saved offset straight from the query buffer. */
+            assert(targ->pq);
+            nv50_query_pushbuf_submit(push, targ->pq, 0x4);
+         } else {
+            PUSH_DATA(push, 0);
+            targ->clean = FALSE;
+         }
+      } else if (so->stride[i]) {
+         /* A target the program does not write (stride 0) imposes no limit.
+          * If the primitive size is not known, allow no primitives rather
+          * than dividing by zero.
+          */
+         const unsigned prim_bytes = so->stride[i] * nv50->state.prim_size;
+         const unsigned limit =
+            prim_bytes ? targ->pipe.buffer_size / prim_bytes : 0;
+         prims = MIN2(prims, limit);
+      }
+      BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
+   }
+   /* prims is only lowered on the pre-NVA0 path. */
+   if (prims != ~0) {
+      BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
+      PUSH_DATA (push, prims);
+   }
+   BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
+   PUSH_DATA (push, 1);
+   BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
+   PUSH_DATA (push, 1);
+}
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c

index a17540a1492032db7e00db222043e8c089c41f20..7f840e2b42e1ef45f633b121167d4e3cdd37a9b5 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -680,6 +680,9 @@ nv50_sp_state_create(struct pipe_context *pipe,
     prog->type = type;
     prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
  
+   if (cso->stream_output.num_outputs)
+      prog->pipe.stream_output = cso->stream_output;
+
     return (void *)prog;
  }
  
@@ -920,6 +923,90 @@ nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
     nv50->dirty |= NV50_NEW_VERTEX;
  }
  
+/*
+ * Create a stream output target covering 'size' bytes of 'res' starting at
+ * 'offset'.
+ *
+ * On NVA0+ a buffer offset query is created along with the target; on older
+ * chipsets pq stays NULL. The target starts out clean and holds a reference
+ * on the buffer. The stride field is not set here.
+ *
+ * Returns the embedded gallium target, or NULL if an allocation fails.
+ */
+static struct pipe_stream_output_target *
+nv50_so_target_create(struct pipe_context *pipe,
+                      struct pipe_resource *res,
+                      unsigned offset, unsigned size)
+{
+   const boolean has_offset_query =
+      nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS;
+   struct pipe_query *offset_query = NULL;
+   struct nv50_so_target *so_targ;
+
+   so_targ = MALLOC_STRUCT(nv50_so_target);
+   if (!so_targ)
+      return NULL;
+
+   if (has_offset_query) {
+      offset_query = pipe->create_query(pipe,
+                                        NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET);
+      if (!offset_query) {
+         FREE(so_targ);
+         return NULL;
+      }
+   }
+   so_targ->pq = offset_query;
+   so_targ->clean = TRUE;
+
+   pipe_reference_init(&so_targ->pipe.reference, 1);
+   so_targ->pipe.context = pipe;
+   so_targ->pipe.buffer_offset = offset;
+   so_targ->pipe.buffer_size = size;
+
+   /* Clear the pointer first so the reference helper sees no old buffer. */
+   so_targ->pipe.buffer = NULL;
+   pipe_resource_reference(&so_targ->pipe.buffer, res);
+
+   return &so_targ->pipe;
+}
+
+/*
+ * Destroy a stream output target: destroys its offset query (if it has one),
+ * drops the buffer reference held by the target and frees the target.
+ */
+static void
+nv50_so_target_destroy(struct pipe_context *pipe,
+                       struct pipe_stream_output_target *ptarg)
+{
+   struct nv50_so_target *targ = nv50_so_target(ptarg);
+   if (targ->pq)
+      pipe->destroy_query(pipe, targ->pq);
+   /* nv50_so_target_create() took a reference on the buffer; release it, or
+    * the resource is never freed.
+    */
+   pipe_resource_reference(&targ->pipe.buffer, NULL);
+   FREE(targ);
+}
+
+/*
+ * Bind stream output targets.
+ *
+ * num_targets: number of entries in 'targets' (asserted to be at most 4).
+ * targets:     the new targets; NULL entries are tolerated here.
+ * append_mask: bit i set: target i is appended to; a non-NULL target bound
+ *              without its bit set is marked clean.
+ *
+ * On NVA0+ (can_resume), every previously bound target that is replaced or
+ * unbound gets its offset saved first; only the first such save serializes.
+ * Sets NV50_NEW_STRMOUT if any slot is dirty.
+ */
+static void
+nv50_set_stream_output_targets(struct pipe_context *pipe,
+                               unsigned num_targets,
+                               struct pipe_stream_output_target **targets,
+                               unsigned append_mask)
+{
+   struct nv50_context *nv50 = nv50_context(pipe);
+   unsigned i;
+   boolean serialize = TRUE;
+   const boolean can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS;
+
+   assert(num_targets <= 4);
+
+   for (i = 0; i < num_targets; ++i) {
+      const boolean changed = nv50->so_target[i] != targets[i];
+      /* Same target, appended to: nothing to do for this slot. */
+      if (!changed && (append_mask & (1 << i)))
+         continue;
+      nv50->so_targets_dirty |= 1 << i;
+
+      /* Save the old target's offset before it is replaced. */
+      if (can_resume && changed && nv50->so_target[i]) {
+         nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
+         serialize = FALSE;
+      }
+
+      if (targets[i] && !(append_mask & (1 << i)))
+         nv50_so_target(targets[i])->clean = TRUE;
+
+      pipe_so_target_reference(&nv50->so_target[i], targets[i]);
+   }
+   /* Unbind the slots beyond the new target count. */
+   for (; i < nv50->num_so_targets; ++i) {
+      if (can_resume && nv50->so_target[i]) {
+         nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
+         serialize = FALSE;
+      }
+      pipe_so_target_reference(&nv50->so_target[i], NULL);
+      nv50->so_targets_dirty |= 1 << i;
+   }
+   nv50->num_so_targets = num_targets;
+
+   if (nv50->so_targets_dirty)
+      nv50->dirty |= NV50_NEW_STRMOUT;
+}
+
  void
  nv50_init_state_functions(struct nv50_context *nv50)
  {
@@ -975,5 +1062,8 @@ nv50_init_state_functions(struct nv50_context *nv50)
  
     pipe->set_vertex_buffers = nv50_set_vertex_buffers;
     pipe->set_index_buffer = nv50_set_index_buffer;
-}
  
+   pipe->create_stream_output_target = nv50_so_target_create;
+   pipe->stream_output_target_destroy = nv50_so_target_destroy;
+   pipe->set_stream_output_targets = nv50_set_stream_output_targets;
+}
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c

index c19acf6c426e194e5acd4003910c9d60bc297faf..a95e96d3c51c8e6f471d3041085616b72d818620 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -360,6 +360,8 @@ static struct state_validate {
      { nv50_constbufs_validate,     NV50_NEW_CONSTBUF },
      { nv50_validate_textures,      NV50_NEW_TEXTURES },
      { nv50_validate_samplers,      NV50_NEW_SAMPLERS },
+    { nv50_stream_output_validate, NV50_NEW_STRMOUT |
+                                   NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
      { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }
  };
  #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
diff --git a/src/gallium/drivers/nv50/nv50_stateobj.h b/src/gallium/drivers/nv50/nv50_stateobj.h

index 188406da6008d07e8022accc07c3e1b8f7375ce0..8a9260c937e325c6a51abfd92e0f5cd064bc6d72 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_stateobj.h
+++ b/src/gallium/drivers/nv50/nv50_stateobj.h
@@ -51,4 +51,17 @@ struct nv50_vertex_stateobj {
     struct nv50_vertex_element element[0];
  };
  
+/* Driver-side stream output target. nv50_so_target() casts a
+ * pipe_stream_output_target pointer to this type, so 'pipe' has to remain
+ * the first member.
+ */
+struct nv50_so_target {
+   struct pipe_stream_output_target pipe;
+   struct pipe_query *pq; /* offset query; NULL if class_3d < NVA0_3D_CLASS */
+   unsigned stride; /* nv50_push_vbo divides the queried offset by this */
+   boolean clean; /* TRUE: start at offset 0 instead of the queried offset */
+};
+
+/* Cast a gallium stream output target to the driver's nv50_so_target. */
+static INLINE struct nv50_so_target *
+nv50_so_target(struct pipe_stream_output_target *ptarg)
+{
+   return (struct nv50_so_target *)ptarg;
+}
+
  #endif
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c

index b38e49ffcc103d341457782cb3f3d89da7bf9084..15c88d5316d1bec880e7a25748c4fad41ce85c50 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -711,7 +711,7 @@ nv50_blit_set_src(struct nv50_context *nv50,
     templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
  
     nv50->textures[2][0] = nv50_create_sampler_view(pipe, res, &templ);
-   nv50->textures[2][0] = NULL;
+   nv50->textures[2][1] = NULL;
  
     nv50_blit_fixup_tic_entry(nv50->textures[2][0]);
  
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c

index bc01e69decfafe3a6c47cb8c2343b2f47ac1f023..323677eaf8076abdf6b1e2ecd841f284bdf71f5b 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -405,6 +405,25 @@ nv50_prim_gl(unsigned prim)
     }
  }
  
+/* For pre-nva0 transform feedback.
+ *
+ * Indexed by PIPE_PRIM_* draw mode. nv50_draw_vbo() copies the entry into
+ * state.prim_size when stream output targets are bound and no geometry
+ * program is active. Entries that are not listed are 0.
+ */
+static const uint8_t nv50_pipe_prim_to_prim_size[PIPE_PRIM_MAX + 1] =
+{
+   [PIPE_PRIM_POINTS] = 1,
+   [PIPE_PRIM_LINES] = 2,
+   [PIPE_PRIM_LINE_LOOP] = 2,
+   [PIPE_PRIM_LINE_STRIP] = 2,
+   [PIPE_PRIM_TRIANGLES] = 3,
+   [PIPE_PRIM_TRIANGLE_STRIP] = 3,
+   [PIPE_PRIM_TRIANGLE_FAN] = 3,
+   [PIPE_PRIM_QUADS] = 3,
+   [PIPE_PRIM_QUAD_STRIP] = 3,
+   [PIPE_PRIM_POLYGON] = 3,
+   [PIPE_PRIM_LINES_ADJACENCY] = 2,
+   [PIPE_PRIM_LINE_STRIP_ADJACENCY] = 2,
+   [PIPE_PRIM_TRIANGLES_ADJACENCY] = 3,
+   [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = 3
+};
+
  static void
  nv50_draw_arrays(struct nv50_context *nv50,
                   unsigned mode, unsigned start, unsigned count,
@@ -623,6 +642,51 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
     }
  }
  
+/*
+ * Draw with the amount of data taken from a stream output target
+ * (info->count_from_stream_output). Only implemented for NVA0+; on older
+ * chipsets an error is logged and nothing is drawn.
+ *
+ * The argument of DRAW_TFB_BYTES is supplied from the target's offset query
+ * through nv50_query_pushbuf_submit() instead of a CPU-side value.
+ */
+static void
+nva0_draw_stream_output(struct nv50_context *nv50,
+                        const struct pipe_draw_info *info)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct nv50_so_target *so = nv50_so_target(info->count_from_stream_output);
+   struct nv04_resource *res = nv04_resource(so->pipe.buffer);
+   unsigned num_instances = info->instance_count;
+   unsigned mode = nv50_prim_gl(info->mode);
+
+   if (unlikely(nv50->screen->base.class_3d < NVA0_3D_CLASS)) {
+      /* A proper implementation without waiting doesn't seem possible,
+       * so don't bother.
+       */
+      NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
+      return;
+   }
+
+   /* If the buffer is flagged as being written by the GPU, clear the flag and
+    * emit SERIALIZE and VERTEX_ARRAY_FLUSH before drawing from it.
+    */
+   if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+      PUSH_SPACE(push, 4);
+      BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+      PUSH_DATA (push, 0);
+      BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
+      PUSH_DATA (push, 0);
+   }
+
+   /* The do/while below runs at least once, hence the assert. */
+   assert(num_instances);
+   do {
+      PUSH_SPACE(push, 8);
+      BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
+      PUSH_DATA (push, mode);
+      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
+      PUSH_DATA (push, 0);
+      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
+      PUSH_DATA (push, 0);
+      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
+      nv50_query_pushbuf_submit(push, so->pq, 0x4);
+      BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
+      PUSH_DATA (push, 0);
+
+      /* Every instance after the first is begun with INSTANCE_NEXT set. */
+      mode |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+   } while (--num_instances);
+}
+
  static void
  nv50_draw_vbo_kick_notify(struct nouveau_pushbuf *chan)
  {
@@ -655,6 +719,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
     if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS)))
        nv50_update_user_vbufs(nv50);
  
+   if (unlikely(nv50->num_so_targets && !nv50->gmtyprog))
+      nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode];
+
     nv50_state_validate(nv50, ~0, 8); /* 8 as minimum, we use flush_notify */
  
     push->kick_notify = nv50_draw_vbo_kick_notify;
@@ -679,11 +746,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
        nv50->base.vbo_dirty = FALSE;
     }
  
-   if (!info->indexed) {
-      nv50_draw_arrays(nv50,
-                       info->mode, info->start, info->count,
-                       info->instance_count);
-   } else {
+   if (info->indexed) {
        boolean shorten = info->max_index <= 65535;
  
        assert(nv50->idxbuf.buffer);
@@ -713,6 +776,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
        nv50_draw_elements(nv50, shorten,
                           info->mode, info->start, info->count,
                           info->instance_count, info->index_bias);
+   } else
+   if (unlikely(info->count_from_stream_output)) {
+      nva0_draw_stream_output(nv50, info);
+   } else {
+      nv50_draw_arrays(nv50,
+                       info->mode, info->start, info->count,
+                       info->instance_count);
     }
     push->kick_notify = nv50_default_kick_notify;
  
diff --git a/src/gallium/drivers/nv50/nv50_winsys.h b/src/gallium/drivers/nv50/nv50_winsys.h

index b36898dabe64dcecd2a005f51411e5f95b8dedd3..145ee70cb9fba4284b5fee02744d335132764b44 100644 (file)
--- a/src/gallium/drivers/nv50/nv50_winsys.h
+++ b/src/gallium/drivers/nv50/nv50_winsys.h
@@ -49,6 +49,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
  
  #define SUBC_3D(m) 3, (m)
  #define NV50_3D(n) SUBC_3D(NV50_3D_##n)
+#define NVA0_3D(n) SUBC_3D(NVA0_3D_##n)
  
  #define SUBC_2D(m) 4, (m)
  #define NV50_2D(n) SUBC_2D(NV50_2D_##n)
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp

index 2ca4979dc74558ba249947958b0f82628369e68d..fbd1aa5dfc91ed5cce5c00aab3d108934e19ef41 100644 (file)
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp
@@ -1020,7 +1020,7 @@ CodeEmitterNVC0::emitTEX(const TexInstruction *i)
        code[1] |= 0x02000000;
     }
  
-   if (i->tex.derivAll)
+   if (i->op != OP_TXD && i->tex.derivAll)
        code[1] |= 1 << 13;
  
     defId(i->def(0), 14);
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp

index 02ae9fd5d0efdd5ec23f34c085e7f14dfa603c0b..900e998df8d3c909ca67f3152783627730d12af9 100644 (file)
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp
@@ -749,21 +749,22 @@ bool
  NVC0LoweringPass::handleTXD(TexInstruction *txd)
  {
     int dim = txd->tex.target.getDim();
-   int arg = txd->tex.target.getDim() + txd->tex.target.isArray();
+   int arg = txd->tex.target.getArgCount();
  
     handleTEX(txd);
-   while (txd->src(arg).exists())
+   while (txd->srcExists(arg))
        ++arg;
  
     txd->tex.derivAll = true;
-   if (dim > 2 || txd->tex.target.isShadow())
+   if (dim > 2 ||
+       txd->tex.target.isCube() ||
+       arg > 4 ||
+       txd->tex.target.isShadow())
        return handleManualTXD(txd);
  
-   assert(arg <= 4); // at most s/t/array, x, y, offset
-
     for (int c = 0; c < dim; ++c) {
-      txd->src(arg + c * 2 + 0).set(txd->dPdx[c]);
-      txd->src(arg + c * 2 + 1).set(txd->dPdy[c]);
+      txd->setSrc(arg + c * 2 + 0, txd->dPdx[c]);
+      txd->setSrc(arg + c * 2 + 1, txd->dPdy[c]);
        txd->dPdx[c].set(NULL);
        txd->dPdy[c].set(NULL);
     }
diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp

index 10c2d09d65716f050b30859ed92bbfe2d1e294b4..e4b9dc18311cc2118b341f706b6401680820fa44 100644 (file)
--- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp
@@ -223,6 +223,9 @@ static const struct opProperties _initProps[] =
     { OP_ABS,    0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
     { OP_NEG,    0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
     { OP_CVT,    0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
+   { OP_CEIL,   0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
+   { OP_FLOOR,  0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
+   { OP_TRUNC,  0x1, 0x1, 0x0, 0x8, 0x1, 0x0 },
     { OP_AND,    0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
     { OP_OR,     0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
     { OP_XOR,    0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 },
diff --git a/src/gallium/drivers/r300/r300_vs_draw.c b/src/gallium/drivers/r300/r300_vs_draw.c

index 69d67585d8b24b955abd61ef87a20a8813cc55e2..b9e73dd514b34a5f0acdcf4e7b8185c0f68abd5d 100644 (file)
--- a/src/gallium/drivers/r300/r300_vs_draw.c
+++ b/src/gallium/drivers/r300/r300_vs_draw.c
@@ -94,11 +94,12 @@ static void emit_output(struct tgsi_transform_context *ctx,
  
      decl = tgsi_default_full_declaration();
      decl.Declaration.File = TGSI_FILE_OUTPUT;
-    decl.Declaration.Interpolate = interp;
+    decl.Declaration.Interpolate = 1;
      decl.Declaration.Semantic = TRUE;
      decl.Semantic.Name = name;
      decl.Semantic.Index = index;
      decl.Range.First = decl.Range.Last = reg;
+    decl.Interp.Interpolate = interp;
      ctx->emit_declaration(ctx, &decl);
      ++vsctx->num_outputs;
  }
diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am

index 3089a829e533f11622f8d42123d5e353b892b8a4..77d2674d262276197ff719e65d335a92d354251f 100644 (file)
--- a/src/gallium/drivers/r600/Makefile.am
+++ b/src/gallium/drivers/r600/Makefile.am
@@ -29,7 +29,7 @@ libr600_a_SOURCES += \
         $(LLVM_C_SOURCES)
  
  libr600_a_LIBADD = \
-       $(top_srcdir)/src/gallium/drivers/radeon/libradeon.a
+       $(top_builddir)/src/gallium/drivers/radeon/libradeon.a
  
  AM_CFLAGS += \
         $(LLVM_CFLAGS) \
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c

index b6d03ef37de7c4e2c8dbc3f36fa2b943273e3035..d2c1679796a3bd0ee1a5bdd1b76e4307ee6de8f2 100644 (file)
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -133,6 +133,10 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
                                         S_SQ_CF_WORD1_COND(cf->cond) |
                                         S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
                 break;
+       case CF_NATIVE:
+               bc->bytecode[id++] = cf->isa[0];
+               bc->bytecode[id++] = cf->isa[1];
+               break;
         default:
                 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
                 return -EINVAL;
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c

index 517121dc28808baf420dc569ca0a0b7913162bab..81aedb5c0ac21cb25b7cdcfba000609012eb6b54 100644 (file)
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -796,11 +796,11 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx,
                 alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1);
                 alpha_ref = fui(state->alpha.ref_value);
         }
+       dsa->sx_alpha_test_control = alpha_test_control & 0xff;
         dsa->alpha_ref = alpha_ref;
  
         /* misc */
         db_render_control = 0;
-       r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control);
         r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control);
         r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control);
         return rstate;
@@ -1428,6 +1428,11 @@ static void evergreen_cb(struct r600_context *rctx, struct r600_pipe_state *rsta
                 blend_bypass = 1;
         }
  
+       if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT)
+               rctx->sx_alpha_test_control |= S_028410_ALPHA_TEST_BYPASS(1);
+       else
+               rctx->sx_alpha_test_control &= C_028410_ALPHA_TEST_BYPASS;
+
         color_info |= S_028C70_FORMAT(format) |
                 S_028C70_COMP_SWAP(swap) |
                 S_028C70_BLEND_CLAMP(blend_clamp) |
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h

index 4009e91d4fc18c24da9f144a4769b06d7e6423ec..105d80f061d00ba6516d774e8ac574a4aa9ce6af 100644 (file)
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -32,20 +32,16 @@
  #define EVERGREEN_CONTEXT_REG_OFFSET                0X00028000
  #define EVERGREEN_CONTEXT_REG_END                   0X00029000
  #define EVERGREEN_RESOURCE_OFFSET                   0x00030000
-#define EVERGREEN_RESOURCE_END                      0x00034000
-#define CAYMAN_RESOURCE_END                         0x00038000
+#define EVERGREEN_RESOURCE_END                      0x00038000
  #define EVERGREEN_LOOP_CONST_OFFSET                 0x0003A200
-#define EVERGREEN_LOOP_CONST_END                    0x0003A26C
+#define EVERGREEN_LOOP_CONST_END                    0x0003A500
  #define EVERGREEN_BOOL_CONST_OFFSET                 0x0003A500
-#define EVERGREEN_BOOL_CONST_END                    0x0003A506
-#define CAYMAN_BOOL_CONST_END                       0x0003A518
+#define EVERGREEN_BOOL_CONST_END                    0x0003A518
  #define EVERGREEN_SAMPLER_OFFSET                    0X0003C000
-#define EVERGREEN_SAMPLER_END                       0X0003CFF0
-#define CAYMAN_SAMPLER_END                          0X0003C600
+#define EVERGREEN_SAMPLER_END                       0X0003C600
  
  #define EVERGREEN_CTL_CONST_OFFSET                  0x0003CFF0
-#define EVERGREEN_CTL_CONST_END                     0x0003E200
-#define CAYMAN_CTL_CONST_END                        0x0003FF0C
+#define EVERGREEN_CTL_CONST_END                     0x0003FF0C
  
  #define EVENT_TYPE_PS_PARTIAL_FLUSH            0x10
  #define EVENT_TYPE_ZPASS_DONE                  0x15
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c

index 651933bf37c30e1c33b34e09e8bbe37ca442c322..5a10bd90776f14dd4c85a93a9774f1146adaf496 100644 (file)
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -94,6 +94,7 @@ static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode *
                 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT:
                 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
                 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL:
                 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
                 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
                 case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
@@ -153,6 +154,7 @@ static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode *
                 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT:
                 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT:
                 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT:
                 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
                 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
                 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
@@ -171,6 +173,7 @@ static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode *
                 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
                 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
                 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL:
                 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
                 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
                 case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
@@ -1927,6 +1930,7 @@ int r600_bytecode_build(struct r600_bytecode *bc)
                         case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
                         case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
                         case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
+                       case CF_NATIVE:
                                 break;
                         default:
                                 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
@@ -2025,13 +2029,12 @@ int r600_bytecode_build(struct r600_bytecode *bc)
                                 }
                                 break;
                         case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX:
-                               if (bc->chip_class == CAYMAN) {
-                                       LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
-                                               r = r600_bytecode_vtx_build(bc, vtx, addr);
-                                               if (r)
-                                                       return r;
-                                               addr += 4;
-                                       }
+                               LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+                                       assert(bc->chip_class >= EVERGREEN);
+                                       r = r600_bytecode_vtx_build(bc, vtx, addr);
+                                       if (r)
+                                               return r;
+                                       addr += 4;
                                 }
                                 LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
                                         r = r600_bytecode_tex_build(bc, tex, addr);
@@ -2069,6 +2072,8 @@ int r600_bytecode_build(struct r600_bytecode *bc)
                         case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
                         case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
                                 break;
+                       case CF_NATIVE:
+                               break;
                         default:
                                 R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
                                 return -EINVAL;
@@ -2341,6 +2346,10 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
                                 fprintf(stderr, "COND:%X ", cf->cond);
                                 fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count);
                                 break;
+                       case CF_NATIVE:
+                               fprintf(stderr, "%04d %08X CF NATIVE\n", id, bc->bytecode[id]);
+                               fprintf(stderr, "%04d %08X CF NATIVE\n", id + 1, bc->bytecode[id + 1]);
+                               break;
                         default:
                                 R600_ERR("Unknown instruction %0x\n", cf->inst);
                         }
@@ -2477,7 +2486,8 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
                         if (alu->last) {
                                 for (i = 0; i < nliteral; i++, id++) {
                                         float *f = (float*)(bc->bytecode + id);
-                                       fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f);
+                                       fprintf(stderr, "%04d %08X\t%f (%d)\n", id, bc->bytecode[id], *f,
+                                                       *(bc->bytecode + id));
                                 }
                                 id += nliteral & 1;
                                 nliteral = 0;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h

index 5790ead991f8d1e501453739c5d83f62472b8d2e..a8a157b79e4d74de4142d25d117fb5a75fa2a2ff 100644 (file)
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -135,6 +135,14 @@ struct r600_bytecode_kcache {
         unsigned                        addr;
  };
  
+/* A value of CF_NATIVE in r600_bytecode_cf::inst means that this instruction
+ * has already been encoded, and the encoding has been stored in
+ * r600_bytecode::isa.  This is used by the LLVM backend to emit CF instructions
+ * e.g. RAT_WRITE_* that can't be properly represented by struct
+ * r600_bytecode_cf.
+ */
+#define CF_NATIVE ~0
+
  struct r600_bytecode_cf {
         struct list_head                list;
  
@@ -157,6 +165,7 @@ struct r600_bytecode_cf {
         struct r600_bytecode_alu                *curr_bs_head;
         struct r600_bytecode_alu                *prev_bs_head;
         struct r600_bytecode_alu                *prev2_bs_head;
+       unsigned isa[2];
  };
  
  #define FC_NONE                                0
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c

index d467baf60fb835453182c12e8272407619f0505f..f916604db7bd132ed9f2c84206ff0f15038c2fa6 100644 (file)
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -21,10 +21,44 @@ static LLVMValueRef llvm_fetch_const(
         enum tgsi_opcode_type type,
         unsigned swizzle)
  {
-       return lp_build_intrinsic_unary(bld_base->base.gallivm->builder,
+       LLVMValueRef cval = lp_build_intrinsic_unary(bld_base->base.gallivm->builder,
                 "llvm.AMDGPU.load.const", bld_base->base.elem_type,
                 lp_build_const_int32(bld_base->base.gallivm,
                 radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)));
+
+       return bitcast(bld_base, type, cval);
+}
+
+static void llvm_load_system_value(
+               struct radeon_llvm_context * ctx,
+               unsigned index,
+               const struct tgsi_full_declaration *decl)
+{
+       unsigned chan;
+
+       switch (decl->Semantic.Name) {
+       case TGSI_SEMANTIC_INSTANCEID: chan = 3; break;
+       case TGSI_SEMANTIC_VERTEXID: chan = 0; break;
+       default: assert(!"unknown system value");
+       }
+
+       LLVMValueRef reg = lp_build_const_int32(
+                       ctx->soa.bld_base.base.gallivm, chan);
+       ctx->system_values[index] = lp_build_intrinsic_unary(
+                       ctx->soa.bld_base.base.gallivm->builder,
+                       "llvm.R600.load.input",
+                       ctx->soa.bld_base.base.elem_type, reg);
+}
+
+static LLVMValueRef llvm_fetch_system_value(
+               struct lp_build_tgsi_context * bld_base,
+               const struct tgsi_full_src_register *reg,
+               enum tgsi_opcode_type type,
+               unsigned swizzle)
+{
+       struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+       LLVMValueRef cval = ctx->system_values[reg->Register.Index];
+       return bitcast(bld_base, type, cval);
  }
  
  static void llvm_load_input(
@@ -59,17 +93,13 @@ static void llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
         for (i = 0; i < ctx->reserved_reg_count; i++) {
                 unsigned chan;
                 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-                       LLVMValueRef reg;
                         LLVMValueRef reg_index = lp_build_const_int32(
                                         base->gallivm,
                                         radeon_llvm_reg_index_soa(i, chan));
-                       reg = lp_build_intrinsic_unary(base->gallivm->builder,
-                                               "llvm.AMDGPU.reserve.reg",
-                                               base->elem_type, reg_index);
                         lp_build_intrinsic_unary(base->gallivm->builder,
-                               "llvm.AMDGPU.export.reg",
+                               "llvm.AMDGPU.reserve.reg",
                                 LLVMVoidTypeInContext(base->gallivm->context),
-                               reg);
+                               reg_index);
                 }
         }
  }
@@ -85,7 +115,6 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
                 unsigned chan;
                 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
                         LLVMValueRef output;
-                       LLVMValueRef store_output;
                         unsigned adjusted_reg_idx = i +
                                         ctx->reserved_reg_count;
                         LLVMValueRef reg_index = lp_build_const_int32(
@@ -95,16 +124,11 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
                         output = LLVMBuildLoad(base->gallivm->builder,
                                 ctx->soa.outputs[i][chan], "");
  
-                       store_output = lp_build_intrinsic_binary(
+                       lp_build_intrinsic_binary(
                                 base->gallivm->builder,
                                 "llvm.AMDGPU.store.output",
-                               base->elem_type,
-                               output, reg_index);
-
-                       lp_build_intrinsic_unary(base->gallivm->builder,
-                               "llvm.AMDGPU.export.reg",
                                 LLVMVoidTypeInContext(base->gallivm->context),
-                               store_output);
+                               output, reg_index);
                 }
         }
  }
@@ -169,28 +193,7 @@ static struct lp_build_tgsi_action dot_action = {
         .intr_name = "llvm.AMDGPU.dp4"
  };
  
-static void txp_fetch_args(
-       struct lp_build_tgsi_context * bld_base,
-       struct lp_build_emit_data * emit_data)
-{
-       LLVMValueRef src_w;
-       unsigned chan;
-       LLVMValueRef coords[4];
-
-       emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-       src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
  
-       for (chan = 0; chan < 3; chan++ ) {
-               LLVMValueRef arg = lp_build_emit_fetch(bld_base,
-                                               emit_data->inst, 0, chan);
-               coords[chan] = lp_build_emit_llvm_binary(bld_base,
-                                       TGSI_OPCODE_DIV, arg, src_w);
-       }
-       coords[3] = bld_base->base.one;
-       emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
-                                               coords, 4);
-       emit_data->arg_count = 1;
-}
  
  LLVMModuleRef r600_tgsi_llvm(
         struct radeon_llvm_context * ctx,
@@ -204,20 +207,25 @@ LLVMModuleRef r600_tgsi_llvm(
         bld_base->info = &shader_info;
         bld_base->userdata = ctx;
         bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = llvm_fetch_const;
+       bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = llvm_fetch_system_value;
         bld_base->emit_prologue = llvm_emit_prologue;
         bld_base->emit_epilogue = llvm_emit_epilogue;
         ctx->userdata = ctx;
         ctx->load_input = llvm_load_input;
+       ctx->load_system_value = llvm_load_system_value;
  
         bld_base->op_actions[TGSI_OPCODE_DP2] = dot_action;
         bld_base->op_actions[TGSI_OPCODE_DP3] = dot_action;
         bld_base->op_actions[TGSI_OPCODE_DP4] = dot_action;
         bld_base->op_actions[TGSI_OPCODE_DPH] = dot_action;
+       bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex;
+       bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex;
         bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex;
         bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex;
         bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex;
         bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex;
-       bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
+       bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
+       bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
         bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
  
         lp_build_tgsi_llvm(bld_base, tokens);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h

index 63fc27564d7454e510934588361e88fd4eb75ebc..db455f021ad9951e92b35cddff8f8c6650284328 100644 (file)
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -161,6 +161,7 @@ struct r600_pipe_dsa {
         ubyte                           valuemask[2];
         ubyte                           writemask[2];
         bool                            is_flush;
+       unsigned                        sx_alpha_test_control;
  };
  
  struct r600_vertex_element
@@ -250,6 +251,7 @@ struct r600_context {
         struct pipe_framebuffer_state   framebuffer;
         unsigned                        cb_target_mask;
         unsigned                        fb_cb_shader_mask;
+       unsigned                        sx_alpha_test_control;
         unsigned                        cb_shader_mask;
         unsigned                        cb_color_control;
         unsigned                        pa_sc_line_stipple;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c

index 5e22b35ba48b67982914e2cb79bf865448c4856f..cd78104a010a97df32908cb2d70f792a731dd038 100644 (file)
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -287,6 +287,7 @@ static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu,
  {
         alu->inst = pred_inst; 
         alu->predicate = 1;
+       alu->dst.write = 0;
         alu->src[1].sel = V_SQ_ALU_SRC_0;
         alu->src[1].chan = 0;
         alu->last = 1;
@@ -362,6 +363,10 @@ static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx,
                         tgsi_loop_brk_cont(ctx);
                 }
                 break;
+       case 8:
+               r600_break_from_byte_stream(ctx, &alu,
+                       CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT));
+               break;
         }
  
         return bytes_read;
@@ -401,10 +406,43 @@ static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx,
         return bytes_read;
  }
  
+static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
+       unsigned char * bytes, unsigned bytes_read)
+{
+       struct r600_bytecode_vtx vtx;
+       memset(&vtx, 0, sizeof(vtx));
+       vtx.inst = bytes[bytes_read++];
+       vtx.fetch_type = bytes[bytes_read++];
+       vtx.buffer_id = bytes[bytes_read++];
+       vtx.src_gpr = bytes[bytes_read++];
+       vtx.src_sel_x = bytes[bytes_read++];
+       vtx.mega_fetch_count = bytes[bytes_read++];
+       vtx.dst_gpr = bytes[bytes_read++];
+       vtx.dst_sel_x = bytes[bytes_read++];
+       vtx.dst_sel_y = bytes[bytes_read++];
+       vtx.dst_sel_z = bytes[bytes_read++];
+       vtx.dst_sel_w = bytes[bytes_read++];
+       vtx.use_const_fields = bytes[bytes_read++];
+       vtx.data_format = bytes[bytes_read++];
+       vtx.num_format_all = bytes[bytes_read++];
+       vtx.format_comp_all = bytes[bytes_read++];
+       vtx.srf_mode_all = bytes[bytes_read++];
+       vtx.offset = bytes[bytes_read++];
+       vtx.endian = bytes[bytes_read++];
+
+       if (r600_bytecode_add_vtx(ctx->bc, &vtx)) {
+               fprintf(stderr, "Error adding vtx\n");
+       }
+       /* Use the Texture Cache */
+       ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX;
+       return bytes_read;
+}
+
  static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
                                 unsigned char * bytes,  unsigned num_bytes)
  {
         unsigned bytes_read = 0;
+       unsigned i, byte;
         while (bytes_read < num_bytes) {
                 char inst_type = bytes[bytes_read++];
                 switch (inst_type) {
@@ -420,6 +458,20 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
                         bytes_read = r600_fc_from_byte_stream(ctx, bytes,
                                                                 bytes_read);
                         break;
+               case 3:
+                       r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
+                       for (i = 0; i < 2; i++) {
+                               for (byte = 0 ; byte < 4; byte++) {
+                                       ctx->bc->cf_last->isa[i] |=
+                                       (bytes[bytes_read++] << (byte * 8));
+                               }
+                       }
+                       break;
+
+               case 4:
+                       bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
+                                                               bytes_read);
+                       break;
                 default:
                         /* XXX: Error here */
                         break;
@@ -670,8 +722,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                 ctx->shader->input[i].name = d->Semantic.Name;
                 ctx->shader->input[i].sid = d->Semantic.Index;
                 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
-               ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
-               ctx->shader->input[i].centroid = d->Declaration.Centroid;
+               ctx->shader->input[i].interpolate = d->Interp.Interpolate;
+               ctx->shader->input[i].centroid = d->Interp.Centroid;
                 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
                 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
                         switch (ctx->shader->input[i].name) {
@@ -697,7 +749,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                 ctx->shader->output[i].sid = d->Semantic.Index;
                 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
                 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
-               ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
+               ctx->shader->output[i].interpolate = d->Interp.Interpolate;
                 ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
                 if (ctx->type == TGSI_PROCESSOR_VERTEX) {
                         switch (d->Semantic.Name) {
@@ -5102,7 +5154,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
         {80,                    0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
         {TGSI_OPCODE_PUSHA,     0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
         {TGSI_OPCODE_POPA,      0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_CEIL,      0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_CEIL,      0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
         {TGSI_OPCODE_I2F,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
         {TGSI_OPCODE_NOT,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
         {TGSI_OPCODE_TRUNC,     0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
@@ -5168,16 +5220,16 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
         {TGSI_OPCODE_CASE,      0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
         {TGSI_OPCODE_DEFAULT,   0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
         {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
-       {TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
+       {TGSI_OPCODE_SAMPLE_I,  0, 0, tgsi_unsupported},
+       {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
         {TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
-       {TGSI_OPCODE_RESINFO,   0, 0, tgsi_unsupported},
+       {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
         {TGSI_OPCODE_UARL,      0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl},
@@ -5276,7 +5328,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
         {80,                    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
         {TGSI_OPCODE_PUSHA,     0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
         {TGSI_OPCODE_POPA,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_CEIL,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_CEIL,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
         {TGSI_OPCODE_I2F,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans},
         {TGSI_OPCODE_NOT,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
         {TGSI_OPCODE_TRUNC,     0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
@@ -5342,16 +5394,16 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
         {TGSI_OPCODE_CASE,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
         {TGSI_OPCODE_DEFAULT,   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
         {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
-       {TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
+       {TGSI_OPCODE_SAMPLE_I,      0, 0, tgsi_unsupported},
+       {TGSI_OPCODE_SAMPLE_I_MS,   0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
         {TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
-       {TGSI_OPCODE_RESINFO,   0, 0, tgsi_unsupported},
+       {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
         {TGSI_OPCODE_UARL,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
@@ -5450,7 +5502,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
         {80,                    0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
         {TGSI_OPCODE_PUSHA,     0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
         {TGSI_OPCODE_POPA,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_CEIL,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_CEIL,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2},
         {TGSI_OPCODE_I2F,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2},
         {TGSI_OPCODE_NOT,       0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2},
         {TGSI_OPCODE_TRUNC,     0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
@@ -5516,16 +5568,16 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
         {TGSI_OPCODE_CASE,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
         {TGSI_OPCODE_DEFAULT,   0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
         {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_LOAD,      0, 0, tgsi_unsupported},
-       {TGSI_OPCODE_LOAD_MS,   0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE,    0, 0, tgsi_unsupported},
+       {TGSI_OPCODE_SAMPLE_I,      0, 0, tgsi_unsupported},
+       {TGSI_OPCODE_SAMPLE_I_MS,   0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_B,  0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_C,  0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_D,  0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_L,  0, 0, tgsi_unsupported},
         {TGSI_OPCODE_GATHER4,   0, 0, tgsi_unsupported},
-       {TGSI_OPCODE_RESINFO,   0, 0, tgsi_unsupported},
+       {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported},
         {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported},
         {TGSI_OPCODE_UARL,      0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl},
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c

index 3a83b613e587889b18f3ac31d04ad06c73fca178..acf59f80bf49cf80affe97dc000158c64d0df0cd 100644 (file)
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -805,9 +805,9 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
                 alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1);
                 alpha_ref = fui(state->alpha.ref_value);
         }
+       dsa->sx_alpha_test_control = alpha_test_control & 0xff;
         dsa->alpha_ref = alpha_ref;
  
-       r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control);
         r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control);
         return rstate;
  }
@@ -1466,6 +1466,11 @@ static void r600_cb(struct r600_context *rctx, struct r600_pipe_state *rstate,
                 blend_bypass = 1;
         }
  
+       if (ntype == V_0280A0_NUMBER_UINT || ntype == V_0280A0_NUMBER_SINT)
+               rctx->sx_alpha_test_control |= S_028410_ALPHA_TEST_BYPASS(1);
+       else
+               rctx->sx_alpha_test_control &= C_028410_ALPHA_TEST_BYPASS;
+
         color_info |= S_0280A0_FORMAT(format) |
                 S_0280A0_COMP_SWAP(swap) |
                 S_0280A0_BLEND_BYPASS(blend_bypass) |
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c

index ccae7d91d43863075b963830f9fa6cee65633e01..d47383558d9b23b8dcd173f311ec0bbf027e786a 100644 (file)
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -244,6 +244,8 @@ void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
                 return;
         rstate = &dsa->rstate;
         rctx->states[rstate->id] = rstate;
+       rctx->sx_alpha_test_control &= ~0xff;
+       rctx->sx_alpha_test_control |= dsa->sx_alpha_test_control;
         rctx->alpha_ref = dsa->alpha_ref;
         rctx->alpha_ref_dirty = true;
         r600_context_pipe_state_set(rctx, rstate);
@@ -796,6 +798,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
                 r600_pipe_state_add_reg(&rctx->vgt, R_02823C_CB_SHADER_MASK, 0);
                 r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, info.index_bias);
                 r600_pipe_state_add_reg(&rctx->vgt, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, info.restart_index);
+               r600_pipe_state_add_reg(&rctx->vgt, R_028410_SX_ALPHA_TEST_CONTROL, 0);
                 r600_pipe_state_add_reg(&rctx->vgt, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info.primitive_restart);
                 r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance);
                 r600_pipe_state_add_reg(&rctx->vgt, R_028A0C_PA_SC_LINE_STIPPLE, 0);
@@ -817,6 +820,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
         r600_pipe_state_mod_reg(&rctx->vgt, rctx->cb_shader_mask);
         r600_pipe_state_mod_reg(&rctx->vgt, info.index_bias);
         r600_pipe_state_mod_reg(&rctx->vgt, info.restart_index);
+       r600_pipe_state_mod_reg(&rctx->vgt, rctx->sx_alpha_test_control);
         r600_pipe_state_mod_reg(&rctx->vgt, info.primitive_restart);
         r600_pipe_state_mod_reg(&rctx->vgt, info.start_instance);
  
diff --git a/src/gallium/drivers/radeon/AMDGPU.h b/src/gallium/drivers/radeon/AMDGPU.h

index eff002a5eae58a8b7c2005b4c4118b7b5ac02ad1..0f42cb744d3fd42608e4a3c3e3864e0c6b044d1a 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPU.h
+++ b/src/gallium/drivers/radeon/AMDGPU.h
@@ -1,4 +1,4 @@
-//===-- AMDGPU.h - TODO: Add brief description -------===//
+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -6,10 +6,6 @@
  // License. See LICENSE.TXT for details.
  //
  //===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
  
  #ifndef AMDGPU_H
  #define AMDGPU_H
@@ -19,29 +15,24 @@
  #include "llvm/Target/TargetMachine.h"
  
  namespace llvm {
-    class FunctionPass;
-    class AMDGPUTargetMachine;
-
-    FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
-    FunctionPass *createR600LowerShaderInstructionsPass(TargetMachine &tm);
-    FunctionPass *createR600LowerInstructionsPass(TargetMachine &tm);
-
-    FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
-    FunctionPass *createSIInitMachineFunctionInfoPass(TargetMachine &tm);
-    FunctionPass *createSILowerShaderInstructionsPass(TargetMachine &tm);
-    FunctionPass *createSIPropagateImmReadsPass(TargetMachine &tm);
-    FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
  
-    FunctionPass *createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm);
+class FunctionPass;
+class AMDGPUTargetMachine;
  
-    FunctionPass *createAMDGPULowerInstructionsPass(TargetMachine &tm);
-    FunctionPass *createAMDGPULowerShaderInstructionsPass(TargetMachine &tm);
+// R600 Passes
+FunctionPass* createR600KernelParametersPass(const TargetData* TD);
+FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
+FunctionPass *createR600LowerInstructionsPass(TargetMachine &tm);
  
-    FunctionPass *createAMDGPUDelimitInstGroupsPass(TargetMachine &tm);
+// SI Passes
+FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
+FunctionPass *createSIPropagateImmReadsPass(TargetMachine &tm);
+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
  
-    FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
+// Passes common to R600 and SI
+FunctionPass *createAMDGPULowerInstructionsPass(TargetMachine &tm);
+FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
  
-    FunctionPass *createAMDGPUFixRegClassesPass(TargetMachine &tm);
+} // End namespace llvm
  
-} /* End namespace llvm */
-#endif /* AMDGPU_H */
+#endif // AMDGPU_H
diff --git a/src/gallium/drivers/radeon/AMDGPUConstants.pm b/src/gallium/drivers/radeon/AMDGPUConstants.pm

deleted file mode 100644 (file)

index b64ff49..0000000
--- a/src/gallium/drivers/radeon/AMDGPUConstants.pm
+++ /dev/null
@@ -1,44 +0,0 @@
-#===-- AMDGPUConstants.pm - TODO: Add brief description -------===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===----------------------------------------------------------------------===#
-#
-# TODO: Add full description
-#
-#===----------------------------------------------------------------------===#
-
-package AMDGPUConstants;
-
-use base 'Exporter';
-
-use constant CONST_REG_COUNT => 256;
-use constant TEMP_REG_COUNT => 128;
-
-our @EXPORT = ('TEMP_REG_COUNT', 'CONST_REG_COUNT', 'get_hw_index', 'get_chan_str');
-
-sub get_hw_index {
-  my ($index) = @_;
-  return int($index / 4);
-}
-
-sub get_chan_str {
-  my ($index) = @_;
-  my $chan = $index % 4;
-  if ($chan == 0 )  {
-    return 'X';
-  } elsif ($chan == 1) {
-    return 'Y';
-  } elsif ($chan == 2) {
-    return 'Z';
-  } elsif ($chan == 3) {
-    return 'W';
-  } else {
-    die("Unknown chan value: $chan");
-  }
-}
-
-1;
diff --git a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp b/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp

index ce947f8ff7822aea6096741a5fb85d2f481605f6..8e82b8438bbfc54305daaa70e06fea73ddfa0e4d 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp
@@ -34,7 +34,7 @@ namespace {
      virtual bool runOnMachineFunction(MachineFunction &MF);
  
    };
-} /* End anonymous namespace */
+} // End anonymous namespace
  
  char AMDGPUConvertToISAPass::ID = 0;
  
diff --git a/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl

index 1fd4fb04b3e14814f99722964e5ece741626c5a7..130eaac72bc31f2b575f88b1e092f62945824331 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl
+++ b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl
@@ -1,15 +1,32 @@
-#===-- AMDGPUGenInstrEnums.pl - TODO: Add brief description -------===#
+#===-- AMDGPUGenInstrEnums.pl - Script for generating instruction enums ----===#
  #
  #                     The LLVM Compiler Infrastructure
  #
  # This file is distributed under the University of Illinois Open Source
  # License. See LICENSE.TXT for details.
  #
-#===----------------------------------------------------------------------===#
+#===-----------------------------------------------------------------------===#
  #
-# TODO: Add full description
+# This perl script is used to generate the following files:
  #
-#===----------------------------------------------------------------------===#
+# 1. perl AMDGPUGenInstrEnums.pl td  > AMDGPUInstrEnums.td
+#
+#    This file contains Tablegen constants used for matching hw instructions
+#    from R600 and SI with functionally similar AMDIL instruction.  It also
+#    contains definitions of floating point constants like pi (in hex notation)
+#    that are used in some of the shader patterns.
+#
+# 2. perl AMDGPUGenInstrEnums.pl h   > AMDGPUInstrEnums.h
+#
+#    This file contains cpp enums that match the constant values in
+#    AMDGPUInstrEnums.td
+#
+# 3. perl AMDGPUGenInstrEnums.pl inc > AMDGPUInstrEnums.include
+#
+#    This file contains a function called GetRealAMDILOpcode which maps the
+#    constant values defined in AMDGPUInstrEnums.h to the corresponding AMDIL
+#    instructions.
+#===-----------------------------------------------------------------------===#
  
  use warnings;
  use strict;
@@ -41,7 +58,7 @@ my $FILE_TYPE = $ARGV[0];
  
  open AMDIL, '<', 'AMDILInstructions.td';
  
-my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'CMOVLOG_f32', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ');
+my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'CMOVLOG_f32', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ', 'BINARY_OR_i32', 'BINARY_NOT_i32');
  
  while (<AMDIL>) {
    if ($_ =~ /defm\s+([A-Z_]+)\s+:\s+([A-Za-z0-9]+)</) {
diff --git a/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl b/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl

deleted file mode 100644 (file)

index 60523a7..0000000
--- a/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl
+++ /dev/null
@@ -1,30 +0,0 @@
-#===-- AMDGPUGenShaderPatterns.pl - TODO: Add brief description -------===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===----------------------------------------------------------------------===#
-#
-# TODO: Add full description
-#
-#===----------------------------------------------------------------------===#
-
-use strict;
-use warnings;
-
-use AMDGPUConstants;
-
-my $reg_prefix = $ARGV[0];
-
-for (my $i = 0; $i < CONST_REG_COUNT * 4; $i++) {
-  my $index = get_hw_index($i);
-  my $chan = get_chan_str($i);
-print <<STRING;
-def : Pat <
-  (int_AMDGPU_load_const $i),
-  (f32 (MOV (f32 $reg_prefix$index\_$chan)))
->;
-STRING
-}
diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp

index 2c1052fd8ea44accab12af4ebca7c908253411dc..2bdc8a759f2bff39b923b6bbb3dd65d57f617ba8 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPUISelLowering.cpp - TODO: Add brief description -------===//
+//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// This is the parent TargetLowering class for hardware code gen targets.
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h

index 3c5beb1cdae207c8d20a7d9307334d1eddff783b..1b3f71006e208aa9ea9b9d3eb3402ed41ee604fa 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h
+++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h
@@ -1,4 +1,4 @@
-//===-- AMDGPUISelLowering.h - TODO: Add brief description -------===//
+//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,8 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// This file contains the interface definition of the TargetLowering class
+// that is common to all AMD GPUs.
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp

index 4742283f68852b7ae0824ff3540c90a56f603df1..ecd8ac9052631983c610ea3a3d91eda9c8f10660 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp
@@ -108,9 +108,4 @@ unsigned AMDGPUInstrInfo::getISAOpcode(unsigned opcode) const
    }
  }
  
-bool AMDGPUInstrInfo::isRegPreload(const MachineInstr &MI) const
-{
-  return (get(MI.getOpcode()).TSFlags >> AMDGPU_TFLAG_SHIFTS::PRELOAD_REG) & 0x1;
-}
-
  #include "AMDGPUInstrEnums.include"
diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h

index fa009bc6302a890d49efe4cdc9157820af27cfa6..930b41e71916b7e91d301b1ca345209fad70d24e 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
+++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h
@@ -1,4 +1,4 @@
-//===-- AMDGPUInstrInfo.h - TODO: Add brief description -------===//
+//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,8 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// This file contains the definition of a TargetInstrInfo class that is common
+// to all AMD GPUs.
  //
  //===----------------------------------------------------------------------===//
  
@@ -21,17 +22,17 @@
  
  namespace llvm {
  
-  class AMDGPUTargetMachine;
-  class MachineFunction;
-  class MachineInstr;
-  class MachineInstrBuilder;
+class AMDGPUTargetMachine;
+class MachineFunction;
+class MachineInstr;
+class MachineInstrBuilder;
  
-  class AMDGPUInstrInfo : public AMDILInstrInfo {
-  private:
+class AMDGPUInstrInfo : public AMDILInstrInfo {
+private:
    AMDGPUTargetMachine & TM;
    std::map<unsigned, unsigned> amdilToISA;
  
-  public:
+public:
    explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm);
  
    virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
@@ -41,19 +42,9 @@ namespace llvm {
    virtual MachineInstr * convertToISA(MachineInstr & MI, MachineFunction &MF,
      DebugLoc DL) const;
  
-  bool isRegPreload(const MachineInstr &MI) const;
-
    #include "AMDGPUInstrEnums.h.include"
-  };
+};
  
  } // End llvm namespace
  
-/* AMDGPU target flags are stored in bits 32-39 */
-namespace AMDGPU_TFLAG_SHIFTS {
-  enum TFLAGS {
-    PRELOAD_REG = 32
-  };
-}
-
-
  #endif // AMDGPUINSTRINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td

index 0433c8dcd95d052a9cf643911d0ccd6a00fa4063..f689356e488d6407e8eaeba1882f18672f7107be 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUInstructions.td
+++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td
@@ -1,4 +1,4 @@
-//===-- AMDGPUInstructions.td - TODO: Add brief description -------===//
+//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,8 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// This file contains instruction defs that are common to all hw codegen
+// targets.
  //
  //===----------------------------------------------------------------------===//
  
@@ -16,14 +17,12 @@ include "AMDGPUInstrEnums.td"
  class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
    field bits<16> AMDILOp = 0;
    field bits<3> Gen = 0;
-  field bit PreloadReg = 0;
  
    let Namespace = "AMDIL";
    let OutOperandList = outs;
    let InOperandList = ins;
    let AsmString = asm;
    let Pattern = pattern;
-  let TSFlags{32} = PreloadReg;
    let TSFlags{42-40} = Gen;
    let TSFlags{63-48} = AMDILOp;
  }
@@ -37,42 +36,12 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
  
  let isCodeGenOnly = 1 in {
  
-  def EXPORT_REG : AMDGPUShaderInst <
-    (outs),
-    (ins GPRF32:$src),
-    "EXPORT_REG $src",
-    [(int_AMDGPU_export_reg GPRF32:$src)]
-  >;
-
-  def LOAD_INPUT : AMDGPUShaderInst <
-    (outs GPRF32:$dst),
-    (ins i32imm:$src),
-    "LOAD_INPUT $dst, $src",
-    [] >{
-    let PreloadReg = 1;
-  }
-
    def MASK_WRITE : AMDGPUShaderInst <
      (outs),
      (ins GPRF32:$src),
      "MASK_WRITE $src",
      []
    >;
-
-  def RESERVE_REG : AMDGPUShaderInst <
-    (outs GPRF32:$dst),
-    (ins i32imm:$src),
-    "RESERVE_REG $dst, $src",
-    [(set GPRF32:$dst, (int_AMDGPU_reserve_reg imm:$src))]> {
-    let PreloadReg = 1;
-  }
-
-  def STORE_OUTPUT: AMDGPUShaderInst <
-    (outs GPRF32:$dst),
-    (ins GPRF32:$src0, i32imm:$src1),
-    "STORE_OUTPUT $dst, $src0, $src1",
-    [(set GPRF32:$dst, (int_AMDGPU_store_output GPRF32:$src0, imm:$src1))]
-  >;
  }
  
  /* Generic helper patterns for intrinsics */
diff --git a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td

index d2cda0db936147cc8082e7eb24ce618abaab260d..398fd11431f68c5f5fea00f1d1721c4f71af1ea0 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
+++ b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td
@@ -1,4 +1,4 @@
-//===-- AMDGPUIntrinsics.td - TODO: Add brief description -------===//
+//===-- AMDGPUIntrinsics.td - Common intrinsics  -*- tablegen -*-----------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,17 +7,16 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// This file defines intrinsics that are used by all hw codegen targets.
  //
  //===----------------------------------------------------------------------===//
  
  let TargetPrefix = "AMDGPU", isTarget = 1 in {
  
-  def int_AMDGPU_export_reg : Intrinsic<[], [llvm_float_ty], []>;
    def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], []>;
    def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], []>;
-  def int_AMDGPU_reserve_reg : Intrinsic<[llvm_float_ty], [llvm_i32_ty], []>;
-  def int_AMDGPU_store_output : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty], []>;
+  def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], []>;
+  def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
    def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], []>;
  
    def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], []>;
@@ -26,7 +25,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
    def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
    def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], []>;
    def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
-  def int_AMDGPU_kill : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+  def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
    def int_AMDGPU_kilp : Intrinsic<[], [], []>;
    def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
    def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
@@ -35,7 +34,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
    def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
    def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
    def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
-  def int_AMDGPU_sge : BinaryIntFloat;
+  def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
    def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
    def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
    def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>;
@@ -43,9 +42,18 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
    def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
    def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
    def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+  def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+  def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
    def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
    def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
    def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>;
+  def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+  def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
+  def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+  def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+  def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+  def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+  def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], []>;
  }
  
  let TargetPrefix = "TGSI", isTarget = 1 in {
diff --git a/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp b/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp

index b49d0dddf653c9b985970779ca1487665fe720b9..2e455fea8aba0ab27bd5d9531ab94584afc22486 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp
+++ b/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPULowerInstructions.cpp - TODO: Add brief description -------===//
+//===-- AMDGPULowerInstructions.cpp - AMDGPU lowering pass ----------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,8 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// This pass lowers unsupported AMDIL MachineInstrs to LLVM pseudo 
+// MachineInstrs for hw codegen targets.
  //
  //===----------------------------------------------------------------------===//
  
@@ -27,7 +28,7 @@ namespace {
    private:
      static char ID;
      TargetMachine &TM;
-    void lowerVCREATE_v4f32(MachineInstr &MI, MachineBasicBlock::iterator I,
+    void lowerVCREATE_v4(MachineInstr &MI, MachineBasicBlock::iterator I,
                                MachineBasicBlock &MBB, MachineFunction &MF);
  
    public:
@@ -56,8 +57,9 @@ bool AMDGPULowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
  
        switch (MI.getOpcode()) {
        default: continue;
-      case AMDIL::VCREATE_v4f32: lowerVCREATE_v4f32(MI, I, MBB, MF); break;
-
+      case AMDIL::VCREATE_v4f32:
+      case AMDIL::VCREATE_v4i32:
+        lowerVCREATE_v4(MI, I, MBB, MF); break;
        }
        MI.eraseFromParent();
      }
@@ -65,7 +67,7 @@ bool AMDGPULowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
    return false;
  }
  
-void AMDGPULowerInstructionsPass::lowerVCREATE_v4f32(MachineInstr &MI,
+void AMDGPULowerInstructionsPass::lowerVCREATE_v4(MachineInstr &MI,
      MachineBasicBlock::iterator I, MachineBasicBlock &MBB, MachineFunction &MF)
  {
    MachineRegisterInfo & MRI = MF.getRegInfo();
diff --git a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp b/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp

deleted file mode 100644 (file)

index d33055c..0000000
--- a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- AMDGPULowerShaderInstructions.cpp - TODO: Add brief description -------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "AMDGPULowerShaderInstructions.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-using namespace llvm;
-
-void AMDGPULowerShaderInstructionsPass::preloadRegister(MachineFunction * MF,
-    const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg) const
-{
-  if (!MRI->isLiveIn(physReg)) {
-    MRI->addLiveIn(physReg, virtReg);
-    MachineBasicBlock &EntryMBB = MF->front();
-    BuildMI(MF->front(), EntryMBB.begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
-            virtReg)
-            .addReg(physReg);
-  } else {
-    /* We can't mark the same register as preloaded twice, but we still must
-     * associate virtReg with the correct preloaded register. */
-    unsigned newReg = MRI->getLiveInVirtReg(physReg);
-    MRI->replaceRegWith(virtReg, newReg);
-  }
-}
diff --git a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h b/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h

deleted file mode 100644 (file)

index 5ee77fa..0000000
--- a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- AMDGPULowerShaderInstructions.h - TODO: Add brief description -------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef AMDGPU_LOWER_SHADER_INSTRUCTIONS
-#define AMDGPU_LOWER_SHADER_INSTRUCTIONS
-
-namespace llvm {
-
-class MachineFunction;
-class MachineRegisterInfo;
-class TargetInstrInfo;
-
-class AMDGPULowerShaderInstructionsPass {
-
-  protected:
-    MachineRegisterInfo * MRI;
-    /**
-     * @param physReg The physical register that will be preloaded.
-     * @param virtReg The virtual register that currently holds the
-     *                preloaded value.
-     */
-    void preloadRegister(MachineFunction * MF, const TargetInstrInfo * TII,
-                         unsigned physReg, unsigned virtReg) const;
-};
-
-} // end namespace llvm
-
-
-#endif // AMDGPU_LOWER_SHADER_INSTRUCTIONS
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp b/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp

index 162a49116a07a726d02c5eb245e68d8edc4976ee..ad48335fd33ec396468b4704305569f489e4b961 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPURegisterInfo.cpp - TODO: Add brief description -------===//
+//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// Parent TargetRegisterInfo class common to all hw codegen targets.
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h b/src/gallium/drivers/radeon/AMDGPURegisterInfo.h

index f4492e9795d1de191a54c7055661d5348065c843..d545c06f69e7a27bd7c4db3c410c19d716325884 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h
+++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.h
@@ -1,4 +1,4 @@
-//===-- AMDGPURegisterInfo.h - TODO: Add brief description -------===//
+//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,8 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// This file contains the TargetRegisterInfo interface that is implemented
+// by all hw codegen targets.
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td b/src/gallium/drivers/radeon/AMDGPURegisterInfo.td

index 173d6622569f5f0181a2c5f0eaa7eb11b4ba0150..1707903ae7e1731061db485b5e6faedd85bf7342 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td
+++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.td
@@ -1,4 +1,4 @@
-//===-- AMDGPURegisterInfo.td - TODO: Add brief description -------===//
+//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// Tablegen register definitions common to all hw codegen targets.
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp b/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp

deleted file mode 100644 (file)

index c923f19..0000000
--- a/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- AMDGPUReorderPreloadInstructions.cpp - TODO: Add brief description -------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDIL.h"
-#include "AMDILInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Function.h"
-
-using namespace llvm;
-
-namespace {
-  class AMDGPUReorderPreloadInstructionsPass : public MachineFunctionPass {
-
-  private:
-    static char ID;
-    TargetMachine &TM;
-
-  public:
-    AMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) :
-      MachineFunctionPass(ID), TM(tm) { }
-
-      bool runOnMachineFunction(MachineFunction &MF);
-
-      const char *getPassName() const { return "AMDGPU Reorder Preload Instructions"; }
-    };
-} /* End anonymous namespace */
-
-char AMDGPUReorderPreloadInstructionsPass::ID = 0;
-
-FunctionPass *llvm::createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) {
-    return new AMDGPUReorderPreloadInstructionsPass(tm);
-}
-
-/* This pass moves instructions that represent preloaded registers to the
- * start of the program. */
-bool AMDGPUReorderPreloadInstructionsPass::runOnMachineFunction(MachineFunction &MF)
-{
-  const AMDGPUInstrInfo * TII =
-                        static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
-
-  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-                                                  BB != BB_E; ++BB) {
-    MachineBasicBlock &MBB = *BB;
-    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
-         I != MBB.end(); I = Next, Next = llvm::next(I) ) {
-      MachineInstr &MI = *I;
-      if (TII->isRegPreload(MI)) {
-         MF.front().insert(MF.front().begin(), MI.removeFromParent());
-      }
-    }
-  }
-  return false;
-}
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp

index 313349ce01b332cfc31a35030d31386e2da82503..c1c21abc9c11186906295dbc6f2ba493c9bc053c 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPUTargetMachine.cpp - TODO: Add brief description -------===//
+//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,8 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// The AMDGPU target machine contains all of the hardware specific information
+// needed to emit code for R600 and SI GPUs.
  //
  //===----------------------------------------------------------------------===//
  
@@ -16,7 +17,6 @@
  #include "AMDILTargetMachine.h"
  #include "R600ISelLowering.h"
  #include "R600InstrInfo.h"
-#include "R600KernelParameters.h"
  #include "SIISelLowering.h"
  #include "SIInstrInfo.h"
  #include "llvm/Analysis/Passes.h"
@@ -112,31 +112,28 @@ AMDGPUPassConfig::addPreISel()
  {
    const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
    if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
-    PM.add(createR600KernelParametersPass(
+    PM->add(createR600KernelParametersPass(
                       getAMDGPUTargetMachine().getTargetData()));
    }
    return false;
  }
  
  bool AMDGPUPassConfig::addInstSelector() {
-  PM.add(createAMDILPeepholeOpt(*TM));
-  PM.add(createAMDILISelDag(getAMDGPUTargetMachine()));
+  PM->add(createAMDILPeepholeOpt(*TM));
+  PM->add(createAMDILISelDag(getAMDGPUTargetMachine()));
    return false;
  }
  
  bool AMDGPUPassConfig::addPreRegAlloc() {
    const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
  
-  PM.add(createAMDGPUReorderPreloadInstructionsPass(*TM));
    if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
-    PM.add(createR600LowerShaderInstructionsPass(*TM));
-    PM.add(createR600LowerInstructionsPass(*TM));
+    PM->add(createR600LowerInstructionsPass(*TM));
    } else {
-    PM.add(createSILowerShaderInstructionsPass(*TM));
-    PM.add(createSIAssignInterpRegsPass(*TM));
+    PM->add(createSIAssignInterpRegsPass(*TM));
    }
-  PM.add(createAMDGPULowerInstructionsPass(*TM));
-  PM.add(createAMDGPUConvertToISAPass(*TM));
+  PM->add(createAMDGPULowerInstructionsPass(*TM));
+  PM->add(createAMDGPUConvertToISAPass(*TM));
    return false;
  }
  
@@ -150,10 +147,10 @@ bool AMDGPUPassConfig::addPreSched2() {
  
  bool AMDGPUPassConfig::addPreEmitPass() {
    const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
-  PM.add(createAMDILCFGPreparationPass(*TM));
-  PM.add(createAMDILCFGStructurizerPass(*TM));
+  PM->add(createAMDILCFGPreparationPass(*TM));
+  PM->add(createAMDILCFGStructurizerPass(*TM));
    if (ST.device()->getGeneration() == AMDILDeviceInfo::HD7XXX) {
-    PM.add(createSIPropagateImmReadsPass(*TM));
+    PM->add(createSIPropagateImmReadsPass(*TM));
    }
  
    return false;
diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h

index d4165b09e84ee0b0f8c1225dae88c105fe4edb51..2428fe638a731e14078ec162be4587b2b51b3657 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h
+++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- AMDGPUTargetMachine.h - TODO: Add brief description -------===//
+//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+//  The AMDGPU TargetMachine interface definition for hw codegen targets.
  //
  //===----------------------------------------------------------------------===//
  
@@ -52,9 +52,6 @@ public:
                                                formatted_raw_ostream &Out,
                                                CodeGenFileType FileType,
                                                bool DisableVerify);
-public:
-   void dumpCode() { mDump = true; }
-   bool shouldDumpCode() const { return mDump; }
  };
  
  } /* End namespace llvm */
diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.cpp b/src/gallium/drivers/radeon/AMDGPUUtil.cpp

index a5045436ab4d107e3e4e95e64e30e5ba599e0db7..bd8f5eef697217e32add6fcbe85745c5c8eb11f2 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUUtil.cpp
+++ b/src/gallium/drivers/radeon/AMDGPUUtil.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPUUtil.cpp - TODO: Add brief description -------===//
+//===-- AMDGPUUtil.cpp - AMDGPU Utility functions -------------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,39 +7,39 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// Common utility functions used by hw codegen targets
  //
  //===----------------------------------------------------------------------===//
  
  #include "AMDGPUUtil.h"
  #include "AMDGPURegisterInfo.h"
  #include "AMDIL.h"
+#include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
  #include "llvm/Target/TargetInstrInfo.h"
  #include "llvm/Target/TargetMachine.h"
  #include "llvm/Target/TargetRegisterInfo.h"
  
  using namespace llvm;
  
-/* Some instructions act as place holders to emulate operations that the GPU
- * hardware does automatically. This function can be used to check if
- * an opcode falls into this category. */
-bool llvm::isPlaceHolderOpcode(unsigned opcode)
+// Some instructions act as place holders to emulate operations that the GPU
+// hardware does automatically. This function can be used to check if
+// an opcode falls into this category.
+bool AMDGPU::isPlaceHolderOpcode(unsigned opcode)
  {
    switch (opcode) {
    default: return false;
-  case AMDIL::EXPORT_REG:
    case AMDIL::RETURN:
    case AMDIL::LOAD_INPUT:
    case AMDIL::LAST:
+  case AMDIL::MASK_WRITE:
    case AMDIL::RESERVE_REG:
      return true;
    }
  }
  
-bool llvm::isTransOp(unsigned opcode)
+bool AMDGPU::isTransOp(unsigned opcode)
  {
    switch(opcode) {
      default: return false;
@@ -67,10 +67,12 @@ bool llvm::isTransOp(unsigned opcode)
    }
  }
  
-bool llvm::isTexOp(unsigned opcode)
+bool AMDGPU::isTexOp(unsigned opcode)
  {
    switch(opcode) {
    default: return false;
+  case AMDIL::TEX_LD:
+  case AMDIL::TEX_GET_TEXTURE_RESINFO:
    case AMDIL::TEX_SAMPLE:
    case AMDIL::TEX_SAMPLE_C:
    case AMDIL::TEX_SAMPLE_L:
@@ -79,11 +81,13 @@ bool llvm::isTexOp(unsigned opcode)
    case AMDIL::TEX_SAMPLE_C_LB:
    case AMDIL::TEX_SAMPLE_G:
    case AMDIL::TEX_SAMPLE_C_G:
+  case AMDIL::TEX_GET_GRADIENTS_H:
+  case AMDIL::TEX_GET_GRADIENTS_V:
      return true;
    }
  }
  
-bool llvm::isReductionOp(unsigned opcode)
+bool AMDGPU::isReductionOp(unsigned opcode)
  {
    switch(opcode) {
      default: return false;
@@ -93,13 +97,25 @@ bool llvm::isReductionOp(unsigned opcode)
    }
  }
  
-bool llvm::isFCOp(unsigned opcode)
+// Returns true when opcode is one of the AMDIL CUBE opcodes listed below.
+bool AMDGPU::isCubeOp(unsigned opcode)
+{
+  switch(opcode) {
+    default: return false;
+    case AMDIL::CUBE_r600:
+    case AMDIL::CUBE_eg:
+      return true;
+  }
+}
+
+bool AMDGPU::isFCOp(unsigned opcode)
  {
    switch(opcode) {
    default: return false;
    case AMDIL::BREAK_LOGICALZ_f32:
    case AMDIL::BREAK_LOGICALNZ_i32:
    case AMDIL::BREAK_LOGICALZ_i32:
+  case AMDIL::BREAK_LOGICALNZ_f32:
    case AMDIL::CONTINUE_LOGICALNZ_f32:
    case AMDIL::IF_LOGICALNZ_i32:
    case AMDIL::IF_LOGICALZ_f32:
@@ -112,11 +128,14 @@ bool llvm::isFCOp(unsigned opcode)
    }
  }
  
-void AMDGPU::utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
-    const struct TargetInstrInfo * TII, unsigned physReg, unsigned virtReg)
+void AMDGPU::utilAddLiveIn(llvm::MachineFunction * MF,
+                                                                                                        llvm::MachineRegisterInfo & MRI,
+                                                                                                        const struct llvm::TargetInstrInfo * TII,
+                                                                                                        unsigned physReg, unsigned virtReg)
  {
      if (!MRI.isLiveIn(physReg)) {
        MRI.addLiveIn(physReg, virtReg);
+      MF->front().addLiveIn(physReg);
        BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
                             TII->get(TargetOpcode::COPY), virtReg)
              .addReg(physReg);
diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.h b/src/gallium/drivers/radeon/AMDGPUUtil.h

index 299146e1ba771d3247fb75996e3583bda1e5b0d6..15f2ce57af96ee4821567679da630b56d6ec1e91 100644 (file)
--- a/src/gallium/drivers/radeon/AMDGPUUtil.h
+++ b/src/gallium/drivers/radeon/AMDGPUUtil.h
@@ -1,4 +1,4 @@
-//===-- AMDGPUUtil.h - TODO: Add brief description -------===//
+//===-- AMDGPUUtil.h - AMDGPU Utility function declarations -----*- C++ -*-===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,43 +7,40 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// Declarations for utility functions common to all hw codegen targets.
  //
  //===----------------------------------------------------------------------===//
  
  #ifndef AMDGPU_UTIL_H
  #define AMDGPU_UTIL_H
  
-#include "AMDGPURegisterInfo.h"
-#include "llvm/Support/DataTypes.h"
-
  namespace llvm {
  
-class AMDILMachineFunctionInfo;
+class MachineFunction;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+
+}
  
-class TargetMachine;
-class TargetRegisterInfo;
+namespace AMDGPU {
  
  bool isPlaceHolderOpcode(unsigned opcode);
  
  bool isTransOp(unsigned opcode);
  bool isTexOp(unsigned opcode);
  bool isReductionOp(unsigned opcode);
+bool isCubeOp(unsigned opcode);
  bool isFCOp(unsigned opcode);
  
-/* XXX: Move these to AMDGPUInstrInfo.h */
+// XXX: Move these to AMDGPUInstrInfo.h
  #define MO_FLAG_CLAMP (1 << 0)
  #define MO_FLAG_NEG   (1 << 1)
  #define MO_FLAG_ABS   (1 << 2)
  #define MO_FLAG_MASK  (1 << 3)
  
-} /* End namespace llvm */
-
-namespace AMDGPU {
-
  void utilAddLiveIn(llvm::MachineFunction * MF, llvm::MachineRegisterInfo & MRI,
      const struct llvm::TargetInstrInfo * TII, unsigned physReg, unsigned virtReg);
  
  } // End namespace AMDGPU
  
-#endif /* AMDGPU_UTIL_H */
+#endif // AMDGPU_UTIL_H
diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h

index 317ea124f66076c405c5354f7b04615a560fbd92..6759ccd95278b178f8794628958c669d7c307425 100644 (file)
--- a/src/gallium/drivers/radeon/AMDIL.h
+++ b/src/gallium/drivers/radeon/AMDIL.h
@@ -137,11 +137,6 @@ enum AddressSpaces {
    LAST_ADDRESS     = 8
  };
  
-// We are piggybacking on the CommentFlag enum in MachineInstr.h to
-// set bits in AsmPrinterFlags of the MachineInstruction. We will
-// start at bit 16 and allocate down while LLVM will start at bit
-// 1 and allocate up.
-
  // This union/struct combination is an easy way to read out the
  // exact bits that are needed.
  typedef union ResourceRec {
@@ -181,26 +176,6 @@ typedef union ResourceRec {
  
  } // namespace AMDILAS
  
-// The OpSwizzle encodes a subset of all possible
-// swizzle combinations into a number of bits using
-// only the combinations utilized by the backend.
-// The lower 128 are for source swizzles and the
-// upper 128 or for destination swizzles.
-// The valid mappings can be found in the
-// getSrcSwizzle and getDstSwizzle functions of
-// AMDILUtilityFunctions.cpp.
-typedef union SwizzleRec {
-  struct {
-#ifdef __BIG_ENDIAN__
-    unsigned char dst : 1;
-    unsigned char swizzle : 7;
-#else
-    unsigned char swizzle : 7;
-    unsigned char dst : 1;
-#endif
-  } bits;
-  unsigned char u8all;
-} OpSwizzle;
  // Enums corresponding to AMDIL condition codes for IL.  These
  // values must be kept in sync with the ones in the .td file.
  namespace AMDILCC {
diff --git a/src/gallium/drivers/radeon/AMDIL.td b/src/gallium/drivers/radeon/AMDIL.td

index 9bcccac241173c03a334f095f8e68c07be3cef76..deee290fad55bf02f6ccdbaecb7f928bb3955137 100644 (file)
--- a/src/gallium/drivers/radeon/AMDIL.td
+++ b/src/gallium/drivers/radeon/AMDIL.td
@@ -1,4 +1,4 @@
-//===-- AMDIL.td - TODO: Add brief description -------===//
+//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp

index 6625dd77d5f356f35f0d9720164bc46a5cc3a420..d7c96573a1597fdb5bafe4d9740a73301ede9b15 100644 (file)
--- a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
+++ b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp
@@ -1,4 +1,4 @@
-//===-- AMDIL7XXDevice.cpp - TODO: Add brief description -------===//
+//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
diff --git a/src/gallium/drivers/radeon/AMDILBase.td b/src/gallium/drivers/radeon/AMDILBase.td

index 2706b211f2d82161dbf4d2b2f94b88c689e89870..31ebed31d726f2ae33148af7c81b1102cb85f0be 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILBase.td
+++ b/src/gallium/drivers/radeon/AMDILBase.td
@@ -60,6 +60,11 @@ def FeatureDebug : SubtargetFeature<"debug",
          "CapsOverride[AMDILDeviceInfo::Debug]",
          "true",
          "Debug mode is enabled, so disable hardware accelerated address spaces.">;
+def FeatureDumpCode : SubtargetFeature <"DumpCode",
+        "mDumpCode",
+        "true",
+        "Dump MachineInstrs in the CodeEmitter">;
+
  
  //===----------------------------------------------------------------------===//
  // Register File, Calling Conv, Instruction Descriptions
diff --git a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp

index 289af6f210ece76726486a2efd427506ed5d9083..cdcd5e898802393c6d8176b060d0e6f3dfcd9a65 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
+++ b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp
@@ -7,22 +7,22 @@
  //
  //==-----------------------------------------------------------------------===//
  
-#define DEBUG_TYPE "structcfg"
-#ifdef DEBUG
-#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
-#else
  #define DEBUGME 0
-#endif
+#define DEBUG_TYPE "structcfg"
  
  #include "AMDILTargetMachine.h"
  #include "AMDILUtilityFunctions.h"
  #include "llvm/ADT/SCCIterator.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
  #include "llvm/CodeGen/MachineDominators.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineFunctionAnalysis.h"
  #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
  #include "llvm/CodeGen/MachineJumpTableInfo.h"
  #include "llvm/CodeGen/MachineLoopInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -30,8 +30,6 @@
  #define FirstNonDebugInstr(A) A->begin()
  using namespace llvm;
  
-// bixia TODO: move this out to analysis lib. Make this work for both target
-// AMDIL and CBackend.
  // TODO: move-begin.
  
  //===----------------------------------------------------------------------===//
@@ -109,23 +107,6 @@ void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
  //
  //===----------------------------------------------------------------------===//
  
-#include "AMDILTargetMachine.h"
-#include "AMDILUtilityFunctions.h"
-#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/DominatorInternals.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
  namespace llvm {
  
  /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
@@ -3156,10 +3137,6 @@ struct CFGStructTraits<AMDILCFGStructurizer>
           iterEnd = srcBlk->end();
           iter != iterEnd; ++iter) {
        MachineInstr *instr = func->CloneMachineInstr(iter);
-      // This is a workaround for LLVM bugzilla 8420 because CloneMachineInstr
-      // does not clone the AsmPrinterFlags.
-      instr->setAsmPrinterFlag(
-         (llvm::MachineInstr::CommentFlag)iter->getAsmPrinterFlags());
        newBlk->push_back(instr);
      }
      return newBlk;
diff --git a/src/gallium/drivers/radeon/AMDILCodeEmitter.h b/src/gallium/drivers/radeon/AMDILCodeEmitter.h

index b0ea1455cf924f015e0214fa3ebb5e839968b780..fa46cbd203d0db1c75e6caf78df054461e11af5d 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILCodeEmitter.h
+++ b/src/gallium/drivers/radeon/AMDILCodeEmitter.h
@@ -1,23 +1,21 @@
-//                     The LLVM Compiler Infrastructure
+//===-- AMDILCodeEmitter.h - AMDIL Code Emitter interface -----------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
  // This file is distributed under the University of Illinois Open Source
  // License. See LICENSE.TXT for details.
  //
-//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===//
-//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===//
-//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===//
+//===----------------------------------------------------------------------===//
  //
-//                     The LLVM Compiler Infrastructure
+// CodeEmitter interface for R600 and SI codegen.
  //
+//===----------------------------------------------------------------------===//
  
  #ifndef AMDILCODEEMITTER_H
  #define AMDILCODEEMITTER_H
  
  namespace llvm {
  
-  /* XXX: Temp HACK to work around tablegen name generation */
    class AMDILCodeEmitter {
    public:
      uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
diff --git a/src/gallium/drivers/radeon/AMDILConversions.td b/src/gallium/drivers/radeon/AMDILConversions.td

index 0db66ae84757baed27cda20f37f39c97ae848a3c..1bc5e4ddf37c49010debe4722095922ea39c2da5 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILConversions.td
+++ b/src/gallium/drivers/radeon/AMDILConversions.td
@@ -1,4 +1,4 @@
-//===-- AMDILConversions.td - TODO: Add brief description -------===//
+//==- AMDILConversions.td - Type conversion tablegen patterns -*-tablegen -*-=//
  //
  //                     The LLVM Compiler Infrastructure
  //
diff --git a/src/gallium/drivers/radeon/AMDILDevice.cpp b/src/gallium/drivers/radeon/AMDILDevice.cpp

index aa6d8af70124d25d006e604f4fe5b02e3595c84b..4294a8bef0c6cd38f516749cf8d69a302cc4d801 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILDevice.cpp
+++ b/src/gallium/drivers/radeon/AMDILDevice.cpp
@@ -1,4 +1,4 @@
-//===-- AMDILDevice.cpp - TODO: Add brief description -------===//
+//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp

index 89b8312c294228bf6ca7033f9444499016f4fc91..cbf5b512471d8bbb720325e2e0b14020e832c1f8 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp
@@ -1,4 +1,4 @@
-//===-- AMDILDeviceInfo.cpp - TODO: Add brief description -------===//
+//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -6,11 +6,16 @@
  // License. See LICENSE.TXT for details.
  //
  //==-----------------------------------------------------------------------===//
+//
+// Function that creates DeviceInfo from a device name and other information.
+//
+//==-----------------------------------------------------------------------===//
  #include "AMDILDevices.h"
  #include "AMDILSubtarget.h"
  
  using namespace llvm;
  namespace llvm {
+namespace AMDILDeviceInfo {
      AMDILDevice*
  getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit)
  {
@@ -84,4 +89,5 @@ getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64b
          return new AMDIL7XXDevice(ptr);
      }
  }
-}
+} // End namespace AMDILDeviceInfo
+} // End namespace llvm
diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.h b/src/gallium/drivers/radeon/AMDILDeviceInfo.h

index c4acf9145ae16d368a1bb0c50e151e45bf9f4317..06ac4322d0fb41bc729e7c07cfb83045ba31bb49 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILDeviceInfo.h
+++ b/src/gallium/drivers/radeon/AMDILDeviceInfo.h
@@ -1,4 +1,4 @@
-//===-- AMDILDeviceInfo.h - TODO: Add brief description -------===//
+//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -82,8 +82,8 @@ namespace llvm
      };
  
  
+  AMDILDevice*
+    getDeviceFromName(const std::string &name, AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
    } // namespace AMDILDeviceInfo
-  llvm::AMDILDevice*
-    getDeviceFromName(const std::string &name, llvm::AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
  } // namespace llvm
  #endif // _AMDILDEVICEINFO_H_
diff --git a/src/gallium/drivers/radeon/AMDILDevices.h b/src/gallium/drivers/radeon/AMDILDevices.h

index 3fc5fa05669087268132780ee43fe464f9347fcd..cfcc3304b4b9619de03cdb566591b9ca3067ca49 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILDevices.h
+++ b/src/gallium/drivers/radeon/AMDILDevices.h
@@ -1,4 +1,4 @@
-//===-- AMDILDevices.h - TODO: Add brief description -------===//
+//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
diff --git a/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td b/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td

index 445fd608bbb0fb951324e7e77fe5f09a5464363b..f10936b8c6cf6f59b1c832fb6db2bbe5ab8deb85 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td
+++ b/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td
@@ -1,4 +1,4 @@
-//===-- AMDILEnumeratedTypes.td - TODO: Add brief description -------===//
+//===-- AMDILEnumeratedTypes.td - IL Type definitions --*- tablegen -*-----===//
  //
  //                     The LLVM Compiler Infrastructure
  //
diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp

index 7b5c52345d2658cc9b7b91317b32d8b7ca0905b0..779b2d3df2f9571927adb61f233c8471f185c4ed 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
+++ b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp
@@ -1,4 +1,4 @@
-//===-- AMDILEvergreenDevice.cpp - TODO: Add brief description -------===//
+//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
diff --git a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp

index ff04d9d55bf862457700769d724769444e4a9008..b8898828dd62b287b46745c608257a412c2b6ea7 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp
+++ b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp
@@ -13,9 +13,12 @@
  #include "AMDILDevices.h"
  #include "AMDILTargetMachine.h"
  #include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/ValueMap.h"
  #include "llvm/CodeGen/PseudoSourceValue.h"
  #include "llvm/CodeGen/SelectionDAGISel.h"
  #include "llvm/Support/Compiler.h"
+#include <list>
+#include <queue>
  
  using namespace llvm;
  
@@ -35,13 +38,21 @@ class AMDILDAGToDAGISel : public SelectionDAGISel {
  public:
    AMDILDAGToDAGISel(AMDILTargetMachine &TM AMDIL_OPT_LEVEL_DECL);
    virtual ~AMDILDAGToDAGISel();
-  inline SDValue getSmallIPtrImm(unsigned Imm);
  
    SDNode *Select(SDNode *N);
+  virtual const char *getPassName() const;
+
+private:
+  inline SDValue getSmallIPtrImm(unsigned Imm);
+
    // Complex pattern selectors
    bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
    bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
    bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
+
+  static bool checkType(const Value *ptr, unsigned int addrspace);
+  static const Value *getBasePointerValue(const Value *V);
+
    static bool isGlobalStore(const StoreSDNode *N);
    static bool isPrivateStore(const StoreSDNode *N);
    static bool isLocalStore(const StoreSDNode *N);
@@ -54,8 +65,6 @@ public:
    static bool isLocalLoad(const LoadSDNode *N);
    static bool isRegionLoad(const LoadSDNode *N);
  
-  virtual const char *getPassName() const;
-private:
    SDNode *xformAtomicInst(SDNode *N);
  
    // Include the pieces autogenerated from the target description.
@@ -165,26 +174,75 @@ SDNode *AMDILDAGToDAGISel::Select(SDNode *N) {
    return SelectCode(N);
  }
  
+// True iff ptr is non-NULL, pointer-typed, and in address space addrspace.
+bool AMDILDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
+  if (!ptr) { return false; }
+  // dyn_cast yields NULL for a non-pointer type: report no match, don't deref.
+  const PointerType *ptrType = dyn_cast<PointerType>(ptr->getType());
+  return ptrType && ptrType->getAddressSpace() == addrspace;
+}
+
+const Value * AMDILDAGToDAGISel::getBasePointerValue(const Value *V)
+{ // Breadth-first search from V for a base pointer; NULL if none is found.
+  if (!V) {
+    return NULL;
+  }
+  const Value *ret = NULL;
+  ValueMap<const Value *, bool> ValueBitMap; // values already examined
+  std::queue<const Value *, std::list<const Value *> > ValueQueue; // FIFO worklist
+  ValueQueue.push(V);
+  while (!ValueQueue.empty()) {
+    V = ValueQueue.front();
+    if (ValueBitMap.find(V) == ValueBitMap.end()) { // skip values seen before
+      ValueBitMap[V] = true;
+      if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
+        ret = V; // pointer-typed function argument ends the search
+        break;
+      } else if (dyn_cast<GlobalVariable>(V)) {
+        ret = V; // global variable ends the search
+        break;
+      } else if (dyn_cast<Constant>(V)) {
+        const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
+        if (CE) { // only operand 0 of a constant expression is followed
+          ValueQueue.push(CE->getOperand(0));
+        }
+      } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+        ret = AI; // alloca instruction ends the search
+        break;
+      } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
+        uint32_t numOps = I->getNumOperands();
+        for (uint32_t x = 0; x < numOps; ++x) { // enqueue every operand
+          ValueQueue.push(I->getOperand(x));
+        }
+      } else {
+        // assert(0 && "Found a Value that we didn't know how to handle!");
+      }
+    }
+    ValueQueue.pop();
+  }
+  return ret;
+}
+
  bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
-  return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+  return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
  }
  
  bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
-  return (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
-          && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
-          && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS));
+  return (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+          && !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+          && !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS));
  }
  
  bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
-  return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
+  return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
  }
  
  bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
-  return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+  return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
  }
  
  bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
-  if (check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) {
+  if (checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) {
      return true;
    }
    MachineMemOperand *MMO = N->getMemOperand();
@@ -195,27 +253,27 @@ bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
        && ((V && dyn_cast<GlobalValue>(V))
            || (BV && dyn_cast<GlobalValue>(
                          getBasePointerValue(MMO->getValue()))))) {
-    return check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
+    return checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
    } else {
      return false;
    }
  }
  
  bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
-  return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+  return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
  }
  
  bool AMDILDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) {
-  return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
+  return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
  }
  
  bool AMDILDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) {
-  return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+  return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
  }
  
  bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
    MachineMemOperand *MMO = N->getMemOperand();
-  if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
+  if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
      if (MMO) {
        const Value *V = MMO->getValue();
        const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
@@ -228,19 +286,19 @@ bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
  }
  
  bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
-  if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
+  if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
      // Check to make sure we are not a constant pool load or a constant load
      // that is marked as a private load
      if (isCPLoad(N) || isConstantLoad(N, -1)) {
        return false;
      }
    }
-  if (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
-      && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
-      && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
-      && !check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
-      && !check_type(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS)
-      && !check_type(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS))
+  if (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+      && !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+      && !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
+      && !checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
+      && !checkType(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS)
+      && !checkType(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS))
    {
      return true;
    }
diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp

index 54c6ea65065ab5cc52d821db8c5e79e66e5d2121..19b12fcf72b73de39aeb65394c298b65d987348d 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp
+++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp
@@ -623,6 +623,48 @@ translateToOpcode(uint64_t CCCode, unsigned int regClass)
    assert(0 && "Unknown opcode retrieved");
    return 0;
  }
+
+/// Helper function used by LowerFormalArguments
+static const TargetRegisterClass*
+getRegClassFromType(unsigned int type) {
+  switch (type) {
+  default:
+    assert(0 && "Passed in type does not match any register classes.");
+  case MVT::i8:
+    return &AMDIL::GPRI8RegClass;
+  case MVT::i16:
+    return &AMDIL::GPRI16RegClass;
+  case MVT::i32:
+    return &AMDIL::GPRI32RegClass;
+  case MVT::f32:
+    return &AMDIL::GPRF32RegClass;
+  case MVT::i64:
+    return &AMDIL::GPRI64RegClass;
+  case MVT::f64:
+    return &AMDIL::GPRF64RegClass;
+  case MVT::v4f32:
+    return &AMDIL::GPRV4F32RegClass;
+  case MVT::v4i8:
+    return &AMDIL::GPRV4I8RegClass;
+  case MVT::v4i16:
+    return &AMDIL::GPRV4I16RegClass;
+  case MVT::v4i32:
+    return &AMDIL::GPRV4I32RegClass;
+  case MVT::v2f32:
+    return &AMDIL::GPRV2F32RegClass;
+  case MVT::v2i8:
+    return &AMDIL::GPRV2I8RegClass;
+  case MVT::v2i16:
+    return &AMDIL::GPRV2I16RegClass;
+  case MVT::v2i32:
+    return &AMDIL::GPRV2I32RegClass;
+  case MVT::v2f64:
+    return &AMDIL::GPRV2F64RegClass;
+  case MVT::v2i64:
+    return &AMDIL::GPRV2I64RegClass;
+  }
+}
+
  SDValue
  AMDILTargetLowering::LowerMemArgument(
      SDValue Chain,
@@ -2189,6 +2231,7 @@ AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
    SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
    return Result;
  }
+
  /// LowerFORMAL_ARGUMENTS - transform physical registers into
  /// virtual registers and generate load operations for
  /// arguments places on the stack.
@@ -3191,7 +3234,7 @@ AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
      amdtm = reinterpret_cast<const AMDILTargetMachine*>
      (&this->getTargetMachine());
    const AMDILSubtarget*
-    stm = dynamic_cast<const AMDILSubtarget*>(
+    stm = static_cast<const AMDILSubtarget*>(
          amdtm->getSubtargetImpl());
    if (RST == MVT::f64 && RHSVT.isVector()
        && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
@@ -3248,7 +3291,7 @@ AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
      amdtm = reinterpret_cast<const AMDILTargetMachine*>
      (&this->getTargetMachine());
    const AMDILSubtarget*
-    stm = dynamic_cast<const AMDILSubtarget*>(
+    stm = static_cast<const AMDILSubtarget*>(
          amdtm->getSubtargetImpl());
    if (RST == MVT::f64 && RHSVT.isVector()
        && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
@@ -3314,7 +3357,7 @@ AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
      amdtm = reinterpret_cast<const AMDILTargetMachine*>
      (&this->getTargetMachine());
    const AMDILSubtarget*
-    stm = dynamic_cast<const AMDILSubtarget*>(
+    stm = static_cast<const AMDILSubtarget*>(
          amdtm->getSubtargetImpl());
    if (stm->calVersion() >= CAL_VERSION_SC_135) {
      // unsigned x = RHS;
@@ -3489,7 +3532,7 @@ AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
      amdtm = reinterpret_cast<const AMDILTargetMachine*>
      (&this->getTargetMachine());
    const AMDILSubtarget*
-    stm = dynamic_cast<const AMDILSubtarget*>(
+    stm = static_cast<const AMDILSubtarget*>(
          amdtm->getSubtargetImpl());
    if (LST == MVT::f64 && LHSVT.isVector()
        && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
@@ -3543,7 +3586,7 @@ AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
      amdtm = reinterpret_cast<const AMDILTargetMachine*>
      (&this->getTargetMachine());
    const AMDILSubtarget*
-    stm = dynamic_cast<const AMDILSubtarget*>(
+    stm = static_cast<const AMDILSubtarget*>(
          amdtm->getSubtargetImpl());
    if (LST == MVT::f64 && LHSVT.isVector()
        && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
@@ -3843,7 +3886,6 @@ SDValue
  AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
  {
    EVT VT = Op.getValueType();
-  //printSDValue(Op, 1);
    SDValue Nodes1;
    SDValue second;
    SDValue third;
@@ -3965,7 +4007,6 @@ AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
      SelectionDAG &DAG) const
  {
    EVT VT = Op.getValueType();
-  //printSDValue(Op, 1);
    const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    uint64_t swizzleNum = 0;
    DebugLoc DL = Op.getDebugLoc();
@@ -4782,7 +4823,7 @@ uint32_t
  AMDILTargetLowering::genVReg(uint32_t regType) const
  {
    return mBB->getParent()->getRegInfo().createVirtualRegister(
-      getRegClassFromID(regType));
+      getTargetMachine().getRegisterInfo()->getRegClass(regType));
  }
  
  MachineInstrBuilder
diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.cpp b/src/gallium/drivers/radeon/AMDILInstrInfo.cpp

index fbc3e45b357abcbddb45deb04d8e29e1b850bd7f..cd2fb48209ca7bf7b9044b68ed44d6b119463798 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDILInstrInfo.cpp
@@ -10,13 +10,10 @@
  // This file contains the AMDIL implementation of the TargetInstrInfo class.
  //
  //===----------------------------------------------------------------------===//
-#include "AMDILInstrInfo.h"
-#include "AMDILUtilityFunctions.h"
-
-#define GET_INSTRINFO_CTOR
-#include "AMDILGenInstrInfo.inc"
  
  #include "AMDILInstrInfo.h"
+#include "AMDIL.h"
+#include "AMDILISelLowering.h"
  #include "AMDILUtilityFunctions.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -24,6 +21,9 @@
  #include "llvm/CodeGen/PseudoSourceValue.h"
  #include "llvm/Instructions.h"
  
+#define GET_INSTRINFO_CTOR
+#include "AMDILGenInstrInfo.inc"
+
  using namespace llvm;
  
  AMDILInstrInfo::AMDILInstrInfo(AMDILTargetMachine &tm)
@@ -36,28 +36,6 @@ const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const {
    return RI;
  }
  
-/// Return true if the instruction is a register to register move and leave the
-/// source and dest operands in the passed parameters.
-bool AMDILInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg,
-                                 unsigned int &DstReg, unsigned int &SrcSubIdx,
-                                 unsigned int &DstSubIdx) const {
-  // FIXME: we should look for:
-  //    add with 0
-  //assert(0 && "is Move Instruction has not been implemented yet!");
-  //return true;
-  if (!isMove(MI.getOpcode())) {
-    return false;
-  }
-  if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg()) {
-    return false;
-  }
-  SrcReg = MI.getOperand(1).getReg();
-  DstReg = MI.getOperand(0).getReg();
-  DstSubIdx = 0;
-  SrcSubIdx = 0;
-  return true;
-}
-
  bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                             unsigned &SrcReg, unsigned &DstReg,
                                             unsigned &SubIdx) const {
@@ -99,22 +77,7 @@ bool AMDILInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
  // TODO: Implement this function
    return false;
  }
-#if 0
-void
-AMDILInstrInfo::reMaterialize(MachineBasicBlock &MBB,
-                              MachineBasicBlock::iterator MI,
-                              unsigned DestReg, unsigned SubIdx,
-                             const MachineInstr *Orig,
-                             const TargetRegisterInfo *TRI) const {
-// TODO: Implement this function
-}
  
-MachineInst AMDILInstrInfo::duplicate(MachineInstr *Orig,
-                                      MachineFunction &MF) const {
-// TODO: Implement this function
-  return NULL;
-}
-#endif
  MachineInstr *
  AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
@@ -122,25 +85,6 @@ AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
  // TODO: Implement this function
    return NULL;
  }
-#if 0
-MachineInst AMDILInstrInfo::commuteInstruction(MachineInstr *MI,
-                                               bool NewMI = false) const {
-// TODO: Implement this function
-  return NULL;
-}
-bool
-AMDILInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
-                                     unsigned &SrcOpIdx2) const
-{
-// TODO: Implement this function
-}
-bool
-AMDILInstrInfo::produceSameValue(const MachineInstr *MI0,
-                                const MachineInstr *MI1) const
-{
-// TODO: Implement this function
-}
-#endif
  bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
                                          MachineBasicBlock &MBB) const {
    while (iter != MBB.end()) {
@@ -299,43 +243,6 @@ MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
    return MBB->end();
  }
  
-bool
-AMDILInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
-                             MachineBasicBlock::iterator I,
-                             unsigned DestReg, unsigned SrcReg,
-                             const TargetRegisterClass *DestRC,
-                             const TargetRegisterClass *SrcRC,
-                             DebugLoc DL) const {
-  // If we are adding to the end of a basic block we can safely assume that the
-  // move is caused by a PHI node since all move instructions that are non-PHI
-  // have already been inserted into the basic blocks Therefor we call the skip
-  // flow control instruction to move the iterator before the flow control
-  // instructions and put the move instruction there.
-  bool phi = (DestReg < 1025) || (SrcReg < 1025);
-  int movInst = phi ? getMoveInstFromID(DestRC->getID())
-                    : getPHIMoveInstFromID(DestRC->getID());
-  
-  MachineBasicBlock::iterator iTemp = (I == MBB.end()) ? skipFlowControl(&MBB)
-                                                       : I;
-  if (DestRC != SrcRC) {
-    //int convInst;
-    size_t dSize = DestRC->getSize();
-    size_t sSize = SrcRC->getSize();
-    if (dSize > sSize) {
-      // Elements are going to get duplicated.
-      BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
-    } else if (dSize == sSize) {
-      // Direct copy, conversions are not handled.
-      BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
-    } else if (dSize < sSize) {
-      // Elements are going to get dropped.
-      BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
-    }
-  } else {
-    BuildMI( MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg);
-  }
-  return true;
-}
  void
  AMDILInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI, DebugLoc DL,
@@ -427,15 +334,11 @@ AMDILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
    if (MI != MBB.end()) {
      DL = MI->getDebugLoc();
    }
-  MachineInstr *nMI = BuildMI(MBB, MI, DL, get(Opc))
+  BuildMI(MBB, MI, DL, get(Opc))
      .addReg(SrcReg, getKillRegState(isKill))
      .addFrameIndex(FrameIndex)
      .addMemOperand(MMO)
      .addImm(0);
-  AMDILAS::InstrResEnc curRes;
-  curRes.bits.ResourceID 
-    = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
-  setAsmPrinterFlags(nMI, curRes);
  }
  
  void
@@ -511,16 +414,11 @@ AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
    if (MI != MBB.end()) {
      DL = MI->getDebugLoc();
    }
-  MachineInstr* nMI = BuildMI(MBB, MI, DL, get(Opc))
+  BuildMI(MBB, MI, DL, get(Opc))
      .addReg(DestReg, RegState::Define)
      .addFrameIndex(FrameIndex)
      .addMemOperand(MMO)
      .addImm(0);
-  AMDILAS::InstrResEnc curRes;
-  curRes.bits.ResourceID 
-    = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
-  setAsmPrinterFlags(nMI, curRes);
-
  }
  MachineInstr *
  AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -569,65 +467,6 @@ AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
    return 0;
  }
  
-bool
-AMDILInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
-                                        int64_t &Offset1,
-                                        int64_t &Offset2) const {
-  return false;
-  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) {
-    return false;
-  }
-  const MachineSDNode *mload1 = dyn_cast<MachineSDNode>(Load1);
-  const MachineSDNode *mload2 = dyn_cast<MachineSDNode>(Load2);
-  if (!mload1 || !mload2) {
-    return false;
-  }
-  if (mload1->memoperands_empty() ||
-      mload2->memoperands_empty()) {
-    return false;
-  }
-  MachineMemOperand *memOp1 = (*mload1->memoperands_begin());
-  MachineMemOperand *memOp2 = (*mload2->memoperands_begin());
-  const Value *mv1 = memOp1->getValue();
-  const Value *mv2 = memOp2->getValue();
-  if (!memOp1->isLoad() || !memOp2->isLoad()) {
-    return false;
-  }
-  if (getBasePointerValue(mv1) == getBasePointerValue(mv2)) {
-    if (isa<GetElementPtrInst>(mv1) && isa<GetElementPtrInst>(mv2)) {
-      const GetElementPtrInst *gep1 = dyn_cast<GetElementPtrInst>(mv1);
-      const GetElementPtrInst *gep2 = dyn_cast<GetElementPtrInst>(mv2);
-      if (!gep1 || !gep2) {
-        return false;
-      }
-      if (gep1->getNumOperands() != gep2->getNumOperands()) {
-        return false;
-      }
-      for (unsigned i = 0, e = gep1->getNumOperands() - 1; i < e; ++i) {
-        const Value *op1 = gep1->getOperand(i);
-        const Value *op2 = gep2->getOperand(i);
-        if (op1 != op2) {
-          // If any value except the last one is different, return false.
-          return false;
-        }
-      }
-      unsigned size = gep1->getNumOperands()-1;
-      if (!isa<ConstantInt>(gep1->getOperand(size))
-          || !isa<ConstantInt>(gep2->getOperand(size))) {
-        return false;
-      }
-      Offset1 = dyn_cast<ConstantInt>(gep1->getOperand(size))->getSExtValue();
-      Offset2 = dyn_cast<ConstantInt>(gep2->getOperand(size))->getSExtValue();
-      return true;
-    } else if (isa<Argument>(mv1) && isa<Argument>(mv2)) {
-      return false;
-    } else if (isa<GlobalValue>(mv1) && isa<GlobalValue>(mv2)) {
-      return false;
-    }
-  }
-  return false;
-}
-
  bool AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                               int64_t Offset1, int64_t Offset2,
                                               unsigned NumLoads) const {
@@ -654,16 +493,6 @@ bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const {
    // TODO: Implement this function
    return false;
  }
-#if 0
-bool AMDILInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
-  // TODO: Implement this function
-}
-
-bool AMDILInstrInfo::PredicateInstruction(MachineInstr *MI,
-        const SmallVectorImpl<MachineOperand> &Pred) const {
-    // TODO: Implement this function
-}
-#endif
  bool
  AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                    const SmallVectorImpl<MachineOperand> &Pred2)
@@ -689,21 +518,112 @@ AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
    return true;
  }
  
-unsigned AMDILInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
-  // TODO: Implement this function
-  return 0;
+bool AMDILInstrInfo::isLoadInst(MachineInstr *MI) const {
+  if (strstr(getName(MI->getOpcode()), "LOADCONST")) {
+    return false;
+  }
+  return strstr(getName(MI->getOpcode()), "LOAD");
  }
  
-#if 0
-unsigned
-AMDILInstrInfo::GetFunctionSizeInBytes(const MachineFunction &MF) const {
-  // TODO: Implement this function
-  return 0;
+bool AMDILInstrInfo::isSWSExtLoadInst(MachineInstr *MI) const
+{
+switch (MI->getOpcode()) {
+    default:
+      break;
+      ExpandCaseToByteShortTypes(AMDIL::LOCALLOAD);
+      ExpandCaseToByteShortTypes(AMDIL::GLOBALLOAD);
+      ExpandCaseToByteShortTypes(AMDIL::REGIONLOAD);
+      ExpandCaseToByteShortTypes(AMDIL::PRIVATELOAD);
+      ExpandCaseToByteShortTypes(AMDIL::CPOOLLOAD);
+      ExpandCaseToByteShortTypes(AMDIL::CONSTANTLOAD);
+      return true;
+  };
+  return false;
  }
  
-unsigned AMDILInstrInfo::getInlineAsmLength(const char *Str,
-                                            const MCAsmInfo &MAI) const {
-  // TODO: Implement this function
-  return 0;
+bool AMDILInstrInfo::isExtLoadInst(MachineInstr *MI) const {
+  return strstr(getName(MI->getOpcode()), "EXTLOAD");
+}
+
+bool AMDILInstrInfo::isSExtLoadInst(MachineInstr *MI) const {
+  return strstr(getName(MI->getOpcode()), "SEXTLOAD");
+}
+
+bool AMDILInstrInfo::isAExtLoadInst(MachineInstr *MI) const {
+  return strstr(getName(MI->getOpcode()), "AEXTLOAD");
+}
+
+bool AMDILInstrInfo::isZExtLoadInst(MachineInstr *MI) const {
+  return strstr(getName(MI->getOpcode()), "ZEXTLOAD");
+}
+
+bool AMDILInstrInfo::isStoreInst(MachineInstr *MI) const {
+  return strstr(getName(MI->getOpcode()), "STORE");
+}
+
+bool AMDILInstrInfo::isTruncStoreInst(MachineInstr *MI) const {
+  return strstr(getName(MI->getOpcode()), "TRUNCSTORE");
+}
+
+bool AMDILInstrInfo::isAtomicInst(MachineInstr *MI) const {
+  return strstr(getName(MI->getOpcode()), "ATOM");
+}
+
+bool AMDILInstrInfo::isVolatileInst(MachineInstr *MI) const {
+  if (!MI->memoperands_empty()) {
+    for (MachineInstr::mmo_iterator mob = MI->memoperands_begin(),
+        moe = MI->memoperands_end(); mob != moe; ++mob) {
+      // If there is a volatile mem operand, this is a volatile instruction.
+      if ((*mob)->isVolatile()) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+bool AMDILInstrInfo::isGlobalInst(llvm::MachineInstr *MI) const
+{
+  return strstr(getName(MI->getOpcode()), "GLOBAL");
+}
+bool AMDILInstrInfo::isPrivateInst(llvm::MachineInstr *MI) const
+{
+  return strstr(getName(MI->getOpcode()), "PRIVATE");
+}
+bool AMDILInstrInfo::isConstantInst(llvm::MachineInstr *MI) const
+{
+  return strstr(getName(MI->getOpcode()), "CONSTANT")
+    || strstr(getName(MI->getOpcode()), "CPOOL");
+}
+bool AMDILInstrInfo::isRegionInst(llvm::MachineInstr *MI) const
+{
+  return strstr(getName(MI->getOpcode()), "REGION");
+}
+bool AMDILInstrInfo::isLocalInst(llvm::MachineInstr *MI) const
+{
+  return strstr(getName(MI->getOpcode()), "LOCAL");
+}
+bool AMDILInstrInfo::isImageInst(llvm::MachineInstr *MI) const
+{
+  return strstr(getName(MI->getOpcode()), "IMAGE");
+}
+bool AMDILInstrInfo::isAppendInst(llvm::MachineInstr *MI) const
+{
+  return strstr(getName(MI->getOpcode()), "APPEND");
+}
+bool AMDILInstrInfo::isRegionAtomic(llvm::MachineInstr *MI) const
+{
+  return strstr(getName(MI->getOpcode()), "ATOM_R");
+}
+bool AMDILInstrInfo::isLocalAtomic(llvm::MachineInstr *MI) const
+{
+  return strstr(getName(MI->getOpcode()), "ATOM_L");
+}
+bool AMDILInstrInfo::isGlobalAtomic(llvm::MachineInstr *MI) const
+{
+  return strstr(getName(MI->getOpcode()), "ATOM_G")
+    || isArenaAtomic(MI);
+}
+bool AMDILInstrInfo::isArenaAtomic(llvm::MachineInstr *MI) const
+{
+  return strstr(getName(MI->getOpcode()), "ATOM_A");
  }
-#endif
diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.h b/src/gallium/drivers/radeon/AMDILInstrInfo.h

index 88dd4e9441ab7a3cfd038362f99afb297f058a89..4121246e6f9d95ffcec7f90a80878459201a9bd6 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILInstrInfo.h
+++ b/src/gallium/drivers/radeon/AMDILInstrInfo.h
@@ -40,12 +40,6 @@ public:
    // always be able to get register info as well (through this method).
    const AMDILRegisterInfo &getRegisterInfo() const;
  
-  // Return true if the instruction is a register to register move and leave the
-  // source and dest operands in the passed parameters.
-  bool isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg,
-                   unsigned int &DstReg, unsigned int &SrcSubIdx,
-                   unsigned int &DstSubIdx) const;
-
    bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
                               unsigned &DstReg, unsigned &SubIdx) const;
  
@@ -62,29 +56,10 @@ public:
                               const MachineMemOperand *&MMO,
                               int &FrameIndex) const;
  
-
-#if 0
-  void reMaterialize(MachineBasicBlock &MBB,
-                     MachineBasicBlock::iterator MI,
-                     unsigned DestReg, unsigned SubIdx,
-                     const MachineInstr *Orig,
-                     const TargetRegisterInfo *TRI) const;
-  MachineInstr *duplicate(MachineInstr *Orig,
-                          MachineFunction &MF) const;
-#endif
    MachineInstr *
    convertToThreeAddress(MachineFunction::iterator &MFI,
                          MachineBasicBlock::iterator &MBBI,
                          LiveVariables *LV) const;
-#if 0
-  MachineInstr *commuteInstruction(MachineInstr *MI,
-                                   bool NewMI = false) const;
-  bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
-                             unsigned &SrcOpIdx2) const;
-  bool produceSameValue(const MachineInstr *MI0,
-                        const MachineInstr *MI1) const;
-
-#endif
  
    bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                       MachineBasicBlock *&FBB,
@@ -99,12 +74,6 @@ public:
                 const SmallVectorImpl<MachineOperand> &Cond,
                 DebugLoc DL) const;
  
-  bool copyRegToReg(MachineBasicBlock &MBB,
-                    MachineBasicBlock::iterator I,
-                    unsigned DestReg, unsigned SrcReg,
-                    const TargetRegisterClass *DestRC,
-                    const TargetRegisterClass *SrcRC,
-                    DebugLoc DL) const;
    virtual void copyPhysReg(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MI, DebugLoc DL,
                             unsigned DestReg, unsigned SrcReg,
@@ -141,8 +110,6 @@ public:
    unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
                                        bool UnfoldLoad, bool UnfoldStore,
                                        unsigned *LoadRegIndex = 0) const;
-  bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
-                               int64_t &Offset1, int64_t &Offset2) const;
    bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                 int64_t Offset1, int64_t Offset2,
                                 unsigned NumLoads) const;
@@ -151,24 +118,36 @@ public:
    void insertNoop(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator MI) const;
    bool isPredicated(const MachineInstr *MI) const;
-#if 0
-  bool isUnpredicatedTerminator(const MachineInstr *MI) const;
-  bool PredicateInstruction(MachineInstr *MI,
-                            const SmallVectorImpl<MachineOperand> &Pred) const;
-#endif
    bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                           const SmallVectorImpl<MachineOperand> &Pred2) const;
    bool DefinesPredicate(MachineInstr *MI,
                          std::vector<MachineOperand> &Pred) const;
    bool isPredicable(MachineInstr *MI) const;
    bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
-  unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
-#if 0
-  unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const;
-  unsigned getInlineAsmLength(const char *Str,
-                              const MCAsmInfo &MAI) const;
-#endif
-  };
+
+  // Helper functions that check the opcode for status information
+  bool isLoadInst(llvm::MachineInstr *MI) const;
+  bool isExtLoadInst(llvm::MachineInstr *MI) const;
+  bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
+  bool isSExtLoadInst(llvm::MachineInstr *MI) const;
+  bool isZExtLoadInst(llvm::MachineInstr *MI) const;
+  bool isAExtLoadInst(llvm::MachineInstr *MI) const;
+  bool isStoreInst(llvm::MachineInstr *MI) const;
+  bool isTruncStoreInst(llvm::MachineInstr *MI) const;
+  bool isAtomicInst(llvm::MachineInstr *MI) const;
+  bool isVolatileInst(llvm::MachineInstr *MI) const;
+  bool isGlobalInst(llvm::MachineInstr *MI) const;
+  bool isPrivateInst(llvm::MachineInstr *MI) const;
+  bool isConstantInst(llvm::MachineInstr *MI) const;
+  bool isRegionInst(llvm::MachineInstr *MI) const;
+  bool isLocalInst(llvm::MachineInstr *MI) const;
+  bool isImageInst(llvm::MachineInstr *MI) const;
+  bool isAppendInst(llvm::MachineInstr *MI) const;
+  bool isRegionAtomic(llvm::MachineInstr *MI) const;
+  bool isLocalAtomic(llvm::MachineInstr *MI) const;
+  bool isGlobalAtomic(llvm::MachineInstr *MI) const;
+  bool isArenaAtomic(llvm::MachineInstr *MI) const;
+};
  
  }
  
diff --git a/src/gallium/drivers/radeon/AMDILInstructions.td b/src/gallium/drivers/radeon/AMDILInstructions.td

index f824a67d7ad83c38d0d5d486a78b8c3beae91279..db56e2121b3623942056772278b17b2e8ebcd606 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILInstructions.td
+++ b/src/gallium/drivers/radeon/AMDILInstructions.td
@@ -1,4 +1,4 @@
-//===-- AMDILInstructions.td - TODO: Add brief description -------===//
+//===-- AMDILInstructions.td - AMDIL Instruction definitions --------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
diff --git a/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp b/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp

deleted file mode 100644 (file)

index 9366f2e..0000000
--- a/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-//===---- AMDILMCCodeEmitter.cpp - Convert AMDIL text to AMDIL binary ----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-//===---------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "amdil-emitter"
-#include "AMDIL.h"
-#include "AMDILInstrInfo.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-#if 0
-namespace {
-  class AMDILMCCodeEmitter : public MCCodeEmitter {
-    AMDILMCCodeEmitter(const AMDILMCCodeEmitter &);// DO NOT IMPLEMENT
-    void operator=(const AMDILMCCodeEmitter &); // DO NOT IMPLEMENT
-    const TargetMachine &TM;
-    const TargetInstrInfo &TII;
-    MCContext &Ctx;
-    bool Is64BitMode;
-    public:
-    AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit);
-    ~AMDILMCCodeEmitter();
-    unsigned getNumFixupKinds() const;
-    const MCFixupKindInfo& getFixupKindInfo(MCFixupKind Kind) const;
-    static unsigned GetAMDILRegNum(const MCOperand &MO);
-    void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const;
-    void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
-        raw_ostream &OS) const;
-    void EmitImmediate(const MCOperand &Disp, unsigned ImmSize,
-        MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &os,
-        SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const;
-
-    void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
-        SmallVectorImpl<MCFixup> &Fixups) const;
-
-  }; // class AMDILMCCodeEmitter
-}; // anonymous namespace
-
-namespace llvm {
-  MCCodeEmitter *createAMDILMCCodeEmitter(const Target &,
-      TargetMachine &TM, MCContext &Ctx)
-  {
-    return new AMDILMCCodeEmitter(TM, Ctx, false);
-  }
-}
-
-AMDILMCCodeEmitter::AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx
-    , bool is64Bit)
-: TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx)
-{
-  Is64BitMode = is64Bit;
-}
-
-AMDILMCCodeEmitter::~AMDILMCCodeEmitter()
-{
-}
-
-unsigned
-AMDILMCCodeEmitter::getNumFixupKinds() const
-{
-  return 0;
-}
-
-const MCFixupKindInfo &
-AMDILMCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const
-{
-//  const static MCFixupKindInfo Infos[] = {};
-  if (Kind < FirstTargetFixupKind) {
-    return MCCodeEmitter::getFixupKindInfo(Kind);
-  }
-  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
-      "Invalid kind!");
-  return MCCodeEmitter::getFixupKindInfo(Kind);
- // return Infos[Kind - FirstTargetFixupKind];
-
-}
-
-void
-AMDILMCCodeEmitter::EmitByte(unsigned char C, unsigned &CurByte,
-    raw_ostream &OS) const
-{
-  OS << (char) C;
-  ++CurByte;
-}
-void
-AMDILMCCodeEmitter::EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
-    raw_ostream &OS) const
-{
-  // Output the constant in little endian byte order
-  for (unsigned i = 0; i != Size; ++i) {
-    EmitByte(Val & 255, CurByte, OS);
-    Val >>= 8;
-  }
-}
-void
-AMDILMCCodeEmitter::EmitImmediate(const MCOperand &DispOp, unsigned ImmSize,
-    MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
-    SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const
-{
-  // If this is a simple integer displacement that doesn't require a relocation
-  // emit it now.
-  if (DispOp.isImm()) {
-    EmitConstant(DispOp.getImm() + ImmOffset, ImmSize, CurByte, OS);
-  }
-
-  // If we have an immoffset, add it to the expression
-  const MCExpr *Expr = DispOp.getExpr();
-
-  if (ImmOffset) {
-    Expr = MCBinaryExpr::CreateAdd(Expr,
-        MCConstantExpr::Create(ImmOffset, Ctx), Ctx);
-  }
-  // Emit a symbolic constant as a fixup and 4 zeros.
-  Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind));
-  // TODO: Why the 4 zeros?
-  EmitConstant(0, ImmSize, CurByte, OS);
-}
-
-void
-AMDILMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
-    SmallVectorImpl<MCFixup> &Fixups) const
-{
-#if 0
-  unsigned Opcode = MI.getOpcode();
-  const TargetInstrDesc &Desc = TII.get(Opcode);
-  unsigned TSFlags = Desc.TSFlags;
-
-  // Keep track of the current byte being emitted.
-  unsigned CurByte = 0;
-
-  unsigned NumOps = Desc.getNumOperands();
-  unsigned CurOp = 0;
-
-  unsigned char BaseOpcode = 0;
-#ifndef NDEBUG
-  // FIXME: Verify.
-  if (// !Desc.isVariadic() &&
-      CurOp != NumOps) {
-    errs() << "Cannot encode all operands of: ";
-    MI.dump();
-    errs() << '\n';
-    abort();
-  }
-#endif
-#endif
-}
-#endif
diff --git a/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp b/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp

index b8e536361f07fa3d93c7d70955a2ce0ef048cbe5..5cb988785e24963cc871778beff540dc6e66a6d5 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp
+++ b/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp
@@ -8,17 +8,11 @@
  //==-----------------------------------------------------------------------===//
  
  
-#define DEBUG_TYPE "machine_peephole"
-#if !defined(NDEBUG)
-#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
-#else
-#define DEBUGME (false)
-#endif
-
  #include "AMDIL.h"
+#include "AMDILInstrInfo.h"
  #include "AMDILSubtarget.h"
-#include "AMDILUtilityFunctions.h"
  #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Target/TargetMachine.h"
@@ -56,7 +50,7 @@ namespace llvm
  AMDILMachinePeephole::AMDILMachinePeephole(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
    : MachineFunctionPass(ID), TM(tm)
  {
-  mDebug = DEBUGME;
+  mDebug = false;
  }
  
  bool
@@ -64,6 +58,8 @@ AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF)
  {
    bool Changed = false;
    const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
+  const AMDILInstrInfo * AMDILII =
+                         static_cast<const AMDILInstrInfo *>(TM.getInstrInfo());
    for (MachineFunction::iterator MBB = MF.begin(), MBE = MF.end();
        MBB != MBE; ++MBB) {
      MachineBasicBlock *mb = MBB;
@@ -74,7 +70,7 @@ AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF)
        name = TM.getInstrInfo()->getName(mi->getOpcode());
        switch (mi->getOpcode()) {
          default:
-          if (isAtomicInst(TM.getInstrInfo(), mi)) {
+          if (AMDILII->isAtomicInst(mi)) {
              // If we don't support the hardware accellerated address spaces,
              // then the atomic needs to be transformed to the global atomic.
              if (strstr(name, "_L_")
@@ -94,7 +90,8 @@ AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF)
                    TM.getInstrInfo()->get(
                      (mi->getOpcode() - AMDIL::ATOM_R_ADD) + AMDIL::ATOM_G_ADD));
              }
-          } else if ((isLoadInst(TM.getInstrInfo(), mi) || isStoreInst(TM.getInstrInfo(), mi)) && isVolatileInst(TM.getInstrInfo(), mi)) {
+          } else if ((AMDILII->isLoadInst(mi) || AMDILII->isStoreInst(mi))
+                     && AMDILII->isVolatileInst(mi)) {
              insertFence(MIB);
            }
            continue;
diff --git a/src/gallium/drivers/radeon/AMDILMultiClass.td b/src/gallium/drivers/radeon/AMDILMultiClass.td

index 92691db52fdacca23ca8f5fe0ce43417ce5df4f4..d6828178ba799c12635cf83b401bdb1d1da31a13 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILMultiClass.td
+++ b/src/gallium/drivers/radeon/AMDILMultiClass.td
@@ -1,4 +1,4 @@
-//===-- AMDILMultiClass.td - TODO: Add brief description -------===//
+//===-- AMDILMultiClass.td - AMDIL Multiclass defs ---*- tablegen -*-------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
diff --git a/src/gallium/drivers/radeon/AMDILNIDevice.cpp b/src/gallium/drivers/radeon/AMDILNIDevice.cpp

index 8fda1c18ae551b91790f47e4fa54e401ac28401f..d4112cda0b509b4d250ea8b0e081e44fca04a27a 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILNIDevice.cpp
+++ b/src/gallium/drivers/radeon/AMDILNIDevice.cpp
@@ -1,4 +1,4 @@
-//===-- AMDILNIDevice.cpp - TODO: Add brief description -------===//
+//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
diff --git a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp

index 5fe9f53c8c8c70f0792c33e02e38e7cdaed63a7a..b62c7ab048bc5a158c7379b5e27bb6d3a5cf0914 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
+++ b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp
@@ -1,4 +1,4 @@
-//===-- AMDILPeepholeOptimizer.cpp - TODO: Add brief description -------===//
+//===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,20 +7,14 @@
  //
  //==-----------------------------------------------------------------------===//
  
-#define DEBUG_TYPE "PeepholeOpt"
-#ifdef DEBUG
-#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
-#else
-#define DEBUGME 0
-#endif
-
  #include "AMDILAlgorithms.tpp"
  #include "AMDILDevices.h"
-#include "AMDILUtilityFunctions.h"
+#include "AMDILInstrInfo.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/ADT/StringExtras.h"
  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/Twine.h"
+#include "llvm/Constants.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineFunctionAnalysis.h"
  #include "llvm/Function.h"
@@ -41,6 +35,9 @@ using namespace llvm;
  // The Peephole optimization pass is used to do simple last minute optimizations
  // that are required for correct code or to remove redundant functions
  namespace {
+
+class OpaqueType;
+
  class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass {
  public:
    TargetMachine &TM;
@@ -114,6 +111,19 @@ private:
    // samplers at compile time.
    bool propagateSamplerInst(CallInst *CI);
  
+  // Helper functions
+
+  // Group of functions that recursively calculate the size of a structure based
+  // on its sub-types.
+  size_t getTypeSize(Type * const T, bool dereferencePtr = false);
+  size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
+  size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
+  size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false);
+  size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
+  size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
+  size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
+  size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
+
    LLVMContext *mCTX;
    Function *mF;
    const AMDILSubtarget *mSTM;
@@ -134,7 +144,7 @@ namespace llvm {
  AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
    : FunctionPass(ID), TM(tm) 
  {
-  mDebug = DEBUGME;
+  mDebug = false;
    optLevel = TM.getOptLevel();
  
  }
@@ -1136,3 +1146,106 @@ AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const
    FunctionPass::getAnalysisUsage(AU);
    AU.setPreservesAll();
  }
+
+size_t AMDILPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
+  size_t size = 0;
+  if (!T) {
+    return size;
+  }
+  switch (T->getTypeID()) {
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+  case Type::LabelTyID:
+    assert(0 && "These types are not supported by this backend");
+  default:
+  case Type::FloatTyID:
+  case Type::DoubleTyID:
+    size = T->getPrimitiveSizeInBits() >> 3;
+    break;
+  case Type::PointerTyID:
+    size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
+    break;
+  case Type::IntegerTyID:
+    size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
+    break;
+  case Type::StructTyID:
+    size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
+    break;
+  case Type::ArrayTyID:
+    size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
+    break;
+  case Type::FunctionTyID:
+    size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
+    break;
+  case Type::VectorTyID:
+    size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
+    break;
+  };
+  return size;
+}
+
+size_t AMDILPeepholeOpt::getTypeSize(StructType * const ST,
+    bool dereferencePtr) {
+  size_t size = 0;
+  if (!ST) {
+    return size;
+  }
+  Type *curType;
+  StructType::element_iterator eib;
+  StructType::element_iterator eie;
+  for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
+    curType = *eib;
+    size += getTypeSize(curType, dereferencePtr);
+  }
+  return size;
+}
+
+size_t AMDILPeepholeOpt::getTypeSize(IntegerType * const IT,
+    bool dereferencePtr) {
+  return IT ? (IT->getBitWidth() >> 3) : 0;
+}
+
+size_t AMDILPeepholeOpt::getTypeSize(FunctionType * const FT,
+    bool dereferencePtr) {
+    assert(0 && "Should not be able to calculate the size of an function type");
+    return 0;
+}
+
+size_t AMDILPeepholeOpt::getTypeSize(ArrayType * const AT,
+    bool dereferencePtr) {
+  return (size_t)(AT ? (getTypeSize(AT->getElementType(),
+                                    dereferencePtr) * AT->getNumElements())
+                     : 0);
+}
+
+size_t AMDILPeepholeOpt::getTypeSize(VectorType * const VT,
+    bool dereferencePtr) {
+  return VT ? (VT->getBitWidth() >> 3) : 0;
+}
+
+size_t AMDILPeepholeOpt::getTypeSize(PointerType * const PT,
+    bool dereferencePtr) {
+  if (!PT) {
+    return 0;
+  }
+  Type *CT = PT->getElementType();
+  if (CT->getTypeID() == Type::StructTyID &&
+      PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+    return getTypeSize(dyn_cast<StructType>(CT));
+  } else if (dereferencePtr) {
+    size_t size = 0;
+    for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
+      size += getTypeSize(PT->getContainedType(x), dereferencePtr);
+    }
+    return size;
+  } else {
+    return 4;
+  }
+}
+
+size_t AMDILPeepholeOpt::getTypeSize(OpaqueType * const OT,
+    bool dereferencePtr) {
+  //assert(0 && "Should not be able to calculate the size of an opaque type");
+  return 4;
+}
diff --git a/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp b/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp

index 5588233378c143bef322920021527f31d7de71d1..d7c1dc74b8b43b99352155b5c5fae137f01c6c4a 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp
+++ b/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp
@@ -20,7 +20,8 @@
  
  #include "AMDILRegisterInfo.h"
  #include "AMDIL.h"
-#include "AMDILUtilityFunctions.h"
+#include "AMDILInstrInfo.h"
+#include "AMDILTargetMachine.h"
  #include "llvm/ADT/BitVector.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -109,7 +110,9 @@ AMDILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
      if (!MI.getOperand(x).isFI()) {
        continue;
      }
-    bool def = isStoreInst(TM.getInstrInfo(), &MI);
+    const AMDILInstrInfo * AMDILII =
+                         static_cast<const AMDILInstrInfo *>(TM.getInstrInfo());
+    bool def = AMDILII->isStoreInst(&MI);
      int FrameIndex = MI.getOperand(x).getIndex();
      int64_t Offset = MFI->getObjectOffset(FrameIndex);
      //int64_t Size = MF.getFrameInfo()->getObjectSize(FrameIndex);
diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.cpp b/src/gallium/drivers/radeon/AMDILSIDevice.cpp

index ce560984ef91b81bdb439d2fcd035d590b0246a8..ae402a5d1f7c4332a4fe2e540241aa85827bc5e7 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILSIDevice.cpp
+++ b/src/gallium/drivers/radeon/AMDILSIDevice.cpp
@@ -1,49 +1,49 @@
-//===-- AMDILSIDevice.cpp - TODO: Add brief description -------===//
+//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
  // This file is distributed under the University of Illinois Open Source
  // License. See LICENSE.TXT for details.
  //
-//==-----------------------------------------------------------------------===//\r
-#include "AMDILSIDevice.h"\r
-#include "AMDILEvergreenDevice.h"\r
-#include "AMDILNIDevice.h"\r
-#include "AMDILSubtarget.h"\r
+//==-----------------------------------------------------------------------===//
+#include "AMDILSIDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILSubtarget.h"
  
-using namespace llvm;\r
-\r
-AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST)\r
-  : AMDILEvergreenDevice(ST)\r
-{\r
-}\r
-AMDILSIDevice::~AMDILSIDevice()\r
-{\r
-}\r
-\r
-size_t\r
-AMDILSIDevice::getMaxLDSSize() const\r
-{\r
-  if (usesHardware(AMDILDeviceInfo::LocalMem)) {\r
-    return MAX_LDS_SIZE_900;\r
-  } else {\r
-    return 0;\r
-  }\r
-}\r
-\r
-uint32_t\r
-AMDILSIDevice::getGeneration() const\r
-{\r
-  return AMDILDeviceInfo::HD7XXX;\r
-}\r
-\r
-std::string\r
-AMDILSIDevice::getDataLayout() const\r
-{\r
-    return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"\r
-      "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"\r
-      "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"\r
-      "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"\r
-      "-v512:512:512-v1024:1024:1024-v2048:2048:2048"\r
-      "-n8:16:32:64");\r
-}\r
+using namespace llvm;
+
+AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST)
+  : AMDILEvergreenDevice(ST)
+{
+}
+AMDILSIDevice::~AMDILSIDevice()
+{
+}
+
+size_t
+AMDILSIDevice::getMaxLDSSize() const
+{
+  if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+    return MAX_LDS_SIZE_900;
+  } else {
+    return 0;
+  }
+}
+
+uint32_t
+AMDILSIDevice::getGeneration() const
+{
+  return AMDILDeviceInfo::HD7XXX;
+}
+
+std::string
+AMDILSIDevice::getDataLayout() const
+{
+    return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
+      "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+      "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+      "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+      "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+      "-n8:16:32:64");
+}
diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.h b/src/gallium/drivers/radeon/AMDILSIDevice.h

index 69f35a0588df00a1394e8d2ad2772940fc8b43e5..b272af7cfcf9639d077b88f2b35e02dd953d8bc1 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILSIDevice.h
+++ b/src/gallium/drivers/radeon/AMDILSIDevice.h
@@ -1,45 +1,45 @@
-//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//\r
+//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
  // This file is distributed under the University of Illinois Open Source
  // License. See LICENSE.TXT for details.
  //
-//==-----------------------------------------------------------------------===//\r
-//\r
-// Interface for the subtarget data classes.\r
-//\r
-//===---------------------------------------------------------------------===//\r
-// This file will define the interface that each generation needs to\r
-// implement in order to correctly answer queries on the capabilities of the\r
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===---------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
  // specific hardware.\r
-//===---------------------------------------------------------------------===//\r
-#ifndef _AMDILSIDEVICE_H_\r
-#define _AMDILSIDEVICE_H_\r
-#include "AMDILEvergreenDevice.h"\r
-#include "AMDILSubtarget.h"\r
+//===---------------------------------------------------------------------===//
+#ifndef _AMDILSIDEVICE_H_
+#define _AMDILSIDEVICE_H_
+#include "AMDILEvergreenDevice.h"
+#include "AMDILSubtarget.h"
+
+namespace llvm {
+  class AMDILSubtarget;
+//===---------------------------------------------------------------------===//
+// SI generation of devices and their respective sub classes
+//===---------------------------------------------------------------------===//
+
+// The AMDILSIDevice is the base class for all Southern Islands series of
+// cards. It is very similar to the AMDILEvergreenDevice, with the major
+// exception being differences in wavefront size and hardware capabilities.  The
+// SI devices are all 64 wide wavefronts and also add support for signed 24 bit
+// integer operations
+
+  class AMDILSIDevice : public AMDILEvergreenDevice {
+    public:
+      AMDILSIDevice(AMDILSubtarget*);
+      virtual ~AMDILSIDevice();
+      virtual size_t getMaxLDSSize() const;
+      virtual uint32_t getGeneration() const;
+      virtual std::string getDataLayout() const;
+    protected:
+  }; // AMDILSIDevice
  
-namespace llvm {\r
-  class AMDILSubtarget;\r
-//===---------------------------------------------------------------------===//\r
-// SI generation of devices and their respective sub classes\r
-//===---------------------------------------------------------------------===//\r
-\r
-// The AMDILSIDevice is the base class for all Northern Island series of\r
-// cards. It is very similiar to the AMDILEvergreenDevice, with the major\r
-// exception being differences in wavefront size and hardware capabilities.  The\r
-// SI devices are all 64 wide wavefronts and also add support for signed 24 bit\r
-// integer operations\r
-\r
-  class AMDILSIDevice : public AMDILEvergreenDevice {\r
-    public:\r
-      AMDILSIDevice(AMDILSubtarget*);\r
-      virtual ~AMDILSIDevice();\r
-      virtual size_t getMaxLDSSize() const;\r
-      virtual uint32_t getGeneration() const;\r
-      virtual std::string getDataLayout() const;\r
-    protected:\r
-  }; // AMDILSIDevice\r
-\r
-} // namespace llvm\r
-#endif // _AMDILSIDEVICE_H_\r
+} // namespace llvm
+#endif // _AMDILSIDEVICE_H_
diff --git a/src/gallium/drivers/radeon/AMDILSubtarget.cpp b/src/gallium/drivers/radeon/AMDILSubtarget.cpp

index 11b6bbe0c0129c37a6c9530b78f930e9181f49fa..249cb03f4a3b83c6d2e14b34bf9dc48a4e958426 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILSubtarget.cpp
+++ b/src/gallium/drivers/radeon/AMDILSubtarget.cpp
@@ -27,7 +27,8 @@ using namespace llvm;
  #define GET_SUBTARGETINFO_TARGET_DESC
  #include "AMDILGenSubtargetInfo.inc"
  
-AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS )
+AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS ),
+  mDumpCode(false)
  {
    memset(CapsOverride, 0, sizeof(*CapsOverride)
        * AMDILDeviceInfo::MaxNumberCapabilities);
@@ -93,7 +94,7 @@ AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::St
    }
  #endif
    mDevName = GPU;
-  mDevice = getDeviceFromName(mDevName, this, mIs64bit);
+  mDevice = AMDILDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit);
  }
  AMDILSubtarget::~AMDILSubtarget()
  {
diff --git a/src/gallium/drivers/radeon/AMDILSubtarget.h b/src/gallium/drivers/radeon/AMDILSubtarget.h

index a4b0e34ada70971702600b0bc5f7af7480ccb9ee..38fcb859ac643af932d5d6e11ca2150c11a57ce2 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILSubtarget.h
+++ b/src/gallium/drivers/radeon/AMDILSubtarget.h
@@ -42,6 +42,7 @@ namespace llvm {
        uint32_t mVersion;
        bool mIs64bit;
        bool mIs32on64bit;
+      bool mDumpCode;
      public:
        AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS);
        virtual ~AMDILSubtarget();
@@ -67,6 +68,7 @@ namespace llvm {
          ParseSubtargetFeatures(
              llvm::StringRef CPU,
              llvm::StringRef FS);
+      bool dumpCode() const { return mDumpCode; }
  
    };
  
diff --git a/src/gallium/drivers/radeon/AMDILTargetMachine.cpp b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp

index 77fac1d97bddd11acfe47cdcc0efe888268bcbf5..0879d43ad7245bc8ab6aa84997b2d8cba5e7511e 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILTargetMachine.cpp
+++ b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp
@@ -150,8 +150,8 @@ bool AMDILPassConfig::addPreISel()
  
  bool AMDILPassConfig::addInstSelector()
  {
-  PM.add(createAMDILPeepholeOpt(*TM));
-  PM.add(createAMDILISelDag(getAMDILTargetMachine()));
+  PM->add(createAMDILPeepholeOpt(*TM));
+  PM->add(createAMDILISelDag(getAMDILTargetMachine()));
    return false;
  }
  
@@ -162,7 +162,7 @@ bool AMDILPassConfig::addPreRegAlloc()
      llvm::RegisterScheduler::setDefault(&llvm::createSourceListDAGScheduler);
    }
  
-  PM.add(createAMDILMachinePeephole(*TM));
+  PM->add(createAMDILMachinePeephole(*TM));
    return false;
  }
  
@@ -175,8 +175,8 @@ bool AMDILPassConfig::addPostRegAlloc() {
  /// true if -print-machineinstrs should print out the code after the passes.
  bool AMDILPassConfig::addPreEmitPass()
  {
-  PM.add(createAMDILCFGPreparationPass(*TM));
-  PM.add(createAMDILCFGStructurizerPass(*TM));
+  PM->add(createAMDILCFGPreparationPass(*TM));
+  PM->add(createAMDILCFGStructurizerPass(*TM));
    return true;
  }
  
diff --git a/src/gallium/drivers/radeon/AMDILTokenDesc.td b/src/gallium/drivers/radeon/AMDILTokenDesc.td

index b81f593506f21ec129660af5fc4abc7857c9feff..2dafb2cd5591fe34ab376079eccf3f0ee5c9a656 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILTokenDesc.td
+++ b/src/gallium/drivers/radeon/AMDILTokenDesc.td
@@ -1,4 +1,4 @@
-//===-- AMDILTokenDesc.td - TODO: Add brief description -------===//
+//===-- AMDILTokenDesc.td - AMDIL Token Definitions --*- tablegen -*-----===//
  //
  //                     The LLVM Compiler Infrastructure
  //
diff --git a/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp b/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp

deleted file mode 100644 (file)

index f2ef4eb..0000000
--- a/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp
+++ /dev/null
@@ -1,683 +0,0 @@
-//===-- AMDILUtilityFunctions.cpp - AMDIL Utility Functions       ---------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file provides the implementations of functions that are declared in the
-// AMDILUtilityFUnctions.h file.
-//
-//===----------------------------------------------------------------------===//
-#include "AMDILUtilityFunctions.h"
-#include "AMDILISelLowering.h"
-#include "llvm/ADT/ValueMap.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instruction.h"
-#include "llvm/Instructions.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Type.h"
-
-#include <cstdio>
-#include <list>
-#include <queue>
-
-#define GET_OPCODE_NAME(TII, MI) \
-  TII->getName(MI->getOpcode())
-
-
-using namespace llvm;
-int64_t GET_SCALAR_SIZE(llvm::Type *A) {
-  return A->getScalarSizeInBits();
-}
-
-const TargetRegisterClass * getRegClassFromID(unsigned int ID) {
-  switch (ID) {
-  default:
-    assert(0 && "Passed in ID does not match any register classes.");
-    return NULL;
-  case AMDIL::GPRI8RegClassID:
-    return &AMDIL::GPRI8RegClass;
-  case AMDIL::GPRI16RegClassID:
-    return &AMDIL::GPRI16RegClass;
-  case AMDIL::GPRI32RegClassID:
-    return &AMDIL::GPRI32RegClass;
-  case AMDIL::GPRF32RegClassID:
-    return &AMDIL::GPRF32RegClass;
-  case AMDIL::GPRI64RegClassID:
-    return &AMDIL::GPRI64RegClass;
-  case AMDIL::GPRF64RegClassID:
-    return &AMDIL::GPRF64RegClass;
-  case AMDIL::GPRV4F32RegClassID:
-    return &AMDIL::GPRV4F32RegClass;
-  case AMDIL::GPRV4I8RegClassID:
-    return &AMDIL::GPRV4I8RegClass;
-  case AMDIL::GPRV4I16RegClassID:
-    return &AMDIL::GPRV4I16RegClass;
-  case AMDIL::GPRV4I32RegClassID:
-    return &AMDIL::GPRV4I32RegClass;
-  case AMDIL::GPRV2F32RegClassID:
-    return &AMDIL::GPRV2F32RegClass;
-  case AMDIL::GPRV2I8RegClassID:
-    return &AMDIL::GPRV2I8RegClass;
-  case AMDIL::GPRV2I16RegClassID:
-    return &AMDIL::GPRV2I16RegClass;
-  case AMDIL::GPRV2I32RegClassID:
-    return &AMDIL::GPRV2I32RegClass;
-  case AMDIL::GPRV2F64RegClassID:
-    return &AMDIL::GPRV2F64RegClass;
-  case AMDIL::GPRV2I64RegClassID:
-    return &AMDIL::GPRV2I64RegClass;
-  };
-}
-
-unsigned int getMoveInstFromID(unsigned int ID) {
-  switch (ID) {
-  default:
-    assert(0 && "Passed in ID does not match any move instructions.");
-  case AMDIL::GPRI8RegClassID:
-    return AMDIL::MOVE_i8;
-  case AMDIL::GPRI16RegClassID:
-    return AMDIL::MOVE_i16;
-  case AMDIL::GPRI32RegClassID:
-    return AMDIL::MOVE_i32;
-  case AMDIL::GPRF32RegClassID:
-    return AMDIL::MOVE_f32;
-  case AMDIL::GPRI64RegClassID:
-    return AMDIL::MOVE_i64;
-  case AMDIL::GPRF64RegClassID:
-    return AMDIL::MOVE_f64;
-  case AMDIL::GPRV4F32RegClassID:
-    return AMDIL::MOVE_v4f32;
-  case AMDIL::GPRV4I8RegClassID:
-    return AMDIL::MOVE_v4i8;
-  case AMDIL::GPRV4I16RegClassID:
-    return AMDIL::MOVE_v4i16;
-  case AMDIL::GPRV4I32RegClassID:
-    return AMDIL::MOVE_v4i32;
-  case AMDIL::GPRV2F32RegClassID:
-    return AMDIL::MOVE_v2f32;
-  case AMDIL::GPRV2I8RegClassID:
-    return AMDIL::MOVE_v2i8;
-  case AMDIL::GPRV2I16RegClassID:
-    return AMDIL::MOVE_v2i16;
-  case AMDIL::GPRV2I32RegClassID:
-    return AMDIL::MOVE_v2i32;
-  case AMDIL::GPRV2F64RegClassID:
-    return AMDIL::MOVE_v2f64;
-  case AMDIL::GPRV2I64RegClassID:
-    return AMDIL::MOVE_v2i64;
-  };
-  return -1;
-}
-
-unsigned int getPHIMoveInstFromID(unsigned int ID) {
-  switch (ID) {
-  default:
-    assert(0 && "Passed in ID does not match any move instructions.");
-  case AMDIL::GPRI8RegClassID:
-    return AMDIL::PHIMOVE_i8;
-  case AMDIL::GPRI16RegClassID:
-    return AMDIL::PHIMOVE_i16;
-  case AMDIL::GPRI32RegClassID:
-    return AMDIL::PHIMOVE_i32;
-  case AMDIL::GPRF32RegClassID:
-    return AMDIL::PHIMOVE_f32;
-  case AMDIL::GPRI64RegClassID:
-    return AMDIL::PHIMOVE_i64;
-  case AMDIL::GPRF64RegClassID:
-    return AMDIL::PHIMOVE_f64;
-  case AMDIL::GPRV4F32RegClassID:
-    return AMDIL::PHIMOVE_v4f32;
-  case AMDIL::GPRV4I8RegClassID:
-    return AMDIL::PHIMOVE_v4i8;
-  case AMDIL::GPRV4I16RegClassID:
-    return AMDIL::PHIMOVE_v4i16;
-  case AMDIL::GPRV4I32RegClassID:
-    return AMDIL::PHIMOVE_v4i32;
-  case AMDIL::GPRV2F32RegClassID:
-    return AMDIL::PHIMOVE_v2f32;
-  case AMDIL::GPRV2I8RegClassID:
-    return AMDIL::PHIMOVE_v2i8;
-  case AMDIL::GPRV2I16RegClassID:
-    return AMDIL::PHIMOVE_v2i16;
-  case AMDIL::GPRV2I32RegClassID:
-    return AMDIL::PHIMOVE_v2i32;
-  case AMDIL::GPRV2F64RegClassID:
-    return AMDIL::PHIMOVE_v2f64;
-  case AMDIL::GPRV2I64RegClassID:
-    return AMDIL::PHIMOVE_v2i64;
-  };
-  return -1;
-}
-
-const TargetRegisterClass* getRegClassFromType(unsigned int type) {
-  switch (type) {
-  default:
-    assert(0 && "Passed in type does not match any register classes.");
-  case MVT::i8:
-    return &AMDIL::GPRI8RegClass;
-  case MVT::i16:
-    return &AMDIL::GPRI16RegClass;
-  case MVT::i32:
-    return &AMDIL::GPRI32RegClass;
-  case MVT::f32:
-    return &AMDIL::GPRF32RegClass;
-  case MVT::i64:
-    return &AMDIL::GPRI64RegClass;
-  case MVT::f64:
-    return &AMDIL::GPRF64RegClass;
-  case MVT::v4f32:
-    return &AMDIL::GPRV4F32RegClass;
-  case MVT::v4i8:
-    return &AMDIL::GPRV4I8RegClass;
-  case MVT::v4i16:
-    return &AMDIL::GPRV4I16RegClass;
-  case MVT::v4i32:
-    return &AMDIL::GPRV4I32RegClass;
-  case MVT::v2f32:
-    return &AMDIL::GPRV2F32RegClass;
-  case MVT::v2i8:
-    return &AMDIL::GPRV2I8RegClass;
-  case MVT::v2i16:
-    return &AMDIL::GPRV2I16RegClass;
-  case MVT::v2i32:
-    return &AMDIL::GPRV2I32RegClass;
-  case MVT::v2f64:
-    return &AMDIL::GPRV2F64RegClass;
-  case MVT::v2i64:
-    return &AMDIL::GPRV2I64RegClass;
-  }
-}
-
-void printSDNode(const SDNode *N) {
-  printf("Opcode: %d isTargetOpcode: %d isMachineOpcode: %d\n",
-         N->getOpcode(), N->isTargetOpcode(), N->isMachineOpcode());
-  printf("Empty: %d OneUse: %d Size: %d NodeID: %d\n",
-         N->use_empty(), N->hasOneUse(), (int)N->use_size(), N->getNodeId());
-  for (unsigned int i = 0; i < N->getNumOperands(); ++i) {
-    printf("OperandNum: %d ValueCount: %d ValueType: %d\n",
-           i, N->getNumValues(), N->getValueType(0) .getSimpleVT().SimpleTy);
-    printSDValue(N->getOperand(i), 0);
-  }
-}
-
-void printSDValue(const SDValue &Op, int level) {
-  printf("\nOp: %p OpCode: %d NumOperands: %d ", (void*)&Op, Op.getOpcode(),
-         Op.getNumOperands());
-  printf("IsTarget: %d IsMachine: %d ", Op.isTargetOpcode(),
-         Op.isMachineOpcode());
-  if (Op.isMachineOpcode()) {
-    printf("MachineOpcode: %d\n", Op.getMachineOpcode());
-  } else {
-    printf("\n");
-  }
-  EVT vt = Op.getValueType();
-  printf("ValueType: %d \n", vt.getSimpleVT().SimpleTy);
-  printf("UseEmpty: %d OneUse: %d\n", Op.use_empty(), Op.hasOneUse());
-  if (level) {
-    printf("Children for %d:\n", level);
-    for (unsigned int i = 0; i < Op.getNumOperands(); ++i) {
-      printf("Child %d->%d:", level, i);
-      printSDValue(Op.getOperand(i), level - 1);
-    }
-  }
-}
-
-bool isPHIMove(unsigned int opcode) {
-  switch (opcode) {
-  default:
-    return false;
-    ExpandCaseToAllTypes(AMDIL::PHIMOVE);
-    return true;
-  }
-  return false;
-}
-
-bool isMove(unsigned int opcode) {
-  switch (opcode) {
-  default:
-    return false;
-    ExpandCaseToAllTypes(AMDIL::MOVE);
-    return true;
-  }
-  return false;
-}
-
-bool isMoveOrEquivalent(unsigned int opcode) {
-  switch (opcode) {
-  default:
-    return isMove(opcode) || isPHIMove(opcode);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASCHAR);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASSHORT);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASINT);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASLONG);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASDOUBLE);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASFLOAT);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2CHAR);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2SHORT);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2INT);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2FLOAT);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2LONG);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2DOUBLE);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4CHAR);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4SHORT);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4INT);
-    ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4FLOAT);
-    case AMDIL::INTTOANY_i8:
-    case AMDIL::INTTOANY_i16:
-    case AMDIL::INTTOANY_i32:
-    case AMDIL::INTTOANY_f32:
-    case AMDIL::DLO:
-    case AMDIL::LLO:
-    case AMDIL::LLO_v2i64:
-      return true;
-  };
-  return false;
-}
-
-bool check_type(const Value *ptr, unsigned int addrspace) {
-  if (!ptr) {
-    return false;
-  }
-  Type *ptrType = ptr->getType();
-  return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
-}
-
-size_t getTypeSize(Type * const T, bool dereferencePtr) {
-  size_t size = 0;
-  if (!T) {
-    return size;
-  }
-  switch (T->getTypeID()) {
-  case Type::X86_FP80TyID:
-  case Type::FP128TyID:
-  case Type::PPC_FP128TyID:
-  case Type::LabelTyID:
-    assert(0 && "These types are not supported by this backend");
-  default:
-  case Type::FloatTyID:
-  case Type::DoubleTyID:
-    size = T->getPrimitiveSizeInBits() >> 3;
-    break;
-  case Type::PointerTyID:
-    size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
-    break;
-  case Type::IntegerTyID:
-    size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
-    break;
-  case Type::StructTyID:
-    size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
-    break;
-  case Type::ArrayTyID:
-    size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
-    break;
-  case Type::FunctionTyID:
-    size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
-    break;
-  case Type::VectorTyID:
-    size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
-    break;
-  };
-  return size;
-}
-
-size_t getTypeSize(StructType * const ST, bool dereferencePtr) {
-  size_t size = 0;
-  if (!ST) {
-    return size;
-  }
-  Type *curType;
-  StructType::element_iterator eib;
-  StructType::element_iterator eie;
-  for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
-    curType = *eib;
-    size += getTypeSize(curType, dereferencePtr);
-  }
-  return size;
-}
-
-size_t getTypeSize(IntegerType * const IT, bool dereferencePtr) {
-  return IT ? (IT->getBitWidth() >> 3) : 0;
-}
-
-size_t getTypeSize(FunctionType * const FT, bool dereferencePtr) {
-    assert(0 && "Should not be able to calculate the size of an function type");
-    return 0;
-}
-
-size_t getTypeSize(ArrayType * const AT, bool dereferencePtr) {
-  return (size_t)(AT ? (getTypeSize(AT->getElementType(),
-                                    dereferencePtr) * AT->getNumElements())
-                     : 0);
-}
-
-size_t getTypeSize(VectorType * const VT, bool dereferencePtr) {
-  return VT ? (VT->getBitWidth() >> 3) : 0;
-}
-
-size_t getTypeSize(PointerType * const PT, bool dereferencePtr) {
-  if (!PT) {
-    return 0;
-  }
-  Type *CT = PT->getElementType();
-  if (CT->getTypeID() == Type::StructTyID &&
-      PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
-    return getTypeSize(dyn_cast<StructType>(CT));
-  } else if (dereferencePtr) {
-    size_t size = 0;
-    for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
-      size += getTypeSize(PT->getContainedType(x), dereferencePtr);
-    }
-    return size;
-  } else {
-    return 4;
-  }
-}
-
-size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr) {
-  //assert(0 && "Should not be able to calculate the size of an opaque type");
-  return 4;
-}
-
-size_t getNumElements(Type * const T) {
-  size_t size = 0;
-  if (!T) {
-    return size;
-  }
-  switch (T->getTypeID()) {
-  case Type::X86_FP80TyID:
-  case Type::FP128TyID:
-  case Type::PPC_FP128TyID:
-  case Type::LabelTyID:
-    assert(0 && "These types are not supported by this backend");
-  default:
-  case Type::FloatTyID:
-  case Type::DoubleTyID:
-    size = 1;
-    break;
-  case Type::PointerTyID:
-    size = getNumElements(dyn_cast<PointerType>(T));
-    break;
-  case Type::IntegerTyID:
-    size = getNumElements(dyn_cast<IntegerType>(T));
-    break;
-  case Type::StructTyID:
-    size = getNumElements(dyn_cast<StructType>(T));
-    break;
-  case Type::ArrayTyID:
-    size = getNumElements(dyn_cast<ArrayType>(T));
-    break;
-  case Type::FunctionTyID:
-    size = getNumElements(dyn_cast<FunctionType>(T));
-    break;
-  case Type::VectorTyID:
-    size = getNumElements(dyn_cast<VectorType>(T));
-    break;
-  };
-  return size;
-}
-
-size_t getNumElements(StructType * const ST) {
-  size_t size = 0;
-  if (!ST) {
-    return size;
-  }
-  Type *curType;
-  StructType::element_iterator eib;
-  StructType::element_iterator eie;
-  for (eib = ST->element_begin(), eie = ST->element_end();
-       eib != eie; ++eib) {
-    curType = *eib;
-    size += getNumElements(curType);
-  }
-  return size;
-}
-
-size_t getNumElements(IntegerType * const IT) {
-  return (!IT) ? 0 : 1;
-}
-
-size_t getNumElements(FunctionType * const FT) {
-  assert(0 && "Should not be able to calculate the number of "
-         "elements of a function type");
-  return 0;
-}
-
-size_t getNumElements(ArrayType * const AT) {
-  return (!AT) ? 0
-               :  (size_t)(getNumElements(AT->getElementType()) *
-                           AT->getNumElements());
-}
-
-size_t getNumElements(VectorType * const VT) {
-  return (!VT) ? 0
-               : VT->getNumElements() * getNumElements(VT->getElementType());
-}
-
-size_t getNumElements(PointerType * const PT) {
-  size_t size = 0;
-  if (!PT) {
-    return size;
-  }
-  for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
-    size += getNumElements(PT->getContainedType(x));
-  }
-  return size;
-}
-
-const llvm::Value *getBasePointerValue(const llvm::Value *V)
-{
-  if (!V) {
-    return NULL;
-  }
-  const Value *ret = NULL;
-  ValueMap<const Value *, bool> ValueBitMap;
-  std::queue<const Value *, std::list<const Value *> > ValueQueue;
-  ValueQueue.push(V);
-  while (!ValueQueue.empty()) {
-    V = ValueQueue.front();
-    if (ValueBitMap.find(V) == ValueBitMap.end()) {
-      ValueBitMap[V] = true;
-      if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
-        ret = V;
-        break;
-      } else if (dyn_cast<GlobalVariable>(V)) {
-        ret = V;
-        break;
-      } else if (dyn_cast<Constant>(V)) {
-        const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
-        if (CE) {
-          ValueQueue.push(CE->getOperand(0));
-        }
-      } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
-        ret = AI;
-        break;
-      } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
-        uint32_t numOps = I->getNumOperands();
-        for (uint32_t x = 0; x < numOps; ++x) {
-          ValueQueue.push(I->getOperand(x));
-        }
-      } else {
-        // assert(0 && "Found a Value that we didn't know how to handle!");
-      }
-    }
-    ValueQueue.pop();
-  }
-  return ret;
-}
-
-const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI) {
-  const Value *moVal = NULL;
-  if (!MI->memoperands_empty()) {
-    const MachineMemOperand *memOp = (*MI->memoperands_begin());
-    moVal = memOp ? memOp->getValue() : NULL;
-    moVal = getBasePointerValue(moVal);
-  }
-  return moVal;
-}
-
-bool commaPrint(int i, llvm::raw_ostream &O) {
-  O << ":" << i;
-  return false;
-}
-
-bool isLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
-  if (strstr(GET_OPCODE_NAME(TII, MI), "LOADCONST")) {
-    return false;
-  }
-  return strstr(GET_OPCODE_NAME(TII, MI), "LOAD");
-}
-
-bool isSWSExtLoadInst(MachineInstr *MI)
-{
-switch (MI->getOpcode()) {
-    default:
-      break;
-      ExpandCaseToByteShortTypes(AMDIL::LOCALLOAD);
-      ExpandCaseToByteShortTypes(AMDIL::GLOBALLOAD);
-      ExpandCaseToByteShortTypes(AMDIL::REGIONLOAD);
-      ExpandCaseToByteShortTypes(AMDIL::PRIVATELOAD);
-      ExpandCaseToByteShortTypes(AMDIL::CPOOLLOAD);
-      ExpandCaseToByteShortTypes(AMDIL::CONSTANTLOAD);
-      return true;
-  };
-  return false;
-}
-
-bool isExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
-  return strstr(GET_OPCODE_NAME(TII, MI), "EXTLOAD");
-}
-
-bool isSExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
-  return strstr(GET_OPCODE_NAME(TII, MI), "SEXTLOAD");
-}
-
-bool isAExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
-  return strstr(GET_OPCODE_NAME(TII, MI), "AEXTLOAD");
-}
-
-bool isZExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
-  return strstr(GET_OPCODE_NAME(TII, MI), "ZEXTLOAD");
-}
-
-bool isStoreInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
-  return strstr(GET_OPCODE_NAME(TII, MI), "STORE");
-}
-
-bool isTruncStoreInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
-  return strstr(GET_OPCODE_NAME(TII, MI), "TRUNCSTORE");
-}
-
-bool isAtomicInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
-  return strstr(GET_OPCODE_NAME(TII, MI), "ATOM");
-}
-
-bool isVolatileInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) {
-  if (!MI->memoperands_empty()) {
-    for (MachineInstr::mmo_iterator mob = MI->memoperands_begin(),
-        moe = MI->memoperands_end(); mob != moe; ++mob) {
-      // If there is a volatile mem operand, this is a volatile instruction.
-      if ((*mob)->isVolatile()) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-bool isGlobalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
-  return strstr(GET_OPCODE_NAME(TII, MI), "GLOBAL");
-}
-bool isPrivateInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
-  return strstr(GET_OPCODE_NAME(TII, MI), "PRIVATE");
-}
-bool isConstantInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
-  return strstr(GET_OPCODE_NAME(TII, MI), "CONSTANT")
-    || strstr(GET_OPCODE_NAME(TII, MI), "CPOOL");
-}
-bool isRegionInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
-  return strstr(GET_OPCODE_NAME(TII, MI), "REGION");
-}
-bool isLocalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
-  return strstr(GET_OPCODE_NAME(TII, MI), "LOCAL");
-}
-bool isImageInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
-  return strstr(GET_OPCODE_NAME(TII, MI), "IMAGE");
-}
-bool isAppendInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
-  return strstr(GET_OPCODE_NAME(TII, MI), "APPEND");
-}
-bool isRegionAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
-  return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_R");
-}
-bool isLocalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
-  return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_L");
-}
-bool isGlobalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
-  return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_G")
-    || isArenaAtomic(TII, MI);
-}
-bool isArenaAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI)
-{
-  return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_A");
-}
-
-const char* getSrcSwizzle(unsigned idx) {
-  const char *srcSwizzles[]  = {
-    "", ".x000", ".0x00", ".00x0", ".000x", ".y000", ".0y00", ".00y0", ".000y", 
-    ".z000", ".0z00", ".00z0", ".000z", ".w000", ".0w00", ".00w0", ".000w",
-    ".xy00", ".00xy", ".zw00", ".00zw", ".xyz0", ".0xyz", ".xyzw", ".0000",
-    ".xxxx", ".yyyy", ".zzzz", ".wwww", ".xyxy", ".zwzw", ".xzxz", ".ywyw",
-    ".x0y0", ".0x0y", ".xy_neg(y)", "_neg(yw)", "_neg(x)", ".xy_neg(xy)",
-    "_neg(xyzw)", ".0yzw", ".x0zw", ".xy0w", ".x", ".y", ".z", ".w", ".xy",
-    ".zw"
-  };
-  assert(idx < sizeof(srcSwizzles)/sizeof(srcSwizzles[0])
-      && "Idx passed in is invalid!");
-  return srcSwizzles[idx];
-}
-const char* getDstSwizzle(unsigned idx) {
-  const char *dstSwizzles[] = {
-    "", ".x___", ".xy__", ".xyz_", ".xyzw", "._y__", "._yz_", "._yzw", ".__z_",
-    ".__zw", ".___w", ".x_zw", ".xy_w", ".x_z_", ".x__w", "._y_w", 
-  };
-  assert(idx < sizeof(dstSwizzles)/sizeof(dstSwizzles[0])
-      && "Idx passed in is invalid!");
-  return dstSwizzles[idx];
-}
-/// Helper function to get the currently set flags
-void getAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes)
-{
-  // We need 16 bits of information, but LLVMr127097 cut the field in half.
-  // So we have to use two different fields to store all of our information.
-  uint16_t upper = MI->getFlags() << 8;
-  uint16_t lower = MI->getAsmPrinterFlags();
-  curRes.u16all = upper | lower;
-}
-/// Helper function to clear the currently set flags and add the new flags.
-void setAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes)
-{
-  // We need 16 bits of information, but LLVMr127097 cut the field in half.
-  // So we have to use two different fields to store all of our information.
-  MI->clearAsmPrinterFlags();
-  MI->setFlags(0);
-  uint8_t lower = curRes.u16all & 0xFF;
-  uint8_t upper = (curRes.u16all >> 8) & 0xFF;
-  MI->setFlags(upper);
-  MI->setAsmPrinterFlag((llvm::MachineInstr::CommentFlag)lower);
-}
diff --git a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h b/src/gallium/drivers/radeon/AMDILUtilityFunctions.h

index 637c868b55ce35d118f24bb1e6e637118fa63ae5..66af706bbb376d5f0d0b3698effc3ff18c79deb0 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h
+++ b/src/gallium/drivers/radeon/AMDILUtilityFunctions.h
@@ -7,191 +7,12 @@
  //
  //==-----------------------------------------------------------------------===//
  //
-// This file provides declarations for functions that are used across different
-// classes and provide various conversions or utility to shorten the code
+// This file provides helper macros for expanding case statements.
  //
  //===----------------------------------------------------------------------===//
  #ifndef AMDILUTILITYFUNCTIONS_H_
  #define AMDILUTILITYFUNCTIONS_H_
  
-#include "AMDIL.h"
-#include "AMDILTargetMachine.h"
-#include "llvm/ADT/SmallVector.h"
-
-// Utility functions from ID
-//
-namespace llvm {
-class TargetRegisterClass;
-class SDValue;
-class SDNode;
-class Value;
-class Type;
-class StructType;
-class IntegerType;
-class FunctionType;
-class VectorType;
-class ArrayType;
-class PointerType;
-class OpaqueType;
-class MachineInstr;
-
-}
-enum SrcSwizzles {
-  AMDIL_SRC_SWIZZLE_DEFAULT = 0,
-  AMDIL_SRC_SWIZZLE_X000,
-  AMDIL_SRC_SWIZZLE_0X00,
-  AMDIL_SRC_SWIZZLE_00X0,
-  AMDIL_SRC_SWIZZLE_000X,
-  AMDIL_SRC_SWIZZLE_Y000,
-  AMDIL_SRC_SWIZZLE_0Y00,
-  AMDIL_SRC_SWIZZLE_00Y0,
-  AMDIL_SRC_SWIZZLE_000Y,
-  AMDIL_SRC_SWIZZLE_Z000,
-  AMDIL_SRC_SWIZZLE_0Z00,
-  AMDIL_SRC_SWIZZLE_00Z0,
-  AMDIL_SRC_SWIZZLE_000Z,
-  AMDIL_SRC_SWIZZLE_W000,
-  AMDIL_SRC_SWIZZLE_0W00,
-  AMDIL_SRC_SWIZZLE_00W0,
-  AMDIL_SRC_SWIZZLE_000W,
-  AMDIL_SRC_SWIZZLE_XY00,
-  AMDIL_SRC_SWIZZLE_00XY,
-  AMDIL_SRC_SWIZZLE_ZW00,
-  AMDIL_SRC_SWIZZLE_00ZW,
-  AMDIL_SRC_SWIZZLE_XYZ0,
-  AMDIL_SRC_SWIZZLE_0XYZ,
-  AMDIL_SRC_SWIZZLE_XYZW,
-  AMDIL_SRC_SWIZZLE_0000,
-  AMDIL_SRC_SWIZZLE_XXXX,
-  AMDIL_SRC_SWIZZLE_YYYY,
-  AMDIL_SRC_SWIZZLE_ZZZZ,
-  AMDIL_SRC_SWIZZLE_WWWW,
-  AMDIL_SRC_SWIZZLE_XYXY,
-  AMDIL_SRC_SWIZZLE_ZWZW,
-  AMDIL_SRC_SWIZZLE_XZXZ,
-  AMDIL_SRC_SWIZZLE_YWYW,
-  AMDIL_SRC_SWIZZLE_X0Y0,
-  AMDIL_SRC_SWIZZLE_0X0Y,
-  AMDIL_SRC_SWIZZLE_XY_NEGY,
-  AMDIL_SRC_SWIZZLE_NEGYW,
-  AMDIL_SRC_SWIZZLE_NEGX,
-  AMDIL_SRC_SWIZZLE_XY_NEGXY,
-  AMDIL_SRC_SWIZZLE_NEG_XYZW,
-  AMDIL_SRC_SWIZZLE_0YZW,
-  AMDIL_SRC_SWIZZLE_X0ZW,
-  AMDIL_SRC_SWIZZLE_XY0W,
-  AMDIL_SRC_SWIZZLE_X,
-  AMDIL_SRC_SWIZZLE_Y,
-  AMDIL_SRC_SWIZZLE_Z,
-  AMDIL_SRC_SWIZZLE_W,
-  AMDIL_SRC_SWIZZLE_XY,
-  AMDIL_SRC_SWIZZLE_ZW,
-  AMDIL_SRC_SWIZZLE_LAST
-};
-enum DstSwizzles {
-  AMDIL_DST_SWIZZLE_DEFAULT = 0,
-  AMDIL_DST_SWIZZLE_X___,
-  AMDIL_DST_SWIZZLE_XY__,
-  AMDIL_DST_SWIZZLE_XYZ_,
-  AMDIL_DST_SWIZZLE_XYZW,
-  AMDIL_DST_SWIZZLE__Y__,
-  AMDIL_DST_SWIZZLE__YZ_,
-  AMDIL_DST_SWIZZLE__YZW,
-  AMDIL_DST_SWIZZLE___Z_,
-  AMDIL_DST_SWIZZLE___ZW,
-  AMDIL_DST_SWIZZLE____W,
-  AMDIL_DST_SWIZZLE_X_ZW,
-  AMDIL_DST_SWIZZLE_XY_W,
-  AMDIL_DST_SWIZZLE_X_Z_,
-  AMDIL_DST_SWIZZLE_X__W,
-  AMDIL_DST_SWIZZLE__Y_W,
-  AMDIL_DST_SWIZZLE_LAST
-};
-// Function to get the correct src swizzle string from ID
-const char *getSrcSwizzle(unsigned);
-
-// Function to get the correct dst swizzle string from ID
-const char *getDstSwizzle(unsigned);
-
-const llvm::TargetRegisterClass *getRegClassFromID(unsigned int ID);
-
-unsigned int getMoveInstFromID(unsigned int ID);
-unsigned int getPHIMoveInstFromID(unsigned int ID);
-
-// Utility functions from Type.
-const llvm::TargetRegisterClass *getRegClassFromType(unsigned int type);
-unsigned int getTargetIndependentMoveFromType(unsigned int type);
-
-// Debug functions for SDNode and SDValue.
-void printSDValue(const llvm::SDValue &Op, int level);
-void printSDNode(const llvm::SDNode *N);
-
-// Functions to check if an opcode is a specific type.
-bool isMove(unsigned int opcode);
-bool isPHIMove(unsigned int opcode);
-bool isMoveOrEquivalent(unsigned int opcode);
-
-// Function to check address space
-bool check_type(const llvm::Value *ptr, unsigned int addrspace);
-
-// Group of functions that recursively calculate the size of a structure based
-// on it's sub-types.
-size_t getTypeSize(llvm::Type * const T, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::StructType * const ST, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::IntegerType * const IT, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::FunctionType * const FT, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::ArrayType * const AT, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::VectorType * const VT, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::PointerType * const PT, bool dereferencePtr = false);
-size_t
-getTypeSize(llvm::OpaqueType * const OT, bool dereferencePtr = false);
-
-// Group of functions that recursively calculate the number of elements of a
-// structure based on it's sub-types.
-size_t getNumElements(llvm::Type * const T);
-size_t getNumElements(llvm::StructType * const ST);
-size_t getNumElements(llvm::IntegerType * const IT);
-size_t getNumElements(llvm::FunctionType * const FT);
-size_t getNumElements(llvm::ArrayType * const AT);
-size_t getNumElements(llvm::VectorType * const VT);
-size_t getNumElements(llvm::PointerType * const PT);
-size_t getNumElements(llvm::OpaqueType * const OT);
-const llvm::Value *getBasePointerValue(const llvm::Value *V);
-const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI);
-
-
-int64_t GET_SCALAR_SIZE(llvm::Type* A);
-
-// Helper functions that check the opcode for status information
-bool isLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isSWSExtLoadInst(llvm::MachineInstr *MI);
-bool isSExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isZExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isAExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isStoreInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isTruncStoreInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isAtomicInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isVolatileInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isGlobalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isPrivateInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isConstantInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isRegionInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isLocalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isImageInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isAppendInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isRegionAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isLocalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isGlobalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-bool isArenaAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI);
-
-
  // Macros that are used to help with switch statements for various data types
  // However, these macro's do not return anything unlike the second set below.
  #define ExpandCaseTo32bitIntTypes(Instr)  \
@@ -354,9 +175,4 @@ case Instr##_v4f32: \
  case Instr##_v2i64: \
  case Instr##_v2f64:
  
-bool commaPrint(int i, llvm::raw_ostream &O);
-/// Helper function to get the currently get/set flags.
-void getAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes);
-void setAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes);
-
  #endif // AMDILUTILITYFUNCTIONS_H_
diff --git a/src/gallium/drivers/radeon/AMDILVersion.td b/src/gallium/drivers/radeon/AMDILVersion.td

index b8b02608d3b828b2c2e97a12fec00f608692b3f0..d863b068131161a81bc211f4a1561871da7f5b58 100644 (file)
--- a/src/gallium/drivers/radeon/AMDILVersion.td
+++ b/src/gallium/drivers/radeon/AMDILVersion.td
@@ -1,4 +1,4 @@
-//===-- AMDILVersion.td - TODO: Add brief description -------===//
+//===-- AMDILVersion.td - Barrier Instruction/Intrinsic definitions------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
diff --git a/src/gallium/drivers/radeon/Makefile b/src/gallium/drivers/radeon/Makefile

index 807dc781c7cc13ebcdd4c9fb7564bb1025ade5c5..cc409645a6eabc822d09af888584b93d5a942114 100644 (file)
--- a/src/gallium/drivers/radeon/Makefile
+++ b/src/gallium/drivers/radeon/Makefile
@@ -18,6 +18,8 @@ CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS))
  
  tablegen = $(TBLGEN) -I $(LLVM_INCLUDEDIR) $1 $2 -o $3
  
+HAVE_LLVM_INTRINSICS = $(shell grep IntrinsicsR600.td $(LLVM_INCLUDEDIR)/llvm/Intrinsics.td)
+
  gen: $(GENERATED_SOURCES)
  
  SIRegisterInfo.td: SIGenRegisterInfo.pl
@@ -26,9 +28,13 @@ SIRegisterInfo.td: SIGenRegisterInfo.pl
  SIRegisterGetHWRegNum.inc: SIGenRegisterInfo.pl
         $(PERL) $^ $@ > /dev/null
  
-R600ShaderPatterns.td: AMDGPUGenShaderPatterns.pl
-       $(PERL) $^ C > $@
-       
+R600Intrinsics.td: R600IntrinsicsNoOpenCL.td R600IntrinsicsOpenCL.td
+ifeq ($(HAVE_LLVM_INTRINSICS),)
+       cp R600IntrinsicsNoOpenCL.td R600Intrinsics.td
+else
+       cp R600IntrinsicsOpenCL.td R600Intrinsics.td
+endif
+
  R600RegisterInfo.td: R600GenRegisterInfo.pl
         $(PERL) $^ > $@
  
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources

index 7d2932b4dbd90bcfda54bed2eca79bf5c42b4bff..6dc62320f406017a0ec35dab759080e7dd9c3c11 100644 (file)
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -1,6 +1,6 @@
  
  GENERATED_SOURCES := \
-       R600ShaderPatterns.td           \
+       R600Intrinsics.td               \
         R600RegisterInfo.td             \
         AMDGPUInstrEnums.td             \
         SIRegisterInfo.td               \
@@ -29,20 +29,16 @@ CPP_SOURCES := \
         AMDILISelDAGToDAG.cpp           \
         AMDILISelLowering.cpp           \
         AMDILMachinePeephole.cpp        \
-       AMDILMCCodeEmitter.cpp          \
         AMDILNIDevice.cpp               \
         AMDILPeepholeOptimizer.cpp      \
         AMDILRegisterInfo.cpp           \
         AMDILSIDevice.cpp               \
         AMDILSubtarget.cpp              \
         AMDILTargetMachine.cpp          \
-       AMDILUtilityFunctions.cpp       \
         AMDGPUTargetMachine.cpp         \
         AMDGPUISelLowering.cpp          \
         AMDGPUConvertToISA.cpp          \
         AMDGPULowerInstructions.cpp             \
-       AMDGPULowerShaderInstructions.cpp       \
-       AMDGPUReorderPreloadInstructions.cpp    \
         AMDGPUInstrInfo.cpp             \
         AMDGPURegisterInfo.cpp          \
         AMDGPUUtil.cpp                  \
@@ -51,13 +47,12 @@ CPP_SOURCES := \
         R600InstrInfo.cpp               \
         R600KernelParameters.cpp        \
         R600LowerInstructions.cpp       \
-       R600LowerShaderInstructions.cpp \
+       R600MachineFunctionInfo.cpp     \
         R600RegisterInfo.cpp            \
         SIAssignInterpRegs.cpp          \
         SICodeEmitter.cpp               \
         SIInstrInfo.cpp                 \
         SIISelLowering.cpp              \
-       SILowerShaderInstructions.cpp   \
         SIMachineFunctionInfo.cpp       \
         SIPropagateImmReads.cpp         \
         SIRegisterInfo.cpp              \
diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp

index 8faf0deb8c50e673971c5229e9efc0c7cf1c7816..421562255f6a09603c9cb2f1e6ad4c4cc5e6da1f 100644 (file)
--- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp
+++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- R600CodeEmitter.cpp - TODO: Add brief description -------===//
+//===-- R600CodeEmitter.cpp - Code Emitter for R600->Cayman GPU families --===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,12 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// This code emitter outputs bytecode that is understood by the r600g driver
+// in the Mesa [1] project.  The bytecode is very similar to the hardware's ISA,
+// except that the sizes of the instruction fields are rounded up to the
+// nearest byte.
+//
+// [1] http://www.mesa3d.org/
  //
  //===----------------------------------------------------------------------===//
  
@@ -44,8 +49,9 @@ namespace {
    const R600RegisterInfo * TRI;
    bool evergreenEncoding;
  
+  bool isCube;
    bool isReduction;
-  unsigned reductionElement;
+  unsigned currentElement;
    bool isLast;
  
    unsigned section_start;
@@ -53,7 +59,7 @@ namespace {
    public:
  
    R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
-      _OS(OS), TM(NULL), evergreenEncoding(false), isReduction(false),
+      _OS(OS), TM(NULL), evergreenEncoding(false), isCube(false), isReduction(false),
        isLast(true) { }
  
    const char *getPassName() const { return "AMDGPU Machine Code Emitter"; }
@@ -65,7 +71,7 @@ namespace {
    private:
  
    void emitALUInstr(MachineInstr  &MI);
-  void emitSrc(const MachineOperand & MO);
+  void emitSrc(const MachineOperand & MO, int chan_override  = -1);
    void emitDst(const MachineOperand & MO);
    void emitALU(MachineInstr &MI, unsigned numSrc);
    void emitTexInstr(MachineInstr &MI);
@@ -155,10 +161,8 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
    } else {
      evergreenEncoding = true;
    }
-  const AMDGPUTargetMachine *amdtm =
-    static_cast<const AMDGPUTargetMachine *>(&MF.getTarget());
  
-  if (amdtm->shouldDumpCode()) {
+  if (STM.dumpCode()) {
      MF.dump();
    }
  
@@ -171,18 +175,26 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
            if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) {
              continue;
            }
-          if (isTexOp(MI.getOpcode())) {
+          if (AMDGPU::isTexOp(MI.getOpcode())) {
              emitTexInstr(MI);
-          } else if (isFCOp(MI.getOpcode())){
+          } else if (AMDGPU::isFCOp(MI.getOpcode())){
              emitFCInstr(MI);
-          } else if (isReductionOp(MI.getOpcode())) {
+          } else if (AMDGPU::isReductionOp(MI.getOpcode())) {
              isReduction = true;
              isLast = false;
-            for (reductionElement = 0; reductionElement < 4; reductionElement++) {
-              isLast = (reductionElement == 3);
+            for (currentElement = 0; currentElement < 4; currentElement++) {
+              isLast = (currentElement == 3);
                emitALUInstr(MI);
              }
              isReduction = false;
+          } else if (AMDGPU::isCubeOp(MI.getOpcode())) {
+              isCube = true;
+              isLast = false;
+              for (currentElement = 0; currentElement < 4; currentElement++) {
+                isLast = (currentElement == 3);
+                emitALUInstr(MI);
+              }
+              isCube = false;
            } else if (MI.getOpcode() == AMDIL::RETURN ||
                       MI.getOpcode() == AMDIL::BUNDLE ||
                       MI.getOpcode() == AMDIL::KILL) {
@@ -191,12 +203,7 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) {
              switch(MI.getOpcode()) {
              case AMDIL::RAT_WRITE_CACHELESS_eg:
                {
-                /* XXX: Support for autoencoding 64-bit instructions was added
-                 * in LLVM 3.1.  Until we drop support for 3.0, we will use Magic
-                 * numbers for the high bits. */
-                  uint64_t high = 0x95c0100000000000;
                    uint64_t inst = getBinaryCodeForInstr(MI);
-                  inst |= high;
                  /* Set End Of Program bit */
                  /* XXX: Need better check of end of program.  EOP should be
                   * encoded in one of the operands of the MI, and it should be
@@ -286,7 +293,7 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI)
  
     /* Some instructions are just place holder instructions that represent
      * operations that the GPU does automatically.  They should be ignored. */
-  if (isPlaceHolderOpcode(MI.getOpcode())) {
+  if (AMDGPU::isPlaceHolderOpcode(MI.getOpcode())) {
      return;
    }
  
@@ -309,18 +316,25 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI)
    /* Emit instruction type */
    emitByte(0);
  
-  unsigned int opIndex;
-  for (opIndex = 1; opIndex < numOperands; opIndex++) {
-    /* Literal constants are always stored as the last operand. */
-    if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
-      break;
+  if (isCube) {
+    static const int cube_src_swz[] = {2, 2, 0, 1};
+    emitSrc(MI.getOperand(1), cube_src_swz[currentElement]);
+    emitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]);
+    emitNullBytes(SRC_BYTE_COUNT);
+  } else {
+    unsigned int opIndex;
+    for (opIndex = 1; opIndex < numOperands; opIndex++) {
+      /* Literal constants are always stored as the last operand. */
+      if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) {
+        break;
+      }
+      emitSrc(MI.getOperand(opIndex));
      }
-    emitSrc(MI.getOperand(opIndex));
-  }
  
      /* Emit zeros for unused sources */
-  for ( ; opIndex < 4; opIndex++) {
-    emitNullBytes(SRC_BYTE_COUNT);
+    for ( ; opIndex < 4; opIndex++) {
+      emitNullBytes(SRC_BYTE_COUNT);
+    }
    }
  
    emitDst(dstOp);
@@ -328,7 +342,7 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI)
    emitALU(MI, numOperands - 1);
  }
  
-void R600CodeEmitter::emitSrc(const MachineOperand & MO)
+void R600CodeEmitter::emitSrc(const MachineOperand & MO, int chan_override /* = -1 */)
  {
    uint32_t value = 0;
    /* Emit the source select (2 bytes).  For GPRs, this is the register index.
@@ -354,8 +368,10 @@ void R600CodeEmitter::emitSrc(const MachineOperand & MO)
    }
  
    /* Emit the source channel (1 byte) */
-  if (isReduction) {
-    emitByte(reductionElement);
+  if (chan_override != -1) {
+    emitByte(chan_override);
+  } else if (isReduction) {
+    emitByte(currentElement);
    } else if (MO.isReg()) {
      emitByte(TRI->getHWRegChan(MO.getReg()));
    } else {
@@ -397,8 +413,8 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO)
      emitByte(getHWReg(MO.getReg()));
  
      /* Emit the element of the destination register (1 byte)*/
-    if (isReduction) {
-      emitByte(reductionElement);
+    if (isReduction || isCube) {
+      emitByte(currentElement);
      } else {
        emitByte(TRI->getHWRegChan(MO.getReg()));
      }
@@ -411,7 +427,7 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO)
      }
  
      /* Emit writemask (1 byte).  */
-    if ((isReduction && reductionElement != TRI->getHWRegChan(MO.getReg()))
+    if ((isReduction && currentElement != TRI->getHWRegChan(MO.getReg()))
           || MO.getTargetFlags() & MO_FLAG_MASK) {
        emitByte(0);
      } else {
@@ -570,6 +586,7 @@ void R600CodeEmitter::emitFCInstr(MachineInstr &MI)
    case AMDIL::BREAK_LOGICALZ_f32:
      instr = FC_BREAK;
      break;
+  case AMDIL::BREAK_LOGICALNZ_f32:
    case AMDIL::BREAK_LOGICALNZ_i32:
      instr = FC_BREAK_NZ_INT;
      break;
@@ -577,6 +594,7 @@ void R600CodeEmitter::emitFCInstr(MachineInstr &MI)
      instr = FC_BREAK_Z_INT;
      break;
    case AMDIL::CONTINUE_LOGICALNZ_f32:
+  case AMDIL::CONTINUE_LOGICALNZ_i32:
      instr = FC_CONTINUE;
      break;
    /* XXX: This assumes that all IFs will be if (x != 0).  If we add
@@ -706,44 +724,5 @@ RegElement maskBitToElement(unsigned int maskBit)
    }
  }
  
-unsigned int dstSwizzleToWriteMask(unsigned swizzle)
-{
-  switch(swizzle) {
-  default:
-  case AMDIL_DST_SWIZZLE_DEFAULT:
-    return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
-  case AMDIL_DST_SWIZZLE_X___:
-    return WRITE_MASK_X;
-  case AMDIL_DST_SWIZZLE_XY__:
-    return WRITE_MASK_X | WRITE_MASK_Y;
-  case AMDIL_DST_SWIZZLE_XYZ_:
-    return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z;
-  case AMDIL_DST_SWIZZLE_XYZW:
-    return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
-  case AMDIL_DST_SWIZZLE__Y__:
-    return WRITE_MASK_Y;
-  case AMDIL_DST_SWIZZLE__YZ_:
-    return WRITE_MASK_Y | WRITE_MASK_Z;
-  case AMDIL_DST_SWIZZLE__YZW:
-    return WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W;
-  case AMDIL_DST_SWIZZLE___Z_:
-    return WRITE_MASK_Z;
-  case AMDIL_DST_SWIZZLE___ZW:
-    return WRITE_MASK_Z | WRITE_MASK_W;
-  case AMDIL_DST_SWIZZLE____W:
-    return WRITE_MASK_W;
-  case AMDIL_DST_SWIZZLE_X_ZW:
-    return WRITE_MASK_X | WRITE_MASK_Z | WRITE_MASK_W;
-  case AMDIL_DST_SWIZZLE_XY_W:
-    return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_W;
-  case AMDIL_DST_SWIZZLE_X_Z_:
-    return WRITE_MASK_X | WRITE_MASK_Z;
-  case AMDIL_DST_SWIZZLE_X__W:
-    return WRITE_MASK_X | WRITE_MASK_W;
-  case AMDIL_DST_SWIZZLE__Y_W:
-    return WRITE_MASK_Y | WRITE_MASK_W;
-  }
-}
-
  #include "AMDILGenCodeEmitter.inc"
  
diff --git a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl

index cbded11576639afd53dd011f78c14d3fee3a5b9f..406f3dfdd39307d82e8edc434594c0c5d653236c 100644 (file)
--- a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl
+++ b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl
@@ -1,20 +1,23 @@
-#===-- R600GenRegisterInfo.pl - TODO: Add brief description -------===#
+#===-- R600GenRegisterInfo.pl - Script for generating register info files --===#
  #
  #                     The LLVM Compiler Infrastructure
  #
  # This file is distributed under the University of Illinois Open Source
  # License. See LICENSE.TXT for details.
  #
-#===----------------------------------------------------------------------===#
+#===------------------------------------------------------------------------===#
  #
-# TODO: Add full description
+# This perl script prints to stdout .td code to be used as R600RegisterInfo.td.
+# It also generates a file called R600HwRegInfo.include, which contains helper
+# functions for determining the hw encoding of registers.
  #
-#===----------------------------------------------------------------------===#
+#===------------------------------------------------------------------------===#
  
  use strict;
  use warnings;
  
-use AMDGPUConstants;
+use constant CONST_REG_COUNT => 256;
+use constant TEMP_REG_COUNT => 128;
  
  my $CREG_MAX = CONST_REG_COUNT - 1;
  my $TREG_MAX = TEMP_REG_COUNT - 1;
@@ -81,7 +84,7 @@ def R600_Reg32 : RegisterClass <"AMDIL", [f32, i32], 32, (add
      R600_CReg32,
      ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
  
-def R600_Reg128 : RegisterClass<"AMDIL", [v4f32], 128, (add
+def R600_Reg128 : RegisterClass<"AMDIL", [v4f32, v4i32], 128, (add
      $t128_string)>
  {
    let SubRegClasses = [(R600_TReg32 sel_x, sel_y, sel_z, sel_w)];
@@ -170,3 +173,24 @@ sub print_reg_defs {
    return @reg_list;
  }
  
+# Helper functions: split a register index into a hw index and a channel name.
+sub get_hw_index {  # returns the integer part of $index / 4
+  my ($index) = @_;
+  return int($index / 4);
+}
+
+sub get_chan_str {  # returns 'X', 'Y', 'Z' or 'W' for $index % 4 == 0, 1, 2, 3
+  my ($index) = @_;
+  my $chan = $index % 4;
+  if ($chan == 0 )  {
+    return 'X';
+  } elsif ($chan == 1) {
+    return 'Y';
+  } elsif ($chan == 2) {
+    return 'Z';
+  } elsif ($chan == 3) {
+    return 'W';
+  } else {  # defensive: abort if $chan is somehow outside 0..3
+    die("Unknown chan value: $chan");
+  }
+}
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp

index f92fe2641a5f11a24e94779e68d0c1f7bfdacb50..e85ac31b34c59681f9b5b1ac11d11824f3c8ebdc 100644 (file)
--- a/src/gallium/drivers/radeon/R600ISelLowering.cpp
+++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- R600ISelLowering.cpp - TODO: Add brief description -------===//
+//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,12 +7,14 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// Most of the DAG lowering is handled in AMDILISelLowering.cpp.  This file
+// is mostly EmitInstrWithCustomInserter().
  //
  //===----------------------------------------------------------------------===//
  
  #include "R600ISelLowering.h"
  #include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  
  using namespace llvm;
@@ -25,9 +27,13 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
  //  setSchedulingPreference(Sched::VLIW);
    addRegisterClass(MVT::v4f32, &AMDIL::R600_Reg128RegClass);
    addRegisterClass(MVT::f32, &AMDIL::R600_Reg32RegClass);
+  addRegisterClass(MVT::v4i32, &AMDIL::R600_Reg128RegClass);
+  addRegisterClass(MVT::i32, &AMDIL::R600_Reg32RegClass);
  
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
+  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
+  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
  }
  
  MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
@@ -35,10 +41,10 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
  {
    MachineFunction * MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineBasicBlock::iterator I = *MI;
  
    switch (MI->getOpcode()) {
    default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
-  /* XXX: Use helper function from AMDGPULowerShaderInstructions here */
    case AMDIL::TGID_X:
      addLiveIn(MI, MF, MRI, TII, AMDIL::T1_X);
      break;
@@ -84,7 +90,49 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    case AMDIL::LOCAL_SIZE_Z:
      lowerImplicitParameter(MI, *BB, MRI, 8);
      break;
+
+  case AMDIL::R600_LOAD_CONST:
+    {
+      int64_t RegIndex = MI->getOperand(1).getImm();
+      unsigned ConstantReg = AMDIL::R600_CReg32RegClass.getRegister(RegIndex);
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::COPY))
+                  .addOperand(MI->getOperand(0))
+                  .addReg(ConstantReg);
+      break;
+    }
+
+  case AMDIL::LOAD_INPUT:
+    {
+      int64_t RegIndex = MI->getOperand(1).getImm();
+      addLiveIn(MI, MF, MRI, TII,
+                AMDIL::R600_TReg32RegClass.getRegister(RegIndex));
+      break;
+    }
+  case AMDIL::STORE_OUTPUT:
+    {
+      int64_t OutputIndex = MI->getOperand(1).getImm();
+      unsigned OutputReg = AMDIL::R600_TReg32RegClass.getRegister(OutputIndex);
+
+      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::COPY), OutputReg)
+                  .addOperand(MI->getOperand(0));
+
+      if (!MRI.isLiveOut(OutputReg)) {
+        MRI.addLiveOut(OutputReg);
+      }
+      break;
+    }
+
+  case AMDIL::RESERVE_REG:
+    {
+      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
+      int64_t ReservedIndex = MI->getOperand(0).getImm();
+      unsigned ReservedReg =
+                          AMDIL::R600_TReg32RegClass.getRegister(ReservedIndex);
+      MFI->ReservedRegs.push_back(ReservedReg);
+      break;
+    }
    }
+
    MI->eraseFromParent();
    return BB;
  }
diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h

index fd26bf538c4448dfa35b06e01d7fa2be4cbdf969..fdd552a172d6254ba46239c5a961ac702cdb6af5 100644 (file)
--- a/src/gallium/drivers/radeon/R600ISelLowering.h
+++ b/src/gallium/drivers/radeon/R600ISelLowering.h
@@ -1,4 +1,4 @@
-//===-- R600ISelLowering.h - TODO: Add brief description -------===//
+//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// R600 DAG Lowering interface definition
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/R600InstrFormats.td b/src/gallium/drivers/radeon/R600InstrFormats.td

deleted file mode 100644 (file)

index 0890eb6..0000000
--- a/src/gallium/drivers/radeon/R600InstrFormats.td
+++ /dev/null
@@ -1,16 +0,0 @@
-//===-- R600InstrFormats.td - TODO: Add brief description -------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-
-class ALUInst <bits<10> op, dag outs, dag ins, string asm, list<dag> pattern>
-  : InstR600 <, outs, ins , asm, pattern>
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp

index 0c7ffc4334dac06660f0d209519a97089f1646d2..2bd59fd5e1bcd5d7817fbf1ec70b33d0da637b90 100644 (file)
--- a/src/gallium/drivers/radeon/R600InstrInfo.cpp
+++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===-- R600InstrInfo.cpp - TODO: Add brief description -------===//
+//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// R600 Implementation of TargetInstrInfo.
  //
  //===----------------------------------------------------------------------===//
  
@@ -73,10 +73,22 @@ unsigned R600InstrInfo::getISAOpcode(unsigned opcode) const
      case AMDIL::MOVE_i32:
        return AMDIL::MOV;
      case AMDIL::SHR_i32:
+      return getASHRop();
+    case AMDIL::USHR_i32:
        return getLSHRop();
    }
  }
  
+// Picks the arithmetic-shift-right opcode for the subtarget's generation.
+unsigned R600InstrInfo::getASHRop() const
+{
+  unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration();
+  if (gen < AMDILDeviceInfo::HD5XXX) {
+    return AMDIL::ASHR_r600;
+  }
+  return AMDIL::ASHR_eg;
+}
+
  unsigned R600InstrInfo::getLSHRop() const
  {
    unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration();
diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h

index aedaa9f47f3cc36152a36b256cbbc54ddd967ec3..014eeb0b9f7e89764d7786fab38d5d02d884ab93 100644 (file)
--- a/src/gallium/drivers/radeon/R600InstrInfo.h
+++ b/src/gallium/drivers/radeon/R600InstrInfo.h
@@ -1,4 +1,4 @@
-//===-- R600InstrInfo.h - TODO: Add brief description -------===//
+//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// Interface definition for R600InstrInfo
  //
  //===----------------------------------------------------------------------===//
  
@@ -52,6 +52,7 @@ namespace llvm {
    bool isTrig(const MachineInstr &MI) const;
  
    unsigned getLSHRop() const;
+  unsigned getASHRop() const;
    unsigned getMULHI_UINT() const;
    unsigned getMULLO_UINT() const;
    unsigned getRECIP_UINT() const;
diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td

index 02043fdeea5de632f5948fd629da0853f7b87549..a18240f09bd6b7b4e30f53a1913c566a677b313b 100644 (file)
--- a/src/gallium/drivers/radeon/R600Instructions.td
+++ b/src/gallium/drivers/radeon/R600Instructions.td
@@ -1,4 +1,4 @@
-//===-- R600Instructions.td - TODO: Add brief description -------===//
+//===-- R600Instructions.td - R600 Instruction defs  -------*- tablegen -*-===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// R600 Tablegen instruction definitions
  //
  //===----------------------------------------------------------------------===//
  
@@ -84,7 +84,7 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
    InstR600 <inst,
            (outs R600_Reg32:$dst),
            (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2, variable_ops),
-          !strconcat(opName, "$dst $src0, $src1, $src2"),
+          !strconcat(opName, " $dst, $src0, $src1, $src2"),
            pattern,
            itin>{
  
@@ -92,7 +92,7 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern,
    }
  
  class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern,
-                      InstrItinClass itin = AnyALU> :
+                      InstrItinClass itin = VecALU> :
    InstR600 <inst,
            (outs R600_Reg32:$dst),
            ins,
@@ -152,8 +152,6 @@ class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, dag outs, dag ins,
    let Inst{31-30} = ELEM_SIZE;
  
    /* CF_ALLOC_EXPORT_WORD1_BUF */
-/* XXX: We can't have auto encoding of 64-bit instructions until LLVM 3.1 :( */
-/*
    let Inst{43-32} = ARRAY_SIZE;
    let Inst{47-44} = COMP_MASK;
    let Inst{51-48} = BURST_COUNT;
@@ -162,7 +160,6 @@ class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, dag outs, dag ins,
    let Inst{61-54} = cf_inst;
    let Inst{62}    = MARK;
    let Inst{63}    = BARRIER;
-*/
  }
  
  /*
@@ -311,6 +308,18 @@ def TRUNC : R600_1OP <
    [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))]
  >;
  
+def CEIL : R600_1OP <
+  0x12, "CEIL",
+  [(set R600_Reg32:$dst, (int_AMDIL_round_posinf R600_Reg32:$src))]> {
+  let AMDILOp = AMDILInst.ROUND_POSINF_f32; // ceil rounds toward +infinity
+}
+
+def RNDNE : R600_1OP <
+  0x13, "RNDNE",
+  [(set R600_Reg32:$dst, (int_AMDIL_round_nearest R600_Reg32:$src))]> {
+  let AMDILOp = AMDILInst.ROUND_NEAREST_f32;
+}
+
  def FLOOR : R600_1OP <
    0x14, "FLOOR",
    [(set R600_Reg32:$dst, (int_AMDGPU_floor R600_Reg32:$src))]
@@ -329,64 +338,114 @@ def AND_INT : R600_2OP <
    let AMDILOp = AMDILInst.AND_i32;
  }
  
+def OR_INT : R600_2OP <
+  0x31, "OR_INT",
+  []>{
+  let AMDILOp = AMDILInst.BINARY_OR_i32;
+}
+
  def XOR_INT : R600_2OP <
    0x32, "XOR_INT",
    []
  >;
  
+def NOT_INT : R600_1OP <
+  0x33, "NOT_INT",
+  []>{
+  let AMDILOp = AMDILInst.BINARY_NOT_i32;
+}
+
  def ADD_INT : R600_2OP <
-  0x34, "ADD_INT $dst, $src0, $src1",
+  0x34, "ADD_INT",
    []>{
    let AMDILOp = AMDILInst.ADD_i32;
  }
  
  def SUB_INT : R600_2OP <
-       0x35, "SUB_INT $dst, $src0, $src1",
+       0x35, "SUB_INT",
         []
  >;
  
+def MAX_INT : R600_2OP <
+  0x36, "MAX_INT",
+  [(set R600_Reg32:$dst, (int_AMDGPU_imax R600_Reg32:$src0, R600_Reg32:$src1))]>;
+
+def MIN_INT : R600_2OP <
+  0x37, "MIN_INT",
+  [(set R600_Reg32:$dst, (int_AMDGPU_imin R600_Reg32:$src0, R600_Reg32:$src1))]>;
+
+def MAX_UINT : R600_2OP <
+  0x38, "MAX_UINT",
+  [(set R600_Reg32:$dst, (int_AMDGPU_umax R600_Reg32:$src0, R600_Reg32:$src1))]>;
+
+def MIN_UINT : R600_2OP <
+  0x39, "MIN_UINT",
+  [(set R600_Reg32:$dst, (int_AMDGPU_umin R600_Reg32:$src0, R600_Reg32:$src1))]>;
+
+
  def SETE_INT : R600_2OP <
-  0x3A, "SETE_INT $dst, $src0, $src1",
+  0x3A, "SETE_INT",
    []>{
    let AMDILOp = AMDILInst.IEQ;
  }
  
  def SETGT_INT : R600_2OP <
-  0x3B, "SGT_INT $dst, $src0, $src1",
+  0x3B, "SGT_INT",
    []
  >;
  
  def SETGE_INT : R600_2OP <
-       0x3C, "SETGE_INT $dst, $src0, $src1",
+       0x3C, "SETGE_INT",
         []>{
    let AMDILOp = AMDILInst.IGE;
  }
  
  def SETNE_INT : R600_2OP <
-  0x3D, "SETNE_INT $dst, $src0, $src1",
+  0x3D, "SETNE_INT",
    []>{
    let AMDILOp = AMDILInst.INE;
  }
  
  def SETGT_UINT : R600_2OP <
-  0x3E, "SETGT_UINT $dst, $src0, $src1",
+  0x3E, "SETGT_UINT",
    []>{
    let AMDILOp = AMDILInst.UGT;
  }
  
  def SETGE_UINT : R600_2OP <
-  0x3F, "SETGE_UINT $dst, $src0, $src1",
+  0x3F, "SETGE_UINT",
    []>{
    let AMDILOp = AMDILInst.UGE;
  }
  
  def CNDE_INT : R600_3OP <
-       0x1C, "CNDE_INT $dst, $src0, $src1, $src2",
+       0x1C, "CNDE_INT",
         []
  >;
  
  /* Texture instructions */
  
+
+def TEX_LD : R600_TEX <
+  0x03, "TEX_LD",
+  [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_GET_TEXTURE_RESINFO : R600_TEX <
+  0x04, "TEX_GET_TEXTURE_RESINFO",
+  [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_GET_GRADIENTS_H : R600_TEX <
+  0x07, "TEX_GET_GRADIENTS_H",
+  [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
+def TEX_GET_GRADIENTS_V : R600_TEX <
+  0x08, "TEX_GET_GRADIENTS_V",
+  [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))]
+>;
+
  def TEX_SAMPLE : R600_TEX <
    0x10, "TEX_SAMPLE",
    [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))]
@@ -434,6 +493,11 @@ def KILP : Pat <
    (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO)))
  >;
  
+def KIL : Pat <
+  (int_AMDGPU_kill R600_Reg32:$src0),
+  (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
+>;
+
  /* Helper classes for common instructions */
  
  class MUL_LIT_Common <bits<32> inst> : R600_3OP <
@@ -470,6 +534,15 @@ class DOT4_Common <bits<32> inst> : R600_REDUCTION <
    [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
  >;
  
+class CUBE_Common <bits<32> inst> : InstR600 <
+  inst,
+  (outs R600_Reg128:$dst),
+  (ins R600_Reg128:$src),
+  "CUBE $dst, $src", // operands comma-separated, as in R600_3OP's asm string
+  [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
+  VecALU
+>;
+
  class EXP_IEEE_Common <bits<32> inst> : R600_1OP <
    inst, "EXP_IEEE",
    []> {
@@ -509,6 +582,12 @@ class LSHR_Common <bits<32> inst> : R600_2OP <
    let AMDILOp = AMDILInst.USHR_i32;
  }
  
+class ASHR_Common <bits<32> inst> : R600_2OP <
+  inst, "ASHR $dst, $src0, $src1",
+  [] >{
+  let AMDILOp = AMDILInst.SHR_i32;
+}
+
  class MULHI_INT_Common <bits<32> inst> : R600_2OP <
    inst, "MULHI_INT $dst, $src0, $src1",
    [] >{
@@ -608,6 +687,7 @@ let Gen = AMDGPUGen.R600 in {
    def CNDGT_r600 : CNDGT_Common<0x19>;
    def CNDGE_r600 : CNDGE_Common<0x1A>;
    def DOT4_r600 : DOT4_Common<0x50>;
+  def CUBE_r600 : CUBE_Common<0x52>;
    def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
    def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
    def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
@@ -619,6 +699,7 @@ let Gen = AMDGPUGen.R600 in {
    def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
    def SIN_r600 : SIN_Common<0x6E>;
    def COS_r600 : COS_Common<0x6F>;
+  def ASHR_r600 : ASHR_Common<0x70>;
    def LSHR_r600 : LSHR_Common<0x71>;
    def LSHL_r600 : LSHL_Common<0x72>;
    def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
@@ -661,20 +742,12 @@ def RAT_WRITE_CACHELESS_eg :
      EG_CF_RAT <0x57, 0x2, (outs), (ins R600_TReg32_X:$rw_gpr,
                                     R600_TReg32_X:$index_gpr, i32imm:$rat_id), "">
  {
-/*
-  let Inst{3-0}   = RAT_ID;
-  let Inst{21-15} = RW_GPR;
-  let Inst{29-23} = INDEX_GPR;
-  /* Propery of the UAV */
-  let Inst{31-30} = ELEM_SIZE;
-*/
    let RIM         = 0;
    /* XXX: Have a separate instruction for non-indexed writes. */
    let TYPE        = 1;
    let RW_REL      = 0;
    let ELEM_SIZE   = 0;
  
-/*
    let ARRAY_SIZE  = 0;
    let COMP_MASK   = 1;
    let BURST_COUNT = 0;
@@ -682,7 +755,6 @@ def RAT_WRITE_CACHELESS_eg :
    let EOP         = 0;
    let MARK        = 0;
    let BARRIER     = 1;
-*/
  }
  
  def VTX_READ_eg : InstR600ISA < (outs R600_TReg32_X:$dst),
@@ -789,6 +861,7 @@ class TRIG_eg <InstR600 trig, Intrinsic intr> : Pat<
  let Gen = AMDGPUGen.EG_CAYMAN in {
  
    def MULADD_eg : MULADD_Common<0x14>;
+  def ASHR_eg : ASHR_Common<0x15>;
    def LSHR_eg : LSHR_Common<0x16>;
    def LSHL_eg : LSHL_Common<0x17>;
    def CNDE_eg : CNDE_Common<0x19>;
@@ -812,6 +885,7 @@ let Gen = AMDGPUGen.EG_CAYMAN in {
    def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
    def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
    def DOT4_eg : DOT4_Common<0xBE>;
+  def CUBE_eg : CUBE_Common<0xC0>;
  
  } // End AMDGPUGen.EG_CAYMAN
  
@@ -905,6 +979,34 @@ def LOCAL_SIZE_Y : R600PreloadInst <"LOCAL_SIZE_Y",
  def LOCAL_SIZE_Z : R600PreloadInst <"LOCAL_SIZE_Z",
                                      int_r600_read_local_size_z>;
  
+def R600_LOAD_CONST : AMDGPUShaderInst <
+  (outs R600_Reg32:$dst),
+  (ins i32imm:$src0),
+  "R600_LOAD_CONST $dst, $src0",
+  [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
+>;
+
+def LOAD_INPUT : AMDGPUShaderInst <
+  (outs R600_Reg32:$dst),
+  (ins i32imm:$src),
+  "LOAD_INPUT $dst, $src",
+  [(set R600_Reg32:$dst, (int_R600_load_input imm:$src))]
+>;
+
+def RESERVE_REG : AMDGPUShaderInst <
+  (outs),
+  (ins i32imm:$src),
+  "RESERVE_REG $src",
+  [(int_AMDGPU_reserve_reg imm:$src)]
+>;
+
+def STORE_OUTPUT: AMDGPUShaderInst <
+  (outs),
+  (ins R600_Reg32:$src0, i32imm:$src1),
+  "STORE_OUTPUT $src0, $src1",
+  [(int_AMDGPU_store_output R600_Reg32:$src0, imm:$src1)]
+>;
+
  } // End usesCustomInserter = 1, isPseudo = 1
  
  } // End isCodeGenOnly = 1
@@ -933,15 +1035,14 @@ def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 5, sel_y>;
  def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 6, sel_z>;
  def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 7, sel_w>;
  
+def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
+def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
  
-include "R600ShaderPatterns.td"
-
-// We need this pattern to avoid having real registers in PHI nodes.
-// For some reason this pattern only works when it comes after the other
-// instruction defs.
-def : Pat <
-  (int_R600_load_input imm:$src),
-  (LOAD_INPUT imm:$src)
->;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 4, sel_x>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 5, sel_y>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 6, sel_z>;
+def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 7, sel_w>;
  
  } // End isR600toCayman Predicate
diff --git a/src/gallium/drivers/radeon/R600Intrinsics.td b/src/gallium/drivers/radeon/R600Intrinsics.td

deleted file mode 100644 (file)

index 8038fee..0000000
--- a/src/gallium/drivers/radeon/R600Intrinsics.td
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- R600Intrinsics.td - TODO: Add brief description -------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-let TargetPrefix = "R600", isTarget = 1 in {
-  def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadWriteArgMem]>;
-}
-
-let TargetPrefix = "r600", isTarget = 1 in {
-
-class R600ReadPreloadRegisterIntrinsic<string name>
-  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-    GCCBuiltin<name>;
-
-multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
-  def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
-  def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
-  def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
-}
-
-defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
-                                       "__builtin_r600_read_global_size">;
-defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
-                                       "__builtin_r600_read_local_size">;
-defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
-                                       "__builtin_r600_read_ngroups">;
-defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
-                                       "__builtin_r600_read_tgid">;
-defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
-                                       "__builtin_r600_read_tidig">;
-} // End TargetPrefix = "r600"
diff --git a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td

new file mode 100644 (file)

index 0000000..73ef4aa
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td
@@ -0,0 +1,40 @@
+//===-- R600Intrinsics.td - R600 Intrinsic defs -------*- tablegen -*-----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Intrinsic Definitions
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "R600", isTarget = 1 in {
+  def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadWriteArgMem]>;
+}
+
+let TargetPrefix = "r600", isTarget = 1 in {
+
+class R600ReadPreloadRegisterIntrinsic<string name>
+  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+    GCCBuiltin<name>;
+
+multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
+  def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
+  def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
+  def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
+}
+
+defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
+                                       "__builtin_r600_read_global_size">;
+defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
+                                       "__builtin_r600_read_local_size">;
+defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
+                                       "__builtin_r600_read_ngroups">;
+defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
+                                       "__builtin_r600_read_tgid">;
+defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
+                                       "__builtin_r600_read_tidig">;
+} // End TargetPrefix = "r600"
diff --git a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td

new file mode 100644 (file)

index 0000000..cd76135
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td
@@ -0,0 +1,16 @@
+//===-- R600Intrinsics.td - R600 Intrinsic defs -------*- tablegen -*-----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600 Intrinsic Definitions (OpenCL variant: only int_R600_load_input)
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "R600", isTarget = 1 in {
+  def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadWriteArgMem]>;
+}
diff --git a/src/gallium/drivers/radeon/R600KernelParameters.cpp b/src/gallium/drivers/radeon/R600KernelParameters.cpp

index 3fdf48a2bf26f83fb5168db87444d65637c617cc..53bfebc73647071fe64f6c503d37842fbfd3f9f5 100644 (file)
--- a/src/gallium/drivers/radeon/R600KernelParameters.cpp
+++ b/src/gallium/drivers/radeon/R600KernelParameters.cpp
@@ -1,4 +1,4 @@
-//===-- R600KernelParameters.cpp - TODO: Add brief description -------===//
+//===-- R600KernelParameters.cpp - Lower kernel function arguments --------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,89 +7,83 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// This pass lowers kernel function arguments to loads from the vertex buffer.
+//
+// Kernel arguments are stored in the vertex buffer at an offset of 9 dwords,
+// so arg0 needs to be loaded from VTX_BUFFER[9] and arg1 is loaded from
+// VTX_BUFFER[10], etc.
  //
  //===----------------------------------------------------------------------===//
  
-#include <llvm-c/Core.h>
-#include "R600KernelParameters.h"
-#include "R600OpenCLUtils.h"
+#include "AMDGPU.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
  #include "llvm/Constants.h"
+#include "llvm/Function.h"
  #include "llvm/Intrinsics.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetData.h"
  #include "llvm/Support/IRBuilder.h"
  #include "llvm/Support/TypeBuilder.h"
-// #include "llvm/CodeGen/Function.h"
-
-namespace AMDILAS {
-enum AddressSpaces {
-  PRIVATE_ADDRESS  = 0, // Address space for private memory.
-  GLOBAL_ADDRESS   = 1, // Address space for global memory (RAT0, VTX0).
-  CONSTANT_ADDRESS = 2, // Address space for constant memory.
-  LOCAL_ADDRESS    = 3, // Address space for local memory.
-  REGION_ADDRESS   = 4, // Address space for region memory.
-  ADDRESS_NONE     = 5, // Address space for unknown memory.
-  PARAM_D_ADDRESS  = 6, // Address space for direct addressible parameter memory (CONST0)
-  PARAM_I_ADDRESS  = 7, // Address space for indirect addressible parameter memory (VTX1)
-  LAST_ADDRESS     = 8
-};
-}
-
  
  #include <map>
  #include <set>
  
  using namespace llvm;
-using namespace std;
+
+namespace {
  
  #define CONSTANT_CACHE_SIZE_DW 127
  
-class R600KernelParameters : public llvm::FunctionPass
+class R600KernelParameters : public FunctionPass
  {
-  const llvm::TargetData * TD;
+  const TargetData * TD;
    LLVMContext* Context;
    Module *mod;
-  
+
    struct param
    {
-    param() : val(NULL), ptr_val(NULL), offset_in_dw(0), size_in_dw(0), indirect(false), specialID(0) {}
-    
-    llvm::Value* val;
-    llvm::Value* ptr_val;
+    param() : val(NULL), ptr_val(NULL), offset_in_dw(0), size_in_dw(0),
+              indirect(false), specialID(0) {}
+
+    Value* val;
+    Value* ptr_val;
      int offset_in_dw;
      int size_in_dw;
  
      bool indirect;
-    
-    string specialType;
+
+    std::string specialType;
      int specialID;
-    
+
      int end() { return offset_in_dw + size_in_dw; }
-    /* The first 9 dwords are reserved for the grid sizes. */
+    // The first 9 dwords are reserved for the grid sizes.
      int get_rat_offset() { return 9 + offset_in_dw; }
    };
  
    std::vector<param> params;
  
-  int getLastSpecialID(const string& TypeName);
-  
+  bool isOpenCLKernel(const Function* fun);
+  int getLastSpecialID(const std::string& TypeName);
+
    int getListSize();
-  void AddParam(llvm::Argument* arg);
-  int calculateArgumentSize(llvm::Argument* arg);
-  void RunAna(llvm::Function* fun);
-  void Replace(llvm::Function* fun);
-  bool isIndirect(Value* val, set<Value*>& visited);
-  void Propagate(llvm::Function* fun);
-  void Propagate(llvm::Value* v, const llvm::Twine& name, bool indirect = false);
+  void AddParam(Argument* arg);
+  int calculateArgumentSize(Argument* arg);
+  void RunAna(Function* fun);
+  void Replace(Function* fun);
+  bool isIndirect(Value* val, std::set<Value*>& visited);
+  void Propagate(Function* fun);
+  void Propagate(Value* v, const Twine& name, bool indirect = false);
    Value* ConstantRead(Function* fun, param& p);
    Value* handleSpecial(Function* fun, param& p);
    bool isSpecialType(Type*);
-  string getSpecialTypeName(Type*);
+  std::string getSpecialTypeName(Type*);
  public:
    static char ID;
    R600KernelParameters() : FunctionPass(ID) {};
-  R600KernelParameters(const llvm::TargetData* TD) : FunctionPass(ID), TD(TD) {}
-//   bool runOnFunction (llvm::Function &F);
-  bool runOnFunction (llvm::Function &F);
+  R600KernelParameters(const TargetData* TD) : FunctionPass(ID), TD(TD) {}
+  bool runOnFunction (Function &F);
    void getAnalysisUsage(AnalysisUsage &AU) const;
    const char *getPassName() const;
    bool doInitialization(Module &M);
@@ -98,13 +92,42 @@ public:
  
  char R600KernelParameters::ID = 0;
  
-static RegisterPass<R600KernelParameters> X("kerparam", "OpenCL Kernel Parameter conversion", false, false);
+static RegisterPass<R600KernelParameters> X("kerparam",
+                            "OpenCL Kernel Parameter conversion", false, false);
  
-int R600KernelParameters::getLastSpecialID(const string& TypeName)
+bool R600KernelParameters::isOpenCLKernel(const Function* fun)
+{
+  Module *mod = const_cast<Function*>(fun)->getParent();
+  NamedMDNode * md = mod->getOrInsertNamedMetadata("opencl.kernels");
+
+  if (!md or !md->getNumOperands())
+  {
+    return false;
+  }
+
+  for (int i = 0; i < int(md->getNumOperands()); i++)
+  {
+    if (!md->getOperand(i) or !md->getOperand(i)->getOperand(0))
+    {
+      continue;
+    }
+    
+    assert(md->getOperand(i)->getNumOperands() == 1);
+
+    if (md->getOperand(i)->getOperand(0)->getName() == fun->getName())
+    {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+int R600KernelParameters::getLastSpecialID(const std::string& TypeName)
  {
    int lastID = -1;
-  
-  for (vector<param>::iterator i = params.begin(); i != params.end(); i++)
+
+  for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++)
    {
      if (i->specialType == TypeName)
      {
@@ -125,7 +148,7 @@ int R600KernelParameters::getListSize()
    return params.back().end();
  }
  
-bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited)
+bool R600KernelParameters::isIndirect(Value* val, std::set<Value*>& visited)
  {
    if (isa<LoadInst>(val))
    {
@@ -144,7 +167,7 @@ bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited)
    }
  
    visited.insert(val);
-  
+
    if (isa<GetElementPtrInst>(val))
    {
      GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(val);
@@ -158,7 +181,7 @@ bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited)
        }
      }
    }
-  
+
    for (Value::use_iterator i = val->use_begin(); i != val->use_end(); i++)
    {
      Value* v2 = dyn_cast<Value>(*i);
@@ -175,24 +198,24 @@ bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited)
    return false;
  }
  
-void R600KernelParameters::AddParam(llvm::Argument* arg)
+void R600KernelParameters::AddParam(Argument* arg)
  {
    param p;
-  
+
    p.val = dyn_cast<Value>(arg);
    p.offset_in_dw = getListSize();
    p.size_in_dw = calculateArgumentSize(arg);
  
    if (isa<PointerType>(arg->getType()) and arg->hasByValAttr())
    {
-    set<Value*> visited;
+    std::set<Value*> visited;
      p.indirect = isIndirect(p.val, visited);
    }
-  
+
    params.push_back(p);
  }
  
-int R600KernelParameters::calculateArgumentSize(llvm::Argument* arg)
+int R600KernelParameters::calculateArgumentSize(Argument* arg)
  {
    Type* t = arg->getType();
  
@@ -200,16 +223,16 @@ int R600KernelParameters::calculateArgumentSize(llvm::Argument* arg)
    {
      t = dyn_cast<PointerType>(t)->getElementType();
    }
-  
+
    int store_size_in_dw = (TD->getTypeStoreSize(t) + 3)/4;
  
    assert(store_size_in_dw);
-  
+
    return store_size_in_dw;
  }
  
  
-void R600KernelParameters::RunAna(llvm::Function* fun)
+void R600KernelParameters::RunAna(Function* fun)
  {
    assert(isOpenCLKernel(fun));
  
@@ -220,7 +243,7 @@ void R600KernelParameters::RunAna(llvm::Function* fun)
  
  }
  
-void R600KernelParameters::Replace(llvm::Function* fun)
+void R600KernelParameters::Replace(Function* fun)
  {
    for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++)
    {
@@ -237,11 +260,11 @@ void R600KernelParameters::Replace(llvm::Function* fun)
      if (new_val)
      {
        i->val->replaceAllUsesWith(new_val);
-    }   
+    }
    }
  }
  
-void R600KernelParameters::Propagate(llvm::Function* fun)
+void R600KernelParameters::Propagate(Function* fun)
  {
    for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++)
    {
@@ -256,8 +279,8 @@ void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect)
  {
    LoadInst* load = dyn_cast<LoadInst>(v);
    GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(v);
-  
-  unsigned addrspace; 
+
+  unsigned addrspace;
  
    if (indirect)
    {
@@ -274,49 +297,54 @@ void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect)
  
      if (dyn_cast<PointerType>(op->getType())->getAddressSpace() != addrspace)
      {
-      op = new BitCastInst(op, PointerType::get(dyn_cast<PointerType>(op->getType())->getElementType(), addrspace), name, dyn_cast<Instruction>(v));
+      op = new BitCastInst(op, PointerType::get(dyn_cast<PointerType>(
+                           op->getType())->getElementType(), addrspace),
+                           name, dyn_cast<Instruction>(v));
      }
  
-    vector<Value*> params(GEP->idx_begin(), GEP->idx_end());
-    
-    GetElementPtrInst* GEP2 = GetElementPtrInst::Create(op, params, name, dyn_cast<Instruction>(v));
+    std::vector<Value*> params(GEP->idx_begin(), GEP->idx_end());
+
+    GetElementPtrInst* GEP2 = GetElementPtrInst::Create(op, params, name,
+                                                      dyn_cast<Instruction>(v));
      GEP2->setIsInBounds(GEP->isInBounds());
      v = dyn_cast<Value>(GEP2);
      GEP->replaceAllUsesWith(GEP2);
      GEP->eraseFromParent();
      load = NULL;
    }
-  
+
    if (load)
    {
-    if (load->getPointerAddressSpace() != addrspace) ///normally at this point we have the right address space
+    ///normally at this point we have the right address space
+    if (load->getPointerAddressSpace() != addrspace)
      {
        Value *orig_ptr = load->getPointerOperand();
        PointerType *orig_ptr_type = dyn_cast<PointerType>(orig_ptr->getType());
-      
-      Type* new_ptr_type = PointerType::get(orig_ptr_type->getElementType(), addrspace);
+
+      Type* new_ptr_type = PointerType::get(orig_ptr_type->getElementType(),
+                                            addrspace);
  
        Value* new_ptr = orig_ptr;
-      
+
        if (orig_ptr->getType() != new_ptr_type)
        {
          new_ptr = new BitCastInst(orig_ptr, new_ptr_type, "prop_cast", load);
        }
-      
+
        Value* new_load = new LoadInst(new_ptr, name, load);
        load->replaceAllUsesWith(new_load);
        load->eraseFromParent();
      }
-    
+
      return;
    }
  
-  vector<User*> users(v->use_begin(), v->use_end());
-  
+  std::vector<User*> users(v->use_begin(), v->use_end());
+
    for (int i = 0; i < int(users.size()); i++)
    {
      Value* v2 = dyn_cast<Value>(users[i]);
-    
+
      if (v2)
      {
        Propagate(v2, name, indirect);
@@ -327,7 +355,7 @@ void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect)
  Value* R600KernelParameters::ConstantRead(Function* fun, param& p)
  {
    assert(fun->front().begin() != fun->front().end());
-  
+
    Instruction *first_inst = fun->front().begin();
    IRBuilder <> builder (first_inst);
  /* First 3 dwords are reserved for the dimmension info */
@@ -346,43 +374,54 @@ Value* R600KernelParameters::ConstantRead(Function* fun, param& p)
    {
      addrspace = AMDILAS::PARAM_D_ADDRESS;
    }
-  
+
    Argument *arg = dyn_cast<Argument>(p.val);
    Type * argType = p.val->getType();
    PointerType * argPtrType = dyn_cast<PointerType>(p.val->getType());
-  
+
    if (argPtrType and arg->hasByValAttr())
    {
-    Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(Type::getInt32Ty(*Context), addrspace));
-    Value* param_ptr = GetElementPtrInst::Create(param_addr_space_ptr, ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), arg->getName(), first_inst);
-    param_ptr = new BitCastInst(param_ptr, PointerType::get(argPtrType->getElementType(), addrspace), arg->getName(), first_inst);
+    Value* param_addr_space_ptr = ConstantPointerNull::get(
+                                    PointerType::get(Type::getInt32Ty(*Context),
+                                    addrspace));
+    Value* param_ptr = GetElementPtrInst::Create(param_addr_space_ptr,
+                                    ConstantInt::get(Type::getInt32Ty(*Context),
+                                    p.get_rat_offset()), arg->getName(),
+                                    first_inst);
+    param_ptr = new BitCastInst(param_ptr,
+                                PointerType::get(argPtrType->getElementType(),
+                                                 addrspace),
+                                arg->getName(), first_inst);
      p.ptr_val = param_ptr;
      return param_ptr;
    }
    else
    {
-    Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(argType, addrspace));
-    
+    Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(
+                                                        argType, addrspace));
+
      Value* param_ptr = builder.CreateGEP(param_addr_space_ptr,
-             ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), arg->getName());
-    
+             ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()),
+                              arg->getName());
+
      Value* param_value = builder.CreateLoad(param_ptr, arg->getName());
-    
+
      return param_value;
    }
  }
  
  Value* R600KernelParameters::handleSpecial(Function* fun, param& p)
  {
-  string name = getSpecialTypeName(p.val->getType());
+  std::string name = getSpecialTypeName(p.val->getType());
    int ID;
  
    assert(!name.empty());
-  
+
    if (name == "image2d_t" or name == "image3d_t")
    {
-    int lastID = max(getLastSpecialID("image2d_t"), getLastSpecialID("image3d_t"));
-    
+    int lastID = std::max(getLastSpecialID("image2d_t"),
+                     getLastSpecialID("image3d_t"));
+
      if (lastID == -1)
      {
        ID = 2; ///ID0 and ID1 are used internally by the driver
@@ -403,20 +442,22 @@ Value* R600KernelParameters::handleSpecial(Function* fun, param& p)
      else
      {
        ID = lastID + 1;
-    }    
+    }
    }
    else
    {
      ///TODO: give some error message
      return NULL;
    }
-    
+
    p.specialType = name;
    p.specialID = ID;
  
    Instruction *first_inst = fun->front().begin();
  
-  return new IntToPtrInst(ConstantInt::get(Type::getInt32Ty(*Context), p.specialID), p.val->getType(), "resourceID", first_inst);
+  return new IntToPtrInst(ConstantInt::get(Type::getInt32Ty(*Context),
+                                           p.specialID), p.val->getType(),
+                                           "resourceID", first_inst);
  }
  
  
@@ -425,7 +466,7 @@ bool R600KernelParameters::isSpecialType(Type* t)
    return !getSpecialTypeName(t).empty();
  }
  
-string R600KernelParameters::getSpecialTypeName(Type* t)
+std::string R600KernelParameters::getSpecialTypeName(Type* t)
  {
    PointerType *pt = dyn_cast<PointerType>(t);
    StructType *st = NULL;
@@ -437,9 +478,9 @@ string R600KernelParameters::getSpecialTypeName(Type* t)
  
    if (st)
    {
-    string prefix = "struct.opencl_builtin_type_";
-    
-    string name = st->getName().str();
+    std::string prefix = "struct.opencl_builtin_type_";
+
+    std::string name = st->getName().str();
  
      if (name.substr(0, prefix.length()) == prefix)
      {
@@ -458,19 +499,15 @@ bool R600KernelParameters::runOnFunction (Function &F)
      return false;
    }
  
-//  F.dump();
-  
    RunAna(&F);
    Replace(&F);
    Propagate(&F);
-  
-   mod->dump();
+
    return false;
  }
  
  void R600KernelParameters::getAnalysisUsage(AnalysisUsage &AU) const
  {
-//   AU.addRequired<FunctionAnalysis>();
    FunctionPass::getAnalysisUsage(AU);
    AU.setPreservesAll();
  }
@@ -484,7 +521,7 @@ bool R600KernelParameters::doInitialization(Module &M)
  {
    Context = &M.getContext();
    mod = &M;
-  
+
    return false;
  }
  
@@ -493,10 +530,12 @@ bool R600KernelParameters::doFinalization(Module &M)
    return false;
  }
  
-llvm::FunctionPass* createR600KernelParametersPass(const llvm::TargetData* TD)
+} // End anonymous namespace
+
+FunctionPass* llvm::createR600KernelParametersPass(const TargetData* TD)
  {
    FunctionPass *p = new R600KernelParameters(TD);
-  
+
    return p;
  }
  
diff --git a/src/gallium/drivers/radeon/R600KernelParameters.h b/src/gallium/drivers/radeon/R600KernelParameters.h

deleted file mode 100644 (file)

index 904a469..0000000
--- a/src/gallium/drivers/radeon/R600KernelParameters.h
+++ /dev/null
@@ -1,28 +0,0 @@
-//===-- R600KernelParameters.h - TODO: Add brief description -------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef KERNELPARAMETERS_H
-#define KERNELPARAMETERS_H
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Value.h"
-
-#include <vector>
-
-llvm::FunctionPass* createR600KernelParametersPass(const llvm::TargetData* TD);
-
-
-#endif
diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp

index fb5431d0eef6e8fffdbd1b6fdca383668c79e00d..dca1fe195ccd96f80a2d0d4c2222b5a247d57004 100644 (file)
--- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp
+++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp
@@ -1,4 +1,4 @@
-//===-- R600LowerInstructions.cpp - TODO: Add brief description -------===//
+//===-- R600LowerInstructions.cpp - Lower unsupported AMDIL instructions --===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,8 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// This pass lowers AMDIL MachineInstrs that aren't supported by the R600
+// target to either supported AMDIL MachineInstrs or R600 MachineInstrs.
  //
  //===----------------------------------------------------------------------===//
  
@@ -93,8 +94,8 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
                             &AMDIL::R600_TReg32RegClass);
            BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT),
                    setgt)
-                  .addOperand(MI.getOperand(1))
-                  .addReg(AMDIL::ZERO);
+                  .addReg(AMDIL::ZERO)
+                  .addOperand(MI.getOperand(1));
  
            unsigned add_int = MRI->createVirtualRegister(
                               &AMDIL::R600_TReg32RegClass);
@@ -311,7 +312,8 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
          MachineInstr * defInstr = MRI->getVRegDef(maskedRegister);
          MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister);
          def->addTargetFlag(MO_FLAG_MASK);
-        break;
+        /* Continue so the instruction is not erased */
+        continue;
        }
  
        case AMDIL::NEGATE_i32:
@@ -342,6 +344,13 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF)
            break;
          }
  
+      case AMDIL::ULT:
+        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGT_UINT))
+                .addOperand(MI.getOperand(0))
+                .addOperand(MI.getOperand(2))
+                .addOperand(MI.getOperand(1));
+        break;
+
        default:
          continue;
        }
diff --git a/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp b/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp

deleted file mode 100644 (file)

index 394ee70..0000000
--- a/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-//===-- R600LowerShaderInstructions.cpp - TODO: Add brief description -------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPULowerShaderInstructions.h"
-#include "AMDIL.h"
-#include "AMDILInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-namespace {
-  class R600LowerShaderInstructionsPass : public MachineFunctionPass,
-        public AMDGPULowerShaderInstructionsPass {
-
-  private:
-    static char ID;
-    TargetMachine &TM;
-
-    void lowerEXPORT_REG_FAKE(MachineInstr &MI, MachineBasicBlock &MBB,
-        MachineBasicBlock::iterator I);
-    void lowerLOAD_INPUT(MachineInstr & MI);
-    bool lowerSTORE_OUTPUT(MachineInstr & MI, MachineBasicBlock &MBB,
-        MachineBasicBlock::iterator I);
-
-  public:
-    R600LowerShaderInstructionsPass(TargetMachine &tm) :
-      MachineFunctionPass(ID), TM(tm) { }
-
-      bool runOnMachineFunction(MachineFunction &MF);
-
-      const char *getPassName() const { return "R600 Lower Shader Instructions"; }
-    };
-} /* End anonymous namespace */
-
-char R600LowerShaderInstructionsPass::ID = 0;
-
-FunctionPass *llvm::createR600LowerShaderInstructionsPass(TargetMachine &tm) {
-    return new R600LowerShaderInstructionsPass(tm);
-}
-
-#define INSTR_CASE_FLOAT_V(inst) \
-  case AMDIL:: inst##_v4f32: \
-
-#define INSTR_CASE_FLOAT_S(inst) \
-  case AMDIL:: inst##_f32:
-
-#define INSTR_CASE_FLOAT(inst) \
-  INSTR_CASE_FLOAT_V(inst) \
-  INSTR_CASE_FLOAT_S(inst)
-bool R600LowerShaderInstructionsPass::runOnMachineFunction(MachineFunction &MF)
-{
-  MRI = &MF.getRegInfo();
-
-
-  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-                                                  BB != BB_E; ++BB) {
-    MachineBasicBlock &MBB = *BB;
-    for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
-      MachineInstr &MI = *I;
-      bool deleteInstr = false;
-      switch (MI.getOpcode()) {
-
-      default: break;
-
-      case AMDIL::RESERVE_REG:
-      case AMDIL::EXPORT_REG:
-        deleteInstr = true;
-        break;
-
-      case AMDIL::LOAD_INPUT:
-        lowerLOAD_INPUT(MI);
-        deleteInstr = true;
-        break;
-
-      case AMDIL::STORE_OUTPUT:
-        deleteInstr = lowerSTORE_OUTPUT(MI, MBB, I);
-        break;
-
-      }
-
-      ++I;
-
-      if (deleteInstr) {
-        MI.eraseFromParent();
-      }
-    }
-  }
-
-  return false;
-}
-
-/* The goal of this function is to replace the virutal destination register of
- * a LOAD_INPUT instruction with the correct physical register that will.
- *
- * XXX: I don't think this is the right way things assign physical registers,
- * but I'm not sure of another way to do this.
- */
-void R600LowerShaderInstructionsPass::lowerLOAD_INPUT(MachineInstr &MI)
-{
-  MachineOperand &dst = MI.getOperand(0);
-  MachineOperand &arg = MI.getOperand(1);
-  int64_t inputIndex = arg.getImm();
-  const TargetRegisterClass * inputClass = TM.getRegisterInfo()->getRegClass(AMDIL::R600_TReg32RegClassID);
-  unsigned newRegister = inputClass->getRegister(inputIndex);
-  unsigned dstReg = dst.getReg();
-
-  preloadRegister(MI.getParent()->getParent(), TM.getInstrInfo(), newRegister,
-                  dstReg);
-}
-
-bool R600LowerShaderInstructionsPass::lowerSTORE_OUTPUT(MachineInstr &MI,
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
-{
-  MachineOperand &valueOp = MI.getOperand(1);
-  MachineOperand &indexOp = MI.getOperand(2);
-  unsigned valueReg = valueOp.getReg();
-  int64_t outputIndex = indexOp.getImm();
-  const TargetRegisterClass * outputClass = TM.getRegisterInfo()->getRegClass(AMDIL::R600_TReg32RegClassID);
-  unsigned newRegister = outputClass->getRegister(outputIndex);
-
-  BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::COPY),
-                  newRegister)
-                  .addReg(valueReg);
-
-  if (!MRI->isLiveOut(newRegister))
-    MRI->addLiveOut(newRegister);
-
-  return true;
-
-}
diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp

new file mode 100644 (file)

index 0000000..48443fb
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp
@@ -0,0 +1,16 @@
+//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600MachineFunctionInfo.h"
+
+using namespace llvm;
+
+R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
+  : MachineFunctionInfo()
+  { }
diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h

new file mode 100644 (file)

index 0000000..948e192
--- /dev/null
+++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h
@@ -0,0 +1,33 @@
+//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// R600MachineFunctionInfo is used for keeping track of which registers have
+// been reserved by the llvm.AMDGPU.reserve.reg intrinsic.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600MACHINEFUNCTIONINFO_H
+#define R600MACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include <vector>
+
+namespace llvm {
+
+class R600MachineFunctionInfo : public MachineFunctionInfo {
+
+public:
+  R600MachineFunctionInfo(const MachineFunction &MF);
+  std::vector<unsigned> ReservedRegs;
+
+};
+
+} // End llvm namespace
+
+#endif //R600MACHINEFUNCTIONINFO_H
diff --git a/src/gallium/drivers/radeon/R600OpenCLUtils.h b/src/gallium/drivers/radeon/R600OpenCLUtils.h

deleted file mode 100644 (file)

index 91e41d6..0000000
--- a/src/gallium/drivers/radeon/R600OpenCLUtils.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- OpenCLUtils.h - TODO: Add brief description -------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-#ifndef OPENCLUTILS_H
-#define OPENCLUTILS_H
-
-#include "llvm/Function.h"
-
-#include <llvm/Module.h>
-
-static bool isOpenCLKernel(const llvm::Function* fun)
-{
-  llvm::Module *mod = const_cast<llvm::Function*>(fun)->getParent();
-  llvm::NamedMDNode * md = mod->getOrInsertNamedMetadata("opencl.kernels");
-
-  if (!md or !md->getNumOperands())
-  {
-    return false;
-  }
-
-  for (int i = 0; i < int(md->getNumOperands()); i++)
-  {
-    if (!md->getOperand(i) or !md->getOperand(i)->getOperand(0))
-    {
-      continue;
-    }
-    
-    assert(md->getOperand(i)->getNumOperands() == 1);
-
-    if (md->getOperand(i)->getOperand(0)->getName() == fun->getName())
-    {
-      return true;
-    }
-  }
-
-  return false;
-}
-
-
-#endif
diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.cpp b/src/gallium/drivers/radeon/R600RegisterInfo.cpp

index 96507b104cf494189a6c3a4eef68ec388bba2ead..de559bd2dfab842a9664cad7b68c6699a449e102 100644 (file)
--- a/src/gallium/drivers/radeon/R600RegisterInfo.cpp
+++ b/src/gallium/drivers/radeon/R600RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- R600RegisterInfo.cpp - TODO: Add brief description -------===//
+//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,12 +7,13 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// The file contains the R600 implementation of the TargetRegisterInfo class.
  //
  //===----------------------------------------------------------------------===//
  
  #include "R600RegisterInfo.h"
  #include "AMDGPUTargetMachine.h"
+#include "R600MachineFunctionInfo.h"
  
  using namespace llvm;
  
@@ -26,6 +27,8 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
  BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
  {
    BitVector Reserved(getNumRegs());
+  const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
+
    Reserved.set(AMDIL::ZERO);
    Reserved.set(AMDIL::HALF);
    Reserved.set(AMDIL::ONE);
@@ -40,19 +43,11 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
      Reserved.set(*I);
    }
  
-  for (MachineFunction::const_iterator BB = MF.begin(),
-                                 BB_E = MF.end(); BB != BB_E; ++BB) {
-    const MachineBasicBlock &MBB = *BB;
-    for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
-                                                                  I != E; ++I) {
-      const MachineInstr &MI = *I;
-      if (MI.getOpcode() == AMDIL::RESERVE_REG) {
-        if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) {
-          Reserved.set(MI.getOperand(0).getReg());
-        }
-      }
-    }
+  for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
+                                    E = MFI->ReservedRegs.end(); I != E; ++I) {
+    Reserved.set(*I);
    }
+
    return Reserved;
  }
  
diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.h b/src/gallium/drivers/radeon/R600RegisterInfo.h

index 95a44f971a01df538ef2c151f5e8173c4fc1427b..89a11f9333b3fa704a9289072414533f12b35ab7 100644 (file)
--- a/src/gallium/drivers/radeon/R600RegisterInfo.h
+++ b/src/gallium/drivers/radeon/R600RegisterInfo.h
@@ -1,4 +1,4 @@
-//===-- R600RegisterInfo.h - TODO: Add brief description -------===//
+//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// Interface definition for R600RegisterInfo
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/R600Schedule.td b/src/gallium/drivers/radeon/R600Schedule.td

index c6b1ca61bb5375cd7a4d72de0e17891408cc1b1a..d1957903d879e19d8e6385042106e3ddec0d6dbb 100644 (file)
--- a/src/gallium/drivers/radeon/R600Schedule.td
+++ b/src/gallium/drivers/radeon/R600Schedule.td
@@ -1,4 +1,4 @@
-//===-- R600Schedule.td - TODO: Add brief description -------===//
+//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,9 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// R600 has a VLIW architecture.  On pre-cayman cards there are 5 instruction
+// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS.  For cayman cards, the TRANS
+// slot has been removed. 
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp b/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp

index b0bdf701a74f025985fa0714bf00d8a8fdad87f6..1ef097f7b1e6bb34c8738003ead544592eba8f47 100644 (file)
--- a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp
+++ b/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp
@@ -1,4 +1,4 @@
-//===-- SIAssignInterpRegs.cpp - TODO: Add brief description -------===//
+//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,12 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// This pass maps the pseudo interpolation registers to the correct physical
+// registers.  Prior to executing a fragment shader, the GPU loads interpolation
+// parameters into physical registers.  The specific physical register that each
+// interpolation parameter ends up in depends on the type of the interpolation
+// parameter as well as how many interpolation parameters are used by the
+// shader.
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/SICodeEmitter.cpp b/src/gallium/drivers/radeon/SICodeEmitter.cpp

index ad494fae7c676e3dcbd2f805ec154b1206cb1618..6970d9f0875881eccccca886ac8532d3894bef40 100644 (file)
--- a/src/gallium/drivers/radeon/SICodeEmitter.cpp
+++ b/src/gallium/drivers/radeon/SICodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- SICodeEmitter.cpp - TODO: Add brief description -------===//
+//===-- SICodeEmitter.cpp - SI Code Emitter -------------------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,8 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// The SI code emitter produces machine code that can be executed directly on
+// the GPU device.
  //
  //===----------------------------------------------------------------------===//
  
@@ -144,8 +145,6 @@ bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF)
  {
    MF.dump();
    TM = &MF.getTarget();
-  const AMDGPUInstrInfo * TII =
-                        static_cast<const AMDGPUInstrInfo*>(TM->getInstrInfo());
  
    emitState(MF);
  
@@ -155,8 +154,7 @@ bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF)
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
                                                        I != E; ++I) {
        MachineInstr &MI = *I;
-      if (!TII->isRegPreload(MI) && MI.getOpcode() != AMDIL::KILL
-          && MI.getOpcode() != AMDIL::RETURN) {
+      if (MI.getOpcode() != AMDIL::KILL && MI.getOpcode() != AMDIL::RETURN) {
          emitInstr(MI);
        }
      }
diff --git a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl

index 644daa1bc220e403abc103d2aec604c0fd5bbbc2..bb5ebbd67e66989ef75cd47bca199cb7087107f5 100644 (file)
--- a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl
+++ b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl
@@ -1,16 +1,17 @@
-#===-- SIGenRegisterInfo.pl - TODO: Add brief description -------===#
+#===-- SIGenRegisterInfo.pl - Script for generating register info files ----===#
  #
  #                     The LLVM Compiler Infrastructure
  #
  # This file is distributed under the University of Illinois Open Source
  # License. See LICENSE.TXT for details.
  #
-#===----------------------------------------------------------------------===#
+#===------------------------------------------------------------------------===#
  #
-# TODO: Add full description
+# This Perl script prints to stdout .td code to be used as SIRegisterInfo.td.
+# It also generates a file called SIHwRegInfo.include, which contains helper
+# functions for determining the hw encoding of registers.
  #
-#===----------------------------------------------------------------------===#
-
+#===------------------------------------------------------------------------===#
  
  use strict;
  use warnings;
diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp

index 1a4b47ecbf5c5c8a389eace82dd28e10093c9ebe..441a4a072908d5fb77cee5d15ffa06bfd1bf8634 100644 (file)
--- a/src/gallium/drivers/radeon/SIISelLowering.cpp
+++ b/src/gallium/drivers/radeon/SIISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- SIISelLowering.cpp - TODO: Add brief description -------===//
+//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,8 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// Most of the DAG lowering is handled in AMDILISelLowering.cpp.  This file is
+// mostly EmitInstrWithCustomInserter().
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/SIISelLowering.h b/src/gallium/drivers/radeon/SIISelLowering.h

index e7a79f8e21529804def11a98483186d8e0d946bd..229e682ef51968f142ce04ac5f3b59f3a6deb2c2 100644 (file)
--- a/src/gallium/drivers/radeon/SIISelLowering.h
+++ b/src/gallium/drivers/radeon/SIISelLowering.h
@@ -1,4 +1,4 @@
-//===-- SIISelLowering.h - TODO: Add brief description -------===//
+//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// SI DAG Lowering interface definition
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/SIInstrFormats.td b/src/gallium/drivers/radeon/SIInstrFormats.td

index caf9b0ef1202ffd1be7a12e2d9df85e1e1353107..de0d4fa39d29c5bef04bec7ee6e5f72ab0173a65 100644 (file)
--- a/src/gallium/drivers/radeon/SIInstrFormats.td
+++ b/src/gallium/drivers/radeon/SIInstrFormats.td
@@ -1,4 +1,4 @@
-//===-- SIInstrFormats.td - TODO: Add brief description -------===//
+//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,17 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// SI Instruction format definitions.
+//
+// Instructions with _32 take 32-bit operands.
+// Instructions with _64 take 64-bit operands.
+//
+// VOP_* instructions can use either a 32-bit or 64-bit encoding.  The 32-bit
+// encoding is the standard encoding, but instructions that make use of
+// any of the instruction modifiers must use the 64-bit encoding.
+//
+// Instructions with _e32 use the 32-bit encoding.
+// Instructions with _e64 use the 64-bit encoding.
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.cpp b/src/gallium/drivers/radeon/SIInstrInfo.cpp

index 6f92e96c6e785642a749b293292a87b5d1ecd04c..0cb97643a7fa52b2e1c3090e999a7962aeaeaa27 100644 (file)
--- a/src/gallium/drivers/radeon/SIInstrInfo.cpp
+++ b/src/gallium/drivers/radeon/SIInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SIInstrInfo.cpp - TODO: Add brief description -------===//
+//===-- SIInstrInfo.cpp - SI Instruction Information  ---------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// SI Implementation of TargetInstrInfo.
  //
  //===----------------------------------------------------------------------===//
  
@@ -107,6 +107,8 @@ unsigned SIInstrInfo::getISAOpcode(unsigned AMDILopcode) const
  {
    switch (AMDILopcode) {
    case AMDIL::MAD_f32: return AMDIL::V_MAD_LEGACY_F32;
+  //XXX We need a better way of detecting end of program
+  case AMDIL::RETURN: return AMDIL::S_ENDPGM;
    default: return AMDGPUInstrInfo::getISAOpcode(AMDILopcode);
    }
  }
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.h b/src/gallium/drivers/radeon/SIInstrInfo.h

index bd76c3f94aa9d72444916379dd5b7965534b7854..68940ea3ca4abd98bc90784d286deded5a8a00ff 100644 (file)
--- a/src/gallium/drivers/radeon/SIInstrInfo.h
+++ b/src/gallium/drivers/radeon/SIInstrInfo.h
@@ -1,4 +1,4 @@
-//===-- SIInstrInfo.h - TODO: Add brief description -------===//
+//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// Interface definition for SIInstrInfo.
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/SIInstrInfo.td b/src/gallium/drivers/radeon/SIInstrInfo.td

index 65b28ec84ad29e5d4462f15bdfa5058202e4781e..435948ff1decb530245fd05cff84c85b2928ae37 100644 (file)
--- a/src/gallium/drivers/radeon/SIInstrInfo.td
+++ b/src/gallium/drivers/radeon/SIInstrInfo.td
@@ -1,4 +1,4 @@
-//===-- SIInstrInfo.td - TODO: Add brief description -------===//
+//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -6,10 +6,6 @@
  // License. See LICENSE.TXT for details.
  //
  //===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
  
  
  
@@ -464,11 +460,4 @@ def IMM12bit : ImmLeaf <
  
  include "SIInstrFormats.td"
  
-def LOAD_CONST : AMDGPUShaderInst <
-  (outs GPRF32:$dst),
-  (ins i32imm:$src),
-  "LOAD_CONST $dst, $src",
-  [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
->;
-
  include "SIInstructions.td"
diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td

index 27a8b31757a0b4bf8c92470e6a3d7e8e14f013c5..57bbc7a5d5aa3d3e1fd7ab3b27a6a4d1e830d267 100644 (file)
--- a/src/gallium/drivers/radeon/SIInstructions.td
+++ b/src/gallium/drivers/radeon/SIInstructions.td
@@ -1,4 +1,4 @@
-//===-- SIInstructions.td - TODO: Add brief description -------===//
+//===-- SIInstructions.td - SI Instruction Definitions --------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -6,10 +6,6 @@
  // License. See LICENSE.TXT for details.
  //
  //===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
  
  
  def isSI : Predicate<"Subtarget.device()"
@@ -800,6 +796,13 @@ def CONFIG_WRITE : InstSI <
    field bits<32> Inst = 0;
  }
  
+def LOAD_CONST : AMDGPUShaderInst <
+  (outs GPRF32:$dst),
+  (ins i32imm:$src),
+  "LOAD_CONST $dst, $src",
+  [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
+>;
+
  let usesCustomInserter = 1 in {
  
  def SI_V_CNDLT : InstSI <
@@ -833,7 +836,6 @@ def USE_SGPR_32 : InstSI <
  
  > {
    field bits<32> Inst = 0;
-  let PreloadReg = 1;
  }
  
  def USE_SGPR_64 : InstSI <
@@ -844,7 +846,6 @@ def USE_SGPR_64 : InstSI <
  
  > {
    field bits<32> Inst = 0;
-  let PreloadReg = 1;
  }
  
  def VS_LOAD_BUFFER_INDEX : InstSI <
@@ -854,7 +855,6 @@ def VS_LOAD_BUFFER_INDEX : InstSI <
    [(set VReg_32:$dst, (int_SI_vs_load_buffer_index))]> {
  
    field bits<32> Inst = 0;
-  let PreloadReg = 1;
  }
  
  } // end usesCustomInserter 
diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td

index e3014e1391637506546bbf9e3c597a60c8f987c8..4d23072d4f1e2e70b46a2863ce64fcc9c0cd659c 100644 (file)
--- a/src/gallium/drivers/radeon/SIIntrinsics.td
+++ b/src/gallium/drivers/radeon/SIIntrinsics.td
@@ -1,4 +1,4 @@
-//===-- SIIntrinsics.td - TODO: Add brief description -------===//
+//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// SI Intrinsic Definitions
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/SILowerShaderInstructions.cpp b/src/gallium/drivers/radeon/SILowerShaderInstructions.cpp

deleted file mode 100644 (file)

index 5d49d88..0000000
--- a/src/gallium/drivers/radeon/SILowerShaderInstructions.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-//===-- SILowerShaderInstructions.cpp - TODO: Add brief description -------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "AMDGPU.h"
-#include "AMDGPULowerShaderInstructions.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-namespace {
-  class SILowerShaderInstructionsPass : public MachineFunctionPass,
-      public AMDGPULowerShaderInstructionsPass {
-
-  private:
-    static char ID;
-    TargetMachine &TM;
-
-  public:
-    SILowerShaderInstructionsPass(TargetMachine &tm) :
-      MachineFunctionPass(ID), TM(tm) { }
-
-    bool runOnMachineFunction(MachineFunction &MF);
-
-    const char *getPassName() const { return "SI Lower Shader Instructions"; }
-
-    void lowerRETURN(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
-    void lowerSET_M0(MachineInstr &MI, MachineBasicBlock &MBB,
-                     MachineBasicBlock::iterator I);
-  };
-} /* End anonymous namespace */
-
-char SILowerShaderInstructionsPass::ID = 0;
-
-FunctionPass *llvm::createSILowerShaderInstructionsPass(TargetMachine &tm) {
-    return new SILowerShaderInstructionsPass(tm);
-}
-
-bool SILowerShaderInstructionsPass::runOnMachineFunction(MachineFunction &MF)
-{
-  MRI = &MF.getRegInfo();
-  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-                                                  BB != BB_E; ++BB) {
-    MachineBasicBlock &MBB = *BB;
-    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
-         I != MBB.end(); I = Next, Next = llvm::next(I) ) {
-      MachineInstr &MI = *I;
-      switch (MI.getOpcode()) {
-      case AMDIL::RETURN:
-        lowerRETURN(MBB, I);
-        break;
-      case AMDIL::SET_M0:
-        lowerSET_M0(MI, MBB, I);
-        break;
-      default: continue;
-      }
-      MI.removeFromParent();
-    }
-  }
-
-  return false;
-}
-
-void SILowerShaderInstructionsPass::lowerRETURN(MachineBasicBlock &MBB,
-    MachineBasicBlock::iterator I)
-{
-  const struct TargetInstrInfo * TII = TM.getInstrInfo();
-  BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::S_ENDPGM));
-}
-
-void SILowerShaderInstructionsPass::lowerSET_M0(MachineInstr &MI,
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator I)
-{
-  const struct TargetInstrInfo * TII = TM.getInstrInfo();
-  BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::S_MOV_IMM_I32))
-          .addReg(AMDIL::M0)
-          .addOperand(MI.getOperand(1));
-}
diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp b/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp

index eace40c226cd5deff7dda154e0f3ad7b96a1eb65..40ba76f1f86d703bf1450572a78a911c5617a4e0 100644 (file)
--- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp
+++ b/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SIMachineFunctionInfo.cpp - TODO: Add brief description -------===//
+//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -6,10 +6,6 @@
  // License. See LICENSE.TXT for details.
  //
  //===----------------------------------------------------------------------===//
-//
-// TODO: Add full description
-//
-//===----------------------------------------------------------------------===//
  
  
  #include "SIMachineFunctionInfo.h"
diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h b/src/gallium/drivers/radeon/SIMachineFunctionInfo.h

index 5647de9d81fe178a56129c48e2eb962407898625..46a021f361303429cc2345d4a8c1c3034d60bb73 100644 (file)
--- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h
+++ b/src/gallium/drivers/radeon/SIMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===-- SIMachineFunctionInfo.h - TODO: Add brief description -------===//
+//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,9 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// SIMachineFunctionInfo is used to keep track of the spi_sp_input_addr config
+// register, which tells the hardware which interpolation parameters to
+// load.
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/SIPropagateImmReads.cpp b/src/gallium/drivers/radeon/SIPropagateImmReads.cpp

index 4f925d5de1c1858eeb9c796a011862b417546cdb..6a1654888311bf0356f22a38cb5b84868189114b 100644 (file)
--- a/src/gallium/drivers/radeon/SIPropagateImmReads.cpp
+++ b/src/gallium/drivers/radeon/SIPropagateImmReads.cpp
@@ -1,4 +1,4 @@
-//===-- SIPropagateImmReads.cpp - TODO: Add brief description -------===//
+//===-- SIPropagateImmReads.cpp - Lower Immediate Reads Pass --------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,9 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// This pass propagates immediate reads.  We can't do this in the ConvertToISA
+// pass, because later passes might create LOADCONST_* instructions that we
+// would miss.  This is why we need a separate pass for this.
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.cpp b/src/gallium/drivers/radeon/SIRegisterInfo.cpp

index da2ec36a7733949d0695304a49dc4d173fc00286..2d530a4f0229c88a5939acf7682b3cd77aafc73b 100644 (file)
--- a/src/gallium/drivers/radeon/SIRegisterInfo.cpp
+++ b/src/gallium/drivers/radeon/SIRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SIRegisterInfo.cpp - TODO: Add brief description -------===//
+//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// This file contains the SI implementation of the TargetRegisterInfo class.
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.h b/src/gallium/drivers/radeon/SIRegisterInfo.h

index c797e3c8aceea9be588449ce989cc78c49c9ed5b..77f3261efc5b8ba75edf440ff1e2e92b346ae938 100644 (file)
--- a/src/gallium/drivers/radeon/SIRegisterInfo.h
+++ b/src/gallium/drivers/radeon/SIRegisterInfo.h
@@ -1,4 +1,4 @@
-//===-- SIRegisterInfo.h - TODO: Add brief description -------===//
+//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// Interface definition for SIRegisterInfo
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/SISchedule.td b/src/gallium/drivers/radeon/SISchedule.td

index 9e99268e9ca27ca2734bdfba9c2284e7e8d67b1f..28b65b82585529dcfcab2c77504b9d9d05efc48f 100644 (file)
--- a/src/gallium/drivers/radeon/SISchedule.td
+++ b/src/gallium/drivers/radeon/SISchedule.td
@@ -1,4 +1,4 @@
-//===-- SISchedule.td - TODO: Add brief description -------===//
+//===-- SISchedule.td - SI Scheduling definitions ------------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -7,7 +7,7 @@
  //
  //===----------------------------------------------------------------------===//
  //
-// TODO: Add full description
+// TODO: This is just a placeholder for now.
  //
  //===----------------------------------------------------------------------===//
  
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h

index 9be7f90c3e6cd95756d32251e14fe3a651b5422b..4a706397fddb6c79fe41f6d5c75e30073279cc39 100644 (file)
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -36,6 +36,8 @@
  #define RADEON_LLVM_MAX_BRANCH_DEPTH 16
  #define RADEON_LLVM_MAX_LOOP_DEPTH 16
  
+#define RADEON_LLVM_MAX_SYSTEM_VALUES 4
+
  struct radeon_llvm_branch {
         LLVMBasicBlockRef endif_block;
         LLVMBasicBlockRef if_block;
@@ -78,6 +80,9 @@ struct radeon_llvm_context {
                         unsigned input_index,
                         const struct tgsi_full_declaration *decl);
  
+       void (*load_system_value)(struct radeon_llvm_context *,
+                       unsigned index,
+                       const struct tgsi_full_declaration *decl);
  
         /** User data to use with the callbacks */
         void * userdata;
@@ -90,6 +95,8 @@ struct radeon_llvm_context {
         LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
         unsigned output_reg_count;
  
+       LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
+
         unsigned reserved_reg_count;
         /*=== Private Members ===*/
  
@@ -105,6 +112,37 @@ struct radeon_llvm_context {
         struct gallivm_state gallivm;
  };
  
+static inline LLVMValueRef bitcast(
+               struct lp_build_tgsi_context * bld_base,
+               enum tgsi_opcode_type type,
+               LLVMValueRef value
+)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       LLVMContextRef ctx = bld_base->base.gallivm->context;
+       LLVMTypeRef dst_type;
+
+       switch (type) {
+       case TGSI_TYPE_UNSIGNED:
+       case TGSI_TYPE_SIGNED:
+               dst_type = LLVMInt32TypeInContext(ctx);
+               break;
+       case TGSI_TYPE_UNTYPED:
+       case TGSI_TYPE_FLOAT:
+               dst_type = LLVMFloatTypeInContext(ctx);
+               break;
+       default:
+               dst_type = 0;
+               break;
+       }
+
+       if (dst_type)
+               return LLVMBuildBitCast(builder, value, dst_type, "");
+       else
+               return value;
+}
+
+
  void radeon_llvm_context_init(struct radeon_llvm_context * ctx);
  
  void radeon_llvm_dispose(struct radeon_llvm_context * ctx);
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp

index b409cb2175e64b681f28f51d0e6840ef0b43835f..ebc32106b52356c9833c857d7043e8adfbf55b20 100644 (file)
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp
@@ -93,19 +93,20 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes,
     AMDGPUTriple.setArch(Arch);
  
     Module * mod = unwrap(M);
-   std::string FS = gpu_family;
+   std::string FS;
     TargetOptions TO;
  
+   if (dump) {
+      mod->dump();
+      FS += "+DumpCode";
+   }
+
     std::auto_ptr<TargetMachine> tm(AMDGPUTarget->createTargetMachine(
-                     AMDGPUTriple.getTriple(), gpu_family, "" /* Features */,
+                     AMDGPUTriple.getTriple(), gpu_family, FS,
                       TO, Reloc::Default, CodeModel::Default,
                       CodeGenOpt::Default
                       ));
     TargetMachine &AMDGPUTargetMachine = *tm.get();
-   /* XXX: Use TargetMachine.Options in 3.0 */
-   if (dump) {
-      mod->dump();
-   }
     PassManager PM;
     PM.add(new TargetData(*AMDGPUTargetMachine.getTargetData()));
     PM.add(createPromoteMemoryToRegisterPass());
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c

index 62de9da28dea493d53948f3542f4b0d0656e96d7..6e6fc3d12cd177bf51a08efff4e10d9a9134fd3d 100644 (file)
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -29,6 +29,7 @@
  #include "gallivm/lp_bld_gather.h"
  #include "gallivm/lp_bld_flow.h"
  #include "gallivm/lp_bld_init.h"
+#include "gallivm/lp_bld_intr.h"
  #include "gallivm/lp_bld_swizzle.h"
  #include "tgsi/tgsi_info.h"
  #include "tgsi/tgsi_parse.h"
@@ -112,8 +113,25 @@ emit_fetch_immediate(
         enum tgsi_opcode_type type,
         unsigned swizzle)
  {
+       LLVMTypeRef ctype;
+       LLVMContextRef ctx = bld_base->base.gallivm->context;
+
+       switch (type) {
+       case TGSI_TYPE_UNSIGNED:
+       case TGSI_TYPE_SIGNED:
+               ctype = LLVMInt32TypeInContext(ctx);
+               break;
+       case TGSI_TYPE_UNTYPED:
+       case TGSI_TYPE_FLOAT:
+               ctype = LLVMFloatTypeInContext(ctx);
+               break;
+       default:
+               ctype = 0;
+               break;
+       }
+
         struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
-       return bld->immediates[reg->Register.Index][swizzle];
+       return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
  }
  
  static LLVMValueRef
@@ -134,7 +152,7 @@ emit_fetch_input(
                 return lp_build_gather_values(bld_base->base.gallivm, values,
                                                 TGSI_NUM_CHANNELS);
         } else {
-               return ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
+               return bitcast(bld_base, type, ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]);
         }
  }
  
@@ -155,7 +173,7 @@ emit_fetch_temporary(
         } else {
                 LLVMValueRef temp_ptr;
                 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
-               return LLVMBuildLoad(builder, temp_ptr, "");
+               return bitcast(bld_base,type,LLVMBuildLoad(builder, temp_ptr, ""));
         }
  }
  
@@ -213,6 +231,15 @@ static void emit_declaration(
         }
         break;
  
+       case TGSI_FILE_SYSTEM_VALUE:
+       {
+               unsigned idx;
+               for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
+                       ctx->load_system_value(ctx, idx, decl);
+               }
+       }
+       break;
+
         case TGSI_FILE_OUTPUT:
         {
                 unsigned idx;
@@ -304,6 +331,9 @@ emit_store(
                 default:
                         return;
                 }
+
+               value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
+
                 LLVMBuildStore(builder, value, temp_ptr);
         }
  }
@@ -444,8 +474,10 @@ static void if_emit(
         struct gallivm_state * gallivm = bld_base->base.gallivm;
         LLVMValueRef cond;
         LLVMBasicBlockRef if_block, else_block, endif_block;
-       cond = LLVMBuildFCmp(gallivm->builder, LLVMRealOEQ, emit_data->args[0],
-                                                       bld_base->base.one, "");
+
+       cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
+               bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
+                       bld_base->int_bld.zero, "");
  
         endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
                                                 ctx->main_fn, "ENDIF");
@@ -463,6 +495,101 @@ static void if_emit(
         ctx->branch[ctx->branch_depth - 1].has_else = 0;
  }
  
+static void kil_emit(
+       const struct lp_build_tgsi_action * action,
+       struct lp_build_tgsi_context * bld_base,
+       struct lp_build_emit_data * emit_data)
+{
+       unsigned i;
+       for (i = 0; i < emit_data->arg_count; i++) {
+               emit_data->output[i] = lp_build_intrinsic_unary(
+                       bld_base->base.gallivm->builder,
+                       action->intr_name,
+                       emit_data->dst_type, emit_data->args[i]);
+       }
+}
+
+
+static void emit_prepare_cube_coords(
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       boolean shadowcube = (emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE);
+       struct gallivm_state * gallivm = bld_base->base.gallivm;
+       LLVMBuilderRef builder = gallivm->builder;
+       LLVMTypeRef type = bld_base->base.elem_type;
+       LLVMValueRef coords[4];
+       LLVMValueRef mad_args[3];
+       unsigned i, cnt;
+
+       LLVMValueRef v = lp_build_intrinsic(builder, "llvm.AMDGPU.cube",
+                       LLVMVectorType(type, 4),
+                       &emit_data->args[0],1);
+
+       /* save src.w for shadow cube */
+       cnt = shadowcube ? 3 : 4;
+
+       for (i = 0; i < cnt; ++i) {
+               LLVMValueRef idx = lp_build_const_int32(gallivm, i);
+               coords[i] = LLVMBuildExtractElement(builder, v, idx, "");
+       }
+
+       coords[2] = lp_build_intrinsic(builder, "llvm.AMDIL.fabs.",
+                       type, &coords[2], 1);
+       coords[2] = lp_build_intrinsic(builder, "llvm.AMDGPU.rcp",
+                       type, &coords[2], 1);
+
+       mad_args[1] = coords[2];
+       mad_args[2] = LLVMConstReal(type, 1.5);
+
+       mad_args[0] = coords[0];
+       coords[0] = lp_build_intrinsic(builder, "llvm.AMDIL.mad.",
+                       type, mad_args, 3);
+
+       mad_args[0] = coords[1];
+       coords[1] = lp_build_intrinsic(builder, "llvm.AMDIL.mad.",
+                       type, mad_args, 3);
+
+       /* apply yxwy swizzle to coords */
+       coords[2] = coords[3];
+       coords[3] = coords[1];
+       coords[1] = coords[0];
+       coords[0] = coords[3];
+
+       emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+                                               coords, 4);
+}
+
+static void txp_fetch_args(
+       struct lp_build_tgsi_context * bld_base,
+       struct lp_build_emit_data * emit_data)
+{
+       const struct tgsi_full_instruction * inst = emit_data->inst;
+       LLVMValueRef src_w;
+       unsigned chan;
+       LLVMValueRef coords[4];
+
+       emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+       src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
+
+       for (chan = 0; chan < 3; chan++ ) {
+               LLVMValueRef arg = lp_build_emit_fetch(bld_base,
+                                               emit_data->inst, 0, chan);
+               coords[chan] = lp_build_emit_llvm_binary(bld_base,
+                                       TGSI_OPCODE_DIV, arg, src_w);
+       }
+       coords[3] = bld_base->base.one;
+       emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+                                               coords, 4);
+       emit_data->arg_count = 1;
+
+       if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
+            inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
+           inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
+               emit_prepare_cube_coords(bld_base, emit_data);
+       }
+}
+
  static void tex_fetch_args(
         struct lp_build_tgsi_context * bld_base,
         struct lp_build_emit_data * emit_data)
@@ -475,16 +602,261 @@ static void tex_fetch_args(
  
         */
  
+       const struct tgsi_full_instruction * inst = emit_data->inst;
+
         LLVMValueRef coords[4];
         unsigned chan;
         for (chan = 0; chan < 4; chan++) {
-               coords[chan] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, chan);
+               coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
         }
  
         emit_data->arg_count = 1;
         emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
                                                 coords, 4);
         emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+
+       if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
+            inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) &&
+           inst->Instruction.Opcode != TGSI_OPCODE_TXQ) {
+               emit_prepare_cube_coords(bld_base, emit_data);
+       }
+}
+
+static void emit_icmp(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       /* Integer compare of args[0] and args[1]; the i1 result is widened to 0 / ~0. */
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       LLVMContextRef context = bld_base->base.gallivm->context;
+       LLVMIntPredicate pred;
+
+       switch (emit_data->inst->Instruction.Opcode) {
+       case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
+       case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
+       case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
+       case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
+       case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
+       case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
+       default:
+               assert(!"unknown instruction");
+               return; /* with NDEBUG, never build a compare from an unset pred */
+       }
+
+       LLVMValueRef v = LLVMBuildICmp(builder, pred,
+                       emit_data->args[0], emit_data->args[1],"");
+       v = LLVMBuildSExtOrBitCast(builder, v,
+                       LLVMInt32TypeInContext(context), "");
+       emit_data->output[emit_data->chan] = v;
+}
+
+static void emit_not(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
+                       emit_data->args[0]);
+       emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
+}
+
+static void emit_and(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
+                       emit_data->args[0], emit_data->args[1], "");
+}
+
+static void emit_or(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
+                       emit_data->args[0], emit_data->args[1], "");
+}
+
+static void emit_uadd(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
+                       emit_data->args[0], emit_data->args[1], "");
+}
+
+static void emit_udiv(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
+                       emit_data->args[0], emit_data->args[1], "");
+}
+
+static void emit_idiv(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
+                       emit_data->args[0], emit_data->args[1], "");
+}
+
+static void emit_mod(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
+                       emit_data->args[0], emit_data->args[1], "");
+}
+
+static void emit_umod(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
+                       emit_data->args[0], emit_data->args[1], "");
+}
+
+static void emit_shl(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
+                       emit_data->args[0], emit_data->args[1], "");
+}
+
+static void emit_ushr(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
+                       emit_data->args[0], emit_data->args[1], "");
+}
+static void emit_ishr(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
+                       emit_data->args[0], emit_data->args[1], "");
+}
+
+static void emit_xor(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
+                       emit_data->args[0], emit_data->args[1], "");
+}
+
+static void emit_ssg(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       /* sign(args[0]): > 0 selects 1, < 0 selects -1, zero is passed through. */
+       LLVMValueRef cmp, val;
+
+       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
+               cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
+               val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
+               cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
+               val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
+       } else { /* float SSG: both fcmp operands must be float, so use base.zero */
+               cmp = LLVMBuildFCmp(builder, LLVMRealUGT, emit_data->args[0], bld_base->base.zero, "");
+               val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
+               cmp = LLVMBuildFCmp(builder, LLVMRealUGE, val, bld_base->base.zero, "");
+               val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
+       }
+
+       emit_data->output[emit_data->chan] = val;
+}
+
+static void emit_ineg(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
+                       emit_data->args[0], "");
+}
+
+static void emit_f2i(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
+                       emit_data->args[0], bld_base->int_bld.elem_type, "");
+}
+
+static void emit_f2u(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
+                       emit_data->args[0], bld_base->uint_bld.elem_type, "");
+}
+
+static void emit_i2f(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
+                       emit_data->args[0], bld_base->base.elem_type, "");
+}
+
+static void emit_u2f(
+               const struct lp_build_tgsi_action * action,
+               struct lp_build_tgsi_context * bld_base,
+               struct lp_build_emit_data * emit_data)
+{
+       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
+                       emit_data->args[0], bld_base->base.elem_type, "");
+}
+
+static void emit_immediate(struct lp_build_tgsi_context * bld_base,
+               const struct tgsi_full_immediate *imm)
+{
+       unsigned i;
+       struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
+
+       for (i = 0; i < 4; ++i) {
+               ctx->soa.immediates[ctx->soa.num_immediates][i] =
+                               LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false   );
+       }
+
+       ctx->soa.num_immediates++;
  }
  
  void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
@@ -526,12 +898,13 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
  
         lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
         lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
+       lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
  
         bld_base->soa = 1;
         bld_base->emit_store = emit_store;
         bld_base->emit_swizzle = emit_swizzle;
         bld_base->emit_declaration = emit_declaration;
-       bld_base->emit_immediate = lp_emit_immediate_soa;
+       bld_base->emit_immediate = emit_immediate;
  
         bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
         bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
@@ -545,6 +918,60 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
  
         lp_set_default_actions(bld_base);
  
+       bld_base->op_actions[TGSI_OPCODE_IABS].emit = lp_build_tgsi_intrinsic;
+       bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
+       bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
+       bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
+       bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
+       bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
+       bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
+       bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
+       bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
+       bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
+       bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
+       bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
+       bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
+       bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
+       bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
+       bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
+       bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
+       bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
+       bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
+       bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
+       bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
+       bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
+       bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
+       bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
+       bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
+       bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_ROUND].emit = lp_build_tgsi_intrinsic;
+       bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
+       bld_base->op_actions[TGSI_OPCODE_MIN].emit = lp_build_tgsi_intrinsic;
+       bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.AMDIL.min.";
+       bld_base->op_actions[TGSI_OPCODE_MAX].emit = lp_build_tgsi_intrinsic;
+       bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.AMDIL.max.";
+       bld_base->op_actions[TGSI_OPCODE_IMIN].emit = lp_build_tgsi_intrinsic;
+       bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin";
+       bld_base->op_actions[TGSI_OPCODE_IMAX].emit = lp_build_tgsi_intrinsic;
+       bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax";
+       bld_base->op_actions[TGSI_OPCODE_UMIN].emit = lp_build_tgsi_intrinsic;
+       bld_base->op_actions[TGSI_OPCODE_UMIN].intr_name = "llvm.AMDGPU.umin";
+       bld_base->op_actions[TGSI_OPCODE_UMAX].emit = lp_build_tgsi_intrinsic;
+       bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax";
+       bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = tex_fetch_args;
+       bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
+       bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
+       bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
+       bld_base->op_actions[TGSI_OPCODE_CEIL].emit = lp_build_tgsi_intrinsic;
+       bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.AMDIL.round.posinf."; /* ceil rounds toward +inf; neginf is floor */
+
+
+
         bld_base->op_actions[TGSI_OPCODE_ABS].emit = lp_build_tgsi_intrinsic;
         bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.AMDIL.fabs.";
         bld_base->op_actions[TGSI_OPCODE_ARL].emit = lp_build_tgsi_intrinsic;
@@ -558,10 +985,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
         bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt";
         bld_base->op_actions[TGSI_OPCODE_COS].emit = lp_build_tgsi_intrinsic;
         bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.AMDGPU.cos";
-       bld_base->op_actions[TGSI_OPCODE_DDX].emit = lp_build_tgsi_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
-       bld_base->op_actions[TGSI_OPCODE_DDY].emit = lp_build_tgsi_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
         bld_base->op_actions[TGSI_OPCODE_DIV].emit = lp_build_tgsi_intrinsic;
         bld_base->op_actions[TGSI_OPCODE_DIV].intr_name = "llvm.AMDGPU.div";
         bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
@@ -574,7 +997,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
         bld_base->op_actions[TGSI_OPCODE_FRC].emit = lp_build_tgsi_intrinsic;
         bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
         bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
-       bld_base->op_actions[TGSI_OPCODE_KIL].emit = lp_build_tgsi_intrinsic;
+       bld_base->op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
         bld_base->op_actions[TGSI_OPCODE_KIL].intr_name = "llvm.AMDGPU.kill";
         bld_base->op_actions[TGSI_OPCODE_KILP].emit = lp_build_tgsi_intrinsic;
         bld_base->op_actions[TGSI_OPCODE_KILP].intr_name = "llvm.AMDGPU.kilp";
@@ -597,7 +1020,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
         bld_base->op_actions[TGSI_OPCODE_SSG].emit = lp_build_tgsi_intrinsic;
         bld_base->op_actions[TGSI_OPCODE_SSG].intr_name = "llvm.AMDGPU.ssg";
         bld_base->op_actions[TGSI_OPCODE_SGE].emit = lp_build_tgsi_intrinsic;
-       bld_base->op_actions[TGSI_OPCODE_SGE].intr_name = "llvm.AMDGPU.sge.";
+       bld_base->op_actions[TGSI_OPCODE_SGE].intr_name = "llvm.AMDGPU.sge";
         bld_base->op_actions[TGSI_OPCODE_SEQ].emit = lp_build_tgsi_intrinsic;
         bld_base->op_actions[TGSI_OPCODE_SEQ].intr_name = "llvm.AMDGPU.seq";
         bld_base->op_actions[TGSI_OPCODE_SLE].fetch_args = radeon_llvm_fetch_args_2_reverse_soa;
@@ -620,6 +1043,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
         bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
         bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
         bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
+       bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
         bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
         bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = lp_build_tgsi_intrinsic;
         bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";
diff --git a/src/gallium/drivers/radeonsi/evergreen_state.c b/src/gallium/drivers/radeonsi/evergreen_state.c

index 75d6cadc6cccab60284f20fd2751ae92d0e8f629..b094248fee181ff2d3bc0ebaf72200558fb98af8 100644 (file)
--- a/src/gallium/drivers/radeonsi/evergreen_state.c
+++ b/src/gallium/drivers/radeonsi/evergreen_state.c
@@ -1166,24 +1166,6 @@ static void si_delete_sampler_state(struct pipe_context *ctx,
         free(state);
  }
  
-static unsigned si_map_swizzle(unsigned swizzle)
-{
-       switch (swizzle) {
-       case UTIL_FORMAT_SWIZZLE_Y:
-               return V_008F1C_SQ_SEL_Y;
-       case UTIL_FORMAT_SWIZZLE_Z:
-               return V_008F1C_SQ_SEL_Z;
-       case UTIL_FORMAT_SWIZZLE_W:
-               return V_008F1C_SQ_SEL_W;
-       case UTIL_FORMAT_SWIZZLE_0:
-               return V_008F1C_SQ_SEL_0;
-       case UTIL_FORMAT_SWIZZLE_1:
-               return V_008F1C_SQ_SEL_1;
-       default: /* UTIL_FORMAT_SWIZZLE_X */
-               return V_008F1C_SQ_SEL_X;
-       }
-}
-
  static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_context *ctx,
                                                         struct pipe_resource *texture,
                                                         const struct pipe_sampler_view *state)
@@ -1259,9 +1241,9 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
  
         va = r600_resource_va(ctx->screen, texture);
         view->state[0] = (va + tmp->offset[0]) >> 8;
-       view->state[1] = ((va + tmp->offset[0]) >> 40) & 0xff;
-       view->state[1] |= (S_008F14_DATA_FORMAT(format) |
-                          S_008F14_NUM_FORMAT(num_format));
+       view->state[1] = (S_008F14_BASE_ADDRESS_HI((va + tmp->offset[0]) >> 40) |
+                         S_008F14_DATA_FORMAT(format) |
+                         S_008F14_NUM_FORMAT(num_format));
         view->state[2] = (S_008F18_WIDTH(texture->width0 - 1) |
                           S_008F18_HEIGHT(height - 1));
         view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) |
@@ -2087,9 +2069,9 @@ void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *shader)
         r600_pipe_state_add_reg(rstate,
                                 R_02870C_SPI_SHADER_POS_FORMAT,
                                 S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
-                               S_02870C_POS1_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
-                               S_02870C_POS2_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
-                               S_02870C_POS3_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP),
+                               S_02870C_POS1_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) |
+                               S_02870C_POS2_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) |
+                               S_02870C_POS3_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE),
                                 NULL, 0);
  
         va = r600_resource_va(ctx->screen, (void *)shader->bo);
diff --git a/src/gallium/drivers/radeonsi/r600_state_common.c b/src/gallium/drivers/radeonsi/r600_state_common.c

index 53a34ef519c55146464165506b481b501cea9fa3..06eb96b9ee84c05c56c332f49bf63422d790463a 100644 (file)
--- a/src/gallium/drivers/radeonsi/r600_state_common.c
+++ b/src/gallium/drivers/radeonsi/r600_state_common.c
@@ -628,12 +628,15 @@ static void r600_vertex_buffer_update(struct r600_context *rctx)
                 ptr[0] = va & 0xFFFFFFFF;
                 ptr[1] = (S_008F04_BASE_ADDRESS_HI(va >> 32) |
                           S_008F04_STRIDE(vertex_buffer->stride));
-               ptr[2] = (vertex_buffer->buffer->width0 - offset) / vertex_buffer->stride;
-               /* XXX: Hardcoding RGBA */
-               ptr[3] = (S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
-                         S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
-                         S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                         S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+               if (vertex_buffer->stride > 0)
+                       ptr[2] = ((vertex_buffer->buffer->width0 - offset) /
+                                 vertex_buffer->stride);
+               else
+                       ptr[2] = vertex_buffer->buffer->width0 - offset;
+               ptr[3] = (S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
+                         S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
+                         S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
+                         S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
                           S_008F0C_NUM_FORMAT(num_format) |
                           S_008F0C_DATA_FORMAT(data_format));
  
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h

index ab30892d51a31238922f32d20332ff22c8e79717..bba4cf2369142ddcf39904643051ee02096cc3c6 100644 (file)
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -37,6 +37,7 @@
  #include "r600.h"
  #include "radeonsi_public.h"
  #include "r600_resource.h"
+#include "sid.h"
  
  #define R600_MAX_CONST_BUFFERS 1
  #define R600_MAX_CONST_BUFFER_SIZE 4096
@@ -467,6 +468,24 @@ static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
  }
  #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
  
+static INLINE unsigned si_map_swizzle(unsigned swizzle)
+{
+       switch (swizzle) {
+       case UTIL_FORMAT_SWIZZLE_Y:
+               return V_008F0C_SQ_SEL_Y;
+       case UTIL_FORMAT_SWIZZLE_Z:
+               return V_008F0C_SQ_SEL_Z;
+       case UTIL_FORMAT_SWIZZLE_W:
+               return V_008F0C_SQ_SEL_W;
+       case UTIL_FORMAT_SWIZZLE_0:
+               return V_008F0C_SQ_SEL_0;
+       case UTIL_FORMAT_SWIZZLE_1:
+               return V_008F0C_SQ_SEL_1;
+       default: /* UTIL_FORMAT_SWIZZLE_X */
+               return V_008F0C_SQ_SEL_X;
+       }
+}
+
  static inline unsigned r600_tex_aniso_filter(unsigned filter)
  {
         if (filter <= 1)   return 0;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c

index 6425c352d28a7aafa6ec7983515fb373a8910815..0e1a97bba3e88beab3659bd7cefee7428a465185 100644 (file)
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -199,7 +199,7 @@ static void declare_input_fs(
         LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index);
  
         /* XXX: Handle all possible interpolation modes */
-       switch (decl->Declaration.Interpolate) {
+       switch (decl->Interp.Interpolate) {
         case TGSI_INTERPOLATE_COLOR:
                 if (si_shader_ctx->rctx->rasterizer->flatshade)
                         intr_name = "llvm.SI.fs.interp.constant";
@@ -331,14 +331,14 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
                         i = shader->ninput++;
                         shader->input[i].name = d->Semantic.Name;
                         shader->input[i].sid = d->Semantic.Index;
-                       shader->input[i].interpolate = d->Declaration.Interpolate;
-                       shader->input[i].centroid = d->Declaration.Centroid;
+                       shader->input[i].interpolate = d->Interp.Interpolate;
+                       shader->input[i].centroid = d->Interp.Centroid;
                         break;
                 case TGSI_FILE_OUTPUT:
                         i = shader->noutput++;
                         shader->output[i].name = d->Semantic.Name;
                         shader->output[i].sid = d->Semantic.Index;
-                       shader->output[i].interpolate = d->Declaration.Interpolate;
+                       shader->output[i].interpolate = d->Interp.Interpolate;
                         break;
                 }
  
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c

index d54e02e40cd7b53098cd9536ff22d3aba431e212..d4c01759dbe3425c211f85c1e3c62877738fe74a 100644 (file)
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -799,7 +799,8 @@ get_texel_2d_array(const struct sp_sampler_variant *samp,
     const struct pipe_resource *texture = samp->view->texture;
     unsigned level = addr.bits.level;
  
-   assert(layer < texture->array_size);
+   assert(layer < (int) texture->array_size);
+   assert(layer >= 0);
  
     if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
         y < 0 || y >= (int) u_minify(texture->height0, level)) {
@@ -1787,9 +1788,9 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler,
     float weight_buffer[TGSI_QUAD_SIZE];
     unsigned buffer_next;
     int j;
-   float den;// = 0.0F;
+   float den; /* = 0.0F; */
     float ddq;
-   float U;// = u0 - tex_u;
+   float U; /* = u0 - tex_u; */
     int v;
  
     /* Scale ellipse formula to directly index the Filter Lookup Table.
@@ -1805,8 +1806,8 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler,
      * also the same. Note that texel/image access can only be performed using
      * a quad, i.e. it is not possible to get the pixel value for a single
      * tex coord. In order to have a better performance, the access is buffered
-    * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is full,
-    * then the pixel values are read from the image.
+    * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
+    * full, then the pixel values are read from the image.
      */
     ddq = 2 * A;
     
@@ -1834,7 +1835,9 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler,
  
           int u;
           for (u = u0; u <= u1; ++u) {
-            /* Note that the ellipse has been pre-scaled so F = WEIGHT_LUT_SIZE - 1 */
+            /* Note that the ellipse has been pre-scaled so F =
+             * WEIGHT_LUT_SIZE - 1
+             */
              if (q < WEIGHT_LUT_SIZE) {
                 /* as a LUT is used, q must never be negative;
                  * should not happen, though
@@ -1873,10 +1876,11 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler,
           }
        }
  
-      /* if the tex coord buffer contains unread values, we will read them now.
-       * Note that in most cases we have to read more pixel values than required,
-       * however, as the img_filter_2d_nearest function(s) does not have a count
-       * parameter, we need to read the whole quad and ignore the unused values
+      /* if the tex coord buffer contains unread values, we will read
+       * them now.  Note that in most cases we have to read more pixel
+       * values than required, however, as the img_filter_2d_nearest
+       * function(s) does not have a count parameter, we need to read
+       * the whole quad and ignore the unused values
         */
        if (buffer_next > 0) {
           unsigned jj;
@@ -1895,11 +1899,9 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler,
        }
  
        if (den <= 0.0F) {
-         /* Reaching this place would mean
-          * that no pixels intersected the ellipse.
-          * This should never happen because
-          * the filter we use always
-          * intersects at least one pixel.
+         /* Reaching this place would mean that no pixels intersected
+          * the ellipse.  This should never happen because the filter
+          * we use always intersects at least one pixel.
            */
  
           /*rgba[0]=0;
@@ -1907,7 +1909,8 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler,
           rgba[2]=0;
           rgba[3]=0;*/
           /* not enough pixels in resampling, resort to direct interpolation */
-         samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba_temp);
+         samp->min_img_filter(tgsi_sampler, s, t, p, NULL,
+                              tgsi_sampler_lod_bias, rgba_temp);
           den = 1;
           num[0] = rgba_temp[0][j];
           num[1] = rgba_temp[1][j];
@@ -2020,7 +2023,6 @@ mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler,
  }
  
  
-
  /**
   * Specialized version of mip_filter_linear with hard-wired calls to
   * 2d lambda calculation and 2d_linear_repeat_POT img filters.
@@ -2090,7 +2092,6 @@ mip_filter_linear_2d_linear_repeat_POT(
  }
  
  
-
  /**
   * Do shadow/depth comparisons.
   */
@@ -2287,9 +2288,11 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
     samp->compare(tgsi_sampler, ssss, tttt, NULL, c0, control, rgba);
  }
  
-static void do_swizzling(const struct sp_sampler_variant *samp,
-                         float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
-                         float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+
+static void
+do_swizzling(const struct sp_sampler_variant *samp,
+             float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
+             float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
  {
     int j;
     const unsigned swizzle_r = samp->key.bits.swizzle_r;
@@ -2358,6 +2361,7 @@ static void do_swizzling(const struct sp_sampler_variant *samp,
     }
  }
  
+
  static void
  sample_swizzle(struct tgsi_sampler *tgsi_sampler,
                 const float s[TGSI_QUAD_SIZE],
@@ -2464,6 +2468,19 @@ get_linear_wrap(unsigned mode)
  }
  
  
+/**
+ * Is swizzling needed for the given state key?
+ */
+static INLINE bool
+any_swizzle(union sp_sampler_key key)
+{
+   return (key.bits.swizzle_r != PIPE_SWIZZLE_RED ||
+           key.bits.swizzle_g != PIPE_SWIZZLE_GREEN ||
+           key.bits.swizzle_b != PIPE_SWIZZLE_BLUE ||
+           key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA);
+}
+
+
  static compute_lambda_func
  get_lambda_func(const union sp_sampler_key key)
  {
@@ -2590,6 +2607,7 @@ sp_sampler_variant_destroy( struct sp_sampler_variant *samp )
     FREE(samp);
  }
  
+
  static void
  sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level,
                 int dims[4])
@@ -2630,35 +2648,43 @@ sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level,
      }
  }
  
-/* this function is only used for unfiltered texel gets
-   via the TGSI TXF opcode. */
+/**
+ * This function is only used for getting unfiltered texels via the
+ * TXF opcode.  The GL spec says that out-of-bounds texel fetches
+ * produce undefined results.  Instead of crashing, let's just clamp
+ * coords to the texture image size.
+ */
  static void
  sample_get_texels(struct tgsi_sampler *tgsi_sampler,
-          const int v_i[TGSI_QUAD_SIZE],
-          const int v_j[TGSI_QUAD_SIZE],
-          const int v_k[TGSI_QUAD_SIZE],
-          const int lod[TGSI_QUAD_SIZE],
-          const int8_t offset[3],
-          float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
+                  const int v_i[TGSI_QUAD_SIZE],
+                  const int v_j[TGSI_QUAD_SIZE],
+                  const int v_k[TGSI_QUAD_SIZE],
+                  const int lod[TGSI_QUAD_SIZE],
+                  const int8_t offset[3],
+                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
  {
     const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
     union tex_tile_address addr;
     const struct pipe_resource *texture = samp->view->texture;
     int j, c;
     const float *tx;
-   bool need_swizzle = (samp->key.bits.swizzle_r != PIPE_SWIZZLE_RED ||
-                        samp->key.bits.swizzle_g != PIPE_SWIZZLE_GREEN ||
-                        samp->key.bits.swizzle_b != PIPE_SWIZZLE_BLUE ||
-                        samp->key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA);
+   const bool need_swizzle = any_swizzle(samp->key);
+   int width, height, depth, layers;
  
     addr.value = 0;
     /* TODO write a better test for LOD */
     addr.bits.level = lod[0];
  
+   width = u_minify(texture->width0, addr.bits.level);
+   height = u_minify(texture->height0, addr.bits.level);
+   depth = u_minify(texture->depth0, addr.bits.level);
+   layers = texture->array_size;
+
     switch(texture->target) {
     case PIPE_TEXTURE_1D:
        for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-        tx = get_texel_2d(samp, addr, v_i[j] + offset[0], 0);
+         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+        tx = get_texel_2d(samp, addr, x, 0);
          for (c = 0; c < 4; c++) {
             rgba[c][j] = tx[c];
          }
@@ -2666,8 +2692,9 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler,
        break;
     case PIPE_TEXTURE_1D_ARRAY:
        for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-        tx = get_texel_1d_array(samp, addr, v_i[j] + offset[0],
-                                v_j[j] + offset[1]);
+         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+         int y = CLAMP(v_j[j] + offset[1], 0, layers - 1);
+        tx = get_texel_1d_array(samp, addr, x, y);
          for (c = 0; c < 4; c++) {
             rgba[c][j] = tx[c];
          }
@@ -2676,8 +2703,9 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler,
     case PIPE_TEXTURE_2D:
     case PIPE_TEXTURE_RECT:
        for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-        tx = get_texel_2d(samp, addr, v_i[j] + offset[0],
-                          v_j[j] + offset[1]);
+         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+         int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
+        tx = get_texel_2d(samp, addr, x, y);
          for (c = 0; c < 4; c++) {
             rgba[c][j] = tx[c];
          }
@@ -2685,9 +2713,10 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler,
        break;
     case PIPE_TEXTURE_2D_ARRAY:
        for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-        tx = get_texel_2d_array(samp, addr, v_i[j] + offset[0],
-                                v_j[j] + offset[1],
-                                v_k[j] + offset[2]);
+         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+         int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
+         int layer = CLAMP(v_k[j] + offset[2], 0, layers - 1);
+        tx = get_texel_2d_array(samp, addr, x, y, layer);
          for (c = 0; c < 4; c++) {
             rgba[c][j] = tx[c];
          }
@@ -2695,9 +2724,11 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler,
        break;
     case PIPE_TEXTURE_3D:
        for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-        tx = get_texel_3d(samp, addr, v_i[j] + offset[0], 
-                          v_j[j] + offset[1],
-                          v_k[j] + offset[2]);
+         int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
+         int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
+         int z = CLAMP(v_k[j] + offset[2], 0, depth - 1);
+
+        tx = get_texel_3d(samp, addr, x, y, z);
          for (c = 0; c < 4; c++) {
             rgba[c][j] = tx[c];
          }
@@ -2715,6 +2746,8 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler,
        do_swizzling(samp, rgba_temp, rgba);
     }
  }
+
+
  /**
   * Create a sampler variant for a given set of non-orthogonal state.
   */
@@ -2830,10 +2863,7 @@ sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
        samp->sample_target = samp->compare;
     }
  
-   if (key.bits.swizzle_r != PIPE_SWIZZLE_RED ||
-       key.bits.swizzle_g != PIPE_SWIZZLE_GREEN ||
-       key.bits.swizzle_b != PIPE_SWIZZLE_BLUE ||
-       key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA) {
+   if (any_swizzle(key)) {
        samp->base.get_samples = sample_swizzle;
     }
     else {
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c

index ac2d35e5ea4a529f3232a9004fd3f476331d2bf0..64ec658b80e632c45a761a8570cf62953e906fa5 100644 (file)
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -241,7 +241,11 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
     case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS:
     case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
     case PIPE_CAP_GLSL_FEATURE_LEVEL:
+   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
        return 0;
+   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+      return 1;
  
     default:
        debug_printf("Unexpected PIPE_CAP_ query %u\n", param);
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c

index 5e6d1fbc9048b22ec1b0f4c94a711d02691b30d5..a68912608bc6d63e18764ef29b80c0b5b40dffdb 100644 (file)
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -871,6 +871,31 @@ static boolean emit_floor(struct svga_shader_emitter *emit,
  }
  
  
+/* Translate the following TGSI CEIL instruction.
+ *    CEIL  DST, SRC
+ * To the following SVGA3D instruction sequence.
+ *    FRC  TMP, -SRC
+ *    ADD  DST, SRC, TMP
+ */
+static boolean emit_ceil(struct svga_shader_emitter *emit,
+                         const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
+   const struct src_register src0 = translate_src_register(emit, &insn->Src[0]);
+   SVGA3dShaderDestToken temp = get_temp(emit);
+
+   /* FRC  TMP, -SRC */
+   if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0)))
+      return FALSE;
+
+   /* ADD DST, SRC, TMP */
+   if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp)))
+      return FALSE;
+
+   return TRUE;
+}
+
+
  /* Translate the following TGSI CMP instruction.
   *    CMP  DST, SRC0, SRC1, SRC2
   * To the following SVGA3D instruction sequence.
@@ -2435,6 +2460,9 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
     case TGSI_OPCODE_TRUNC:        /* should be TRUNC, not FLR */
        return emit_floor( emit, insn );
  
+   case TGSI_OPCODE_CEIL:
+      return emit_ceil( emit, insn );
+
     case TGSI_OPCODE_CMP:
        return emit_cmp( emit, insn );
  
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h

index 7709177444f5db177fc125c29a9efafc3df98e5d..f59e388123292a9d9f80064011e8af3b56e62467 100644 (file)
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -64,6 +64,7 @@ struct pipe_vertex_element;
  struct pipe_video_buffer;
  struct pipe_video_decoder;
  struct pipe_viewport_state;
+struct pipe_compute_state;
  union pipe_color_union;
  union pipe_query_result;
  
@@ -142,6 +143,10 @@ struct pipe_context {
     void   (*bind_geometry_sampler_states)(struct pipe_context *,
                                            unsigned num_samplers,
                                            void **samplers);
+   void   (*bind_compute_sampler_states)(struct pipe_context *,
+                                         unsigned start_slot,
+                                         unsigned num_samplers,
+                                         void **samplers);
     void   (*delete_sampler_state)(struct pipe_context *, void *);
  
     void * (*create_rasterizer_state)(struct pipe_context *,
@@ -221,6 +226,26 @@ struct pipe_context {
                                        unsigned num_views,
                                        struct pipe_sampler_view **);
  
+   void (*set_compute_sampler_views)(struct pipe_context *,
+                                     unsigned start_slot, unsigned num_views,
+                                     struct pipe_sampler_view **);
+
+   /**
+    * Bind an array of shader resources that will be used by the
+    * graphics pipeline.  Any resources that were previously bound to
+    * the specified range will be unbound after this call.
+    *
+    * \param start      first resource to bind.
+    * \param count      number of consecutive resources to bind.
+    * \param resources  array of pointers to the resources to bind, it
+    *                   should contain at least \a count elements
+    *                   unless it's NULL, in which case no new
+    *                   resources will be bound.
+    */
+   void (*set_shader_resources)(struct pipe_context *,
+                                unsigned start, unsigned count,
+                                struct pipe_surface **resources);
+
     void (*set_vertex_buffers)( struct pipe_context *,
                                 unsigned num_buffers,
                                 const struct pipe_vertex_buffer * );
@@ -410,6 +435,86 @@ struct pipe_context {
      */
     struct pipe_video_buffer *(*create_video_buffer)( struct pipe_context *context,
                                                       const struct pipe_video_buffer *templat );
+
+   /**
+    * Compute kernel execution
+    */
+   /*@{*/
+   /**
+    * Define the compute program and parameters to be used by
+    * pipe_context::launch_grid.
+    */
+   void *(*create_compute_state)(struct pipe_context *context,
+                                const struct pipe_compute_state *);
+   void (*bind_compute_state)(struct pipe_context *, void *);
+   void (*delete_compute_state)(struct pipe_context *, void *);
+
+   /**
+    * Bind an array of shader resources that will be used by the
+    * compute program.  Any resources that were previously bound to
+    * the specified range will be unbound after this call.
+    *
+    * \param start      first resource to bind.
+    * \param count      number of consecutive resources to bind.
+    * \param resources  array of pointers to the resources to bind, it
+    *                   should contain at least \a count elements
+    *                   unless it's NULL, in which case no new
+    *                   resources will be bound.
+    */
+   void (*set_compute_resources)(struct pipe_context *,
+                                 unsigned start, unsigned count,
+                                 struct pipe_surface **resources);
+
+   /**
+    * Bind an array of buffers to be mapped into the address space of
+    * the GLOBAL resource.  Any buffers that were previously bound
+    * between [first, first + count - 1] are unbound after this call.
+    *
+    * \param first      first buffer to map.
+    * \param count      number of consecutive buffers to map.
+    * \param resources  array of pointers to the buffers to map, it
+    *                   should contain at least \a count elements
+    *                   unless it's NULL, in which case no new
+    *                   resources will be bound.
+    * \param handles    array of pointers to the memory locations that
+    *                   will be filled with the respective base
+    *                   addresses each buffer will be mapped to.  It
+    *                   should contain at least \a count elements,
+    *                   unless \a resources is NULL in which case \a
+    *                   handles should be NULL as well.
+    *
+    * Note that the driver isn't required to make any guarantees about
+    * the contents of the \a handles array being valid anytime except
+    * during the subsequent calls to pipe_context::launch_grid.  This
+    * means that the only sensible location handles[i] may point to is
+    * somewhere within the INPUT buffer itself.  This is so to
+    * accommodate implementations that lack virtual memory but
+    * nevertheless migrate buffers on the fly, leading to resource
+    * base addresses that change on each kernel invocation or are
+    * unknown to the pipe driver.
+    */
+   void (*set_global_binding)(struct pipe_context *context,
+                              unsigned first, unsigned count,
+                              struct pipe_resource **resources,
+                              uint32_t **handles);
+
+   /**
+    * Launch the compute kernel starting from instruction \a pc of the
+    * currently bound compute program.
+    *
+    * \a grid_layout and \a block_layout are arrays of size \a
+    * PIPE_COMPUTE_CAP_GRID_DIMENSION that determine the layout of the
+    * grid (in block units) and working block (in thread units) to be
+    * used, respectively.
+    *
+    * \a input will be used to initialize the INPUT resource, and it
+    * should point to a buffer of at least
+    * pipe_compute_state::req_input_mem bytes.
+    */
+   void (*launch_grid)(struct pipe_context *context,
+                       const uint *block_layout, const uint *grid_layout,
+                       uint32_t pc, const void *input);
+   /*@}*/
  };
  
  
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h

index 398cb98248c02557db642088ce3d1d939c16ff9e..1e05cc4caeee8f9eea5e1fff69922c73f8d5492b 100644 (file)
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -304,6 +304,9 @@ enum pipe_transfer_usage {
  #define PIPE_BIND_STREAM_OUTPUT        (1 << 11) /* set_stream_output_buffers */
  #define PIPE_BIND_CURSOR               (1 << 16) /* mouse cursor */
  #define PIPE_BIND_CUSTOM               (1 << 17) /* state-tracker/winsys usages */
+#define PIPE_BIND_GLOBAL               (1 << 18) /* set_global_binding */
+#define PIPE_BIND_SHADER_RESOURCE      (1 << 19) /* set_shader_resources */
+#define PIPE_BIND_COMPUTE_RESOURCE     (1 << 20) /* set_compute_resources */
  
  /* The first two flags above were previously part of the amorphous
   * TEXTURE_USAGE, most of which are now descriptions of the ways a
@@ -346,7 +349,8 @@ enum pipe_transfer_usage {
  #define PIPE_SHADER_VERTEX   0
  #define PIPE_SHADER_FRAGMENT 1
  #define PIPE_SHADER_GEOMETRY 2
-#define PIPE_SHADER_TYPES    3
+#define PIPE_SHADER_COMPUTE  3
+#define PIPE_SHADER_TYPES    4
  
  
  /**
@@ -477,9 +481,10 @@ enum pipe_cap {
     PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY = 65,
     PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY = 66,
     PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY = 67,
-   PIPE_CAP_USER_INDEX_BUFFERS = 68,
-   PIPE_CAP_USER_CONSTANT_BUFFERS = 69,
-   PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT = 70
+   PIPE_CAP_COMPUTE = 68,
+   PIPE_CAP_USER_INDEX_BUFFERS = 69,
+   PIPE_CAP_USER_CONSTANT_BUFFERS = 70,
+   PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT = 71
  };
  
  /**
@@ -522,9 +527,32 @@ enum pipe_shader_cap
     PIPE_SHADER_CAP_INDIRECT_CONST_ADDR = 15,
     PIPE_SHADER_CAP_SUBROUTINES = 16, /* BGNSUB, ENDSUB, CAL, RET */
     PIPE_SHADER_CAP_INTEGERS = 17,
-   PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS = 18
+   PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS = 18,
+   PIPE_SHADER_CAP_PREFERRED_IR = 19
  };
  
+/**
+ * Shader intermediate representation.
+ */
+enum pipe_shader_ir
+{
+   PIPE_SHADER_IR_TGSI
+};
+
+/**
+ * Compute-specific implementation capabilities.  They can be queried
+ * using pipe_screen::get_compute_param.
+ */
+enum pipe_compute_cap
+{
+   PIPE_COMPUTE_CAP_GRID_DIMENSION,
+   PIPE_COMPUTE_CAP_MAX_GRID_SIZE,
+   PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE,
+   PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE,
+   PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
+   PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE,
+   PIPE_COMPUTE_CAP_MAX_INPUT_SIZE
+};
  
  /**
   * Composite query types
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h

index 45c441b2fcf775bcd79d633be088c12e28953c2f..7ae7c9a04e15ad553aa95b8fde3fe2425b9e1cfd 100644 (file)
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -98,6 +98,18 @@ struct pipe_screen {
                            enum pipe_video_profile profile,
                            enum pipe_video_cap param );
  
+   /**
+    * Query a compute-specific capability/parameter/limit.
+    * \param param  one of PIPE_COMPUTE_CAP_x
+    * \param ret    pointer to a preallocated buffer that will be
+    *               initialized to the parameter value, or NULL.
+    * \return       size in bytes of the parameter value that would be
+    *               returned.
+    */
+   int (*get_compute_param)(struct pipe_screen *,
+                           enum pipe_compute_cap param,
+                           void *ret);
+
     struct pipe_context * (*context_create)( struct pipe_screen *,
                                             void *priv );
  
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h

index df2dd5e618ef0d7ff2689fd67ab8ff1da4849b85..6b58293f409086edb15250ead004cd347e4dc3bc 100644 (file)
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -43,6 +43,7 @@ struct tgsi_header
  #define TGSI_PROCESSOR_FRAGMENT  0
  #define TGSI_PROCESSOR_VERTEX    1
  #define TGSI_PROCESSOR_GEOMETRY  2
+#define TGSI_PROCESSOR_COMPUTE   3
  
  struct tgsi_processor
  {
@@ -76,6 +77,7 @@ enum tgsi_file_type {
     TGSI_FILE_IMMEDIATE_ARRAY     =10,
     TGSI_FILE_TEMPORARY_ARRAY     =11,
     TGSI_FILE_RESOURCE            =12,
+   TGSI_FILE_SAMPLER_VIEW        =13,
     TGSI_FILE_COUNT      /**< how many TGSI_FILE_ types */
  };
  
@@ -114,12 +116,12 @@ struct tgsi_declaration
     unsigned NrTokens    : 8;  /**< UINT */
     unsigned File        : 4;  /**< one of TGSI_FILE_x */
     unsigned UsageMask   : 4;  /**< bitmask of TGSI_WRITEMASK_x flags */
-   unsigned Interpolate : 4;  /**< one of TGSI_INTERPOLATE_x */
     unsigned Dimension   : 1;  /**< any extra dimension info? */
     unsigned Semantic    : 1;  /**< BOOL, any semantic info? */
-   unsigned Centroid    : 1;  /**< centroid sampling? */
+   unsigned Interpolate : 1;  /**< any interpolation info? */
     unsigned Invariant   : 1;  /**< invariant optimization? */
-   unsigned CylindricalWrap:4;   /**< TGSI_CYLINDRICAL_WRAP_x flags */
+   unsigned Local       : 1;  /**< optimize as subroutine local variable? */
+   unsigned Padding     : 7;
  };
  
  struct tgsi_declaration_range
@@ -134,6 +136,14 @@ struct tgsi_declaration_dimension
     unsigned Padding:16;
  };
  
+struct tgsi_declaration_interp
+{
+   unsigned Interpolate : 4;   /**< one of TGSI_INTERPOLATE_x */
+   unsigned Centroid    : 1;   /**< centroid sampling? */
+   unsigned CylindricalWrap:4; /**< TGSI_CYLINDRICAL_WRAP_x flags */
+   unsigned Padding     : 23;
+};
+
  #define TGSI_SEMANTIC_POSITION   0
  #define TGSI_SEMANTIC_COLOR      1
  #define TGSI_SEMANTIC_BCOLOR     2  /**< back-face color */
@@ -149,7 +159,11 @@ struct tgsi_declaration_dimension
  #define TGSI_SEMANTIC_STENCIL    12
  #define TGSI_SEMANTIC_CLIPDIST   13
  #define TGSI_SEMANTIC_CLIPVERTEX 14
-#define TGSI_SEMANTIC_COUNT      15 /**< number of semantic values */
+#define TGSI_SEMANTIC_GRID_SIZE  15 /**< grid size in blocks */
+#define TGSI_SEMANTIC_BLOCK_ID   16 /**< id of the current block */
+#define TGSI_SEMANTIC_BLOCK_SIZE 17 /**< block size in threads */
+#define TGSI_SEMANTIC_THREAD_ID  18 /**< block-relative id of the current thread */
+#define TGSI_SEMANTIC_COUNT      19 /**< number of semantic values */
  
  struct tgsi_declaration_semantic
  {
@@ -159,6 +173,13 @@ struct tgsi_declaration_semantic
  };
  
  struct tgsi_declaration_resource {
+   unsigned Resource    : 8; /**< one of TGSI_TEXTURE_ */
+   unsigned Raw         : 1;
+   unsigned Writable    : 1;
+   unsigned Padding     : 22;
+};
+
+struct tgsi_declaration_sampler_view {
     unsigned Resource    : 8; /**< one of TGSI_TEXTURE_ */
     unsigned ReturnTypeX : 6; /**< one of enum pipe_type */
     unsigned ReturnTypeY : 6; /**< one of enum pipe_type */
@@ -166,6 +187,15 @@ struct tgsi_declaration_resource {
     unsigned ReturnTypeW : 6; /**< one of enum pipe_type */
  };
  
+/*
+ * Special resources that don't need to be declared.  They map to the
+ * GLOBAL/LOCAL/PRIVATE/INPUT compute memory spaces.
+ */
+#define TGSI_RESOURCE_GLOBAL   0x7fff
+#define TGSI_RESOURCE_LOCAL    0x7ffe
+#define TGSI_RESOURCE_PRIVATE  0x7ffd
+#define TGSI_RESOURCE_INPUT    0x7ffc
+
  #define TGSI_IMM_FLOAT32   0
  #define TGSI_IMM_UINT32    1
  #define TGSI_IMM_INT32     2
@@ -363,16 +393,16 @@ struct tgsi_property_data {
  #define TGSI_OPCODE_ENDSWITCH           144
  
  /* resource related opcodes */
-#define TGSI_OPCODE_LOAD                145
-#define TGSI_OPCODE_LOAD_MS             146
-#define TGSI_OPCODE_SAMPLE              147
+#define TGSI_OPCODE_SAMPLE              145
+#define TGSI_OPCODE_SAMPLE_I            146
+#define TGSI_OPCODE_SAMPLE_I_MS         147
  #define TGSI_OPCODE_SAMPLE_B            148
  #define TGSI_OPCODE_SAMPLE_C            149
  #define TGSI_OPCODE_SAMPLE_C_LZ         150
  #define TGSI_OPCODE_SAMPLE_D            151
  #define TGSI_OPCODE_SAMPLE_L            152
  #define TGSI_OPCODE_GATHER4             153
-#define TGSI_OPCODE_RESINFO             154
+#define TGSI_OPCODE_SVIEWINFO           154
  #define TGSI_OPCODE_SAMPLE_POS          155
  #define TGSI_OPCODE_SAMPLE_INFO         156
  
@@ -381,7 +411,26 @@ struct tgsi_property_data {
  #define TGSI_OPCODE_IABS                159
  #define TGSI_OPCODE_ISSG                160
  
-#define TGSI_OPCODE_LAST                161
+#define TGSI_OPCODE_LOAD                161
+#define TGSI_OPCODE_STORE               162
+
+#define TGSI_OPCODE_MFENCE              163
+#define TGSI_OPCODE_LFENCE              164
+#define TGSI_OPCODE_SFENCE              165
+#define TGSI_OPCODE_BARRIER             166
+
+#define TGSI_OPCODE_ATOMUADD            167
+#define TGSI_OPCODE_ATOMXCHG            168
+#define TGSI_OPCODE_ATOMCAS             169
+#define TGSI_OPCODE_ATOMAND             170
+#define TGSI_OPCODE_ATOMOR              171
+#define TGSI_OPCODE_ATOMXOR             172
+#define TGSI_OPCODE_ATOMUMIN            173
+#define TGSI_OPCODE_ATOMUMAX            174
+#define TGSI_OPCODE_ATOMIMIN            175
+#define TGSI_OPCODE_ATOMIMAX            176
+
+#define TGSI_OPCODE_LAST                177
  
  #define TGSI_SAT_NONE            0  /* do not saturate */
  #define TGSI_SAT_ZERO_ONE        1  /* clamp to [0,1] */
@@ -441,7 +490,7 @@ struct tgsi_instruction_label
     unsigned Padding  : 8;
  };
  
-#define TGSI_TEXTURE_UNKNOWN        0
+#define TGSI_TEXTURE_BUFFER         0
  #define TGSI_TEXTURE_1D             1
  #define TGSI_TEXTURE_2D             2
  #define TGSI_TEXTURE_3D             3
@@ -455,7 +504,8 @@ struct tgsi_instruction_label
  #define TGSI_TEXTURE_SHADOW1D_ARRAY 11
  #define TGSI_TEXTURE_SHADOW2D_ARRAY 12
  #define TGSI_TEXTURE_SHADOWCUBE     13
-#define TGSI_TEXTURE_COUNT          14
+#define TGSI_TEXTURE_UNKNOWN        14
+#define TGSI_TEXTURE_COUNT          15
  
  struct tgsi_instruction_texture
  {
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h

index 3bc35bc77ce68b365aeff5cebfa80ab19e545389..51a956d95329e2f705ee14345706ac6839eaff53 100644 (file)
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -62,6 +62,7 @@ extern "C" {
  #define PIPE_MAX_GEOMETRY_SAMPLERS  16
  #define PIPE_MAX_SHADER_INPUTS    32
  #define PIPE_MAX_SHADER_OUTPUTS   32
+#define PIPE_MAX_SHADER_SAMPLER_VIEWS 32
  #define PIPE_MAX_SHADER_RESOURCES 32
  #define PIPE_MAX_TEXTURE_LEVELS   16
  #define PIPE_MAX_SO_BUFFERS        4
@@ -337,6 +338,7 @@ struct pipe_surface
     unsigned height;              /**< logical height in pixels */
  
     unsigned usage;               /**< bitmask of PIPE_BIND_x */
+   unsigned writable:1;          /**< writable shader resource */
  
     union {
        struct {
@@ -591,6 +593,13 @@ struct pipe_resolve_info
     unsigned mask; /**< PIPE_MASK_RGBA, Z, S or ZS */
  };
  
+struct pipe_compute_state
+{
+   const void *prog; /**< Compute program to be executed. */
+   unsigned req_local_mem; /**< Required size of the LOCAL resource. */
+   unsigned req_private_mem; /**< Required size of the PRIVATE resource. */
+   unsigned req_input_mem; /**< Required size of the INPUT resource. */
+};
  
  #ifdef __cplusplus
  }
diff --git a/src/gallium/state_trackers/Makefile b/src/gallium/state_trackers/Makefile

index 0900efc664fb68499882586a8bfa16794a9e0db8..d5162c17507e25a8af17adf38c25439eaacb2bf4 100644 (file)
--- a/src/gallium/state_trackers/Makefile
+++ b/src/gallium/state_trackers/Makefile
@@ -17,7 +17,7 @@ subdirs:
  
  
  clean:
-       rm -f `find . -name \*.[oa]`
+       rm -f `find . -regex '.*\.l?[oa]'`
         rm -f `find . -name depend`
  
  
diff --git a/src/gallium/state_trackers/clover/Doxyfile b/src/gallium/state_trackers/clover/Doxyfile

new file mode 100644 (file)

index 0000000..50250e7
--- /dev/null
+++ b/src/gallium/state_trackers/clover/Doxyfile
@@ -0,0 +1,1716 @@
+# Doxyfile 1.7.4
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a hash (#) is considered a comment and will be ignored.
+# The format is:
+#       TAG = value [value, ...]
+# For lists items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME           = Clover
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER         =
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer
+# a quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF          =
+
+# With the PROJECT_LOGO tag one can specify a logo or icon that is
+# included in the documentation. The maximum height of the logo should not
+# exceed 55 pixels and the maximum width should not exceed 200 pixels.
+# Doxygen will copy the logo to the output directory.
+
+PROJECT_LOGO           =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       =
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak,
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF       =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before file names in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES        = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH        =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF      = YES
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE               = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given extension.
+# Doxygen has a built-in mapping, but you can override or extend it using this
+# tag. The format is ext=language, where ext is a file extension, and language
+# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C,
+# C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make
+# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
+# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions
+# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); vs.
+# func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT    = YES
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate getter
+# and setter methods for a property. Setting this option to YES (the default)
+# will make doxygen replace the get and set methods by a property in the
+# documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING            = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and
+# unions are shown inside the group in which they are included (e.g. using
+# @ingroup) instead of on a separate page (for HTML and Man pages) or
+# section (for LaTeX and RTF).
+
+INLINE_GROUPED_CLASSES = NO
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
+# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk.
+# For small to medium size projects (<1000 input files) the default value is
+# probably good enough. For larger projects a too small cache size can cause
+# doxygen to be busy swapping symbols to and from disk most of the time
+# causing a significant performance penalty.
+# If the system has enough physical memory increasing the cache will improve the
+# performance by keeping more symbols in memory. Note that the value works on
+# a logarithmic scale so increasing the size by one will roughly double the
+# memory usage. The cache size is given by this formula:
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols
+
+SYMBOL_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL            = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE        = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC         = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespaces are hidden.
+
+EXTRACT_ANON_NSPACES   = YES
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES       = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
+# will list include files with double quotes in the documentation
+# rather than with sharp brackets.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
+# will sort the (brief and detailed) documentation of class members so that
+# constructors and destructors are listed first. If set to NO (the default)
+# the constructors will appear in the respective orders defined by
+# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
+# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
+# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to
+# do proper type resolution of all parameters of a function it will reject a
+# match between the prototype and the implementation of a member function even
+# if there is only one candidate or it is obvious which candidate to choose
+# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen
+# will still accept a match between prototype and implementation in such cases.
+
+STRICT_PROTO_MATCHING  = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or macro consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and macros in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES        = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES       = NO
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.
+# This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option.
+# You can optionally specify a file name after the option, if omitted
+# DoxygenLayout.xml will be used as the name of the layout file.
+
+LAYOUT_FILE            =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED   = NO
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR      = YES
+
+# The WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC       = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  = api/ core/
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh
+# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py
+# *.f90 *.f *.for *.vhd *.vhdl
+
+FILE_PATTERNS          =
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE              = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS       =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+# If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis.
+# Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.
+# The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty or if
+# none of the patterns match the file name, INPUT_FILTER is applied.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES    = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERN (if any)
+# and it is also possible to disable source filtering for a specific pattern
+# using *.ext= (so without naming a filter). This option only has effect when
+# FILTER_SOURCE_FILES is enabled.
+
+FILTER_SOURCE_PATTERNS =
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.
+# Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX     = YES
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header. Note that when using a custom header you are responsible
+# for the proper inclusion of any scripts and style sheets that doxygen
+# needs, which is dependent on the configuration options used.
+# It is advised to generate a default header using "doxygen -w html
+# header.html footer.html stylesheet.css YourConfigFile" and then modify
+# that header. Note that the header is subject to change so you typically
+# have to redo this when upgrading to a newer version of doxygen or when changing the value of configuration settings such as GENERATE_TREEVIEW!
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET        =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that
+# the files will be copied as-is; there are no commands or markers available.
+
+HTML_EXTRA_FILES       =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
+# Doxygen will adjust the colors in the stylesheet and background images
+# according to this color. Hue is specified as an angle on a colorwheel,
+# see http://en.wikipedia.org/wiki/Hue for more information.
+# For instance the value 0 represents red, 60 is yellow, 120 is green,
+# 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
+# The allowed range is 0 to 359.
+
+HTML_COLORSTYLE_HUE    = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
+# the colors in the HTML output. For a value of 0 the output will use
+# grayscales only. A value of 255 will produce the most vivid colors.
+
+HTML_COLORSTYLE_SAT    = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
+# the luminance component of the colors in the HTML output. Values below
+# 100 gradually make the output lighter, whereas values above 100 make
+# the output darker. The value divided by 100 is the actual gamma applied,
+# so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2,
+# and 100 does not change the gamma.
+
+HTML_COLORSTYLE_GAMMA  = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting
+# this to NO can help when comparing the output of multiple runs.
+
+HTML_TIMESTAMP         = YES
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS     = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
+# Netscape 6.0+, Internet Explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
+
+GENERATE_DOCSET        = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+
+DOCSET_PUBLISHER_NAME  = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP      = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE               =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION           =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI           = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING     =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
+# that can be used as input for Qt's qhelpgenerator to generate a
+# Qt Compressed Help (.qch) of the generated HTML documentation.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
+# add. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
+# Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
+# Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION           =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
+# will be generated, which together with the HTML files, form an Eclipse help
+# plugin. To install this plugin and make it available under the help contents
+# menu in Eclipse, the contents of the directory containing the HTML and XML
+# files needs to be copied into the plugins directory of eclipse. The name of
+# the directory within the plugins directory should be the same as
+# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
+# the help appears.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have
+# this name.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX          = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
+# (range [0,1..20]) that doxygen will group on one line in the generated HTML
+# documentation. Note that a value of 0 will completely suppress the enum
+# values from appearing in the overview section.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW      = NO
+
+# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
+# and Class Hierarchy pages using a tree view instead of an ordered list.
+
+USE_INLINE_TREES       = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH         = 250
+
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
+# links to external symbols imported via tag files in a separate window.
+
+EXT_LINKS_IN_WINDOW    = NO
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE       = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are
+# not supported properly for IE 6.0, but are supported on all modern browsers.
+# Note that when changing this option you need to delete any form_*.png files
+# in the HTML output before the changes have effect.
+
+FORMULA_TRANSPARENT    = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
+# (see http://www.mathjax.org) which uses client side Javascript for the
+# rendering instead of using prerendered bitmaps. Use this if you do not
+# have LaTeX installed or if you want the formulas to look prettier in the HTML
+# output. When enabled you also need to install MathJax separately and
+# configure the path to it using the MATHJAX_RELPATH option.
+
+USE_MATHJAX            = NO
+
+# When MathJax is enabled you need to specify the location relative to the
+# HTML output directory using the MATHJAX_RELPATH option. The destination
+# directory should contain the MathJax.js script. For instance, if the mathjax
+# directory is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the
+# mathjax.org site, so you can quickly see the result without installing
+# MathJax, but it is strongly recommended to install a local copy of MathJax
+# before deployment.
+
+MATHJAX_RELPATH        = http://www.mathjax.org/mathjax
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box
+# for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using
+# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
+# (GENERATE_DOCSET) there is already a search function so this one should
+# typically be disabled. For large projects the javascript based search engine
+# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a PHP enabled web server instead of at the web client
+# using Javascript. Doxygen will generate the search PHP script and index
+# file to put on the web server. The advantage of the server
+# based approach is that it scales better to large projects and allows
+# full text search. The disadvantages are that it is more difficult to setup
+# and does not have live searching capabilities.
+
+SERVER_BASED_SEARCH    = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX         = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+# Note that when enabling USE_PDFLATEX this option is only used for
+# generating bitmaps for formulas in the HTML output, but not in the
+# Makefile that is written to the output directory.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE             = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER           =
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for
+# the generated latex document. The footer should contain everything after
+# the last chapter. If it is left blank doxygen will generate a
+# standard footer. Notice: only use this tag if you know what you are doing!
+
+LATEX_FOOTER           =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE        = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES     = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include
+# source code with syntax highlighting in the LaTeX output.
+# Note that which sources are shown also depends on other settings
+# such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE      = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output.
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE    =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES Doxygen will
+# generate man pages.
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA             =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD                =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader.
+# This is useful
+# if you want to understand what is going on.
+# On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
+# pointed to by INCLUDE_PATH will be searched when a #include is found.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition that
+# overrules the definition found in the source code.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all references to function-like macros
+# that are alone on a line, have an all uppercase name, and do not end with a
+# semicolon, because these will confuse the parser if not removed.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+#
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+#
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS        = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option also works with HAVE_DOT disabled, but it is recommended to
+# install and use dot, since it yields more powerful graphs.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH            =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT               = NO
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
+# allowed to run in parallel. When set to 0 (the default) doxygen will
+# base this on the number of processors available in the system. You can set it
+# explicitly to a value larger than 0 to get control over the balance
+# between CPU load and processing speed.
+
+DOT_NUM_THREADS        = 0
+
+# By default doxygen will write a font called Helvetica to the output
+# directory and reference it in all dot files that doxygen generates.
+# When you want a differently looking font you can specify the font name
+# using DOT_FONTNAME. You need to make sure dot is able to find the font,
+# which can be done by putting it in a standard location or by setting the
+# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
+# containing the font.
+
+DOT_FONTNAME           = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the output directory to look for the
+# FreeSans.ttf font (which doxygen will put there itself). If you specify a
+# different font using DOT_FONTNAME you can set the path where dot
+# can find it using this tag.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct group dependencies.
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK               = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH          = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are svg, png, jpg, or gif.
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT       = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the
+# \mscfile command).
+
+MSCFILE_DIRS           =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lie further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP            = YES
diff --git a/src/gallium/state_trackers/clover/Makefile.am b/src/gallium/state_trackers/clover/Makefile.am

new file mode 100644 (file)

index 0000000..da9f3bb
--- /dev/null
+++ b/src/gallium/state_trackers/clover/Makefile.am
@@ -0,0 +1,71 @@
+AUTOMAKE_OPTIONS = subdir-objects
+
+AM_CPPFLAGS = \
+       $(GALLIUM_PIPE_LOADER_DEFINES) \
+       -DMESA_VERSION=\"$(MESA_VERSION)\" \
+       -DPIPE_SEARCH_DIR=\"$(OPENCL_LIB_INSTALL_DIR)\" \
+       -I$(top_srcdir)/include \
+       -I$(top_srcdir)/src/gallium/include \
+       -I$(top_srcdir)/src/gallium/drivers \
+       -I$(top_srcdir)/src/gallium/auxiliary \
+       -I$(top_srcdir)/src/gallium/winsys \
+       -I$(srcdir)
+
+noinst_LTLIBRARIES = libclover.la libcltgsi.la libclllvm.la
+
+libcltgsi_la_CXXFLAGS = \
+       -std=c++0x
+
+libcltgsi_la_SOURCES = \
+       tgsi/compiler.cpp
+
+libclllvm_la_CXXFLAGS = \
+       -std=c++98
+
+libclllvm_la_SOURCES = \
+       llvm/invocation.cpp
+
+libclover_la_CXXFLAGS = \
+       -std=c++0x
+
+libclover_la_LIBADD = \
+       libcltgsi.la libclllvm.la
+
+libclover_la_SOURCES = \
+       core/base.hpp \
+       core/compat.hpp \
+       core/compiler.hpp \
+       core/geometry.hpp \
+       core/device.hpp \
+       core/device.cpp \
+       core/context.hpp \
+       core/context.cpp \
+       core/queue.hpp \
+       core/queue.cpp \
+       core/format.hpp \
+       core/format.cpp \
+       core/memory.hpp \
+       core/memory.cpp \
+       core/resource.hpp \
+       core/resource.cpp \
+       core/sampler.hpp \
+       core/sampler.cpp \
+       core/event.hpp \
+       core/event.cpp \
+       core/program.hpp \
+       core/program.cpp \
+       core/kernel.hpp \
+       core/kernel.cpp \
+       core/module.hpp \
+       core/module.cpp \
+       api/util.hpp \
+       api/platform.cpp \
+       api/device.cpp \
+       api/context.cpp \
+       api/queue.cpp \
+       api/memory.cpp \
+       api/transfer.cpp \
+       api/sampler.cpp \
+       api/event.cpp \
+       api/program.cpp \
+       api/kernel.cpp
diff --git a/src/gallium/state_trackers/clover/api/context.cpp b/src/gallium/state_trackers/clover/api/context.cpp

new file mode 100644 (file)

index 0000000..c8d6689
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/context.cpp
@@ -0,0 +1,120 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+#include "core/context.hpp"
+
+using namespace clover;
+
+PUBLIC cl_context
+clCreateContext(const cl_context_properties *props, cl_uint num_devs,
+                const cl_device_id *devs,
+                void (CL_CALLBACK *pfn_notify)(const char *, const void *,
+                                               size_t, void *),
+                void *user_data, cl_int *errcode_ret) try {
+   auto mprops = property_map(props);
+
+   if (!devs || !num_devs ||
+       (!pfn_notify && user_data))
+      throw error(CL_INVALID_VALUE);
+
+   if (any_of(is_zero<cl_device_id>(), devs, devs + num_devs))
+      throw error(CL_INVALID_DEVICE);
+
+   for (auto p : mprops) {
+      if (!(p.first == CL_CONTEXT_PLATFORM &&
+            (cl_platform_id)p.second == NULL))
+         throw error(CL_INVALID_PROPERTY);
+   }
+
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new context(
+      property_vector(mprops),
+      std::vector<cl_device_id>(devs, devs + num_devs));
+
+} catch(error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+PUBLIC cl_context
+clCreateContextFromType(const cl_context_properties *props,
+                        cl_device_type type,
+                        void (CL_CALLBACK *pfn_notify)(
+                           const char *, const void *, size_t, void *),
+                        void *user_data, cl_int *errcode_ret) {
+   cl_device_id dev;
+   cl_int ret;
+
+   ret = clGetDeviceIDs(0, type, 1, &dev, 0);
+   if (ret) {
+      ret_error(errcode_ret, ret);
+      return NULL;
+   }
+
+   return clCreateContext(props, 1, &dev, pfn_notify, user_data, errcode_ret);
+}
+
+PUBLIC cl_int
+clRetainContext(cl_context ctx) {
+   if (ctx) {
+      ctx->retain();
+      return CL_SUCCESS;
+   }
+   return CL_INVALID_CONTEXT;   // NULL handle: nothing to retain
+}
+
+PUBLIC cl_int
+clReleaseContext(cl_context ctx) {
+   if (ctx == NULL)
+      return CL_INVALID_CONTEXT;
+   // Destroy the object only when release() asks for it.
+   const bool destroy = ctx->release();
+   if (destroy)
+      delete ctx;
+   return CL_SUCCESS;
+}
+
+PUBLIC cl_int
+clGetContextInfo(cl_context ctx, cl_context_info param,
+                 size_t size, void *buf, size_t *size_ret) {
+   if (!ctx)
+      return CL_INVALID_CONTEXT;
+
+   switch (param) {
+   case CL_CONTEXT_REFERENCE_COUNT:
+      return scalar_property<cl_uint>(buf, size, size_ret, ctx->ref_count());
+
+   case CL_CONTEXT_NUM_DEVICES:
+      return scalar_property<cl_uint>(buf, size, size_ret, ctx->devs.size());
+
+   case CL_CONTEXT_DEVICES:
+      return vector_property<cl_device_id>(buf, size, size_ret, ctx->devs);
+
+   case CL_CONTEXT_PROPERTIES:
+      return vector_property<cl_context_properties>(buf, size, size_ret,
+                                                    ctx->props());
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
diff --git a/src/gallium/state_trackers/clover/api/device.cpp b/src/gallium/state_trackers/clover/api/device.cpp

new file mode 100644 (file)

index 0000000..0376751
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -0,0 +1,262 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+#include "core/device.hpp"
+
+using namespace clover;
+
+static device_registry registry;
+
+PUBLIC cl_int
+clGetDeviceIDs(cl_platform_id platform, cl_device_type device_type,
+               cl_uint num_entries, cl_device_id *devices,
+               cl_uint *num_devices) {
+   std::vector<cl_device_id> devs;
+
+   if (platform != NULL)
+      return CL_INVALID_PLATFORM;
+
+   if ((!num_entries && devices) ||
+       (!num_devices && !devices))
+      return CL_INVALID_VALUE;
+
+   // Collect matching devices
+   for (device &dev : registry) {
+      if (((device_type & CL_DEVICE_TYPE_DEFAULT) &&
+           &dev == &registry.front()) ||
+          (device_type & dev.type()))
+         devs.push_back(&dev);
+   }
+
+   if (devs.empty())
+      return CL_DEVICE_NOT_FOUND;
+
+   // ...and return the requested data.
+   if (num_devices)
+      *num_devices = devs.size();
+   if (devices)
+      std::copy_n(devs.begin(),
+                  std::min((cl_uint)devs.size(), num_entries),
+                  devices);
+
+   return CL_SUCCESS;
+}
+
+PUBLIC cl_int
+clGetDeviceInfo(cl_device_id dev, cl_device_info param,
+                size_t size, void *buf, size_t *size_ret) {
+   if (!dev)
+      return CL_INVALID_DEVICE;
+
+   switch (param) {   // one property-helper call per supported query
+   case CL_DEVICE_TYPE:
+      return scalar_property<cl_device_type>(buf, size, size_ret, dev->type());
+
+   case CL_DEVICE_VENDOR_ID:
+      return scalar_property<cl_uint>(buf, size, size_ret, dev->vendor_id());
+
+   case CL_DEVICE_MAX_COMPUTE_UNITS:
+      return scalar_property<cl_uint>(buf, size, size_ret, 1);   // fixed value, not read from dev
+
+   case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      dev->max_block_size().size());
+
+   case CL_DEVICE_MAX_WORK_ITEM_SIZES:
+      return vector_property<size_t>(buf, size, size_ret,
+                                     dev->max_block_size());
+
+   case CL_DEVICE_MAX_WORK_GROUP_SIZE:
+      return scalar_property<size_t>(buf, size, size_ret, SIZE_MAX);   // fixed value, not read from dev
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:
+      return scalar_property<cl_uint>(buf, size, size_ret, 16);
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT:
+      return scalar_property<cl_uint>(buf, size, size_ret, 8);
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:
+      return scalar_property<cl_uint>(buf, size, size_ret, 4);
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:
+      return scalar_property<cl_uint>(buf, size, size_ret, 2);
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT:
+      return scalar_property<cl_uint>(buf, size, size_ret, 4);
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
+      return scalar_property<cl_uint>(buf, size, size_ret, 2);
+
+   case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
+      return scalar_property<cl_uint>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_MAX_CLOCK_FREQUENCY:
+      return scalar_property<cl_uint>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_ADDRESS_BITS:
+      return scalar_property<cl_uint>(buf, size, size_ret, 32);
+
+   case CL_DEVICE_MAX_READ_IMAGE_ARGS:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      dev->max_images_read());
+
+   case CL_DEVICE_MAX_WRITE_IMAGE_ARGS:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      dev->max_images_write());
+
+   case CL_DEVICE_MAX_MEM_ALLOC_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_IMAGE2D_MAX_WIDTH:
+   case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
+      return scalar_property<size_t>(buf, size, size_ret,
+                                     1 << dev->max_image_levels_2d());   // reported as 2^levels
+
+   case CL_DEVICE_IMAGE3D_MAX_WIDTH:
+   case CL_DEVICE_IMAGE3D_MAX_HEIGHT:
+   case CL_DEVICE_IMAGE3D_MAX_DEPTH:
+      return scalar_property<size_t>(buf, size, size_ret,
+                                     1 << dev->max_image_levels_3d());   // reported as 2^levels
+
+   case CL_DEVICE_IMAGE_SUPPORT:
+      return scalar_property<cl_bool>(buf, size, size_ret, CL_TRUE);
+
+   case CL_DEVICE_MAX_PARAMETER_SIZE:
+      return scalar_property<size_t>(buf, size, size_ret,
+                                     dev->max_mem_input());
+
+   case CL_DEVICE_MAX_SAMPLERS:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      dev->max_samplers());
+
+   case CL_DEVICE_MEM_BASE_ADDR_ALIGN:
+   case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE:
+      return scalar_property<cl_uint>(buf, size, size_ret, 128);
+
+   case CL_DEVICE_SINGLE_FP_CONFIG:
+      return scalar_property<cl_device_fp_config>(buf, size, size_ret,
+         CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST);
+
+   case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
+      return scalar_property<cl_device_mem_cache_type>(buf, size, size_ret,
+                                                       CL_NONE);
+
+   case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE:
+      return scalar_property<cl_uint>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_GLOBAL_MEM_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret,
+                                       dev->max_mem_global());
+
+   case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret,
+                                       dev->max_const_buffer_size());
+
+   case CL_DEVICE_MAX_CONSTANT_ARGS:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      dev->max_const_buffers());
+
+   case CL_DEVICE_LOCAL_MEM_TYPE:
+      return scalar_property<cl_device_local_mem_type>(buf, size, size_ret,
+                                                       CL_LOCAL);
+
+   case CL_DEVICE_LOCAL_MEM_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret,
+                                       dev->max_mem_local());
+
+   case CL_DEVICE_ERROR_CORRECTION_SUPPORT:
+      return scalar_property<cl_bool>(buf, size, size_ret, CL_FALSE);
+
+   case CL_DEVICE_PROFILING_TIMER_RESOLUTION:
+      return scalar_property<size_t>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_ENDIAN_LITTLE:
+      return scalar_property<cl_bool>(buf, size, size_ret, CL_TRUE);
+
+   case CL_DEVICE_AVAILABLE:
+   case CL_DEVICE_COMPILER_AVAILABLE:
+      return scalar_property<cl_bool>(buf, size, size_ret, CL_TRUE);
+
+   case CL_DEVICE_EXECUTION_CAPABILITIES:
+      return scalar_property<cl_device_exec_capabilities>(buf, size, size_ret,
+                                                          CL_EXEC_KERNEL);
+
+   case CL_DEVICE_QUEUE_PROPERTIES:
+      return scalar_property<cl_command_queue_properties>(buf, size, size_ret,
+         CL_QUEUE_PROFILING_ENABLE);
+
+   case CL_DEVICE_NAME:
+      return string_property(buf, size, size_ret, dev->device_name());
+
+   case CL_DEVICE_VENDOR:
+      return string_property(buf, size, size_ret, dev->vendor_name());
+
+   case CL_DRIVER_VERSION:
+      return string_property(buf, size, size_ret, MESA_VERSION);
+
+   case CL_DEVICE_PROFILE:
+      return string_property(buf, size, size_ret, "FULL_PROFILE");
+
+   case CL_DEVICE_VERSION:
+      return string_property(buf, size, size_ret, "OpenCL 1.1 MESA " MESA_VERSION);
+
+   case CL_DEVICE_EXTENSIONS:
+      return string_property(buf, size, size_ret, "");   // no extensions advertised
+
+   case CL_DEVICE_PLATFORM:
+      return scalar_property<cl_platform_id>(buf, size, size_ret, NULL);
+
+   case CL_DEVICE_HOST_UNIFIED_MEMORY:
+      return scalar_property<cl_bool>(buf, size, size_ret, CL_TRUE);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR:
+      return scalar_property<cl_uint>(buf, size, size_ret, 16);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT:
+      return scalar_property<cl_uint>(buf, size, size_ret, 8);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT:
+      return scalar_property<cl_uint>(buf, size, size_ret, 4);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG:
+      return scalar_property<cl_uint>(buf, size, size_ret, 2);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT:
+      return scalar_property<cl_uint>(buf, size, size_ret, 4);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
+      return scalar_property<cl_uint>(buf, size, size_ret, 2);
+
+   case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
+      return scalar_property<cl_uint>(buf, size, size_ret, 0);
+
+   case CL_DEVICE_OPENCL_C_VERSION:
+      return string_property(buf, size, size_ret, "OpenCL C 1.1");
+
+   default:   // query not handled by this implementation
+      return CL_INVALID_VALUE;
+   }
+}
diff --git a/src/gallium/state_trackers/clover/api/event.cpp b/src/gallium/state_trackers/clover/api/event.cpp

new file mode 100644 (file)

index 0000000..d6c37f6
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/event.cpp
@@ -0,0 +1,239 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+#include "core/event.hpp"
+
+using namespace clover;
+
+PUBLIC cl_event
+clCreateUserEvent(cl_context ctx, cl_int *errcode_ret) try {
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new soft_event(*ctx, {}, false);
+
+} catch(error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+PUBLIC cl_int
+clSetUserEventStatus(cl_event ev, cl_int status) {
+   // A NULL handle fails this cast too, so no separate check is needed.
+   const bool is_soft = dynamic_cast<soft_event *>(ev) != NULL;
+   if (!is_soft)
+      return CL_INVALID_EVENT;
+   if (status > 0)
+      return CL_INVALID_VALUE;
+   if (ev->status() <= 0)
+      return CL_INVALID_OPERATION;
+
+   // Zero triggers the event; a negative status aborts it.
+   if (status == 0)
+      ev->trigger();
+   else
+      ev->abort(status);
+   return CL_SUCCESS;
+}
+
+PUBLIC cl_int
+clWaitForEvents(cl_uint num_evs, const cl_event *evs) try {
+   if (!num_evs || !evs)
+      throw error(CL_INVALID_VALUE);
+
+   std::for_each(evs, evs + num_evs, [&](const cl_event ev) {
+         if (!ev)
+            throw error(CL_INVALID_EVENT);
+
+         if (&ev->ctx != &evs[0]->ctx)
+            throw error(CL_INVALID_CONTEXT);
+
+         if (ev->status() < 0)
+            throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
+      });
+
+   // Create a temporary soft event that depends on all the events in
+   // the wait list
+   ref_ptr<soft_event> sev = transfer(
+      new soft_event(evs[0]->ctx, { evs, evs + num_evs }, true));
+
+   // ...and wait on it.
+   sev->wait();
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+PUBLIC cl_int
+clGetEventInfo(cl_event ev, cl_event_info param,
+               size_t size, void *buf, size_t *size_ret) {
+   if (!ev)
+      return CL_INVALID_EVENT;
+
+   switch (param) {
+   case CL_EVENT_COMMAND_QUEUE:
+      return scalar_property<cl_command_queue>(buf, size, size_ret, ev->queue());
+
+   case CL_EVENT_CONTEXT:
+      return scalar_property<cl_context>(buf, size, size_ret, &ev->ctx);
+
+   case CL_EVENT_COMMAND_TYPE:
+      return scalar_property<cl_command_type>(buf, size, size_ret, ev->command());
+
+   case CL_EVENT_COMMAND_EXECUTION_STATUS:
+      return scalar_property<cl_int>(buf, size, size_ret, ev->status());
+
+   case CL_EVENT_REFERENCE_COUNT:
+      return scalar_property<cl_uint>(buf, size, size_ret, ev->ref_count());
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
+
+PUBLIC cl_int
+clSetEventCallback(cl_event ev, cl_int type,
+                   void (CL_CALLBACK *pfn_event_notify)(cl_event, cl_int,
+                                                        void *),
+                   void *user_data) try {
+   if (!ev)
+      throw error(CL_INVALID_EVENT);
+
+   if (!pfn_event_notify || type != CL_COMPLETE)   // only CL_COMPLETE callbacks are accepted
+      throw error(CL_INVALID_VALUE);
+
+   // Create a temporary soft event that depends on ev; its action
+   // waits on ev, then calls pfn_event_notify with ev's status.
+   ref_ptr<soft_event> sev = transfer(
+      new soft_event(ev->ctx, { ev }, true,
+                     [=](event &) {   // captures ev, pfn_event_notify and user_data by value
+                        ev->wait();
+                        pfn_event_notify(ev, ev->status(), user_data);
+                     }));
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+PUBLIC cl_int
+clRetainEvent(cl_event ev) {
+   if (ev) {
+      ev->retain();
+      return CL_SUCCESS;
+   }
+   return CL_INVALID_EVENT;   // NULL handle: nothing to retain
+}
+
+PUBLIC cl_int
+clReleaseEvent(cl_event ev) {
+   if (ev == NULL)
+      return CL_INVALID_EVENT;
+   // Destroy the object only when release() asks for it.
+   const bool destroy = ev->release();
+   if (destroy)
+      delete ev;
+   return CL_SUCCESS;
+}
+
+PUBLIC cl_int
+clEnqueueMarker(cl_command_queue q, cl_event *ev) try {
+   if (!q)
+      throw error(CL_INVALID_COMMAND_QUEUE);
+
+   if (!ev)
+      throw error(CL_INVALID_VALUE);
+
+   *ev = new hard_event(*q, CL_COMMAND_MARKER, {});
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+PUBLIC cl_int
+clEnqueueBarrier(cl_command_queue q) {
+   // A barrier needs no work: q already preserves data ordering
+   // strictly, so only the handle is validated.
+   const cl_int result = q ? CL_SUCCESS : CL_INVALID_COMMAND_QUEUE;
+
+   return result;
+}
+
+PUBLIC cl_int
+clEnqueueWaitForEvents(cl_command_queue q, cl_uint num_evs,
+                       const cl_event *evs) try {
+   if (!q)
+      throw error(CL_INVALID_COMMAND_QUEUE);
+
+   if (!num_evs || !evs)
+      throw error(CL_INVALID_VALUE);
+
+   std::for_each(evs, evs + num_evs, [&](const cl_event ev) {
+         if (!ev)
+            throw error(CL_INVALID_EVENT);
+
+         if (&ev->ctx != &q->ctx)
+            throw error(CL_INVALID_CONTEXT);
+      });
+
+   // Create a hard event that depends on the events in the wait list:
+   // subsequent commands in the same queue will be implicitly
+   // serialized with respect to it -- hard events always are.
+   ref_ptr<hard_event> hev = transfer(
+      new hard_event(*q, 0, { evs, evs + num_evs }));
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+PUBLIC cl_int
+clGetEventProfilingInfo(cl_event ev, cl_profiling_info param,
+                        size_t size, void *buf, size_t *size_ret) {
+   return CL_PROFILING_INFO_NOT_AVAILABLE;   // every query is refused; no argument is inspected
+}
+
+PUBLIC cl_int
+clFinish(cl_command_queue q) try {
+   if (!q)
+      throw error(CL_INVALID_COMMAND_QUEUE);
+
+   // Create a temporary hard event -- it implicitly depends on all
+   // the previously queued hard events.
+   ref_ptr<hard_event> hev = transfer(new hard_event(*q, 0, { }));
+
+   // And wait on it.
+   hev->wait();
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp b/src/gallium/state_trackers/clover/api/kernel.cpp

new file mode 100644 (file)

index 0000000..44eeb27
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/kernel.cpp
@@ -0,0 +1,318 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+#include "core/kernel.hpp"
+#include "core/event.hpp"
+
+using namespace clover;
+
+///
+/// Create a kernel object for the function \a name of program \a prog.
+///
+/// The symbol is looked up in the first entry of prog->binaries().
+/// Returns NULL on failure, with the error code handed to ret_error()
+/// along with \a errcode_ret: CL_INVALID_PROGRAM for a NULL \a prog,
+/// CL_INVALID_VALUE for a NULL \a name, CL_INVALID_PROGRAM_EXECUTABLE
+/// when the program has no binaries and CL_INVALID_KERNEL_NAME when
+/// the symbol lookup raises module::noent_error.
+///
+PUBLIC cl_kernel
+clCreateKernel(cl_program prog, const char *name,
+               cl_int *errcode_ret) try {
+   if (!prog)
+      throw error(CL_INVALID_PROGRAM);
+
+   if (!name)
+      throw error(CL_INVALID_VALUE);
+
+   if (prog->binaries().empty())
+      throw error(CL_INVALID_PROGRAM_EXECUTABLE);
+
+   auto sym = prog->binaries().begin()->second.sym(name);
+
+   // Success is reported before the kernel is constructed; an error
+   // thrown by the constructor overwrites it in the handlers below.
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new kernel(*prog, name, { sym.args.begin(), sym.args.end() });
+
+} catch (module::noent_error &e) {
+   ret_error(errcode_ret, CL_INVALID_KERNEL_NAME);
+   return NULL;
+
+} catch(error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+PUBLIC cl_int
+clCreateKernelsInProgram(cl_program prog, cl_uint count,
+                         cl_kernel *kerns, cl_uint *count_ret) {
+   if (!prog)
+      throw error(CL_INVALID_PROGRAM);
+
+   if (prog->binaries().empty())
+      throw error(CL_INVALID_PROGRAM_EXECUTABLE);
+
+   auto &syms = prog->binaries().begin()->second.syms;
+
+   if (kerns && count < syms.size())
+      throw error(CL_INVALID_VALUE);
+
+   if (kerns)
+      std::transform(syms.begin(), syms.end(), kerns,
+                     [=](const module::symbol &sym) {
+                        return new kernel(*prog, compat::string(sym.name),
+                                          { sym.args.begin(), sym.args.end() });
+                     });
+
+   if (count_ret)
+      *count_ret = syms.size();
+
+   return CL_SUCCESS;
+}
+
+///
+/// Take an additional reference on kernel \a kern.
+///
+/// Returns CL_INVALID_KERNEL for a NULL handle, CL_SUCCESS otherwise.
+///
+PUBLIC cl_int
+clRetainKernel(cl_kernel kern) {
+   if (kern) {
+      kern->retain();
+      return CL_SUCCESS;
+   }
+
+   return CL_INVALID_KERNEL;
+}
+
+///
+/// Drop a reference on kernel \a kern, destroying it when release()
+/// asks for it.
+///
+/// Returns CL_INVALID_KERNEL for a NULL handle, CL_SUCCESS otherwise.
+///
+PUBLIC cl_int
+clReleaseKernel(cl_kernel kern) {
+   if (kern) {
+      const bool destroy = kern->release();
+
+      if (destroy)
+         delete kern;
+
+      return CL_SUCCESS;
+   }
+
+   return CL_INVALID_KERNEL;
+}
+
+PUBLIC cl_int
+clSetKernelArg(cl_kernel kern, cl_uint idx, size_t size,
+               const void *value) try {
+   if (!kern)
+      throw error(CL_INVALID_KERNEL);
+
+   if (idx >= kern->args.size())
+      throw error(CL_INVALID_ARG_INDEX);
+
+   kern->args[idx]->set(size, value);
+
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Query property \a param of kernel \a kern.
+///
+/// The selected value is passed to the *_property helpers together
+/// with \a buf, \a size and \a size_ret (presumably they copy it out
+/// and report its size -- see api/util.hpp).  Returns
+/// CL_INVALID_KERNEL for a NULL kernel and CL_INVALID_VALUE for an
+/// unknown \a param.
+///
+PUBLIC cl_int
+clGetKernelInfo(cl_kernel kern, cl_kernel_info param,
+                size_t size, void *buf, size_t *size_ret) {
+   if (!kern)
+      return CL_INVALID_KERNEL;
+
+   switch (param) {
+   case CL_KERNEL_FUNCTION_NAME:
+      return string_property(buf, size, size_ret, kern->name());
+
+   case CL_KERNEL_NUM_ARGS:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      kern->args.size());
+
+   case CL_KERNEL_REFERENCE_COUNT:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      kern->ref_count());
+
+   case CL_KERNEL_CONTEXT:
+      return scalar_property<cl_context>(buf, size, size_ret,
+                                         &kern->prog.ctx);
+
+   case CL_KERNEL_PROGRAM:
+      return scalar_property<cl_program>(buf, size, size_ret,
+                                         &kern->prog);
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
+
+///
+/// Query work-group related property \a param of kernel \a kern.
+///
+/// \a dev may be NULL only if the kernel's program has exactly one
+/// binary; otherwise the program must have a binary for \a dev.
+/// Returns CL_INVALID_KERNEL for a NULL kernel, CL_INVALID_DEVICE when
+/// the device check fails and CL_INVALID_VALUE for an unknown
+/// \a param.
+///
+PUBLIC cl_int
+clGetKernelWorkGroupInfo(cl_kernel kern, cl_device_id dev,
+                         cl_kernel_work_group_info param,
+                         size_t size, void *buf, size_t *size_ret) {
+   if (!kern)
+      return CL_INVALID_KERNEL;
+
+   if ((!dev && kern->prog.binaries().size() != 1) ||
+       (dev && !kern->prog.binaries().count(dev)))
+      return CL_INVALID_DEVICE;
+
+   // Note that none of the kernel queries below takes dev as an
+   // argument: it is only used by the check above.
+   switch (param) {
+   case CL_KERNEL_WORK_GROUP_SIZE:
+      return scalar_property<size_t>(buf, size, size_ret,
+                                     kern->max_block_size());
+
+   case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
+      return vector_property<size_t>(buf, size, size_ret,
+                                     kern->block_size());
+
+   case CL_KERNEL_LOCAL_MEM_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret,
+                                       kern->mem_local());
+
+   case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
+      // Reported as the constant 1.
+      return scalar_property<size_t>(buf, size, size_ret, 1);
+
+   case CL_KERNEL_PRIVATE_MEM_SIZE:
+      return scalar_property<cl_ulong>(buf, size, size_ret,
+                                       kern->mem_private());
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
+
+namespace {
+   ///
+   /// Common argument checking shared by kernel invocation commands.
+   ///
+   void
+   kernel_validate(cl_command_queue q, cl_kernel kern,
+                   cl_uint dims, const size_t *grid_offset,
+                   const size_t *grid_size, const size_t *block_size,
+                   cl_uint num_deps, const cl_event *deps,
+                   cl_event *ev) {
+      if (!q)
+         throw error(CL_INVALID_COMMAND_QUEUE);
+
+      if (!kern)
+         throw error(CL_INVALID_KERNEL);
+
+      if (&kern->prog.ctx != &q->ctx ||
+          any_of([&](const cl_event ev) {
+                return &ev->ctx != &q->ctx;
+             }, deps, deps + num_deps))
+         throw error(CL_INVALID_CONTEXT);
+
+      if (bool(num_deps) != bool(deps) ||
+          any_of(is_zero<cl_event>(), deps, deps + num_deps))
+         throw error(CL_INVALID_EVENT_WAIT_LIST);
+
+      if (any_of([](std::unique_ptr<kernel::argument> &arg) {
+               return !arg->set();
+            }, kern->args.begin(), kern->args.end()))
+         throw error(CL_INVALID_KERNEL_ARGS);
+
+      if (!kern->prog.binaries().count(&q->dev))
+         throw error(CL_INVALID_PROGRAM_EXECUTABLE);
+
+      if (dims < 1 || dims > q->dev.max_block_size().size())
+         throw error(CL_INVALID_WORK_DIMENSION);
+
+      if (!grid_size || any_of(is_zero<size_t>(), grid_size, grid_size + dims))
+         throw error(CL_INVALID_GLOBAL_WORK_SIZE);
+
+      if (block_size && any_of([](size_t b, size_t max) {
+               return b == 0 || b > max;
+            }, block_size, block_size + dims,
+            q->dev.max_block_size().begin()))
+         throw error(CL_INVALID_WORK_ITEM_SIZE);
+
+      if (block_size && any_of([](size_t b, size_t g) {
+               return g % b;
+            }, block_size, block_size + dims, grid_size))
+         throw error(CL_INVALID_WORK_GROUP_SIZE);
+   }
+
+   ///
+   /// Common event action shared by kernel invocation commands.
+   ///
+   std::function<void (event &)>
+   kernel_op(cl_command_queue q, cl_kernel kern,
+             const std::vector<size_t> &grid_offset,
+             const std::vector<size_t> &grid_size,
+             const std::vector<size_t> &block_size) {
+      const std::vector<size_t> reduced_grid_size = map(
+         std::divides<size_t>(), grid_size.begin(), grid_size.end(),
+         block_size.begin());
+
+      return [=](event &) {
+         kern->launch(*q, grid_offset, reduced_grid_size, block_size);
+      };
+   }
+
+   template<typename T, typename S>
+   std::vector<T>
+   opt_vector(const T *p, S n) {
+      if (p)
+         return { p, p + n };
+      else
+         return { n };
+   }
+}
+
+PUBLIC cl_int
+clEnqueueNDRangeKernel(cl_command_queue q, cl_kernel kern,
+                       cl_uint dims, const size_t *pgrid_offset,
+                       const size_t *pgrid_size, const size_t *pblock_size,
+                       cl_uint num_deps, const cl_event *deps,
+                       cl_event *ev) try {
+   const std::vector<size_t> grid_offset = opt_vector(pgrid_offset, dims);
+   const std::vector<size_t> grid_size = opt_vector(pgrid_size, dims);
+   const std::vector<size_t> block_size = opt_vector(pblock_size, dims);
+
+   kernel_validate(q, kern, dims, pgrid_offset, pgrid_size, pblock_size,
+                   num_deps, deps, ev);
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_NDRANGE_KERNEL, { deps, deps + num_deps },
+      kernel_op(q, kern, grid_offset, grid_size, block_size));
+
+   ret_object(ev, hev);
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Enqueue the execution of kernel \a kern as a single work item on
+/// command queue \a q.
+///
+/// This is a one-dimensional launch with offset 0, grid size 1 and
+/// block size 1.  The new event is handed to ret_object() along with
+/// \a ev.  Returns the error code reported by kernel_validate() on
+/// invalid arguments and CL_SUCCESS otherwise.
+///
+PUBLIC cl_int
+clEnqueueTask(cl_command_queue q, cl_kernel kern,
+              cl_uint num_deps, const cl_event *deps,
+              cl_event *ev) try {
+   const std::vector<size_t> grid_offset = { 0 };
+   const std::vector<size_t> grid_size = { 1 };
+   const std::vector<size_t> block_size = { 1 };
+
+   kernel_validate(q, kern, 1, grid_offset.data(), grid_size.data(),
+                   block_size.data(), num_deps, deps, ev);
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_TASK, { deps, deps + num_deps },
+      kernel_op(q, kern, grid_offset, grid_size, block_size));
+
+   ret_object(ev, hev);
+   return CL_SUCCESS;
+
+} catch(error &e) {
+   return e.get();
+}
+
+///
+/// Enqueue the execution of the native host function \a func.
+///
+/// Stub: every argument is ignored and CL_INVALID_OPERATION is
+/// returned unconditionally.
+///
+PUBLIC cl_int
+clEnqueueNativeKernel(cl_command_queue q, void (*func)(void *),
+                      void *args, size_t args_size,
+                      cl_uint obj_count, const cl_mem *obj_list,
+                      const void **obj_args, cl_uint num_deps,
+                      const cl_event *deps, cl_event *ev) {
+   return CL_INVALID_OPERATION;
+}
diff --git a/src/gallium/state_trackers/clover/api/memory.cpp b/src/gallium/state_trackers/clover/api/memory.cpp

new file mode 100644 (file)

index 0000000..1b1ae73
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/memory.cpp
@@ -0,0 +1,305 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+#include "core/memory.hpp"
+#include "core/format.hpp"
+
+using namespace clover;
+
+PUBLIC cl_mem
+clCreateBuffer(cl_context ctx, cl_mem_flags flags, size_t size,
+               void *host_ptr, cl_int *errcode_ret) try {
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
+                                       CL_MEM_COPY_HOST_PTR)))
+      throw error(CL_INVALID_HOST_PTR);
+
+   if (!size)
+      throw error(CL_INVALID_BUFFER_SIZE);
+
+   if (flags & ~(CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
+                 CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR |
+                 CL_MEM_COPY_HOST_PTR))
+      throw error(CL_INVALID_VALUE);
+
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new root_buffer(*ctx, flags, size, host_ptr);
+
+} catch (error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+PUBLIC cl_mem
+clCreateSubBuffer(cl_mem obj, cl_mem_flags flags, cl_buffer_create_type op,
+                  const void *op_info, cl_int *errcode_ret) try {
+   root_buffer *parent = dynamic_cast<root_buffer *>(obj);
+
+   if (!parent)
+      throw error(CL_INVALID_MEM_OBJECT);
+
+   if ((flags & (CL_MEM_USE_HOST_PTR |
+                 CL_MEM_ALLOC_HOST_PTR |
+                 CL_MEM_COPY_HOST_PTR)) ||
+       (~flags & parent->flags() & (CL_MEM_READ_ONLY |
+                                    CL_MEM_WRITE_ONLY)))
+      throw error(CL_INVALID_VALUE);
+
+   if (op == CL_BUFFER_CREATE_TYPE_REGION) {
+      const cl_buffer_region *reg = (const cl_buffer_region *)op_info;
+
+      if (!reg ||
+          reg->origin > parent->size() ||
+          reg->origin + reg->size > parent->size())
+         throw error(CL_INVALID_VALUE);
+
+      if (!reg->size)
+         throw error(CL_INVALID_BUFFER_SIZE);
+
+      ret_error(errcode_ret, CL_SUCCESS);
+      return new sub_buffer(*parent, flags, reg->origin, reg->size);
+
+   } else {
+      throw error(CL_INVALID_VALUE);
+   }
+
+} catch (error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+PUBLIC cl_mem
+clCreateImage2D(cl_context ctx, cl_mem_flags flags,
+                const cl_image_format *format,
+                size_t width, size_t height, size_t row_pitch,
+                void *host_ptr, cl_int *errcode_ret) try {
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   if (flags & ~(CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
+                 CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR |
+                 CL_MEM_COPY_HOST_PTR))
+      throw error(CL_INVALID_VALUE);
+
+   if (!format)
+      throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
+
+   if (width < 1 || height < 1)
+      throw error(CL_INVALID_IMAGE_SIZE);
+
+   if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
+                                       CL_MEM_COPY_HOST_PTR)))
+      throw error(CL_INVALID_HOST_PTR);
+
+   if (!supported_formats(ctx, CL_MEM_OBJECT_IMAGE2D).count(*format))
+      throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
+
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new image2d(*ctx, flags, format, width, height,
+                      row_pitch, host_ptr);
+
+} catch (error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+PUBLIC cl_mem
+clCreateImage3D(cl_context ctx, cl_mem_flags flags,
+                const cl_image_format *format,
+                size_t width, size_t height, size_t depth,
+                size_t row_pitch, size_t slice_pitch,
+                void *host_ptr, cl_int *errcode_ret) try {
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   if (flags & ~(CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
+                 CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR |
+                 CL_MEM_COPY_HOST_PTR))
+      throw error(CL_INVALID_VALUE);
+
+   if (!format)
+      throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
+
+   if (width < 1 || height < 1 || depth < 2)
+      throw error(CL_INVALID_IMAGE_SIZE);
+
+   if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
+                                       CL_MEM_COPY_HOST_PTR)))
+      throw error(CL_INVALID_HOST_PTR);
+
+   if (!supported_formats(ctx, CL_MEM_OBJECT_IMAGE3D).count(*format))
+      throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
+
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new image3d(*ctx, flags, format, width, height, depth,
+                      row_pitch, slice_pitch, host_ptr);
+
+} catch (error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+PUBLIC cl_int
+clGetSupportedImageFormats(cl_context ctx, cl_mem_flags flags,
+                           cl_mem_object_type type, cl_uint count,
+                           cl_image_format *buf, cl_uint *count_ret) try {
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   if (flags & ~(CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
+                 CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR |
+                 CL_MEM_COPY_HOST_PTR))
+      throw error(CL_INVALID_VALUE);
+
+   if (!count && buf)
+      throw error(CL_INVALID_VALUE);
+
+   auto formats = supported_formats(ctx, type);
+
+   if (buf)
+      std::copy_n(formats.begin(), std::min((cl_uint)formats.size(), count),
+                  buf);
+   if (count_ret)
+      *count_ret = formats.size();
+
+   return CL_SUCCESS;
+
+} catch (error &e) {
+   return e.get();
+}
+
+///
+/// Query property \a param of memory object \a obj.
+///
+/// The selected value is passed to scalar_property() together with
+/// \a buf, \a size and \a size_ret (presumably it copies the value
+/// out and reports its size -- see api/util.hpp).  Returns
+/// CL_INVALID_MEM_OBJECT for a NULL object and CL_INVALID_VALUE for
+/// an unknown \a param.
+///
+PUBLIC cl_int
+clGetMemObjectInfo(cl_mem obj, cl_mem_info param,
+                   size_t size, void *buf, size_t *size_ret) {
+   if (!obj)
+      return CL_INVALID_MEM_OBJECT;
+
+   switch (param) {
+   case CL_MEM_TYPE:
+      return scalar_property<cl_mem_object_type>(buf, size, size_ret,
+                                                 obj->type());
+
+   case CL_MEM_FLAGS:
+      return scalar_property<cl_mem_flags>(buf, size, size_ret, obj->flags());
+
+   case CL_MEM_SIZE:
+      return scalar_property<size_t>(buf, size, size_ret, obj->size());
+
+   case CL_MEM_HOST_PTR:
+      return scalar_property<void *>(buf, size, size_ret, obj->host_ptr());
+
+   case CL_MEM_MAP_COUNT:
+      // Always reported as zero.
+      return scalar_property<cl_uint>(buf, size, size_ret, 0);
+
+   case CL_MEM_REFERENCE_COUNT:
+      return scalar_property<cl_uint>(buf, size, size_ret, obj->ref_count());
+
+   case CL_MEM_CONTEXT:
+      return scalar_property<cl_context>(buf, size, size_ret, &obj->ctx);
+
+   // The two properties below are only meaningful for sub-buffers:
+   // any other kind of object reports NULL and 0 respectively.
+   case CL_MEM_ASSOCIATED_MEMOBJECT: {
+      sub_buffer *sub = dynamic_cast<sub_buffer *>(obj);
+      return scalar_property<cl_mem>(buf, size, size_ret,
+                                     (sub ? &sub->parent : NULL));
+   }
+   case CL_MEM_OFFSET: {
+      sub_buffer *sub = dynamic_cast<sub_buffer *>(obj);
+      return scalar_property<size_t>(buf, size, size_ret,
+                                     (sub ? sub->offset() : 0));
+   }
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
+
+///
+/// Query property \a param of image object \a obj.
+///
+/// The selected value is passed to scalar_property() together with
+/// \a buf, \a size and \a size_ret (presumably it copies the value
+/// out and reports its size -- see api/util.hpp).  Returns
+/// CL_INVALID_MEM_OBJECT when \a obj is NULL or not an image and
+/// CL_INVALID_VALUE for an unknown \a param.
+///
+PUBLIC cl_int
+clGetImageInfo(cl_mem obj, cl_image_info param,
+               size_t size, void *buf, size_t *size_ret) {
+   image *img = dynamic_cast<image *>(obj);
+   if (!img)
+      return CL_INVALID_MEM_OBJECT;
+
+   switch (param) {
+   case CL_IMAGE_FORMAT:
+      return scalar_property<cl_image_format>(buf, size, size_ret,
+                                              img->format());
+
+   case CL_IMAGE_ELEMENT_SIZE:
+      // NOTE(review): always reported as zero -- looks like a
+      // placeholder, confirm.
+      return scalar_property<size_t>(buf, size, size_ret, 0);
+
+   case CL_IMAGE_ROW_PITCH:
+      return scalar_property<size_t>(buf, size, size_ret, img->row_pitch());
+
+   case CL_IMAGE_SLICE_PITCH:
+      return scalar_property<size_t>(buf, size, size_ret, img->slice_pitch());
+
+   case CL_IMAGE_WIDTH:
+      return scalar_property<size_t>(buf, size, size_ret, img->width());
+
+   case CL_IMAGE_HEIGHT:
+      return scalar_property<size_t>(buf, size, size_ret, img->height());
+
+   case CL_IMAGE_DEPTH:
+      return scalar_property<size_t>(buf, size, size_ret, img->depth());
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
+
+///
+/// Take an additional reference on memory object \a obj.
+///
+/// Returns CL_INVALID_MEM_OBJECT for a NULL handle, CL_SUCCESS
+/// otherwise.
+///
+PUBLIC cl_int
+clRetainMemObject(cl_mem obj) {
+   if (obj) {
+      obj->retain();
+      return CL_SUCCESS;
+   }
+
+   return CL_INVALID_MEM_OBJECT;
+}
+
+///
+/// Drop a reference on memory object \a obj, destroying it when
+/// release() asks for it.
+///
+/// Returns CL_INVALID_MEM_OBJECT for a NULL handle, CL_SUCCESS
+/// otherwise.
+///
+PUBLIC cl_int
+clReleaseMemObject(cl_mem obj) {
+   if (obj) {
+      const bool destroy = obj->release();
+
+      if (destroy)
+         delete obj;
+
+      return CL_SUCCESS;
+   }
+
+   return CL_INVALID_MEM_OBJECT;
+}
+
+///
+/// Register \a pfn_notify as a destruction callback of memory object
+/// \a obj.
+///
+/// Returns CL_INVALID_MEM_OBJECT for a NULL object, CL_INVALID_VALUE
+/// for a NULL callback and CL_SUCCESS otherwise.
+///
+PUBLIC cl_int
+clSetMemObjectDestructorCallback(cl_mem obj,
+                                 void (CL_CALLBACK *pfn_notify)(cl_mem, void *),
+                                 void *user_data) {
+   if (!obj)
+      return CL_INVALID_MEM_OBJECT;
+
+   if (!pfn_notify)
+      return CL_INVALID_VALUE;
+
+   // The closure captures obj and user_data by value and passes both
+   // to the callback when it is run.
+   obj->destroy_notify([=]{ pfn_notify(obj, user_data); });
+
+   return CL_SUCCESS;
+}
diff --git a/src/gallium/state_trackers/clover/api/platform.cpp b/src/gallium/state_trackers/clover/api/platform.cpp

new file mode 100644 (file)

index 0000000..e5e80b8
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/platform.cpp
@@ -0,0 +1,68 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+
+using namespace clover;
+
+///
+/// List the available platforms.
+///
+/// A single platform is reported and its handle is NULL.  When
+/// \a num_platforms is non-NULL it receives the platform count; when
+/// \a platforms is non-NULL its first element receives the handle.
+/// Returns CL_INVALID_VALUE when \a platforms is given with a zero
+/// \a num_entries or when both output pointers are NULL, and
+/// CL_SUCCESS otherwise.
+///
+PUBLIC cl_int
+clGetPlatformIDs(cl_uint num_entries, cl_platform_id *platforms,
+                 cl_uint *num_platforms) {
+   // An output array without any room is as invalid as a call that
+   // asks for nothing at all.
+   const bool zero_capacity = platforms && !num_entries;
+   const bool nothing_requested = !platforms && !num_platforms;
+
+   if (zero_capacity || nothing_requested)
+      return CL_INVALID_VALUE;
+
+   if (num_platforms)
+      *num_platforms = 1;
+
+   if (platforms)
+      *platforms = NULL;
+
+   return CL_SUCCESS;
+}
+
+///
+/// Query property \a param_name of platform \a platform.
+///
+/// The only accepted platform handle is NULL.  The selected string is
+/// passed to string_property() together with \a buf, \a size and
+/// \a size_ret (presumably it copies the string out and reports its
+/// size -- see api/util.hpp).  Returns CL_INVALID_PLATFORM for a
+/// non-NULL handle and CL_INVALID_VALUE for an unknown \a param_name.
+///
+PUBLIC cl_int
+clGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name,
+                  size_t size, void *buf, size_t *size_ret) {
+   if (platform != NULL)
+      return CL_INVALID_PLATFORM;
+
+   switch (param_name) {
+   case CL_PLATFORM_PROFILE:
+      return string_property(buf, size, size_ret, "FULL_PROFILE");
+
+   case CL_PLATFORM_VERSION:
+      return string_property(buf, size, size_ret,
+                             "OpenCL 1.1 MESA " MESA_VERSION);
+
+   case CL_PLATFORM_NAME:
+      return string_property(buf, size, size_ret, "Default");
+
+   case CL_PLATFORM_VENDOR:
+      return string_property(buf, size, size_ret, "Mesa");
+
+   case CL_PLATFORM_EXTENSIONS:
+      // No platform extensions are advertised.
+      return string_property(buf, size, size_ret, "");
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
diff --git a/src/gallium/state_trackers/clover/api/program.cpp b/src/gallium/state_trackers/clover/api/program.cpp

new file mode 100644 (file)

index 0000000..e874c51
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/program.cpp
@@ -0,0 +1,241 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+#include "core/program.hpp"
+
+using namespace clover;
+
+PUBLIC cl_program
+clCreateProgramWithSource(cl_context ctx, cl_uint count,
+                          const char **strings, const size_t *lengths,
+                          cl_int *errcode_ret) try {
+   std::string source;
+
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   if (!count || !strings ||
+       any_of(is_zero<const char *>(), strings, strings + count))
+      throw error(CL_INVALID_VALUE);
+
+   // Concatenate all the provided fragments together
+   for (unsigned i = 0; i < count; ++i)
+         source += (lengths && lengths[i] ?
+                    std::string(strings[i], strings[i] + lengths[i]) :
+                    std::string(strings[i]));
+
+   // ...and create a program object for them.
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new program(*ctx, source);
+
+} catch (error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+///
+/// Create a program object in context \a ctx from one serialized
+/// binary per device.
+///
+/// \a devs, \a lengths and \a binaries each hold \a count entries.
+/// When \a status_ret is non-NULL it receives one status code per
+/// binary: CL_INVALID_VALUE for a NULL or empty binary,
+/// CL_INVALID_BINARY when deserialization raises
+/// compat::istream::error, CL_SUCCESS otherwise.
+///
+/// Returns the new program, or NULL on failure with the error code
+/// handed to ret_error() along with \a errcode_ret:
+/// CL_INVALID_CONTEXT for a NULL \a ctx, CL_INVALID_VALUE for a zero
+/// \a count, a NULL array or a NULL/empty binary, CL_INVALID_DEVICE
+/// for a device that does not belong to \a ctx and CL_INVALID_BINARY
+/// for a binary that fails to deserialize.
+///
+PUBLIC cl_program
+clCreateProgramWithBinary(cl_context ctx, cl_uint count,
+                          const cl_device_id *devs, const size_t *lengths,
+                          const unsigned char **binaries, cl_int *status_ret,
+                          cl_int *errcode_ret) try {
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   if (!count || !devs || !lengths || !binaries)
+      throw error(CL_INVALID_VALUE);
+
+   if (any_of([&](const cl_device_id dev) {
+            return !ctx->has_device(dev);
+         }, devs, devs + count))
+      throw error(CL_INVALID_DEVICE);
+
+   // Deserialize the provided binaries,
+   auto modules = map(
+      [](const unsigned char *p, size_t l) -> std::pair<cl_int, module> {
+         if (!p || !l)
+            return { CL_INVALID_VALUE, {} };
+
+         try {
+            compat::istream::buffer_t bin(p, l);
+            compat::istream s(bin);
+
+            return { CL_SUCCESS, module::deserialize(s) };
+
+         } catch (compat::istream::error &e) {
+            return { CL_INVALID_BINARY, {} };
+         }
+      },
+      binaries, binaries + count, lengths);
+
+   // update the status array,
+   if (status_ret)
+      std::transform(modules.begin(), modules.end(), status_ret,
+                     keys<cl_int, module>);
+
+   // (the status array is filled in before any failure is reported,
+   // so the caller can tell which binary was at fault)
+   if (any_of(key_equals<cl_int, module>(CL_INVALID_VALUE),
+              modules.begin(), modules.end()))
+      throw error(CL_INVALID_VALUE);
+
+   if (any_of(key_equals<cl_int, module>(CL_INVALID_BINARY),
+              modules.begin(), modules.end()))
+      throw error(CL_INVALID_BINARY);
+
+   // initialize a program object with them.
+   ret_error(errcode_ret, CL_SUCCESS);
+   return new program(*ctx, { devs, devs + count },
+                      map(values<cl_int, module>,
+                          modules.begin(), modules.end()));
+
+} catch (error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+///
+/// Take an additional reference on program \a prog.
+///
+/// Returns CL_INVALID_PROGRAM for a NULL handle, CL_SUCCESS otherwise.
+///
+PUBLIC cl_int
+clRetainProgram(cl_program prog) {
+   if (prog) {
+      prog->retain();
+      return CL_SUCCESS;
+   }
+
+   return CL_INVALID_PROGRAM;
+}
+
+///
+/// Drop a reference on program \a prog, destroying it when release()
+/// asks for it.
+///
+/// Returns CL_INVALID_PROGRAM for a NULL handle, CL_SUCCESS otherwise.
+///
+PUBLIC cl_int
+clReleaseProgram(cl_program prog) {
+   if (prog) {
+      const bool destroy = prog->release();
+
+      if (destroy)
+         delete prog;
+
+      return CL_SUCCESS;
+   }
+
+   return CL_INVALID_PROGRAM;
+}
+
+///
+/// Build program \a prog for the \a count devices listed in \a devs.
+///
+/// Returns CL_INVALID_PROGRAM for a NULL \a prog, CL_INVALID_VALUE
+/// when exactly one of \a count and \a devs is zero/NULL or when
+/// \a user_data is given without \a pfn_notify, CL_INVALID_DEVICE for
+/// a device that does not belong to the program's context, the code
+/// of any clover::error raised by the build, and CL_SUCCESS
+/// otherwise.
+///
+/// NOTE(review): \a opts is not used and \a pfn_notify is never
+/// invoked by this function -- confirm no caller relies on the
+/// completion callback.
+///
+PUBLIC cl_int
+clBuildProgram(cl_program prog, cl_uint count, const cl_device_id *devs,
+               const char *opts, void (*pfn_notify)(cl_program, void *),
+               void *user_data) try {
+   if (!prog)
+      throw error(CL_INVALID_PROGRAM);
+
+   if (bool(count) != bool(devs) ||
+       (!pfn_notify && user_data))
+      throw error(CL_INVALID_VALUE);
+
+   if (any_of([&](const cl_device_id dev) {
+            return !prog->ctx.has_device(dev);
+         }, devs, devs + count))
+      throw error(CL_INVALID_DEVICE);
+
+   // With a NULL device list this passes an empty range to build().
+   prog->build({ devs, devs + count });
+   return CL_SUCCESS;
+
+} catch (error &e) {
+   return e.get();
+}
+
+///
+/// Hint that the compiler resources may be released.
+///
+/// Does nothing and always returns CL_SUCCESS.
+///
+PUBLIC cl_int
+clUnloadCompiler() {
+   return CL_SUCCESS;
+}
+
+///
+/// Query property \a param of program \a prog.
+///
+/// The selected value is passed to the *_property helpers together
+/// with \a buf, \a size and \a size_ret (presumably they copy it out
+/// and report its size -- see api/util.hpp).  Returns
+/// CL_INVALID_PROGRAM for a NULL program and CL_INVALID_VALUE for an
+/// unknown \a param.
+///
+PUBLIC cl_int
+clGetProgramInfo(cl_program prog, cl_program_info param,
+                 size_t size, void *buf, size_t *size_ret) {
+   if (!prog)
+      return CL_INVALID_PROGRAM;
+
+   switch (param) {
+   case CL_PROGRAM_REFERENCE_COUNT:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      prog->ref_count());
+
+   case CL_PROGRAM_CONTEXT:
+      return scalar_property<cl_context>(buf, size, size_ret,
+                                         &prog->ctx);
+
+   // The device list is derived from the binaries: one device per
+   // entry of prog->binaries().
+   case CL_PROGRAM_NUM_DEVICES:
+      return scalar_property<cl_uint>(buf, size, size_ret,
+                                      prog->binaries().size());
+
+   case CL_PROGRAM_DEVICES:
+      return vector_property<cl_device_id>(
+         buf, size, size_ret,
+         map(keys<device *, module>,
+             prog->binaries().begin(), prog->binaries().end()));
+
+   case CL_PROGRAM_SOURCE:
+      return string_property(buf, size, size_ret, prog->source());
+
+   case CL_PROGRAM_BINARY_SIZES:
+      // Each module is serialized into a temporary buffer just to
+      // find out how large its binary is.
+      return vector_property<size_t>(
+         buf, size, size_ret,
+         map([](const std::pair<device *, module> &ent) {
+               compat::ostream::buffer_t bin;
+               compat::ostream s(bin);
+               ent.second.serialize(s);
+               return bin.size();
+            },
+            prog->binaries().begin(), prog->binaries().end()));
+
+   case CL_PROGRAM_BINARIES:
+      // Same serialization as above, this time keeping the bytes.
+      return matrix_property<unsigned char>(
+         buf, size, size_ret,
+         map([](const std::pair<device *, module> &ent) {
+               compat::ostream::buffer_t bin;
+               compat::ostream s(bin);
+               ent.second.serialize(s);
+               return bin;
+            },
+            prog->binaries().begin(), prog->binaries().end()));
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
+
+///
+/// Query build property \a param of program \a prog for device
+/// \a dev.
+///
+/// The selected value is passed to the *_property helpers together
+/// with \a buf, \a size and \a size_ret (presumably they copy it out
+/// and report its size -- see api/util.hpp).  Returns
+/// CL_INVALID_PROGRAM for a NULL program, CL_INVALID_DEVICE when
+/// \a dev does not belong to the program's context and
+/// CL_INVALID_VALUE for an unknown \a param.
+///
+PUBLIC cl_int
+clGetProgramBuildInfo(cl_program prog, cl_device_id dev,
+                      cl_program_build_info param,
+                      size_t size, void *buf, size_t *size_ret) {
+   if (!prog)
+      return CL_INVALID_PROGRAM;
+
+   if (!prog->ctx.has_device(dev))
+      return CL_INVALID_DEVICE;
+
+   switch (param) {
+   case CL_PROGRAM_BUILD_STATUS:
+      return scalar_property<cl_build_status>(buf, size, size_ret,
+                                              prog->build_status(dev));
+
+   case CL_PROGRAM_BUILD_OPTIONS:
+      return string_property(buf, size, size_ret, prog->build_opts(dev));
+
+   case CL_PROGRAM_BUILD_LOG:
+      return string_property(buf, size, size_ret, prog->build_log(dev));
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
diff --git a/src/gallium/state_trackers/clover/api/queue.cpp b/src/gallium/state_trackers/clover/api/queue.cpp

new file mode 100644 (file)

index 0000000..a7905bc
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/queue.cpp
@@ -0,0 +1,102 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+#include "core/queue.hpp"
+
+using namespace clover;
+
+PUBLIC cl_command_queue // Validates the arguments and allocates a new command_queue.
+clCreateCommandQueue(cl_context ctx, cl_device_id dev,
+                     cl_command_queue_properties props,
+                     cl_int *errcode_ret) try { // function-try-block: see the handler at the bottom
+   if (!ctx)
+      throw error(CL_INVALID_CONTEXT);
+
+   if (!ctx->has_device(dev))
+      throw error(CL_INVALID_DEVICE);
+
+   if (props & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | // any bit other than these two is rejected
+                 CL_QUEUE_PROFILING_ENABLE))
+      throw error(CL_INVALID_VALUE);
+
+   ret_error(errcode_ret, CL_SUCCESS); // written before construction; overwritten below if an error is thrown
+   return new command_queue(*ctx, *dev, props);
+
+} catch (error &e) { // only clover::error is translated; other exception types are not caught here
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+PUBLIC cl_int // Calls q->retain() on a non-null queue.
+clRetainCommandQueue(cl_command_queue q) {
+   if (!q) // reported by return code; nothing is thrown from this entry point
+      return CL_INVALID_COMMAND_QUEUE;
+
+   q->retain();
+   return CL_SUCCESS;
+}
+
+PUBLIC cl_int // Calls q->release() and destroys the queue when that call returns true.
+clReleaseCommandQueue(cl_command_queue q) {
+   if (!q)
+      return CL_INVALID_COMMAND_QUEUE;
+
+   if (q->release()) // presumably true once the last reference is gone - confirm in ref_counter
+      delete q;
+
+   return CL_SUCCESS;
+}
+
+PUBLIC cl_int // Writes the queue property selected by param into buf.
+clGetCommandQueueInfo(cl_command_queue q, cl_command_queue_info param,
+                      size_t size, void *buf, size_t *size_ret) {
+   if (!q)
+      return CL_INVALID_COMMAND_QUEUE;
+
+   switch (param) {
+   case CL_QUEUE_CONTEXT:
+      return scalar_property<cl_context>(buf, size, size_ret, &q->ctx); // address of the ctx member is the handle
+
+   case CL_QUEUE_DEVICE:
+      return scalar_property<cl_device_id>(buf, size, size_ret, &q->dev); // address of the dev member is the handle
+
+   case CL_QUEUE_REFERENCE_COUNT:
+      return scalar_property<cl_uint>(buf, size, size_ret, q->ref_count());
+
+   case CL_QUEUE_PROPERTIES:
+      return scalar_property<cl_command_queue_properties>(buf, size, size_ret,
+                                                          q->props());
+
+   default: // unrecognised param
+      return CL_INVALID_VALUE;
+   }
+}
+
+PUBLIC cl_int // Forwards to q->flush() on a non-null queue.
+clFlush(cl_command_queue q) {
+   if (!q)
+      return CL_INVALID_COMMAND_QUEUE;
+
+   q->flush(); // no try/catch here - NOTE(review): confirm flush() cannot throw
+   return CL_SUCCESS;
+}
diff --git a/src/gallium/state_trackers/clover/api/sampler.cpp b/src/gallium/state_trackers/clover/api/sampler.cpp

new file mode 100644 (file)

index 0000000..32ce22e
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/sampler.cpp
@@ -0,0 +1,90 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "api/util.hpp"
+#include "core/sampler.hpp"
+
+using namespace clover;
+
+PUBLIC cl_sampler // Allocates a new sampler bound to ctx with the given modes.
+clCreateSampler(cl_context ctx, cl_bool norm_mode,
+                cl_addressing_mode addr_mode, cl_filter_mode filter_mode,
+                cl_int *errcode_ret) try { // function-try-block: see the handler at the bottom
+   if (!ctx) // the only argument validated here; the mode values are passed through unchecked
+      throw error(CL_INVALID_CONTEXT);
+
+   ret_error(errcode_ret, CL_SUCCESS); // overwritten below if construction throws clover::error
+   return new sampler(*ctx, norm_mode, addr_mode, filter_mode);
+
+} catch (error &e) { // only clover::error is translated into an error code
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+PUBLIC cl_int // Calls s->retain() on a non-null sampler.
+clRetainSampler(cl_sampler s) {
+   if (!s) // no try/catch wraps this body, so the error must be returned rather than thrown
+      return CL_INVALID_SAMPLER;
+
+   s->retain();
+   return CL_SUCCESS;
+}
+
+PUBLIC cl_int // Calls s->release() and destroys the sampler when that call returns true.
+clReleaseSampler(cl_sampler s) {
+   if (!s) // no try/catch wraps this body, so the error must be returned rather than thrown
+      return CL_INVALID_SAMPLER;
+
+   if (s->release()) // presumably true once the last reference is gone - confirm in ref_counter
+      delete s;
+
+   return CL_SUCCESS;
+}
+
+PUBLIC cl_int
+clGetSamplerInfo(cl_sampler s, cl_sampler_info param,
+                 size_t size, void *buf, size_t *size_ret) {
+   if (!s)
+      throw error(CL_INVALID_SAMPLER);
+
+   switch (param) {
+   case CL_SAMPLER_REFERENCE_COUNT:
+      return scalar_property<cl_uint>(buf, size, size_ret, s->ref_count());
+
+   case CL_SAMPLER_CONTEXT:
+      return scalar_property<cl_context>(buf, size, size_ret, &s->ctx);
+
+   case CL_SAMPLER_NORMALIZED_COORDS:
+      return scalar_property<cl_bool>(buf, size, size_ret, s->norm_mode());
+
+   case CL_SAMPLER_ADDRESSING_MODE:
+      return scalar_property<cl_addressing_mode>(buf, size, size_ret,
+                                                 s->addr_mode());
+
+   case CL_SAMPLER_FILTER_MODE:
+      return scalar_property<cl_filter_mode>(buf, size, size_ret,
+                                             s->filter_mode());
+
+   default:
+      return CL_INVALID_VALUE;
+   }
+}
diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp b/src/gallium/state_trackers/clover/api/transfer.cpp

new file mode 100644 (file)

index 0000000..c67b75e
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/transfer.cpp
@@ -0,0 +1,506 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include <cstring>
+
+#include "api/util.hpp"
+#include "core/event.hpp"
+#include "core/resource.hpp"
+
+using namespace clover;
+
+namespace {
+   typedef resource::point point;
+
+   ///
+   /// Common argument checking shared by memory transfer commands.
+   ///
+   void
+   validate_base(cl_command_queue q, cl_uint num_deps, const cl_event *deps) { // throws clover::error on the first failed check
+      if (!q)
+         throw error(CL_INVALID_COMMAND_QUEUE);
+
+      if (bool(num_deps) != bool(deps) || // count and pointer must be both zero or both non-zero
+          any_of(is_zero<cl_event>(), deps, deps + num_deps)) // presumably rejects null entries - confirm is_zero
+         throw error(CL_INVALID_EVENT_WAIT_LIST);
+
+      if (any_of([&](const cl_event ev) {
+               return &ev->ctx != &q->ctx; // contexts are compared by address
+            }, deps, deps + num_deps))
+         throw error(CL_INVALID_CONTEXT);
+   }
+
+   ///
+   /// Memory object-specific argument checking shared by most memory
+   /// transfer commands.
+   ///
+   void
+   validate_obj(cl_command_queue q, cl_mem obj) { // q is dereferenced unchecked; callers run validate_base(q, ...) first
+      if (!obj) // also reached when a caller's dynamic_cast to image yielded null
+         throw error(CL_INVALID_MEM_OBJECT);
+
+      if (&obj->ctx != &q->ctx) // contexts are compared by address
+         throw error(CL_INVALID_CONTEXT);
+   }
+
+   ///
+   /// Class that encapsulates the task of mapping an object of type
+   /// \a T.  The return value of get() should be implicitly
+   /// convertible to \a void *.
+   ///
+   template<typename T> struct __map; // primary template is only declared; the specialisations below define it
+
+   template<> struct __map<void *> { // plain host pointer: no mapping needed
+      static void *
+      get(cl_command_queue q, void *obj, cl_map_flags flags,
+          size_t offset, size_t size) {
+         return (char *)obj + offset; // q, flags and size are unused here
+      }
+   };
+
+   template<> struct __map<const void *> { // read-only host pointer
+      static const void *
+      get(cl_command_queue q, const void *obj, cl_map_flags flags,
+          size_t offset, size_t size) {
+         return (const char *)obj + offset; // q, flags and size are unused here
+      }
+   };
+
+   template<> struct __map<memory_obj *> { // memory object: returns a `mapping` built from its resource
+      static mapping
+      get(cl_command_queue q, memory_obj *obj, cl_map_flags flags,
+          size_t offset, size_t size) {
+         return { *q, obj->resource(q), flags, true, { offset }, { size }}; // NOTE(review): the literal `true` presumably means "blocking" - confirm against mapping's constructor
+      }
+   };
+
+   ///
+   /// Software copy from \a src_obj to \a dst_obj.  They can be
+   /// either pointers or memory objects.
+   ///
+   template<typename T, typename S>
+   std::function<void (event &)>
+   soft_copy_op(cl_command_queue q,
+                T dst_obj, const point &dst_orig, const point &dst_pitch,
+                S src_obj, const point &src_orig, const point &src_pitch,
+                const point &region) {
+      return [=](event &) { // everything is captured by copy, so the closure can run after this call returns
+         auto dst = __map<T>::get(q, dst_obj, CL_MAP_WRITE,
+                                  dst_pitch(dst_orig), dst_pitch(region)); // pitch(point) is used as a linear offset / size
+         auto src = __map<S>::get(q, src_obj, CL_MAP_READ,
+                                  src_pitch(src_orig), src_pitch(region));
+         point p; // NOTE(review): p[0] is never assigned; assumes point default-initialises to zero - confirm
+
+         for (p[2] = 0; p[2] < region[2]; ++p[2]) { // slices
+            for (p[1] = 0; p[1] < region[1]; ++p[1]) { // rows within a slice
+               std::memcpy(static_cast<char *>(dst) + dst_pitch(p),
+                           static_cast<const char *>(src) + src_pitch(p),
+                           src_pitch[0] * region[0]); // one row per memcpy; length uses the source element pitch
+            }
+         }
+      };
+   }
+
+   ///
+   /// Hardware copy from \a src_obj to \a dst_obj.
+   ///
+   template<typename T, typename S>
+   std::function<void (event &)>
+   hard_copy_op(cl_command_queue q, T dst_obj, const point &dst_orig,
+                S src_obj, const point &src_orig, const point &region) {
+      return [=](event &) { // arguments are captured by copy; the copy itself happens when the closure is invoked
+         dst_obj->resource(q).copy(*q, dst_orig, region, // delegates to the destination resource's copy()
+                                   src_obj->resource(q), src_orig);
+      };
+   }
+}
+
+PUBLIC cl_int
+clEnqueueReadBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
+                    size_t offset, size_t size, void *ptr,
+                    cl_uint num_deps, const cl_event *deps,
+                    cl_event *ev) try {
+   validate_base(q, num_deps, deps);
+   validate_obj(q, obj);
+
+   if (!ptr || offset > obj->size() || offset + size > obj->size())
+      throw error(CL_INVALID_VALUE);
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_READ_BUFFER, { deps, deps + num_deps },
+      soft_copy_op(q,
+                   ptr, { 0 }, { 1 },
+                   obj, { offset }, { 1 },
+                   { size, 1, 1 }));
+
+   ret_object(ev, hev);
+   return CL_SUCCESS;
+
+} catch (error &e) {
+   return e.get();
+}
+
+PUBLIC cl_int
+clEnqueueWriteBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
+                     size_t offset, size_t size, const void *ptr,
+                     cl_uint num_deps, const cl_event *deps,
+                     cl_event *ev) try {
+   validate_base(q, num_deps, deps);
+   validate_obj(q, obj);
+
+   if (!ptr || offset > obj->size() || offset + size > obj->size())
+      throw error(CL_INVALID_VALUE);
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_WRITE_BUFFER, { deps, deps + num_deps },
+      soft_copy_op(q,
+                   obj, { offset }, { 1 },
+                   ptr, { 0 }, { 1 },
+                   { size, 1, 1 }));
+
+   ret_object(ev, hev);
+   return CL_SUCCESS;
+
+} catch (error &e) {
+   return e.get();
+}
+
+PUBLIC cl_int // Creates a hard_event that copies a rectangular region from buffer obj into ptr.
+clEnqueueReadBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking, // NOTE(review): `blocking` is not consulted in this body
+                        const size_t *obj_origin, const size_t *host_origin,
+                        const size_t *region,
+                        size_t obj_row_pitch, size_t obj_slice_pitch,
+                        size_t host_row_pitch, size_t host_slice_pitch,
+                        void *ptr,
+                        cl_uint num_deps, const cl_event *deps,
+                        cl_event *ev) try {
+   validate_base(q, num_deps, deps);
+   validate_obj(q, obj);
+
+   if (!ptr) // NOTE(review): origins, region and pitches are not validated in this body
+      throw error(CL_INVALID_VALUE);
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_READ_BUFFER_RECT, { deps, deps + num_deps },
+      soft_copy_op(q,
+                   ptr, host_origin, // destination: host memory
+                   { 1, host_row_pitch, host_slice_pitch },
+                   obj, obj_origin, // source: the buffer
+                   { 1, obj_row_pitch, obj_slice_pitch },
+                   region));
+
+   ret_object(ev, hev); // stores hev in *ev, or releases it when ev is NULL
+   return CL_SUCCESS;
+
+} catch (error &e) { // clover::error thrown above becomes the return code
+   return e.get();
+}
+
+PUBLIC cl_int // Creates a hard_event that copies a rectangular region from ptr into buffer obj.
+clEnqueueWriteBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking, // NOTE(review): `blocking` is not consulted in this body
+                         const size_t *obj_origin, const size_t *host_origin,
+                         const size_t *region,
+                         size_t obj_row_pitch, size_t obj_slice_pitch,
+                         size_t host_row_pitch, size_t host_slice_pitch,
+                         const void *ptr,
+                         cl_uint num_deps, const cl_event *deps,
+                         cl_event *ev) try {
+   validate_base(q, num_deps, deps);
+   validate_obj(q, obj);
+
+   if (!ptr) // NOTE(review): origins, region and pitches are not validated in this body
+      throw error(CL_INVALID_VALUE);
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_WRITE_BUFFER_RECT, { deps, deps + num_deps },
+      soft_copy_op(q,
+                   obj, obj_origin, // destination: the buffer
+                   { 1, obj_row_pitch, obj_slice_pitch },
+                   ptr, host_origin, // source: host memory
+                   { 1, host_row_pitch, host_slice_pitch },
+                   region));
+
+   ret_object(ev, hev); // stores hev in *ev, or releases it when ev is NULL
+   return CL_SUCCESS;
+
+} catch (error &e) { // clover::error thrown above becomes the return code
+   return e.get();
+}
+
+PUBLIC cl_int // Creates a hard_event that copies `size` bytes between two buffers via hard_copy_op.
+clEnqueueCopyBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
+                    size_t src_offset, size_t dst_offset, size_t size,
+                    cl_uint num_deps, const cl_event *deps,
+                    cl_event *ev) try {
+   validate_base(q, num_deps, deps);
+   validate_obj(q, src_obj);
+   validate_obj(q, dst_obj); // NOTE(review): offsets and size are not range-checked in this body
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_COPY_BUFFER, { deps, deps + num_deps },
+      hard_copy_op(q, dst_obj, { dst_offset },
+                   src_obj, { src_offset },
+                   { size, 1, 1 }));
+
+   ret_object(ev, hev); // stores hev in *ev, or releases it when ev is NULL
+   return CL_SUCCESS;
+
+} catch (error &e) { // clover::error thrown above becomes the return code
+   return e.get();
+}
+
+PUBLIC cl_int // Creates a hard_event that copies a rectangular region between two buffers via soft_copy_op.
+clEnqueueCopyBufferRect(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
+                        const size_t *src_origin, const size_t *dst_origin,
+                        const size_t *region,
+                        size_t src_row_pitch, size_t src_slice_pitch,
+                        size_t dst_row_pitch, size_t dst_slice_pitch,
+                        cl_uint num_deps, const cl_event *deps,
+                        cl_event *ev) try {
+   validate_base(q, num_deps, deps);
+   validate_obj(q, src_obj);
+   validate_obj(q, dst_obj); // NOTE(review): origins, region and pitches are not validated in this body
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_COPY_BUFFER_RECT, { deps, deps + num_deps },
+      soft_copy_op(q,
+                   dst_obj, dst_origin,
+                   { 1, dst_row_pitch, dst_slice_pitch },
+                   src_obj, src_origin,
+                   { 1, src_row_pitch, src_slice_pitch },
+                   region));
+
+   ret_object(ev, hev); // stores hev in *ev, or releases it when ev is NULL
+   return CL_SUCCESS;
+
+} catch (error &e) { // clover::error thrown above becomes the return code
+   return e.get();
+}
+
+PUBLIC cl_int // Creates a hard_event that copies an image region into host memory at ptr.
+clEnqueueReadImage(cl_command_queue q, cl_mem obj, cl_bool blocking, // NOTE(review): `blocking` is not consulted in this body
+                   const size_t *origin, const size_t *region,
+                   size_t row_pitch, size_t slice_pitch, void *ptr,
+                   cl_uint num_deps, const cl_event *deps,
+                   cl_event *ev) try {
+   image *img = dynamic_cast<image *>(obj); // null when obj is not an image
+
+   validate_base(q, num_deps, deps);
+   validate_obj(q, img); // a null img is rejected here with CL_INVALID_MEM_OBJECT
+
+   if (!ptr)
+      throw error(CL_INVALID_VALUE);
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_READ_IMAGE, { deps, deps + num_deps },
+      soft_copy_op(q,
+                   ptr, {}, // destination: host memory, empty origin
+                   { 1, row_pitch, slice_pitch },
+                   obj, origin, // source: the image, using its own pitches
+                   { 1, img->row_pitch(), img->slice_pitch() },
+                   region));
+
+   ret_object(ev, hev); // stores hev in *ev, or releases it when ev is NULL
+   return CL_SUCCESS;
+
+} catch (error &e) { // clover::error thrown above becomes the return code
+   return e.get();
+}
+
+PUBLIC cl_int // Creates a hard_event that copies host memory at ptr into an image region.
+clEnqueueWriteImage(cl_command_queue q, cl_mem obj, cl_bool blocking, // NOTE(review): `blocking` is not consulted in this body
+                    const size_t *origin, const size_t *region,
+                    size_t row_pitch, size_t slice_pitch, const void *ptr,
+                    cl_uint num_deps, const cl_event *deps,
+                    cl_event *ev) try {
+   image *img = dynamic_cast<image *>(obj); // null when obj is not an image
+
+   validate_base(q, num_deps, deps);
+   validate_obj(q, img); // a null img is rejected here with CL_INVALID_MEM_OBJECT
+
+   if (!ptr)
+      throw error(CL_INVALID_VALUE);
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_WRITE_IMAGE, { deps, deps + num_deps },
+      soft_copy_op(q,
+                   obj, origin, // destination: the image, using its own pitches
+                   { 1, img->row_pitch(), img->slice_pitch() },
+                   ptr, {}, // source: host memory, empty origin
+                   { 1, row_pitch, slice_pitch },
+                   region));
+
+   ret_object(ev, hev); // stores hev in *ev, or releases it when ev is NULL
+   return CL_SUCCESS;
+
+} catch (error &e) { // clover::error thrown above becomes the return code
+   return e.get();
+}
+
+PUBLIC cl_int // Creates a hard_event that copies a region between two images via hard_copy_op.
+clEnqueueCopyImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
+                   const size_t *src_origin, const size_t *dst_origin,
+                   const size_t *region,
+                   cl_uint num_deps, const cl_event *deps,
+                   cl_event *ev) try {
+   image *src_img = dynamic_cast<image *>(src_obj); // the casts are used only for validation below
+   image *dst_img = dynamic_cast<image *>(dst_obj);
+
+   validate_base(q, num_deps, deps);
+   validate_obj(q, src_img); // a non-image argument gives a null cast and CL_INVALID_MEM_OBJECT
+   validate_obj(q, dst_img);
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_COPY_IMAGE, { deps, deps + num_deps },
+      hard_copy_op(q, dst_obj, dst_origin, src_obj, src_origin, region));
+
+   ret_object(ev, hev); // stores hev in *ev, or releases it when ev is NULL
+   return CL_SUCCESS;
+
+} catch (error &e) { // clover::error thrown above becomes the return code
+   return e.get();
+}
+
+PUBLIC cl_int // Creates a hard_event that copies an image region into a buffer via soft_copy_op.
+clEnqueueCopyImageToBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
+                           const size_t *src_origin, const size_t *region,
+                           size_t dst_offset,
+                           cl_uint num_deps, const cl_event *deps,
+                           cl_event *ev) try {
+   image *src_img = dynamic_cast<image *>(src_obj); // null when src_obj is not an image
+
+   validate_base(q, num_deps, deps);
+   validate_obj(q, src_img); // a null src_img is rejected here with CL_INVALID_MEM_OBJECT
+   validate_obj(q, dst_obj);
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, { deps, deps + num_deps },
+      soft_copy_op(q,
+                   dst_obj, { dst_offset },
+                   { 0, 0, 0 }, // NOTE(review): all-zero buffer pitch - verify how soft_copy_op's dst_pitch(...) offsets behave with it
+                   src_obj, src_origin,
+                   { 1, src_img->row_pitch(), src_img->slice_pitch() },
+                   region));
+
+   ret_object(ev, hev); // stores hev in *ev, or releases it when ev is NULL
+   return CL_SUCCESS;
+
+} catch (error &e) { // clover::error thrown above becomes the return code
+   return e.get();
+}
+
+PUBLIC cl_int
+clEnqueueCopyBufferToImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
+                           size_t src_offset,
+                           const size_t *dst_origin, const size_t *region,
+                           cl_uint num_deps, const cl_event *deps,
+                           cl_event *ev) try {
+   image *dst_img = dynamic_cast<image *>(src_obj);
+
+   validate_base(q, num_deps, deps);
+   validate_obj(q, src_obj);
+   validate_obj(q, dst_img);
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, { deps, deps + num_deps },
+      soft_copy_op(q,
+                   dst_obj, dst_origin,
+                   { 1, dst_img->row_pitch(), dst_img->slice_pitch() },
+                   src_obj, { src_offset },
+                   { 0, 0, 0 },
+                   region));
+
+   ret_object(ev, hev);
+   return CL_SUCCESS;
+
+} catch (error &e) {
+   return e.get();
+}
+
+PUBLIC void *
+clEnqueueMapBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
+                   cl_map_flags flags, size_t offset, size_t size,
+                   cl_uint num_deps, const cl_event *deps,
+                   cl_event *ev, cl_int *errcode_ret) try {
+   validate_base(q, num_deps, deps);
+   validate_obj(q, obj);
+
+   if (offset > obj->size() || offset + size > obj->size())
+      throw error(CL_INVALID_VALUE);
+
+   void *map = obj->resource(q).add_map(
+      *q, flags, blocking, { offset }, { size });
+
+   ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_BUFFER,
+                                 { deps, deps + num_deps }));
+   ret_error(errcode_ret, CL_SUCCESS);
+   return map;
+
+} catch (error &e) {
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+PUBLIC void * // Maps a region of image obj and returns the mapped pointer.
+clEnqueueMapImage(cl_command_queue q, cl_mem obj, cl_bool blocking,
+                  cl_map_flags flags,
+                  const size_t *origin, const size_t *region,
+                  size_t *row_pitch, size_t *slice_pitch, // NOTE(review): neither output is written anywhere in this body
+                  cl_uint num_deps, const cl_event *deps,
+                  cl_event *ev, cl_int *errcode_ret) try {
+   image *img = dynamic_cast<image *>(obj); // null when obj is not an image
+
+   validate_base(q, num_deps, deps);
+   validate_obj(q, img); // a null img is rejected here with CL_INVALID_MEM_OBJECT
+
+   void *map = obj->resource(q).add_map( // called directly at enqueue time, not from inside the event
+      *q, flags, blocking, origin, region);
+
+   ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_IMAGE, // event is constructed without an operation argument
+                                 { deps, deps + num_deps }));
+   ret_error(errcode_ret, CL_SUCCESS);
+   return map;
+
+} catch (error &e) { // clover::error thrown above is reported through errcode_ret
+   ret_error(errcode_ret, e);
+   return NULL;
+}
+
+PUBLIC cl_int // Creates a hard_event whose operation removes the mapping identified by ptr.
+clEnqueueUnmapMemObject(cl_command_queue q, cl_mem obj, void *ptr,
+                        cl_uint num_deps, const cl_event *deps,
+                        cl_event *ev) try {
+   validate_base(q, num_deps, deps);
+   validate_obj(q, obj); // ptr itself is not validated in this body
+
+   hard_event *hev = new hard_event(
+      *q, CL_COMMAND_UNMAP_MEM_OBJECT, { deps, deps + num_deps },
+      [=](event &) { // q, obj and ptr are captured by copy
+         obj->resource(q).del_map(ptr);
+      });
+
+   ret_object(ev, hev); // stores hev in *ev, or releases it when ev is NULL
+   return CL_SUCCESS;
+
+} catch (error &e) { // clover::error thrown above becomes the return code
+   return e.get();
+}
diff --git a/src/gallium/state_trackers/clover/api/util.hpp b/src/gallium/state_trackers/clover/api/util.hpp

new file mode 100644 (file)

index 0000000..2f9ec1f
--- /dev/null
+++ b/src/gallium/state_trackers/clover/api/util.hpp
@@ -0,0 +1,166 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CL_UTIL_HPP__
+#define __CL_UTIL_HPP__
+
+#include <cstdint>
+#include <cstring>
+#include <algorithm>
+#include <map>
+
+#include "core/base.hpp"
+#include "pipe/p_compiler.h"
+
+namespace clover {
+   ///
+   /// Return a matrix (a container of containers) in \a buf with
+   /// argument and bounds checking.  Intended to be used by
+   /// implementations of \a clGetXXXInfo().
+   ///
+   template<typename T, typename V>
+   cl_int
+   matrix_property(void *buf, size_t size, size_t *size_ret, const V& v) {
+      if (buf && size < sizeof(T *) * v.size()) // buf is treated as an array of one T* per element of v
+         return CL_INVALID_VALUE;
+
+      if (size_ret)
+         *size_ret = sizeof(T *) * v.size(); // size of the pointer array, not of the row contents
+
+      if (buf)
+         for_each([](typename V::value_type src, T *dst) { // functor-first form, so not std::for_each; presumably pairs rows of v with pointers in buf
+               if (dst) // null row pointers are skipped
+                  std::copy(src.begin(), src.end(), dst); // the capacity behind dst is not checked here
+            },
+            v.begin(), v.end(), (T **)buf);
+
+      return CL_SUCCESS;
+   }
+
+   ///
+   /// Return a vector in \a buf with argument and bounds checking.
+   /// Intended to be used by implementations of \a clGetXXXInfo().
+   ///
+   template<typename T, typename V>
+   cl_int
+   vector_property(void *buf, size_t size, size_t *size_ret, const V& v) {
+      if (buf && size < sizeof(T) * v.size()) // size is only checked when a buffer is supplied
+         return CL_INVALID_VALUE;
+
+      if (size_ret)
+         *size_ret = sizeof(T) * v.size(); // required size in bytes, written even when buf is NULL
+      if (buf)
+         std::copy(v.begin(), v.end(), (T *)buf); // elements are converted to T on assignment
+
+      return CL_SUCCESS;
+   }
+
+   ///
+   /// Return a scalar in \a buf with argument and bounds checking.
+   /// Intended to be used by implementations of \a clGetXXXInfo().
+   ///
+   template<typename T>
+   cl_int
+   scalar_property(void *buf, size_t size, size_t *size_ret, T v) {
+      return vector_property<T>(buf, size, size_ret, std::vector<T>(1, v)); // reuses the vector path with a one-element vector
+   }
+
+   ///
+   /// Return a string in \a buf with argument and bounds checking.
+   /// Intended to be used by implementations of \a clGetXXXInfo().
+   ///
+   inline cl_int
+   string_property(void *buf, size_t size, size_t *size_ret,
+                   const std::string &v) {
+      if (buf && size < v.size() + 1) // + 1 for the terminating NUL
+         return CL_INVALID_VALUE;
+
+      if (size_ret)
+         *size_ret = v.size() + 1; // required size including the NUL, written even when buf is NULL
+      if (buf)
+         std::strcpy((char *)buf, v.c_str()); // bounded by the size check above
+
+      return CL_SUCCESS;
+   }
+
+   ///
+   /// Convert a NULL-terminated property list into an std::map.
+   ///
+   template<typename T>
+   std::map<T, T>
+   property_map(const T *props) {
+      std::map<T, T> m;
+
+      while (props && *props) { // a NULL list yields an empty map; a zero key ends the list
+         T key = *props++;
+         T value = *props++; // read unconditionally: assumes every key is followed by a value
+
+         if (m.count(key)) // duplicate keys are rejected
+            throw clover::error(CL_INVALID_PROPERTY);
+
+         m.insert({ key, value });
+      }
+
+      return m;
+   }
+
+   ///
+   /// Convert an std::map into a NULL-terminated property list.
+   ///
+   template<typename T>
+   std::vector<T>
+   property_vector(const std::map<T, T> &m) {
+      std::vector<T> v;
+
+      for (auto &p : m) { // emitted in the map's key order as key, value, key, value, ...
+         v.push_back(p.first);
+         v.push_back(p.second);
+      }
+
+      v.push_back(0); // list terminator
+      return v;
+   }
+
+   ///
+   /// Return an error code in \a p if non-zero.
+   ///
+   inline void
+   ret_error(cl_int *p, const clover::error &e) { // files in this patch also pass plain codes such as CL_SUCCESS for e
+      if (p) // a NULL p means the caller does not want the code
+         *p = e.get();
+   }
+
+   ///
+   /// Return a reference-counted object in \a p if non-zero.
+   /// Otherwise release object ownership.
+   ///
+   template<typename T, typename S>
+   void
+   ret_object(T p, S v) {
+      if (p)
+         *p = v; // hand the object to the caller
+      else
+         v->release(); // release()'s result is ignored here, so v is never deleted on this path
+   }
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/base.hpp b/src/gallium/state_trackers/clover/core/base.hpp

new file mode 100644 (file)

index 0000000..19053f3
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/base.hpp
@@ -0,0 +1,285 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_BASE_HPP__
+#define __CORE_BASE_HPP__
+
+#include <stdexcept>
+#include <atomic>
+#include <cassert>
+#include <tuple>
+#include <vector>
+#include <functional>
+
+#include "CL/cl.h"
+
+///
+/// Main namespace of the CL state tracker.
+///
+namespace clover {
+   ///
+   /// Class that represents an error that can be converted to an
+   /// OpenCL status code.
+   ///
+   /// The status code is stored next to the std::runtime_error
+   /// message and is handed back unchanged by get().
+   ///
+   class error : public std::runtime_error {
+   public:
+      /// \a code is the OpenCL status code, \a what an optional
+      /// message (empty by default) forwarded to std::runtime_error.
+      error(cl_int code, std::string what = "") :
+         std::runtime_error(what), code(code) {
+      }
+
+      /// Return the status code this error was constructed with.
+      cl_int get() const {
+         return code;
+      }
+
+   protected:
+      cl_int code;
+   };
+
+   ///
+   /// Base class for objects that support reference counting.
+   ///
+   class ref_counter {
+   public:
+      ref_counter() : __ref_count(1) {}
+
+      unsigned ref_count() {
+         return __ref_count;
+      }
+
+      void retain() {
+         __ref_count++;
+      }
+
+      bool release() {
+         return (--__ref_count) == 0;
+      }
+
+   private:
+      std::atomic<unsigned> __ref_count;
+   };
+
+   ///
+   /// Intrusive smart pointer for objects that implement the
+   /// clover::ref_counter interface.
+   ///
+   template<typename T>
+   class ref_ptr {
+   public:
+      ref_ptr(T *q = NULL) : p(NULL) {
+         reset(q);
+      }
+
+      template<typename S>
+      ref_ptr(const ref_ptr<S> &ref) : p(NULL) {
+         reset(ref.p);
+      }
+
+      ~ref_ptr() {
+         reset(NULL);
+      }
+
+      void reset(T *q = NULL) {
+         if (q)
+            q->retain();
+         if (p && p->release())
+            delete p;
+         p = q;
+      }
+
+      ref_ptr &operator=(const ref_ptr &ref) {
+         reset(ref.p);
+         return *this;
+      }
+
+      T *operator*() const {
+         return p;
+      }
+
+      T *operator->() const {
+         return p;
+      }
+
+      operator bool() const {
+         return p;
+      }
+
+   private:
+      T *p;
+   };
+
+   ///
+   /// Transfer the caller's ownership of a reference-counted object
+   /// to a clover::ref_ptr smart pointer.
+   ///
+   /// \a p is dereferenced unconditionally, so it must not be NULL.
+   ///
+   template<typename T>
+   inline ref_ptr<T>
+   transfer(T *p) {
+      // The smart pointer takes a reference of its own...
+      ref_ptr<T> owner { p };
+
+      // ...which makes the one the caller was holding redundant.
+      p->release();
+
+      return owner;
+   }
+
+   ///
+   /// Helper used by for_each(), map(), fold() and any_of() to call
+   /// a function on one element from each of several iterators.
+   ///
+   /// step() dereferences and post-increments element \a N of the
+   /// tuple \a its, puts the result in front of \a args and recurses
+   /// with \a N - 1, so the arguments end up ordered like the tuple.
+   /// The tuple is taken by value; the callers in this file build it
+   /// with std::tie(), so its elements are references and the
+   /// increments act on the caller's iterators.
+   ///
+   template<typename T, typename S, int N>
+   struct __iter_helper {
+      template<typename F, typename Its, typename... Args>
+      static T
+      step(F op, S state, Its its, Args... args) {
+         return __iter_helper<T, S, N - 1>::step(
+            op, state, its, *(std::get<N>(its)++), args...);
+      }
+   };
+
+   ///
+   /// End of the recursion: consume element 0 of the tuple and call
+   /// \a op with \a state as its first argument.
+   ///
+   template<typename T, typename S>
+   struct __iter_helper<T, S, 0> {
+      template<typename F, typename Its, typename... Args>
+      static T
+      step(F op, S state, Its its, Args... args) {
+         return op(state, *(std::get<0>(its)++), args...);
+      }
+   };
+
+   ///
+   /// Tag type selecting the stateless variant of __iter_helper.
+   ///
+   struct __empty {};
+
+   ///
+   /// End of the recursion when there is no state: \a op is called
+   /// with the iterator values only.
+   ///
+   template<typename T>
+   struct __iter_helper<T, __empty, 0> {
+      template<typename F, typename Its, typename... Args>
+      static T
+      step(F op, __empty state, Its its, Args... args) {
+         return op(*(std::get<0>(its)++), args...);
+      }
+   };
+
+   ///
+   /// Compute the type \a F returns when it is called with the value
+   /// types of the iterators \a Its, with a top-level const removed.
+   /// Used as the default element type of the container map()
+   /// returns.
+   ///
+   template<typename F, typename... Its>
+   struct __result_helper {
+      typedef typename std::remove_const<
+         typename std::result_of<
+            F (typename std::iterator_traits<Its>::value_type...)
+            >::type
+         >::type type;
+   };
+
+   ///
+   /// Iterate \a op on the result of zipping all the specified
+   /// iterators together.
+   ///
+   /// Similar to std::for_each, but it accepts functions of an
+   /// arbitrary number of arguments.
+   ///
+   template<typename F, typename It0, typename... Its>
+   F
+   for_each(F op, It0 it0, It0 end0, Its... its) {
+      while (it0 != end0)
+         __iter_helper<void, __empty, sizeof...(Its)>::step(
+            op, {}, std::tie(it0, its...));
+
+      return op;
+   }
+
+   ///
+   /// Iterate \a op on the result of zipping all the specified
+   /// iterators together, storing return values in a new container.
+   ///
+   /// Similar to std::transform, but it accepts functions of an
+   /// arbitrary number of arguments and it doesn't have to be
+   /// provided with an output iterator.
+   ///
+   template<typename F, typename It0, typename... Its,
+            typename C = std::vector<
+               typename __result_helper<F, It0, Its...>::type>>
+   C
+   map(F op, It0 it0, It0 end0, Its... its) {
+      C c;
+
+      while (it0 != end0)
+         c.push_back(
+            __iter_helper<typename C::value_type, __empty, sizeof...(Its)>
+            ::step(op, {}, std::tie(it0, its...)));
+
+      return c;
+   }
+
+   ///
+   /// Reduce the result of zipping all the specified iterators
+   /// together, using iterative application of \a op from left to
+   /// right.
+   ///
+   /// Similar to std::accumulate, but it accepts functions of an
+   /// arbitrary number of arguments.
+   ///
+   template<typename F, typename T, typename It0, typename... Its>
+   T
+   fold(F op, T a, It0 it0, It0 end0, Its... its) {
+      while (it0 != end0)
+         a = __iter_helper<T, T, sizeof...(Its)>::step(
+            op, a, std::tie(it0, its...));
+
+      return a;
+   }
+
+   ///
+   /// Iterate \a op on the result of zipping the specified iterators
+   /// together, checking if any of the evaluations returns \a true.
+   ///
+   /// Similar to std::any_of, but it accepts functions of an
+   /// arbitrary number of arguments.
+   ///
+   template<typename F, typename It0, typename... Its>
+   bool
+   any_of(F op, It0 it0, It0 end0, Its... its) {
+      while (it0 != end0)
+         if (__iter_helper<bool, __empty, sizeof...(Its)>::step(
+                op, {}, std::tie(it0, its...)))
+            return true;
+
+      return false;
+   }
+
+   ///
+   /// Return a copy of the first member of the pair \a ent.
+   ///
+   template<typename T, typename S>
+   T
+   keys(const std::pair<T, S> &ent) {
+      const T &key = ent.first;
+      return key;
+   }
+
+   ///
+   /// Return a predicate that checks whether the first member of a
+   /// pair compares equal to \a x.  The predicate keeps its own copy
+   /// of \a x.
+   ///
+   template<typename T, typename S>
+   std::function<bool (const std::pair<T, S> &)>
+   key_equals(const T &x) {
+      return [x](const std::pair<T, S> &candidate) {
+         return candidate.first == x;
+      };
+   }
+
+   ///
+   /// Return a copy of the second member of the pair \a ent.
+   ///
+   template<typename T, typename S>
+   S
+   values(const std::pair<T, S> &ent) {
+      const S &value = ent.second;
+      return value;
+   }
+
+   ///
+   /// Return a predicate that checks whether its argument compares
+   /// equal to 0.
+   ///
+   template<typename T>
+   std::function<bool (const T &)>
+   is_zero() {
+      return [](const T &candidate) {
+         return candidate == 0;
+      };
+   }
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/compat.hpp b/src/gallium/state_trackers/clover/core/compat.hpp

new file mode 100644 (file)

index 0000000..c0057af
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/compat.hpp
@@ -0,0 +1,290 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_COMPAT_HPP__
+#define __CORE_COMPAT_HPP__
+
+#include <new>
+#include <cstring>
+#include <cstdlib>
+#include <string>
+#include <stdint.h>
+
+
+namespace clover {
+   namespace compat {
+      // XXX - For cases where we can't rely on STL...  I.e. the
+      //       interface between code compiled as C++98 and C++11
+      //       source.  Get rid of this as soon as everything can be
+      //       compiled as C++11.
+
+      template<typename T>
+      class vector {
+      protected:
+         static T *
+         alloc(int n, const T *q, int m) {
+            T *p = reinterpret_cast<T *>(std::malloc(n * sizeof(T)));
+
+            for (int i = 0; i < m; ++i)
+               new(&p[i]) T(q[i]);
+
+            return p;
+         }
+
+         static void
+         free(int n, T *p) {
+            for (int i = 0; i < n; ++i)
+               p[i].~T();
+
+            std::free(p);
+         }
+
+      public:
+         vector() : p(NULL), n(0) {
+         }
+
+         vector(const vector &v) : p(alloc(v.n, v.p, v.n)), n(v.n) {
+         }
+
+         vector(T *p, size_t n) : p(alloc(n, p, n)), n(n) {
+         }
+
+         template<typename C>
+         vector(const C &v) :
+            p(alloc(v.size(), &*v.begin(), v.size())), n(v.size()) {
+         }
+
+         ~vector() {
+            free(n, p);
+         }
+
+         vector &
+         operator=(const vector &v) {
+            free(n, p);
+
+            p = alloc(v.n, v.p, v.n);
+            n = v.n;
+
+            return *this;
+         }
+
+         void
+         reserve(size_t m) {
+            if (n < m) {
+               T *q = alloc(m, p, n);
+               free(n, p);
+
+               p = q;
+               n = m;
+            }
+         }
+
+         void
+         resize(size_t m, T x = T()) {
+            size_t n = size();
+
+            reserve(m);
+
+            for (size_t i = n; i < m; ++i)
+               new(&p[i]) T(x);
+         }
+
+         void
+         push_back(const T &x) {
+            size_t n = size();
+            reserve(n + 1);
+            new(&p[n]) T(x);
+         }
+
+         size_t
+         size() const {
+            return n;
+         }
+
+         T *
+         begin() {
+            return p;
+         }
+
+         const T *
+         begin() const {
+            return p;
+         }
+
+         T *
+         end() {
+            return p + n;
+         }
+
+         const T *
+         end() const {
+            return p + n;
+         }
+
+         T &
+         operator[](int i) {
+            return p[i];
+         }
+
+         const T &
+         operator[](int i) const {
+            return p[i];
+         }
+
+      private:
+         T *p;
+         size_t n;
+      };
+
+      template<typename T>
+      class vector_ref {
+      public:
+         vector_ref(T *p, size_t n) : p(p), n(n) {
+         }
+
+         template<typename C>
+         vector_ref(C &v) : p(&*v.begin()), n(v.size()) {
+         }
+
+         size_t
+         size() const {
+            return n;
+         }
+
+         T *
+         begin() {
+            return p;
+         }
+
+         const T *
+         begin() const {
+            return p;
+         }
+
+         T *
+         end() {
+            return p + n;
+         }
+
+         const T *
+         end() const {
+            return p + n;
+         }
+
+         T &
+         operator[](int i) {
+            return p[i];
+         }
+
+         const T &
+         operator[](int i) const {
+            return p[i];
+         }
+
+      private:
+         T *p;
+         size_t n;
+      };
+
+      class istream {
+      public:
+         typedef vector_ref<const unsigned char> buffer_t;
+
+         class error {
+         public:
+            virtual ~error() {}
+         };
+
+         istream(const buffer_t &buf) : buf(buf), offset(0) {}
+
+         void
+         read(char *p, size_t n) {
+            if (offset + n > buf.size())
+               throw error();
+
+            std::memcpy(p, buf.begin() + offset, n);
+            offset += n;
+         }
+
+      private:
+         const buffer_t &buf;
+         size_t offset;
+      };
+
+      class ostream {
+      public:
+         typedef vector<unsigned char> buffer_t;
+
+         ostream(buffer_t &buf) : buf(buf), offset(buf.size()) {}
+
+         void
+         write(const char *p, size_t n) {
+            buf.resize(offset + n);
+            std::memcpy(buf.begin() + offset, p, n);
+            offset += n;
+         }
+
+      private:
+         buffer_t &buf;
+         size_t offset;
+      };
+
+      class string : public vector_ref<const char> {
+      public:
+         string(const char *p) : vector_ref(p, std::strlen(p)) {
+         }
+
+         template<typename C>
+         string(const C &v) : vector_ref(v) {
+         }
+
+         operator std::string() const {
+            return std::string(begin(), end());
+         }
+
+         const char *
+         find(const string &s) const {
+            for (size_t i = 0; i + s.size() < size(); ++i) {
+               if (!std::memcmp(begin() + i, s.begin(), s.size()))
+                  return begin() + i;
+            }
+
+            return end();
+         }
+      };
+
+      template<typename T>
+      bool
+      operator==(const vector_ref<T> &a, const vector_ref<T> &b) {
+         if (a.size() != b.size())
+            return false;
+
+         for (size_t i = 0; i < a.size(); ++i)
+            if (a[i] != b[i])
+               return false;
+
+         return true;
+      }
+   }
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp

new file mode 100644 (file)

index 0000000..a3998d5
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/compiler.hpp
@@ -0,0 +1,53 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_COMPILER_HPP__
+#define __CORE_COMPILER_HPP__
+
+#include "core/compat.hpp"
+#include "core/module.hpp"
+
+namespace clover {
+   ///
+   /// Error that carries a build log.  The log text is copied into
+   /// the object on construction.
+   ///
+   class build_error {
+   public:
+      build_error(const compat::string &log) : log(log) {
+      }
+
+      virtual ~build_error() {
+      }
+
+      ///
+      /// Return a view of the stored log.  The view refers to storage
+      /// owned by this object and is only valid while it is alive.
+      /// Const-qualified so that it can be called on errors caught by
+      /// const reference.
+      ///
+      compat::string what() const {
+         return log;
+      }
+
+   private:
+      compat::vector<char> log;
+   };
+
+   module compile_program_llvm(const compat::string &source,
+                               const compat::string &target);
+
+   module compile_program_tgsi(const compat::string &source,
+                               const compat::string &target);
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/context.cpp b/src/gallium/state_trackers/clover/core/context.cpp

new file mode 100644 (file)

index 0000000..6e09a1a
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/context.cpp
@@ -0,0 +1,37 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include <algorithm>
+
+#include "core/context.hpp"
+
+using namespace clover;
+
+///
+/// Create a context from the property list \a props and the devices
+/// \a devs.  Both vectors are copied into the context as they are.
+///
+_cl_context::_cl_context(const std::vector<cl_context_properties> &props,
+                         const std::vector<device *> &devs) :
+   devs(devs), __props(props) {
+}
+
+///
+/// Check whether \a dev is one of the devices of this context.
+/// Devices are compared by address.
+///
+bool
+_cl_context::has_device(clover::device *dev) const {
+   return std::find(devs.begin(), devs.end(), dev) != devs.end();
+}
diff --git a/src/gallium/state_trackers/clover/core/context.hpp b/src/gallium/state_trackers/clover/core/context.hpp

new file mode 100644 (file)

index 0000000..d783fb6
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/context.hpp
@@ -0,0 +1,51 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_CONTEXT_HPP__
+#define __CORE_CONTEXT_HPP__
+
+#include "core/base.hpp"
+#include "core/device.hpp"
+
+namespace clover {
+   typedef struct _cl_context context;
+}
+
+///
+/// Reference-counted context object: a list of devices plus the
+/// property list given at construction.  Contexts can't be copied.
+///
+struct _cl_context : public clover::ref_counter {
+public:
+   _cl_context(const std::vector<cl_context_properties> &props,
+               const std::vector<clover::device *> &devs);
+   _cl_context(const _cl_context &ctx) = delete;
+
+   /// Check whether \a dev is one of the devices of this context.
+   bool has_device(clover::device *dev) const;
+
+   /// Property list the context was constructed with.
+   const std::vector<cl_context_properties> &props() const {
+      return __props;
+   }
+
+   /// Devices of this context; the pointers are stored as given,
+   /// nothing in this class deletes them.
+   const std::vector<clover::device *> devs;
+
+private:
+   std::vector<cl_context_properties> __props;
+};
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp

new file mode 100644 (file)

index 0000000..8390f3f
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -0,0 +1,179 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "core/device.hpp"
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+
+using namespace clover;
+
+namespace {
+   ///
+   /// Query the compute capability \a cap of \a pipe and return its
+   /// value as a vector of \a T.
+   ///
+   /// The screen is asked twice: the first call, with a NULL
+   /// buffer, is used to learn the size of the value in bytes, the
+   /// second one fills the buffer in.  The result is empty if the
+   /// reported size is smaller than one \a T (or negative).
+   ///
+   template<typename T>
+   std::vector<T>
+   get_compute_param(pipe_screen *pipe, pipe_compute_cap cap) {
+      const int sz = pipe->get_compute_param(pipe, cap, NULL);
+      std::vector<T> v(sz > 0 ? sz / sizeof(T) : 0);
+
+      // There is no storage to hand to the driver if the vector is
+      // empty, and front() must not be used on it.
+      if (!v.empty())
+         pipe->get_compute_param(pipe, cap, v.data());
+
+      return v;
+   }
+}
+
+///
+/// Create a device for the loader device \a ldev.
+///
+/// Throws clover::error(CL_INVALID_DEVICE) if no screen can be
+/// created for it or the screen doesn't report PIPE_CAP_COMPUTE.
+/// In that case \a ldev is left untouched, releasing it remains the
+/// caller's business.
+///
+_cl_device_id::_cl_device_id(pipe_loader_device *ldev) : ldev(ldev) {
+   pipe = pipe_loader_create_screen(ldev, PIPE_SEARCH_DIR);
+
+   if (!pipe || !pipe->get_param(pipe, PIPE_CAP_COMPUTE)) {
+      // The destructor doesn't run for an object whose constructor
+      // throws, so the screen has to be disposed of right here.
+      if (pipe)
+         pipe->destroy(pipe);
+
+      throw error(CL_INVALID_DEVICE);
+   }
+}
+
+///
+/// Move constructor: take over the screen and the loader device of
+/// \a dev, leaving it with NULL pointers so that its destructor
+/// doesn't dispose of them.
+///
+_cl_device_id::_cl_device_id(_cl_device_id &&dev) : pipe(dev.pipe), ldev(dev.ldev) {
+   dev.pipe = NULL;
+   dev.ldev = NULL;
+}
+
+///
+/// Destroy the screen first and release the loader device after
+/// that.  Either step is skipped if the pointer is NULL, as it is
+/// for a moved-from device.
+///
+_cl_device_id::~_cl_device_id() {
+   if (pipe != NULL)
+      pipe->destroy(pipe);
+
+   if (ldev != NULL)
+      pipe_loader_release(&ldev, 1);
+}
+
+///
+/// Map the kind of the loader device to an OpenCL device type:
+/// software devices are reported as CPUs, PCI devices as GPUs.
+/// Any other kind trips an assertion (and gives 0 where assertions
+/// are compiled out).
+///
+cl_device_type
+_cl_device_id::type() const {
+   if (ldev->type == PIPE_LOADER_DEVICE_SOFTWARE)
+      return CL_DEVICE_TYPE_CPU;
+
+   if (ldev->type == PIPE_LOADER_DEVICE_PCI)
+      return CL_DEVICE_TYPE_GPU;
+
+   assert(0);
+   return 0;
+}
+
+///
+/// Vendor id of the device: the PCI vendor id for PCI devices, 0
+/// for software devices.  Any other kind of loader device trips an
+/// assertion (and gives 0 where assertions are compiled out).
+///
+cl_uint
+_cl_device_id::vendor_id() const {
+   if (ldev->type == PIPE_LOADER_DEVICE_SOFTWARE)
+      return 0;
+
+   if (ldev->type == PIPE_LOADER_DEVICE_PCI)
+      return ldev->pci.vendor_id;
+
+   assert(0);
+   return 0;
+}
+
+///
+/// Limit on read images.  This is the compile-time constant
+/// PIPE_MAX_SHADER_RESOURCES, the device isn't queried.
+///
+size_t
+_cl_device_id::max_images_read() const {
+   return PIPE_MAX_SHADER_RESOURCES;
+}
+
+///
+/// Limit on written images.  This is the compile-time constant
+/// PIPE_MAX_SHADER_RESOURCES, the device isn't queried.
+///
+size_t
+_cl_device_id::max_images_write() const {
+   return PIPE_MAX_SHADER_RESOURCES;
+}
+
+///
+/// Value the screen reports for PIPE_CAP_MAX_TEXTURE_2D_LEVELS.
+///
+cl_uint
+_cl_device_id::max_image_levels_2d() const {
+   return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
+}
+
+///
+/// Value the screen reports for PIPE_CAP_MAX_TEXTURE_3D_LEVELS.
+///
+cl_uint
+_cl_device_id::max_image_levels_3d() const {
+   return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_3D_LEVELS);
+}
+
+///
+/// Value the screen reports for
+/// PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS on the compute shader stage.
+///
+cl_uint
+_cl_device_id::max_samplers() const {
+   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
+                                 PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
+}
+
+///
+/// First 64-bit value the screen reports for
+/// PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE.
+///
+/// NOTE(review): indexes element 0 unconditionally, i.e. this
+/// relies on the driver reporting at least one value.
+///
+cl_ulong
+_cl_device_id::max_mem_global() const {
+   return get_compute_param<uint64_t>(pipe,
+                                      PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];
+}
+
+///
+/// First 64-bit value the screen reports for
+/// PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE.
+///
+/// NOTE(review): indexes element 0 unconditionally, i.e. this
+/// relies on the driver reporting at least one value.
+///
+cl_ulong
+_cl_device_id::max_mem_local() const {
+   return get_compute_param<uint64_t>(pipe,
+                                      PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];
+}
+
+///
+/// First 64-bit value the screen reports for
+/// PIPE_COMPUTE_CAP_MAX_INPUT_SIZE.
+///
+/// NOTE(review): indexes element 0 unconditionally, i.e. this
+/// relies on the driver reporting at least one value.
+///
+cl_ulong
+_cl_device_id::max_mem_input() const {
+   return get_compute_param<uint64_t>(pipe,
+                                      PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];
+}
+
+///
+/// Size of a constant buffer: the value the screen reports for
+/// PIPE_SHADER_CAP_MAX_CONSTS on the compute shader stage, times 16.
+///
+/// NOTE(review): the factor presumably is the size in bytes of one
+/// constant (a vector of four 32-bit values) -- confirm against the
+/// gallium documentation.
+///
+cl_ulong
+_cl_device_id::max_const_buffer_size() const {
+   // Widen before multiplying so that the product is computed in
+   // 64 bits instead of overflowing an int for large limits.
+   const cl_ulong max_consts =
+      pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
+                             PIPE_SHADER_CAP_MAX_CONSTS);
+
+   return max_consts * 16;
+}
+
+///
+/// Value the screen reports for PIPE_SHADER_CAP_MAX_CONST_BUFFERS
+/// on the compute shader stage.
+///
+cl_uint
+_cl_device_id::max_const_buffers() const {
+   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
+                                 PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
+}
+
+///
+/// Values the screen reports for PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE,
+/// converted from 64-bit integers to size_t.
+///
+std::vector<size_t>
+_cl_device_id::max_block_size() const {
+   const std::vector<uint64_t> v =
+      get_compute_param<uint64_t>(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
+
+   // Convert element by element: std::vector<uint64_t> is only the
+   // same type as std::vector<size_t> where size_t is 64 bits wide.
+   return std::vector<size_t>(v.begin(), v.end());
+}
+
+///
+/// Name of the device as reported by the screen's get_name() hook.
+///
+std::string
+_cl_device_id::device_name() const {
+   return pipe->get_name(pipe);
+}
+
+///
+/// Name of the vendor as reported by the screen's get_vendor() hook.
+///
+std::string
+_cl_device_id::vendor_name() const {
+   return pipe->get_vendor(pipe);
+}
+
+///
+/// Name of the intermediate representation the screen reports as
+/// preferred (PIPE_SHADER_CAP_PREFERRED_IR) for compute shaders.
+/// TGSI is the only one known here; anything else trips an
+/// assertion (and gives an empty string where assertions are
+/// compiled out).
+///
+std::string
+_cl_device_id::ir_target() const {
+   const auto ir = pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
+                                          PIPE_SHADER_CAP_PREFERRED_IR);
+
+   if (ir == PIPE_SHADER_IR_TGSI)
+      return "tgsi";
+
+   assert(0);
+   return "";
+}
+
+///
+/// Probe the system for devices and keep those a compute device can
+/// be constructed for.
+///
+/// The loader is called twice: first without a buffer, to learn how
+/// many devices there are, then with a buffer of that size.
+///
+device_registry::device_registry() {
+   const int n = pipe_loader_probe(NULL, 0);
+
+   // Nothing to probe for, and front() must not be used on an empty
+   // vector.
+   if (n <= 0)
+      return;
+
+   std::vector<pipe_loader_device *> ldevs(n);
+
+   pipe_loader_probe(&ldevs.front(), n);
+
+   for (pipe_loader_device *ldev : ldevs) {
+      try {
+         devs.emplace_back(ldev);
+      } catch (error &) {
+         // No device object took the loader device over, so nobody
+         // else is going to release it.
+         pipe_loader_release(&ldev, 1);
+      }
+   }
+}
diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp

new file mode 100644 (file)

index 0000000..8f284ba
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -0,0 +1,107 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_DEVICE_HPP__
+#define __CORE_DEVICE_HPP__
+
+#include <set>
+#include <vector>
+
+#include "core/base.hpp"
+#include "core/format.hpp"
+#include "pipe-loader/pipe_loader.h"
+
+namespace clover {
+   typedef struct _cl_device_id device;
+   class root_resource;
+   class hard_event;
+}
+
+///
+/// Compute device: a loader device together with the screen created
+/// for it.  Devices can be moved but not copied.
+///
+struct _cl_device_id {
+public:
+   _cl_device_id(pipe_loader_device *ldev);
+   _cl_device_id(_cl_device_id &&dev);
+   _cl_device_id(const _cl_device_id &dev) = delete;
+   ~_cl_device_id();
+
+   // Properties and limits of the device.
+   cl_device_type type() const;
+   cl_uint vendor_id() const;
+   size_t max_images_read() const;
+   size_t max_images_write() const;
+   cl_uint max_image_levels_2d() const;
+   cl_uint max_image_levels_3d() const;
+   cl_uint max_samplers() const;
+   cl_ulong max_mem_global() const;
+   cl_ulong max_mem_local() const;
+   cl_ulong max_mem_input() const;
+   cl_ulong max_const_buffer_size() const;
+   cl_uint max_const_buffers() const;
+
+   std::vector<size_t> max_block_size() const;
+   std::string device_name() const;
+   std::string vendor_name() const;
+   std::string ir_target() const;
+
+   // These are given access to the private members below.
+   friend struct _cl_command_queue;
+   friend class clover::root_resource;
+   friend class clover::hard_event;
+   friend std::set<cl_image_format>
+   clover::supported_formats(cl_context, cl_mem_object_type);
+
+private:
+   pipe_screen *pipe;
+   pipe_loader_device *ldev;
+};
+
+namespace clover {
+   ///
+   /// Container of all the compute devices that are available in the
+   /// system.
+   ///
+   /// The devices are stored by value in an std::vector and exposed
+   /// through its iterators and front()/back() accessors.
+   ///
+   class device_registry {
+   public:
+      typedef std::vector<device>::iterator iterator;
+
+      device_registry();
+
+      iterator begin() {
+         return devs.begin();
+      }
+
+      iterator end() {
+         return devs.end();
+      }
+
+      // Like std::vector, front() and back() must not be called on
+      // an empty registry.
+      device &front() {
+         return devs.front();
+      }
+
+      device &back() {
+         return devs.back();
+      }
+
+   protected:
+      std::vector<device> devs;
+   };
+}
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/event.cpp b/src/gallium/state_trackers/clover/core/event.cpp

new file mode 100644 (file)

index 0000000..aa287e9
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/event.cpp
@@ -0,0 +1,175 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "core/event.hpp"
+#include "pipe/p_screen.h"
+
+using namespace clover;
+
+///
+/// Create an event that depends on the events \a deps.
+///
+/// The wait count starts at one, which is the count trigger() takes
+/// off when it is called on this event; each dependency may add one
+/// more through chain().
+///
+_cl_event::_cl_event(clover::context &ctx,
+                     std::vector<clover::event *> deps,
+                     action action_ok, action action_fail) :
+   ctx(ctx), __status(0), wait_count(1),
+   action_ok(action_ok), action_fail(action_fail) {
+   for (auto it = deps.begin(); it != deps.end(); ++it)
+      (*it)->chain(this);
+}
+
+// Nothing to do here, the members clean up after themselves.
+_cl_event::~_cl_event() {
+}
+
+///
+/// Take one off the wait count.  Once it reaches zero \a action_ok
+/// is run and trigger() is called on every chained event, last one
+/// first, emptying the chain on the way.
+///
+void
+_cl_event::trigger() {
+   if (--wait_count)
+      return;
+
+   action_ok(*this);
+
+   for (; !__chain.empty(); __chain.pop_back())
+      __chain.back()->trigger();
+}
+
+///
+/// Record \a status as the status of the event, run \a action_fail
+/// and abort every chained event with the same status, last one
+/// first, emptying the chain on the way.
+///
+void
+_cl_event::abort(cl_int status) {
+   __status = status;
+   action_fail(*this);
+
+   for (; !__chain.empty(); __chain.pop_back())
+      __chain.back()->abort(status);
+}
+
+///
+/// An event is signalled once its wait count has dropped to zero.
+///
+bool
+_cl_event::signalled() const {
+   return wait_count == 0;
+}
+
+///
+/// Make \a ev depend on this event: its wait count goes up by one,
+/// it is added to the chain of this event and this event is added
+/// to its list of dependencies.  Nothing happens if this event is
+/// signalled already.
+///
+void
+_cl_event::chain(clover::event *ev) {
+   if (!wait_count)
+      return;
+
+   ev->wait_count++;
+   __chain.push_back(ev);
+   ev->deps.push_back(this);
+}
+
+///
+/// Create an event for the command \a command on the queue \a q,
+/// depending on the events \a deps.  \a action is what runs when
+/// the event is triggered; nothing is done when it is aborted.
+///
+/// The event is handed to the queue's sequence() and then
+/// triggered right away, so \a action runs from within the
+/// constructor unless a dependency added to the wait count.
+///
+hard_event::hard_event(clover::command_queue &q, cl_command_type command,
+                       std::vector<clover::event *> deps, action action) :
+   _cl_event(q.ctx, deps, action, [](event &ev){}),
+   __queue(q), __command(command), __fence(NULL) {
+   q.sequence(this);
+   trigger();
+}
+
+///
+/// Drop the reference to the fence, if any, by replacing it with
+/// NULL through the screen of the queue's device.
+///
+hard_event::~hard_event() {
+   pipe_screen *const screen = queue()->dev.pipe;
+
+   screen->fence_reference(screen, &__fence, NULL);
+}
+
+///
+/// Execution status of the event: the recorded status if it is
+/// negative, CL_QUEUED as long as no fence has been attached,
+/// CL_SUBMITTED until the screen reports the fence as signalled and
+/// CL_COMPLETE after that.
+///
+cl_int
+hard_event::status() const {
+   if (__status < 0)
+      return __status;
+
+   if (!__fence)
+      return CL_QUEUED;
+
+   pipe_screen *screen = queue()->dev.pipe;
+
+   return screen->fence_signalled(screen, __fence) ?
+      CL_COMPLETE : CL_SUBMITTED;
+}
+
+///
+/// Queue the event was created on.
+///
+cl_command_queue
+hard_event::queue() const {
+   return &__queue;
+}
+
+///
+/// Command type the event was created with.
+///
+cl_command_type
+hard_event::command() const {
+   return __command;
+}
+
+///
+/// Block until the fence of the event has finished.  The queue is
+/// flushed first if the event is still CL_QUEUED.
+///
+/// Throws clover::error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
+/// if there is no fence to wait for or waiting for it fails.
+///
+void
+hard_event::wait() const {
+   pipe_screen *screen = queue()->dev.pipe;
+
+   if (status() == CL_QUEUED)
+      queue()->flush();
+
+   // The fence is only waited for if there is one.
+   const bool finished = __fence &&
+      screen->fence_finish(screen, __fence, PIPE_TIMEOUT_INFINITE);
+
+   if (!finished)
+      throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
+}
+
+///
+/// Attach \a fence to the event, replacing the fence that was
+/// attached before (if any).  The exchange goes through the
+/// screen's fence_reference() hook.
+///
+void
+hard_event::fence(pipe_fence_handle *fence) {
+   pipe_screen *screen = queue()->dev.pipe;
+   screen->fence_reference(screen, &__fence, fence);
+}
+
+///
+/// Create an event that isn't bound to a queue, depending on the
+/// events \a deps.  \a action runs both when the event is triggered
+/// and when it is aborted.  If \a __trigger is set the event is
+/// triggered right away.
+///
+soft_event::soft_event(clover::context &ctx,
+                       std::vector<clover::event *> deps,
+                       bool __trigger, action action) :
+   _cl_event(ctx, deps, action, action) {
+   if (__trigger)
+      trigger();
+}
+
+cl_int
+soft_event::status() const {
+   if (__status < 0)
+      return __status;
+
+   else if (!signalled() ||
+            any_of([](const ref_ptr<event> &ev) {
+                  return ev->status() != CL_COMPLETE;
+               }, deps.begin(), deps.end()))
+      return CL_SUBMITTED;
+
+   else
+      return CL_COMPLETE;
+}
+
+///
+/// Always NULL: this kind of event reports no queue.
+///
+cl_command_queue
+soft_event::queue() const {
+   return NULL;
+}
+
+///
+/// Always CL_COMMAND_USER.
+///
+cl_command_type
+soft_event::command() const {
+   return CL_COMMAND_USER;
+}
+
+///
+/// Wait for every dependency of the event, in order.
+///
+/// Throws clover::error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
+/// if the event isn't CL_COMPLETE after that; whatever a
+/// dependency's wait() throws is passed on as well.
+///
+void
+soft_event::wait() const {
+   // Iterate by reference: the dependencies appear to be held
+   // through smart pointers (see the lambda in status()), and
+   // taking a copy of each would manipulate reference counts for
+   // no reason.
+   for (const auto &ev : deps)
+      ev->wait();
+
+   if (status() != CL_COMPLETE)
+      throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
+}
diff --git a/src/gallium/state_trackers/clover/core/event.hpp b/src/gallium/state_trackers/clover/core/event.hpp

new file mode 100644 (file)

index 0000000..ea4ac4a
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/event.hpp
@@ -0,0 +1,138 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_EVENT_HPP__
+#define __CORE_EVENT_HPP__
+
+#include <functional>
+
+#include "core/base.hpp"
+#include "core/queue.hpp"
+
+namespace clover {
+   // Name the global _cl_event struct clover::event inside the
+   // clover namespace.
+   typedef struct _cl_event event;
+}
+
+///
+/// Class that represents a task that might be executed asynchronously
+/// at some point in the future.
+///
+/// An event consists of a list of dependencies, a boolean signalled()
+/// flag, and an associated task.  An event is considered signalled as
+/// soon as all its dependencies (if any) are signalled as well, and
+/// the trigger() method is called; at that point the associated task
+/// will be started through the specified \a action_ok.  If the
+/// abort() method is called instead, the specified \a action_fail is
+/// executed and the associated task will never be started.  Dependent
+/// events will be aborted recursively.
+///
+/// The execution status of the associated task can be queried using
+/// the status() method, and it can be waited for completion using the
+/// wait() method.
+///
+struct _cl_event : public clover::ref_counter {
+public:
+   /// Callback type: a callable receiving a reference to an event.
+   typedef std::function<void (clover::event &)> action;
+
+   /// Create an event in context \a ctx depending on \a deps, with
+   /// the callbacks to run on success (\a action_ok) and on abort
+   /// (\a action_fail).
+   _cl_event(clover::context &ctx, std::vector<clover::event *> deps,
+             action action_ok, action action_fail);
+   virtual ~_cl_event();
+
+   void trigger();
+   void abort(cl_int status);
+   bool signalled() const;
+
+   // Interface implemented by the concrete event kinds.
+   virtual cl_int status() const = 0;
+   virtual cl_command_queue queue() const = 0;
+   virtual cl_command_type command() const = 0;
+   virtual void wait() const = 0;
+
+   clover::context &ctx;
+
+protected:
+   void chain(clover::event *ev);
+
+   // Subclasses return this value as-is from status() when it is
+   // negative.
+   cl_int __status;
+   std::vector<clover::ref_ptr<clover::event>> deps;
+
+private:
+   // NOTE(review): presumably the number of dependencies/trigger
+   // calls still outstanding before the event counts as signalled;
+   // confirm against event.cpp.
+   unsigned wait_count;
+   action action_ok;
+   action action_fail;
+   // NOTE(review): presumably the events registered through chain();
+   // confirm against event.cpp.
+   std::vector<clover::ref_ptr<clover::event>> __chain;
+};
+
+namespace clover {
+   ///
+   /// Class that represents a task executed by a command queue.
+   ///
+   /// Similar to a normal clover::event.  In addition it's associated
+   /// with a given command queue \a q and a given OpenCL \a command.
+   /// hard_event instances created for the same queue are implicitly
+   /// ordered with respect to each other, and they are implicitly
+   /// triggered on construction.
+   ///
+   /// A hard_event is considered complete when the associated
+   /// hardware task finishes execution.
+   ///
+   class hard_event : public event {
+   public:
+      /// Create an event for \a command on queue \a q depending on
+      /// \a deps.  \a action defaults to a no-op callback.
+      hard_event(clover::command_queue &q, cl_command_type command,
+                 std::vector<clover::event *> deps,
+                 action action = [](event &){});
+      ~hard_event();
+
+      virtual cl_int status() const;
+      virtual cl_command_queue queue() const;
+      virtual cl_command_type command() const;
+      virtual void wait() const;
+
+      // NOTE(review): presumably granted so the queue can call the
+      // private fence() setter below; confirm against queue.cpp.
+      friend class ::_cl_command_queue;
+
+   private:
+      /// Replace the fence this event refers to.
+      virtual void fence(pipe_fence_handle *fence);
+
+      clover::command_queue &__queue;
+      cl_command_type __command;
+      pipe_fence_handle *__fence;
+   };
+
+   ///
+   /// Class that represents a software event.
+   ///
+   /// A soft_event is not associated with any specific hardware task
+   /// or command queue.  It's considered complete as soon as all its
+   /// dependencies finish execution.
+   ///
+   class soft_event : public event {
+   public:
+      /// Create a software event in \a ctx depending on \a deps.  If
+      /// \a trigger is true the event is triggered on construction.
+      /// \a action defaults to a no-op callback.
+      soft_event(clover::context &ctx, std::vector<clover::event *> deps,
+                 bool trigger, action action = [](event &){});
+
+      virtual cl_int status() const;
+      virtual cl_command_queue queue() const;
+      virtual cl_command_type command() const;
+      virtual void wait() const;
+   };
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/format.cpp b/src/gallium/state_trackers/clover/core/format.cpp

new file mode 100644 (file)

index 0000000..8f6e14d
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/format.cpp
@@ -0,0 +1,167 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include <algorithm>
+
+#include "core/format.hpp"
+#include "core/memory.hpp"
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+
+namespace clover {
+   // Lookup table from an OpenCL image format (channel order, channel
+   // data type) to the pipe_format used for it.  Consulted by
+   // translate_format() and enumerated by supported_formats().
+   static const std::map<cl_image_format, pipe_format> formats {
+      { { CL_BGRA, CL_UNORM_INT8 }, PIPE_FORMAT_B8G8R8A8_UNORM },
+      { { CL_ARGB, CL_UNORM_INT8 }, PIPE_FORMAT_A8R8G8B8_UNORM },
+      { { CL_RGB, CL_UNORM_SHORT_565 }, PIPE_FORMAT_B5G6R5_UNORM },
+      { { CL_LUMINANCE, CL_UNORM_INT8 }, PIPE_FORMAT_L8_UNORM },
+      { { CL_A, CL_UNORM_INT8 }, PIPE_FORMAT_A8_UNORM },
+      { { CL_INTENSITY, CL_UNORM_INT8 }, PIPE_FORMAT_I8_UNORM },
+      { { CL_LUMINANCE, CL_UNORM_INT16 }, PIPE_FORMAT_L16_UNORM },
+      { { CL_R, CL_FLOAT }, PIPE_FORMAT_R32_FLOAT },
+      { { CL_RG, CL_FLOAT }, PIPE_FORMAT_R32G32_FLOAT },
+      { { CL_RGB, CL_FLOAT }, PIPE_FORMAT_R32G32B32_FLOAT },
+      { { CL_RGBA, CL_FLOAT }, PIPE_FORMAT_R32G32B32A32_FLOAT },
+      { { CL_R, CL_UNORM_INT16 }, PIPE_FORMAT_R16_UNORM },
+      { { CL_RG, CL_UNORM_INT16 }, PIPE_FORMAT_R16G16_UNORM },
+      { { CL_RGB, CL_UNORM_INT16 }, PIPE_FORMAT_R16G16B16_UNORM },
+      { { CL_RGBA, CL_UNORM_INT16 }, PIPE_FORMAT_R16G16B16A16_UNORM },
+      { { CL_R, CL_SNORM_INT16 }, PIPE_FORMAT_R16_SNORM },
+      { { CL_RG, CL_SNORM_INT16 }, PIPE_FORMAT_R16G16_SNORM },
+      { { CL_RGB, CL_SNORM_INT16 }, PIPE_FORMAT_R16G16B16_SNORM },
+      { { CL_RGBA, CL_SNORM_INT16 }, PIPE_FORMAT_R16G16B16A16_SNORM },
+      { { CL_R, CL_UNORM_INT8 }, PIPE_FORMAT_R8_UNORM },
+      { { CL_RG, CL_UNORM_INT8 }, PIPE_FORMAT_R8G8_UNORM },
+      { { CL_RGB, CL_UNORM_INT8 }, PIPE_FORMAT_R8G8B8_UNORM },
+      { { CL_RGBA, CL_UNORM_INT8 }, PIPE_FORMAT_R8G8B8A8_UNORM },
+      { { CL_R, CL_SNORM_INT8 }, PIPE_FORMAT_R8_SNORM },
+      { { CL_RG, CL_SNORM_INT8 }, PIPE_FORMAT_R8G8_SNORM },
+      { { CL_RGB, CL_SNORM_INT8 }, PIPE_FORMAT_R8G8B8_SNORM },
+      { { CL_RGBA, CL_SNORM_INT8 }, PIPE_FORMAT_R8G8B8A8_SNORM },
+      { { CL_R, CL_HALF_FLOAT }, PIPE_FORMAT_R16_FLOAT },
+      { { CL_RG, CL_HALF_FLOAT }, PIPE_FORMAT_R16G16_FLOAT },
+      { { CL_RGB, CL_HALF_FLOAT }, PIPE_FORMAT_R16G16B16_FLOAT },
+      { { CL_RGBA, CL_HALF_FLOAT }, PIPE_FORMAT_R16G16B16A16_FLOAT },
+      { { CL_RGBx, CL_UNORM_SHORT_555 }, PIPE_FORMAT_B5G5R5X1_UNORM },
+      { { CL_RGBx, CL_UNORM_INT8 }, PIPE_FORMAT_R8G8B8X8_UNORM },
+      { { CL_A, CL_UNORM_INT16 }, PIPE_FORMAT_A16_UNORM },
+      { { CL_INTENSITY, CL_UNORM_INT16 }, PIPE_FORMAT_I16_UNORM },
+      { { CL_LUMINANCE, CL_SNORM_INT8 }, PIPE_FORMAT_L8_SNORM },
+      { { CL_INTENSITY, CL_SNORM_INT8 }, PIPE_FORMAT_I8_SNORM },
+      { { CL_A, CL_SNORM_INT16 }, PIPE_FORMAT_A16_SNORM },
+      { { CL_LUMINANCE, CL_SNORM_INT16 }, PIPE_FORMAT_L16_SNORM },
+      { { CL_INTENSITY, CL_SNORM_INT16 }, PIPE_FORMAT_I16_SNORM },
+      { { CL_A, CL_HALF_FLOAT }, PIPE_FORMAT_A16_FLOAT },
+      { { CL_LUMINANCE, CL_HALF_FLOAT }, PIPE_FORMAT_L16_FLOAT },
+      { { CL_INTENSITY, CL_HALF_FLOAT }, PIPE_FORMAT_I16_FLOAT },
+      { { CL_A, CL_FLOAT }, PIPE_FORMAT_A32_FLOAT },
+      { { CL_LUMINANCE, CL_FLOAT }, PIPE_FORMAT_L32_FLOAT },
+      { { CL_INTENSITY, CL_FLOAT }, PIPE_FORMAT_I32_FLOAT },
+      { { CL_RA, CL_UNORM_INT8 }, PIPE_FORMAT_R8A8_UNORM },
+      { { CL_R, CL_UNSIGNED_INT8 }, PIPE_FORMAT_R8_UINT },
+      { { CL_RG, CL_UNSIGNED_INT8 }, PIPE_FORMAT_R8G8_UINT },
+      { { CL_RGB, CL_UNSIGNED_INT8 }, PIPE_FORMAT_R8G8B8_UINT },
+      { { CL_RGBA, CL_UNSIGNED_INT8 }, PIPE_FORMAT_R8G8B8A8_UINT },
+      { { CL_R, CL_SIGNED_INT8 }, PIPE_FORMAT_R8_SINT },
+      { { CL_RG, CL_SIGNED_INT8 }, PIPE_FORMAT_R8G8_SINT },
+      { { CL_RGB, CL_SIGNED_INT8 }, PIPE_FORMAT_R8G8B8_SINT },
+      { { CL_RGBA, CL_SIGNED_INT8 }, PIPE_FORMAT_R8G8B8A8_SINT },
+      { { CL_R, CL_UNSIGNED_INT16 }, PIPE_FORMAT_R16_UINT },
+      { { CL_RG, CL_UNSIGNED_INT16 }, PIPE_FORMAT_R16G16_UINT },
+      { { CL_RGB, CL_UNSIGNED_INT16 }, PIPE_FORMAT_R16G16B16_UINT },
+      { { CL_RGBA, CL_UNSIGNED_INT16 }, PIPE_FORMAT_R16G16B16A16_UINT },
+      { { CL_R, CL_SIGNED_INT16 }, PIPE_FORMAT_R16_SINT },
+      { { CL_RG, CL_SIGNED_INT16 }, PIPE_FORMAT_R16G16_SINT },
+      { { CL_RGB, CL_SIGNED_INT16 }, PIPE_FORMAT_R16G16B16_SINT },
+      { { CL_RGBA, CL_SIGNED_INT16 }, PIPE_FORMAT_R16G16B16A16_SINT },
+      { { CL_R, CL_UNSIGNED_INT32 }, PIPE_FORMAT_R32_UINT },
+      { { CL_RG, CL_UNSIGNED_INT32 }, PIPE_FORMAT_R32G32_UINT },
+      { { CL_RGB, CL_UNSIGNED_INT32 }, PIPE_FORMAT_R32G32B32_UINT },
+      { { CL_RGBA, CL_UNSIGNED_INT32 }, PIPE_FORMAT_R32G32B32A32_UINT },
+      { { CL_R, CL_SIGNED_INT32 }, PIPE_FORMAT_R32_SINT },
+      { { CL_RG, CL_SIGNED_INT32 }, PIPE_FORMAT_R32G32_SINT },
+      { { CL_RGB, CL_SIGNED_INT32 }, PIPE_FORMAT_R32G32B32_SINT },
+      { { CL_RGBA, CL_SIGNED_INT32 }, PIPE_FORMAT_R32G32B32A32_SINT },
+      { { CL_A, CL_UNSIGNED_INT8 }, PIPE_FORMAT_A8_UINT },
+      { { CL_INTENSITY, CL_UNSIGNED_INT8 }, PIPE_FORMAT_I8_UINT },
+      { { CL_LUMINANCE, CL_UNSIGNED_INT8 }, PIPE_FORMAT_L8_UINT },
+      { { CL_A, CL_SIGNED_INT8 }, PIPE_FORMAT_A8_SINT },
+      { { CL_INTENSITY, CL_SIGNED_INT8 }, PIPE_FORMAT_I8_SINT },
+      { { CL_LUMINANCE, CL_SIGNED_INT8 }, PIPE_FORMAT_L8_SINT },
+      { { CL_A, CL_UNSIGNED_INT16 }, PIPE_FORMAT_A16_UINT },
+      { { CL_INTENSITY, CL_UNSIGNED_INT16 }, PIPE_FORMAT_I16_UINT },
+      { { CL_LUMINANCE, CL_UNSIGNED_INT16 }, PIPE_FORMAT_L16_UINT },
+      { { CL_A, CL_SIGNED_INT16 }, PIPE_FORMAT_A16_SINT },
+      { { CL_INTENSITY, CL_SIGNED_INT16 }, PIPE_FORMAT_I16_SINT },
+      { { CL_LUMINANCE, CL_SIGNED_INT16 }, PIPE_FORMAT_L16_SINT },
+      { { CL_A, CL_UNSIGNED_INT32 }, PIPE_FORMAT_A32_UINT },
+      { { CL_INTENSITY, CL_UNSIGNED_INT32 }, PIPE_FORMAT_I32_UINT },
+      { { CL_LUMINANCE, CL_UNSIGNED_INT32 }, PIPE_FORMAT_L32_UINT },
+      { { CL_A, CL_SIGNED_INT32 }, PIPE_FORMAT_A32_SINT },
+      { { CL_INTENSITY, CL_SIGNED_INT32 }, PIPE_FORMAT_I32_SINT },
+      { { CL_LUMINANCE, CL_SIGNED_INT32 }, PIPE_FORMAT_L32_SINT }
+   };
+
+   ///
+   /// Map an OpenCL memory object type to the matching pipe texture
+   /// target.  Throws CL_INVALID_VALUE for any type other than
+   /// buffer, 2D image or 3D image.
+   ///
+   pipe_texture_target
+   translate_target(cl_mem_object_type type) {
+      if (type == CL_MEM_OBJECT_BUFFER)
+         return PIPE_BUFFER;
+
+      if (type == CL_MEM_OBJECT_IMAGE2D)
+         return PIPE_TEXTURE_2D;
+
+      if (type == CL_MEM_OBJECT_IMAGE3D)
+         return PIPE_TEXTURE_3D;
+
+      throw error(CL_INVALID_VALUE);
+   }
+
+   ///
+   /// Look up the pipe_format registered for \a format in the formats
+   /// table.  Throws CL_IMAGE_FORMAT_NOT_SUPPORTED when the table has
+   /// no entry for it.
+   ///
+   pipe_format
+   translate_format(const cl_image_format &format) {
+      const auto entry = formats.find(format);
+
+      if (entry != formats.end())
+         return entry->second;
+
+      throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
+   }
+
+   std::set<cl_image_format>
+   supported_formats(cl_context ctx, cl_mem_object_type type) {
+      std::set<cl_image_format> s;
+      pipe_texture_target target = translate_target(type);
+      unsigned bindings = (PIPE_BIND_SAMPLER_VIEW |
+                           PIPE_BIND_COMPUTE_RESOURCE |
+                           PIPE_BIND_TRANSFER_READ |
+                           PIPE_BIND_TRANSFER_WRITE);
+
+      for (auto f : formats) {
+         if (std::all_of(ctx->devs.begin(), ctx->devs.end(),
+                         [=](const device *dev) {
+                            return dev->pipe->is_format_supported(
+                               dev->pipe, f.second, target, 1, bindings);
+                         }))
+            s.insert(f.first);
+      }
+
+      return s;
+   }
+}
diff --git a/src/gallium/state_trackers/clover/core/format.hpp b/src/gallium/state_trackers/clover/core/format.hpp

new file mode 100644 (file)

index 0000000..a24cbf3
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/format.hpp
@@ -0,0 +1,51 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_FORMAT_HPP__
+#define __CORE_FORMAT_HPP__
+
+#include <set>
+
+#include "core/base.hpp"
+#include "pipe/p_defines.h"
+#include "pipe/p_format.h"
+
+namespace clover {
+   ///
+   /// Translate an OpenCL memory object type into a pipe texture
+   /// target.
+   ///
+   pipe_texture_target translate_target(cl_mem_object_type type);
+
+   ///
+   /// Translate an OpenCL image format into a pipe_format.
+   ///
+   pipe_format translate_format(const cl_image_format &format);
+
+   ///
+   /// Return all the image formats supported by a given context for
+   /// the given memory object type.
+   ///
+   std::set<cl_image_format> supported_formats(cl_context ctx,
+                                               cl_mem_object_type type);
+}
+
+///
+/// Lexicographic ordering on cl_image_format: compare the channel
+/// order first and fall back to the channel data type on a tie.
+/// Lets cl_image_format be used as a key in ordered containers.
+///
+static inline bool
+operator<(const cl_image_format &a, const cl_image_format &b) {
+   if (a.image_channel_order != b.image_channel_order)
+      return a.image_channel_order < b.image_channel_order;
+
+   return a.image_channel_data_type < b.image_channel_data_type;
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/geometry.hpp b/src/gallium/state_trackers/clover/core/geometry.hpp

new file mode 100644 (file)

index 0000000..027264e
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/geometry.hpp
@@ -0,0 +1,72 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_GEOMETRY_HPP__
+#define __CORE_GEOMETRY_HPP__
+
+#include <algorithm>
+#include <array>
+#include <functional>
+#include <initializer_list>
+#include <numeric>
+
+namespace clover {
+   ///
+   /// N-dimensional coordinate array.
+   ///
+   template<typename T, int N>
+   class point {
+   public:
+      point() : a() {
+      }
+
+      point(std::initializer_list<T> v) {
+         auto it = std::copy(v.begin(), v.end(), a.begin());
+         std::fill(it, a.end(), 0);
+      }
+
+      point(const T *v) {
+         std::copy(v, v + N, a.begin());
+      }
+
+      T &operator[](int i) {
+         return a[i];
+      }
+
+      const T &operator[](int i) const {
+         return a[i];
+      }
+
+      point operator+(const point &p) const {
+         point q;
+         std::transform(a.begin(), a.end(), p.a.begin(),
+                        q.a.begin(), std::plus<T>());
+         return q;
+      }
+
+      T operator()(const point &p) const {
+         return std::inner_product(p.a.begin(), p.a.end(), a.begin(), 0);
+      }
+
+   protected:
+      std::array<T, N> a;
+   };
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp

new file mode 100644 (file)

index 0000000..6fa8bd6
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/kernel.cpp
@@ -0,0 +1,393 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "core/kernel.hpp"
+#include "core/resource.hpp"
+#include "pipe/p_context.h"
+
+using namespace clover;
+
+_cl_kernel::_cl_kernel(clover::program &prog,
+                       const std::string &name,
+                       const std::vector<clover::module::argument> &args) :
+   prog(prog), __name(name), exec(*this) {
+   for (auto arg : args) {
+      if (arg.type == module::argument::scalar)
+         this->args.emplace_back(new scalar_argument(arg.size));
+      else if (arg.type == module::argument::global)
+         this->args.emplace_back(new global_argument(arg.size));
+      else if (arg.type == module::argument::local)
+         this->args.emplace_back(new local_argument());
+      else if (arg.type == module::argument::constant)
+         this->args.emplace_back(new constant_argument());
+      else if (arg.type == module::argument::image2d_rd ||
+               arg.type == module::argument::image3d_rd)
+         this->args.emplace_back(new image_rd_argument());
+      else if (arg.type == module::argument::image2d_wr ||
+               arg.type == module::argument::image3d_wr)
+         this->args.emplace_back(new image_wr_argument());
+      else if (arg.type == module::argument::sampler)
+         this->args.emplace_back(new sampler_argument());
+      else
+         throw error(CL_INVALID_KERNEL_DEFINITION);
+   }
+}
+
+///
+/// Convert \a v into a std::vector<T> and resize it to the number of
+/// dimensions reported by the device of \a q, filling any newly
+/// added elements with \a x.
+///
+template<typename T, typename V>
+static inline std::vector<T>
+pad_vector(clover::command_queue &q, const V &v, T x) {
+   std::vector<T> padded(v.begin(), v.end());
+   const auto dims = q.dev.max_block_size().size();
+
+   padded.resize(dims, x);
+   return padded;
+}
+
+///
+/// Execute this kernel on queue \a q with the given grid and block
+/// dimensions.
+///
+/// Binds the execution context, installs all compute state on the
+/// pipe context, launches the grid and then removes the state again
+/// in reverse order.
+///
+/// NOTE(review): \a grid_offset is accepted but never read in this
+/// function, so a non-zero offset has no effect here.
+///
+void
+_cl_kernel::launch(clover::command_queue &q,
+                   const std::vector<size_t> &grid_offset,
+                   const std::vector<size_t> &grid_size,
+                   const std::vector<size_t> &block_size) {
+   void *st = exec.bind(&q);
+   // Turn each recorded input offset into a pointer into the input
+   // buffer, as expected by set_global_binding().
+   auto g_handles = map([&](size_t h) { return (uint32_t *)&exec.input[h]; },
+                        exec.g_handles.begin(), exec.g_handles.end());
+
+   q.pipe->bind_compute_state(q.pipe, st);
+   q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(),
+                                       exec.samplers.data());
+   q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(),
+                                     exec.sviews.data());
+   q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(),
+                                     exec.resources.data());
+   q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
+                              exec.g_buffers.data(), g_handles.data());
+
+   // Block and grid sizes are padded with 1 up to the device's
+   // number of dimensions.
+   q.pipe->launch_grid(q.pipe,
+                       pad_vector<uint>(q, block_size, 1).data(),
+                       pad_vector<uint>(q, grid_size, 1).data(),
+                       module(q).sym(__name).offset,
+                       exec.input.data());
+
+   // Undo the bindings in the reverse order they were made.
+   q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
+   q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
+   q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL);
+   q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(), NULL);
+   exec.unbind();
+}
+
+///
+/// Return the sum of storage() over all arguments of this kernel
+/// that are local_argument instances.
+///
+size_t
+_cl_kernel::mem_local() const {
+   size_t total = 0;
+
+   for (auto &karg : args) {
+      // Only local arguments contribute.
+      if (!dynamic_cast<local_argument *>(karg.get()))
+         continue;
+
+      total += karg->storage();
+   }
+
+   return total;
+}
+
+///
+/// Always returns 0.
+/// NOTE(review): presumably a placeholder until private memory usage
+/// is tracked -- confirm.
+///
+size_t
+_cl_kernel::mem_private() const {
+   return 0;
+}
+
+///
+/// Always returns SIZE_MAX, i.e. no kernel-specific limit is
+/// reported.
+///
+size_t
+_cl_kernel::max_block_size() const {
+   return SIZE_MAX;
+}
+
+///
+/// Return the kernel name given at construction.
+///
+const std::string &
+_cl_kernel::name() const {
+   return __name;
+}
+
+///
+/// Always returns { 0, 0, 0 }.
+/// NOTE(review): presumably meaning "no required block size" --
+/// confirm against the callers.
+///
+std::vector<size_t>
+_cl_kernel::block_size() const {
+   return { 0, 0, 0 };
+}
+
+///
+/// Return the module stored in the program's binaries for the device
+/// of queue \a q.
+///
+/// NOTE(review): the result of find() is dereferenced without being
+/// compared against end(); this assumes the program always holds a
+/// binary for q's device -- confirm that callers guarantee it.
+///
+const clover::module &
+_cl_kernel::module(const clover::command_queue &q) const {
+   return prog.binaries().find(&q.dev)->second;
+}
+
+
+///
+/// Create an empty execution context for \a kern: no queue bound, no
+/// local memory accounted for and no compute state created yet.
+///
+/// The compute state descriptor \a cs is value-initialized so that
+/// any field bind() does not assign explicitly is zero rather than
+/// indeterminate when the descriptor is later handed to
+/// create_compute_state().
+///
+_cl_kernel::exec_context::exec_context(clover::kernel &kern) :
+   kern(kern), q(NULL), mem_local(0), st(NULL), cs() {
+}
+
+///
+/// Delete the compute state, if one was created, on the pipe context
+/// of the queue currently stored in \a q.
+///
+_cl_kernel::exec_context::~exec_context() {
+   if (st)
+      q->pipe->delete_compute_state(q->pipe, st);
+}
+
+///
+/// Bind all kernel arguments for execution on queue \a __q and
+/// return the compute state object to use.
+///
+/// The compute state is re-created when none exists yet, when the
+/// queue differs from the one used by the previous bind(), or when
+/// the required local or input memory size changed.
+///
+void *
+_cl_kernel::exec_context::bind(clover::command_queue *__q) {
+   // After the swap, q is the queue being bound and __q is the queue
+   // of the previous bind() (NULL on the first call).
+   std::swap(q, __q);
+
+   for (auto &arg : kern.args)
+      arg->bind(*this);
+
+   // Create a new compute state if anything changed.
+   if (!st || q != __q ||
+       cs.req_local_mem != mem_local ||
+       cs.req_input_mem != input.size()) {
+      // The old state is deleted on the queue it was created on.
+      if (st)
+         __q->pipe->delete_compute_state(__q->pipe, st);
+
+      cs.prog = kern.module(*q).sec(module::section::text).data.begin();
+      cs.req_local_mem = mem_local;
+      cs.req_input_mem = input.size();
+      st = q->pipe->create_compute_state(q->pipe, &cs);
+   }
+
+   return st;
+}
+
+///
+/// Unbind every kernel argument and reset all per-launch state
+/// (input buffer, sampler/view/resource/global buffer lists and the
+/// local memory counter) accumulated by bind().
+///
+void
+_cl_kernel::exec_context::unbind() {
+   for (auto &karg : kern.args)
+      karg->unbind(*this);
+
+   // Drop everything the arguments appended while being bound.
+   mem_local = 0;
+   g_handles.clear();
+   g_buffers.clear();
+   resources.clear();
+   sviews.clear();
+   samplers.clear();
+   input.clear();
+}
+
+///
+/// Create an argument that records \a size in __size and starts out
+/// not set.
+///
+_cl_kernel::argument::argument(size_t size) :
+   __size(size), __set(false) {
+}
+
+///
+/// Return whether a value has been assigned to this argument.
+///
+bool
+_cl_kernel::argument::set() const {
+   return __set;
+}
+
+///
+/// Default implementation: returns 0.  local_argument provides its
+/// own definition of storage().
+///
+size_t
+_cl_kernel::argument::storage() const {
+   return 0;
+}
+
+///
+/// Create a scalar argument occupying \a size bytes.
+///
+_cl_kernel::scalar_argument::scalar_argument(size_t size) :
+   argument(size) {
+}
+
+///
+/// Store a copy of the \a size bytes at \a value as the argument
+/// value.  Throws CL_INVALID_ARG_SIZE if \a size differs from the
+/// size the argument was created with.
+///
+void
+_cl_kernel::scalar_argument::set(size_t size, const void *value) {
+   if (size != __size)
+      throw error(CL_INVALID_ARG_SIZE);
+
+   uint8_t *const first = (uint8_t *)value;
+
+   v = { first, first + size };
+   __set = true;
+}
+
+///
+/// Append the stored value bytes to the end of the kernel input
+/// buffer.
+///
+void
+_cl_kernel::scalar_argument::bind(exec_context &ctx) {
+   ctx.input.insert(ctx.input.end(), v.begin(), v.end());
+}
+
+///
+/// Nothing to release for a scalar argument.
+///
+void
+_cl_kernel::scalar_argument::unbind(exec_context &ctx) {
+}
+
+///
+/// Create a global memory argument whose slot in the kernel input
+/// buffer is \a size bytes wide.
+///
+_cl_kernel::global_argument::global_argument(size_t size) :
+   argument(size) {
+}
+
+///
+/// Set the argument from a pointer to a cl_mem handle.  Throws
+/// CL_INVALID_ARG_SIZE unless \a size equals sizeof(cl_mem).
+///
+/// NOTE(review): if the handle is not a clover::buffer the
+/// dynamic_cast yields NULL and the argument is still marked as set;
+/// bind() dereferences obj unconditionally.
+///
+void
+_cl_kernel::global_argument::set(size_t size, const void *value) {
+   if (size != sizeof(cl_mem))
+      throw error(CL_INVALID_ARG_SIZE);
+
+   const cl_mem mem = *static_cast<const cl_mem *>(value);
+
+   obj = dynamic_cast<clover::buffer *>(mem);
+   __set = true;
+}
+
+///
+/// Reserve a slot of __size bytes in the kernel input buffer and
+/// register the buffer's pipe resource as a global binding.
+///
+/// The byte offset of the slot is recorded in g_handles at the same
+/// index as the resource in g_buffers.
+///
+void
+_cl_kernel::global_argument::bind(exec_context &ctx) {
+   size_t offset = ctx.input.size();
+   size_t idx = ctx.g_buffers.size();
+
+   // New bytes are zero-filled by resize().
+   ctx.input.resize(offset + __size);
+
+   ctx.g_buffers.resize(idx + 1);
+   ctx.g_buffers[idx] = obj->resource(ctx.q).pipe;
+
+   ctx.g_handles.resize(idx + 1);
+   ctx.g_handles[idx] = offset;
+}
+
+///
+/// Nothing to release for a global argument.
+///
+void
+_cl_kernel::global_argument::unbind(exec_context &ctx) {
+}
+
+///
+/// Create a local memory argument.  Its slot in the kernel input
+/// buffer is a 32-bit value.
+///
+/// The requested storage starts at zero: storage() can be reached
+/// through _cl_kernel::mem_local() before set() has ever been
+/// called, so it must not read an indeterminate value.
+///
+_cl_kernel::local_argument::local_argument() :
+   argument(sizeof(uint32_t)) {
+   __storage = 0;
+}
+
+///
+/// Return the number of bytes of local memory stored by set().
+///
+size_t
+_cl_kernel::local_argument::storage() const {
+   return __storage;
+}
+
+///
+/// Record \a size as the amount of local memory requested for this
+/// argument.  \a value must be NULL; otherwise CL_INVALID_ARG_VALUE
+/// is thrown.
+///
+void
+_cl_kernel::local_argument::set(size_t size, const void *value) {
+   if (value != NULL)
+      throw error(CL_INVALID_ARG_VALUE);
+
+   __set = true;
+   __storage = size;
+}
+
+///
+/// Append a 32-bit value to the kernel input buffer holding the
+/// current local memory offset, then advance that offset by this
+/// argument's storage size.
+///
+void
+_cl_kernel::local_argument::bind(exec_context &ctx) {
+   size_t offset = ctx.input.size();
+   size_t ptr = ctx.mem_local;
+
+   ctx.input.resize(offset + sizeof(uint32_t));
+   // NOTE(review): 32-bit store into a byte vector at an offset that
+   // depends on the sizes of the preceding arguments; nothing here
+   // aligns it.
+   *(uint32_t *)&ctx.input[offset] = ptr;
+
+   ctx.mem_local += __storage;
+}
+
+///
+/// Nothing to release for a local argument.
+///
+void
+_cl_kernel::local_argument::unbind(exec_context &ctx) {
+}
+
+///
+/// Create a constant memory argument.  Its slot in the kernel input
+/// buffer is a 32-bit value.
+///
+_cl_kernel::constant_argument::constant_argument() :
+   argument(sizeof(uint32_t)) {
+}
+
+///
+/// Set the argument from a pointer to a cl_mem handle.  Throws
+/// CL_INVALID_ARG_SIZE unless \a size equals sizeof(cl_mem).
+///
+/// NOTE(review): if the handle is not a clover::buffer the
+/// dynamic_cast yields NULL and the argument is still marked as set;
+/// bind() dereferences obj unconditionally.
+///
+void
+_cl_kernel::constant_argument::set(size_t size, const void *value) {
+   if (size != sizeof(cl_mem))
+      throw error(CL_INVALID_ARG_SIZE);
+
+   const cl_mem mem = *static_cast<const cl_mem *>(value);
+
+   obj = dynamic_cast<clover::buffer *>(mem);
+   __set = true;
+}
+
+///
+/// Bind the buffer as a read-only surface in the next free compute
+/// resource slot and append a 32-bit value derived from that slot
+/// index to the kernel input buffer.
+///
+void
+_cl_kernel::constant_argument::bind(exec_context &ctx) {
+   size_t offset = ctx.input.size();
+   size_t idx = ctx.resources.size();
+
+   ctx.input.resize(offset + sizeof(uint32_t));
+   // NOTE(review): the resource index is placed in the top byte of
+   // the value; the expected encoding is not visible in this file.
+   *(uint32_t *)&ctx.input[offset] = idx << 24;
+
+   ctx.resources.resize(idx + 1);
+   ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, false);
+}
+
+///
+/// Release the surface obtained in bind().
+///
+void
+_cl_kernel::constant_argument::unbind(exec_context &ctx) {
+   obj->resource(ctx.q).unbind_surface(*ctx.q, st);
+}
+
+///
+/// Create a read-only image argument.  Its slot in the kernel input
+/// buffer is a 32-bit value.
+///
+_cl_kernel::image_rd_argument::image_rd_argument() :
+   argument(sizeof(uint32_t)) {
+}
+
+///
+/// Set the argument from a pointer to a cl_mem handle.  Throws
+/// CL_INVALID_ARG_SIZE unless \a size equals sizeof(cl_mem).
+///
+/// NOTE(review): if the handle is not a clover::image the
+/// dynamic_cast yields NULL and the argument is still marked as set;
+/// bind() dereferences obj unconditionally.
+///
+void
+_cl_kernel::image_rd_argument::set(size_t size, const void *value) {
+   if (size != sizeof(cl_mem))
+      throw error(CL_INVALID_ARG_SIZE);
+
+   const cl_mem mem = *static_cast<const cl_mem *>(value);
+
+   obj = dynamic_cast<clover::image *>(mem);
+   __set = true;
+}
+
+///
+/// Bind the image as a sampler view in the next free sampler view
+/// slot and append that slot index, as a 32-bit value, to the kernel
+/// input buffer.
+///
+void
+_cl_kernel::image_rd_argument::bind(exec_context &ctx) {
+   size_t offset = ctx.input.size();
+   size_t idx = ctx.sviews.size();
+
+   ctx.input.resize(offset + sizeof(uint32_t));
+   *(uint32_t *)&ctx.input[offset] = idx;
+
+   ctx.sviews.resize(idx + 1);
+   ctx.sviews[idx] = st = obj->resource(ctx.q).bind_sampler_view(*ctx.q);
+}
+
+///
+/// Release the sampler view obtained in bind().
+///
+void
+_cl_kernel::image_rd_argument::unbind(exec_context &ctx) {
+   obj->resource(ctx.q).unbind_sampler_view(*ctx.q, st);
+}
+
+///
+/// Create a writable image argument.  Its slot in the kernel input
+/// buffer is a 32-bit value.
+///
+_cl_kernel::image_wr_argument::image_wr_argument() :
+   argument(sizeof(uint32_t)) {
+}
+
+///
+/// Set the argument from a pointer to a cl_mem handle.  Throws
+/// CL_INVALID_ARG_SIZE unless \a size equals sizeof(cl_mem).
+///
+/// NOTE(review): if the handle is not a clover::image the
+/// dynamic_cast yields NULL and the argument is still marked as set;
+/// bind() dereferences obj unconditionally.
+///
+void
+_cl_kernel::image_wr_argument::set(size_t size, const void *value) {
+   if (size != sizeof(cl_mem))
+      throw error(CL_INVALID_ARG_SIZE);
+
+   const cl_mem mem = *static_cast<const cl_mem *>(value);
+
+   obj = dynamic_cast<clover::image *>(mem);
+   __set = true;
+}
+
+///
+/// Bind the image as a writable surface in the next free compute
+/// resource slot and append that slot index, as a 32-bit value, to
+/// the kernel input buffer.
+///
+void
+_cl_kernel::image_wr_argument::bind(exec_context &ctx) {
+   size_t offset = ctx.input.size();
+   size_t idx = ctx.resources.size();
+
+   ctx.input.resize(offset + sizeof(uint32_t));
+   *(uint32_t *)&ctx.input[offset] = idx;
+
+   ctx.resources.resize(idx + 1);
+   ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, true);
+}
+
+///
+/// Release the surface obtained in bind().
+///
+void
+_cl_kernel::image_wr_argument::unbind(exec_context &ctx) {
+   obj->resource(ctx.q).unbind_surface(*ctx.q, st);
+}
+
+///
+/// Create a sampler argument.  It is created with size 0 and its
+/// bind() writes nothing to the kernel input buffer.
+///
+_cl_kernel::sampler_argument::sampler_argument() :
+   argument(0) {
+}
+
+///
+/// Set the argument from a pointer to a cl_sampler handle.  Throws
+/// CL_INVALID_ARG_SIZE unless \a size equals sizeof(cl_sampler).
+///
+void
+_cl_kernel::sampler_argument::set(size_t size, const void *value) {
+   if (size != sizeof(cl_sampler))
+      throw error(CL_INVALID_ARG_SIZE);
+
+   const cl_sampler *const handle = static_cast<const cl_sampler *>(value);
+
+   obj = *handle;
+   __set = true;
+}
+
+///
+/// Bind the sampler on the context's queue and store the resulting
+/// state object in the next free sampler slot.
+///
+void
+_cl_kernel::sampler_argument::bind(exec_context &ctx) {
+   size_t idx = ctx.samplers.size();
+
+   ctx.samplers.resize(idx + 1);
+   ctx.samplers[idx] = st = obj->bind(*ctx.q);
+}
+
+///
+/// Release the sampler state obtained in bind().
+///
+void
+_cl_kernel::sampler_argument::unbind(exec_context &ctx) {
+   obj->unbind(*ctx.q, st);
+}
diff --git a/src/gallium/state_trackers/clover/core/kernel.hpp b/src/gallium/state_trackers/clover/core/kernel.hpp

new file mode 100644 (file)

index 0000000..bc21de8
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/kernel.hpp
@@ -0,0 +1,214 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_KERNEL_HPP__
+#define __CORE_KERNEL_HPP__
+
+#include <memory>
+
+#include "core/base.hpp"
+#include "core/program.hpp"
+#include "core/memory.hpp"
+#include "core/sampler.hpp"
+#include "pipe/p_state.h"
+
+namespace clover {
+   // clover::kernel is the C++-side name of the _cl_kernel handle type.
+   typedef struct _cl_kernel kernel;
+   // NOTE(review): no clover::argument is defined in this header (kernel
+   // arguments are the nested _cl_kernel::argument classes) -- confirm
+   // whether this forward declaration is still needed.
+   class argument;
+}
+
+///
+/// Kernel object: a named entry point of a program together with the
+/// arguments that have been set on it.
+///
+struct _cl_kernel : public clover::ref_counter {
+private:
+   ///
+   /// Class containing all the state required to execute a compute
+   /// kernel.
+   ///
+   struct exec_context {
+      exec_context(clover::kernel &kern);
+      ~exec_context();
+
+      void *bind(clover::command_queue *q);
+      void unbind();
+
+      clover::kernel &kern;
+      clover::command_queue *q;
+
+      // The vectors below are filled in by the bind() methods of the
+      // individual arguments.
+      /// Raw bytes passed to the kernel as its input buffer.
+      std::vector<uint8_t> input;
+      /// Sampler state objects, one per bound sampler argument.
+      std::vector<void *> samplers;
+      /// Sampler views, one per bound image_rd argument.
+      std::vector<pipe_sampler_view *> sviews;
+      /// Surfaces, e.g. one per bound image_wr argument.
+      std::vector<pipe_surface *> resources;
+      // NOTE(review): g_buffers, g_handles and mem_local are not used in
+      // the visible code; presumably they describe global buffers and
+      // local memory usage -- confirm against kernel.cpp.
+      std::vector<pipe_resource *> g_buffers;
+      std::vector<size_t> g_handles;
+      size_t mem_local;
+
+   private:
+      void *st;
+      pipe_compute_state cs;
+   };
+
+public:
+   ///
+   /// Abstract base class of a kernel argument.
+   ///
+   class argument {
+   public:
+      argument(size_t size);
+
+      /// \a true if the argument has been set.
+      bool set() const;
+
+      /// Argument size in the input buffer.
+      size_t size() const;
+
+      /// Storage space required for the referenced object.
+      virtual size_t storage() const;
+
+      /// Set this argument to some object.
+      virtual void set(size_t size, const void *value) = 0;
+
+      /// Allocate the necessary resources to bind the specified
+      /// object to this argument, and update \a ctx accordingly.
+      virtual void bind(exec_context &ctx) = 0;
+
+      /// Free any resources that were allocated in bind().
+      virtual void unbind(exec_context &ctx) = 0;
+
+   protected:
+      size_t __size;
+      bool __set;
+   };
+
+   _cl_kernel(clover::program &prog,
+              const std::string &name,
+              const std::vector<clover::module::argument> &args);
+
+   void launch(clover::command_queue &q,
+               const std::vector<size_t> &grid_offset,
+               const std::vector<size_t> &grid_size,
+               const std::vector<size_t> &block_size);
+
+   size_t mem_local() const;
+   size_t mem_private() const;
+   size_t max_block_size() const;
+
+   const std::string &name() const;
+   std::vector<size_t> block_size() const;
+
+   clover::program &prog;
+   /// The kernel's arguments, owned by the kernel.
+   std::vector<std::unique_ptr<argument>> args;
+
+private:
+   const clover::module &
+   module(const clover::command_queue &q) const;
+
+   // Concrete argument kinds.  The names mirror the values of
+   // clover::module::argument::type.
+
+   class scalar_argument : public argument {
+   public:
+      scalar_argument(size_t size);
+
+      virtual void set(size_t size, const void *value);
+      virtual void bind(exec_context &ctx);
+      virtual void unbind(exec_context &ctx);
+
+   private:
+      std::vector<uint8_t> v;
+   };
+
+   class global_argument : public argument {
+   public:
+      global_argument(size_t size);
+
+      virtual void set(size_t size, const void *value);
+      virtual void bind(exec_context &ctx);
+      virtual void unbind(exec_context &ctx);
+
+   private:
+      clover::buffer *obj;
+   };
+
+   class local_argument : public argument {
+   public:
+      local_argument();
+
+      virtual size_t storage() const;
+
+      virtual void set(size_t size, const void *value);
+      virtual void bind(exec_context &ctx);
+      virtual void unbind(exec_context &ctx);
+
+   private:
+      size_t __storage;
+   };
+
+   class constant_argument : public argument {
+   public:
+      constant_argument();
+
+      virtual void set(size_t size, const void *value);
+      virtual void bind(exec_context &ctx);
+      virtual void unbind(exec_context &ctx);
+
+   private:
+      clover::buffer *obj;
+      pipe_surface *st;
+   };
+
+   class image_rd_argument : public argument {
+   public:
+      image_rd_argument();
+
+      virtual void set(size_t size, const void *value);
+      virtual void bind(exec_context &ctx);
+      virtual void unbind(exec_context &ctx);
+
+   private:
+      clover::image *obj;
+      /// Sampler view created by bind() and released by unbind().
+      pipe_sampler_view *st;
+   };
+
+   class image_wr_argument : public argument {
+   public:
+      image_wr_argument();
+
+      virtual void set(size_t size, const void *value);
+      virtual void bind(exec_context &ctx);
+      virtual void unbind(exec_context &ctx);
+
+   private:
+      clover::image *obj;
+      /// Surface created by bind() and released by unbind().
+      pipe_surface *st;
+   };
+
+   class sampler_argument : public argument {
+   public:
+      sampler_argument();
+
+      virtual void set(size_t size, const void *value);
+      virtual void bind(exec_context &ctx);
+      virtual void unbind(exec_context &ctx);
+
+   private:
+      clover::sampler *obj;
+      /// Sampler state returned by bind() and released by unbind().
+      void *st;
+   };
+
+   std::string __name;
+   exec_context exec;
+};
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/memory.cpp b/src/gallium/state_trackers/clover/core/memory.cpp

new file mode 100644 (file)

index 0000000..8d8be2e
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/memory.cpp
@@ -0,0 +1,199 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "core/memory.hpp"
+#include "core/resource.hpp"
+
+using namespace clover;
+
+// Common constructor of every memory object.  The destroy callback
+// starts out as a no-op.
+_cl_mem::_cl_mem(clover::context &ctx, cl_mem_flags flags,
+                 size_t size, void *host_ptr) :
+   ctx(ctx),
+   __flags(flags),
+   __size(size),
+   __host_ptr(host_ptr),
+   __destroy_notify([]() {}) {
+   // With CL_MEM_COPY_HOST_PTR the caller's bytes are copied into
+   // `data' right away.
+   const bool copy_host = flags & CL_MEM_COPY_HOST_PTR;
+
+   if (copy_host)
+      data.append(static_cast<const char *>(host_ptr), size);
+}
+
+// Runs the callback registered with destroy_notify(), or the no-op
+// installed by the constructor.
+_cl_mem::~_cl_mem()
+{
+   this->__destroy_notify();
+}
+
+// Replace the callback invoked from the destructor.  Only one callback
+// is kept: a later call overwrites the earlier one.
+void
+_cl_mem::destroy_notify(std::function<void ()> f)
+{
+   this->__destroy_notify = f;
+}
+
+// Flags passed at construction time.
+cl_mem_flags
+_cl_mem::flags() const
+{
+   return this->__flags;
+}
+
+// Size passed at construction time.
+size_t
+_cl_mem::size() const
+{
+   return this->__size;
+}
+
+// Host pointer passed at construction time (may be NULL).
+void *
+_cl_mem::host_ptr() const
+{
+   return this->__host_ptr;
+}
+
+// Forwards everything to the memory object base class.
+buffer::buffer(clover::context &ctx, cl_mem_flags flags,
+               size_t size, void *host_ptr)
+   : memory_obj(ctx, flags, size, host_ptr)
+{
+}
+
+// Every buffer flavour reports CL_MEM_OBJECT_BUFFER.
+cl_mem_object_type
+buffer::type() const
+{
+   return CL_MEM_OBJECT_BUFFER;
+}
+
+// Forwards everything to the buffer base class.
+root_buffer::root_buffer(clover::context &ctx, cl_mem_flags flags,
+                         size_t size, void *host_ptr)
+   : buffer(ctx, flags, size, host_ptr)
+{
+}
+
+// Return the resource backing this buffer on the device of \a q,
+// creating it on first use.
+clover::resource &
+root_buffer::resource(cl_command_queue q) {
+   auto it = resources.find(&q->dev);
+
+   if (it == resources.end()) {
+      std::unique_ptr<root_resource> r;
+
+      // The very first resource is constructed from the staged `data';
+      // later ones are constructed from an already existing resource.
+      if (resources.empty())
+         r.reset(new root_resource(q->dev, *this, *q, data));
+      else
+         r.reset(new root_resource(q->dev, *this,
+                                   *resources.begin()->second));
+
+      it = resources.insert(std::make_pair(&q->dev, std::move(r))).first;
+      data.clear();
+   }
+
+   return *it->second;
+}
+
+// A sub-buffer covers \a size bytes of \a parent starting at \a offset
+// and lives in the parent's context.
+//
+// Its host pointer is the parent's host pointer advanced by \a offset.
+// If the parent has no host pointer the sub-buffer has none either;
+// offsetting NULL would otherwise hand out a bogus non-NULL pointer.
+sub_buffer::sub_buffer(clover::root_buffer &parent, cl_mem_flags flags,
+                       size_t offset, size_t size) :
+   buffer(parent.ctx, flags, size,
+          parent.host_ptr() ? (char *)parent.host_ptr() + offset : NULL),
+   parent(parent), __offset(offset) {
+}
+
+// Return the resource backing this sub-buffer on the device of \a q,
+// creating it on first use as a view of the parent's resource at
+// offset().
+clover::resource &
+sub_buffer::resource(cl_command_queue q) {
+   auto it = resources.find(&q->dev);
+
+   if (it == resources.end()) {
+      std::unique_ptr<sub_resource> r(
+         new sub_resource(parent.resource(q), { offset() }));
+
+      it = resources.insert(std::make_pair(&q->dev, std::move(r))).first;
+   }
+
+   return *it->second;
+}
+
+// Offset of this sub-buffer, as passed to the constructor.
+size_t
+sub_buffer::offset() const
+{
+   return this->__offset;
+}
+
+// Stores a copy of \a format and the image geometry.  \a size and
+// \a host_ptr are handed to the memory object base class.
+image::image(clover::context &ctx, cl_mem_flags flags,
+             const cl_image_format *format,
+             size_t width, size_t height, size_t depth,
+             size_t row_pitch, size_t slice_pitch, size_t size,
+             void *host_ptr) :
+   memory_obj(ctx, flags, size, host_ptr),
+   __format(*format),
+   __width(width),
+   __height(height),
+   __depth(depth),
+   __row_pitch(row_pitch),
+   __slice_pitch(slice_pitch) {
+}
+
+// Return the resource backing this image on the device of \a q,
+// creating it on first use.
+clover::resource &
+image::resource(cl_command_queue q) {
+   auto it = resources.find(&q->dev);
+
+   if (it == resources.end()) {
+      std::unique_ptr<root_resource> r;
+
+      // The very first resource is constructed from the staged `data';
+      // later ones are constructed from an already existing resource.
+      if (resources.empty())
+         r.reset(new root_resource(q->dev, *this, *q, data));
+      else
+         r.reset(new root_resource(q->dev, *this,
+                                   *resources.begin()->second));
+
+      it = resources.insert(std::make_pair(&q->dev, std::move(r))).first;
+      data.clear();
+   }
+
+   return *it->second;
+}
+
+// Copy of the format passed at construction time.
+cl_image_format
+image::format() const
+{
+   return this->__format;
+}
+
+// Width passed at construction time.
+size_t
+image::width() const
+{
+   return this->__width;
+}
+
+// Height passed at construction time.
+size_t
+image::height() const
+{
+   return this->__height;
+}
+
+// Depth passed at construction time (image2d passes 0).
+size_t
+image::depth() const
+{
+   return this->__depth;
+}
+
+// Row pitch passed at construction time.
+size_t
+image::row_pitch() const
+{
+   return this->__row_pitch;
+}
+
+// Slice pitch passed at construction time (image2d passes 0).
+size_t
+image::slice_pitch() const
+{
+   return this->__slice_pitch;
+}
+
+// A 2D image is stored as an image with depth 0 and slice pitch 0.
+// Its total size is height * row_pitch.
+image2d::image2d(clover::context &ctx, cl_mem_flags flags,
+                 const cl_image_format *format, size_t width,
+                 size_t height, size_t row_pitch,
+                 void *host_ptr)
+   : image(ctx, flags, format,
+           width, height, 0,
+           row_pitch, 0,
+           height * row_pitch,
+           host_ptr)
+{
+}
+
+// 2D images report CL_MEM_OBJECT_IMAGE2D.
+cl_mem_object_type
+image2d::type() const
+{
+   return CL_MEM_OBJECT_IMAGE2D;
+}
+
+// A 3D image keeps all three dimensions.  Its total size is
+// depth * slice_pitch.
+image3d::image3d(clover::context &ctx, cl_mem_flags flags,
+                 const cl_image_format *format,
+                 size_t width, size_t height, size_t depth,
+                 size_t row_pitch, size_t slice_pitch,
+                 void *host_ptr)
+   : image(ctx, flags, format,
+           width, height, depth,
+           row_pitch, slice_pitch,
+           depth * slice_pitch,
+           host_ptr)
+{
+}
+
+// 3D images report CL_MEM_OBJECT_IMAGE3D.
+cl_mem_object_type
+image3d::type() const
+{
+   return CL_MEM_OBJECT_IMAGE3D;
+}
diff --git a/src/gallium/state_trackers/clover/core/memory.hpp b/src/gallium/state_trackers/clover/core/memory.hpp

new file mode 100644 (file)

index 0000000..96f70e9
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/memory.hpp
@@ -0,0 +1,157 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_MEMORY_HPP__
+#define __CORE_MEMORY_HPP__
+
+#include <functional>
+#include <map>
+#include <memory>
+
+#include "core/base.hpp"
+#include "core/queue.hpp"
+
+namespace clover {
+   // clover::memory_obj is the C++-side name of the _cl_mem handle type.
+   typedef struct _cl_mem memory_obj;
+
+   // Only referred to by reference or pointer in this header, so
+   // forward declarations are enough here.
+   class resource;
+   class sub_resource;
+}
+
+///
+/// Abstract base class of all memory objects (buffers and images).
+/// Instances cannot be copied.
+///
+struct _cl_mem : public clover::ref_counter {
+protected:
+   _cl_mem(clover::context &ctx, cl_mem_flags flags,
+           size_t size, void *host_ptr);
+   _cl_mem(const _cl_mem &obj) = delete;
+
+public:
+   /// Invokes the callback registered with destroy_notify().
+   virtual ~_cl_mem();
+
+   virtual cl_mem_object_type type() const = 0;
+   /// Resource backing this object on the device of queue \a q.
+   virtual clover::resource &resource(cl_command_queue q) = 0;
+
+   /// Register \a f to be called from the destructor; replaces any
+   /// previously registered callback.
+   void destroy_notify(std::function<void ()> f);
+   cl_mem_flags flags() const;
+   size_t size() const;
+   void *host_ptr() const;
+
+   clover::context &ctx;
+
+private:
+   cl_mem_flags __flags;
+   size_t __size;
+   void *__host_ptr;
+   std::function<void ()> __destroy_notify;
+
+protected:
+   /// Copy of the host data taken when CL_MEM_COPY_HOST_PTR is set;
+   /// cleared by derived classes once a resource has been created.
+   std::string data;
+};
+
+namespace clover {
+   ///
+   /// Common base of root_buffer and sub_buffer; only fixes the object
+   /// type to CL_MEM_OBJECT_BUFFER.
+   ///
+   struct buffer : public memory_obj {
+   protected:
+      buffer(clover::context &ctx, cl_mem_flags flags,
+             size_t size, void *host_ptr);
+
+   public:
+      virtual cl_mem_object_type type() const;
+   };
+
+   ///
+   /// Buffer with storage of its own, as opposed to a sub_buffer.
+   ///
+   struct root_buffer : public buffer {
+   public:
+      root_buffer(clover::context &ctx, cl_mem_flags flags,
+                  size_t size, void *host_ptr);
+
+      /// Returns the per-device resource, creating it on first use.
+      virtual clover::resource &resource(cl_command_queue q);
+
+   private:
+      /// One lazily created resource per device.
+      std::map<clover::device *,
+               std::unique_ptr<clover::root_resource>> resources;
+   };
+
+   ///
+   /// Buffer that refers to a range of a parent root_buffer.
+   ///
+   struct sub_buffer : public buffer {
+   public:
+      sub_buffer(clover::root_buffer &parent, cl_mem_flags flags,
+                 size_t offset, size_t size);
+
+      /// Returns the per-device resource, creating it on first use from
+      /// the parent's resource.
+      virtual clover::resource &resource(cl_command_queue q);
+      /// Offset passed to the constructor.
+      size_t offset() const;
+
+      clover::root_buffer &parent;
+
+   private:
+      size_t __offset;
+      /// One lazily created resource per device.
+      std::map<clover::device *,
+               std::unique_ptr<clover::sub_resource>> resources;
+   };
+
+   ///
+   /// Common base of image2d and image3d: holds the format and the
+   /// geometry and manages the per-device resources.
+   ///
+   struct image : public memory_obj {
+   protected:
+      image(clover::context &ctx, cl_mem_flags flags,
+            const cl_image_format *format,
+            size_t width, size_t height, size_t depth,
+            size_t row_pitch, size_t slice_pitch, size_t size,
+            void *host_ptr);
+
+   public:
+      /// Returns the per-device resource, creating it on first use.
+      virtual clover::resource &resource(cl_command_queue q);
+      cl_image_format format() const;
+      size_t width() const;
+      size_t height() const;
+      size_t depth() const;
+      size_t row_pitch() const;
+      size_t slice_pitch() const;
+
+   private:
+      cl_image_format __format;
+      size_t __width;
+      size_t __height;
+      size_t __depth;
+      size_t __row_pitch;
+      size_t __slice_pitch;
+      /// One lazily created resource per device.
+      std::map<clover::device *,
+               std::unique_ptr<clover::root_resource>> resources;
+   };
+
+   ///
+   /// Two-dimensional image (type() is CL_MEM_OBJECT_IMAGE2D).
+   ///
+   struct image2d : public image {
+   public:
+      image2d(clover::context &ctx, cl_mem_flags flags,
+              const cl_image_format *format, size_t width,
+              size_t height, size_t row_pitch,
+              void *host_ptr);
+
+      virtual cl_mem_object_type type() const;
+   };
+
+   ///
+   /// Three-dimensional image (type() is CL_MEM_OBJECT_IMAGE3D).
+   ///
+   struct image3d : public image {
+   public:
+      image3d(clover::context &ctx, cl_mem_flags flags,
+              const cl_image_format *format,
+              size_t width, size_t height, size_t depth,
+              size_t row_pitch, size_t slice_pitch,
+              void *host_ptr);
+
+      virtual cl_mem_object_type type() const;
+   };
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/module.cpp b/src/gallium/state_trackers/clover/core/module.cpp

new file mode 100644 (file)

index 0000000..1865771
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/module.cpp
@@ -0,0 +1,172 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include <type_traits>
+#include <algorithm>
+
+#include "core/module.hpp"
+
+using namespace clover;
+
+namespace {
+   /// Primary template, intentionally left undefined: only its
+   /// specializations know how to (de)serialize a type.  The second
+   /// parameter exists so that specializations can be selected with
+   /// std::enable_if.
+   template<typename T, typename = void>
+   struct __serializer;
+
+   /// Write \a x to \a os using the __serializer specialization
+   /// selected by its type.
+   template<typename T>
+   void
+   __proc(compat::ostream &os, const T &x)
+   {
+      typedef __serializer<T> serializer;
+
+      serializer::proc(os, x);
+   }
+
+   /// Read \a x in place from \a is using the __serializer
+   /// specialization selected by its type.
+   template<typename T>
+   void
+   __proc(compat::istream &is, T &x)
+   {
+      typedef __serializer<T> serializer;
+
+      serializer::proc(is, x);
+   }
+
+   /// Read an object of type \a T from \a is and return it by value.
+   /// \a T has to be given explicitly and be default-constructible.
+   template<typename T>
+   T
+   __proc(compat::istream &is)
+   {
+      T result;
+
+      __serializer<T>::proc(is, result);
+
+      return result;
+   }
+
+   /// (De)serialize a scalar value by copying its object
+   /// representation byte for byte, so the stream carries the host's
+   /// byte order and type sizes.
+   template<typename T>
+   struct __serializer<T, typename std::enable_if<
+                             std::is_scalar<T>::value>::type> {
+      static void
+      proc(compat::ostream &os, const T &x) {
+         const char *raw = reinterpret_cast<const char *>(&x);
+
+         os.write(raw, sizeof(T));
+      }
+
+      static void
+      proc(compat::istream &is, T &x) {
+         char *raw = reinterpret_cast<char *>(&x);
+
+         is.read(raw, sizeof(T));
+      }
+   };
+
+   /// (De)serialize a vector as a 32-bit element count followed by the
+   /// elements in order.
+   template<typename T>
+   struct __serializer<compat::vector<T>> {
+      static void
+      proc(compat::ostream &os, const compat::vector<T> &v) {
+         __proc<uint32_t>(os, v.size());
+
+         for (size_t pos = 0; pos < v.size(); ++pos) {
+            const T &elem = v[pos];
+
+            __proc<T>(os, elem);
+         }
+      }
+
+      static void
+      proc(compat::istream &is, compat::vector<T> &v) {
+         const uint32_t count = __proc<uint32_t>(is);
+
+         // NOTE(review): this relies on compat::vector::reserve()
+         // growing size() while leaving the new slots unconstructed,
+         // hence the placement new below -- confirm against
+         // core/compat.hpp.
+         v.reserve(count);
+
+         for (size_t pos = 0; pos < v.size(); ++pos)
+            new(&v[pos]) T(__proc<T>(is));
+      }
+   };
+
+   /// (De)serialize a module::section.
+   ///
+   /// The same member list drives both directions (\a S is either the
+   /// output or the input stream), so the on-stream order is id, type,
+   /// size, data.  The id has to be part of the stream: without it a
+   /// deserialized section would come back with an unset id.
+   template<>
+   struct __serializer<module::section> {
+      template<typename S, typename QT>
+      static void
+      proc(S &s, QT &x) {
+         __proc(s, x.id);
+         __proc(s, x.type);
+         __proc(s, x.size);
+         __proc(s, x.data);
+      }
+   };
+
+   /// (De)serialize a module::argument.
+   ///
+   /// One member list serves both directions: \a S is the stream type
+   /// and \a QT is module::argument, const-qualified when writing.
+   /// On-stream order: type, size.
+   template<>
+   struct __serializer<module::argument> {
+      template<typename S, typename QT>
+      static void
+      proc(S &stream, QT &arg) {
+         __proc(stream, arg.type);
+         __proc(stream, arg.size);
+      }
+   };
+
+   /// (De)serialize a module::symbol.
+   ///
+   /// The same member list drives both directions (\a S is either the
+   /// output or the input stream), so the on-stream order is name,
+   /// section, offset, args.  The name has to be part of the stream:
+   /// module::sym() looks symbols up by name, so a module deserialized
+   /// without names could never resolve a kernel.
+   template<>
+   struct __serializer<module::symbol> {
+      template<typename S, typename QT>
+      static void
+      proc(S &s, QT &x) {
+         __proc(s, x.name);
+         __proc(s, x.section);
+         __proc(s, x.offset);
+         __proc(s, x.args);
+      }
+   };
+
+   /// (De)serialize a module.
+   ///
+   /// One member list serves both directions: \a S is the stream type
+   /// and \a QT is module, const-qualified when writing.  On-stream
+   /// order: symbols, then sections.
+   template<>
+   struct __serializer<module> {
+      template<typename S, typename QT>
+      static void
+      proc(S &stream, QT &mod) {
+         __proc(stream, mod.syms);
+         __proc(stream, mod.secs);
+      }
+   };
+};
+
+namespace clover {
+   // Write this module to \a os.
+   void
+   module::serialize(compat::ostream &os) const
+   {
+      const module &self = *this;
+
+      __proc(os, self);
+   }
+
+   // Read a module from \a is and return it by value.
+   module
+   module::deserialize(compat::istream &is)
+   {
+      return __proc<module>(is);
+   }
+
+   // Return the first symbol whose name equals \a name; throws
+   // noent_error if there is none.
+   const module::symbol &
+   module::sym(compat::string name) const {
+      for (auto it = syms.begin(); it != syms.end(); ++it) {
+         if (compat::string(it->name) == name)
+            return *it;
+      }
+
+      throw noent_error();
+   }
+
+   // Return the first section of the given \a type; throws noent_error
+   // if there is none.
+   const module::section &
+   module::sec(typename section::type type) const {
+      for (auto it = secs.begin(); it != secs.end(); ++it) {
+         if (it->type == type)
+            return *it;
+      }
+
+      throw noent_error();
+   }
+}
diff --git a/src/gallium/state_trackers/clover/core/module.hpp b/src/gallium/state_trackers/clover/core/module.hpp

new file mode 100644 (file)

index 0000000..bc4b203
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/module.hpp
@@ -0,0 +1,93 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_MODULE_HPP__
+#define __CORE_MODULE_HPP__
+
+#include "core/compat.hpp"
+
+namespace clover {
+   ///
+   /// Compiled program image: a list of sections plus the symbols
+   /// (entry points) that refer to them.
+   ///
+   struct module {
+      /// Thrown by sym() and sec() when nothing matches.
+      class noent_error {
+      public:
+         virtual ~noent_error() {}
+      };
+
+      typedef uint32_t resource_id;
+      // Inside module, size_t is this 32-bit type rather than ::size_t.
+      typedef uint32_t size_t;
+
+      struct section {
+         enum type {
+            text,
+            data_constant,
+            data_global,
+            data_local,
+            data_private
+         };
+
+         resource_id id;
+         type type;
+         size_t size;
+         clover::compat::vector<char> data;
+      };
+
+      struct argument {
+         enum type {
+            scalar,
+            constant,
+            global,
+            local,
+            image2d_rd,
+            image2d_wr,
+            image3d_rd,
+            image3d_wr,
+            sampler
+         };
+
+         type type;
+         size_t size;
+      };
+
+      struct symbol {
+         /// Symbol name, stored as a character vector.
+         clover::compat::vector<char> name;
+         // NOTE(review): presumably the id of the section that holds the
+         // symbol and the offset within it -- confirm with the users.
+         resource_id section;
+         size_t offset;
+         clover::compat::vector<argument> args;
+      };
+
+      /// Write this module to \a os.
+      void serialize(compat::ostream &os) const;
+      /// Read a module from \a is.
+      static module deserialize(compat::istream &is);
+
+      /// Look up a symbol by name.  Throws module::noent_error if not
+      /// found.
+      const symbol &sym(compat::string name) const;
+
+      /// Look up a section by type.  Throws module::noent_error if not
+      /// found.
+      const section &sec(typename section::type type) const;
+
+      clover::compat::vector<symbol> syms;
+      clover::compat::vector<section> secs;
+   };
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp

new file mode 100644 (file)

index 0000000..5ac9f93
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/program.cpp
@@ -0,0 +1,85 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "core/program.hpp"
+#include "core/compiler.hpp"
+
+using namespace clover;
+
+// Program created from source text; nothing is compiled until build().
+_cl_program::_cl_program(clover::context &ctx,
+                         const std::string &source)
+   : ctx(ctx), __source(source)
+{
+}
+
+// Program created from binaries: each device in \a devs is recorded
+// together with the corresponding entry of \a binaries.
+_cl_program::_cl_program(clover::context &ctx,
+                         const std::vector<clover::device *> &devs,
+                         const std::vector<clover::module> &binaries) :
+   ctx(ctx) {
+   auto add_binary = [&](clover::device *dev, const clover::module &bin) {
+      __binaries.insert({ dev, bin });
+   };
+
+   for_each(add_binary, devs.begin(), devs.end(), binaries.begin());
+}
+
+// Compile the program source for every device in \a devs.
+//
+// Binaries and logs of earlier builds are discarded first.  The first
+// device that fails to compile gets its message recorded in the logs
+// and aborts the build with error(CL_BUILD_PROGRAM_FAILURE); devices
+// after it are not attempted.
+void
+_cl_program::build(const std::vector<clover::device *> &devs) {
+   __binaries.clear();
+   __logs.clear();
+
+   for (auto it = devs.begin(); it != devs.end(); ++it) {
+      clover::device *dev = *it;
+
+      try {
+         // Devices whose IR target is "tgsi" use the TGSI front end,
+         // every other target goes through the LLVM one.
+         const bool tgsi = (dev->ir_target() == "tgsi");
+         auto module = (tgsi ?
+                        compile_program_tgsi(__source, dev->ir_target()) :
+                        compile_program_llvm(__source, dev->ir_target()));
+
+         __binaries.insert({ dev, module });
+
+      } catch (build_error &e) {
+         __logs.insert({ dev, e.what() });
+         throw error(CL_BUILD_PROGRAM_FAILURE);
+      }
+   }
+}
+
+// Source text passed to the constructor (empty for programs created
+// from binaries).
+const std::string &
+_cl_program::source() const
+{
+   return this->__source;
+}
+
+// Per-device binaries, either supplied at construction time or
+// produced by build().
+const std::map<clover::device *, clover::module> &
+_cl_program::binaries() const
+{
+   return this->__binaries;
+}
+
+// Build status of the program for \a dev:
+//  - CL_BUILD_SUCCESS if a binary exists for the device,
+//  - CL_BUILD_ERROR if the last build() failed on the device,
+//  - CL_BUILD_NONE otherwise.
+cl_build_status
+_cl_program::build_status(clover::device *dev) const {
+   if (__binaries.count(dev))
+      return CL_BUILD_SUCCESS;
+
+   // build() clears the logs and then records an entry only for the
+   // device whose compilation failed, so a log without a binary means
+   // the last build attempt ended in an error.
+   if (__logs.count(dev))
+      return CL_BUILD_ERROR;
+
+   return CL_BUILD_NONE;
+}
+
+// Build options are not tracked: always returns an empty string.
+std::string
+_cl_program::build_opts(clover::device *dev) const
+{
+   return std::string();
+}
+
+// Message recorded by build() for \a dev, or an empty string if there
+// is none.
+std::string
+_cl_program::build_log(clover::device *dev) const {
+   auto it = __logs.find(dev);
+
+   if (it == __logs.end())
+      return "";
+
+   return it->second;
+}
diff --git a/src/gallium/state_trackers/clover/core/program.hpp b/src/gallium/state_trackers/clover/core/program.hpp

new file mode 100644 (file)

index 0000000..f3858f6
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/program.hpp
@@ -0,0 +1,61 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_PROGRAM_HPP__
+#define __CORE_PROGRAM_HPP__
+
+#include <map>
+
+#include "core/base.hpp"
+#include "core/context.hpp"
+#include "core/module.hpp"
+
+namespace clover {
+   // clover::program is the C++-side name of the _cl_program handle type.
+   typedef struct _cl_program program;
+}
+
+///
+/// Program object: source text and/or per-device binaries, plus the
+/// messages recorded by failed builds.
+///
+struct _cl_program : public clover::ref_counter {
+public:
+   /// Create a program from source text.
+   _cl_program(clover::context &ctx,
+               const std::string &source);
+   /// Create a program from one binary per device.
+   _cl_program(clover::context &ctx,
+               const std::vector<clover::device *> &devs,
+               const std::vector<clover::module> &binaries);
+
+   /// Compile the source for \a devs, replacing all earlier binaries
+   /// and logs.
+   void build(const std::vector<clover::device *> &devs);
+
+   const std::string &source() const;
+   const std::map<clover::device *, clover::module> &binaries() const;
+
+   cl_build_status build_status(clover::device *dev) const;
+   std::string build_opts(clover::device *dev) const;
+   std::string build_log(clover::device *dev) const;
+
+   clover::context &ctx;
+
+private:
+   std::map<clover::device *, clover::module> __binaries;
+   std::map<clover::device *, std::string> __logs;
+   std::string __source;
+};
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/queue.cpp b/src/gallium/state_trackers/clover/core/queue.cpp

new file mode 100644 (file)

index 0000000..7e476c7
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/queue.cpp
@@ -0,0 +1,69 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include <algorithm>
+
+#include "core/queue.hpp"
+#include "core/event.hpp"
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+
+using namespace clover;
+
+_cl_command_queue::_cl_command_queue(context &ctx, device &dev,
+                                     cl_command_queue_properties props) :
+   ctx(ctx), dev(dev), __props(props),
+   pipe(dev.pipe->context_create(dev.pipe, NULL)) {
+   if (pipe == NULL) // the screen did not hand back a context
+      throw error(CL_INVALID_DEVICE);
+}
+
+_cl_command_queue::~_cl_command_queue() {
+   pipe->destroy(pipe); // the context obtained from context_create in the ctor
+}
+
+void
+_cl_command_queue::flush() {
+   if (queued_events.empty())
+      return; // nothing pending, so the pipe is not flushed either
+
+   pipe_screen *scr = dev.pipe;
+   pipe_fence_handle *fh = NULL;
+   // Leading run of events, up to the first not reporting signalled().
+   auto head = queued_events.begin();
+   auto tail = std::find_if(head, queued_events.end(),
+                            [](event_ptr &e) { return !e->signalled(); });
+
+   // Flush, fence that run, reset our fence handle, dequeue the run.
+   pipe->flush(pipe, &fh);
+   std::for_each(head, tail, [&](event_ptr &e) { e->fence(fh); });
+   scr->fence_reference(scr, &fh, NULL);
+   queued_events.erase(head, tail);
+}
+
+void
+_cl_command_queue::sequence(clover::hard_event *ev) {
+   const bool has_prev = !queued_events.empty();
+   if (has_prev)
+      queued_events.back()->chain(ev); // chain ev onto the most recent event
+   queued_events.push_back(ev);
+}
diff --git a/src/gallium/state_trackers/clover/core/queue.hpp b/src/gallium/state_trackers/clover/core/queue.hpp

new file mode 100644 (file)

index 0000000..6c124ea
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/queue.hpp
@@ -0,0 +1,72 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_QUEUE_HPP__
+#define __CORE_QUEUE_HPP__
+
+#include "core/base.hpp"
+#include "core/context.hpp"
+#include "pipe/p_context.h"
+
+namespace clover {
+   typedef struct _cl_command_queue command_queue;
+   class resource;   // forward declarations for the friend list below
+   class mapping;
+   class hard_event;
+}
+
+struct _cl_command_queue : public clover::ref_counter {
+public:
+   _cl_command_queue(clover::context &ctx, clover::device &dev,
+                     cl_command_queue_properties props);
+   _cl_command_queue(const _cl_command_queue &q) = delete; // non-copyable
+   ~_cl_command_queue();
+
+   void flush(); // flushes the pipe context and fences queued events
+
+   cl_command_queue_properties props() const {
+      return __props;
+   }
+
+   clover::context &ctx;
+   clover::device &dev;
+
+   friend class clover::resource; // friends get at the private pipe context
+   friend class clover::root_resource;
+   friend class clover::mapping;
+   friend class clover::hard_event;
+   friend struct _cl_sampler;
+   friend struct _cl_kernel;
+
+private:
+   /// Serialize a hardware event with respect to the previous ones,
+   /// and push it to the pending list.
+   void sequence(clover::hard_event *ev);
+
+   cl_command_queue_properties __props;
+   pipe_context *pipe; // created in the constructor, destroyed in the dtor
+
+   typedef clover::ref_ptr<clover::hard_event> event_ptr;
+   std::vector<event_ptr> queued_events; // pending events, in push order
+};
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/resource.cpp b/src/gallium/state_trackers/clover/core/resource.cpp

new file mode 100644 (file)

index 0000000..61085b2
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/resource.cpp
@@ -0,0 +1,203 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "core/resource.hpp"
+#include "pipe/p_screen.h"
+#include "util/u_sampler.h"
+#include "util/u_format.h"
+
+using namespace clover;
+
+namespace {
+   class box { // builds a pipe_box from an origin/size pair of points
+   public:
+      box(const resource::point &origin, const resource::point &size) :
+         pipe({ (unsigned)origin[0], (unsigned)origin[1],
+                (unsigned)origin[2], (unsigned)size[0],
+                (unsigned)size[1], (unsigned)size[2] }) {
+      } // NOTE(review): assumes pipe_box lists x, y, z before the extents
+
+      operator const pipe_box *() { // usable wherever a const pipe_box * is taken
+         return &pipe;
+      }
+
+   protected:
+      pipe_box pipe;
+   };
+}
+
+resource::resource(clover::device &dev, clover::memory_obj &obj) :
+   dev(dev), obj(obj), pipe(NULL), offset{0} { // no pipe resource attached yet
+}
+
+resource::~resource() { // releases nothing itself; see ~root_resource
+}
+
+void
+resource::copy(command_queue &q, const point &origin, const point &region,
+               resource &src_res, const point &src_origin) {
+   // Both positions are taken relative to the owning resource's offset.
+   box src_box(src_res.offset + src_origin, region);
+   point dst = offset + origin;
+   q.pipe->resource_copy_region(q.pipe, pipe, 0, dst[0], dst[1], dst[2],
+                                src_res.pipe, 0, src_box);
+}
+
+void *
+resource::add_map(command_queue &q, cl_map_flags flags, bool blocking,
+                  const point &origin, const point &region) {
+   maps.emplace_back(q, *this, flags, blocking, origin, region); // may throw
+   return maps.back(); // converted to the mapped pointer via operator void *
+}
+
+void
+resource::del_map(void *p) {
+   auto it = std::find(maps.begin(), maps.end(), p); // match by mapped pointer
+   if (it != maps.end()) // pointers not handed out by add_map are ignored
+      maps.erase(it); // ~mapping unmaps and destroys the transfer
+}
+
+unsigned
+resource::map_count() const {
+   return maps.size(); // mappings added by add_map and not yet deleted
+}
+
+pipe_sampler_view *
+resource::bind_sampler_view(clover::command_queue &q) {
+   pipe_sampler_view info; // template, filled in by the helper below
+
+   u_sampler_view_default_template(&info, pipe, pipe->format);
+   return q.pipe->create_sampler_view(q.pipe, pipe, &info); // see unbind_sampler_view
+}
+
+void
+resource::unbind_sampler_view(clover::command_queue &q,
+                              pipe_sampler_view *st) {
+   q.pipe->sampler_view_destroy(q.pipe, st); // st: view from bind_sampler_view
+}
+
+pipe_surface *
+resource::bind_surface(clover::command_queue &q, bool rw) {
+   pipe_surface info {}; // zero-initialized template
+
+   info.format = pipe->format;
+   info.usage = pipe->bind;
+   info.writable = rw;
+   // Buffers: the element range ends at the last index, width0 - 1.
+   if (pipe->target == PIPE_BUFFER)
+      info.u.buf.last_element = pipe->width0 - 1;
+
+   return q.pipe->create_surface(q.pipe, pipe, &info); // see unbind_surface
+}
+
+void
+resource::unbind_surface(clover::command_queue &q, pipe_surface *st) {
+   q.pipe->surface_destroy(q.pipe, st); // st: surface from bind_surface
+}
+
+root_resource::root_resource(clover::device &dev, clover::memory_obj &obj,
+                             clover::command_queue &q,
+                             const std::string &data) :
+   resource(dev, obj) {
+   pipe_resource info {}; // zero-initialized creation template
+   // Images supply a format and three extents; anything else is width0 = size.
+   if (image *img = dynamic_cast<image *>(&obj)) {
+      info.format = translate_format(img->format());
+      info.width0 = img->width();
+      info.height0 = img->height();
+      info.depth0 = img->depth();
+   } else {
+      info.width0 = obj.size();
+      info.height0 = 1;
+      info.depth0 = 1;
+   }
+
+   info.target = translate_target(obj.type());
+   info.bind = (PIPE_BIND_SAMPLER_VIEW |
+                PIPE_BIND_COMPUTE_RESOURCE |
+                PIPE_BIND_GLOBAL |
+                PIPE_BIND_TRANSFER_READ |
+                PIPE_BIND_TRANSFER_WRITE);
+
+   pipe = dev.pipe->resource_create(dev.pipe, &info);
+   if (!pipe)
+      throw error(CL_OUT_OF_RESOURCES);
+   // Optional initial contents: written over the whole resource through q.
+   if (!data.empty()) {
+      box rect { { 0, 0, 0 }, { info.width0, info.height0, info.depth0 } };
+      unsigned cpp = util_format_get_blocksize(info.format);
+      // NOTE(review): last two arguments are presumably row and slice strides.
+      q.pipe->transfer_inline_write(q.pipe, pipe, 0, PIPE_TRANSFER_WRITE,
+                                    rect, data.data(), cpp * info.width0,
+                                    cpp * info.width0 * info.height0);
+   }
+}
+
+root_resource::root_resource(clover::device &dev, clover::memory_obj &obj,
+                             clover::root_resource &r) :
+   resource(dev, obj) { // not implemented: r is never used, pipe stays NULL
+   assert(0); // XXX -- resource shared among dev and r.dev
+}
+
+root_resource::~root_resource() {
+   dev.pipe->resource_destroy(dev.pipe, pipe); // the resource the ctor created
+}
+
+sub_resource::sub_resource(clover::resource &r, point offset) :
+   resource(r.dev, r.obj) { // shares r's pipe resource, shifted by offset
+   pipe = r.pipe;
+   this->offset = r.offset + offset; // the member, not the shadowing parameter
+}
+
+mapping::mapping(command_queue &q, resource &r,
+                 cl_map_flags flags, bool blocking,
+                 const resource::point &origin,
+                 const resource::point &region) :
+   pctx(q.pipe) {
+   unsigned usage = ((flags & CL_MAP_WRITE ? PIPE_TRANSFER_WRITE : 0 ) |
+                     (flags & CL_MAP_READ ? PIPE_TRANSFER_READ : 0 ) |
+                     (!blocking ? PIPE_TRANSFER_UNSYNCHRONIZED : 0));
+   // Only a non-blocking map may skip synchronization with pending work.
+   pxfer = pctx->get_transfer(pctx, r.pipe, 0, usage,
+                              box(origin + r.offset, region));
+   if (!pxfer)
+      throw error(CL_OUT_OF_RESOURCES);
+
+   p = pctx->transfer_map(pctx, pxfer);
+   if (!p) { // destroy the transfer here: ~mapping will not run after a throw
+      pctx->transfer_destroy(pctx, pxfer);
+      throw error(CL_OUT_OF_RESOURCES);
+   }
+}
+
+mapping::mapping(mapping &&m) :
+   pctx(m.pctx), pxfer(m.pxfer), p(m.p) {
+   m.pxfer = NULL; // so that ~mapping on the moved-from object does nothing
+   m.p = NULL;
+}
+
+mapping::~mapping() {
+   if (pxfer) { // NULL after this object has been moved from
+      pctx->transfer_unmap(pctx, pxfer);
+      pctx->transfer_destroy(pctx, pxfer);
+   }
+}
diff --git a/src/gallium/state_trackers/clover/core/resource.hpp b/src/gallium/state_trackers/clover/core/resource.hpp

new file mode 100644 (file)

index 0000000..9470601
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/resource.hpp
@@ -0,0 +1,129 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_RESOURCE_HPP__
+#define __CORE_RESOURCE_HPP__
+
+#include <list>
+
+#include "core/base.hpp"
+#include "core/memory.hpp"
+#include "core/geometry.hpp"
+#include "pipe/p_state.h"
+
+namespace clover {
+   class mapping;
+
+   ///
+   /// Class that represents a device-specific instance of some memory
+   /// object.
+   ///
+   class resource {
+   public:
+      typedef clover::point<size_t, 3> point; // three size_t components
+
+      resource(const resource &r) = delete; // non-copyable
+      virtual ~resource();
+
+      void copy(command_queue &q, const point &origin, const point &region,
+                resource &src_resource, const point &src_origin);
+
+      void *add_map(command_queue &q, cl_map_flags flags, bool blocking,
+                    const point &origin, const point &region);
+      void del_map(void *p); // p: a pointer previously returned by add_map
+      unsigned map_count() const;
+
+      clover::device &dev;
+      clover::memory_obj &obj;
+
+      friend class sub_resource;
+      friend class mapping;
+      friend struct ::_cl_kernel;
+
+   protected:
+      resource(clover::device &dev, clover::memory_obj &obj);
+
+      pipe_sampler_view *bind_sampler_view(clover::command_queue &q);
+      void unbind_sampler_view(clover::command_queue &q,
+                               pipe_sampler_view *st);
+
+      pipe_surface *bind_surface(clover::command_queue &q, bool rw);
+      void unbind_surface(clover::command_queue &q, pipe_surface *st);
+
+      pipe_resource *pipe; // underlying pipe resource; set by derived classes
+      point offset; // added to every origin passed to copy() and to mappings
+
+   private:
+      std::list<mapping> maps; // live mappings created through add_map
+   };
+
+   ///
+   /// Resource associated with its own top-level data storage
+   /// allocated in some device.
+   ///
+   class root_resource : public resource {
+   public:
+      root_resource(clover::device &dev, clover::memory_obj &obj,
+                    clover::command_queue &q, const std::string &data);
+      root_resource(clover::device &dev, clover::memory_obj &obj,
+                    root_resource &r); // sharing with r: not implemented yet
+      virtual ~root_resource(); // destroys the pipe resource
+   };
+
+   ///
+   /// Resource that reuses a portion of some other resource as data
+   /// storage.
+   ///
+   class sub_resource : public resource {
+   public:
+      sub_resource(clover::resource &r, point offset); // offset relative to r
+   };
+
+   ///
+   /// Class that represents a mapping of some resource into the CPU
+   /// memory space.
+   ///
+   class mapping {
+   public:
+      mapping(command_queue &q, resource &r, cl_map_flags flags,
+              bool blocking, const resource::point &origin,
+              const resource::point &region); // throws error on failure
+      mapping(const mapping &m) = delete; // move-only
+      mapping(mapping &&m);
+      ~mapping();
+
+      operator void *() { // the mapped address
+         return p;
+      }
+
+      operator char *() { // same address, typed for byte arithmetic
+         return (char *)p;
+      }
+
+   private:
+      pipe_context *pctx; // context the transfer was created on
+      pipe_transfer *pxfer; // NULL once moved from
+      void *p;
+   };
+}
+
+#endif
diff --git a/src/gallium/state_trackers/clover/core/sampler.cpp b/src/gallium/state_trackers/clover/core/sampler.cpp

new file mode 100644 (file)

index 0000000..6d683f2
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/sampler.cpp
@@ -0,0 +1,73 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "core/sampler.hpp"
+#include "pipe/p_state.h"
+
+using namespace clover;
+
+_cl_sampler::_cl_sampler(clover::context &ctx, bool norm_mode,
+                         cl_addressing_mode addr_mode,
+                         cl_filter_mode filter_mode) :
+   ctx(ctx), __norm_mode(norm_mode), // parameters are stored unchanged
+   __addr_mode(addr_mode), __filter_mode(filter_mode) {
+}
+
+bool
+_cl_sampler::norm_mode() {
+   return __norm_mode; // value given at construction
+}
+
+cl_addressing_mode
+_cl_sampler::addr_mode() {
+   return __addr_mode; // value given at construction
+}
+
+cl_filter_mode
+_cl_sampler::filter_mode() {
+   return __filter_mode; // value given at construction
+}
+
+void *
+_cl_sampler::bind(clover::command_queue &q) {
+   const cl_addressing_mode am = addr_mode();
+   struct pipe_sampler_state st {};
+   st.normalized_coords = norm_mode();
+
+   // One wrap mode for all three axes; anything unlisted clamps to edge.
+   st.wrap_s = st.wrap_t = st.wrap_r =
+      (am == CL_ADDRESS_CLAMP ? PIPE_TEX_WRAP_CLAMP_TO_BORDER :
+       am == CL_ADDRESS_REPEAT ? PIPE_TEX_WRAP_REPEAT :
+       am == CL_ADDRESS_MIRRORED_REPEAT ? PIPE_TEX_WRAP_MIRROR_REPEAT :
+       PIPE_TEX_WRAP_CLAMP_TO_EDGE);
+
+   // Linear filtering only when requested; nearest otherwise.
+   st.min_img_filter = st.mag_img_filter =
+      (filter_mode() == CL_FILTER_LINEAR ? PIPE_TEX_FILTER_LINEAR :
+       PIPE_TEX_FILTER_NEAREST);
+   return q.pipe->create_sampler_state(q.pipe, &st);
+}
+
+void
+_cl_sampler::unbind(clover::command_queue &q, void *st) {
+   q.pipe->delete_sampler_state(q.pipe, st); // st: state object from bind()
+}
diff --git a/src/gallium/state_trackers/clover/core/sampler.hpp b/src/gallium/state_trackers/clover/core/sampler.hpp

new file mode 100644 (file)

index 0000000..5bb5bcc
--- /dev/null
+++ b/src/gallium/state_trackers/clover/core/sampler.hpp
@@ -0,0 +1,55 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#ifndef __CORE_SAMPLER_HPP__
+#define __CORE_SAMPLER_HPP__
+
+#include "core/base.hpp"
+#include "core/queue.hpp"
+
+namespace clover {
+   typedef struct _cl_sampler sampler; // clover::sampler names ::_cl_sampler
+}
+
+struct _cl_sampler : public clover::ref_counter {
+public:
+   _cl_sampler(clover::context &ctx, bool norm_mode,
+               cl_addressing_mode addr_mode, cl_filter_mode filter_mode);
+
+   bool norm_mode(); // accessors return the construction-time values
+   cl_addressing_mode addr_mode();
+   cl_filter_mode filter_mode();
+
+   clover::context &ctx;
+
+   friend struct _cl_kernel; // same class-key as the other friend declarations
+
+private:
+   void *bind(clover::command_queue &q); // creates a pipe sampler state on q
+   void unbind(clover::command_queue &q, void *st); // deletes what bind() made
+
+   bool __norm_mode;
+   cl_addressing_mode __addr_mode;
+   cl_filter_mode __filter_mode;
+};
+
+#endif
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp

new file mode 100644 (file)

index 0000000..89e21bf
--- /dev/null
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -0,0 +1,94 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include "core/compiler.hpp"
+
+#if 0
+#include <clang/Frontend/CompilerInstance.h>
+#include <clang/Frontend/TextDiagnosticPrinter.h>
+#include <clang/CodeGen/CodeGenAction.h>
+#include <llvm/LLVMContext.h>
+#include <llvm/Support/TargetSelect.h>
+#include <llvm/Support/MemoryBuffer.h>
+
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <cstdio>
+#endif
+
+using namespace clover;
+
+#if 0
+namespace {
+   void
+   build_binary(const std::string &source, const std::string &target,
+                const std::string &name) {
+      clang::CompilerInstance c;
+      clang::EmitObjAction act(&llvm::getGlobalContext());
+      std::string log;
+      llvm::raw_string_ostream s_log(log);
+
+      LLVMInitializeTGSITarget();
+      LLVMInitializeTGSITargetInfo();
+      LLVMInitializeTGSITargetMC();
+      LLVMInitializeTGSIAsmPrinter();
+
+      c.getFrontendOpts().Inputs.push_back(
+         std::make_pair(clang::IK_OpenCL, name));
+      c.getHeaderSearchOpts().UseBuiltinIncludes = false;
+      c.getHeaderSearchOpts().UseStandardIncludes = false;
+      c.getLangOpts().NoBuiltin = true;
+      c.getTargetOpts().Triple = target;
+      c.getInvocation().setLangDefaults(clang::IK_OpenCL);
+      c.createDiagnostics(0, NULL, new clang::TextDiagnosticPrinter(
+                             s_log, c.getDiagnosticOpts()));
+
+      c.getPreprocessorOpts().addRemappedFile(
+         name, llvm::MemoryBuffer::getMemBuffer(source));
+
+      if (!c.ExecuteAction(act))
+         throw build_error(log);
+   }
+
+   module
+   load_binary(const char *name) {
+      std::ifstream fs((name));
+      std::vector<unsigned char> str((std::istreambuf_iterator<char>(fs)),
+                                     (std::istreambuf_iterator<char>()));
+      compat::istream cs(str);
+      return module::deserialize(cs);
+   }
+}
+#endif
+
+module
+clover::compile_program_llvm(const compat::string &source,
+                             const compat::string &target) {
+#if 0 // real implementation is compiled out; both arguments are unused
+   build_binary(source, target, "cl_input");
+   module m = load_binary("cl_input.o");
+   std::remove("cl_input.o");
+   return m;
+#endif
+   return module(); // placeholder: a default-constructed module
+}
diff --git a/src/gallium/state_trackers/clover/tgsi/compiler.cpp b/src/gallium/state_trackers/clover/tgsi/compiler.cpp

new file mode 100644 (file)

index 0000000..eb27db1
--- /dev/null
+++ b/src/gallium/state_trackers/clover/tgsi/compiler.cpp
@@ -0,0 +1,100 @@
+//
+// Copyright 2012 Francisco Jerez
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+
+#include <sstream>
+
+#include "core/compiler.hpp"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_text.h"
+#include "util/u_memory.h"
+
+using namespace clover;
+
+namespace {
+   void
+   read_header(const std::string &header, module &m) {
+      std::istringstream ls(header);
+      std::string line;
+      // One symbol per line: <name> <offset> <argument kind>...
+      while (getline(ls, line)) {
+         std::istringstream ts(line);
+         std::string name, tok;
+         module::size_t offset;
+         compat::vector<module::argument> args;
+
+         if (!(ts >> name)) // blank line
+            continue;
+
+         if (!(ts >> offset))
+            throw build_error("invalid kernel start address");
+         // Every argument kind is recorded with size 4, except sampler (0).
+         while (ts >> tok) {
+            if (tok == "scalar")
+               args.push_back({ module::argument::scalar, 4 });
+            else if (tok == "global")
+               args.push_back({ module::argument::global, 4 });
+            else if (tok == "local")
+               args.push_back({ module::argument::local, 4 });
+            else if (tok == "constant")
+               args.push_back({ module::argument::constant, 4 });
+            else if (tok == "image2d_rd")
+               args.push_back({ module::argument::image2d_rd, 4 });
+            else if (tok == "image2d_wr")
+               args.push_back({ module::argument::image2d_wr, 4 });
+            else if (tok == "image3d_rd")
+               args.push_back({ module::argument::image3d_rd, 4 });
+            else if (tok == "image3d_wr")
+               args.push_back({ module::argument::image3d_wr, 4 });
+            else if (tok == "sampler")
+               args.push_back({ module::argument::sampler, 0 });
+            else
+               throw build_error("invalid kernel argument");
+         }
+         // NOTE(review): the 0 is presumably the id of read_body's section.
+         m.syms.push_back({ name, 0, offset, args });
+      }
+   }
+
+   void
+   read_body(const char *source, module &m) {
+      tgsi_token prog[1024]; // fixed capacity for the translated tokens
+
+      if (!tgsi_text_translate(source, prog, Elements(prog)))
+         throw build_error("translate failed");
+
+      unsigned sz = tgsi_num_tokens(prog) * sizeof(tgsi_token); // size in bytes
+      m.secs.push_back({ 0, module::section::text, sz, { (char *)prog, sz } });
+   }
+}
+
+module
+clover::compile_program_tgsi(const compat::string &source,
+                             const compat::string &target) {
+   const char *body = source.find("COMP\n"); // where the TGSI text starts
+   module m; // NOTE(review): a missing "COMP\n" marker is not checked for
+
+   read_header({ source.begin(), body }, m); // everything before the marker
+   read_body(body, m);
+
+   return m;
+}
diff --git a/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp

index 392fd3e0921583a248fa4e15cb3d0c596ab6cee5..aaa46f19e8c9287edf997d77fedbca55383304d0 100644 (file)
--- a/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp
+++ b/src/gallium/state_trackers/d3d1x/gd3d1x/sm4_to_tgsi.cpp
@@ -446,7 +446,7 @@ struct sm4_to_tgsi_converter
                                 break;
                         case SM4_OPCODE_RESINFO:
                                 // TODO: return type
-                               ureg_RESINFO(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
+                               ureg_SVIEWINFO(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]);
                                 break;
                         // TODO: sample index, texture offset
                         case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg
@@ -750,11 +750,12 @@ next:;
                                 }
                                 if(resources.size() <= (unsigned)idx)
                                         resources.resize(idx + 1);
-                               resources[idx] = ureg_DECL_resource(ureg, idx, targets[idx].first,
-                                                                   res_return_type(dcl.rrt.x),
-                                                                   res_return_type(dcl.rrt.y),
-                                                                   res_return_type(dcl.rrt.z),
-                                                                   res_return_type(dcl.rrt.w));
+                               resources[idx] = ureg_DECL_sampler_view(
+                                   ureg, idx, targets[idx].first,
+                                   res_return_type(dcl.rrt.x),
+                                   res_return_type(dcl.rrt.y),
+                                   res_return_type(dcl.rrt.z),
+                                   res_return_type(dcl.rrt.w));
                                 break;
                         case SM4_OPCODE_DCL_SAMPLER:
                                 check(idx >= 0);
diff --git a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c

index b45ab5c4f2e86b5b3375616e870d9060a8ca250b..b17a8ce78c9a4ddaa795e7873d4603d916298286 100644 (file)
--- a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c
+++ b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c
@@ -41,6 +41,7 @@
   *  - no pixmap support
   */
  
+#include <errno.h>
  #include <sys/ioctl.h>
  #include <sys/types.h>
  #include <sys/stat.h>
diff --git a/src/gallium/state_trackers/gbm/gbm_drm.c b/src/gallium/state_trackers/gbm/gbm_drm.c

index d4baf87096a76233d55b524b0f7d1d3f13fed7e7..a327fdaae64f871aac552ee27cb6994de29dde66 100644 (file)
--- a/src/gallium/state_trackers/gbm/gbm_drm.c
+++ b/src/gallium/state_trackers/gbm/gbm_drm.c
@@ -190,10 +190,7 @@ gbm_gallium_drm_destroy(struct gbm_device *gbm)
  {
     struct gbm_gallium_drm_device *gdrm = gbm_gallium_drm_device(gbm);
  
-   gdrm->screen->destroy(gdrm->screen);
-
-   FREE(gdrm->base.driver_name);
-
+   gallium_screen_destroy(gdrm);
     FREE(gdrm);
  }
  
diff --git a/src/gallium/state_trackers/gbm/gbm_gallium_drmint.h b/src/gallium/state_trackers/gbm/gbm_gallium_drmint.h

index 6277b8dba2eae3ce32e80b408e3076c7bab14853..a5d6d834737b28dff45e3000921901f05de13d17 100644 (file)
--- a/src/gallium/state_trackers/gbm/gbm_gallium_drmint.h
+++ b/src/gallium/state_trackers/gbm/gbm_gallium_drmint.h
@@ -71,4 +71,7 @@ gbm_gallium_drm_device_create(int fd);
  int
  gallium_screen_create(struct gbm_gallium_drm_device *gdrm);
  
+void
+gallium_screen_destroy(struct gbm_gallium_drm_device *gdrm);
+
  #endif
diff --git a/src/gallium/targets/gbm/Makefile b/src/gallium/targets/gbm/Makefile

index cd0c61080d80f5e793e487a6cf4779b24db27db4..423debf176dd54ba5c41f6c73f51b35c4723678e 100644 (file)
--- a/src/gallium/targets/gbm/Makefile
+++ b/src/gallium/targets/gbm/Makefile
@@ -3,192 +3,39 @@
  TOP = ../../../..
  include $(TOP)/configs/current
  
-PIPE_PREFIX := pipe_
-
  GBM_BACKEND = gbm_gallium_drm
-GBM_SOURCES = gbm.c pipe_loader.c
+GBM_SOURCES = gbm.c
  
  GBM_INCLUDES = \
                -I$(TOP)/include \
                -I$(TOP)/src/gallium/state_trackers/gbm \
                -I$(TOP)/src/gbm/main \
                -I$(TOP)/src/gallium/auxiliary \
-              -I$(TOP)/src/gallium/include \
+              -I$(TOP)/src/gallium/winsys \
+              -I$(TOP)/src/gallium/include
  
  GBM_LIBS = $(LIBUDEV_LIBS) $(LIBDRM_LIB) -lm \
            $(TOP)/src/gallium/state_trackers/gbm/libgbm.a \
-          $(TOP)/src/gallium/drivers/identity/libidentity.a \
-          $(TOP)/src/gallium/drivers/galahad/libgalahad.a \
-          $(TOP)/src/gallium/drivers/trace/libtrace.a \
-          $(TOP)/src/gallium/drivers/rbug/librbug.a \
-          $(GALLIUM_AUXILIARIES)
-
+          $(GALLIUM_PIPE_LOADER_LIBS) $(GALLIUM_AUXILIARIES)
  
  GBM_CFLAGS = \
-            -DGBM_BACKEND_SEARCH_DIR=\"$(INSTALL_LIB_DIR)/gbm\" \
-            -DPIPE_PREFIX=\"$(PIPE_PREFIX)\" \
+            -DPIPE_SEARCH_DIR=\"$(PIPE_INSTALL_DIR)\" \
+             $(GALLIUM_PIPE_LOADER_DEFINES) \
              $(LIBUDEV_CFLAGS) \
-            $(LIBDRM_CFLAGS)  
-
-
-pipe_INCLUDES = \
-       -I$(TOP)/include \
-       -I$(TOP)/src/gallium/auxiliary \
-       -I$(TOP)/src/gallium/drivers \
-       -I$(TOP)/src/gallium/include \
-       -I$(TOP)/src/gallium/winsys
-
-pipe_LIBS = \
-       $(TOP)/src/gallium/drivers/identity/libidentity.a \
-       $(TOP)/src/gallium/drivers/trace/libtrace.a \
-       $(TOP)/src/gallium/drivers/rbug/librbug.a \
-       $(GALLIUM_AUXILIARIES)
-
-# as if we are DRI modules
-pipe_SYS = $(DRI_LIB_DEPS)
-
-pipe_CLFLAGS = \
-       -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD \
-       $(LIBDRM_CFLAGS)
-
-pipe_LDFLAGS = -Wl,--no-undefined
-
-# i915 pipe driver
-i915_LIBS = \
-       $(TOP)/src/gallium/winsys/i915/drm/libi915drm.a \
-       $(TOP)/src/gallium/drivers/i915/libi915.a
-i915_SYS = -ldrm_intel
-
-# nouveau pipe driver
-nouveau_LIBS = \
-       $(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \
-       $(TOP)/src/gallium/drivers/nv30/libnv30.a \
-       $(TOP)/src/gallium/drivers/nv50/libnv50.a \
-       $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \
-       $(TOP)/src/gallium/drivers/nouveau/libnouveau.a
-nouveau_SYS = -ldrm_nouveau
-
-# r300 pipe driver
-r300_LIBS = \
-       $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
-       $(TOP)/src/gallium/drivers/r300/libr300.a
-r300_SYS += -ldrm_radeon
-
-# r600 pipe driver
-r600_LIBS = \
-       $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
-       $(TOP)/src/gallium/drivers/r600/libr600.a
-r600_SYS += -ldrm_radeon
-
-# radeonsi pipe driver
-radeonsi_LIBS = \
-       $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
-       $(TOP)/src/gallium/drivers/radeonsi/libradeonsi.a
-radeonsi_SYS += -ldrm_radeon
-
-# vmwgfx pipe driver
-vmwgfx_LIBS = \
-       $(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \
-       $(TOP)/src/gallium/drivers/svga/libsvga.a
-
-
+            $(LIBDRM_CFLAGS)
  
-# LLVM
-ifeq ($(MESA_LLVM),1)
-pipe_SYS += $(LLVM_LIBS)
-pipe_LDFLAGS += $(LLVM_LDFLAGS)
-endif
-
-ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),)
-pipe_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
-endif
-
-# determine the targets/sources
-_pipe_TARGETS_CC =
-_pipe_TARGETS_CXX =
-pipe_SOURCES =
-
-ifneq ($(findstring i915/drm,$(GALLIUM_WINSYS_DIRS)),)
-_pipe_TARGETS_CC += $(PIPE_PREFIX)i915.so
-pipe_SOURCES += pipe_i915.c
-endif
-
-ifneq ($(findstring nouveau/drm,$(GALLIUM_WINSYS_DIRS)),)
-_pipe_TARGETS_CXX += $(PIPE_PREFIX)nouveau.so
-pipe_SOURCES += pipe_nouveau.c
-endif
-
-ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),)
-ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),)
-_pipe_TARGETS_CC += $(PIPE_PREFIX)r300.so
-pipe_SOURCES += pipe_r300.c
-endif
-endif
-
-ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),)
-ifneq ($(findstring r600,$(GALLIUM_DRIVERS_DIRS)),)
-_pipe_TARGETS_CC += $(PIPE_PREFIX)r600.so
-pipe_SOURCES += pipe_r600.c
-endif
-endif
-
-ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),)
-ifneq ($(findstring radeonsi,$(GALLIUM_DRIVERS_DIRS)),)
-_pipe_TARGETS_CC += $(PIPE_PREFIX)radeonsi.so
-pipe_SOURCES += pipe_radeonsi.c
-endif
-endif
-
-ifneq ($(findstring svga/drm,$(GALLIUM_WINSYS_DIRS)),)
-_pipe_TARGETS_CC += $(PIPE_PREFIX)vmwgfx.so
-pipe_SOURCES += pipe_vmwgfx.c
-endif
-
-pipe_OBJECTS = $(pipe_SOURCES:.c=.o)
-
-ifeq ($(MESA_LLVM),1)
-pipe_TARGETS_CXX = $(_pipe_TARGETS_CXX) $(_pipe_TARGETS_CC)
-pipe_TARGETS_CC =
-else
-pipe_TARGETS_CXX = $(_pipe_TARGETS_CXX)
-pipe_TARGETS_CC = $(_pipe_TARGETS_CC)
-endif
-
-GBM_EXTRA_TARGETS = $(addprefix $(TOP)/$(LIB_DIR)/gbm/, $(pipe_TARGETS_CC)) $(addprefix $(TOP)/$(LIB_DIR)/gbm/, $(pipe_TARGETS_CXX))
+GBM_EXTRA_TARGETS = pipes
  GBM_EXTRA_INSTALL = install-pipes
  GBM_EXTRA_CLEAN = clean-pipes
-GBM_EXTRA_SOURCES = $(pipe_SOURCES)
  
  include $(TOP)/src/gbm/backends/Makefile.template
  
+PIPE_SRC_DIR = $(TOP)/src/gallium/targets/pipe-loader
+PIPE_INSTALL_DIR = $(INSTALL_LIB_DIR)/gbm
  
-$(GBM_EXTRA_TARGETS): $(TOP)/$(LIB_DIR)/gbm/%: %
-       @$(INSTALL) -d $(dir $@)
-       $(INSTALL) $< $(dir $@)
-
-$(pipe_TARGETS_CC): $(PIPE_PREFIX)%.so: pipe_%.o $(pipe_LIBS) $($*_LIBS)
-       $(MKLIB) -o $@ -noprefix -linker '$(CC)' \
-               -ldflags '-L$(TOP)/$(LIB_DIR) $(pipe_LDFLAGS) $(LDFLAGS)' \
-               $(MKLIB_OPTIONS) $< \
-               -Wl,--start-group $(pipe_LIBS) $($*_LIBS) -Wl,--end-group \
-               $(pipe_SYS) $($*_SYS)
-
-$(pipe_TARGETS_CXX): $(PIPE_PREFIX)%.so: pipe_%.o $(pipe_LIBS) $($*_LIBS)
-       $(MKLIB) -o $@ -noprefix -linker '$(CXX)' \
-               -ldflags '-L$(TOP)/$(LIB_DIR) $(pipe_LDFLAGS) $(LDFLAGS)' \
-               $(MKLIB_OPTIONS) $< \
-               -Wl,--start-group $(pipe_LIBS) $($*_LIBS) -Wl,--end-group \
-               $(pipe_SYS) $($*_SYS)
-
-$(pipe_OBJECTS): %.o: %.c
-       $(CC) -c -o $@ $< $(pipe_INCLUDES) $(pipe_CFLAGS) $(CFLAGS)
-
-install-pipes: $(GBM_EXTRA_TARGETS)
-       $(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/gbm
-       for tgt in $(GBM_EXTRA_TARGETS); do \
-               $(MINSTALL) "$$tgt" $(DESTDIR)$(INSTALL_LIB_DIR)/gbm; \
-       done
-
+pipes:
+       @$(MAKE) -C $(PIPE_SRC_DIR)
+install-pipes:
+       @$(MAKE) -C $(PIPE_SRC_DIR) PIPE_INSTALL_DIR=$(PIPE_INSTALL_DIR) install
  clean-pipes:
-       rm -f $(pipe_TARGETS)
-       rm -f $(pipe_OBJECTS)
+       @$(MAKE) -C $(PIPE_SRC_DIR) clean
diff --git a/src/gallium/targets/gbm/gbm.c b/src/gallium/targets/gbm/gbm.c

index e840fc5fa1ad4f0b188899b2779600c5ac7187cb..7d2af513db82f9a60a5b6c0d2d809fe5e2b7ff00 100644 (file)
--- a/src/gallium/targets/gbm/gbm.c
+++ b/src/gallium/targets/gbm/gbm.c
@@ -25,36 +25,73 @@
   *    Benjamin Franzke <benjaminfranzke@googlemail.com>
   */
  
-#include "util/u_inlines.h"
-
  #include "gbm_gallium_drmint.h"
-#include "pipe_loader.h"
  
-static struct pipe_screen *
-create_drm_screen(const char *name, int fd)
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "pipe-loader/pipe_loader.h"
+
+/**
+ * Return the library search path passed to the pipe loader: the value of
+ * GBM_BACKENDS_PATH when it is set and the effective and real user ids
+ * match, the compiled-in PIPE_SEARCH_DIR otherwise.
+ */
+static const char *
+get_library_search_path(void)
  {
-   struct pipe_module *pmod = get_pipe_module(name);
- 
-   return (pmod && pmod->drmdd && pmod->drmdd->create_screen) ?
-      pmod->drmdd->create_screen(fd) : NULL;
+   const char *search_path = NULL;
+
+   /* don't allow setuid apps to use GBM_BACKENDS_PATH */
+   if (geteuid() == getuid())
+      search_path = getenv("GBM_BACKENDS_PATH");
+   if (search_path == NULL)
+      search_path = PIPE_SEARCH_DIR;
+
+   return search_path;
  }
  
+/**
+ * Probe the DRM fd of \p gdrm with the pipe loader and create its screen.
+ *
+ * On success the loader device is stored in gdrm->driver and a copy of its
+ * driver name in gdrm->base.driver_name.
+ *
+ * \return 0 on success, -1 if the probe or the screen creation fails.
+ */
  int
  gallium_screen_create(struct gbm_gallium_drm_device *gdrm)
  {
-   gdrm->base.driver_name = drm_fd_get_screen_name(gdrm->base.base.fd);
-   if (gdrm->base.driver_name == NULL)
+   struct pipe_loader_device *dev;
+   int ret;
+
+   ret = pipe_loader_drm_probe_fd(&dev, gdrm->base.base.fd);
+   if (!ret)
        return -1;
  
-   gdrm->screen = create_drm_screen(gdrm->base.driver_name, gdrm->base.base.fd);
+   gdrm->screen = pipe_loader_create_screen(dev, get_library_search_path());
     if (gdrm->screen == NULL) {
-      debug_printf("failed to load driver: %s\n", gdrm->base.driver_name);
+      /* base.driver_name is only assigned on success; use the probed name */
+      debug_printf("failed to load driver: %s\n", dev->driver_name);
+      pipe_loader_release(&dev, 1);
        return -1;
     };
  
+   gdrm->driver = dev;
+   gdrm->base.driver_name = strdup(dev->driver_name);
     return 0;
  }
  
+/**
+ * Tear down what gallium_screen_create() set up: free the driver name copy,
+ * destroy the screen and release the pipe loader device.
+ */
+void
+gallium_screen_destroy(struct gbm_gallium_drm_device *gdrm)
+{
+   FREE(gdrm->base.driver_name);
+   gdrm->screen->destroy(gdrm->screen);
+   pipe_loader_release((struct pipe_loader_device **)&gdrm->driver, 1);
+}
+
  GBM_EXPORT struct gbm_backend gbm_backend = {
     .backend_name = "gallium_drm",
     .create_device = gbm_gallium_drm_device_create,
diff --git a/src/gallium/targets/gbm/pipe_i915.c b/src/gallium/targets/gbm/pipe_i915.c

deleted file mode 100644 (file)

index 85662cb..0000000
--- a/src/gallium/targets/gbm/pipe_i915.c
+++ /dev/null
@@ -1,27 +0,0 @@
-
-#include "target-helpers/inline_debug_helper.h"
-#include "state_tracker/drm_driver.h"
-#include "i915/drm/i915_drm_public.h"
-#include "i915/i915_public.h"
-
-static struct pipe_screen *
-create_screen(int fd)
-{
-   struct i915_winsys *iws;
-   struct pipe_screen *screen;
-
-   iws = i915_drm_winsys_create(fd);
-   if (!iws)
-      return NULL;
-
-   screen = i915_screen_create(iws);
-   if (!screen)
-      return NULL;
-
-   screen = debug_screen_wrap(screen);
-
-   return screen;
-}
-
-PUBLIC
-DRM_DRIVER_DESCRIPTOR("i915", "i915", create_screen, NULL)
diff --git a/src/gallium/targets/gbm/pipe_loader.c b/src/gallium/targets/gbm/pipe_loader.c

deleted file mode 100644 (file)

index 6200541..0000000
--- a/src/gallium/targets/gbm/pipe_loader.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Kristian Høgsberg <krh@bitplanet.net>
- *    Benjamin Franzke <benjaminfranzke@googlemail.com>
- */
-
-#include <stdio.h>
-#include "util/u_string.h"
-#include "util/u_memory.h"
-
-#include <libudev.h>
-
-#include "gbm_gallium_drmint.h"
-#include "pipe_loader.h"
-#define DRIVER_MAP_GALLIUM_ONLY
-#include "pci_ids/pci_id_driver_map.h"
-
-static struct pipe_module pipe_modules[16];
-
-static INLINE char *
-loader_strdup(const char *str)
-{
-   return mem_dup(str, strlen(str) + 1);
-}
-
-char *
-drm_fd_get_screen_name(int fd)
-{
-   struct udev *udev;
-   struct udev_device *device, *parent;
-   const char *pci_id;
-   char *driver = NULL;
-   int vendor_id, chip_id, i, j;
-
-   udev = udev_new();
-   device = _gbm_udev_device_new_from_fd(udev, fd);
-   if (device == NULL)
-      return NULL;
-
-   parent = udev_device_get_parent(device);
-   if (parent == NULL) {
-      fprintf(stderr, "gbm: could not get parent device");
-      goto out;
-   }
-
-   pci_id = udev_device_get_property_value(parent, "PCI_ID");
-   if (pci_id == NULL ||
-       sscanf(pci_id, "%x:%x", &vendor_id, &chip_id) != 2) {
-      fprintf(stderr, "gbm: malformed or no PCI ID");
-      goto out;
-   }
-
-   for (i = 0; driver_map[i].driver; i++) {
-      if (vendor_id != driver_map[i].vendor_id)
-         continue;
-      if (driver_map[i].num_chips_ids == -1) {
-         driver = loader_strdup(driver_map[i].driver);
-         _gbm_log("pci id for %d: %04x:%04x, driver %s",
-                  fd, vendor_id, chip_id, driver);
-         goto out;
-      }
-
-      for (j = 0; j < driver_map[i].num_chips_ids; j++)
-         if (driver_map[i].chip_ids[j] == chip_id) {
-            driver = loader_strdup(driver_map[i].driver);
-            _gbm_log("pci id for %d: %04x:%04x, driver %s",
-                     fd, vendor_id, chip_id, driver);
-            goto out;
-         }
-   }
-
-out:
-   udev_device_unref(device);
-   udev_unref(udev);
-
-   return driver;
-}
-
-static void
-find_pipe_module(struct pipe_module *pmod, const char *name)
-{
-   char *search_paths, *end, *next, *p;
-   char path[PATH_MAX];
-   int ret;
-   
-   search_paths = NULL;
-   if (geteuid() == getuid()) {
-      /* don't allow setuid apps to use GBM_BACKENDS_PATH */
-      search_paths = getenv("GBM_BACKENDS_PATH");
-   }
-   if (search_paths == NULL)
-      search_paths = GBM_BACKEND_SEARCH_DIR;
-
-   end = search_paths + strlen(search_paths);
-   for (p = search_paths; p < end && pmod->lib == NULL; p = next + 1) {
-      int len;
-      next = strchr(p, ':');
-      if (next == NULL)
-         next = end;
-
-      len = next - p;
-
-      if (len) {
-         ret = util_snprintf(path, sizeof(path),
-                             "%.*s/" PIPE_PREFIX "%s" UTIL_DL_EXT, len, p, pmod->name);
-      }
-      else {
-         ret = util_snprintf(path, sizeof(path),
-                             PIPE_PREFIX "%s" UTIL_DL_EXT, pmod->name);
-      }
-      if (ret > 0 && ret < sizeof(path)) {
-         pmod->lib = util_dl_open(path);
-         debug_printf("loaded %s\n", path);
-      }
-
-   }
-}
-
-static boolean
-load_pipe_module(struct pipe_module *pmod, const char *name)
-{
-   pmod->name = loader_strdup(name);
-   if (!pmod->name)
-      return FALSE;
-
-   find_pipe_module(pmod, name);
-
-   if (pmod->lib) {
-      pmod->drmdd = (const struct drm_driver_descriptor *)
-         util_dl_get_proc_address(pmod->lib, "driver_descriptor");
-
-      /* sanity check on the name */
-      if (pmod->drmdd && strcmp(pmod->drmdd->name, pmod->name) != 0)
-         pmod->drmdd = NULL;
-
-      if (!pmod->drmdd) {
-         util_dl_close(pmod->lib);
-         pmod->lib = NULL;
-      }
-   }
-
-   return (pmod->drmdd != NULL);
-}
-
-struct pipe_module *
-get_pipe_module(const char *name)
-{
-   struct pipe_module *pmod = NULL;
-   int i;
-
-   if (!name)
-      return NULL;
-
-   for (i = 0; i < Elements(pipe_modules); i++) {
-      if (!pipe_modules[i].initialized ||
-          strcmp(pipe_modules[i].name, name) == 0) {
-         pmod = &pipe_modules[i];
-         break;
-      }
-   }
-   if (!pmod)
-      return NULL;
-
-   if (!pmod->initialized) {
-      load_pipe_module(pmod, name);
-      pmod->initialized = TRUE;
-   }
-
-   return pmod;
-}
diff --git a/src/gallium/targets/gbm/pipe_loader.h b/src/gallium/targets/gbm/pipe_loader.h

deleted file mode 100644 (file)

index 2e4cd99..0000000
--- a/src/gallium/targets/gbm/pipe_loader.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *    Benjamin Franzke <benjaminfranzke@googlemail.com>
- */
-
-#ifndef _PIPE_LOADER_H_
-#define _PIPE_LOADER_H_
-
-#include "pipe/p_compiler.h"
-#include "util/u_dl.h"
-#include "state_tracker/drm_driver.h"
-
-struct pipe_module {
-   boolean initialized;
-   char *name;
-   struct util_dl_library *lib;
-   const struct drm_driver_descriptor *drmdd;
-};
-
-struct pipe_module *
-get_pipe_module(const char *name);
-
-char *
-drm_fd_get_screen_name(int fd);
-
-#endif
diff --git a/src/gallium/targets/gbm/pipe_nouveau.c b/src/gallium/targets/gbm/pipe_nouveau.c

deleted file mode 100644 (file)

index 65425e8..0000000
--- a/src/gallium/targets/gbm/pipe_nouveau.c
+++ /dev/null
@@ -1,21 +0,0 @@
-
-#include "target-helpers/inline_debug_helper.h"
-#include "state_tracker/drm_driver.h"
-#include "nouveau/drm/nouveau_drm_public.h"
-
-static struct pipe_screen *
-create_screen(int fd)
-{
-   struct pipe_screen *screen;
-
-   screen = nouveau_drm_screen_create(fd);
-   if (!screen)
-      return NULL;
-
-   screen = debug_screen_wrap(screen);
-
-   return screen;
-}
-
-PUBLIC
-DRM_DRIVER_DESCRIPTOR("nouveau", "nouveau", create_screen, NULL)
diff --git a/src/gallium/targets/gbm/pipe_r300.c b/src/gallium/targets/gbm/pipe_r300.c

deleted file mode 100644 (file)

index 0556859..0000000
--- a/src/gallium/targets/gbm/pipe_r300.c
+++ /dev/null
@@ -1,27 +0,0 @@
-
-#include "target-helpers/inline_debug_helper.h"
-#include "state_tracker/drm_driver.h"
-#include "radeon/drm/radeon_drm_public.h"
-#include "r300/r300_public.h"
-
-static struct pipe_screen *
-create_screen(int fd)
-{
-   struct radeon_winsys *sws;
-   struct pipe_screen *screen;
-
-   sws = radeon_drm_winsys_create(fd);
-   if (!sws)
-      return NULL;
-
-   screen = r300_screen_create(sws);
-   if (!screen)
-      return NULL;
-
-   screen = debug_screen_wrap(screen);
-
-   return screen;
-}
-
-PUBLIC
-DRM_DRIVER_DESCRIPTOR("r300", "radeon", create_screen, NULL)
diff --git a/src/gallium/targets/gbm/pipe_r600.c b/src/gallium/targets/gbm/pipe_r600.c

deleted file mode 100644 (file)

index 5d89aca..0000000
--- a/src/gallium/targets/gbm/pipe_r600.c
+++ /dev/null
@@ -1,26 +0,0 @@
-#include "state_tracker/drm_driver.h"
-#include "target-helpers/inline_debug_helper.h"
-#include "radeon/drm/radeon_drm_public.h"
-#include "r600/r600_public.h"
-
-static struct pipe_screen *
-create_screen(int fd)
-{
-   struct radeon_winsys *rw;
-   struct pipe_screen *screen;
-
-   rw = radeon_drm_winsys_create(fd);
-   if (!rw)
-      return NULL;
-
-   screen = r600_screen_create(rw);
-   if (!screen)
-      return NULL;
-
-   screen = debug_screen_wrap(screen);
-
-   return screen;
-}
-
-PUBLIC
-DRM_DRIVER_DESCRIPTOR("r600", "radeon", create_screen, NULL)
diff --git a/src/gallium/targets/gbm/pipe_swrast.c b/src/gallium/targets/gbm/pipe_swrast.c

deleted file mode 100644 (file)

index 092abf0..0000000
--- a/src/gallium/targets/gbm/pipe_swrast.c
+++ /dev/null
@@ -1,22 +0,0 @@
-
-#include "target-helpers/inline_sw_helper.h"
-#include "target-helpers/inline_debug_helper.h"
-#include "state_tracker/drm_driver.h"
-
-PUBLIC struct pipe_screen *
-swrast_create_screen(struct sw_winsys *ws);
-
-PUBLIC
-DRM_DRIVER_DESCRIPTOR("swrast", NULL, NULL, NULL)
-
-struct pipe_screen *
-swrast_create_screen(struct sw_winsys *ws)
-{
-   struct pipe_screen *screen;
-
-   screen = sw_screen_create(ws);
-   if (screen)
-      screen = debug_screen_wrap(screen);
-
-   return screen;
-}
diff --git a/src/gallium/targets/gbm/pipe_vmwgfx.c b/src/gallium/targets/gbm/pipe_vmwgfx.c

deleted file mode 100644 (file)

index bfe665b..0000000
--- a/src/gallium/targets/gbm/pipe_vmwgfx.c
+++ /dev/null
@@ -1,27 +0,0 @@
-
-#include "target-helpers/inline_debug_helper.h"
-#include "state_tracker/drm_driver.h"
-#include "svga/drm/svga_drm_public.h"
-#include "svga/svga_public.h"
-
-static struct pipe_screen *
-create_screen(int fd)
-{
-   struct svga_winsys_screen *sws;
-   struct pipe_screen *screen;
-
-   sws = svga_drm_winsys_screen_create(fd);
-   if (!sws)
-      return NULL;
-
-   screen = svga_screen_create(sws);
-   if (!screen)
-      return NULL;
-
-   screen = debug_screen_wrap(screen);
-
-   return screen;
-}
-
-PUBLIC
-DRM_DRIVER_DESCRIPTOR("vmwgfx", "vmwgfx", create_screen, NULL)
diff --git a/src/gallium/targets/opencl/Makefile.am b/src/gallium/targets/opencl/Makefile.am

new file mode 100644 (file)

index 0000000..0d233c1
--- /dev/null
+++ b/src/gallium/targets/opencl/Makefile.am
@@ -0,0 +1,36 @@
+AUTOMAKE_OPTIONS = subdir-objects
+
+lib_LTLIBRARIES = libOpenCL.la
+
+libOpenCL_la_LDFLAGS = \
+       -version-number 1:0
+
+libOpenCL_la_LIBADD = \
+       $(top_builddir)/src/gallium/state_trackers/clover/libclover.la \
+       $(top_builddir)/src/gallium/auxiliary/libgallium.a \
+       $(GALLIUM_PIPE_LOADER_LIBS) $(LIBUDEV_LIBS) \
+       -ldl
+
+libOpenCL_la_SOURCES =
+
+# Force usage of a C++ linker
+nodist_EXTRA_libOpenCL_la_SOURCES = dummy.cpp
+
+PIPE_SRC_DIR = $(top_srcdir)/src/gallium/targets/pipe-loader
+
+# Provide compatibility with scripts for the old Mesa build system for
+# a while by putting a link to the driver into /lib of the build tree.
+all-local: libOpenCL.la
+       @$(MAKE) -C $(PIPE_SRC_DIR)
+       $(MKDIR_P) $(top_builddir)/$(LIB_DIR)
+       ln -f .libs/libOpenCL.so* $(top_builddir)/$(LIB_DIR)/
+
+install-exec-local:
+       @$(MAKE) -C $(PIPE_SRC_DIR) PIPE_INSTALL_DIR=$(OPENCL_LIB_INSTALL_DIR) install
+
+clean-local:
+       @$(MAKE) -C $(PIPE_SRC_DIR) clean
+
+# FIXME: Remove when the rest of Gallium is converted to automake.
+TOP=$(top_builddir)
+default: all
diff --git a/src/gallium/targets/pipe-loader/Makefile b/src/gallium/targets/pipe-loader/Makefile

new file mode 100644 (file)

index 0000000..eb3b4fc
--- /dev/null
+++ b/src/gallium/targets/pipe-loader/Makefile
@@ -0,0 +1,165 @@
+# Makefile for building pipe driver shared libraries.
+#
+# Input variables: PIPE_INSTALL_DIR, PIPE_PREFIX (optional)
+#
+TOP = ../../../..
+include $(TOP)/configs/current
+
+PIPE_PREFIX ?= pipe_
+
+PIPE_CPPFLAGS = \
+       -DGALLIUM_RBUG \
+       -DGALLIUM_TRACE \
+       -DGALLIUM_GALAHAD \
+       -I$(TOP)/include \
+       -I$(TOP)/src/gallium/auxiliary \
+       -I$(TOP)/src/gallium/drivers \
+       -I$(TOP)/src/gallium/include \
+       -I$(TOP)/src/gallium/winsys
+
+PIPE_LIBS = \
+       $(TOP)/src/gallium/drivers/identity/libidentity.a \
+       $(TOP)/src/gallium/drivers/galahad/libgalahad.a \
+       $(TOP)/src/gallium/drivers/trace/libtrace.a \
+       $(TOP)/src/gallium/drivers/rbug/librbug.a \
+       $(GALLIUM_AUXILIARIES)
+
+PIPE_SYS = $(LIBDRM_LIB) -lm -lpthread $(DLOPEN_LIBS)
+
+PIPE_CFLAGS = $(LIBDRM_CFLAGS)
+
+PIPE_LDFLAGS = -Wl,--no-undefined
+
+# i915 pipe driver
+i915_LIBS = \
+       $(TOP)/src/gallium/winsys/i915/drm/libi915drm.a \
+       $(TOP)/src/gallium/drivers/i915/libi915.a
+i915_SYS = -ldrm_intel
+
+# nouveau pipe driver
+nouveau_LIBS = \
+       $(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \
+       $(TOP)/src/gallium/drivers/nv30/libnv30.a \
+       $(TOP)/src/gallium/drivers/nv50/libnv50.a \
+       $(TOP)/src/gallium/drivers/nvc0/libnvc0.a \
+       $(TOP)/src/gallium/drivers/nouveau/libnouveau.a
+nouveau_SYS = -ldrm_nouveau
+
+# r300 pipe driver
+r300_LIBS = \
+       $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
+       $(TOP)/src/gallium/drivers/r300/libr300.a
+r300_SYS += -ldrm_radeon
+
+# r600 pipe driver
+r600_LIBS = \
+       $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
+       $(TOP)/src/gallium/drivers/r600/libr600.a
+r600_SYS += -ldrm_radeon
+
+# vmwgfx pipe driver
+vmwgfx_LIBS = \
+       $(TOP)/src/gallium/winsys/svga/drm/libsvgadrm.a \
+       $(TOP)/src/gallium/drivers/svga/libsvga.a
+
+ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),)
+   swrast_LIBS = $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
+   PIPE_CFLAGS += -DGALLIUM_LLVMPIPE
+else ifneq ($(findstring softpipe,$(GALLIUM_DRIVERS_DIRS)),)
+   swrast_LIBS = $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a
+   PIPE_CFLAGS += -DGALLIUM_SOFTPIPE
+endif
+
+# LLVM
+ifeq ($(MESA_LLVM),1)
+   PIPE_SYS += $(LLVM_LIBS)
+   PIPE_LDFLAGS += $(LLVM_LDFLAGS)
+endif
+
+# determine the targets/sources
+_PIPE_TARGETS_CC =
+_PIPE_TARGETS_CXX =
+PIPE_SOURCES =
+
+ifneq ($(findstring i915/drm,$(GALLIUM_WINSYS_DIRS)),)
+   _PIPE_TARGETS_CC += $(PIPE_PREFIX)i915.so
+   PIPE_SOURCES += pipe_i915.c
+endif
+
+ifneq ($(findstring nouveau/drm,$(GALLIUM_WINSYS_DIRS)),)
+   _PIPE_TARGETS_CXX += $(PIPE_PREFIX)nouveau.so
+   PIPE_SOURCES += pipe_nouveau.c
+endif
+
+ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),)
+ifneq ($(findstring r300,$(GALLIUM_DRIVERS_DIRS)),)
+   _PIPE_TARGETS_CC += $(PIPE_PREFIX)r300.so
+   PIPE_SOURCES += pipe_r300.c
+endif
+endif
+
+ifneq ($(findstring radeon/drm,$(GALLIUM_WINSYS_DIRS)),)
+ifneq ($(findstring r600,$(GALLIUM_DRIVERS_DIRS)),)
+   _PIPE_TARGETS_CC += $(PIPE_PREFIX)r600.so
+   PIPE_SOURCES += pipe_r600.c
+endif
+endif
+
+ifneq ($(findstring svga/drm,$(GALLIUM_WINSYS_DIRS)),)
+   _PIPE_TARGETS_CC += $(PIPE_PREFIX)vmwgfx.so
+   PIPE_SOURCES += pipe_vmwgfx.c
+endif
+
+ifneq ($(filter llvmpipe softpipe,$(GALLIUM_DRIVERS_DIRS)),)
+   _PIPE_TARGETS_CC += $(PIPE_PREFIX)swrast.so
+   PIPE_SOURCES += pipe_swrast.c
+endif
+
+PIPE_OBJECTS := $(PIPE_SOURCES:.c=.o)
+
+ifeq ($(MESA_LLVM),1)
+   PIPE_TARGETS_CXX = $(_PIPE_TARGETS_CXX) $(_PIPE_TARGETS_CC)
+   PIPE_TARGETS_CC =
+else
+   PIPE_TARGETS_CXX = $(_PIPE_TARGETS_CXX)
+   PIPE_TARGETS_CC = $(_PIPE_TARGETS_CC)
+endif
+
+PIPE_TARGETS = $(PIPE_TARGETS_CC) $(PIPE_TARGETS_CXX)
+
+default: depend $(PIPE_TARGETS)
+
+.SECONDEXPANSION:
+
+$(PIPE_TARGETS_CC): $(PIPE_PREFIX)%.so: pipe_%.o $(PIPE_LIBS) $$(%_LIBS)
+       $(MKLIB) -o $@ -noprefix -linker '$(CC)' \
+               -ldflags '-L$(TOP)/$(LIB_DIR) $(PIPE_LDFLAGS) $(LDFLAGS)' \
+               $(MKLIB_OPTIONS) $< \
+               -Wl,--start-group $(PIPE_LIBS) $($*_LIBS) -Wl,--end-group \
+               $(PIPE_SYS) $($*_SYS)
+
+$(PIPE_TARGETS_CXX): $(PIPE_PREFIX)%.so: pipe_%.o $(PIPE_LIBS) $$(%_LIBS)
+       $(MKLIB) -o $@ -noprefix -linker '$(CXX)' \
+               -ldflags '-L$(TOP)/$(LIB_DIR) $(PIPE_LDFLAGS) $(LDFLAGS)' \
+               $(MKLIB_OPTIONS) $< \
+               -Wl,--start-group $(PIPE_LIBS) $($*_LIBS) -Wl,--end-group \
+               $(PIPE_SYS) $($*_SYS)
+
+$(PIPE_OBJECTS): %.o: %.c
+       $(CC) -c -o $@ $< $(PIPE_CPPFLAGS) $(PIPE_CFLAGS) $(CFLAGS)
+
+install: $(PIPE_TARGETS)
+       $(INSTALL) -d $(DESTDIR)/$(PIPE_INSTALL_DIR)
+       for tgt in $(PIPE_TARGETS); do \
+               $(MINSTALL) "$$tgt" $(DESTDIR)/$(PIPE_INSTALL_DIR); \
+       done
+
+clean:
+       rm -f $(PIPE_TARGETS) $(PIPE_OBJECTS) depend depend.bak
+
+depend: $(PIPE_SOURCES)
+       rm -f depend
+       touch depend
+       $(MKDEP) $(MKDEP_OPTIONS) $(PIPE_CPPFLAGS) $(PIPE_SOURCES) 2>/dev/null
+
+sinclude depend
diff --git a/src/gallium/targets/pipe-loader/pipe_i915.c b/src/gallium/targets/pipe-loader/pipe_i915.c

new file mode 100644 (file)

index 0000000..85662cb
--- /dev/null
+++ b/src/gallium/targets/pipe-loader/pipe_i915.c
@@ -0,0 +1,27 @@
+
+#include "target-helpers/inline_debug_helper.h"
+#include "state_tracker/drm_driver.h"
+#include "i915/drm/i915_drm_public.h"
+#include "i915/i915_public.h"
+
+static struct pipe_screen *
+create_screen(int fd)
+{
+   struct i915_winsys *iws;
+   struct pipe_screen *screen;
+
+   iws = i915_drm_winsys_create(fd);
+   if (!iws)
+      return NULL;
+
+   screen = i915_screen_create(iws);
+   if (!screen)
+      return NULL;
+
+   screen = debug_screen_wrap(screen);
+
+   return screen;
+}
+
+PUBLIC
+DRM_DRIVER_DESCRIPTOR("i915", "i915", create_screen, NULL)
diff --git a/src/gallium/targets/pipe-loader/pipe_nouveau.c b/src/gallium/targets/pipe-loader/pipe_nouveau.c

new file mode 100644 (file)

index 0000000..65425e8
--- /dev/null
+++ b/src/gallium/targets/pipe-loader/pipe_nouveau.c
@@ -0,0 +1,21 @@
+
+#include "target-helpers/inline_debug_helper.h"
+#include "state_tracker/drm_driver.h"
+#include "nouveau/drm/nouveau_drm_public.h"
+
+static struct pipe_screen *
+create_screen(int fd)
+{
+   struct pipe_screen *screen;
+
+   screen = nouveau_drm_screen_create(fd);
+   if (!screen)
+      return NULL;
+
+   screen = debug_screen_wrap(screen);
+
+   return screen;
+}
+
+PUBLIC
+DRM_DRIVER_DESCRIPTOR("nouveau", "nouveau", create_screen, NULL)
diff --git a/src/gallium/targets/pipe-loader/pipe_r300.c b/src/gallium/targets/pipe-loader/pipe_r300.c

new file mode 100644 (file)

index 0000000..0556859
--- /dev/null
+++ b/src/gallium/targets/pipe-loader/pipe_r300.c
@@ -0,0 +1,27 @@
+
+#include "target-helpers/inline_debug_helper.h"
+#include "state_tracker/drm_driver.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "r300/r300_public.h"
+
+static struct pipe_screen *
+create_screen(int fd)
+{
+   struct radeon_winsys *sws;
+   struct pipe_screen *screen;
+
+   sws = radeon_drm_winsys_create(fd);
+   if (!sws)
+      return NULL;
+
+   screen = r300_screen_create(sws);
+   if (!screen)
+      return NULL;
+
+   screen = debug_screen_wrap(screen);
+
+   return screen;
+}
+
+PUBLIC
+DRM_DRIVER_DESCRIPTOR("r300", "radeon", create_screen, NULL)
diff --git a/src/gallium/targets/pipe-loader/pipe_r600.c b/src/gallium/targets/pipe-loader/pipe_r600.c

new file mode 100644 (file)

index 0000000..5d89aca
--- /dev/null
+++ b/src/gallium/targets/pipe-loader/pipe_r600.c
@@ -0,0 +1,26 @@
+#include "state_tracker/drm_driver.h"
+#include "target-helpers/inline_debug_helper.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "r600/r600_public.h"
+
+static struct pipe_screen *
+create_screen(int fd)
+{
+   struct radeon_winsys *rw;
+   struct pipe_screen *screen;
+
+   rw = radeon_drm_winsys_create(fd);
+   if (!rw)
+      return NULL;
+
+   screen = r600_screen_create(rw);
+   if (!screen)
+      return NULL;
+
+   screen = debug_screen_wrap(screen);
+
+   return screen;
+}
+
+PUBLIC
+DRM_DRIVER_DESCRIPTOR("r600", "radeon", create_screen, NULL)
diff --git a/src/gallium/targets/pipe-loader/pipe_swrast.c b/src/gallium/targets/pipe-loader/pipe_swrast.c

new file mode 100644 (file)

index 0000000..092abf0
--- /dev/null
+++ b/src/gallium/targets/pipe-loader/pipe_swrast.c
@@ -0,0 +1,22 @@
+
+#include "target-helpers/inline_sw_helper.h"
+#include "target-helpers/inline_debug_helper.h"
+#include "state_tracker/drm_driver.h"
+
+PUBLIC struct pipe_screen *
+swrast_create_screen(struct sw_winsys *ws);
+
+PUBLIC
+DRM_DRIVER_DESCRIPTOR("swrast", NULL, NULL, NULL)
+
+struct pipe_screen *
+swrast_create_screen(struct sw_winsys *ws)
+{
+   struct pipe_screen *screen;
+
+   screen = sw_screen_create(ws);
+   if (screen)
+      screen = debug_screen_wrap(screen);
+
+   return screen;
+}
diff --git a/src/gallium/targets/pipe-loader/pipe_vmwgfx.c b/src/gallium/targets/pipe-loader/pipe_vmwgfx.c

new file mode 100644 (file)

index 0000000..bfe665b
--- /dev/null
+++ b/src/gallium/targets/pipe-loader/pipe_vmwgfx.c
@@ -0,0 +1,27 @@
+
+#include "target-helpers/inline_debug_helper.h"
+#include "state_tracker/drm_driver.h"
+#include "svga/drm/svga_drm_public.h"
+#include "svga/svga_public.h"
+
+/*
+ * Build the svga pipe_screen for the device behind file descriptor `fd'.
+ *
+ * Returns NULL when either the svga winsys screen or the svga pipe screen
+ * cannot be created; otherwise returns the screen after handing it to
+ * debug_screen_wrap().
+ */
+static struct pipe_screen *
+create_screen(int fd)
+{
+   struct svga_winsys_screen *winsys = svga_drm_winsys_screen_create(fd);
+   struct pipe_screen *pscreen;
+
+   if (winsys == NULL)
+      return NULL;
+
+   pscreen = svga_screen_create(winsys);
+   if (pscreen == NULL)
+      return NULL;
+
+   return debug_screen_wrap(pscreen);
+}
+
+/*
+ * Exported driver descriptor for this pipe-loader target.  It is built from
+ * the name "vmwgfx" (passed twice) and the create_screen() function above.
+ * NOTE(review): the meaning of each argument (including the trailing NULL)
+ * is fixed by DRM_DRIVER_DESCRIPTOR, presumably declared in
+ * state_tracker/drm_driver.h - confirm there.
+ */
+PUBLIC
+DRM_DRIVER_DESCRIPTOR("vmwgfx", "vmwgfx", create_screen, NULL)
diff --git a/src/gallium/tests/trivial/Makefile b/src/gallium/tests/trivial/Makefile

index 4ddbb0b73dcbede4d7242b7b157f692f61f28dc0..8c0320165388fa5aa7cecb154df9539a2c0fc774 100644 (file)
--- a/src/gallium/tests/trivial/Makefile
+++ b/src/gallium/tests/trivial/Makefile
@@ -11,39 +11,39 @@ INCLUDES = \
         -I$(TOP)/src/gallium/winsys \
         $(PROG_INCLUDES)
  
-ifeq ($(MESA_LLVM),1)
-LINKS = $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
-LDFLAGS += $(LLVM_LDFLAGS)
-endif
-
  LINKS += \
-       $(TOP)/src/gallium/drivers/rbug/librbug.a \
-       $(TOP)/src/gallium/drivers/trace/libtrace.a \
-       $(TOP)/src/gallium/drivers/galahad/libgalahad.a \
-       $(TOP)/src/gallium/winsys/sw/null/libws_null.a \
-       $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+       $(GALLIUM_PIPE_LOADER_LIBS) \
         $(GALLIUM_AUXILIARIES) \
-       $(PROG_LINKS)
+       $(PROG_LINKS) $(LIBUDEV_LIBS)
  
  SOURCES = \
         tri.c \
-       quad-tex.c
+       quad-tex.c \
+       compute.c
  
  OBJECTS = $(SOURCES:.c=.o)
  
  PROGS = $(OBJECTS:.o=)
  
-PROG_DEFINES = \
-       -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE -DGALLIUM_GALAHAD
+PROG_DEFINES = -DPIPE_SEARCH_DIR=\"$(PIPE_SRC_DIR)\" \
+               $(GALLIUM_PIPE_LOADER_DEFINES)
+
+PIPE_SRC_DIR = $(TOP)/src/gallium/targets/pipe-loader
  
  ##### TARGETS #####
  
-default: $(PROGS)
+default: $(PROGS) pipes
+
+install:
  
  clean:
         -rm -f $(PROGS)
         -rm -f *.o
         -rm -f result.bmp
+       @$(MAKE) -C $(PIPE_SRC_DIR) clean
+
+pipes:
+       @$(MAKE) -C $(PIPE_SRC_DIR)
  
  ##### RULES #####
  
diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c

new file mode 100644 (file)

index 0000000..1812090
--- /dev/null
+++ b/src/gallium/tests/trivial/compute.c
@@ -0,0 +1,1592 @@
+/*
+ * Copyright (C) 2011 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <inttypes.h>
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_sampler.h"
+#include "util/u_format.h"
+#include "tgsi/tgsi_text.h"
+#include "pipe-loader/pipe_loader.h"
+
+#define MAX_RESOURCES 4
+
+/* State shared by the helper functions and test cases in this file. */
+struct context {
+        /* Device handle filled in by pipe_loader_probe() in init_ctx(). */
+        struct pipe_loader_device *dev;
+        /* Screen obtained from pipe_loader_create_screen(). */
+        struct pipe_screen *screen;
+        /* Context created from `screen' via context_create(). */
+        struct pipe_context *pipe;
+        /* Compute state object returned by create_compute_state(). */
+        void *hwcs;
+        /* Sampler state objects returned by create_sampler_state(). */
+        void *hwsmp[MAX_RESOURCES];
+        /* Resources created by init_tex(), indexed by slot. */
+        struct pipe_resource *tex[MAX_RESOURCES];
+        /* Per-slot `rw' flag recorded by init_tex(); it is copied into the
+         * `writable' field of the surface template in
+         * init_compute_resources(). */
+        bool tex_rw[MAX_RESOURCES];
+        /* Sampler views created by init_sampler_views(). */
+        struct pipe_sampler_view *view[MAX_RESOURCES];
+        /* Surfaces created by init_compute_resources(). */
+        struct pipe_surface *surf[MAX_RESOURCES];
+};
+
+/*
+ * Query compute capability `c' from ctx->screen and print its values.
+ *
+ * Expects a `struct context *ctx' in the calling scope.  The `p' argument
+ * is not used by the expansion and is kept only so that existing callers
+ * keep compiling.  The value returned by get_compute_param() is treated as
+ * a size in bytes: it is divided by sizeof(uint64_t) to obtain the number
+ * of entries to print.  That count is clamped to the capacity of the local
+ * buffer, and a non-positive return value prints an empty list, so the
+ * print loop never reads outside the buffer.
+ */
+#define DUMP_COMPUTE_PARAM(p, c) do {                                   \
+                uint64_t vals_[4];                                      \
+                size_t i_, n_ = 0;                                      \
+                int sz_;                                                \
+                                                                        \
+                sz_ = ctx->screen->get_compute_param(ctx->screen, c, vals_); \
+                if (sz_ > 0)                                            \
+                        n_ = (size_t)sz_ / sizeof(*vals_);              \
+                if (n_ > sizeof(vals_) / sizeof(*vals_))                \
+                        n_ = sizeof(vals_) / sizeof(*vals_);            \
+                printf("%s: {", #c);                                    \
+                                                                        \
+                for (i_ = 0; i_ < n_; ++i_)                             \
+                        printf(" %"PRIu64, vals_[i_]);                  \
+                                                                        \
+                printf(" }\n");                                         \
+        } while (0)
+
+/*
+ * Probe for one pipe-loader device, create its screen (driver modules are
+ * searched in PIPE_SEARCH_DIR) and a context on it, then print a few
+ * compute capabilities.  Every step is checked with assert().
+ */
+static void init_ctx(struct context *ctx)
+{
+        struct pipe_screen *scr;
+        int found;
+
+        found = pipe_loader_probe(&ctx->dev, 1);
+        assert(found);
+
+        scr = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR);
+        ctx->screen = scr;
+        assert(ctx->screen);
+
+        ctx->pipe = scr->context_create(scr, NULL);
+        assert(ctx->pipe);
+
+        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION);
+        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE);
+        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
+}
+
+/*
+ * Tear down everything init_ctx() set up, in reverse order (context,
+ * screen, loader device), and finally release `ctx' itself with FREE().
+ */
+static void destroy_ctx(struct context *ctx)
+{
+        struct pipe_context *pipe = ctx->pipe;
+        struct pipe_screen *screen;
+
+        pipe->destroy(pipe);
+
+        screen = ctx->screen;
+        screen->destroy(screen);
+
+        pipe_loader_release(&ctx->dev, 1);
+        FREE(ctx);
+}
+
+/*
+ * Run `src' through the C preprocessor and return the result.
+ *
+ * A fixed header defining the RGLOBAL/RLOCAL/RPRIVATE/RINPUT macros is fed
+ * to cpp ahead of `src'; `defs', when non-NULL, is inserted verbatim into
+ * the cpp command line.  The output is captured through a temporary file
+ * created with mkstemp() and removed again before returning.
+ *
+ * `ctx' is not used.  Returns a NUL-terminated buffer obtained from
+ * malloc(); the caller releases it with free().  Any failure trips an
+ * assert().
+ */
+static char *
+preprocess_prog(struct context *ctx, const char *src, const char *defs)
+{
+        const char header[] =
+                "#define RGLOBAL        RES[32767]\n"
+                "#define RLOCAL         RES[32766]\n"
+                "#define RPRIVATE       RES[32765]\n"
+                "#define RINPUT         RES[32764]\n";
+        char cmd[512];
+        char tmp[] = "/tmp/test-compute.tgsi-XXXXXX";
+        char *buf;
+        int fd, ret;
+        struct stat st;
+        FILE *p;
+
+        /* Open a temporary file */
+        fd = mkstemp(tmp);
+        assert(fd >= 0);
+        ret = snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s",
+                       defs ? defs : "", tmp);
+        /* A truncated command line would run cpp with mangled arguments. */
+        assert(ret > 0 && (size_t)ret < sizeof(cmd));
+
+        /* Preprocess */
+        p = popen(cmd, "w");
+        /* popen() yields NULL when the pipe or the child cannot be set up;
+         * writing to it would crash. */
+        assert(p);
+        fwrite(header, strlen(header), 1, p);
+        fwrite(src, strlen(src), 1, p);
+        ret = pclose(p);
+        assert(!ret);
+
+        /* Read back */
+        ret = fstat(fd, &st);
+        assert(!ret);
+
+        buf = malloc(st.st_size + 1);
+        /* Do not read into (or terminate) a failed allocation. */
+        assert(buf);
+        ret = read(fd, buf, st.st_size);
+        assert(ret == st.st_size);
+        buf[ret] = 0;
+
+        /* Clean up */
+        close(fd);
+        unlink(tmp);
+
+        return buf;
+}
+
+/*
+ * Preprocess and parse the TGSI text `src' (with optional cpp arguments
+ * `defs'), create a compute state from it with the requested local,
+ * private and input memory sizes, store it in ctx->hwcs and bind it.
+ */
+static void init_prog(struct context *ctx, unsigned local_sz,
+                      unsigned private_sz, unsigned input_sz,
+                      const char *src, const char *defs)
+{
+        struct pipe_context *pipe = ctx->pipe;
+        struct tgsi_token tokens[1024];
+        struct pipe_compute_state state = {
+                .prog = tokens,
+                .req_local_mem = local_sz,
+                .req_private_mem = private_sz,
+                .req_input_mem = input_sz
+        };
+        char *text;
+        int ok;
+
+        /* Expand the macros in the shader source, then translate it. */
+        text = preprocess_prog(ctx, src, defs);
+        ok = tgsi_text_translate(text, tokens, Elements(tokens));
+        assert(ok);
+        free(text);
+
+        ctx->hwcs = pipe->create_compute_state(pipe, &state);
+        assert(ctx->hwcs);
+
+        pipe->bind_compute_state(pipe, ctx->hwcs);
+}
+
+/* Delete the compute state created by init_prog() and clear ctx->hwcs. */
+static void destroy_prog(struct context *ctx)
+{
+        ctx->pipe->delete_compute_state(ctx->pipe, ctx->hwcs);
+        ctx->hwcs = NULL;
+}
+
+/*
+ * Create the resource for `slot' and fill it element by element.
+ *
+ * The resource has the given target, format, width `w' and height `h'.
+ * After mapping it for writing, `init' is called once per element with a
+ * pointer to the element, the slot number and the element's x/y index.
+ * For PIPE_BUFFER targets the element count is w / blocksize in a single
+ * row; otherwise it is the format's block counts for w and h.  The `rw'
+ * flag is remembered in ctx->tex_rw[slot].
+ */
+static void init_tex(struct context *ctx, int slot,
+                     enum pipe_texture_target target, bool rw,
+                     enum pipe_format format, int w, int h,
+                     void (*init)(void *, int, int, int))
+{
+        struct pipe_context *pipe = ctx->pipe;
+        struct pipe_resource templ = {
+                .target = target,
+                .format = format,
+                .width0 = w,
+                .height0 = h,
+                .depth0 = 1,
+                .array_size = 1,
+                .bind = (PIPE_BIND_SAMPLER_VIEW |
+                         PIPE_BIND_COMPUTE_RESOURCE |
+                         PIPE_BIND_GLOBAL)
+        };
+        struct pipe_box box = { .width = w, .height = h, .depth = 1 };
+        const int elem_sz = util_format_get_blocksize(format);
+        const int row_sz = util_format_get_stride(format, w);
+        struct pipe_transfer *xfer;
+        int cols, rows, col, row;
+        char *map;
+
+        if (target == PIPE_BUFFER) {
+                cols = w / elem_sz;
+                rows = 1;
+        } else {
+                cols = util_format_get_nblocksx(format, w);
+                rows = util_format_get_nblocksy(format, h);
+        }
+
+        ctx->tex[slot] = ctx->screen->resource_create(ctx->screen, &templ);
+        assert(ctx->tex[slot]);
+
+        xfer = pipe->get_transfer(pipe, ctx->tex[slot], 0,
+                                  PIPE_TRANSFER_WRITE, &box);
+        assert(xfer);
+
+        map = pipe->transfer_map(pipe, xfer);
+        assert(map);
+
+        for (row = 0; row < rows; ++row) {
+                char *line = map + row * row_sz;
+
+                for (col = 0; col < cols; ++col)
+                        init(line + col * elem_sz, slot, col, row);
+        }
+
+        pipe->transfer_unmap(pipe, xfer);
+        pipe->transfer_destroy(pipe, xfer);
+
+        ctx->tex_rw[slot] = rw;
+}
+
+/* Default comparison for check_tex(): true when the first `sz' bytes at
+ * `x' and `y' are identical. */
+static bool default_check(void *x, void *y, int sz)
+{
+        return memcmp(x, y, sz) == 0;
+}
+
+/*
+ * Read back the resource in `slot' and compare it element by element with
+ * reference values.
+ *
+ * `expect' is called for every element with a scratch buffer, the slot
+ * number and the element's x/y index, and writes the expected value into
+ * the scratch buffer.  `check' compares (result, expected, element size in
+ * bytes) and returns true on a match; when it is NULL, default_check() is
+ * used.  Details are printed for at most the first 20 mismatching
+ * elements, followed by a final OK/FAIL line carrying the total number of
+ * mismatches.
+ */
+static void check_tex(struct context *ctx, int slot,
+                      void (*expect)(void *, int, int, int),
+                      bool (*check)(void *, void *, int))
+{
+        struct pipe_context *pipe = ctx->pipe;
+        struct pipe_resource *tex = ctx->tex[slot];
+        /* dx: bytes per element, dy: bytes per row; nx/ny: element counts
+         * (a PIPE_BUFFER is treated as one row of width0 / dx elements). */
+        int dx = util_format_get_blocksize(tex->format);
+        int dy = util_format_get_stride(tex->format, tex->width0);
+        int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) :
+                  util_format_get_nblocksx(tex->format, tex->width0));
+        int ny = (tex->target == PIPE_BUFFER ? 1 :
+                  util_format_get_nblocksy(tex->format, tex->height0));
+        struct pipe_transfer *xfer;
+        char *map;
+        int x, y, i;
+        int err = 0;
+
+        if (!check)
+                check = default_check;
+
+        xfer = pipe->get_transfer(pipe, tex, 0, PIPE_TRANSFER_READ,
+                                  &(struct pipe_box) { .width = tex->width0,
+                                        .height = tex->height0,
+                                        .depth = 1 });
+        assert(xfer);
+
+        map = pipe->transfer_map(pipe, xfer);
+        assert(map);
+
+        for (y = 0; y < ny; ++y) {
+                for (x = 0; x < nx; ++x) {
+                        uint32_t exp[4];
+                        uint32_t *res = (uint32_t *)(map + y * dy + x * dx);
+
+                        expect(exp, slot, x, y);
+                        /* Skip matching elements.  On a mismatch `err' is
+                         * always incremented, but the report below is only
+                         * printed for the first 20 of them. */
+                        if (check(res, exp, dx) || (++err) > 20)
+                                continue;
+
+                        if (dx < 4) {
+                                /* Elements narrower than 32 bits: gather
+                                 * the bytes (byte i goes to bits 8*i and
+                                 * up) and print a single hex value. */
+                                uint32_t u = 0, v = 0;
+
+                                for (i = 0; i < dx; i++) {
+                                        u |= ((uint8_t *)exp)[i] << (8 * i);
+                                        v |= ((uint8_t *)res)[i] << (8 * i);
+                                }
+                                printf("(%d, %d): got 0x%x, expected 0x%x\n",
+                                       x, y, v, u);
+                        } else {
+                                /* Print each 32-bit word both as hex and
+                                 * reinterpreted as a float. */
+                                for (i = 0; i < dx / 4; i++) {
+                                        printf("(%d, %d)[%d]: got 0x%x/%f,"
+                                               " expected 0x%x/%f\n", x, y, i,
+                                               res[i], ((float *)res)[i],
+                                               exp[i], ((float *)exp)[i]);
+                                }
+                        }
+                }
+        }
+
+        pipe->transfer_unmap(pipe, xfer);
+        pipe->transfer_destroy(pipe, xfer);
+
+        /* x and y hold their loop-exit values (nx, ny) here, so the summary
+         * line shows the dimensions that were checked. */
+        if (err)
+                printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err);
+        else
+                printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y);
+}
+
+/* Drop the reference held in every populated ctx->tex slot. */
+static void destroy_tex(struct context *ctx)
+{
+        struct pipe_resource **tex;
+
+        for (tex = ctx->tex; tex != ctx->tex + MAX_RESOURCES; ++tex) {
+                if (*tex == NULL)
+                        continue;
+
+                pipe_resource_reference(tex, NULL);
+        }
+}
+
+/*
+ * Create one sampler view per entry of `slots' and bind them for compute.
+ *
+ * `slots' is a list of indices into ctx->tex terminated by a negative
+ * value.  The i-th listed texture gets its view stored in ctx->view[i],
+ * built from the default template for the texture's own format; all views
+ * are then bound starting at index 0 via set_compute_sampler_views().
+ *
+ * At most MAX_RESOURCES entries may be listed and every entry must be a
+ * valid ctx->tex index; both conditions are asserted because ctx->view and
+ * ctx->tex only hold MAX_RESOURCES elements.
+ */
+static void init_sampler_views(struct context *ctx, const int *slots)
+{
+        struct pipe_context *pipe = ctx->pipe;
+        struct pipe_sampler_view tview;
+        int i;
+
+        for (i = 0; *slots >= 0; ++i, ++slots) {
+                assert(i < MAX_RESOURCES);
+                assert(*slots < MAX_RESOURCES);
+
+                u_sampler_view_default_template(&tview, ctx->tex[*slots],
+                                                ctx->tex[*slots]->format);
+
+                ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots],
+                                                         &tview);
+                assert(ctx->view[i]);
+        }
+
+        pipe->set_compute_sampler_views(pipe, 0, i, ctx->view);
+}
+
+/*
+ * Unbind all compute sampler views, then destroy every view still stored
+ * in ctx->view and reset its entry to NULL.
+ */
+static void destroy_sampler_views(struct context *ctx)
+{
+        struct pipe_context *pipe = ctx->pipe;
+        struct pipe_sampler_view **view;
+
+        pipe->set_compute_sampler_views(pipe, 0, MAX_RESOURCES, NULL);
+
+        for (view = ctx->view; view != ctx->view + MAX_RESOURCES; ++view) {
+                if (*view == NULL)
+                        continue;
+
+                pipe->sampler_view_destroy(pipe, *view);
+                *view = NULL;
+        }
+}
+
+/*
+ * Create one surface per entry of `slots' and bind them as compute
+ * resources.
+ *
+ * `slots' is a list of indices into ctx->tex terminated by a negative
+ * value.  The surface template takes its format and usage from the
+ * texture and its `writable' flag from ctx->tex_rw; for PIPE_BUFFER
+ * targets the last element is set to width0 - 1.  The i-th surface is
+ * stored in ctx->surf[i] and all of them are bound starting at index 0 via
+ * set_compute_resources().
+ *
+ * At most MAX_RESOURCES entries may be listed and every entry must be a
+ * valid ctx->tex index; both conditions are asserted because ctx->surf and
+ * ctx->tex only hold MAX_RESOURCES elements.
+ */
+static void init_compute_resources(struct context *ctx, const int *slots)
+{
+        struct pipe_context *pipe = ctx->pipe;
+        int i;
+
+        for (i = 0; *slots >= 0; ++i, ++slots) {
+                struct pipe_surface tsurf;
+
+                assert(i < MAX_RESOURCES);
+                assert(*slots < MAX_RESOURCES);
+
+                tsurf = (struct pipe_surface) {
+                        .format = ctx->tex[*slots]->format,
+                        .usage = ctx->tex[*slots]->bind,
+                        .writable = ctx->tex_rw[*slots]
+                };
+
+                if (ctx->tex[*slots]->target == PIPE_BUFFER)
+                        tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1;
+
+                ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots],
+                                                    &tsurf);
+                assert(ctx->surf[i]);
+        }
+
+        pipe->set_compute_resources(pipe, 0, i, ctx->surf);
+}
+
+/*
+ * Unbind all compute resources, then destroy every surface still stored in
+ * ctx->surf and reset its entry to NULL.
+ */
+static void destroy_compute_resources(struct context *ctx)
+{
+        struct pipe_context *pipe = ctx->pipe;
+        struct pipe_surface **surf;
+
+        pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL);
+
+        for (surf = ctx->surf; surf != ctx->surf + MAX_RESOURCES; ++surf) {
+                if (*surf == NULL)
+                        continue;
+
+                pipe->surface_destroy(pipe, *surf);
+                *surf = NULL;
+        }
+}
+
+/*
+ * Create `n' identical sampler states (normalized coordinates enabled,
+ * everything else zero) in ctx->hwsmp[0..n-1] and bind them for compute
+ * starting at index 0.
+ *
+ * `n' must not exceed MAX_RESOURCES, the size of ctx->hwsmp; this is
+ * asserted.
+ */
+static void init_sampler_states(struct context *ctx, int n)
+{
+        struct pipe_context *pipe = ctx->pipe;
+        struct pipe_sampler_state smp = {
+                .normalized_coords = 1,
+        };
+        int i;
+
+        assert(n <= MAX_RESOURCES);
+
+        for (i = 0; i < n; ++i) {
+                ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp);
+                assert(ctx->hwsmp[i]);
+        }
+
+        pipe->bind_compute_sampler_states(pipe, 0, i, ctx->hwsmp);
+}
+
+/*
+ * Unbind all compute sampler states, then delete every state still stored
+ * in ctx->hwsmp and reset its entry to NULL.
+ */
+static void destroy_sampler_states(struct context *ctx)
+{
+        struct pipe_context *pipe = ctx->pipe;
+        void **smp;
+
+        pipe->bind_compute_sampler_states(pipe, 0, MAX_RESOURCES, NULL);
+
+        for (smp = ctx->hwsmp; smp != ctx->hwsmp + MAX_RESOURCES; ++smp) {
+                if (*smp == NULL)
+                        continue;
+
+                pipe->delete_sampler_state(pipe, *smp);
+                *smp = NULL;
+        }
+}
+
+/*
+ * Bind the textures listed in `slots' as global bindings.
+ *
+ * `slots' is a list of indices into ctx->tex terminated by a negative
+ * value; `handles' is passed straight through to set_global_binding()
+ * together with the collected resources.
+ * NOTE(review): `handles' presumably needs one entry per listed slot -
+ * confirm against the set_global_binding() contract.
+ *
+ * At most MAX_RESOURCES entries may be listed and every entry must be a
+ * valid ctx->tex index; both conditions are asserted because the local
+ * resource array and ctx->tex only hold MAX_RESOURCES elements.
+ */
+static void init_globals(struct context *ctx, const int *slots,
+                         uint32_t **handles)
+{
+        struct pipe_context *pipe = ctx->pipe;
+        struct pipe_resource *res[MAX_RESOURCES];
+        int i;
+
+        for (i = 0; *slots >= 0; ++i, ++slots) {
+                assert(i < MAX_RESOURCES);
+                assert(*slots < MAX_RESOURCES);
+
+                res[i] = ctx->tex[*slots];
+        }
+
+        pipe->set_global_binding(pipe, 0, i, res, handles);
+}
+
+/* Clear all MAX_RESOURCES global bindings by binding NULL resources. */
+static void destroy_globals(struct context *ctx)
+{
+        ctx->pipe->set_global_binding(ctx->pipe, 0, MAX_RESOURCES,
+                                      NULL, NULL);
+}
+
+/* Thin wrapper forwarding all arguments to the context's launch_grid()
+ * hook. */
+static void launch_grid(struct context *ctx, const uint *block_layout,
+                        const uint *grid_layout, uint32_t pc,
+                        const void *input)
+{
+        ctx->pipe->launch_grid(ctx->pipe, block_layout, grid_layout,
+                               pc, input);
+}
+
+/*
+ * Test case: the TGSI program declares the BLOCK_ID, BLOCK_SIZE, GRID_SIZE
+ * and THREAD_ID system values and stores them into RES[0].
+ *
+ * Host side: a PIPE_BUFFER of width 76800 is created in slot 0 and filled
+ * with 0xdeadbeef, bound as the only compute resource, and the program is
+ * launched with block layout {4, 3, 5} and grid layout {5, 4, 1}.  The
+ * buffer is then compared with the values produced by expect().
+ *
+ * init() and expect() are nested functions (a GNU C extension).
+ */
+static void test_system_values(struct context *ctx)
+{
+        const char *src = "COMP\n"
+                "DCL RES[0], BUFFER, RAW, WR\n"
+                "DCL SV[0], BLOCK_ID[0]\n"
+                "DCL SV[1], BLOCK_SIZE[0]\n"
+                "DCL SV[2], GRID_SIZE[0]\n"
+                "DCL SV[3], THREAD_ID[0]\n"
+                "DCL TEMP[0], LOCAL\n"
+                "DCL TEMP[1], LOCAL\n"
+                "IMM UINT32 { 64, 0, 0, 0 }\n"
+                "IMM UINT32 { 16, 0, 0, 0 }\n"
+                "IMM UINT32 { 0, 0, 0, 0 }\n"
+                "\n"
+                "BGNSUB"
+                "  UMUL TEMP[0], SV[0], SV[1]\n"
+                "  UADD TEMP[0], TEMP[0], SV[3]\n"
+                "  UMUL TEMP[1], SV[1], SV[2]\n"
+                "  UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n"
+                "  UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n"
+                "  UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n"
+                "  UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n"
+                "  UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n"
+                "  UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
+                "  STORE RES[0].xyzw, TEMP[0], SV[0]\n"
+                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
+                "  STORE RES[0].xyzw, TEMP[0], SV[1]\n"
+                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
+                "  STORE RES[0].xyzw, TEMP[0], SV[2]\n"
+                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
+                "  STORE RES[0].xyzw, TEMP[0], SV[3]\n"
+                "  RET\n"
+                "ENDSUB\n";
+        /* Fill pattern written to every element before the launch. */
+        void init(void *p, int s, int x, int y) {
+                *(uint32_t *)p = 0xdeadbeef;
+        }
+        /* Element x decodes into a thread id (x / 16), which of the four
+         * system values it holds (sv) and the vector component (c). */
+        void expect(void *p, int s, int x, int y) {
+                int id = x / 16, sv = (x % 16) / 4, c = x % 4;
+                int tid[] = { id % 20, (id % 240) / 20, id / 240, 0 };
+                int bsz[] = { 4, 3, 5, 1};
+                int gsz[] = { 5, 4, 1, 1};
+
+                switch (sv) {
+                case 0:
+                        *(uint32_t *)p = tid[c] / bsz[c];
+                        break;
+                case 1:
+                        *(uint32_t *)p = bsz[c];
+                        break;
+                case 2:
+                        *(uint32_t *)p = gsz[c];
+                        break;
+                case 3:
+                        *(uint32_t *)p = tid[c] % bsz[c];
+                        break;
+                }
+        }
+
+        printf("- %s\n", __func__);
+
+        init_prog(ctx, 0, 0, 0, src, NULL);
+        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
+                 76800, 0, init);
+        init_compute_resources(ctx, (int []) { 0, -1 });
+        launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL);
+        check_tex(ctx, 0, expect, NULL);
+        destroy_compute_resources(ctx);
+        destroy_tex(ctx);
+        destroy_prog(ctx);
+}
+
+/*
+ * Test case: the TGSI program LOADs from the buffer resource RES[0] and
+ * STOREs into the 2D resource RES[1].
+ *
+ * Host side: slot 0 is a PIPE_BUFFER of width 256 initialised by init0(),
+ * slot 1 is a 60x12 PIPE_TEXTURE_2D filled with 0xdeadbeef by init1().
+ * Both are bound as compute resources, the program is launched with block
+ * layout {1, 1, 1} and grid layout {15, 12, 1}, and slot 1 is compared
+ * with the values produced by expect().
+ *
+ * init0(), init1() and expect() are nested functions (a GNU C extension).
+ */
+static void test_resource_access(struct context *ctx)
+{
+        const char *src = "COMP\n"
+                "DCL RES[0], BUFFER, RAW, WR\n"
+                "DCL RES[1], 2D, RAW, WR\n"
+                "DCL SV[0], BLOCK_ID[0]\n"
+                "DCL TEMP[0], LOCAL\n"
+                "DCL TEMP[1], LOCAL\n"
+                "IMM UINT32 { 15, 0, 0, 0 }\n"
+                "IMM UINT32 { 16, 1, 0, 0 }\n"
+                "\n"
+                "    BGNSUB\n"
+                "       UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n"
+                "       AND TEMP[0].x, TEMP[0], IMM[0]\n"
+                "       UMUL TEMP[0].x, TEMP[0], IMM[1]\n"
+                "       LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n"
+                "       UMUL TEMP[1], SV[0], IMM[1]\n"
+                "       STORE RES[1].xyzw, TEMP[1], TEMP[0]\n"
+                "       RET\n"
+                "    ENDSUB\n";
+        /* Source buffer contents: a float ramp 8.0 - x. */
+        void init0(void *p, int s, int x, int y) {
+                *(float *)p = 8.0 - (float)x;
+        }
+        /* Destination texture fill pattern before the launch. */
+        void init1(void *p, int s, int x, int y) {
+                *(uint32_t *)p = 0xdeadbeef;
+        }
+        /* Expected destination contents after the launch. */
+        void expect(void *p, int s, int x, int y) {
+                *(float *)p = 8.0 - (float)((x + 4*y) & 0x3f);
+        }
+
+        printf("- %s\n", __func__);
+
+        init_prog(ctx, 0, 0, 0, src, NULL);
+        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
+                 256, 0, init0);
+        init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
+                 60, 12, init1);
+        init_compute_resources(ctx, (int []) { 0, 1, -1 });
+        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL);
+        check_tex(ctx, 1, expect, NULL);
+        destroy_compute_resources(ctx);
+        destroy_tex(ctx);
+        destroy_prog(ctx);
+}
+
+/*
+ * Test case: the TGSI program consists of three subroutines; the last one
+ * calls the second (CAL :6), which conditionally calls the first (CAL :0).
+ *
+ * Host side: slot 0 is a 15x12 PIPE_TEXTURE_2D initialised by init() with
+ * 15 * y + x, bound as the only compute resource.  The program is launched
+ * with block layout {3, 3, 3}, grid layout {5, 4, 1} and pc = 15, and the
+ * texture is compared with the values produced by expect().
+ *
+ * init() and expect() are nested functions (a GNU C extension).
+ */
+static void test_function_calls(struct context *ctx)
+{
+        const char *src = "COMP\n"
+                "DCL RES[0], 2D, RAW, WR\n"
+                "DCL SV[0], BLOCK_ID[0]\n"
+                "DCL SV[1], BLOCK_SIZE[0]\n"
+                "DCL SV[2], GRID_SIZE[0]\n"
+                "DCL SV[3], THREAD_ID[0]\n"
+                "DCL TEMP[0]\n"
+                "DCL TEMP[1]\n"
+                "DCL TEMP[2], LOCAL\n"
+                "IMM UINT32 { 0, 11, 22, 33 }\n"
+                "IMM FLT32 { 11, 33, 55, 99 }\n"
+                "IMM UINT32 { 4, 1, 0, 0 }\n"
+                "IMM UINT32 { 12, 0, 0, 0 }\n"
+                "\n"
+                "00: BGNSUB\n"
+                "01:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
+                "02:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
+                "03:  USLT TEMP[0].x, TEMP[0], IMM[0]\n"
+                "04:  RET\n"
+                "05: ENDSUB\n"
+                "06: BGNSUB\n"
+                "07:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
+                "08:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
+                "09:  USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n"
+                "10:  IF TEMP[0].xxxx\n"
+                "11:   CAL :0\n"
+                "12:  ENDIF\n"
+                "13:  RET\n"
+                "14: ENDSUB\n"
+                "15: BGNSUB\n"
+                "16:  UMUL TEMP[2], SV[0], SV[1]\n"
+                "17:  UADD TEMP[2], TEMP[2], SV[3]\n"
+                "18:  UMUL TEMP[2], TEMP[2], IMM[2]\n"
+                "00:  MOV TEMP[1].x, IMM[2].wwww\n"
+                "19:  LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n"
+                "20:  CAL :6\n"
+                "21:  STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n"
+                "22:  RET\n"
+                "23: ENDSUB\n";
+        /* Initial texture contents: the linear element index. */
+        void init(void *p, int s, int x, int y) {
+                *(uint32_t *)p = 15 * y + x;
+        }
+        /* Expected result: 2 for linear indices below 4, otherwise 1. */
+        void expect(void *p, int s, int x, int y) {
+                *(uint32_t *)p = (15 * y + x) < 4 ? 2 : 1 ;
+        }
+
+        printf("- %s\n", __func__);
+
+        init_prog(ctx, 0, 0, 0, src, NULL);
+        init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
+                 15, 12, init);
+        init_compute_resources(ctx, (int []) { 0, -1 });
+        launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL);
+        check_tex(ctx, 0, expect, NULL);
+        destroy_compute_resources(ctx);
+        destroy_tex(ctx);
+        destroy_prog(ctx);
+}
+
+/*
+ * Test case: the TGSI program reads from RINPUT and reads and writes
+ * RGLOBAL (both are macros added by preprocess_prog()).
+ *
+ * Host side: the program is created with 32 bytes of input memory.  Four
+ * PIPE_BUFFERs of width 32 (slots 0-3) are filled with 0xdeadbeef and
+ * bound as global bindings, with the handle pointers aimed at input[1],
+ * input[3], input[5] and input[7].  The program is launched with block
+ * layout {4, 1, 1}, grid layout {1, 1, 1} and `input' as its input data,
+ * and each buffer is compared with the values produced by expect().
+ *
+ * init() and expect() are nested functions (a GNU C extension).
+ */
+static void test_input_global(struct context *ctx)
+{
+        const char *src = "COMP\n"
+                "DCL SV[0], THREAD_ID[0]\n"
+                "DCL TEMP[0], LOCAL\n"
+                "DCL TEMP[1], LOCAL\n"
+                "IMM UINT32 { 8, 0, 0, 0 }\n"
+                "\n"
+                "    BGNSUB\n"
+                "       UMUL TEMP[0], SV[0], IMM[0]\n"
+                "       LOAD TEMP[1].xy, RINPUT, TEMP[0]\n"
+                "       LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n"
+                "       UADD TEMP[1].x, TEMP[0], -TEMP[1]\n"
+                "       STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n"
+                "       RET\n"
+                "    ENDSUB\n";
+        /* Fill pattern written to every element before the launch. */
+        void init(void *p, int s, int x, int y) {
+                *(uint32_t *)p = 0xdeadbeef;
+        }
+        /* Only element 0 of each buffer is expected to change: it drops by
+         * 0x10001 + 2 * slot, i.e. by input[2 * slot]. */
+        void expect(void *p, int s, int x, int y) {
+                *(uint32_t *)p = 0xdeadbeef - (x == 0 ? 0x10001 + 2 * s : 0);
+        }
+        uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004,
+                              0x10005, 0x10006, 0x10007, 0x10008 };
+
+        printf("- %s\n", __func__);
+
+        init_prog(ctx, 0, 0, 32, src, NULL);
+        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
+        init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
+        init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
+        init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
+        init_globals(ctx, (int []){ 0, 1, 2, 3, -1 },
+                     (uint32_t *[]){ &input[1], &input[3],
+                                     &input[5], &input[7] });
+        launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input);
+        check_tex(ctx, 0, expect, NULL);
+        check_tex(ctx, 1, expect, NULL);
+        check_tex(ctx, 2, expect, NULL);
+        check_tex(ctx, 3, expect, NULL);
+        destroy_globals(ctx);
+        destroy_tex(ctx);
+        destroy_prog(ctx);
+}
+
+/*
+ * Test case: the TGSI program first STOREs to RPRIVATE in one loop, then
+ * LOADs the values back in a second loop and STOREs them into RES[0]
+ * (RPRIVATE is a macro added by preprocess_prog()).
+ *
+ * Host side: the program is created with 128 bytes of private memory.
+ * Slot 0 is a PIPE_BUFFER of width 32768 filled with 0xdeadbeef and bound
+ * as the only compute resource.  The program is launched with block layout
+ * {16, 1, 1} and grid layout {16, 1, 1}, and the buffer is compared with
+ * the values produced by expect().
+ *
+ * init() and expect() are nested functions (a GNU C extension).
+ */
+static void test_private(struct context *ctx)
+{
+        const char *src = "COMP\n"
+                "DCL RES[0], BUFFER, RAW, WR\n"
+                "DCL SV[0], BLOCK_ID[0]\n"
+                "DCL SV[1], BLOCK_SIZE[0]\n"
+                "DCL SV[2], THREAD_ID[0]\n"
+                "DCL TEMP[0], LOCAL\n"
+                "DCL TEMP[1], LOCAL\n"
+                "DCL TEMP[2], LOCAL\n"
+                "IMM UINT32 { 128, 0, 0, 0 }\n"
+                "IMM UINT32 { 4, 0, 0, 0 }\n"
+                "\n"
+                "    BGNSUB\n"
+                "       UMUL TEMP[0].x, SV[0], SV[1]\n"
+                "       UADD TEMP[0].x, TEMP[0], SV[2]\n"
+                "       MOV TEMP[1].x, IMM[0].wwww\n"
+                "       BGNLOOP\n"
+                "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
+                "               IF TEMP[2]\n"
+                "                       BRK\n"
+                "               ENDIF\n"
+                "               UDIV TEMP[2].x, TEMP[1], IMM[1]\n"
+                "               UADD TEMP[2].x, TEMP[2], TEMP[0]\n"
+                "               STORE RPRIVATE.x, TEMP[1], TEMP[2]\n"
+                "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
+                "       ENDLOOP\n"
+                "       MOV TEMP[1].x, IMM[0].wwww\n"
+                "       UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
+                "       BGNLOOP\n"
+                "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
+                "               IF TEMP[2]\n"
+                "                       BRK\n"
+                "               ENDIF\n"
+                "               LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n"
+                "               STORE RES[0].x, TEMP[0], TEMP[2]\n"
+                "               UADD TEMP[0].x, TEMP[0], IMM[1]\n"
+                "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
+                "       ENDLOOP\n"
+                "       RET\n"
+                "    ENDSUB\n";
+        /* Fill pattern written to every element before the launch. */
+        void init(void *p, int s, int x, int y) {
+                *(uint32_t *)p = 0xdeadbeef;
+        }
+        /* Expected result for element x of the output buffer. */
+        void expect(void *p, int s, int x, int y) {
+                *(uint32_t *)p = (x / 32) + x % 32;
+        }
+
+        printf("- %s\n", __func__);
+
+        init_prog(ctx, 0, 128, 0, src, NULL);
+        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
+                 32768, 0, init);
+        init_compute_resources(ctx, (int []) { 0, -1 });
+        launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
+        check_tex(ctx, 0, expect, NULL);
+        destroy_compute_resources(ctx);
+        destroy_tex(ctx);
+        destroy_prog(ctx);
+}
+
+/*
+ * Test case: the TGSI program has threads store to and poll RLOCAL in
+ * loops, separated by MFENCE RLOCAL, before copying the final RLOCAL value
+ * into RES[0] (RLOCAL is a macro added by preprocess_prog()).
+ * NOTE(review): this looks like a handshake between thread i and thread
+ * i +/- 32 through local memory - confirm against the TGSI semantics.
+ *
+ * Host side: the program is created with 256 bytes of local memory.  Slot
+ * 0 is a PIPE_BUFFER of width 4096 filled with 0xdeadbeef and bound as the
+ * only compute resource.  The program is launched with block layout
+ * {64, 1, 1} and grid layout {16, 1, 1}, and the buffer is compared with
+ * the values produced by expect().
+ *
+ * init() and expect() are nested functions (a GNU C extension).
+ */
+static void test_local(struct context *ctx)
+{
+        const char *src = "COMP\n"
+                "DCL RES[0], BUFFER, RAW, WR\n"
+                "DCL SV[0], BLOCK_ID[0]\n"
+                "DCL SV[1], BLOCK_SIZE[0]\n"
+                "DCL SV[2], THREAD_ID[0]\n"
+                "DCL TEMP[0], LOCAL\n"
+                "DCL TEMP[1], LOCAL\n"
+                "DCL TEMP[2], LOCAL\n"
+                "IMM UINT32 { 1, 0, 0, 0 }\n"
+                "IMM UINT32 { 2, 0, 0, 0 }\n"
+                "IMM UINT32 { 4, 0, 0, 0 }\n"
+                "IMM UINT32 { 32, 0, 0, 0 }\n"
+                "IMM UINT32 { 128, 0, 0, 0 }\n"
+                "\n"
+                "    BGNSUB\n"
+                "       UMUL TEMP[0].x, SV[2], IMM[2]\n"
+                "       STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n"
+                "       MFENCE RLOCAL\n"
+                "       USLT TEMP[1].x, SV[2], IMM[3]\n"
+                "       IF TEMP[1]\n"
+                "               UADD TEMP[1].x, TEMP[0], IMM[4]\n"
+                "               BGNLOOP\n"
+                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
+                "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
+                "                       IF TEMP[2]\n"
+                "                               BRK\n"
+                "                       ENDIF\n"
+                "               ENDLOOP\n"
+                "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
+                "               MFENCE RLOCAL\n"
+                "               BGNLOOP\n"
+                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
+                "                       USEQ TEMP[2].x, TEMP[2], IMM[1]\n"
+                "                       IF TEMP[2]\n"
+                "                               BRK\n"
+                "                       ENDIF\n"
+                "               ENDLOOP\n"
+                "       ELSE\n"
+                "               UADD TEMP[1].x, TEMP[0], -IMM[4]\n"
+                "               BGNLOOP\n"
+                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
+                "                       USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n"
+                "                       IF TEMP[2]\n"
+                "                               BRK\n"
+                "                       ENDIF\n"
+                "               ENDLOOP\n"
+                "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
+                "               MFENCE RLOCAL\n"
+                "               BGNLOOP\n"
+                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
+                "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
+                "                       IF TEMP[2]\n"
+                "                               BRK\n"
+                "                       ENDIF\n"
+                "               ENDLOOP\n"
+                "               STORE RLOCAL.x, TEMP[0], IMM[1]\n"
+                "               MFENCE RLOCAL\n"
+                "       ENDIF\n"
+                "       UMUL TEMP[1].x, SV[0], SV[1]\n"
+                "       UMUL TEMP[1].x, TEMP[1], IMM[2]\n"
+                "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
+                "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
+                "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
+                "       RET\n"
+                "    ENDSUB\n";
+        /* Fill pattern written to every element before the launch. */
+        void init(void *p, int s, int x, int y) {
+                *(uint32_t *)p = 0xdeadbeef;
+        }
+        /* Expected result: 2 where bit 5 of the element index is set,
+         * otherwise 1. */
+        void expect(void *p, int s, int x, int y) {
+                *(uint32_t *)p = x & 0x20 ? 2 : 1;
+        }
+
+        printf("- %s\n", __func__);
+
+        init_prog(ctx, 256, 0, 0, src, NULL);
+        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
+                 4096, 0, init);
+        init_compute_resources(ctx, (int []) { 0, -1 });
+        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
+        check_tex(ctx, 0, expect, NULL);
+        destroy_compute_resources(ctx);
+        destroy_tex(ctx);
+        destroy_prog(ctx);
+}
+
+/*
+ * Texture sampling test: every block samples SVIEW[0] at its block id
+ * divided by (128, 32) and stores the fetched vector raw into the
+ * writable resource (texture slot 1), 16 bytes per block along x.
+ */
+static void test_sample(struct context *ctx)
+{
+        const char *src = "COMP\n"
+                "DCL SVIEW[0], 2D, FLOAT\n"
+                "DCL RES[0], 2D, RAW, WR\n"
+                "DCL SAMP[0]\n"
+                "DCL SV[0], BLOCK_ID[0]\n"
+                "DCL TEMP[0], LOCAL\n"
+                "DCL TEMP[1], LOCAL\n"
+                "IMM UINT32 { 16, 1, 0, 0 }\n"
+                "IMM FLT32 { 128, 32, 0, 0 }\n"
+                "\n"
+                "    BGNSUB\n"
+                "       I2F TEMP[1], SV[0]\n"
+                "       DIV TEMP[1], TEMP[1], IMM[1]\n"
+                "       SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n"
+                "       UMUL TEMP[0], SV[0], IMM[0]\n"
+                "       STORE RES[0].xyzw, TEMP[0], TEMP[1]\n"
+                "       RET\n"
+                "    ENDSUB\n";
+
+        /* Slot 0 (sampled texture) holds x * y, every other slot holds 1. */
+        void fill(void *texel, int slot, int col, int row) {
+                if (slot)
+                        *(float *)texel = 1;
+                else
+                        *(float *)texel = col * row;
+        }
+
+        /*
+         * The output is read back as groups of four floats: the sampled
+         * value, two zeros and a one.
+         */
+        void want(void *texel, int slot, int col, int row) {
+                const int chan = col % 4;
+
+                if (chan == 0)
+                        *(float *)texel = col / 4 * row;
+                else if (chan == 1 || chan == 2)
+                        *(float *)texel = 0;
+                else if (chan == 3)
+                        *(float *)texel = 1;
+        }
+
+        printf("- %s\n", __func__);
+
+        int resources[] = { 1, -1 };
+        int views[] = { 0, -1 };
+        uint block_dims[] = { 1, 1, 1 };
+        uint grid_dims[] = { 128, 32, 1 };
+
+        init_prog(ctx, 0, 0, 0, src, NULL);
+        init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
+                 128, 32, fill);
+        init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
+                 512, 32, fill);
+        init_compute_resources(ctx, resources);
+        init_sampler_views(ctx, views);
+        init_sampler_states(ctx, 2);
+        launch_grid(ctx, block_dims, grid_dims, 0, NULL);
+        check_tex(ctx, 1, want, NULL);
+
+        /* Tear down in the reverse order of setup. */
+        destroy_sampler_states(ctx);
+        destroy_sampler_views(ctx);
+        destroy_compute_resources(ctx);
+        destroy_tex(ctx);
+        destroy_prog(ctx);
+}
+
+/*
+ * Several subroutines in one program: the source contains four
+ * BGNSUB/ENDSUB bodies, each storing its own index k at byte offset
+ * 4 * k of RES[0].  The program is launched four times with the fourth
+ * launch_grid() argument set to 0, 5, 10 and 15 (presumably the entry
+ * offset of each subroutine), and the buffer must then read 0, 1, 2, 3.
+ */
+static void test_many_kern(struct context *ctx)
+{
+        const char *src = "COMP\n"
+                "DCL RES[0], BUFFER, RAW, WR\n"
+                "DCL TEMP[0], LOCAL\n"
+                "IMM UINT32 { 0, 1, 2, 3 }\n"
+                "IMM UINT32 { 4, 0, 0, 0 }\n"
+                "\n"
+                "    BGNSUB\n"
+                "       UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n"
+                "       STORE RES[0].x, TEMP[0], IMM[0].xxxx\n"
+                "       RET\n"
+                "    ENDSUB\n"
+                "    BGNSUB\n"
+                "       UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n"
+                "       STORE RES[0].x, TEMP[0], IMM[0].yyyy\n"
+                "       RET\n"
+                "    ENDSUB\n"
+                "    BGNSUB\n"
+                "       UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n"
+                "       STORE RES[0].x, TEMP[0], IMM[0].zzzz\n"
+                "       RET\n"
+                "    ENDSUB\n"
+                "    BGNSUB\n"
+                "       UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n"
+                "       STORE RES[0].x, TEMP[0], IMM[0].wwww\n"
+                "       RET\n"
+                "    ENDSUB\n";
+
+        /* Poison the buffer so that untouched words are detected. */
+        void fill(void *word, int slot, int col, int row) {
+                *(uint32_t *)word = 0xdeadbeef;
+        }
+
+        /* Word k must contain k. */
+        void want(void *word, int slot, int col, int row) {
+                *(uint32_t *)word = col;
+        }
+
+        const int entry[] = { 0, 5, 10, 15 };
+        int resources[] = { 0, -1 };
+        int k;
+
+        printf("- %s\n", __func__);
+
+        init_prog(ctx, 0, 0, 0, src, NULL);
+        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
+                 16, 0, fill);
+        init_compute_resources(ctx, resources);
+
+        /* One single-thread launch per subroutine. */
+        for (k = 0; k < 4; k++) {
+                uint block_dims[] = { 1, 1, 1 };
+                uint grid_dims[] = { 1, 1, 1 };
+
+                launch_grid(ctx, block_dims, grid_dims, entry[k], NULL);
+        }
+
+        check_tex(ctx, 0, want, NULL);
+        destroy_compute_resources(ctx);
+        destroy_tex(ctx);
+        destroy_prog(ctx);
+}
+
+/*
+ * Copy test between two buffers: each block loads one 32-bit word from
+ * RES[0] (texture slot 0, created with the writable flag cleared) and
+ * stores it at the same byte offset of RES[1].
+ */
+static void test_constant(struct context *ctx)
+{
+        const char *src = "COMP\n"
+                "DCL RES[0], BUFFER, RAW\n"
+                "DCL RES[1], BUFFER, RAW, WR\n"
+                "DCL SV[0], BLOCK_ID[0]\n"
+                "DCL TEMP[0], LOCAL\n"
+                "DCL TEMP[1], LOCAL\n"
+                "IMM UINT32 { 4, 0, 0, 0 }\n"
+                "\n"
+                "    BGNSUB\n"
+                "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
+                "       LOAD TEMP[1].x, RES[0], TEMP[0]\n"
+                "       STORE RES[1].x, TEMP[0], TEMP[1]\n"
+                "       RET\n"
+                "    ENDSUB\n";
+
+        /* Source slot gets 8 - x; the destination slot gets a poison value. */
+        void fill(void *word, int slot, int col, int row) {
+                const double value = slot ? 0xdeadbeef : 8.0 - (float)col;
+
+                *(float *)word = value;
+        }
+
+        /* After the copy the destination must mirror the source pattern. */
+        void want(void *word, int slot, int col, int row) {
+                const double value = 8.0 - (float)col;
+
+                *(float *)word = value;
+        }
+
+        int resources[] = { 0, 1, -1 };
+        uint block_dims[] = { 1, 1, 1 };
+        uint grid_dims[] = { 64, 1, 1 };
+
+        printf("- %s\n", __func__);
+
+        init_prog(ctx, 0, 0, 0, src, NULL);
+        init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
+                 256, 0, fill);
+        init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
+                 256, 0, fill);
+        init_compute_resources(ctx, resources);
+        launch_grid(ctx, block_dims, grid_dims, 0, NULL);
+        check_tex(ctx, 1, want, NULL);
+        destroy_compute_resources(ctx);
+        destroy_tex(ctx);
+        destroy_prog(ctx);
+}
+
+/*
+ * Indirect resource addressing: the word read from RES[1] (0 or 1)
+ * selects, via RES[TEMP[1].x+2], whether the value is fetched from
+ * RES[2] or RES[3]; the result is stored in RES[0].
+ */
+static void test_resource_indirect(struct context *ctx)
+{
+        const char *src = "COMP\n"
+                "DCL RES[0], BUFFER, RAW, WR\n"
+                "DCL RES[1..3], BUFFER, RAW\n"
+                "DCL SV[0], BLOCK_ID[0]\n"
+                "DCL TEMP[0], LOCAL\n"
+                "DCL TEMP[1], LOCAL\n"
+                "IMM UINT32 { 4, 0, 0, 0 }\n"
+                "\n"
+                "    BGNSUB\n"
+                "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
+                "       LOAD TEMP[1].x, RES[1], TEMP[0]\n"
+                "       LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n"
+                "       STORE RES[0].x, TEMP[0], TEMP[1]\n"
+                "       RET\n"
+                "    ENDSUB\n";
+
+        /*
+         * Slot 0: poison (output).  Slot 1: selector x % 2.
+         * Slot 2: even values 2x.  Any other slot: odd values 2x + 1.
+         */
+        void fill(void *word, int slot, int col, int row) {
+                uint32_t value;
+
+                switch (slot) {
+                case 0:
+                        value = 0xdeadbeef;
+                        break;
+                case 1:
+                        value = col % 2;
+                        break;
+                case 2:
+                        value = 2 * col;
+                        break;
+                default:
+                        value = 2 * col + 1;
+                        break;
+                }
+                *(uint32_t *)word = value;
+        }
+
+        /* Odd columns pick the "2x + 1" buffer, even columns the "2x" one. */
+        void want(void *word, int slot, int col, int row) {
+                *(uint32_t *)word = 2 * col + (col % 2 != 0);
+        }
+
+        int resources[] = { 0, 1, 2, 3, -1 };
+        uint block_dims[] = { 1, 1, 1 };
+        uint grid_dims[] = { 64, 1, 1 };
+        int slot;
+
+        printf("- %s\n", __func__);
+
+        init_prog(ctx, 0, 0, 0, src, NULL);
+        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
+                 256, 0, fill);
+        /* The three input buffers are created with the writable flag off. */
+        for (slot = 1; slot <= 3; slot++)
+                init_tex(ctx, slot, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
+                         256, 0, fill);
+        init_compute_resources(ctx, resources);
+        launch_grid(ctx, block_dims, grid_dims, 0, NULL);
+        check_tex(ctx, 0, want, NULL);
+        destroy_compute_resources(ctx);
+        destroy_tex(ctx);
+        destroy_prog(ctx);
+}
+
+/*
+ * Formats exercised by test_surface_ld() and test_surface_st(); each
+ * entry is run once by both tests.  Every format is listed exactly once
+ * (a duplicated PIPE_FORMAT_X8R8G8B8_UNORM entry only repeated the same
+ * sub-test without adding coverage).
+ */
+enum pipe_format surface_fmts[] = {
+        PIPE_FORMAT_B8G8R8A8_UNORM,
+        PIPE_FORMAT_B8G8R8X8_UNORM,
+        PIPE_FORMAT_A8R8G8B8_UNORM,
+        PIPE_FORMAT_X8R8G8B8_UNORM,
+        PIPE_FORMAT_L8_UNORM,
+        PIPE_FORMAT_A8_UNORM,
+        PIPE_FORMAT_I8_UNORM,
+        PIPE_FORMAT_L8A8_UNORM,
+        PIPE_FORMAT_R32_FLOAT,
+        PIPE_FORMAT_R32G32_FLOAT,
+        PIPE_FORMAT_R32G32B32A32_FLOAT,
+        PIPE_FORMAT_R32_UNORM,
+        PIPE_FORMAT_R32G32_UNORM,
+        PIPE_FORMAT_R32G32B32A32_UNORM,
+        PIPE_FORMAT_R32_SNORM,
+        PIPE_FORMAT_R32G32_SNORM,
+        PIPE_FORMAT_R32G32B32A32_SNORM,
+        PIPE_FORMAT_R8_UINT,
+        PIPE_FORMAT_R8G8_UINT,
+        PIPE_FORMAT_R8G8B8A8_UINT,
+        PIPE_FORMAT_R8_SINT,
+        PIPE_FORMAT_R8G8_SINT,
+        PIPE_FORMAT_R8G8B8A8_SINT,
+        PIPE_FORMAT_R32_UINT,
+        PIPE_FORMAT_R32G32_UINT,
+        PIPE_FORMAT_R32G32B32A32_UINT,
+        PIPE_FORMAT_R32_SINT,
+        PIPE_FORMAT_R32G32_SINT,
+        PIPE_FORMAT_R32G32B32A32_SINT
+};
+
+/*
+ * Typed surface load: for every entry of surface_fmts, a 128x32 texture
+ * of that format is filled with one fixed pixel, read with a typed LOAD
+ * (RES[0] is declared without RAW) and the unpacked vector is stored
+ * raw into a 512x32 R32_FLOAT texture, four words per source pixel.
+ */
+static void test_surface_ld(struct context *ctx)
+{
+        const char *src = "COMP\n"
+                "DCL RES[0], 2D\n"
+                "DCL RES[1], 2D, RAW, WR\n"
+                "DCL SV[0], BLOCK_ID[0]\n"
+                "DCL TEMP[0], LOCAL\n"
+                "DCL TEMP[1], LOCAL\n"
+                "IMM UINT32 { 16, 1, 0, 0 }\n"
+                "\n"
+                "    BGNSUB\n"
+                "       LOAD TEMP[1], RES[0], SV[0]\n"
+                "       UMUL TEMP[0], SV[0], IMM[0]\n"
+                "       STORE RES[1].xyzw, TEMP[0], TEMP[1]\n"
+                "       RET\n"
+                "    ENDSUB\n";
+        /* Index of the format under test; read by the nested callbacks. */
+        int fmt_idx = 0;
+
+        /* Pack the reference float pixel into the current format. */
+        void init0f(void *texel, int slot, int col, int row) {
+                float rgba[] = { 1.0, -.75, .50, -.25 };
+
+                util_format_write_4f(surface_fmts[fmt_idx], rgba, 0,
+                                     texel, 0, 0, 0, 1, 1);
+        }
+
+        /* Pack the reference integer pixel into the current format. */
+        void init0i(void *texel, int slot, int col, int row) {
+                int rgba[] = { 0xffffffff, 0xffff, 0xff, 0xf };
+
+                util_format_write_4i(surface_fmts[fmt_idx], rgba, 0,
+                                     texel, 0, 0, 0, 1, 1);
+        }
+
+        /* Poison for the output texture. */
+        void init1(void *texel, int slot, int col, int row) {
+                *(uint32_t *)texel = 0xdeadbeef;
+        }
+
+        /*
+         * Expected float word: pack the reference pixel, unpack it again
+         * and pick the channel this output word corresponds to.
+         */
+        void expectf(void *word, int slot, int col, int row) {
+                float packed[4], unpacked[4];
+
+                init0f(packed, slot, col / 4, row);
+                util_format_read_4f(surface_fmts[fmt_idx], unpacked, 0,
+                                    packed, 0, 0, 0, 1, 1);
+                *(float *)word = unpacked[col % 4];
+        }
+
+        /* Integer counterpart of expectf(). */
+        void expecti(void *word, int slot, int col, int row) {
+                int32_t packed[4], unpacked[4];
+
+                init0i(packed, slot, col / 4, row);
+                util_format_read_4i(surface_fmts[fmt_idx], unpacked, 0,
+                                    packed, 0, 0, 0, 1, 1);
+                *(uint32_t *)word = unpacked[col % 4];
+        }
+
+        printf("- %s\n", __func__);
+
+        init_prog(ctx, 0, 0, 0, src, NULL);
+
+        for (fmt_idx = 0; fmt_idx < Elements(surface_fmts); fmt_idx++) {
+                bool is_int = util_format_is_pure_integer(surface_fmts[fmt_idx]);
+                int resources[] = { 0, 1, -1 };
+                uint block_dims[] = { 1, 1, 1 };
+                uint grid_dims[] = { 128, 32, 1 };
+
+                printf("   - %s\n", util_format_name(surface_fmts[fmt_idx]));
+
+                if (is_int)
+                        init_tex(ctx, 0, PIPE_TEXTURE_2D, true,
+                                 surface_fmts[fmt_idx], 128, 32, init0i);
+                else
+                        init_tex(ctx, 0, PIPE_TEXTURE_2D, true,
+                                 surface_fmts[fmt_idx], 128, 32, init0f);
+                init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
+                         512, 32, init1);
+                init_compute_resources(ctx, resources);
+                init_sampler_states(ctx, 2);
+                launch_grid(ctx, block_dims, grid_dims, 0, NULL);
+                if (is_int)
+                        check_tex(ctx, 1, expecti, NULL);
+                else
+                        check_tex(ctx, 1, expectf, NULL);
+                destroy_sampler_states(ctx);
+                destroy_compute_resources(ctx);
+                destroy_tex(ctx);
+        }
+
+        destroy_prog(ctx);
+}
+
+/*
+ * Typed surface store: for every entry of surface_fmts, four raw words
+ * are loaded from a 512x32 R32_FLOAT texture and written with a typed
+ * STORE (RES[1] is declared without RAW) into a 128x32 texture of the
+ * format under test.  The result is compared against the reference
+ * packing done by the util_format_write_4* helpers.
+ */
+static void test_surface_st(struct context *ctx)
+{
+        const char *src = "COMP\n"
+                "DCL RES[0], 2D, RAW\n"
+                "DCL RES[1], 2D, WR\n"
+                "DCL SV[0], BLOCK_ID[0]\n"
+                "DCL TEMP[0], LOCAL\n"
+                "DCL TEMP[1], LOCAL\n"
+                "IMM UINT32 { 16, 1, 0, 0 }\n"
+                "\n"
+                "    BGNSUB\n"
+                "       UMUL TEMP[0], SV[0], IMM[0]\n"
+                "       LOAD TEMP[1], RES[0], TEMP[0]\n"
+                "       STORE RES[1], SV[0], TEMP[1]\n"
+                "       RET\n"
+                "    ENDSUB\n";
+        /* Index of the format under test; read by the nested callbacks. */
+        int i = 0;
+        /* Source words for float/normalized formats, repeating every 4. */
+        void init0f(void *p, int s, int x, int y) {
+                float v[] = { 1.0, -.75, 0.5, -.25 };
+                *(float *)p = v[x % 4];
+        }
+        /* Source words for pure-integer formats, repeating every 4. */
+        void init0i(void *p, int s, int x, int y) {
+                int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
+                *(int32_t *)p = v[x % 4];
+        }
+        /* Pre-fill each destination pixel with 0x01 bytes. */
+        void init1(void *p, int s, int x, int y) {
+                memset(p, 1, util_format_get_blocksize(surface_fmts[i]));
+        }
+        /* Expected pixel for float/normalized formats. */
+        void expectf(void *p, int s, int x, int y) {
+                float vf[4];
+                int j;
+
+                for (j = 0; j < 4; j++)
+                        init0f(&vf[j], s, 4 * x + j, y);
+                util_format_write_4f(surface_fmts[i], vf, 0,
+                                     p, 0, 0, 0, 1, 1);
+        }
+        /* Expected pixel for signed pure-integer formats. */
+        void expects(void *p, int s, int x, int y) {
+                int32_t v[4];
+                int j;
+
+                for (j = 0; j < 4; j++)
+                        init0i(&v[j], s, 4 * x + j, y);
+                util_format_write_4i(surface_fmts[i], v, 0,
+                                     p, 0, 0, 0, 1, 1);
+        }
+        /* Expected pixel for unsigned pure-integer formats. */
+        void expectu(void *p, int s, int x, int y) {
+                uint32_t v[4];
+                int j;
+
+                for (j = 0; j < 4; j++)
+                        init0i(&v[j], s, 4 * x + j, y);
+                util_format_write_4ui(surface_fmts[i], v, 0,
+                                      p, 0, 0, 0, 1, 1);
+        }
+        /*
+         * Tolerant pixel comparison; returns true when x and y match.
+         *  - float formats: every 32-bit float channel must agree to
+         *    within 1/255 (all float formats in surface_fmts use 32-bit
+         *    channels);
+         *  - other formats whose size is a multiple of 4: every 32-bit
+         *    word may differ by at most one, computed modulo 2^32 so
+         *    that signed values straddling zero still compare as close;
+         *  - anything else: exact byte comparison.
+         */
+        bool check(void *x, void *y, int sz) {
+                int j;
+
+                if (util_format_is_float(surface_fmts[i])) {
+                        const float *fx = x, *fy = y;
+
+                        /* Check all channels, not only the first one. */
+                        for (j = 0; j < sz / 4; j++)
+                                if (!(fabs(fx[j] - fy[j]) < 3.92156863e-3))
+                                        return false;
+                        return true;
+
+                } else if ((sz % 4) == 0) {
+                        const uint32_t *ux = x, *uy = y;
+
+                        for (j = 0; j < sz / 4; j++) {
+                                /*
+                                 * Wrap-around difference must be -1, 0
+                                 * or 1; done in unsigned arithmetic to
+                                 * avoid abs() on an unsigned operand.
+                                 */
+                                uint32_t d = ux[j] - uy[j];
+
+                                if ((uint32_t)(d + 1u) > 2u)
+                                        return false;
+                        }
+                        return true;
+                } else {
+                        return !memcmp(x, y, sz);
+                }
+        }
+
+        printf("- %s\n", __func__);
+
+        init_prog(ctx, 0, 0, 0, src, NULL);
+
+        for (i = 0; i < Elements(surface_fmts); i++) {
+                bool is_signed = (util_format_description(surface_fmts[i])
+                                  ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED);
+                bool is_int = util_format_is_pure_integer(surface_fmts[i]);
+
+                printf("   - %s\n", util_format_name(surface_fmts[i]));
+
+                init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
+                         512, 32, (is_int ? init0i : init0f));
+                init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i],
+                         128, 32, init1);
+                init_compute_resources(ctx, (int []) { 0, 1, -1 });
+                init_sampler_states(ctx, 2);
+                launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
+                            NULL);
+                check_tex(ctx, 1, (is_int && is_signed ? expects :
+                                   is_int && !is_signed ? expectu :
+                                   expectf), check);
+                destroy_sampler_states(ctx);
+                destroy_compute_resources(ctx);
+                destroy_tex(ctx);
+        }
+
+        destroy_prog(ctx);
+}
+
+/*
+ * BARRIER test.  Each thread runs 32 rounds: barrier, store the round
+ * counter into its own word of local memory, barrier, then read back
+ * the word of every thread in the block and END early if any of them
+ * differs from the counter.  Finally the thread copies its local word
+ * to RES[0]; every output word is expected to hold 31.
+ */
+static void test_barrier(struct context *ctx)
+{
+        const char *src = "COMP\n"
+                "DCL RES[0], BUFFER, RAW, WR\n"
+                "DCL SV[0], BLOCK_ID[0]\n"
+                "DCL SV[1], BLOCK_SIZE[0]\n"
+                "DCL SV[2], THREAD_ID[0]\n"
+                "DCL TEMP[0], LOCAL\n"
+                "DCL TEMP[1], LOCAL\n"
+                "DCL TEMP[2], LOCAL\n"
+                "DCL TEMP[3], LOCAL\n"
+                "IMM UINT32 { 1, 0, 0, 0 }\n"
+                "IMM UINT32 { 4, 0, 0, 0 }\n"
+                "IMM UINT32 { 32, 0, 0, 0 }\n"
+                "\n"
+                "    BGNSUB\n"
+                "       UMUL TEMP[0].x, SV[2], IMM[1]\n"
+                "       MOV TEMP[1].x, IMM[0].wwww\n"
+                "       BGNLOOP\n"
+                "               BARRIER\n"
+                "               STORE RLOCAL.x, TEMP[0], TEMP[1]\n"
+                "               BARRIER\n"
+                "               MOV TEMP[2].x, IMM[0].wwww\n"
+                "               BGNLOOP\n"
+                "                       UMUL TEMP[3].x, TEMP[2], IMM[1]\n"
+                "                       LOAD TEMP[3].x, RLOCAL, TEMP[3]\n"
+                "                       USNE TEMP[3].x, TEMP[3], TEMP[1]\n"
+                "                       IF TEMP[3]\n"
+                "                               END\n"
+                "                       ENDIF\n"
+                "                       UADD TEMP[2].x, TEMP[2], IMM[0]\n"
+                "                       USEQ TEMP[3].x, TEMP[2], SV[1]\n"
+                "                       IF TEMP[3]\n"
+                "                               BRK\n"
+                "                       ENDIF\n"
+                "               ENDLOOP\n"
+                "               UADD TEMP[1].x, TEMP[1], IMM[0]\n"
+                "               USEQ TEMP[2].x, TEMP[1], IMM[2]\n"
+                "               IF TEMP[2]\n"
+                "                       BRK\n"
+                "               ENDIF\n"
+                "       ENDLOOP\n"
+                "       UMUL TEMP[1].x, SV[0], SV[1]\n"
+                "       UMUL TEMP[1].x, TEMP[1], IMM[1]\n"
+                "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
+                "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
+                "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
+                "       RET\n"
+                "    ENDSUB\n";
+
+        /* Poison the output so that threads which bailed out are caught. */
+        void fill(void *word, int slot, int col, int row) {
+                *(uint32_t *)word = 0xdeadbeef;
+        }
+
+        /* Value of the round counter during the last round. */
+        void want(void *word, int slot, int col, int row) {
+                *(uint32_t *)word = 31;
+        }
+
+        int resources[] = { 0, -1 };
+        uint block_dims[] = { 64, 1, 1 };
+        uint grid_dims[] = { 16, 1, 1 };
+
+        printf("- %s\n", __func__);
+
+        init_prog(ctx, 256, 0, 0, src, NULL);
+        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
+                 4096, 0, fill);
+        init_compute_resources(ctx, resources);
+        launch_grid(ctx, block_dims, grid_dims, 0, NULL);
+        check_tex(ctx, 0, want, NULL);
+        destroy_compute_resources(ctx);
+        destroy_tex(ctx);
+        destroy_prog(ctx);
+}
+
+/*
+ * Atomic opcode test.  Ten single-thread blocks are launched; each
+ * thread seeds its word with 0xdeadbeef and thread k applies one atomic
+ * opcode twice: first with the constant 0x11111111, then with the value
+ * returned by the first operation.  With global == true the atomics
+ * target RES[0] directly, otherwise they target RLOCAL and the result
+ * is copied to RES[0] afterwards.
+ */
+static void test_atom_ops(struct context *ctx, bool global)
+{
+        const char *src = "COMP\n"
+                "#ifdef TARGET_GLOBAL\n"
+                "#define target RES[0]\n"
+                "#else\n"
+                "#define target RLOCAL\n"
+                "#endif\n"
+                ""
+                "DCL RES[0], BUFFER, RAW, WR\n"
+                "#define threadid SV[0]\n"
+                "DCL threadid, THREAD_ID[0]\n"
+                ""
+                "#define offset TEMP[0]\n"
+                "DCL offset, LOCAL\n"
+                "#define tmp TEMP[1]\n"
+                "DCL tmp, LOCAL\n"
+                ""
+                "#define k0 IMM[0]\n"
+                "IMM UINT32 { 0, 0, 0, 0 }\n"
+                "#define k1 IMM[1]\n"
+                "IMM UINT32 { 1, 0, 0, 0 }\n"
+                "#define k2 IMM[2]\n"
+                "IMM UINT32 { 2, 0, 0, 0 }\n"
+                "#define k3 IMM[3]\n"
+                "IMM UINT32 { 3, 0, 0, 0 }\n"
+                "#define k4 IMM[4]\n"
+                "IMM UINT32 { 4, 0, 0, 0 }\n"
+                "#define k5 IMM[5]\n"
+                "IMM UINT32 { 5, 0, 0, 0 }\n"
+                "#define k6 IMM[6]\n"
+                "IMM UINT32 { 6, 0, 0, 0 }\n"
+                "#define k7 IMM[7]\n"
+                "IMM UINT32 { 7, 0, 0, 0 }\n"
+                "#define k8 IMM[8]\n"
+                "IMM UINT32 { 8, 0, 0, 0 }\n"
+                "#define k9 IMM[9]\n"
+                "IMM UINT32 { 9, 0, 0, 0 }\n"
+                "#define korig IMM[10].xxxx\n"
+                "#define karg IMM[10].yyyy\n"
+                "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n"
+                "\n"
+                "    BGNSUB\n"
+                "       UMUL offset.x, threadid, k4\n"
+                "       STORE target.x, offset, korig\n"
+                "       USEQ tmp.x, threadid, k0\n"
+                "       IF tmp\n"
+                "               ATOMUADD tmp.x, target, offset, karg\n"
+                "               ATOMUADD tmp.x, target, offset, tmp\n"
+                "       ENDIF\n"
+                "       USEQ tmp.x, threadid, k1\n"
+                "       IF tmp\n"
+                "               ATOMXCHG tmp.x, target, offset, karg\n"
+                "               ATOMXCHG tmp.x, target, offset, tmp\n"
+                "       ENDIF\n"
+                "       USEQ tmp.x, threadid, k2\n"
+                "       IF tmp\n"
+                "               ATOMCAS tmp.x, target, offset, korig, karg\n"
+                "               ATOMCAS tmp.x, target, offset, tmp, k0\n"
+                "       ENDIF\n"
+                "       USEQ tmp.x, threadid, k3\n"
+                "       IF tmp\n"
+                "               ATOMAND tmp.x, target, offset, karg\n"
+                "               ATOMAND tmp.x, target, offset, tmp\n"
+                "       ENDIF\n"
+                "       USEQ tmp.x, threadid, k4\n"
+                "       IF tmp\n"
+                "               ATOMOR tmp.x, target, offset, karg\n"
+                "               ATOMOR tmp.x, target, offset, tmp\n"
+                "       ENDIF\n"
+                "       USEQ tmp.x, threadid, k5\n"
+                "       IF tmp\n"
+                "               ATOMXOR tmp.x, target, offset, karg\n"
+                "               ATOMXOR tmp.x, target, offset, tmp\n"
+                "       ENDIF\n"
+                "       USEQ tmp.x, threadid, k6\n"
+                "       IF tmp\n"
+                "               ATOMUMIN tmp.x, target, offset, karg\n"
+                "               ATOMUMIN tmp.x, target, offset, tmp\n"
+                "       ENDIF\n"
+                "       USEQ tmp.x, threadid, k7\n"
+                "       IF tmp\n"
+                "               ATOMUMAX tmp.x, target, offset, karg\n"
+                "               ATOMUMAX tmp.x, target, offset, tmp\n"
+                "       ENDIF\n"
+                "       USEQ tmp.x, threadid, k8\n"
+                "       IF tmp\n"
+                "               ATOMIMIN tmp.x, target, offset, karg\n"
+                "               ATOMIMIN tmp.x, target, offset, tmp\n"
+                "       ENDIF\n"
+                "       USEQ tmp.x, threadid, k9\n"
+                "       IF tmp\n"
+                "               ATOMIMAX tmp.x, target, offset, karg\n"
+                "               ATOMIMAX tmp.x, target, offset, tmp\n"
+                "       ENDIF\n"
+                "#ifdef TARGET_LOCAL\n"
+                "       LOAD tmp.x, RLOCAL, offset\n"
+                "       STORE RES[0].x, offset, tmp\n"
+                "#endif\n"
+                "       RET\n"
+                "    ENDSUB\n";
+        const char *target_def;
+
+        /* Marker value that every thread is expected to overwrite. */
+        void fill(void *word, int slot, int col, int row) {
+                *(uint32_t *)word = 0xbad;
+        }
+
+        /* Final word per thread; words outside 0..9 are left untouched. */
+        void want(void *word, int slot, int col, int row) {
+                const uint32_t final_value[10] = {
+                        0xce6c8eef,     /* 0: ATOMUADD */
+                        0xdeadbeef,     /* 1: ATOMXCHG */
+                        0x11111111,     /* 2: ATOMCAS */
+                        0x10011001,     /* 3: ATOMAND */
+                        0xdfbdbfff,     /* 4: ATOMOR */
+                        0x11111111,     /* 5: ATOMXOR */
+                        0x11111111,     /* 6: ATOMUMIN */
+                        0xdeadbeef,     /* 7: ATOMUMAX */
+                        0xdeadbeef,     /* 8: ATOMIMIN */
+                        0x11111111      /* 9: ATOMIMAX */
+                };
+
+                if (col >= 0 && col < 10)
+                        *(uint32_t *)word = final_value[col];
+        }
+
+        int resources[] = { 0, -1 };
+        uint block_dims[] = { 10, 1, 1 };
+        uint grid_dims[] = { 1, 1, 1 };
+
+        printf("- %s (%s)\n", __func__, global ? "global" : "local");
+
+        /* Select which memory space the "target" macro expands to. */
+        if (global)
+                target_def = "-DTARGET_GLOBAL";
+        else
+                target_def = "-DTARGET_LOCAL";
+
+        init_prog(ctx, 40, 0, 0, src, target_def);
+        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
+                 40, 0, fill);
+        init_compute_resources(ctx, resources);
+        launch_grid(ctx, block_dims, grid_dims, 0, NULL);
+        check_tex(ctx, 0, want, NULL);
+        destroy_compute_resources(ctx);
+        destroy_tex(ctx);
+        destroy_prog(ctx);
+}
+
+/*
+ * Atomic race test.  Threads with an id below 32 repeatedly ATOMUADD a
+ * sign-alternating constant to their own word and compare the returned
+ * old value with a locally tracked copy; they finish by storing the
+ * result of a final consistency comparison.  The remaining threads poll
+ * the word 128 bytes below their own, END if they observe a value
+ * outside the allowed set, and finally ATOMXCHG 0x11111111 into that
+ * word and into their own.  With global == true the operations target
+ * RES[0], otherwise RLOCAL (copied to RES[0] at the end).
+ */
+static void test_atom_race(struct context *ctx, bool global)
+{
+        const char *src = "COMP\n"
+                "#ifdef TARGET_GLOBAL\n"
+                "#define target RES[0]\n"
+                "#else\n"
+                "#define target RLOCAL\n"
+                "#endif\n"
+                ""
+                "DCL RES[0], BUFFER, RAW, WR\n"
+                ""
+                "#define blockid SV[0]\n"
+                "DCL blockid, BLOCK_ID[0]\n"
+                "#define blocksz SV[1]\n"
+                "DCL blocksz, BLOCK_SIZE[0]\n"
+                "#define threadid SV[2]\n"
+                "DCL threadid, THREAD_ID[0]\n"
+                ""
+                "#define offset TEMP[0]\n"
+                "DCL offset, LOCAL\n"
+                "#define arg TEMP[1]\n"
+                "DCL arg, LOCAL\n"
+                "#define count TEMP[2]\n"
+                "DCL count, LOCAL\n"
+                "#define vlocal TEMP[3]\n"
+                "DCL vlocal, LOCAL\n"
+                "#define vshared TEMP[4]\n"
+                "DCL vshared, LOCAL\n"
+                "#define last TEMP[5]\n"
+                "DCL last, LOCAL\n"
+                "#define tmp0 TEMP[6]\n"
+                "DCL tmp0, LOCAL\n"
+                "#define tmp1 TEMP[7]\n"
+                "DCL tmp1, LOCAL\n"
+                ""
+                "#define k0 IMM[0]\n"
+                "IMM UINT32 { 0, 0, 0, 0 }\n"
+                "#define k1 IMM[1]\n"
+                "IMM UINT32 { 1, 0, 0, 0 }\n"
+                "#define k4 IMM[2]\n"
+                "IMM UINT32 { 4, 0, 0, 0 }\n"
+                "#define k32 IMM[3]\n"
+                "IMM UINT32 { 32, 0, 0, 0 }\n"
+                "#define k128 IMM[4]\n"
+                "IMM UINT32 { 128, 0, 0, 0 }\n"
+                "#define kdeadcafe IMM[5]\n"
+                "IMM UINT32 { 3735931646, 0, 0, 0 }\n"
+                "#define kallowed_set IMM[6]\n"
+                "IMM UINT32 { 559035650, 0, 0, 0 }\n"
+                "#define k11111111 IMM[7]\n"
+                "IMM UINT32 { 286331153, 0, 0, 0 }\n"
+                "\n"
+                "    BGNSUB\n"
+                "       MOV offset.x, threadid\n"
+                "#ifdef TARGET_GLOBAL\n"
+                "       UMUL tmp0.x, blockid, blocksz\n"
+                "       UADD offset.x, offset, tmp0\n"
+                "#endif\n"
+                "       UMUL offset.x, offset, k4\n"
+                "       USLT tmp0.x, threadid, k32\n"
+                "       STORE target.x, offset, k0\n"
+                "       BARRIER\n"
+                "       IF tmp0\n"
+                "               MOV vlocal.x, k0\n"
+                "               MOV arg.x, kdeadcafe\n"
+                "               BGNLOOP\n"
+                "                       INEG arg.x, arg\n"
+                "                       ATOMUADD vshared.x, target, offset, arg\n"
+                "                       SFENCE target\n"
+                "                       USNE tmp0.x, vshared, vlocal\n"
+                "                       IF tmp0\n"
+                "                               BRK\n"
+                "                       ENDIF\n"
+                "                       UADD vlocal.x, vlocal, arg\n"
+                "               ENDLOOP\n"
+                "               UADD vlocal.x, vshared, arg\n"
+                "               LOAD vshared.x, target, offset\n"
+                "               USEQ tmp0.x, vshared, vlocal\n"
+                "               STORE target.x, offset, tmp0\n"
+                "       ELSE\n"
+                "               UADD offset.x, offset, -k128\n"
+                "               MOV count.x, k0\n"
+                "               MOV last.x, k0\n"
+                "               BGNLOOP\n"
+                "                       LOAD vshared.x, target, offset\n"
+                "                       USEQ tmp0.x, vshared, kallowed_set.xxxx\n"
+                "                       USEQ tmp1.x, vshared, kallowed_set.yyyy\n"
+                "                       OR tmp0.x, tmp0, tmp1\n"
+                "                       IF tmp0\n"
+                "                               USEQ tmp0.x, vshared, last\n"
+                "                               IF tmp0\n"
+                "                                       CONT\n"
+                "                               ENDIF\n"
+                "                               MOV last.x, vshared\n"
+                "                       ELSE\n"
+                "                               END\n"
+                "                       ENDIF\n"
+                "                       UADD count.x, count, k1\n"
+                "                       USEQ tmp0.x, count, k128\n"
+                "                       IF tmp0\n"
+                "                               BRK\n"
+                "                       ENDIF\n"
+                "               ENDLOOP\n"
+                "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
+                "               UADD offset.x, offset, k128\n"
+                "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
+                "               SFENCE target\n"
+                "       ENDIF\n"
+                "#ifdef TARGET_LOCAL\n"
+                "       LOAD tmp0.x, RLOCAL, offset\n"
+                "       UMUL tmp1.x, blockid, blocksz\n"
+                "       UMUL tmp1.x, tmp1, k4\n"
+                "       UADD offset.x, offset, tmp1\n"
+                "       STORE RES[0].x, offset, tmp0\n"
+                "#endif\n"
+                "       RET\n"
+                "    ENDSUB\n";
+        const char *target_def;
+
+        /* Poison the output so that threads which bailed out are caught. */
+        void fill(void *word, int slot, int col, int row) {
+                *(uint32_t *)word = 0xdeadbeef;
+        }
+
+        /*
+         * Words whose index has bit 5 set belong to the polling threads
+         * (0x11111111); the others belong to the adding threads (all ones).
+         */
+        void want(void *word, int slot, int col, int row) {
+                if (col & 0x20)
+                        *(uint32_t *)word = 0x11111111;
+                else
+                        *(uint32_t *)word = 0xffffffff;
+        }
+
+        int resources[] = { 0, -1 };
+        uint block_dims[] = { 64, 1, 1 };
+        uint grid_dims[] = { 16, 1, 1 };
+
+        printf("- %s (%s)\n", __func__, global ? "global" : "local");
+
+        /* Select which memory space the "target" macro expands to. */
+        if (global)
+                target_def = "-DTARGET_GLOBAL";
+        else
+                target_def = "-DTARGET_LOCAL";
+
+        init_prog(ctx, 256, 0, 0, src, target_def);
+        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
+                 4096, 0, fill);
+        init_compute_resources(ctx, resources);
+        launch_grid(ctx, block_dims, grid_dims, 0, NULL);
+        check_tex(ctx, 0, want, NULL);
+        destroy_compute_resources(ctx);
+        destroy_tex(ctx);
+        destroy_prog(ctx);
+}
+
+/*
+ * Test driver: allocates one zero-initialized context, runs every
+ * compute test on it in a fixed order and tears the context down.
+ * Command-line arguments are ignored.
+ *
+ * Returns 0 after all tests have run, or 1 if the context could not be
+ * allocated.
+ */
+int main(int argc, char *argv[])
+{
+        struct context *ctx = CALLOC_STRUCT(context);
+
+        /* Fail cleanly instead of dereferencing NULL in init_ctx(). */
+        if (!ctx) {
+                fprintf(stderr, "Failed to allocate the test context\n");
+                return 1;
+        }
+
+        init_ctx(ctx);
+        test_system_values(ctx);
+        test_resource_access(ctx);
+        test_function_calls(ctx);
+        test_input_global(ctx);
+        test_private(ctx);
+        test_local(ctx);
+        test_sample(ctx);
+        test_many_kern(ctx);
+        test_constant(ctx);
+        test_resource_indirect(ctx);
+        test_surface_ld(ctx);
+        test_surface_st(ctx);
+        test_barrier(ctx);
+        /* The atomic tests run twice: on global, then on local memory. */
+        test_atom_ops(ctx, true);
+        test_atom_race(ctx, true);
+        test_atom_ops(ctx, false);
+        test_atom_race(ctx, false);
+        destroy_ctx(ctx);
+
+        return 0;
+}
diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c

index cc19e8d5eece058804cc38571c97344a6b9bbfbb..7caac29299f7a7b09d8912ad8e531d7ec99930ef 100644 (file)
--- a/src/gallium/tests/trivial/quad-tex.c
+++ b/src/gallium/tests/trivial/quad-tex.c
@@ -57,16 +57,12 @@
  #include "util/u_memory.h"
  /* util_make_[fragment|vertex]_passthrough_shader */
  #include "util/u_simple_shaders.h"
-
-/* sw_screen_create: to get a software pipe driver */
-#include "target-helpers/inline_sw_helper.h"
-/* debug_screen_wrap: to wrap with debug pipe drivers */
-#include "target-helpers/inline_debug_helper.h"
-/* null software winsys */
-#include "sw/null/null_sw_winsys.h"
+/* to get a hardware pipe driver */
+#include "pipe-loader/pipe_loader.h"
  
  struct program
  {
+       struct pipe_loader_device *dev;
         struct pipe_screen *screen;
         struct pipe_context *pipe;
         struct cso_context *cso;
@@ -93,10 +89,15 @@ struct program
  static void init_prog(struct program *p)
  {
         struct pipe_surface surf_tmpl;
-       /* create the software rasterizer */
-       p->screen = sw_screen_create(null_sw_create());
-       /* wrap the screen with any debugger */
-       p->screen = debug_screen_wrap(p->screen);
+       int ret;
+
+       /* find a hardware device */
+       ret = pipe_loader_probe(&p->dev, 1);
+       assert(ret);
+
+       /* init a pipe screen */
+       p->screen = pipe_loader_create_screen(p->dev, PIPE_SEARCH_DIR);
+       assert(p->screen);
  
         /* create the pipe driver context and cso context */
         p->pipe = p->screen->context_create(p->screen, NULL);
@@ -298,6 +299,7 @@ static void close_prog(struct program *p)
         cso_destroy_context(p->cso);
         p->pipe->destroy(p->pipe);
         p->screen->destroy(p->screen);
+       pipe_loader_release(&p->dev, 1);
  
         FREE(p);
  }
diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c

index 9190f7824e91e6865fe0c9fd0e2d40968a74af1b..f3e1e944154c7a16bc5d962799c801dcfff5e43d 100644 (file)
--- a/src/gallium/tests/trivial/tri.c
+++ b/src/gallium/tests/trivial/tri.c
@@ -55,16 +55,12 @@
  #include "util/u_memory.h"
  /* util_make_[fragment|vertex]_passthrough_shader */
  #include "util/u_simple_shaders.h"
-
-/* sw_screen_create: to get a software pipe driver */
-#include "target-helpers/inline_sw_helper.h"
-/* debug_screen_wrap: to wrap with debug pipe drivers */
-#include "target-helpers/inline_debug_helper.h"
-/* null software winsys */
-#include "sw/null/null_sw_winsys.h"
+/* to get a hardware pipe driver */
+#include "pipe-loader/pipe_loader.h"
  
  struct program
  {
+       struct pipe_loader_device *dev;
         struct pipe_screen *screen;
         struct pipe_context *pipe;
         struct cso_context *cso;
@@ -88,10 +84,15 @@ struct program
  static void init_prog(struct program *p)
  {
         struct pipe_surface surf_tmpl;
-       /* create the software rasterizer */
-       p->screen = sw_screen_create(null_sw_create());
-       /* wrap the screen with any debugger */
-       p->screen = debug_screen_wrap(p->screen);
+       int ret;
+
+       /* find a hardware device */
+       ret = pipe_loader_probe(&p->dev, 1);
+       assert(ret);
+
+       /* init a pipe screen */
+       p->screen = pipe_loader_create_screen(p->dev, PIPE_SEARCH_DIR);
+       assert(p->screen);
  
         /* create the pipe driver context and cso context */
         p->pipe = p->screen->context_create(p->screen, NULL);
@@ -234,6 +235,7 @@ static void close_prog(struct program *p)
         cso_destroy_context(p->cso);
         p->pipe->destroy(p->pipe);
         p->screen->destroy(p->screen);
+       pipe_loader_release(&p->dev, 1);
  
         FREE(p);
  }
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c

index 4d343b8489bcd5d7c7bc8d258a8a3835e263998f..fc57d676876671daf323073d0901f98f44cc9a37 100644 (file)
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -81,6 +81,10 @@
  #define RADEON_INFO_IB_VM_MAX_SIZE  0x0f
  #endif
  
+#ifndef RADEON_INFO_MAX_PIPES
+#define RADEON_INFO_MAX_PIPES 0x10
+#endif
+
  
  /* Enable/disable feature access for one command stream.
   * If enable == TRUE, return TRUE on success.
@@ -299,6 +303,12 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
         ws->info.r600_has_streamout = ws->info.drm_minor >= 13;
      }
  
+    /* Get max pipes; this is only needed for compute shaders.  All evergreen+
+     * chips have at least 2 pipes, so we use 2 as a default. */
+    ws->info.r600_max_pipes = 2;
+    radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_PIPES, NULL,
+                         &ws->info.r600_max_pipes);
+
      return TRUE;
  }
  
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h

index 99768248644423ce82f3627691f09c964ef50579..6f85b3e11d26d512bbaa1429168111b588316434 100644 (file)
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -98,6 +98,7 @@ struct radeon_info {
      uint32_t r600_va_start;
      uint32_t r600_ib_vm_max_size;
      boolean r600_has_streamout;
+    uint32_t r600_max_pipes;
  };
  
  enum radeon_feature_id {
diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c

index 4df6e8fcf06dc40cf395a8f93c25304bbbe7e331..e5ddfb6ce69ebf78074062b4fa9d3838f4367312 100644 (file)
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -291,6 +291,18 @@ gbm_dri_is_format_supported(struct gbm_device *gbm,
     return 1;
  }
  
+static int
+gbm_dri_bo_write(struct gbm_bo *_bo, const void *buf, size_t count)
+{
+   struct gbm_dri_device *dri = gbm_dri_device(_bo->gbm);
+   struct gbm_dri_bo *bo = gbm_dri_bo(_bo);
+
+   if (dri->image->base.version < 4)
+      return -1;
+
+   return dri->image->write(bo->image, buf, count);
+}
+
  static void
  gbm_dri_bo_destroy(struct gbm_bo *_bo)
  {
@@ -390,6 +402,9 @@ gbm_dri_bo_create(struct gbm_device *gbm,
     int dri_format;
     unsigned dri_use = 0;
  
+   if (dri->image->base.version < 4 && (usage & GBM_BO_USE_WRITE))
+      return NULL;
+
     bo = calloc(1, sizeof *bo);
     if (bo == NULL)
        return NULL;
@@ -421,6 +436,8 @@ gbm_dri_bo_create(struct gbm_device *gbm,
        dri_use |= __DRI_IMAGE_USE_SCANOUT;
     if (usage & GBM_BO_USE_CURSOR_64X64)
        dri_use |= __DRI_IMAGE_USE_CURSOR;
+   if (usage & GBM_BO_USE_WRITE)
+      dri_use |= __DRI_IMAGE_USE_WRITE;
  
     bo->image =
        dri->image->createImage(dri->screen,
@@ -491,6 +508,7 @@ dri_device_create(int fd)
     dri->base.base.bo_create = gbm_dri_bo_create;
     dri->base.base.bo_create_from_egl_image = gbm_dri_bo_create_from_egl_image;
     dri->base.base.is_format_supported = gbm_dri_is_format_supported;
+   dri->base.base.bo_write = gbm_dri_bo_write;
     dri->base.base.bo_destroy = gbm_dri_bo_destroy;
     dri->base.base.destroy = dri_destroy;
     dri->base.base.surface_create = gbm_dri_surface_create;
diff --git a/src/gbm/backends/dri/gbm_driint.h b/src/gbm/backends/dri/gbm_driint.h

index 3b7db65ce9ffe0452136782e0acf8198ef006e87..f4043683f11dc1fdf048a31ccff69bc4c10fab0f 100644 (file)
--- a/src/gbm/backends/dri/gbm_driint.h
+++ b/src/gbm/backends/dri/gbm_driint.h
@@ -77,9 +77,6 @@ struct gbm_dri_bo {
  struct gbm_dri_surface {
     struct gbm_surface base;
  
-   __DRIbuffer *(*get_front_buffer)(struct gbm_dri_surface *, void *);
-   void (*release_buffer)(struct gbm_dri_surface *, __DRIbuffer *, void *);
-   int (*has_free_buffers)(void *);
     void *dri_private;
  };
  
diff --git a/src/gbm/main/gbm.c b/src/gbm/main/gbm.c

index 79ba65051f221465a0aec966336ddc631a630925..3994f86aafc0dcecd08da8d38207c390ef2344ab 100644 (file)
--- a/src/gbm/main/gbm.c
+++ b/src/gbm/main/gbm.c
@@ -231,6 +231,65 @@ gbm_bo_get_handle(struct gbm_bo *bo)
     return bo->handle;
  }
  
+/** Write data into the buffer object
+ *
+ * If the buffer object was created with the GBM_BO_USE_WRITE flag,
+ * this function can be used to write data into the buffer object.  The
+ * data is copied directly into the object and it's the responsibility
+ * of the caller to make sure the data represents valid pixel data,
+ * according to the width, height, stride and format of the buffer object.
+ *
+ * \param bo The buffer object
+ * \param buf The data to write
+ * \param count The number of bytes to write
+ * \return Returns -1 on error, 0 otherwise
+ */
+GBM_EXPORT int
+gbm_bo_write(struct gbm_bo *bo, const void *buf, size_t count)
+{
+   return bo->gbm->bo_write(bo, buf, count);
+}
+
+/** Get the gbm device used to create the buffer object
+ *
+ * \param bo The buffer object
+ * \return Returns the gbm device with which the buffer object was created
+ */
+GBM_EXPORT struct gbm_device *
+gbm_bo_get_device(struct gbm_bo *bo)
+{
+       return bo->gbm;
+}
+
+/** Set the user data associated with a buffer object
+ *
+ * \param bo The buffer object
+ * \param data The data to associate to the buffer object
+ * \param destroy_user_data A callback (which may be %NULL) that will be
+ * called prior to the buffer destruction
+ */
+GBM_EXPORT void
+gbm_bo_set_user_data(struct gbm_bo *bo, void *data,
+                    void (*destroy_user_data)(struct gbm_bo *, void *))
+{
+   bo->user_data = data;
+   bo->destroy_user_data = destroy_user_data;
+}
+
+/** Get the user data associated with a buffer object
+ *
+ * \param bo The buffer object
+ * \return Returns the user data associated with the buffer object or %NULL
+ * if no data was associated with it
+ *
+ * \sa gbm_bo_set_user_data()
+ */
+GBM_EXPORT void *
+gbm_bo_get_user_data(struct gbm_bo *bo)
+{
+   return bo->user_data;
+}
+
  /**
   * Destroys the given buffer object and frees all resources associated with
   * it.
@@ -240,6 +299,9 @@ gbm_bo_get_handle(struct gbm_bo *bo)
  GBM_EXPORT void
  gbm_bo_destroy(struct gbm_bo *bo)
  {
+   if (bo->destroy_user_data)
+      bo->destroy_user_data(bo, bo->user_data);
+
     bo->gbm->bo_destroy(bo);
  }
  
@@ -357,10 +419,11 @@ gbm_surface_destroy(struct gbm_surface *surf)
   *
   * \param surf The surface
   *
- * \return A newly allocated buffer object that should be released
- * with gbm_surface_release_buffer() when no longer needed.  This bo
- * should not be destroyed using gbm_bo_destroy().  If an error occurs
- * this function returns %NULL.
+ * \return A buffer object that should be released with
+ * gbm_surface_release_buffer() when no longer needed.  The implementation
+ * is free to reuse buffers released with gbm_surface_release_buffer() so
+ * this bo should not be destroyed using gbm_bo_destroy().  If an error
+ * occurs this function returns %NULL.
   */
  GBM_EXPORT struct gbm_bo *
  gbm_surface_lock_front_buffer(struct gbm_surface *surf)
@@ -371,10 +434,11 @@ gbm_surface_lock_front_buffer(struct gbm_surface *surf)
  /**
   * Release a locked buffer obtained with gbm_surface_lock_front_buffer()
   *
- * The bo is destroyed after a call to this function and returns the
- * underlying buffer to the gbm surface.  Releasing a bo will
- * typically make gbm_surface_has_free_buffer() return 1 and thus
- * allow rendering the next frame, but not always.
+ * Returns the underlying buffer to the gbm surface.  Releasing a bo
+ * will typically make gbm_surface_has_free_buffer() return 1 and thus
+ * allow rendering the next frame, but not always. The implementation
+ * may choose to destroy the bo immediately or reuse it, in which case
+ * the user data associated with it is unchanged.
   *
   * \param surf The surface
   * \param bo The buffer object
diff --git a/src/gbm/main/gbm.h b/src/gbm/main/gbm.h

index 6748752d8f1d99cdc34499447bc47a17e871af6e..af5dc5aee8cdff130c240a435d378daee303fedd 100644 (file)
--- a/src/gbm/main/gbm.h
+++ b/src/gbm/main/gbm.h
@@ -201,6 +201,12 @@ enum gbm_bo_flags {
      * as the storage for a color buffer
      */
     GBM_BO_USE_RENDERING    = (1 << 2),
+   /**
+    * Buffer can be used for gbm_bo_write.  This is guaranteed to work
+    * with GBM_BO_USE_CURSOR_64X64, but may not work for other
+    * combinations.
+    */
+   GBM_BO_USE_WRITE    = (1 << 3),
  };
  
  int
@@ -242,9 +248,22 @@ gbm_bo_get_pitch(struct gbm_bo *bo);
  uint32_t
  gbm_bo_get_format(struct gbm_bo *bo);
  
+struct gbm_device *
+gbm_bo_get_device(struct gbm_bo *bo);
+
  union gbm_bo_handle
  gbm_bo_get_handle(struct gbm_bo *bo);
  
+int
+gbm_bo_write(struct gbm_bo *bo, const void *buf, size_t count);
+
+void
+gbm_bo_set_user_data(struct gbm_bo *bo, void *data,
+                    void (*destroy_user_data)(struct gbm_bo *, void *));
+
+void *
+gbm_bo_get_user_data(struct gbm_bo *bo);
+
  void
  gbm_bo_destroy(struct gbm_bo *bo);
  
diff --git a/src/gbm/main/gbmint.h b/src/gbm/main/gbmint.h

index 53d73f40df61e61f8002fe330ab6ebac6c26cc2f..8eb8671aeb2cb1105315e8b5ae67c1a9d606bc5d 100644 (file)
--- a/src/gbm/main/gbmint.h
+++ b/src/gbm/main/gbmint.h
@@ -70,6 +70,7 @@ struct gbm_device {
                                                void *egl_dpy, void *egl_img,
                                                uint32_t width, uint32_t height,
                                                uint32_t usage);
+   int (*bo_write)(struct gbm_bo *bo, const void *buf, size_t data);
     void (*bo_destroy)(struct gbm_bo *bo);
  
     struct gbm_surface *(*surface_create)(struct gbm_device *gbm,
@@ -94,6 +95,8 @@ struct gbm_bo {
     uint32_t pitch;
     uint32_t format;
     union gbm_bo_handle  handle;
+   void *user_data;
+   void (*destroy_user_data)(struct gbm_bo *, void *);
  };
  
  struct gbm_surface {
diff --git a/src/glsl/TODO b/src/glsl/TODO

index c99d7e152d6d16636eaa4164d1e207f600973940..eb73fc2e8132871b1ca7abb07b4f9b8568ef8c6f 100644 (file)
--- a/src/glsl/TODO
+++ b/src/glsl/TODO
@@ -6,22 +6,10 @@
    constant index values.  For others it is more complicated.  Perhaps these
    cases should be silently converted to uniforms?
  
-- Implement support for ir_binop_dot in ir_algebraic.cpp.  Perform
+- Implement support for ir_binop_dot in opt_algebraic.cpp.  Perform
    transformations such as "dot(v, vec3(0.0, 1.0, 0.0))" -> v.y.
  
  - Track source locations throughout the IR.  There are currently several
    places where we cannot emit line numbers for errors (and currently emit 0:0)
    because we've "lost" the line number information.  This is particularly
    noticeable at link time.
-
-1.30 features:
-
-- Implement AST-to-HIR conversion of switch-statements
-  - switch
-  - case
-  - Update break to correcly handle mixed nexting of switch-statements
-    and loops.
-
-- Implement support for gl_ClipDistance.  This is non-trivial because
-  gl_ClipDistance is exposed as a float[8], but all hardware actually
-  implements it as vec4[2].
-\ No newline at end of file
diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp

index 8bf0ba2a8767b41536539b3563bf76440230d2ad..9e7c5995fc9fb574d90be059266dca53c411c240 100644 (file)
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -153,21 +153,21 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
          }
  
          ir_variable *var = actual->variable_referenced();
-        if (var) {
-           if (var->read_only) {
-              _mesa_glsl_error(&loc, state,
-                               "function parameter '%s %s' references the "
-                               "read-only variable '%s'",
-                               mode, formal->name,
-                               actual->variable_referenced()->name);
-              return false;
-           } else if (!actual->is_lvalue()) {
-              _mesa_glsl_error(&loc, state,
-                               "function parameter '%s %s' is not an lvalue",
-                               mode, formal->name);
-              return false;
-           }
+        if (var)
             var->assigned = true;
+
+        if (var && var->read_only) {
+           _mesa_glsl_error(&loc, state,
+                            "function parameter '%s %s' references the "
+                            "read-only variable '%s'",
+                            mode, formal->name,
+                            actual->variable_referenced()->name);
+           return false;
+        } else if (!actual->is_lvalue()) {
+           _mesa_glsl_error(&loc, state,
+                            "function parameter '%s %s' is not an lvalue",
+                            mode, formal->name);
+           return false;
          }
        }
  
@@ -278,7 +278,7 @@ generate_call(exec_list *instructions, ir_function_signature *sig,
      * Function calls were first allowed to be constant expressions in GLSL 1.20.
      */
     if (state->language_version >= 120) {
-      ir_constant *value = sig->constant_expression_value(actual_parameters);
+      ir_constant *value = sig->constant_expression_value(actual_parameters, NULL);
        if (value != NULL) {
          return value;
        }
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp

index e24914b832a67e4399213aa0436933beb268c29b..86bb8741bcc2ccdec10f53c871912c426c08f727 100644 (file)
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -4039,13 +4039,13 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
     foreach_list(node, instructions) {
        ir_variable *var = ((ir_instruction *)node)->as_variable();
  
-      if (!var)
+      if (!var || !var->assigned)
          continue;
  
        if (strcmp(var->name, "gl_FragColor") == 0)
-        gl_FragColor_assigned = var->assigned;
+        gl_FragColor_assigned = true;
        else if (strcmp(var->name, "gl_FragData") == 0)
-        gl_FragData_assigned = var->assigned;
+        gl_FragData_assigned = true;
        else if (strncmp(var->name, "gl_", 3) != 0) {
          if (state->target == fragment_shader &&
              (var->mode == ir_var_out || var->mode == ir_var_inout)) {
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp

index ae7a365f4b2be5ad37dd6b053f74fa08fcdd77be..6f1c86b43ffe872e563e679570895af8d750f89b 100644 (file)
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -36,8 +36,9 @@ extern "C" {
  #include "ir_optimization.h"
  #include "loop_analysis.h"
  
-_mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *ctx,
+_mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
                                                GLenum target, void *mem_ctx)
+ : ctx(_ctx)
  {
     switch (target) {
     case GL_VERTEX_SHADER:   this->target = vertex_shader; break;
@@ -134,24 +135,49 @@ _mesa_glsl_shader_target_name(enum _mesa_glsl_parser_targets target)
     return "unknown";
  }
  
+/* This helper function will append the given message to the shader's
+   info log and report it via GL_ARB_debug_output. Per that extension,
+   'type' is one of the enum values classifying the message, and
+   'id' is the implementation-defined ID of the given message. */
+static void
+_mesa_glsl_msg(const YYLTYPE *locp, _mesa_glsl_parse_state *state,
+               GLenum type, GLuint id, const char *fmt, va_list ap)
+{
+   bool error = (type == GL_DEBUG_TYPE_ERROR_ARB);
+
+   assert(state->info_log != NULL);
+
+   /* Get the offset that the new message will be written to. */
+   int msg_offset = strlen(state->info_log);
+
+   ralloc_asprintf_append(&state->info_log, "%u:%u(%u): %s: ",
+                                           locp->source,
+                                           locp->first_line,
+                                           locp->first_column,
+                                           error ? "error" : "warning");
+   ralloc_vasprintf_append(&state->info_log, fmt, ap);
+
+   const char *const msg = &state->info_log[msg_offset];
+   struct gl_context *ctx = state->ctx;
+   /* Report the error via GL_ARB_debug_output. */
+   if (error)
+      _mesa_shader_debug(ctx, type, id, msg, strlen(msg));
+
+   ralloc_strcat(&state->info_log, "\n");
+}
  
  void
  _mesa_glsl_error(YYLTYPE *locp, _mesa_glsl_parse_state *state,
                  const char *fmt, ...)
  {
     va_list ap;
+   GLenum type = GL_DEBUG_TYPE_ERROR_ARB;
  
     state->error = true;
  
-   assert(state->info_log != NULL);
-   ralloc_asprintf_append(&state->info_log, "%u:%u(%u): error: ",
-                                           locp->source,
-                                           locp->first_line,
-                                           locp->first_column);
     va_start(ap, fmt);
-   ralloc_vasprintf_append(&state->info_log, fmt, ap);
+   _mesa_glsl_msg(locp, state, type, SHADER_ERROR_UNKNOWN, fmt, ap);
     va_end(ap);
-   ralloc_strcat(&state->info_log, "\n");
  }
  
  
@@ -160,16 +186,11 @@ _mesa_glsl_warning(const YYLTYPE *locp, _mesa_glsl_parse_state *state,
                    const char *fmt, ...)
  {
     va_list ap;
+   GLenum type = GL_DEBUG_TYPE_OTHER_ARB;
  
-   assert(state->info_log != NULL);
-   ralloc_asprintf_append(&state->info_log, "%u:%u(%u): warning: ",
-                                           locp->source,
-                                           locp->first_line,
-                                           locp->first_column);
     va_start(ap, fmt);
-   ralloc_vasprintf_append(&state->info_log, fmt, ap);
+   _mesa_glsl_msg(locp, state, type, 0, fmt, ap);
     va_end(ap);
-   ralloc_strcat(&state->info_log, "\n");
  }
  
  
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h

index 55676f5a9ecfa861d3ed56f79f3ab18e5b96673b..1a909c68b7f174bfc5c00f14f54c974f6f8413a9 100644 (file)
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -57,7 +57,7 @@ struct glsl_switch_state {
  };
  
  struct _mesa_glsl_parse_state {
-   _mesa_glsl_parse_state(struct gl_context *ctx, GLenum target,
+   _mesa_glsl_parse_state(struct gl_context *_ctx, GLenum target,
                           void *mem_ctx);
  
     /* Callers of this ralloc-based new need not call delete. It's
@@ -77,6 +77,7 @@ struct _mesa_glsl_parse_state {
        ralloc_free(mem);
     }
  
+   struct gl_context *const ctx;
     void *scanner;
     exec_list translation_unit;
     glsl_symbol_table *symbols;
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp

index 1ba87515ea7024061b3a6905f7f95a7286bd8a20..970d8f3bac094080a346b226f33d4a572ba32c07 100644 (file)
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -716,12 +716,27 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
  ir_constant *
  ir_constant::zero(void *mem_ctx, const glsl_type *type)
  {
-   assert(type->is_numeric() || type->is_boolean());
+   assert(type->is_scalar() || type->is_vector() || type->is_matrix()
+         || type->is_record() || type->is_array());
  
     ir_constant *c = new(mem_ctx) ir_constant;
     c->type = type;
     memset(&c->value, 0, sizeof(c->value));
  
+   if (type->is_array()) {
+      c->array_elements = ralloc_array(c, ir_constant *, type->length);
+
+      for (unsigned i = 0; i < type->length; i++)
+        c->array_elements[i] = ir_constant::zero(c, type->element_type());
+   }
+
+   if (type->is_record()) {
+      for (unsigned i = 0; i < type->length; i++) {
+        ir_constant *comp = ir_constant::zero(mem_ctx, type->fields.structure[i].type);
+        c->components.push_tail(comp);
+      }
+   }
+
     return c;
  }
  
@@ -841,6 +856,95 @@ ir_constant::get_record_field(const char *name)
     return (ir_constant *) node;
  }
  
+void
+ir_constant::copy_offset(ir_constant *src, int offset)
+{
+   switch (this->type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL: {
+      unsigned int size = src->type->components();
+      assert (size <= this->type->components() - offset);
+      for (unsigned int i=0; i<size; i++) {
+        switch (this->type->base_type) {
+        case GLSL_TYPE_UINT:
+           value.u[i+offset] = src->get_uint_component(i);
+           break;
+        case GLSL_TYPE_INT:
+           value.i[i+offset] = src->get_int_component(i);
+           break;
+        case GLSL_TYPE_FLOAT:
+           value.f[i+offset] = src->get_float_component(i);
+           break;
+        case GLSL_TYPE_BOOL:
+           value.b[i+offset] = src->get_bool_component(i);
+           break;
+        default: // Shut up the compiler
+           break;
+        }
+      }
+      break;
+   }
+
+   case GLSL_TYPE_STRUCT: {
+      assert (src->type == this->type);
+      this->components.make_empty();
+      foreach_list(node, &src->components) {
+        ir_constant *const orig = (ir_constant *) node;
+
+        this->components.push_tail(orig->clone(this, NULL));
+      }
+      break;
+   }
+
+   case GLSL_TYPE_ARRAY: {
+      assert (src->type == this->type);
+      for (unsigned i = 0; i < this->type->length; i++) {
+        this->array_elements[i] = src->array_elements[i]->clone(this, NULL);
+      }
+      break;
+   }
+
+   default:
+      assert(!"Should not get here.");
+      break;
+   }
+}
+
+void
+ir_constant::copy_masked_offset(ir_constant *src, int offset, unsigned int mask)
+{
+   assert (!type->is_array() && !type->is_record());
+
+   if (!type->is_vector() && !type->is_matrix()) {
+      offset = 0;
+      mask = 1;
+   }
+
+   int id = 0;
+   for (int i=0; i<4; i++) {
+      if (mask & (1 << i)) {
+        switch (this->type->base_type) {
+        case GLSL_TYPE_UINT:
+           value.u[i+offset] = src->get_uint_component(id++);
+           break;
+        case GLSL_TYPE_INT:
+           value.i[i+offset] = src->get_int_component(id++);
+           break;
+        case GLSL_TYPE_FLOAT:
+           value.f[i+offset] = src->get_float_component(id++);
+           break;
+        case GLSL_TYPE_BOOL:
+           value.b[i+offset] = src->get_bool_component(id++);
+           break;
+        default:
+           assert(!"Should not get here.");
+           return;
+        }
+      }
+   }
+}
  
  bool
  ir_constant::has_value(const ir_constant *c) const
@@ -1377,6 +1481,7 @@ ir_function_signature::ir_function_signature(const glsl_type *return_type)
  {
     this->ir_type = ir_type_function_signature;
     this->is_builtin = false;
+   this->origin = NULL;
  }
  
  
diff --git a/src/glsl/ir.h b/src/glsl/ir.h

index ddfaf3614aed3287467e8d62cc6e9d8c96402030..9c7961ab92c8642bb2b481cce3177767e1717ca8 100644 (file)
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -146,7 +146,7 @@ public:
  
     virtual ir_visitor_status accept(ir_hierarchical_visitor *);
  
-   virtual ir_constant *constant_expression_value();
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
  
     virtual ir_rvalue * as_rvalue()
     {
@@ -502,10 +502,11 @@ public:
     virtual ir_visitor_status accept(ir_hierarchical_visitor *);
  
     /**
-    * Attempt to evaluate this function as a constant expression, given
-    * a list of the actual parameters.  Returns NULL for non-built-ins.
+    * Attempt to evaluate this function as a constant expression,
+    * given a list of the actual parameters and the variable context.
+    * Returns NULL for non-built-ins.
      */
-   ir_constant *constant_expression_value(exec_list *actual_parameters);
+   ir_constant *constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context);
  
     /**
      * Get the name of the function for which this is a signature
@@ -571,7 +572,25 @@ private:
     /** Function of which this signature is one overload. */
     class ir_function *_function;
  
+   /** Function signature of which this one is a prototype clone */
+   const ir_function_signature *origin;
+
     friend class ir_function;
+
+   /**
+    * Helper function to run a list of instructions for constant
+    * expression evaluation.
+    *
+    * The hash table represents the values of the visible variables.
+    * There are no scoping issues because the table is indexed on
+    * ir_variable pointers, not variable names.
+    *
+    * Returns false if the expression is not constant, true otherwise,
+    * and the value in *result if result is non-NULL.
+    */
+   bool constant_expression_evaluate_expression_list(const struct exec_list &body,
+                                                    struct hash_table *variable_context,
+                                                    ir_constant **result);
  };
  
  
@@ -763,7 +782,7 @@ public:
  
     virtual ir_assignment *clone(void *mem_ctx, struct hash_table *ht) const;
  
-   virtual ir_constant *constant_expression_value();
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
  
     virtual void accept(ir_visitor *v)
     {
@@ -999,10 +1018,14 @@ public:
     /**
      * Attempt to constant-fold the expression
      *
+    * The "variable_context" hash table links ir_variable * to ir_constant *
+    * that represent the variables' values.  \c NULL represents an empty
+    * context.
+    *
      * If the expression cannot be constant folded, this method will return
      * \c NULL.
      */
-   virtual ir_constant *constant_expression_value();
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
  
     /**
      * Determine the number of operands used by an expression
@@ -1065,7 +1088,7 @@ public:
  
     virtual ir_call *clone(void *mem_ctx, struct hash_table *ht) const;
  
-   virtual ir_constant *constant_expression_value();
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
  
     virtual ir_call *as_call()
     {
@@ -1297,7 +1320,7 @@ public:
  
     virtual ir_texture *clone(void *mem_ctx, struct hash_table *) const;
  
-   virtual ir_constant *constant_expression_value();
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
  
     virtual void accept(ir_visitor *v)
     {
@@ -1389,7 +1412,7 @@ public:
  
     virtual ir_swizzle *clone(void *mem_ctx, struct hash_table *) const;
  
-   virtual ir_constant *constant_expression_value();
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
  
     virtual ir_swizzle *as_swizzle()
     {
@@ -1446,6 +1469,15 @@ public:
      * Get the variable that is ultimately referenced by an r-value
      */
     virtual ir_variable *variable_referenced() const = 0;
+
+   /**
+    * Get the constant that is ultimately referenced by an r-value,
+    * in a constant expression evaluation context.
+    *
+    * The offset is used when the reference is to a specific column of
+    * a matrix.
+    */
+  virtual void constant_referenced(struct hash_table *variable_context, ir_constant *&store, int &offset) const = 0;
  };
  
  
@@ -1456,7 +1488,7 @@ public:
     virtual ir_dereference_variable *clone(void *mem_ctx,
                                           struct hash_table *) const;
  
-   virtual ir_constant *constant_expression_value();
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
  
     virtual ir_dereference_variable *as_dereference_variable()
     {
@@ -1471,6 +1503,15 @@ public:
        return this->var;
     }
  
+   /**
+    * Get the constant that is ultimately referenced by an r-value,
+    * in a constant expression evaluation context.
+    *
+    * The offset is used when the reference is to a specific column of
+    * a matrix.
+    */
+   virtual void constant_referenced(struct hash_table *variable_context, ir_constant *&store, int &offset) const;
+
     virtual ir_variable *whole_variable_referenced()
     {
        /* ir_dereference_variable objects always dereference the entire
@@ -1505,7 +1546,7 @@ public:
     virtual ir_dereference_array *clone(void *mem_ctx,
                                        struct hash_table *) const;
  
-   virtual ir_constant *constant_expression_value();
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
  
     virtual ir_dereference_array *as_dereference_array()
     {
@@ -1520,6 +1561,15 @@ public:
        return this->array->variable_referenced();
     }
  
+   /**
+    * Get the constant that is ultimately referenced by an r-value,
+    * in a constant expression evaluation context.
+    *
+    * The offset is used when the reference is to a specific column of
+    * a matrix.
+    */
+   virtual void constant_referenced(struct hash_table *variable_context, ir_constant *&store, int &offset) const;
+
     virtual void accept(ir_visitor *v)
     {
        v->visit(this);
@@ -1544,7 +1594,7 @@ public:
     virtual ir_dereference_record *clone(void *mem_ctx,
                                         struct hash_table *) const;
  
-   virtual ir_constant *constant_expression_value();
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
  
     /**
      * Get the variable that is ultimately referenced by an r-value
@@ -1554,6 +1604,15 @@ public:
        return this->record->variable_referenced();
     }
  
+   /**
+    * Get the constant that is ultimately referenced by an r-value,
+    * in a constant expression evaluation context.
+    *
+    * The offset is used when the reference is to a specific column of
+    * a matrix.
+    */
+   virtual void constant_referenced(struct hash_table *variable_context, ir_constant *&store, int &offset) const;
+
     virtual void accept(ir_visitor *v)
     {
        v->visit(this);
@@ -1609,7 +1668,7 @@ public:
  
     virtual ir_constant *clone(void *mem_ctx, struct hash_table *) const;
  
-   virtual ir_constant *constant_expression_value();
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
  
     virtual ir_constant *as_constant()
     {
@@ -1641,6 +1700,31 @@ public:
  
     ir_constant *get_record_field(const char *name);
  
+   /**
+    * Copy the values on another constant at a given offset.
+    *
+    * The offset is ignored for array or struct copies, it's only for
+    * scalars or vectors into vectors or matrices.
+    *
+    * With identical types on both sides and zero offset it's clone()
+    * without creating a new object.
+    */
+
+   void copy_offset(ir_constant *src, int offset);
+
+   /**
+    * Copy the values on another constant at a given offset and
+    * following an assign-like mask.
+    *
+    * The mask is ignored for scalars.
+    *
+    * Note that this function only handles what assign can handle,
+    * i.e. at most a vector as source and a column of a matrix as
+    * destination.
+    */
+
+   void copy_masked_offset(ir_constant *src, int offset, unsigned int mask);
+
     /**
      * Determine whether a constant has the same value as another constant
      *
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp

index 5a7a71cf6bade3a6392c07c2a155056eeb768a78..591fe7b7756780f7b6701ae1f4b2efc974dfd42f 100644 (file)
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -53,6 +53,7 @@ ir_variable::clone(void *mem_ctx, struct hash_table *ht) const
     var->invariant = this->invariant;
     var->interpolation = this->interpolation;
     var->location = this->location;
+   var->index = this->index;
     var->warn_extension = this->warn_extension;
     var->origin_upper_left = this->origin_upper_left;
     var->pixel_center_integer = this->pixel_center_integer;
@@ -72,12 +73,6 @@ ir_variable::clone(void *mem_ctx, struct hash_table *ht) const
              sizeof(this->state_slots[0]) * var->num_state_slots);
     }
  
-   if (this->explicit_location)
-      var->location = this->location;
-
-   if (this->explicit_index)
-      var->index = this->index;
-
     if (this->constant_value)
        var->constant_value = this->constant_value->clone(mem_ctx, ht);
  
@@ -329,6 +324,7 @@ ir_function_signature::clone_prototype(void *mem_ctx, struct hash_table *ht) con
  
     copy->is_defined = false;
     copy->is_builtin = this->is_builtin;
+   copy->origin = this;
  
     /* Clone the parameter list, but NOT the body.
      */
diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp

index 4e1714a842087bbf338d886811f9d7901c03b88f..08a33285b3a065253fcb0f6fd8bc3ad058663284 100644 (file)
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -38,6 +38,7 @@
  #include "ir.h"
  #include "ir_visitor.h"
  #include "glsl_types.h"
+#include "program/hash_table.h"
  
  /* Using C99 rounding functions for roundToEven() implementation is
   * difficult, because round(), rint, and nearbyint() are affected by
@@ -71,14 +72,14 @@ dot(ir_constant *op0, ir_constant *op1)
  }
  
  ir_constant *
-ir_rvalue::constant_expression_value()
+ir_rvalue::constant_expression_value(struct hash_table *variable_context)
  {
     assert(this->type->is_error());
     return NULL;
  }
  
  ir_constant *
-ir_expression::constant_expression_value()
+ir_expression::constant_expression_value(struct hash_table *variable_context)
  {
     if (this->type->is_error())
        return NULL;
@@ -89,7 +90,7 @@ ir_expression::constant_expression_value()
     memset(&data, 0, sizeof(data));
  
     for (unsigned operand = 0; operand < this->get_num_operands(); operand++) {
-      op[operand] = this->operands[operand]->constant_expression_value();
+      op[operand] = this->operands[operand]->constant_expression_value(variable_context);
        if (!op[operand])
          return NULL;
     }
@@ -640,13 +641,13 @@ ir_expression::constant_expression_value()
        for (unsigned c = 0; c < op[0]->type->components(); c++) {
          switch (op[0]->type->base_type) {
          case GLSL_TYPE_UINT:
-           data.b[0] = op[0]->value.u[0] < op[1]->value.u[0];
+           data.b[c] = op[0]->value.u[c] < op[1]->value.u[c];
             break;
          case GLSL_TYPE_INT:
-           data.b[0] = op[0]->value.i[0] < op[1]->value.i[0];
+           data.b[c] = op[0]->value.i[c] < op[1]->value.i[c];
             break;
          case GLSL_TYPE_FLOAT:
-           data.b[0] = op[0]->value.f[0] < op[1]->value.f[0];
+           data.b[c] = op[0]->value.f[c] < op[1]->value.f[c];
             break;
          default:
             assert(0);
@@ -676,13 +677,13 @@ ir_expression::constant_expression_value()
        for (unsigned c = 0; c < op[0]->type->components(); c++) {
          switch (op[0]->type->base_type) {
          case GLSL_TYPE_UINT:
-           data.b[0] = op[0]->value.u[0] <= op[1]->value.u[0];
+           data.b[c] = op[0]->value.u[c] <= op[1]->value.u[c];
             break;
          case GLSL_TYPE_INT:
-           data.b[0] = op[0]->value.i[0] <= op[1]->value.i[0];
+           data.b[c] = op[0]->value.i[c] <= op[1]->value.i[c];
             break;
          case GLSL_TYPE_FLOAT:
-           data.b[0] = op[0]->value.f[0] <= op[1]->value.f[0];
+           data.b[c] = op[0]->value.f[c] <= op[1]->value.f[c];
             break;
          default:
             assert(0);
@@ -694,13 +695,13 @@ ir_expression::constant_expression_value()
        for (unsigned c = 0; c < op[0]->type->components(); c++) {
          switch (op[0]->type->base_type) {
          case GLSL_TYPE_UINT:
-           data.b[0] = op[0]->value.u[0] >= op[1]->value.u[0];
+           data.b[c] = op[0]->value.u[c] >= op[1]->value.u[c];
             break;
          case GLSL_TYPE_INT:
-           data.b[0] = op[0]->value.i[0] >= op[1]->value.i[0];
+           data.b[c] = op[0]->value.i[c] >= op[1]->value.i[c];
             break;
          case GLSL_TYPE_FLOAT:
-           data.b[0] = op[0]->value.f[0] >= op[1]->value.f[0];
+           data.b[c] = op[0]->value.f[c] >= op[1]->value.f[c];
             break;
          default:
             assert(0);
@@ -886,7 +887,7 @@ ir_expression::constant_expression_value()
  
  
  ir_constant *
-ir_texture::constant_expression_value()
+ir_texture::constant_expression_value(struct hash_table *variable_context)
  {
     /* texture lookups aren't constant expressions */
     return NULL;
@@ -894,9 +895,9 @@ ir_texture::constant_expression_value()
  
  
  ir_constant *
-ir_swizzle::constant_expression_value()
+ir_swizzle::constant_expression_value(struct hash_table *variable_context)
  {
-   ir_constant *v = this->val->constant_expression_value();
+   ir_constant *v = this->val->constant_expression_value(variable_context);
  
     if (v != NULL) {
        ir_constant_data data = { { 0 } };
@@ -922,13 +923,33 @@ ir_swizzle::constant_expression_value()
  }
  
  
+void
+ir_dereference_variable::constant_referenced(struct hash_table *variable_context,
+                                            ir_constant *&store, int &offset) const
+{  /* Outputs: store = whatever variable_context holds for var (NULL without a context); offset = 0. */
+   if (variable_context) {
+      store = (ir_constant *)hash_table_find(variable_context, var);  /* lookup uses var itself as the key */
+      offset = 0;  /* both branches report offset 0 for a plain variable reference */
+   } else {
+      store = NULL;  /* no evaluation context supplied: nothing to resolve against */
+      offset = 0;
+   }
+}
+
  ir_constant *
-ir_dereference_variable::constant_expression_value()
+ir_dereference_variable::constant_expression_value(struct hash_table *variable_context)
  {
     /* This may occur during compile and var->type is glsl_type::error_type */
     if (!var)
        return NULL;
  
+   /* Give priority to the context hashtable, if it exists */
+   if (variable_context) {
+      ir_constant *value = (ir_constant *)hash_table_find(variable_context, var);
+      if(value)
+        return value;
+   }
+
     /* The constant_value of a uniform variable is its initializer,
      * not the lifetime constant value of the uniform.
      */
@@ -942,11 +963,65 @@ ir_dereference_variable::constant_expression_value()
  }
  
  
+void
+ir_dereference_array::constant_referenced(struct hash_table *variable_context,
+                                         ir_constant *&store, int &offset) const
+{  /* Resolve array[array_index] to a backing constant (store) plus an offset; store = 0 signals failure. */
+   ir_constant *index_c = array_index->constant_expression_value(variable_context);  /* the index must itself fold to a constant */
+
+   if (!index_c || !index_c->type->is_scalar() || !index_c->type->is_integer()) {  /* only scalar integer indices are accepted */
+      store = 0;
+      offset = 0;
+      return;
+   }
+
+   int index = index_c->type->base_type == GLSL_TYPE_INT ?  /* NOTE(review): index is not range-checked anywhere in this function */
+      index_c->get_int_component(0) :
+      index_c->get_uint_component(0);
+
+   ir_constant *substore;  /* filled in by the recursive constant_referenced call below */
+   int suboffset;
+   const ir_dereference *deref = array->as_dereference();
+   if (!deref) {  /* the indexed r-value is not a dereference: give up */
+      store = 0;
+      offset = 0;
+      return;
+   }
+
+   deref->constant_referenced(variable_context, substore, suboffset);  /* resolve the thing being indexed first */
+
+   if (!substore) {  /* inner reference could not be resolved */
+      store = 0;
+      offset = 0;
+      return;
+   }
+
+   const glsl_type *vt = substore->type;  /* dispatch on the type of the resolved inner constant */
+   if (vt->is_array()) {
+      store = substore->get_array_element(index);  /* array: the element constant becomes the store */
+      offset = 0;
+      return;
+   }
+   if (vt->is_matrix()) {
+      store = substore;
+      offset = index * vt->vector_elements;  /* matrix: offset selects a column; suboffset is not added here */
+      return;
+   }
+   if (vt->is_vector()) {
+      store = substore;
+      offset = suboffset + index;  /* vector: index is added to the offset reported by the inner dereference */
+      return;
+   }
+
+   store = 0;  /* any other type is not handled: report failure */
+   offset = 0;
+}
+
  ir_constant *
-ir_dereference_array::constant_expression_value()
+ir_dereference_array::constant_expression_value(struct hash_table *variable_context)
  {
-   ir_constant *array = this->array->constant_expression_value();
-   ir_constant *idx = this->array_index->constant_expression_value();
+   ir_constant *array = this->array->constant_expression_value(variable_context);
+   ir_constant *idx = this->array_index->constant_expression_value(variable_context);
  
     if ((array != NULL) && (idx != NULL)) {
        void *ctx = ralloc_parent(this);
@@ -997,8 +1072,33 @@ ir_dereference_array::constant_expression_value()
  }
  
  
+void
+ir_dereference_record::constant_referenced(struct hash_table *variable_context,
+                                          ir_constant *&store, int &offset) const
+{  /* Resolve record.field to the field's constant inside the resolved record; store = 0 signals failure. */
+   ir_constant *substore;  /* filled in by the recursive constant_referenced call below */
+   int suboffset;  /* received from the inner call but not used afterwards */
+   const ir_dereference *deref = record->as_dereference();
+   if (!deref) {  /* the record r-value is not a dereference: give up */
+      store = 0;
+      offset = 0;
+      return;
+   }
+
+   deref->constant_referenced(variable_context, substore, suboffset);  /* resolve the record itself first */
+
+   if (!substore) {  /* record could not be resolved */
+      store = 0;
+      offset = 0;
+      return;
+   }
+
+   store = substore->get_record_field(field);  /* the named field's constant becomes the store */
+   offset = 0;  /* a field reference always reports offset 0 */
+}
+
  ir_constant *
-ir_dereference_record::constant_expression_value()
+ir_dereference_record::constant_expression_value(struct hash_table *variable_context)
  {
     ir_constant *v = this->record->constant_expression_value();
  
@@ -1007,7 +1107,7 @@ ir_dereference_record::constant_expression_value()
  
  
  ir_constant *
-ir_assignment::constant_expression_value()
+ir_assignment::constant_expression_value(struct hash_table *variable_context)
  {
     /* FINISHME: Handle CEs involving assignment (return RHS) */
     return NULL;
@@ -1015,21 +1115,130 @@ ir_assignment::constant_expression_value()
  
  
  ir_constant *
-ir_constant::constant_expression_value()
+ir_constant::constant_expression_value(struct hash_table *variable_context)
  {
     return this;
  }
  
  
  ir_constant *
-ir_call::constant_expression_value()
+ir_call::constant_expression_value(struct hash_table *variable_context)
  {
-   return this->callee->constant_expression_value(&this->actual_parameters);
+   return this->callee->constant_expression_value(&this->actual_parameters, variable_context);
  }
  
  
+bool ir_function_signature::constant_expression_evaluate_expression_list(const struct exec_list &body,
+                                                                        struct hash_table *variable_context,
+                                                                        ir_constant **result)
+{  /* Walk the instructions in body, updating variable_context; returns false as soon as one cannot be evaluated. */
+   foreach_list(n, &body) {
+      ir_instruction *inst = (ir_instruction *)n;
+      switch(inst->ir_type) {
+
+        /* (declare () type symbol) */
+      case ir_type_variable: {
+        ir_variable *var = inst->as_variable();
+        hash_table_insert(variable_context, ir_constant::zero(this, var->type), var);  /* bind the declared variable to ir_constant::zero of its type */
+        break;
+      }
+
+        /* (assign [condition] (write-mask) (ref) (value)) */
+      case ir_type_assignment: {
+        ir_assignment *asg = inst->as_assignment();
+        if (asg->condition) {
+           ir_constant *cond = asg->condition->constant_expression_value(variable_context);
+           if (!cond)  /* condition is not a constant expression */
+              return false;
+           if (!cond->get_bool_component(0))  /* condition is false: skip this assignment */
+              break;
+        }
+
+        ir_constant *store = NULL;
+        int offset = 0;
+        asg->lhs->constant_referenced(variable_context, store, offset);  /* locate the constant backing the left-hand side */
+
+        if (!store)
+           return false;
+
+        ir_constant *value = asg->rhs->constant_expression_value(variable_context);
+
+        if (!value)
+           return false;
+
+        store->copy_masked_offset(value, offset, asg->write_mask);  /* update the bound constant in place, passing the write mask */
+        break;
+      }
+
+        /* (return (expression)) */
+      case ir_type_return:
+        assert (result);  /* a return needs somewhere to put its value */
+        *result = inst->as_return()->value->constant_expression_value(variable_context);
+        return *result != NULL;  /* a return value that does not fold is a failure */
+
+        /* (call name (ref) (params))*/
+      case ir_type_call: {
+        ir_call *call = inst->as_call();
+
+        /* Just say no to void functions in constant expressions.  We
+         * don't need them at that point.
+         */
+
+        if (!call->return_deref)
+           return false;
+
+        ir_constant *store = NULL;
+        int offset = 0;
+        call->return_deref->constant_referenced(variable_context, store, offset);  /* locate the constant that receives the call's result */
+
+        if (!store)
+           return false;
+
+        ir_constant *value = call->constant_expression_value(variable_context);  /* evaluate the call itself */
+
+        if(!value)
+           return false;
+
+        store->copy_offset(value, offset);  /* unmasked copy of the call result into the destination */
+        break;
+      }
+
+        /* (if condition (then-instructions) (else-instructions)) */
+      case ir_type_if: {
+        ir_if *iif = inst->as_if();
+
+        ir_constant *cond = iif->condition->constant_expression_value(variable_context);
+        if (!cond || !cond->type->is_boolean())
+           return false;
+
+        exec_list &branch = cond->get_bool_component(0) ? iif->then_instructions : iif->else_instructions;  /* only the selected branch is evaluated */
+
+        *result = NULL;  /* NOTE(review): result is dereferenced here without a NULL check, unlike at the end of this function */
+        if (!constant_expression_evaluate_expression_list(branch, variable_context, result))  /* recurse into the chosen branch */
+           return false;
+
+        /* If there was a return in the branch chosen, drop out now. */
+        if (*result)
+           return true;
+
+        break;
+      }
+
+        /* Every other expression type, we drop out. */
+      default:
+        return false;
+      }
+   }
+
+   /* Reaching the end of the block is not an error condition */
+   if (result)  /* result may be NULL on this path */
+      *result = NULL;
+
+   return true;
+}
+
  ir_constant *
-ir_function_signature::constant_expression_value(exec_list *actual_parameters)
+ir_function_signature::constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context)
  {
     const glsl_type *type = this->return_type;
     if (type == glsl_type::void_type)
@@ -1042,396 +1251,48 @@ ir_function_signature::constant_expression_value(exec_list *actual_parameters)
     if (!this->is_builtin)
        return NULL;
  
-   unsigned num_parameters = 0;
+   /*
+    * Of the builtin functions, only the texture lookups and the noise
+    * ones must not be used in constant expressions.  They all include
+    * specific opcodes so they don't need to be special-cased at this
+    * point.
+    */
+
+   /* Initialize the table of dereferencable names with the function
+    * parameters.  Verify their const-ness on the way.
+    *
+    * We expect the correctness of the number of parameters to have
+    * been checked earlier.
+    */
+   hash_table *deref_hash = hash_table_ctor(8, hash_table_pointer_hash,
+                                           hash_table_pointer_compare);
+
+   /* If "origin" is non-NULL, then the function body is there.  So we
+    * have to use the variable objects from the object with the body,
+    * but the parameter instantiation on the current object.
+    */
+   const exec_node *parameter_info = origin ? origin->parameters.head : parameters.head;
  
-   /* Check if all parameters are constant */
-   ir_constant *op[3];
     foreach_list(n, actual_parameters) {
-      ir_constant *constant = ((ir_rvalue *) n)->constant_expression_value();
+      ir_constant *constant = ((ir_rvalue *) n)->constant_expression_value(variable_context);
        if (constant == NULL)
          return NULL;
  
-      op[num_parameters] = constant;
+      ir_variable *var = (ir_variable *)parameter_info;
+      hash_table_insert(deref_hash, constant, var);
  
-      assert(num_parameters < 3);
-      num_parameters++;
+      parameter_info = parameter_info->next;
     }
  
-   /* Individual cases below can either:
-    * - Assign "expr" a new ir_expression to evaluate (for basic opcodes)
-    * - Fill "data" with appopriate constant data
-    * - Return an ir_constant directly.
-    */
-   void *mem_ctx = ralloc_parent(this);
-   ir_expression *expr = NULL;
-
-   ir_constant_data data;
-   memset(&data, 0, sizeof(data));
-
-   const char *callee = this->function_name();
-   if (strcmp(callee, "abs") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_abs, type, op[0], NULL);
-   } else if (strcmp(callee, "all") == 0) {
-      assert(op[0]->type->is_boolean());
-      for (unsigned c = 0; c < op[0]->type->components(); c++) {
-        if (!op[0]->value.b[c])
-           return new(mem_ctx) ir_constant(false);
-      }
-      return new(mem_ctx) ir_constant(true);
-   } else if (strcmp(callee, "any") == 0) {
-      assert(op[0]->type->is_boolean());
-      for (unsigned c = 0; c < op[0]->type->components(); c++) {
-        if (op[0]->value.b[c])
-           return new(mem_ctx) ir_constant(true);
-      }
-      return new(mem_ctx) ir_constant(false);
-   } else if (strcmp(callee, "acos") == 0) {
-      assert(op[0]->type->is_float());
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = acosf(op[0]->value.f[c]);
-   } else if (strcmp(callee, "acosh") == 0) {
-      assert(op[0]->type->is_float());
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = acoshf(op[0]->value.f[c]);
-   } else if (strcmp(callee, "asin") == 0) {
-      assert(op[0]->type->is_float());
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = asinf(op[0]->value.f[c]);
-   } else if (strcmp(callee, "asinh") == 0) {
-      assert(op[0]->type->is_float());
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = asinhf(op[0]->value.f[c]);
-   } else if (strcmp(callee, "atan") == 0) {
-      assert(op[0]->type->is_float());
-      if (num_parameters == 2) {
-        assert(op[1]->type->is_float());
-        for (unsigned c = 0; c < op[0]->type->components(); c++)
-           data.f[c] = atan2f(op[0]->value.f[c], op[1]->value.f[c]);
-      } else {
-        for (unsigned c = 0; c < op[0]->type->components(); c++)
-           data.f[c] = atanf(op[0]->value.f[c]);
-      }
-   } else if (strcmp(callee, "atanh") == 0) {
-      assert(op[0]->type->is_float());
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = atanhf(op[0]->value.f[c]);
-   } else if (strcmp(callee, "dFdx") == 0 || strcmp(callee, "dFdy") == 0) {
-      return ir_constant::zero(mem_ctx, type);
-   } else if (strcmp(callee, "ceil") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_ceil, type, op[0], NULL);
-   } else if (strcmp(callee, "clamp") == 0) {
-      assert(num_parameters == 3);
-      unsigned c1_inc = op[1]->type->is_scalar() ? 0 : 1;
-      unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1;
-      for (unsigned c = 0, c1 = 0, c2 = 0;
-          c < op[0]->type->components();
-          c1 += c1_inc, c2 += c2_inc, c++) {
-
-        switch (op[0]->type->base_type) {
-        case GLSL_TYPE_UINT:
-           data.u[c] = CLAMP(op[0]->value.u[c], op[1]->value.u[c1],
-                             op[2]->value.u[c2]);
-           break;
-        case GLSL_TYPE_INT:
-           data.i[c] = CLAMP(op[0]->value.i[c], op[1]->value.i[c1],
-                             op[2]->value.i[c2]);
-           break;
-        case GLSL_TYPE_FLOAT:
-           data.f[c] = CLAMP(op[0]->value.f[c], op[1]->value.f[c1],
-                             op[2]->value.f[c2]);
-           break;
-        default:
-           assert(!"Should not get here.");
-        }
-      }
-   } else if (strcmp(callee, "cos") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_cos, type, op[0], NULL);
-   } else if (strcmp(callee, "cosh") == 0) {
-      assert(op[0]->type->is_float());
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = coshf(op[0]->value.f[c]);
-   } else if (strcmp(callee, "cross") == 0) {
-      assert(op[0]->type == glsl_type::vec3_type);
-      assert(op[1]->type == glsl_type::vec3_type);
-      data.f[0] = (op[0]->value.f[1] * op[1]->value.f[2] -
-                  op[1]->value.f[1] * op[0]->value.f[2]);
-      data.f[1] = (op[0]->value.f[2] * op[1]->value.f[0] -
-                  op[1]->value.f[2] * op[0]->value.f[0]);
-      data.f[2] = (op[0]->value.f[0] * op[1]->value.f[1] -
-                  op[1]->value.f[0] * op[0]->value.f[1]);
-   } else if (strcmp(callee, "degrees") == 0) {
-      assert(op[0]->type->is_float());
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = 180.0F / M_PI * op[0]->value.f[c];
-   } else if (strcmp(callee, "distance") == 0) {
-      assert(op[0]->type->is_float() && op[1]->type->is_float());
-      float length_squared = 0.0;
-      for (unsigned c = 0; c < op[0]->type->components(); c++) {
-        float t = op[0]->value.f[c] - op[1]->value.f[c];
-        length_squared += t * t;
-      }
-      return new(mem_ctx) ir_constant(sqrtf(length_squared));
-   } else if (strcmp(callee, "dot") == 0) {
-      return new(mem_ctx) ir_constant(dot(op[0], op[1]));
-   } else if (strcmp(callee, "equal") == 0) {
-      assert(op[0]->type->is_vector() && op[1] && op[1]->type->is_vector());
-      for (unsigned c = 0; c < op[0]->type->components(); c++) {
-        switch (op[0]->type->base_type) {
-        case GLSL_TYPE_UINT:
-           data.b[c] = op[0]->value.u[c] == op[1]->value.u[c];
-           break;
-        case GLSL_TYPE_INT:
-           data.b[c] = op[0]->value.i[c] == op[1]->value.i[c];
-           break;
-        case GLSL_TYPE_FLOAT:
-           data.b[c] = op[0]->value.f[c] == op[1]->value.f[c];
-           break;
-        case GLSL_TYPE_BOOL:
-           data.b[c] = op[0]->value.b[c] == op[1]->value.b[c];
-           break;
-        default:
-           assert(!"Should not get here.");
-        }
-      }
-   } else if (strcmp(callee, "exp") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_exp, type, op[0], NULL);
-   } else if (strcmp(callee, "exp2") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_exp2, type, op[0], NULL);
-   } else if (strcmp(callee, "faceforward") == 0) {
-      if (dot(op[2], op[1]) < 0)
-        return op[0];
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = -op[0]->value.f[c];
-   } else if (strcmp(callee, "floor") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_floor, type, op[0], NULL);
-   } else if (strcmp(callee, "fract") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_fract, type, op[0], NULL);
-   } else if (strcmp(callee, "fwidth") == 0) {
-      return ir_constant::zero(mem_ctx, type);
-   } else if (strcmp(callee, "greaterThan") == 0) {
-      assert(op[0]->type->is_vector() && op[1] && op[1]->type->is_vector());
-      for (unsigned c = 0; c < op[0]->type->components(); c++) {
-        switch (op[0]->type->base_type) {
-        case GLSL_TYPE_UINT:
-           data.b[c] = op[0]->value.u[c] > op[1]->value.u[c];
-           break;
-        case GLSL_TYPE_INT:
-           data.b[c] = op[0]->value.i[c] > op[1]->value.i[c];
-           break;
-        case GLSL_TYPE_FLOAT:
-           data.b[c] = op[0]->value.f[c] > op[1]->value.f[c];
-           break;
-        default:
-           assert(!"Should not get here.");
-        }
-      }
-   } else if (strcmp(callee, "greaterThanEqual") == 0) {
-      assert(op[0]->type->is_vector() && op[1] && op[1]->type->is_vector());
-      for (unsigned c = 0; c < op[0]->type->components(); c++) {
-        switch (op[0]->type->base_type) {
-        case GLSL_TYPE_UINT:
-           data.b[c] = op[0]->value.u[c] >= op[1]->value.u[c];
-           break;
-        case GLSL_TYPE_INT:
-           data.b[c] = op[0]->value.i[c] >= op[1]->value.i[c];
-           break;
-        case GLSL_TYPE_FLOAT:
-           data.b[c] = op[0]->value.f[c] >= op[1]->value.f[c];
-           break;
-        default:
-           assert(!"Should not get here.");
-        }
-      }
-   } else if (strcmp(callee, "inversesqrt") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_rsq, type, op[0], NULL);
-   } else if (strcmp(callee, "length") == 0) {
-      return new(mem_ctx) ir_constant(sqrtf(dot(op[0], op[0])));
-   } else if (strcmp(callee, "lessThan") == 0) {
-      assert(op[0]->type->is_vector() && op[1] && op[1]->type->is_vector());
-      for (unsigned c = 0; c < op[0]->type->components(); c++) {
-        switch (op[0]->type->base_type) {
-        case GLSL_TYPE_UINT:
-           data.b[c] = op[0]->value.u[c] < op[1]->value.u[c];
-           break;
-        case GLSL_TYPE_INT:
-           data.b[c] = op[0]->value.i[c] < op[1]->value.i[c];
-           break;
-        case GLSL_TYPE_FLOAT:
-           data.b[c] = op[0]->value.f[c] < op[1]->value.f[c];
-           break;
-        default:
-           assert(!"Should not get here.");
-        }
-      }
-   } else if (strcmp(callee, "lessThanEqual") == 0) {
-      assert(op[0]->type->is_vector() && op[1] && op[1]->type->is_vector());
-      for (unsigned c = 0; c < op[0]->type->components(); c++) {
-        switch (op[0]->type->base_type) {
-        case GLSL_TYPE_UINT:
-           data.b[c] = op[0]->value.u[c] <= op[1]->value.u[c];
-           break;
-        case GLSL_TYPE_INT:
-           data.b[c] = op[0]->value.i[c] <= op[1]->value.i[c];
-           break;
-        case GLSL_TYPE_FLOAT:
-           data.b[c] = op[0]->value.f[c] <= op[1]->value.f[c];
-           break;
-        default:
-           assert(!"Should not get here.");
-        }
-      }
-   } else if (strcmp(callee, "log") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_log, type, op[0], NULL);
-   } else if (strcmp(callee, "log2") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_log2, type, op[0], NULL);
-   } else if (strcmp(callee, "matrixCompMult") == 0) {
-      assert(op[0]->type->is_float() && op[1]->type->is_float());
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = op[0]->value.f[c] * op[1]->value.f[c];
-   } else if (strcmp(callee, "max") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_binop_max, type, op[0], op[1]);
-   } else if (strcmp(callee, "min") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_binop_min, type, op[0], op[1]);
-   } else if (strcmp(callee, "mix") == 0) {
-      assert(op[0]->type->is_float() && op[1]->type->is_float());
-      if (op[2]->type->is_float()) {
-        unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1;
-        unsigned components = op[0]->type->components();
-        for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) {
-           data.f[c] = op[0]->value.f[c] * (1 - op[2]->value.f[c2]) +
-                       op[1]->value.f[c] * op[2]->value.f[c2];
-        }
-      } else {
-        assert(op[2]->type->is_boolean());
-        for (unsigned c = 0; c < op[0]->type->components(); c++)
-           data.f[c] = op[op[2]->value.b[c] ? 1 : 0]->value.f[c];
-      }
-   } else if (strcmp(callee, "mod") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_binop_mod, type, op[0], op[1]);
-   } else if (strcmp(callee, "normalize") == 0) {
-      assert(op[0]->type->is_float());
-      float length = sqrtf(dot(op[0], op[0]));
-
-      if (length == 0)
-        return ir_constant::zero(mem_ctx, type);
+   ir_constant *result = NULL;
  
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = op[0]->value.f[c] / length;
-   } else if (strcmp(callee, "not") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_logic_not, type, op[0], NULL);
-   } else if (strcmp(callee, "notEqual") == 0) {
-      assert(op[0]->type->is_vector() && op[1] && op[1]->type->is_vector());
-      for (unsigned c = 0; c < op[0]->type->components(); c++) {
-        switch (op[0]->type->base_type) {
-        case GLSL_TYPE_UINT:
-           data.b[c] = op[0]->value.u[c] != op[1]->value.u[c];
-           break;
-        case GLSL_TYPE_INT:
-           data.b[c] = op[0]->value.i[c] != op[1]->value.i[c];
-           break;
-        case GLSL_TYPE_FLOAT:
-           data.b[c] = op[0]->value.f[c] != op[1]->value.f[c];
-           break;
-        case GLSL_TYPE_BOOL:
-           data.b[c] = op[0]->value.b[c] != op[1]->value.b[c];
-           break;
-        default:
-           assert(!"Should not get here.");
-        }
-      }
-   } else if (strcmp(callee, "outerProduct") == 0) {
-      assert(op[0]->type->is_vector() && op[1]->type->is_vector());
-      const unsigned m = op[0]->type->vector_elements;
-      const unsigned n = op[1]->type->vector_elements;
-      for (unsigned j = 0; j < n; j++) {
-        for (unsigned i = 0; i < m; i++) {
-           data.f[i+m*j] = op[0]->value.f[i] * op[1]->value.f[j];
-        }
-      }
-   } else if (strcmp(callee, "pow") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_binop_pow, type, op[0], op[1]);
-   } else if (strcmp(callee, "radians") == 0) {
-      assert(op[0]->type->is_float());
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = M_PI / 180.0F * op[0]->value.f[c];
-   } else if (strcmp(callee, "reflect") == 0) {
-      assert(op[0]->type->is_float());
-      float dot_NI = dot(op[1], op[0]);
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = op[0]->value.f[c] - 2 * dot_NI * op[1]->value.f[c];
-   } else if (strcmp(callee, "refract") == 0) {
-      const float eta = op[2]->value.f[0];
-      const float dot_NI = dot(op[1], op[0]);
-      const float k = 1.0F - eta * eta * (1.0F - dot_NI * dot_NI);
-      if (k < 0.0) {
-        return ir_constant::zero(mem_ctx, type);
-      } else {
-        for (unsigned c = 0; c < type->components(); c++) {
-           data.f[c] = eta * op[0]->value.f[c] - (eta * dot_NI + sqrtf(k))
-                           * op[1]->value.f[c];
-        }
-      }
-   } else if (strcmp(callee, "round") == 0 ||
-             strcmp(callee, "roundEven") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_round_even, op[0]);
-   } else if (strcmp(callee, "sign") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_sign, type, op[0], NULL);
-   } else if (strcmp(callee, "sin") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_sin, type, op[0], NULL);
-   } else if (strcmp(callee, "sinh") == 0) {
-      assert(op[0]->type->is_float());
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = sinhf(op[0]->value.f[c]);
-   } else if (strcmp(callee, "smoothstep") == 0) {
-      assert(num_parameters == 3);
-      assert(op[1]->type == op[0]->type);
-      unsigned edge_inc = op[0]->type->is_scalar() ? 0 : 1;
-      for (unsigned c = 0, e = 0; c < type->components(); e += edge_inc, c++) {
-        const float edge0 = op[0]->value.f[e];
-        const float edge1 = op[1]->value.f[e];
-        if (edge0 == edge1) {
-           data.f[c] = 0.0; /* Avoid a crash - results are undefined anyway */
-        } else {
-           const float numerator = op[2]->value.f[c] - edge0;
-           const float denominator = edge1 - edge0;
-           const float t = CLAMP(numerator/denominator, 0, 1);
-           data.f[c] = t * t * (3 - 2 * t);
-        }
-      }
-   } else if (strcmp(callee, "sqrt") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_sqrt, type, op[0], NULL);
-   } else if (strcmp(callee, "step") == 0) {
-      assert(op[0]->type->is_float() && op[1]->type->is_float());
-      /* op[0] (edge) may be either a scalar or a vector */
-      const unsigned c0_inc = op[0]->type->is_scalar() ? 0 : 1;
-      for (unsigned c = 0, c0 = 0; c < type->components(); c0 += c0_inc, c++)
-        data.f[c] = (op[1]->value.f[c] < op[0]->value.f[c0]) ? 0.0F : 1.0F;
-   } else if (strcmp(callee, "tan") == 0) {
-      assert(op[0]->type->is_float());
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = tanf(op[0]->value.f[c]);
-   } else if (strcmp(callee, "tanh") == 0) {
-      assert(op[0]->type->is_float());
-      for (unsigned c = 0; c < op[0]->type->components(); c++)
-        data.f[c] = tanhf(op[0]->value.f[c]);
-   } else if (strcmp(callee, "transpose") == 0) {
-      assert(op[0]->type->is_matrix());
-      const unsigned n = op[0]->type->vector_elements;
-      const unsigned m = op[0]->type->matrix_columns;
-      for (unsigned j = 0; j < m; j++) {
-        for (unsigned i = 0; i < n; i++) {
-           data.f[m*i+j] += op[0]->value.f[i+n*j];
-        }
-      }
-   } else if (strcmp(callee, "trunc") == 0) {
-      expr = new(mem_ctx) ir_expression(ir_unop_trunc, op[0]);
-   } else {
-      /* Unsupported builtin - some are not allowed in constant expressions. */
-      return NULL;
-   }
+   /* Now run the builtin function until something non-constant
+    * happens or we get the result.
+    */
+   if (constant_expression_evaluate_expression_list(origin ? origin->body : body, deref_hash, &result) && result)
+      result = result->clone(ralloc_parent(this), NULL);
  
-   if (expr != NULL)
-      return expr->constant_expression_value();
+   hash_table_dtor(deref_hash);
  
-   return new(mem_ctx) ir_constant(type, &data);
+   return result;
  }
diff --git a/src/glsl/opt_copy_propagation_elements.cpp b/src/glsl/opt_copy_propagation_elements.cpp

index 314db4e187ff7d74f8fed2d5b56fb3d6c2c4e071..11d9d7baf7294d459060301312111b4848f83599 100644 (file)
--- a/src/glsl/opt_copy_propagation_elements.cpp
+++ b/src/glsl/opt_copy_propagation_elements.cpp
@@ -93,6 +93,7 @@ public:
     ir_copy_propagation_elements_visitor()
     {
        this->progress = false;
+      this->killed_all = false;
        this->mem_ctx = ralloc_context(NULL);
        this->shader_mem_ctx = NULL;
        this->acp = new(mem_ctx) exec_list;
diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp

index 24cc64ad97b172a525726e471dce394428335508..f15f2d882debee728cfc1d99c9d96212017791ab 100644 (file)
--- a/src/glsl/standalone_scaffolding.cpp
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -41,6 +41,12 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
     *ptr = sh;
  }
  
+void
+_mesa_shader_debug(struct gl_context *, GLenum, GLuint,
+                   const char *, int)
+{ /* Deliberately empty: all five (unnamed) arguments are ignored. */
+}
+
  struct gl_shader *
  _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type)
  {
diff --git a/src/glsl/standalone_scaffolding.h b/src/glsl/standalone_scaffolding.h

index 877332006704beda186abe7c23c1439442cd9d57..41ce35befc628c2f8ab160940502077c6e52c1db 100644 (file)
--- a/src/glsl/standalone_scaffolding.h
+++ b/src/glsl/standalone_scaffolding.h
@@ -40,6 +40,10 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
  extern "C" struct gl_shader *
  _mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type);
  
+extern "C" void
+_mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint id,
+                   const char *msg, int len);
+
  /**
   * Initialize the given gl_context structure to a reasonable set of
   * defaults representing the minimum capabilities required by the
diff --git a/src/glx/apple/Makefile b/src/glx/apple/Makefile

index dc64295f73c907c4d85c0f8de9b1e24682bd1e7c..68fe6ad745ba9479d8871e27062bd6872dbfafe3 100644 (file)
--- a/src/glx/apple/Makefile
+++ b/src/glx/apple/Makefile
@@ -26,6 +26,7 @@ SOURCES = \
         apple_glx.c \
         apple_glx_context.c \
         apple_glx_drawable.c \
+       apple_glx_log.c \
         apple_glx_pbuffer.c \
         apple_glx_pixmap.c \
         apple_glx_surface.c \
diff --git a/src/glx/apple/apple_glx.c b/src/glx/apple/apple_glx.c

index d94c1e0fb16c0ed6b1a766cce971fb90bd86397c..56cff64a15be7408104c91350a0167117d2a997f 100644 (file)
--- a/src/glx/apple/apple_glx.c
+++ b/src/glx/apple/apple_glx.c
@@ -33,6 +33,8 @@
  #include <assert.h>
  #include <stdarg.h>
  #include <dlfcn.h>
+#include <pthread.h>
+#include <inttypes.h>
  #include "appledri.h"
  #include "apple_glx.h"
  #include "apple_glx_context.h"
@@ -43,22 +45,6 @@ static int dri_event_base = 0;
  
  const GLuint __glXDefaultPixelStore[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 1 };
  
-static bool diagnostic = false;
-
-void
-apple_glx_diagnostic(const char *fmt, ...)
-{
-   va_list vl;
-
-   if (diagnostic) {
-      fprintf(stderr, "DIAG: ");
-
-      va_start(vl, fmt);
-      vfprintf(stderr, fmt, vl);
-      va_end(vl);
-   }
-}
-
  int
  apple_get_dri_event_base(void)
  {
@@ -125,10 +111,9 @@ apple_init_glx(Display * dpy)
     if (initialized)
        return false;
  
-   if (getenv("LIBGL_DIAGNOSTIC")) {
-      printf("initializing libGL in %s\n", __func__);
-      diagnostic = true;
-   }
+   apple_glx_log_init();
+
+   apple_glx_log(ASL_LEVEL_INFO, "Initializing libGL.");
  
     apple_cgl_init();
     (void) apple_glx_get_client_id();
diff --git a/src/glx/apple/apple_glx.h b/src/glx/apple/apple_glx.h

index ce8c4884d1aaea63c39358c64147a2a95def7329..0967f1812a05ba3bf551adc774aaf0b4cc0b8038 100644 (file)
--- a/src/glx/apple/apple_glx.h
+++ b/src/glx/apple/apple_glx.h
@@ -38,7 +38,8 @@
  #define XP_NO_X_HEADERS
  #include <Xplugin.h>
  
-void apple_glx_diagnostic(const char *fmt, ...);
+#include "apple_glx_log.h"
+
  xp_client_id apple_glx_get_client_id(void);
  bool apple_init_glx(Display * dpy);
  void apple_glx_swap_buffers(void *ptr);
diff --git a/src/glx/apple/apple_glx_context.c b/src/glx/apple/apple_glx_context.c

index c58d05a59afaf342e511e27b631376994d1c4971..0bb25b425753b6f93f73194e768a8c024ad6194e 100644 (file)
--- a/src/glx/apple/apple_glx_context.c
+++ b/src/glx/apple/apple_glx_context.c
@@ -421,7 +421,7 @@ apple_glx_make_current_context(Display * dpy, void *oldptr, void *ptr,
      */
  
     if (same_drawable && ac->is_current) {
-      apple_glx_diagnostic("%s: same_drawable and ac->is_current\n");
+      apple_glx_diagnostic("same_drawable and ac->is_current\n");
        return false;
     }
  
diff --git a/src/glx/apple/apple_glx_drawable.c b/src/glx/apple/apple_glx_drawable.c

index db283023a63bfcc56ae85f48e160cda94943f814..3f84d560c539573aed6a16e574aab0ca001e45a4 100644 (file)
--- a/src/glx/apple/apple_glx_drawable.c
+++ b/src/glx/apple/apple_glx_drawable.c
@@ -32,6 +32,7 @@
  #include <stdlib.h>
  #include <assert.h>
  #include <pthread.h>
+#include <string.h>
  #include "apple_glx.h"
  #include "apple_glx_context.h"
  #include "apple_glx_drawable.h"
@@ -48,8 +49,8 @@ lock_drawables_list(void)
     err = pthread_mutex_lock(&drawables_lock);
  
     if (err) {
-      fprintf(stderr, "pthread_mutex_lock failure in %s: %d\n",
-              __func__, err);
+      fprintf(stderr, "pthread_mutex_lock failure in %s: %s\n",
+              __func__, strerror(err));
        abort();
     }
  }
@@ -62,8 +63,8 @@ unlock_drawables_list(void)
     err = pthread_mutex_unlock(&drawables_lock);
  
     if (err) {
-      fprintf(stderr, "pthread_mutex_unlock failure in %s: %d\n",
-              __func__, err);
+      fprintf(stderr, "pthread_mutex_unlock failure in %s: %s\n",
+              __func__, strerror(err));
        abort();
     }
  }
@@ -95,7 +96,7 @@ drawable_lock(struct apple_glx_drawable *agd)
     err = pthread_mutex_lock(&agd->mutex);
  
     if (err) {
-      fprintf(stderr, "pthread_mutex_lock error: %d\n", err);
+      fprintf(stderr, "pthread_mutex_lock error: %s\n", strerror(err));
        abort();
     }
  }
@@ -108,7 +109,7 @@ drawable_unlock(struct apple_glx_drawable *d)
     err = pthread_mutex_unlock(&d->mutex);
  
     if (err) {
-      fprintf(stderr, "pthread_mutex_unlock error: %d\n", err);
+      fprintf(stderr, "pthread_mutex_unlock error: %s\n", strerror(err));
        abort();
     }
  }
@@ -245,7 +246,7 @@ common_init(Display * dpy, GLXDrawable drawable, struct apple_glx_drawable *d)
     err = pthread_mutexattr_init(&attr);
  
     if (err) {
-      fprintf(stderr, "pthread_mutexattr_init error: %d\n", err);
+      fprintf(stderr, "pthread_mutexattr_init error: %s\n", strerror(err));
        abort();
     }
  
@@ -257,14 +258,14 @@ common_init(Display * dpy, GLXDrawable drawable, struct apple_glx_drawable *d)
     err = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
  
     if (err) {
-      fprintf(stderr, "error: setting pthread mutex type: %d\n", err);
+      fprintf(stderr, "error: setting pthread mutex type: %s\n", strerror(err));
        abort();
     }
  
     err = pthread_mutex_init(&d->mutex, &attr);
  
     if (err) {
-      fprintf(stderr, "pthread_mutex_init error: %d\n", err);
+      fprintf(stderr, "pthread_mutex_init error: %s\n", strerror(err));
        abort();
     }
  
diff --git a/src/glx/apple/apple_glx_log.c b/src/glx/apple/apple_glx_log.c

new file mode 100644 (file)

index 0000000..9ebf666
--- /dev/null
+++ b/src/glx/apple/apple_glx_log.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2012 Apple Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT
+ * HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name(s) of the above
+ * copyright holders shall not be used in advertising or otherwise to
+ * promote the sale, use or other dealings in this Software without
+ * prior written authorization.
+ */
+
+#include <sys/cdefs.h>
+#include <asl.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include "apple_glx_log.h"
+
+static bool diagnostic = false;
+static aslclient aslc;
+
+void apple_glx_log_init(void) {
+    /* Setting LIBGL_DIAGNOSTIC (to any value) turns on the stderr echo. */
+    if (getenv("LIBGL_DIAGNOSTIC") != NULL)
+        diagnostic = true;
+
+    aslc = asl_open(NULL, NULL, 0);
+}
+
+void _apple_glx_log(int level, const char *file, const char *function,
+                    int line, const char *fmt, ...) {
+    va_list ap; /* variadic tail, handed to the va_list variant */
+    va_start(ap, fmt);
+    _apple_glx_vlog(level, file, function, line, fmt, ap);
+    va_end(ap);
+}
+
+static const char *
+_asl_level_string(int level)
+{
+        /* Map an ASL priority to its name; anything else is "unknown". */
+        switch (level) {
+        case ASL_LEVEL_EMERG:   return ASL_STRING_EMERG;
+        case ASL_LEVEL_ALERT:   return ASL_STRING_ALERT;
+        case ASL_LEVEL_CRIT:    return ASL_STRING_CRIT;
+        case ASL_LEVEL_ERR:     return ASL_STRING_ERR;
+        case ASL_LEVEL_WARNING: return ASL_STRING_WARNING;
+        case ASL_LEVEL_NOTICE:  return ASL_STRING_NOTICE;
+        case ASL_LEVEL_INFO:    return ASL_STRING_INFO;
+        case ASL_LEVEL_DEBUG:   return ASL_STRING_DEBUG;
+        default:                return "unknown";
+        }
+}
+
+/* Send one message to ASL, tagged with its origin (file, function, line)
+ * and, when the process is threaded, the calling thread's id.  If
+ * LIBGL_DIAGNOSTIC was set at init the message is also echoed to stderr.
+ */
+void _apple_glx_vlog(int level, const char *file, const char *function,
+                     int line, const char *fmt, va_list args) {
+    aslmsg msg;
+    uint64_t thread = 0;
+    const int threaded = pthread_is_threaded_np(); /* sampled once */
+
+    if (threaded)
+        pthread_threadid_np(NULL, &thread);
+
+    if (diagnostic) {
+        /* asl_vlog() below consumes args, so stderr prints from a copy. */
+        va_list args2;
+        va_copy(args2, args);
+        fprintf(stderr, "%-9s %24s:%-4d %s(%"PRIu64"): ",
+                _asl_level_string(level), file, line, function, thread);
+        vfprintf(stderr, fmt, args2);
+        va_end(args2); /* every va_copy needs a matching va_end */
+    }
+
+    msg = asl_new(ASL_TYPE_MSG);
+    if (msg) {
+        /* Scratch for decimal int/uint64_t; asl_set() takes its own copy. */
+        char num[24];
+
+        if (file)
+            asl_set(msg, "File", file);
+        if (function)
+            asl_set(msg, "Function", function);
+        if (line) {
+            snprintf(num, sizeof num, "%d", line);
+            asl_set(msg, "Line", num);
+        }
+        if (threaded) {
+            snprintf(num, sizeof num, "%"PRIu64, thread);
+            asl_set(msg, "Thread", num);
+        }
+    }
+
+    asl_vlog(aslc, msg, level, fmt, args);
+    if (msg)
+        asl_free(msg);
+}
diff --git a/src/glx/apple/apple_glx_log.h b/src/glx/apple/apple_glx_log.h

new file mode 100644 (file)

index 0000000..4b1c531
--- /dev/null
+++ b/src/glx/apple/apple_glx_log.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2012 Apple Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT
+ * HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name(s) of the above
+ * copyright holders shall not be used in advertising or otherwise to
+ * promote the sale, use or other dealings in this Software without
+ * prior written authorization.
+ */
+
+#ifndef APPLE_GLX_LOG_H
+#define APPLE_GLX_LOG_H
+
+#include <sys/cdefs.h>
+#include <asl.h>
+
+void apple_glx_log_init(void); /* checks LIBGL_DIAGNOSTIC, opens the ASL client */
+/* printf-style logger; the apple_glx_log() macro supplies the call site. */
+__printflike(5, 6)
+void _apple_glx_log(int level, const char *file, const char *function,
+                    int line, const char *fmt, ...);
+#define apple_glx_log(l, f, args ...) \
+    _apple_glx_log(l, __FILE__, __FUNCTION__, __LINE__, f, ## args)
+
+/* va_list variant; the apple_glx_vlog() macro supplies the call site. */
+__printflike(5, 0)
+void _apple_glx_vlog(int level, const char *file, const char *function,
+                     int line, const char *fmt, va_list v);
+#define apple_glx_vlog(l, f, v) \
+    _apple_glx_vlog(l, __FILE__, __FUNCTION__, __LINE__, f, v)
+
+/* This is just here to help the transition.
+ * TODO: Replace calls to apple_glx_diagnostic
+ */
+#define apple_glx_diagnostic(f, args ...) \
+    apple_glx_log(ASL_LEVEL_DEBUG, f, ## args)
+
+#endif
diff --git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c

index ea6e0beaeab6198c531a9f810955a5b5e0ba09ce..6f8bd69481fc51272362114c3706b952afae4808 100644 (file)
--- a/src/mesa/drivers/dri/i915/i830_state.c
+++ b/src/mesa/drivers/dri/i915/i830_state.c
@@ -30,6 +30,7 @@
  #include "main/context.h"
  #include "main/macros.h"
  #include "main/enums.h"
+#include "main/fbobject.h"
  #include "main/dd.h"
  #include "main/state.h"
  
@@ -545,7 +546,7 @@ i830Scissor(struct gl_context * ctx, GLint x, GLint y, GLsizei w, GLsizei h)
  
     DBG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
  
-   if (ctx->DrawBuffer->Name == 0) {
+   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
        x1 = x;
        y1 = ctx->DrawBuffer->Height - (y + h);
        x2 = x + w - 1;
diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c

index 94c7327830b6822af26594c03c6cd950bff831a0..3ab75a9739c7f024cf20bd1094b28d4d7d0113a2 100644 (file)
--- a/src/mesa/drivers/dri/i915/i915_state.c
+++ b/src/mesa/drivers/dri/i915/i915_state.c
@@ -30,6 +30,7 @@
  #include "main/context.h"
  #include "main/macros.h"
  #include "main/enums.h"
+#include "main/fbobject.h"
  #include "main/dd.h"
  #include "main/state.h"
  #include "tnl/tnl.h"
@@ -400,7 +401,7 @@ intelCalcViewport(struct gl_context * ctx)
  {
     struct intel_context *intel = intel_context(ctx);
  
-   if (ctx->DrawBuffer->Name == 0) {
+   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
        _math_matrix_viewport(&intel->ViewportMatrix,
                             ctx->Viewport.X,
                             ctx->DrawBuffer->Height - ctx->Viewport.Y,
@@ -518,7 +519,7 @@ i915Scissor(struct gl_context * ctx, GLint x, GLint y, GLsizei w, GLsizei h)
  
     DBG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
  
-   if (ctx->DrawBuffer->Name == 0) {
+   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
        x1 = x;
        y1 = ctx->DrawBuffer->Height - (y + h);
        x2 = x + w - 1;
@@ -577,7 +578,7 @@ i915CullFaceFrontFace(struct gl_context * ctx, GLenum unused)
     else if (ctx->Polygon.CullFaceMode != GL_FRONT_AND_BACK) {
        mode = S4_CULLMODE_CW;
  
-      if (ctx->DrawBuffer && ctx->DrawBuffer->Name != 0)
+      if (ctx->DrawBuffer && _mesa_is_user_fbo(ctx->DrawBuffer))
           mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW);
        if (ctx->Polygon.CullFaceMode == GL_FRONT)
           mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW);
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources

index c564d95d44be421c693e3744d95cadc66468dacf..c99a034a462315df69595b9b94e75a53833b2052 100644 (file)
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -88,7 +88,6 @@ i965_C_FILES = \
         gen6_clip_state.c \
         gen6_depthstencil.c \
         gen6_gs_state.c \
-       gen6_hiz.c \
         gen6_sampler_state.c \
         gen6_scissor_state.c \
         gen6_sf_state.c \
@@ -100,7 +99,6 @@ i965_C_FILES = \
         gen7_cc_state.c \
         gen7_clip_state.c \
         gen7_disable.c \
-       gen7_hiz.c \
         gen7_misc_state.c \
         gen7_sampler_state.c \
         gen7_sf_state.c \
@@ -127,6 +125,8 @@ i965_CXX_FILES = \
         brw_vec4_emit.cpp \
         brw_vec4_copy_propagation.cpp \
         brw_vec4_reg_allocate.cpp \
-       brw_vec4_visitor.cpp
+       brw_vec4_visitor.cpp \
+       gen6_blorp.cpp \
+       gen7_blorp.cpp
  
  i965_ASM_FILES =
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h

index 47f56e21f5d6553c8f446642e6320d9dc2bbb922..141fb658d718421ca5a84dabadfc38d4ad7af6d5 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -37,6 +37,9 @@
  #include "brw_structs.h"
  #include "main/imports.h"
  
+#ifdef __cplusplus
+extern "C" {
+#endif
  
  /* Glossary:
   *
@@ -1188,4 +1191,8 @@ brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
  
  bool brw_do_cubemap_normalize(struct exec_list *instructions);
  
+#ifdef __cplusplus
+}
+#endif
+
  #endif
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c

index da37b181f6e8131e821223cb5253d3aaac92bbb9..813f7c8e570623340d6bf966ce51a1d081129240 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -531,7 +531,6 @@ retry:
  }
  
  void brw_draw_prims( struct gl_context *ctx,
-                    const struct gl_client_array *arrays[],
                      const struct _mesa_prim *prim,
                      GLuint nr_prims,
                      const struct _mesa_index_buffer *ib,
@@ -540,6 +539,7 @@ void brw_draw_prims( struct gl_context *ctx,
                      GLuint max_index,
                      struct gl_transform_feedback_object *tfb_vertcount )
  {
+   const struct gl_client_array **arrays = ctx->Array._DrawArrays;
     bool retval;
  
     if (!_mesa_check_conditional_render(ctx))
diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h

index b91041932e193f6c18056e8ca0bdaa8cae85c4d6..2cc4cb383796b6e7865440f5cf6681f704abf2ca 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_draw.h
+++ b/src/mesa/drivers/dri/i965/brw_draw.h
@@ -35,7 +35,6 @@ struct brw_context;
  
  
  void brw_draw_prims( struct gl_context *ctx,
-                    const struct gl_client_array *arrays[],
                      const struct _mesa_prim *prims,
                      GLuint nr_prims,
                      const struct _mesa_index_buffer *ib,
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h

index c4c62b2bf4a6087ee54ceebc208fac3ed14f213d..675b50a145680a3b3affc4ceffcaadac0cf6d060 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -38,6 +38,10 @@
  #include "brw_defines.h"
  #include "program/prog_instruction.h"
  
+#ifdef __cplusplus
+extern "C" {
+#endif
+
  #define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
  #define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
  
@@ -1107,4 +1111,8 @@ void brw_optimize(struct brw_compile *p);
  void brw_remove_duplicate_mrf_moves(struct brw_compile *p);
  void brw_remove_grf_to_mrf_moves(struct brw_compile *p);
  
+#ifdef __cplusplus
+}
+#endif
+
  #endif
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index 8af43000e3d1a42b9025eb2778c36640eff18c0f..fd67318f55019166c292c231722c370edfa63561 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -35,6 +35,7 @@ extern "C" {
  #include "main/macros.h"
  #include "main/shaderobj.h"
  #include "main/uniforms.h"
+#include "main/fbobject.h"
  #include "program/prog_parameter.h"
  #include "program/prog_print.h"
  #include "program/register_allocate.h"
@@ -1828,7 +1829,7 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
  
     if (fp->Base.InputsRead & FRAG_BIT_WPOS) {
        key.drawable_height = ctx->DrawBuffer->Height;
-      key.render_to_fbo = ctx->DrawBuffer->Name != 0;
+      key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
     }
  
     key.nr_color_regions = 1;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h

index 6d9a042ff1b05ec349dcf36c923c80923c8cbdea..6b45c4ece961374c17333f3f08fc6f968c635386 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -582,6 +582,7 @@ public:
     void emit_assignment_writes(fs_reg &l, fs_reg &r,
                                const glsl_type *type, bool predicated);
     void resolve_ud_negate(fs_reg *reg);
+   void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg);
  
     struct brw_reg interp_reg(int location, int channel);
     int setup_uniform_values(int loc, const glsl_type *type);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index d4ebc79dbce39c9d9e60f1d4ccd18dc4782a5dbf..20d4c53a858ffd09f72928d71339b1727fcf2efc 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -395,6 +395,9 @@ fs_visitor::visit(ir_expression *ir)
        resolve_ud_negate(&op[0]);
        resolve_ud_negate(&op[1]);
  
+      resolve_bool_comparison(ir->operands[0], &op[0]);
+      resolve_bool_comparison(ir->operands[1], &op[1]);
+
        inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
        inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
        break;
@@ -1542,6 +1545,9 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
        case ir_binop_all_equal:
        case ir_binop_nequal:
        case ir_binop_any_nequal:
+        resolve_bool_comparison(expr->operands[0], &op[0]);
+        resolve_bool_comparison(expr->operands[1], &op[1]);
+
          inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]);
          inst->conditional_mod =
             brw_conditional_for_comparison(expr->operation);
@@ -2129,3 +2135,14 @@ fs_visitor::resolve_ud_negate(fs_reg *reg)
     emit(BRW_OPCODE_MOV, temp, *reg);
     *reg = temp;
  }
+
+void
+fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)
+{
+   /* Bool operands are ANDed with 1 into a fresh temp that replaces *reg. */
+   if (rvalue->type == glsl_type::bool_type) {
+      fs_reg masked = fs_reg(this, glsl_type::bool_type);
+      emit(BRW_OPCODE_AND, masked, *reg, fs_reg(1));
+      *reg = masked;
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c

index 62bcc93eed29ce1eacd02444586a3407818be006..0c0389f8bdf44184bc76bcf02cdc99e42593292f 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -40,6 +40,8 @@
  #include "brw_state.h"
  #include "brw_defines.h"
  
+#include "main/fbobject.h"
+
  /* Constant single cliprect for framebuffer object or DRI2 drawing */
  static void upload_drawing_rect(struct brw_context *brw)
  {
@@ -264,10 +266,45 @@ static void emit_depthbuffer(struct brw_context *brw)
     unsigned int len;
     bool separate_stencil = false;
  
+   /* Amount by which drawing should be offset in order to draw to the
+    * appropriate miplevel/zoffset/cubeface.  We will extract these values
+    * from depth_irb or stencil_irb once we determine which is present.
+    */
+   uint32_t draw_x = 0, draw_y = 0;
+
+   /* Masks used to determine how much of the draw_x and draw_y offsets should
+    * be performed using the fine adjustment of "depth coordinate offset X/Y"
+    * (dw5 of 3DSTATE_DEPTH_BUFFER).  Any remaining coarse adjustment will be
+    * performed by changing the base addresses of the buffers.
+    *
+    * Since the HiZ, depth, and stencil buffers all use the same "depth
+    * coordinate offset X/Y" values, we need to make sure that the coarse
+    * adjustment will be possible to apply to all three buffers.  Since coarse
+    * adjustment can only be applied in multiples of the tile size, we will OR
+    * together the tile masks of all the buffers to determine which offsets to
+    * perform as fine adjustments.
+    */
+   uint32_t tile_mask_x = 0, tile_mask_y = 0;
+
+   if (depth_irb) {
+      intel_region_get_tile_masks(depth_irb->mt->region,
+                                  &tile_mask_x, &tile_mask_y);
+   }
+
     if (depth_irb &&
         depth_irb->mt &&
         depth_irb->mt->hiz_mt) {
        hiz_region = depth_irb->mt->hiz_mt->region;
+
+      uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
+      intel_region_get_tile_masks(hiz_region,
+                                  &hiz_tile_mask_x, &hiz_tile_mask_y);
+
+      /* Each HiZ row represents 2 rows of pixels */
+      hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
+
+      tile_mask_x |= hiz_tile_mask_x;
+      tile_mask_y |= hiz_tile_mask_y;
     }
  
     /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
@@ -284,8 +321,21 @@ static void emit_depthbuffer(struct brw_context *brw)
        if (stencil_mt->stencil_mt)
          stencil_mt = stencil_mt->stencil_mt;
  
-      if (stencil_mt->format == MESA_FORMAT_S8)
+      if (stencil_mt->format == MESA_FORMAT_S8) {
          separate_stencil = true;
+
+         /* Separate stencil buffer uses 64x64 tiles. */
+         tile_mask_x |= 63;
+         tile_mask_y |= 63;
+      } else {
+         uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
+         intel_region_get_tile_masks(stencil_mt->region,
+                                     &stencil_tile_mask_x,
+                                     &stencil_tile_mask_y);
+
+         tile_mask_x |= stencil_tile_mask_x;
+         tile_mask_y |= stencil_tile_mask_y;
+      }
     }
  
     /* If there's a packed depth/stencil bound to stencil only, we need to
@@ -319,6 +369,8 @@ static void emit_depthbuffer(struct brw_context *brw)
        ADVANCE_BATCH();
  
     } else if (!depth_irb && separate_stencil) {
+      uint32_t tile_x, tile_y;
+
        /*
         * There exists a separate stencil buffer but no depth buffer.
         *
@@ -341,6 +393,29 @@ static void emit_depthbuffer(struct brw_context *brw)
         */
        assert(intel->has_separate_stencil);
  
+      draw_x = stencil_irb->draw_x;
+      draw_y = stencil_irb->draw_y;
+      tile_x = draw_x & tile_mask_x;
+      tile_y = draw_y & tile_mask_y;
+
+      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
+       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
+       * Coordinate Offset X/Y":
+       *
+       *   "The 3 LSBs of both offsets must be zero to ensure correct
+       *   alignment"
+       *
+       * We have no guarantee that tile_x and tile_y are correctly aligned,
+       * since they are determined by the mipmap layout, which is only aligned
+       * to multiples of 4.
+       *
+       * So, to avoid hanging the GPU, just smash the low order 3 bits of
+       * tile_x and tile_y to 0.  This is a temporary workaround until we come
+       * up with a better solution.
+       */
+      tile_x &= ~7;
+      tile_y &= ~7;
+
        BEGIN_BATCH(len);
        OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
        OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
@@ -350,11 +425,15 @@ static void emit_depthbuffer(struct brw_context *brw)
                 (1 << 27) | /* tiled surface */
                 (BRW_SURFACE_2D << 29));
        OUT_BATCH(0);
-      OUT_BATCH(((stencil_irb->Base.Base.Width - 1) << 6) |
-                (stencil_irb->Base.Base.Height - 1) << 19);
-      OUT_BATCH(0);
+      OUT_BATCH(((stencil_irb->Base.Base.Width + tile_x - 1) << 6) |
+                (stencil_irb->Base.Base.Height + tile_y - 1) << 19);
        OUT_BATCH(0);
  
+      if (intel->is_g4x || intel->gen >= 5)
+         OUT_BATCH(tile_x | (tile_y << 16));
+      else
+        assert(tile_x == 0 && tile_y == 0);
+
        if (intel->gen >= 6)
          OUT_BATCH(0);
  
@@ -367,11 +446,36 @@ static void emit_depthbuffer(struct brw_context *brw)
        /* If using separate stencil, hiz must be enabled. */
        assert(!separate_stencil || hiz_region);
  
-      offset = intel_renderbuffer_tile_offsets(depth_irb, &tile_x, &tile_y);
-
        assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
        assert(!hiz_region || region->tiling == I915_TILING_Y);
  
+      draw_x = depth_irb->draw_x;
+      draw_y = depth_irb->draw_y;
+      tile_x = draw_x & tile_mask_x;
+      tile_y = draw_y & tile_mask_y;
+
+      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
+       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
+       * Coordinate Offset X/Y":
+       *
+       *   "The 3 LSBs of both offsets must be zero to ensure correct
+       *   alignment"
+       *
+       * We have no guarantee that tile_x and tile_y are correctly aligned,
+       * since they are determined by the mipmap layout, which is only aligned
+       * to multiples of 4.
+       *
+       * So, to avoid hanging the GPU, just smash the low order 3 bits of
+       * tile_x and tile_y to 0.  This is a temporary workaround until we come
+       * up with a better solution.
+       */
+      tile_x &= ~7;
+      tile_y &= ~7;
+
+      offset = intel_region_get_aligned_offset(region,
+                                               draw_x & ~tile_mask_x,
+                                               draw_y & ~tile_mask_y);
+
        BEGIN_BATCH(len);
        OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
        OUT_BATCH(((region->pitch * region->cpp) - 1) |
@@ -411,12 +515,17 @@ static void emit_depthbuffer(struct brw_context *brw)
  
        /* Emit hiz buffer. */
        if (hiz_region) {
+         uint32_t hiz_offset =
+            intel_region_get_aligned_offset(hiz_region,
+                                            draw_x & ~tile_mask_x,
+                                            (draw_y & ~tile_mask_y) / 2);
+
          BEGIN_BATCH(3);
          OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
          OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
          OUT_RELOC(hiz_region->bo,
                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                  0);
+                  hiz_offset);
          ADVANCE_BATCH();
        } else {
          BEGIN_BATCH(3);
@@ -429,6 +538,15 @@ static void emit_depthbuffer(struct brw_context *brw)
        /* Emit stencil buffer. */
        if (separate_stencil) {
          struct intel_region *region = stencil_mt->region;
+
+         /* Note: we can't compute the stencil offset using
+          * intel_region_get_aligned_offset(), because stencil_region claims
+          * that the region is untiled; in fact it's W tiled.
+          */
+         uint32_t stencil_offset =
+            (draw_y & ~tile_mask_y) * region->pitch +
+            (draw_x & ~tile_mask_x) * 64;
+
          BEGIN_BATCH(3);
          OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
           /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
@@ -439,7 +557,7 @@ static void emit_depthbuffer(struct brw_context *brw)
          OUT_BATCH(2 * region->pitch * region->cpp - 1);
          OUT_RELOC(region->bo,
                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                  0);
+                  stencil_offset);
          ADVANCE_BATCH();
        } else {
          BEGIN_BATCH(3);
@@ -507,7 +625,7 @@ static void upload_polygon_stipple(struct brw_context *brw)
      * to a FBO (i.e. any named frame buffer object), we *don't*
      * need to invert - we already match the layout.
      */
-   if (ctx->DrawBuffer->Name == 0) {
+   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
        for (i = 0; i < 32; i++)
           OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
     }
@@ -550,15 +668,13 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
  
     /* _NEW_BUFFERS
      *
-    * If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
-    * we have to invert the Y axis in order to match the OpenGL
-    * pixel coordinate system, and our offset must be matched
-    * to the window position.  If we're drawing to a FBO
-    * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
-    * system works just fine, and there's no window system to
-    * worry about.
+    * If we're drawing to a system window we have to invert the Y axis
+    * in order to match the OpenGL pixel coordinate system, and our
+    * offset must be matched to the window position.  If we're drawing
+    * to a user-created FBO then our native pixel coordinate system
+    * works just fine, and there's no window system to worry about.
      */
-   if (brw->intel.ctx.DrawBuffer->Name == 0)
+   if (_mesa_is_winsys_fbo(brw->intel.ctx.DrawBuffer))
        OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
     else
        OUT_BATCH(0);
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c

index 37d1ee502d8417d5472d24bc0823b1d259ce15c5..23a874aa8ecdc1fe016377ad76b1de45e19abaf1 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -32,7 +32,9 @@
  
  #include "main/glheader.h"
  #include "main/macros.h"
+#include "main/mtypes.h"
  #include "main/enums.h"
+#include "main/fbobject.h"
  
  #include "intel_batchbuffer.h"
  
@@ -136,7 +138,7 @@ brw_upload_sf_prog(struct brw_context *brw)
     struct gl_context *ctx = &brw->intel.ctx;
     struct brw_sf_prog_key key;
     /* _NEW_BUFFERS */
-   bool render_to_fbo = ctx->DrawBuffer->Name != 0;
+   bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
  
     memset(&key, 0, sizeof(key));
  
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c

index d7e7aa11f63a11bdc6625ef90255c7092d4f5e25..7c29ba27d1a7834f3c2789f1f0777cac846147ab 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -31,10 +31,12 @@
     
  
  
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/fbobject.h"
  #include "brw_context.h"
  #include "brw_state.h"
  #include "brw_defines.h"
-#include "main/macros.h"
  #include "brw_sf.h"
  
  static void upload_sf_vp(struct brw_context *brw)
@@ -44,7 +46,7 @@ static void upload_sf_vp(struct brw_context *brw)
     const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
     struct brw_sf_viewport *sfv;
     GLfloat y_scale, y_bias;
-   const bool render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
     const GLfloat *v = ctx->Viewport._WindowMap.m;
  
     sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE,
@@ -142,7 +144,7 @@ static void upload_sf_unit( struct brw_context *brw )
     struct brw_sf_unit_state *sf;
     drm_intel_bo *bo = intel->batch.bo;
     int chipset_max_threads;
-   bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+   bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer);
  
     sf = brw_state_batch(brw, AUB_TRACE_SF_STATE,
                         sizeof(*sf), 64, &brw->sf.state_offset);
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h

index 8a0e92fab0818a33e90a084a127aca104d67aade..659cb0a2898ee3e3bf20abd24450fa0a0ee94475 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -35,6 +35,10 @@
  
  #include "brw_context.h"
  
+#ifdef __cplusplus
+extern "C" {
+#endif
+
  extern const struct brw_tracked_state brw_blend_constant_color;
  extern const struct brw_tracked_state brw_cc_vp;
  extern const struct brw_tracked_state brw_cc_unit;
@@ -208,4 +212,8 @@ uint32_t
  get_attr_override(struct brw_vue_map *vue_map, int urb_entry_read_offset,
                    int fs_attr, bool two_side_color);
  
+#ifdef __cplusplus
+}
+#endif
+
  #endif
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c

index 7a1b91f3721fd103e7416bd6f7b1292bd5178a4d..8bf1d3ddbcdd382014c42a7e4df6655341bbae3e 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -115,6 +115,8 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt)
                intel_miptree_set_image_offset(mt, level, q, x, y);
                x += pack_x_pitch;
             }
+            if (x > mt->total_width)
+               mt->total_width = x;
  
             x = 0;
             y += pack_y_pitch;
@@ -135,10 +137,9 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt)
                pack_x_nr <<= 1;
             }
          } else {
+            pack_x_nr <<= 1;
             if (pack_x_pitch > 4) {
                pack_x_pitch >>= 1;
-              pack_x_nr <<= 1;
-              assert(pack_x_pitch * pack_x_nr <= mt->total_width);
             }
  
             if (pack_y_pitch > 2) {
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c

index 5e7345648db77af86b79b02a51326bf5a809780e..733193425d3152595addce4784a858cd4a7af848 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -49,8 +49,8 @@
  #include "brw_vs.h"
  #include "brw_wm.h"
  
-#include "gen6_hiz.h"
-#include "gen7_hiz.h"
+#include "gen6_blorp.h"
+#include "gen7_blorp.h"
  
  #include "glsl/ralloc.h"
  
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c

index b358306a92820bc9c00ba05823010cdad4cb21c2..63c74ad8f7baa33943a7f577562871aabe32daa1 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -33,6 +33,7 @@
  #include "brw_wm.h"
  #include "brw_state.h"
  #include "main/formats.h"
+#include "main/fbobject.h"
  #include "main/samplerobj.h"
  #include "program/prog_parameter.h"
  
@@ -516,7 +517,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
      */
     if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) {
        key->drawable_height = ctx->DrawBuffer->Height;
-      key->render_to_fbo = ctx->DrawBuffer->Name != 0;
+      key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
     }
  
     /* _NEW_BUFFERS */
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c

index 69af0eecee1db0679221d654e45b18afd8c2c66d..0bb9414ed588f13c033e71446a2c6a944cb44a81 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -291,6 +291,8 @@ brw_format_for_mesa_format(gl_format mesa_format)
        [MESA_FORMAT_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_UNORM,
        [MESA_FORMAT_ARGB8888] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM,
        [MESA_FORMAT_ARGB8888_REV] = 0,
+      [MESA_FORMAT_RGBX8888] = 0,
+      [MESA_FORMAT_RGBX8888_REV] = BRW_SURFACEFORMAT_R8G8B8X8_UNORM,
        [MESA_FORMAT_XRGB8888] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM,
        [MESA_FORMAT_XRGB8888_REV] = 0,
        [MESA_FORMAT_RGB888] = 0,
@@ -959,8 +961,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
  
     switch (rb_format) {
     case MESA_FORMAT_SARGB8:
-      /* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB
-        surfaces to the blend/update as sRGB */
+      /* _NEW_BUFFERS
+       *
+       * Without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB surfaces to the
+       * blend/update as sRGB.
+       */
        if (ctx->Color.sRGBEnabled)
          format = brw_format_for_mesa_format(rb_format);
        else
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp

new file mode 100644 (file)

index 0000000..604d380
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -0,0 +1,662 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_fbo.h"
+#include "intel_mipmap_tree.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+
+#include "gen6_blorp.h"
+
+/**
+ * \name Constants for HiZ VBO
+ * \{
+ *
+ * The HiZ rectangle is described by GEN6_HIZ_NUM_VERTICES vertices, each
+ * made of GEN6_HIZ_NUM_VUE_ELEMS floats.  Note that GEN6_HIZ_VBO_SIZE is the
+ * size of that vertex data in BYTES (it multiplies by sizeof(float)); it is
+ * not a count of floats.
+ *
+ * \see brw_context::hiz::vertex_bo
+ */
+#define GEN6_HIZ_NUM_VERTICES 3
+#define GEN6_HIZ_NUM_VUE_ELEMS 8
+#define GEN6_HIZ_VBO_SIZE (GEN6_HIZ_NUM_VERTICES \
+                           * GEN6_HIZ_NUM_VUE_ELEMS \
+                           * sizeof(float))
+/** \} */
+
+void
+gen6_hiz_emit_batch_head(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+
+   /* This function opens a HiZ-op batch: it flushes, then emits
+    * CMD_PIPELINE_SELECT, 3DSTATE_MULTISAMPLE, 3DSTATE_SAMPLE_MASK and
+    * CMD_STATE_BASE_ADDRESS, in that order.
+    *
+    * To ensure that the batch contains only the resolve, flush the batch
+    * before beginning and after finishing emitting the resolve packets.
+    * Only the leading flush happens here; the trailing one is issued at the
+    * end of gen6_hiz_exec().
+    *
+    * Ideally, we would not need to flush for the resolve op. But, I suspect
+    * that it's unsafe for CMD_PIPELINE_SELECT to occur multiple times in
+    * a single batch, and there is no safe way to ensure that other than by
+    * fencing the resolve with flushes. Ideally, we would just detect if
+    * a batch is in progress and do the right thing, but that would require
+    * the ability to *safely* access brw_context::state::dirty::brw
+    * outside of the brw_upload_state() codepath.
+    */
+   intel_flush(ctx);
+
+   /* CMD_PIPELINE_SELECT
+    *
+    * Select the 3D pipeline, as opposed to the media pipeline.
+    */
+   {
+      BEGIN_BATCH(1);
+      OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_MULTISAMPLE
+    *
+    * One sample per pixel, sampled at the pixel center.  The packet is one
+    * dword longer when intel->gen == 7, hence the variable length.
+    */
+   {
+      int length = intel->gen == 7 ? 4 : 3;
+
+      BEGIN_BATCH(length);
+      OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (length - 2));
+      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
+                MS_NUMSAMPLES_1);
+      OUT_BATCH(0);
+      if (length >= 4)
+         OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+   }
+
+   /* 3DSTATE_SAMPLE_MASK
+    *
+    * A mask value of 1 (presumably enabling sample 0 only, matching the
+    * single-sample setup above).
+    */
+   {
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
+      OUT_BATCH(1);
+      ADVANCE_BATCH();
+   }
+
+   /* CMD_STATE_BASE_ADDRESS
+    *
+    * The surface-state and dynamic-state bases both point at the batch
+    * buffer itself (intel->batch.bo).
+    *
+    * From the Sandy Bridge PRM, Volume 1, Part 1, Table STATE_BASE_ADDRESS:
+    *     The following commands must be reissued following any change to the
+    *     base addresses:
+    *         3DSTATE_CC_POINTERS
+    *         3DSTATE_BINDING_TABLE_POINTERS
+    *         3DSTATE_SAMPLER_STATE_POINTERS
+    *         3DSTATE_VIEWPORT_STATE_POINTERS
+    *         MEDIA_STATE_POINTERS
+    */
+   {
+      BEGIN_BATCH(10);
+      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
+      OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */
+      /* SurfaceStateBaseAddress */
+      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
+      /* DynamicStateBaseAddress */
+      OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
+                                  I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
+      OUT_BATCH(1); /* IndirectObjectBaseAddress */
+      OUT_BATCH(1); /* InstructionBaseAddress */
+      OUT_BATCH(1); /* GeneralStateUpperBound */
+      OUT_BATCH(1); /* DynamicStateUpperBound */
+      OUT_BATCH(1); /* IndirectObjectUpperBound */
+      OUT_BATCH(1); /* InstructionAccessUpperBound */
+      ADVANCE_BATCH();
+   }
+}
+
+void
+gen6_hiz_emit_vertices(struct brw_context *brw,
+                       struct intel_mipmap_tree *mt,
+                       unsigned int level,
+                       unsigned int layer)
+{
+   struct intel_context *intel = &brw->intel;
+   uint32_t vertex_offset;
+
+   /* Setup VBO for the rectangle primitive..
+    *
+    * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
+    * vertices. The vertices reside in screen space with DirectX coordinates
+    * (that is, (0, 0) is the upper left corner).
+    *
+    *   v2 ------ implied
+    *    |        |
+    *    |        |
+    *   v0 ----- v1
+    *
+    * Since the VS is disabled, the clipper loads each VUE directly from
+    * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
+    * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
+    *   dw0: Reserved, MBZ.
+    *   dw1: Render Target Array Index. The HiZ op does not use indexed
+    *        vertices, so set the dword to 0.
+    *   dw2: Viewport Index. The HiZ op disables viewport mapping and
+    *        scissoring, so set the dword to 0.
+    *   dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so
+    *        set the dword to 0.
+    *   dw4: Vertex Position X.
+    *   dw5: Vertex Position Y.
+    *   dw6: Vertex Position Z.
+    *   dw7: Vertex Position W.
+    *
+    * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
+    * "Vertex URB Entry (VUE) Formats".
+    */
+   {
+      const int width = mt->level[level].width;
+      const int height = mt->level[level].height;
+      float *vertex_data;
+
+      const float vertices[GEN6_HIZ_VBO_SIZE] = {
+         /* v0 */ 0, 0, 0, 0,         0, height, 0, 1,
+         /* v1 */ 0, 0, 0, 0,     width, height, 0, 1,
+         /* v2 */ 0, 0, 0, 0,         0,      0, 0, 1,
+      };
+
+      vertex_data = (float *) brw_state_batch(brw, AUB_TRACE_NO_TYPE,
+                                              GEN6_HIZ_VBO_SIZE, 32,
+                                              &vertex_offset);
+      memcpy(vertex_data, vertices, GEN6_HIZ_VBO_SIZE);
+   }
+
+   /* 3DSTATE_VERTEX_BUFFERS */
+   {
+      const int num_buffers = 1;
+      const int batch_length = 1 + 4 * num_buffers;
+
+      uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
+                     (GEN6_HIZ_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT;
+
+      if (intel->gen >= 7)
+         dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
+
+      BEGIN_BATCH(batch_length);
+      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
+      OUT_BATCH(dw0);
+      /* start address */
+      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
+               vertex_offset);
+      /* end address */
+      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
+               vertex_offset + GEN6_HIZ_VBO_SIZE - 1);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_VERTEX_ELEMENTS
+    *
+    * Fetch dwords 0 - 7 from each VUE. See the comments above where
+    * hiz->vertex_bo is filled with data.
+    */
+   {
+      const int num_elements = 2;
+      const int batch_length = 1 + 2 * num_elements;
+
+      BEGIN_BATCH(batch_length);
+      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2));
+      /* Element 0 */
+      OUT_BATCH(GEN6_VE0_VALID |
+                BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
+                0 << BRW_VE0_SRC_OFFSET_SHIFT);
+      OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
+      /* Element 1 */
+      OUT_BATCH(GEN6_VE0_VALID |
+                BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
+                16 << BRW_VE0_SRC_OFFSET_SHIFT);
+      OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
+      ADVANCE_BATCH();
+   }
+}
+
+/**
+ * \brief Execute a HiZ op on a miptree slice.
+ *
+ * To execute the HiZ op, this function manually constructs and emits a batch
+ * to "draw" the HiZ op's rectangle primitive. The batchbuffer is flushed
+ * before constructing and after emitting the batch.
+ *
+ * This function alters no GL state.
+ *
+ * For an overview of HiZ ops, see the following sections of the Sandy Bridge
+ * PRM, Volume 1, Part 2:
+ *   - 7.5.3.1 Depth Buffer Clear
+ *   - 7.5.3.2 Depth Buffer Resolve
+ *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
+ *
+ * \param intel  context; the enclosing brw_context is derived from its
+ *               gl_context
+ * \param mt     depth miptree to operate on; mt->hiz_mt must be non-NULL
+ *               (asserted)
+ * \param level  miptree level of the slice; validated together with \c layer
+ *               by intel_miptree_check_level_layer()
+ * \param layer  miptree layer of the slice
+ * \param op     which HiZ op to run; GEN6_HIZ_OP_DEPTH_CLEAR is not
+ *               implemented yet (asserted)
+ *
+ * On return, brw->state.dirty.brw and brw->state.dirty.cache have every bit
+ * set.
+ */
+static void
+gen6_hiz_exec(struct intel_context *intel,
+              struct intel_mipmap_tree *mt,
+              unsigned int level,
+              unsigned int layer,
+              enum gen6_hiz_op op)
+{
+   struct gl_context *ctx = &intel->ctx;
+   struct brw_context *brw = brw_context(ctx);
+   uint32_t draw_x, draw_y;
+   uint32_t tile_mask_x, tile_mask_y;
+
+   assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */
+   assert(mt->hiz_mt != NULL);
+   intel_miptree_check_level_layer(mt, level, layer);
+
+   {
+      /* Construct a dummy renderbuffer just to extract tile offsets.  Only
+       * the mt, mt_level and mt_layer fields are initialized before the
+       * call; draw_x and draw_y are read back afterwards.
+       */
+      struct intel_renderbuffer rb;
+      rb.mt = mt;
+      rb.mt_level = level;
+      rb.mt_layer = layer;
+      intel_renderbuffer_set_draw_offset(&rb);
+      draw_x = rb.draw_x;
+      draw_y = rb.draw_y;
+   }
+
+   /* Compute masks to determine how much of draw_x and draw_y should be
+    * performed using the fine adjustment of "depth coordinate offset X/Y"
+    * (dw5 of 3DSTATE_DEPTH_BUFFER).  See the emit_depthbuffer() function for
+    * details.
+    */
+   {
+      uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y;
+      intel_region_get_tile_masks(mt->region, &depth_mask_x, &depth_mask_y);
+      intel_region_get_tile_masks(mt->hiz_mt->region,
+                                  &hiz_mask_x, &hiz_mask_y);
+
+      /* Each HiZ row represents 2 rows of pixels */
+      hiz_mask_y = hiz_mask_y << 1 | 1;
+
+      /* Use the union of both masks so the coarse offset suits both the
+       * depth and the HiZ region.
+       */
+      tile_mask_x = depth_mask_x | hiz_mask_x;
+      tile_mask_y = depth_mask_y | hiz_mask_y;
+   }
+
+   gen6_hiz_emit_batch_head(brw);
+   gen6_hiz_emit_vertices(brw, mt, level, layer);
+
+   /* 3DSTATE_URB
+    *
+    * Assign the entire URB to the VS. Even though the VS is disabled, URB
+    * space is still needed because the clipper loads the VUE's from the URB.
+    * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
+    * Dword 1.15:0 "VS Number of URB Entries":
+    *     This field is always used (even if VS Function Enable is DISABLED).
+    *
+    * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
+    * safely ignore it because this batch contains only one draw call.
+    *     Because of URB corruption caused by allocating a previous GS unit
+    *     URB entry to the VS unit, software is required to send a “GS NULL
+    *     Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
+    *     plus a dummy DRAW call before any case where VS will be taking over
+    *     GS URB space.
+    */
+   {
+      BEGIN_BATCH(3);
+      OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
+      OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_CC_STATE_POINTERS
+    *
+    * The pointer offsets are relative to
+    * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
+    *
+    * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
+    */
+   {
+      uint32_t depthstencil_offset;
+      gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset);
+
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
+      OUT_BATCH(1); /* BLEND_STATE offset */
+      OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */
+      OUT_BATCH(1); /* COLOR_CALC_STATE offset */
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_VS
+    *
+    * Disable vertex shader.
+    */
+   {
+      /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section
+       * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
+       *   [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
+       *   command that causes the VS Function Enable to toggle. Pipeline
+       *   flush can be executed by sending a PIPE_CONTROL command with CS
+       *   stall bit set and a post sync operation.
+       */
+      intel_emit_post_sync_nonzero_flush(intel);
+
+      BEGIN_BATCH(6);
+      OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_GS
+    *
+    * Disable the geometry shader.
+    */
+   {
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_CLIP
+    *
+    * Disable the clipper.
+    *
+    * The HiZ op emits a rectangle primitive, which requires clipping to
+    * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
+    * Section 1.3 "3D Primitives Overview":
+    *    RECTLIST:
+    *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
+    *    Mode should be set to a value other than CLIPMODE_NORMAL.
+    *
+    * Also disable perspective divide. This doesn't change the clipper's
+    * output, but does spare a few electrons.
+    */
+   {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_SF
+    *
+    * Disable ViewportTransformEnable (dw2.1)
+    *
+    * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
+    * Primitives Overview":
+    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
+    *     use of screen-space coordinates).
+    *
+    * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
+    * and BackFaceFillMode (dw2.5:6) to SOLID(0).
+    *
+    * From the Sandy Bridge PRM, Volume 2, Part 1, Section
+    * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
+    *     SOLID: Any triangle or rectangle object found to be front-facing
+    *     is rendered as a solid object. This setting is required when
+    *     rendering rectangle (RECTLIST) objects.
+    */
+   {
+      BEGIN_BATCH(20);
+      OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
+      OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */
+                1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+                0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
+      for (int i = 0; i < 18; ++i)
+         OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_WM
+    *
+    * Disable thread dispatch (dw5.19) and enable the HiZ op.
+    *
+    * Even though thread dispatch is disabled, max threads (dw5.25:31) must be
+    * nonzero to prevent the GPU from hanging. See the valid ranges in the
+    * BSpec, Volume 2a.11 Windower, Section 3DSTATE_WM, Dword 5.25:31
+    * "Maximum Number Of Threads".
+    */
+   {
+      uint32_t dw4 = 0;
+
+      /* Select the dw4 bit that corresponds to the requested HiZ op. */
+      switch (op) {
+      case GEN6_HIZ_OP_DEPTH_CLEAR:
+         assert(!"not implemented");
+         dw4 |= GEN6_WM_DEPTH_CLEAR;
+         break;
+      case GEN6_HIZ_OP_DEPTH_RESOLVE:
+         dw4 |= GEN6_WM_DEPTH_RESOLVE;
+         break;
+      case GEN6_HIZ_OP_HIZ_RESOLVE:
+         dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
+         break;
+      default:
+         assert(0);
+         break;
+      }
+
+      BEGIN_BATCH(9);
+      OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(dw4);
+      OUT_BATCH((brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
+      OUT_BATCH((1 - 1) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT); /* only position */
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DEPTH_BUFFER */
+   {
+      uint32_t width = mt->level[level].width;
+      uint32_t height = mt->level[level].height;
+
+      /* Split the draw offset: the bits selected by the tile masks become
+       * the fine in-tile offset (tile_x/tile_y); the remaining bits are
+       * folded into the buffer address via the aligned offset.
+       */
+      uint32_t tile_x = draw_x & tile_mask_x;
+      uint32_t tile_y = draw_y & tile_mask_y;
+      uint32_t offset = intel_region_get_aligned_offset(mt->region,
+                                                        draw_x & ~tile_mask_x,
+                                                        draw_y & ~tile_mask_y);
+
+      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
+       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
+       * Coordinate Offset X/Y":
+       *
+       *   "The 3 LSBs of both offsets must be zero to ensure correct
+       *   alignment"
+       *
+       * We have no guarantee that tile_x and tile_y are correctly aligned,
+       * since they are determined by the mipmap layout, which is only aligned
+       * to multiples of 4.
+       *
+       * So, to avoid hanging the GPU, just smash the low order 3 bits of
+       * tile_x and tile_y to 0.  This is a temporary workaround until we come
+       * up with a better solution.
+       */
+      tile_x &= ~7;
+      tile_y &= ~7;
+
+      /* Map the miptree's Mesa format to the hardware depth format.  Note
+       * that for any other format the default case only asserts, so in a
+       * build without assertions 'format' would be left unset.
+       */
+      uint32_t format;
+      switch (mt->format) {
+      case MESA_FORMAT_Z16:       format = BRW_DEPTHFORMAT_D16_UNORM; break;
+      case MESA_FORMAT_Z32_FLOAT: format = BRW_DEPTHFORMAT_D32_FLOAT; break;
+      case MESA_FORMAT_X8_Z24:    format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break;
+      default:                    assert(0); break;
+      }
+
+      intel_emit_post_sync_nonzero_flush(intel);
+      intel_emit_depth_stall_flushes(intel);
+
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
+      OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) |
+                format << 18 |
+                1 << 21 | /* separate stencil enable */
+                1 << 22 | /* hiz enable */
+                BRW_TILEWALK_YMAJOR << 26 |
+                1 << 27 | /* y-tiled */
+                BRW_SURFACE_2D << 29);
+      OUT_RELOC(mt->region->bo,
+                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                offset);
+      /* The surface extent is grown by the fine offset so the level still
+       * fits once tile_x/tile_y are applied.
+       */
+      OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 |
+                (width + tile_x - 1) << 6 |
+                (height + tile_y - 1) << 19);
+      OUT_BATCH(0);
+      OUT_BATCH(tile_x |
+                tile_y << 16);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_HIER_DEPTH_BUFFER
+    *
+    * The y coordinate is halved because each HiZ row represents 2 rows of
+    * pixels.
+    */
+   {
+      struct intel_region *hiz_region = mt->hiz_mt->region;
+      uint32_t hiz_offset =
+         intel_region_get_aligned_offset(hiz_region,
+                                         draw_x & ~tile_mask_x,
+                                         (draw_y & ~tile_mask_y) / 2);
+
+      BEGIN_BATCH(3);
+      OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
+      OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
+      OUT_RELOC(hiz_region->bo,
+                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                hiz_offset);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_STENCIL_BUFFER
+    *
+    * Emitted with both payload dwords zero; no stencil buffer is referenced.
+    */
+   {
+      BEGIN_BATCH(3);
+      OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_CLEAR_PARAMS
+    *
+    * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
+    *   [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
+    *   packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
+    */
+   {
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DRAWING_RECTANGLE
+    *
+    * Spans the whole miptree level: the second dword is 0 and the third
+    * packs (width - 1) in the low 16 bits and (height - 1) above them.
+    */
+   {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(((mt->level[level].width - 1) & 0xffff) |
+                ((mt->level[level].height - 1) << 16));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DPRIMITIVE
+    *
+    * Draw the single RECTLIST built by gen6_hiz_emit_vertices().
+    */
+   {
+     BEGIN_BATCH(6);
+     OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
+               _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
+               GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
+     OUT_BATCH(3); /* vertex count per instance */
+     OUT_BATCH(0);
+     OUT_BATCH(1); /* instance count */
+     OUT_BATCH(0);
+     OUT_BATCH(0);
+     ADVANCE_BATCH();
+   }
+
+   /* See comments above at first invocation of intel_flush() in
+    * gen6_hiz_emit_batch_head().
+    */
+   intel_flush(ctx);
+
+   /* Be safe: flag every brw and cache state bit as dirty (presumably so
+    * that all state is re-emitted after this hand-built batch).
+    */
+   brw->state.dirty.brw = ~0;
+   brw->state.dirty.cache = ~0;
+}
+
+/**
+ * \param out_offset is relative to
+ *        CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
+ */
+void
+gen6_hiz_emit_depth_stencil_state(struct brw_context *brw,
+                                  enum gen6_hiz_op op,
+                                  uint32_t *out_offset)
+{
+   struct gen6_depth_stencil_state *state;
+   state = (struct gen6_depth_stencil_state *)
+      brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
+                      sizeof(*state), 64,
+                      out_offset);
+   memset(state, 0, sizeof(*state));
+
+   /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
+    *   - 7.5.3.1 Depth Buffer Clear
+    *   - 7.5.3.2 Depth Buffer Resolve
+    *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
+    */
+   state->ds2.depth_write_enable = 1;
+   if (op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
+      state->ds2.depth_test_enable = 1;
+      state->ds2.depth_test_func = COMPAREFUNC_NEVER;
+   }
+}
+
+/**
+ * Run a HiZ resolve on one slice (level, layer) of a miptree.
+ *
+ * \see intel_context::vtbl::resolve_hiz_slice
+ */
+void
+gen6_resolve_hiz_slice(struct intel_context *intel,
+                       struct intel_mipmap_tree *mt,
+                       uint32_t level,
+                       uint32_t layer)
+{
+   const enum gen6_hiz_op resolve_op = GEN6_HIZ_OP_HIZ_RESOLVE;
+
+   gen6_hiz_exec(intel, mt, level, layer, resolve_op);
+}
+
+/**
+ * Run a depth resolve on one slice (level, layer) of a miptree.
+ *
+ * \see intel_context::vtbl::resolve_depth_slice
+ */
+void
+gen6_resolve_depth_slice(struct intel_context *intel,
+                         struct intel_mipmap_tree *mt,
+                         uint32_t level,
+                         uint32_t layer)
+{
+   const enum gen6_hiz_op resolve_op = GEN6_HIZ_OP_DEPTH_RESOLVE;
+
+   gen6_hiz_exec(intel, mt, level, layer, resolve_op);
+}
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.h b/src/mesa/drivers/dri/i965/gen6_blorp.h

new file mode 100644 (file)

index 0000000..5d6eefc
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct brw_context; /* needed by the HiZ internals prototypes below */
+struct intel_context;
+struct intel_mipmap_tree;
+
+/**
+ * HiZ operations. Overview: Sandy Bridge PRM, Volume 1, Part 2:
+ *   - 7.5.3.1 Depth Buffer Clear
+ *   - 7.5.3.2 Depth Buffer Resolve
+ *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
+ */
+enum gen6_hiz_op {
+   GEN6_HIZ_OP_DEPTH_CLEAR,
+   GEN6_HIZ_OP_DEPTH_RESOLVE,
+   GEN6_HIZ_OP_HIZ_RESOLVE,
+};
+
+/**
+ * \name HiZ internals
+ * \{
+ *
+ * Used internally by gen6_hiz_exec() and gen7_hiz_exec().
+ */
+
+void
+gen6_hiz_init(struct brw_context *brw);
+
+void
+gen6_hiz_emit_batch_head(struct brw_context *brw);
+
+void
+gen6_hiz_emit_vertices(struct brw_context *brw,
+                       struct intel_mipmap_tree *mt,
+                       unsigned int level,
+                       unsigned int layer);
+
+void
+gen6_hiz_emit_depth_stencil_state(struct brw_context *brw,
+                                  enum gen6_hiz_op op,
+                                  uint32_t *out_offset);
+/** \} */
+
+void
+gen6_resolve_hiz_slice(struct intel_context *intel,
+                       struct intel_mipmap_tree *mt,
+                       uint32_t level,
+                       uint32_t layer);
+
+void
+gen6_resolve_depth_slice(struct intel_context *intel,
+                         struct intel_mipmap_tree *mt,
+                         uint32_t level,
+                         uint32_t layer);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c

index 8a805fa6d7769ed3e24e3fe6398f50bd7597e89e..b4c5329772f25a0287317d74738676660d7637cd 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -92,12 +92,12 @@ gen6_upload_blend_state(struct brw_context *brw)
                intel_translate_logic_op(ctx->Color.LogicOp);
          }
        } else if (ctx->Color.BlendEnabled & (1 << b) && !integer) {
-        GLenum eqRGB = ctx->Color.Blend[0].EquationRGB;
-        GLenum eqA = ctx->Color.Blend[0].EquationA;
-        GLenum srcRGB = ctx->Color.Blend[0].SrcRGB;
-        GLenum dstRGB = ctx->Color.Blend[0].DstRGB;
-        GLenum srcA = ctx->Color.Blend[0].SrcA;
-        GLenum dstA = ctx->Color.Blend[0].DstA;
+        GLenum eqRGB = ctx->Color.Blend[b].EquationRGB;
+        GLenum eqA = ctx->Color.Blend[b].EquationA;
+        GLenum srcRGB = ctx->Color.Blend[b].SrcRGB;
+        GLenum dstRGB = ctx->Color.Blend[b].DstRGB;
+        GLenum srcA = ctx->Color.Blend[b].SrcA;
+        GLenum dstA = ctx->Color.Blend[b].DstA;
  
          if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
             srcRGB = dstRGB = GL_ONE;
diff --git a/src/mesa/drivers/dri/i965/gen6_hiz.c b/src/mesa/drivers/dri/i965/gen6_hiz.c

deleted file mode 100644 (file)

index 9837b1f..0000000
--- a/src/mesa/drivers/dri/i965/gen6_hiz.c
+++ /dev/null
@@ -1,614 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <assert.h>
-
-#include "intel_batchbuffer.h"
-#include "intel_fbo.h"
-#include "intel_mipmap_tree.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-
-#include "gen6_hiz.h"
-
-/**
- * \name Constants for HiZ VBO
- * \{
- *
- * \see brw_context::hiz::vertex_bo
- */
-#define GEN6_HIZ_NUM_VERTICES 3
-#define GEN6_HIZ_NUM_VUE_ELEMS 8
-#define GEN6_HIZ_VBO_SIZE (GEN6_HIZ_NUM_VERTICES \
-                           * GEN6_HIZ_NUM_VUE_ELEMS \
-                           * sizeof(float))
-/** \} */
-
-void
-gen6_hiz_emit_batch_head(struct brw_context *brw)
-{
-   struct gl_context *ctx = &brw->intel.ctx;
-   struct intel_context *intel = &brw->intel;
-
-   /* To ensure that the batch contains only the resolve, flush the batch
-    * before beginning and after finishing emitting the resolve packets.
-    *
-    * Ideally, we would not need to flush for the resolve op. But, I suspect
-    * that it's unsafe for CMD_PIPELINE_SELECT to occur multiple times in
-    * a single batch, and there is no safe way to ensure that other than by
-    * fencing the resolve with flushes. Ideally, we would just detect if
-    * a batch is in progress and do the right thing, but that would require
-    * the ability to *safely* access brw_context::state::dirty::brw
-    * outside of the brw_upload_state() codepath.
-    */
-   intel_flush(ctx);
-
-   /* CMD_PIPELINE_SELECT
-    *
-    * Select the 3D pipeline, as opposed to the media pipeline.
-    */
-   {
-      BEGIN_BATCH(1);
-      OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_MULTISAMPLE */
-   {
-      int length = intel->gen == 7 ? 4 : 3;
-
-      BEGIN_BATCH(length);
-      OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (length - 2));
-      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
-                MS_NUMSAMPLES_1);
-      OUT_BATCH(0);
-      if (length >= 4)
-         OUT_BATCH(0);
-      ADVANCE_BATCH();
-
-   }
-
-   /* 3DSTATE_SAMPLE_MASK */
-   {
-      BEGIN_BATCH(2);
-      OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
-      OUT_BATCH(1);
-      ADVANCE_BATCH();
-   }
-
-   /* CMD_STATE_BASE_ADDRESS
-    *
-    * From the Sandy Bridge PRM, Volume 1, Part 1, Table STATE_BASE_ADDRESS:
-    *     The following commands must be reissued following any change to the
-    *     base addresses:
-    *         3DSTATE_CC_POINTERS
-    *         3DSTATE_BINDING_TABLE_POINTERS
-    *         3DSTATE_SAMPLER_STATE_POINTERS
-    *         3DSTATE_VIEWPORT_STATE_POINTERS
-    *         MEDIA_STATE_POINTERS
-    */
-   {
-      BEGIN_BATCH(10);
-      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
-      OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */
-      /* SurfaceStateBaseAddress */
-      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
-      /* DynamicStateBaseAddress */
-      OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
-                                  I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
-      OUT_BATCH(1); /* IndirectObjectBaseAddress */
-      OUT_BATCH(1); /* InstructionBaseAddress */
-      OUT_BATCH(1); /* GeneralStateUpperBound */
-      OUT_BATCH(1); /* DynamicStateUpperBound */
-      OUT_BATCH(1); /* IndirectObjectUpperBound*/
-      OUT_BATCH(1); /* InstructionAccessUpperBound */
-      ADVANCE_BATCH();
-   }
-}
-
-void
-gen6_hiz_emit_vertices(struct brw_context *brw,
-                       struct intel_mipmap_tree *mt,
-                       unsigned int level,
-                       unsigned int layer)
-{
-   struct intel_context *intel = &brw->intel;
-   uint32_t vertex_offset;
-
-   /* Setup VBO for the rectangle primitive..
-    *
-    * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
-    * vertices. The vertices reside in screen space with DirectX coordinates
-    * (that is, (0, 0) is the upper left corner).
-    *
-    *   v2 ------ implied
-    *    |        |
-    *    |        |
-    *   v0 ----- v1
-    *
-    * Since the VS is disabled, the clipper loads each VUE directly from
-    * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
-    * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
-    *   dw0: Reserved, MBZ.
-    *   dw1: Render Target Array Index. The HiZ op does not use indexed
-    *        vertices, so set the dword to 0.
-    *   dw2: Viewport Index. The HiZ op disables viewport mapping and
-    *        scissoring, so set the dword to 0.
-    *   dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so
-    *        set the dword to 0.
-    *   dw4: Vertex Position X.
-    *   dw5: Vertex Position Y.
-    *   dw6: Vertex Position Z.
-    *   dw7: Vertex Position W.
-    *
-    * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
-    * "Vertex URB Entry (VUE) Formats".
-    */
-   {
-      const int width = mt->level[level].width;
-      const int height = mt->level[level].height;
-      float *vertex_data;
-
-      const float vertices[GEN6_HIZ_VBO_SIZE] = {
-         /* v0 */ 0, 0, 0, 0,         0, height, 0, 1,
-         /* v1 */ 0, 0, 0, 0,     width, height, 0, 1,
-         /* v2 */ 0, 0, 0, 0,         0,      0, 0, 1,
-      };
-
-      vertex_data = brw_state_batch(brw, AUB_TRACE_NO_TYPE,
-                                   GEN6_HIZ_VBO_SIZE, 32, &vertex_offset);
-      memcpy(vertex_data, vertices, GEN6_HIZ_VBO_SIZE);
-   }
-
-   /* 3DSTATE_VERTEX_BUFFERS */
-   {
-      const int num_buffers = 1;
-      const int batch_length = 1 + 4 * num_buffers;
-
-      uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
-                     (GEN6_HIZ_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT;
-
-      if (intel->gen >= 7)
-         dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
-
-      BEGIN_BATCH(batch_length);
-      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
-      OUT_BATCH(dw0);
-      /* start address */
-      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
-               vertex_offset);
-      /* end address */
-      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
-               vertex_offset + GEN6_HIZ_VBO_SIZE - 1);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_VERTEX_ELEMENTS
-    *
-    * Fetch dwords 0 - 7 from each VUE. See the comments above where
-    * hiz->vertex_bo is filled with data.
-    */
-   {
-      const int num_elements = 2;
-      const int batch_length = 1 + 2 * num_elements;
-
-      BEGIN_BATCH(batch_length);
-      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2));
-      /* Element 0 */
-      OUT_BATCH(GEN6_VE0_VALID |
-                BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
-                0 << BRW_VE0_SRC_OFFSET_SHIFT);
-      OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
-                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
-                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
-                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
-      /* Element 1 */
-      OUT_BATCH(GEN6_VE0_VALID |
-                BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
-                16 << BRW_VE0_SRC_OFFSET_SHIFT);
-      OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
-                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
-                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
-                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
-      ADVANCE_BATCH();
-   }
-}
-
-/**
- * \brief Execute a HiZ op on a miptree slice.
- *
- * To execute the HiZ op, this function manually constructs and emits a batch
- * to "draw" the HiZ op's rectangle primitive. The batchbuffer is flushed
- * before constructing and after emitting the batch.
- *
- * This function alters no GL state.
- *
- * For an overview of HiZ ops, see the following sections of the Sandy Bridge
- * PRM, Volume 1, Part 2:
- *   - 7.5.3.1 Depth Buffer Clear
- *   - 7.5.3.2 Depth Buffer Resolve
- *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
- */
-static void
-gen6_hiz_exec(struct intel_context *intel,
-              struct intel_mipmap_tree *mt,
-              unsigned int level,
-              unsigned int layer,
-              enum gen6_hiz_op op)
-{
-   struct gl_context *ctx = &intel->ctx;
-   struct brw_context *brw = brw_context(ctx);
-
-   assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */
-   assert(mt->hiz_mt != NULL);
-   intel_miptree_check_level_layer(mt, level, layer);
-
-   gen6_hiz_emit_batch_head(brw);
-   gen6_hiz_emit_vertices(brw, mt, level, layer);
-
-   /* 3DSTATE_URB
-    *
-    * Assign the entire URB to the VS. Even though the VS disabled, URB space
-    * is still needed because the clipper loads the VUE's from the URB. From
-    * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
-    * Dword 1.15:0 "VS Number of URB Entries":
-    *     This field is always used (even if VS Function Enable is DISABLED).
-    *
-    * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
-    * safely ignore it because this batch contains only one draw call.
-    *     Because of URB corruption caused by allocating a previous GS unit
-    *     URB entry to the VS unit, software is required to send a “GS NULL
-    *     Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
-    *     plus a dummy DRAW call before any case where VS will be taking over
-    *     GS URB space.
-    */
-   {
-      BEGIN_BATCH(3);
-      OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
-      OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_CC_STATE_POINTERS
-    *
-    * The pointer offsets are relative to
-    * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
-    *
-    * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
-    */
-   {
-      uint32_t depthstencil_offset;
-      gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset);
-
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
-      OUT_BATCH(1); /* BLEND_STATE offset */
-      OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */
-      OUT_BATCH(1); /* COLOR_CALC_STATE offset */
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_VS
-    *
-    * Disable vertex shader.
-    */
-   {
-      /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section
-       * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
-       *   [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
-       *   command that causes the VS Function Enable to toggle. Pipeline
-       *   flush can be executed by sending a PIPE_CONTROL command with CS
-       *   stall bit set and a post sync operation.
-       */
-      intel_emit_post_sync_nonzero_flush(intel);
-
-      BEGIN_BATCH(6);
-      OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_GS
-    *
-    * Disable the geometry shader.
-    */
-   {
-      BEGIN_BATCH(7);
-      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_CLIP
-    *
-    * Disable the clipper.
-    *
-    * The HiZ op emits a rectangle primitive, which requires clipping to
-    * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
-    * Section 1.3 "3D Primitives Overview":
-    *    RECTLIST:
-    *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
-    *    Mode should be set to a value other than CLIPMODE_NORMAL.
-    *
-    * Also disable perspective divide. This doesn't change the clipper's
-    * output, but does spare a few electrons.
-    */
-   {
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_SF
-    *
-    * Disable ViewportTransformEnable (dw2.1)
-    *
-    * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
-    * Primitives Overview":
-    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
-    *     use of screen- space coordinates).
-    *
-    * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
-    * and BackFaceFillMode (dw2.5:6) to SOLID(0).
-    *
-    * From the Sandy Bridge PRM, Volume 2, Part 1, Section
-    * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
-    *     SOLID: Any triangle or rectangle object found to be front-facing
-    *     is rendered as a solid object. This setting is required when
-    *     (rendering rectangle (RECTLIST) objects.
-    */
-   {
-      BEGIN_BATCH(20);
-      OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
-      OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */
-                1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
-                0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
-      for (int i = 0; i < 18; ++i)
-         OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_WM
-    *
-    * Disable thread dispatch (dw5.19) and enable the HiZ op.
-    *
-    * Even though thread dispatch is disabled, max threads (dw5.25:31) must be
-    * nonzero to prevent the GPU from hanging. See the valid ranges in the
-    * BSpec, Volume 2a.11 Windower, Section 3DSTATE_WM, Dword 5.25:31
-    * "Maximum Number Of Threads".
-    */
-   {
-      uint32_t dw4 = 0;
-
-      switch (op) {
-      case GEN6_HIZ_OP_DEPTH_CLEAR:
-         assert(!"not implemented");
-         dw4 |= GEN6_WM_DEPTH_CLEAR;
-         break;
-      case GEN6_HIZ_OP_DEPTH_RESOLVE:
-         dw4 |= GEN6_WM_DEPTH_RESOLVE;
-         break;
-      case GEN6_HIZ_OP_HIZ_RESOLVE:
-         dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
-         break;
-      default:
-         assert(0);
-         break;
-      }
-
-      BEGIN_BATCH(9);
-      OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(dw4);
-      OUT_BATCH((brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
-      OUT_BATCH((1 - 1) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT); /* only position */
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_DEPTH_BUFFER */
-   {
-      uint32_t width = mt->level[level].width;
-      uint32_t height = mt->level[level].height;
-
-      uint32_t tile_x;
-      uint32_t tile_y;
-      uint32_t offset;
-      {
-         /* Construct a dummy renderbuffer just to extract tile offsets. */
-         struct intel_renderbuffer rb;
-         rb.mt = mt;
-         rb.mt_level = level;
-         rb.mt_layer = layer;
-         intel_renderbuffer_set_draw_offset(&rb);
-         offset = intel_renderbuffer_tile_offsets(&rb, &tile_x, &tile_y);
-      }
-
-      uint32_t format;
-      switch (mt->format) {
-      case MESA_FORMAT_Z16:       format = BRW_DEPTHFORMAT_D16_UNORM; break;
-      case MESA_FORMAT_Z32_FLOAT: format = BRW_DEPTHFORMAT_D32_FLOAT; break;
-      case MESA_FORMAT_X8_Z24:    format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break;
-      default:                    assert(0); break;
-      }
-
-      intel_emit_post_sync_nonzero_flush(intel);
-      intel_emit_depth_stall_flushes(intel);
-
-      BEGIN_BATCH(7);
-      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
-      OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) |
-                format << 18 |
-                1 << 21 | /* separate stencil enable */
-                1 << 22 | /* hiz enable */
-                BRW_TILEWALK_YMAJOR << 26 |
-                1 << 27 | /* y-tiled */
-                BRW_SURFACE_2D << 29);
-      OUT_RELOC(mt->region->bo,
-                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                offset);
-      OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 |
-                (width + tile_x - 1) << 6 |
-                (height + tile_y - 1) << 19);
-      OUT_BATCH(0);
-      OUT_BATCH(tile_x |
-                tile_y << 16);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_HIER_DEPTH_BUFFER */
-   {
-      struct intel_region *hiz_region = mt->hiz_mt->region;
-
-      BEGIN_BATCH(3);
-      OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
-      OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
-      OUT_RELOC(hiz_region->bo,
-                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_STENCIL_BUFFER */
-   {
-      BEGIN_BATCH(3);
-      OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_CLEAR_PARAMS
-    *
-    * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
-    *   [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
-    *   packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
-    */
-   {
-      BEGIN_BATCH(2);
-      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_DRAWING_RECTANGLE */
-   {
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(((mt->level[level].width - 1) & 0xffff) |
-                ((mt->level[level].height - 1) << 16));
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DPRIMITIVE */
-   {
-     BEGIN_BATCH(6);
-     OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
-               _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
-               GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
-     OUT_BATCH(3); /* vertex count per instance */
-     OUT_BATCH(0);
-     OUT_BATCH(1); /* instance count */
-     OUT_BATCH(0);
-     OUT_BATCH(0);
-     ADVANCE_BATCH();
-   }
-
-   /* See comments above at first invocation of intel_flush() in
-    * gen6_hiz_emit_batch_head().
-    */
-   intel_flush(ctx);
-
-   /* Be safe. */
-   brw->state.dirty.brw = ~0;
-   brw->state.dirty.cache = ~0;
-}
-
-/**
- * \param out_offset is relative to
- *        CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
- */
-void
-gen6_hiz_emit_depth_stencil_state(struct brw_context *brw,
-                                  enum gen6_hiz_op op,
-                                  uint32_t *out_offset)
-{
-   struct gen6_depth_stencil_state *state;
-   state = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
-                              sizeof(*state), 64,
-                              out_offset);
-   memset(state, 0, sizeof(*state));
-
-   /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
-    *   - 7.5.3.1 Depth Buffer Clear
-    *   - 7.5.3.2 Depth Buffer Resolve
-    *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
-    */
-   state->ds2.depth_write_enable = 1;
-   if (op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
-      state->ds2.depth_test_enable = 1;
-      state->ds2.depth_test_func = COMPAREFUNC_NEVER;
-   }
-}
-
-/** \see intel_context::vtbl::resolve_hiz_slice */
-void
-gen6_resolve_hiz_slice(struct intel_context *intel,
-                       struct intel_mipmap_tree *mt,
-                       uint32_t level,
-                       uint32_t layer)
-{
-   gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
-}
-
-/** \see intel_context::vtbl::resolve_depth_slice */
-void
-gen6_resolve_depth_slice(struct intel_context *intel,
-                         struct intel_mipmap_tree *mt,
-                         uint32_t level,
-                         uint32_t layer)
-{
-   gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
-}
diff --git a/src/mesa/drivers/dri/i965/gen6_hiz.h b/src/mesa/drivers/dri/i965/gen6_hiz.h

deleted file mode 100644 (file)

index 0a13ba0..0000000
--- a/src/mesa/drivers/dri/i965/gen6_hiz.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#pragma once
-
-#include <stdint.h>
-
-struct intel_context;
-struct intel_mipmap_tree;
-
-/**
- * For an overview of the HiZ operations, see the following sections of the
- * Sandy Bridge PRM, Volume 1, Part2:
- *   - 7.5.3.1 Depth Buffer Clear
- *   - 7.5.3.2 Depth Buffer Resolve
- *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
- */
-enum gen6_hiz_op {
-   GEN6_HIZ_OP_DEPTH_CLEAR,
-   GEN6_HIZ_OP_DEPTH_RESOLVE,
-   GEN6_HIZ_OP_HIZ_RESOLVE,
-};
-
-/**
- * \name HiZ internals
- * \{
- *
- * Used internally by gen6_hiz_exec() and gen7_hiz_exec().
- */
-
-void
-gen6_hiz_init(struct brw_context *brw);
-
-void
-gen6_hiz_emit_batch_head(struct brw_context *brw);
-
-void
-gen6_hiz_emit_vertices(struct brw_context *brw,
-                       struct intel_mipmap_tree *mt,
-                       unsigned int level,
-                       unsigned int layer);
-
-void
-gen6_hiz_emit_depth_stencil_state(struct brw_context *brw,
-                                  enum gen6_hiz_op op,
-                                  uint32_t *out_offset);
-/** \} */
-
-void
-gen6_resolve_hiz_slice(struct intel_context *intel,
-                       struct intel_mipmap_tree *mt,
-                       uint32_t level,
-                       uint32_t layer);
-
-void
-gen6_resolve_depth_slice(struct intel_context *intel,
-                         struct intel_mipmap_tree *mt,
-                         uint32_t level,
-                         uint32_t layer);
diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c

index feeca28d4591a607d15b3bf828febdc6ab064d6d..4c7f81c1dd32c302902c65b78155f57c09300cf2 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
@@ -29,13 +29,14 @@
  #include "brw_state.h"
  #include "brw_defines.h"
  #include "intel_batchbuffer.h"
+#include "main/fbobject.h"
  
  static void
  gen6_upload_scissor_state(struct brw_context *brw)
  {
     struct intel_context *intel = &brw->intel;
     struct gl_context *ctx = &intel->ctx;
-   const bool render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
     struct gen6_scissor_rect *scissor;
     uint32_t scissor_state_offset;
  
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c

index 95ed1f74ce425fbbc8aedbe197ce8a333b472040..5c4293ca91eeefd192d9660d20d029a8853f6929 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -30,6 +30,7 @@
  #include "brw_defines.h"
  #include "brw_util.h"
  #include "main/macros.h"
+#include "main/fbobject.h"
  #include "intel_batchbuffer.h"
  
  /**
@@ -120,7 +121,7 @@ upload_sf_state(struct brw_context *brw)
     uint32_t dw1, dw2, dw3, dw4, dw16, dw17;
     int i;
     /* _NEW_BUFFER */
-   bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+   bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer);
     int attr = 0, input_index = 0;
     int urb_entry_read_offset = 1;
     float point_size;
diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c

index 9ff2bd95680b49122bb4813236b086bee42b372b..f787ac711646a0686fe93210b426b445ea0fffd2 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
@@ -29,6 +29,7 @@
  #include "brw_state.h"
  #include "brw_defines.h"
  #include "intel_batchbuffer.h"
+#include "main/fbobject.h"
  
  /* The clip VP defines the guardband region where expensive clipping is skipped
   * and fragments are allowed to be generated and clipped out cheaply by the SF.
@@ -70,7 +71,7 @@ gen6_upload_sf_vp(struct brw_context *brw)
     const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
     struct brw_sf_viewport *sfv;
     GLfloat y_scale, y_bias;
-   const bool render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
     const GLfloat *v = ctx->Viewport._WindowMap.m;
  
     sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE,
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp

new file mode 100644 (file)

index 0000000..9d21ec9
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -0,0 +1,501 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_fbo.h"
+#include "intel_mipmap_tree.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+
+#include "gen6_blorp.h"
+#include "gen7_blorp.h"
+
+/**
+ * \copydoc gen6_hiz_exec()
+ */
+static void
+gen7_hiz_exec(struct intel_context *intel,
+              struct intel_mipmap_tree *mt,
+              unsigned int level,
+              unsigned int layer,
+              enum gen6_hiz_op op)
+{
+   struct gl_context *ctx = &intel->ctx;
+   struct brw_context *brw = brw_context(ctx);
+   uint32_t draw_x, draw_y;
+   uint32_t tile_mask_x, tile_mask_y;
+
+   assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */
+   assert(mt->hiz_mt != NULL);
+   intel_miptree_check_level_layer(mt, level, layer);
+
+   uint32_t depth_format;
+   switch (mt->format) {
+   case MESA_FORMAT_Z16:       depth_format = BRW_DEPTHFORMAT_D16_UNORM; break;
+   case MESA_FORMAT_Z32_FLOAT: depth_format = BRW_DEPTHFORMAT_D32_FLOAT; break;
+   case MESA_FORMAT_X8_Z24:    depth_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break;
+   default:                    assert(0); break;
+   }
+
+   {
+      /* Construct a dummy renderbuffer just to extract tile offsets. */
+      struct intel_renderbuffer rb;
+      rb.mt = mt;
+      rb.mt_level = level;
+      rb.mt_layer = layer;
+      intel_renderbuffer_set_draw_offset(&rb);
+      draw_x = rb.draw_x;
+      draw_y = rb.draw_y;
+   }
+
+   /* Compute masks to determine how much of draw_x and draw_y should be
+    * performed using the fine adjustment of "depth coordinate offset X/Y"
+    * (dw5 of 3DSTATE_DEPTH_BUFFER).  See the emit_depthbuffer() function for
+    * details.
+    */
+   {
+      uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y;
+      intel_region_get_tile_masks(mt->region, &depth_mask_x, &depth_mask_y);
+      intel_region_get_tile_masks(mt->hiz_mt->region,
+                                  &hiz_mask_x, &hiz_mask_y);
+
+      /* Each HiZ row represents 2 rows of pixels */
+      hiz_mask_y = hiz_mask_y << 1 | 1;
+
+      tile_mask_x = depth_mask_x | hiz_mask_x;
+      tile_mask_y = depth_mask_y | hiz_mask_y;
+   }
+
+   gen6_hiz_emit_batch_head(brw);
+   gen6_hiz_emit_vertices(brw, mt, level, layer);
+
+   /* 3DSTATE_URB_VS
+    * 3DSTATE_URB_HS
+    * 3DSTATE_URB_DS
+    * 3DSTATE_URB_GS
+    *
+    * If the 3DSTATE_URB_VS is emitted, then the others must be also. From the
+    * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS:
+    *     3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
+    *     programmed in order for the programming of this state to be
+    *     valid.
+    */
+   {
+      /* The minimum valid value is 32. See 3DSTATE_URB_VS,
+       * Dword 1.15:0 "VS Number of URB Entries".
+       */
+      int num_vs_entries = 32;
+
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2));
+      OUT_BATCH(1 << GEN7_URB_ENTRY_SIZE_SHIFT |
+                0 << GEN7_URB_STARTING_ADDRESS_SHIFT |
+                num_vs_entries);
+      ADVANCE_BATCH();
+
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS
+    *
+    * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
+    */
+   {
+      uint32_t depthstencil_offset;
+      gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset);
+
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2));
+      OUT_BATCH(depthstencil_offset | 1);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_VS
+    *
+    * Disable vertex shader.
+    */
+   {
+      BEGIN_BATCH(6);
+      OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_HS
+    *
+    * Disable the hull shader.
+    */
+   {
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_TE
+    *
+    * Disable the tessellation engine.
+    */
+   {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DS
+    *
+    * Disable the domain shader.
+    */
+   {
+      BEGIN_BATCH(6);
+      OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_GS
+    *
+    * Disable the geometry shader.
+    */
+   {
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_STREAMOUT
+    *
+    * Disable streamout.
+    */
+   {
+      BEGIN_BATCH(3);
+      OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_CLIP
+    *
+    * Disable the clipper.
+    *
+    * The HiZ op emits a rectangle primitive, which requires clipping to
+    * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
+    * Section 1.3 "3D Primitives Overview":
+    *    RECTLIST:
+    *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
+    *    Mode should be set to a value other than CLIPMODE_NORMAL.
+    *
+    * Also disable perspective divide. This doesn't change the clipper's
+    * output, but does spare a few electrons.
+    */
+   {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_SF
+    *
+    * Disable ViewportTransformEnable (dw1.1)
+    *
+    * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
+    * Primitives Overview":
+    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
+    *     use of screen-space coordinates).
+    *
+    * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5)
+    * and BackFaceFillMode (dw1.4:3) to SOLID(0).
+    *
+    * From the Sandy Bridge PRM, Volume 2, Part 1, Section
+    * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
+    *     SOLID: Any triangle or rectangle object found to be front-facing
+    *     is rendered as a solid object. This setting is required when
+    *     rendering rectangle (RECTLIST) objects.
+    */
+   {
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
+      OUT_BATCH(depth_format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_SBE */
+   {
+      BEGIN_BATCH(14);
+      OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
+      OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */
+                1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
+                0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
+      for (int i = 0; i < 12; ++i)
+         OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_WM
+    *
+    * Disable PS thread dispatch (dw1.29) and enable the HiZ op.
+    */
+   {
+      uint32_t dw1 = 0;
+
+      switch (op) {
+      case GEN6_HIZ_OP_DEPTH_CLEAR:
+         assert(!"not implemented");
+         dw1 |= GEN7_WM_DEPTH_CLEAR;
+         break;
+      case GEN6_HIZ_OP_DEPTH_RESOLVE:
+         dw1 |= GEN7_WM_DEPTH_RESOLVE;
+         break;
+      case GEN6_HIZ_OP_HIZ_RESOLVE:
+         dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
+         break;
+      default:
+         assert(0);
+         break;
+      }
+
+      BEGIN_BATCH(3);
+      OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
+      OUT_BATCH(dw1);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_PS
+    *
+    * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
+    * that, thread dispatch info must still be specified.
+    *     - Maximum Number of Threads (dw4.24:31) must be nonzero, as the BSpec
+    *       states that the valid range for this field is [0x3, 0x2f].
+    *     - A dispatch mode must be given; that is, at least one of the
+    *       "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
+    *       discovered through simulator error messages.
+    */
+   {
+      BEGIN_BATCH(8);
+      OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(((brw->max_wm_threads - 1) << IVB_PS_MAX_THREADS_SHIFT) |
+               GEN7_PS_32_DISPATCH_ENABLE);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DEPTH_BUFFER */
+   {
+      uint32_t width = mt->level[level].width;
+      uint32_t height = mt->level[level].height;
+
+      uint32_t tile_x = draw_x & tile_mask_x;
+      uint32_t tile_y = draw_y & tile_mask_y;
+      uint32_t offset = intel_region_get_aligned_offset(mt->region,
+                                                        draw_x & ~tile_mask_x,
+                                                        draw_y & ~tile_mask_y);
+
+      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
+       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
+       * Coordinate Offset X/Y":
+       *
+       *   "The 3 LSBs of both offsets must be zero to ensure correct
+       *   alignment"
+       *
+       * We have no guarantee that tile_x and tile_y are correctly aligned,
+       * since they are determined by the mipmap layout, which is only aligned
+       * to multiples of 4.
+       *
+       * So, to avoid hanging the GPU, just smash the low order 3 bits of
+       * tile_x and tile_y to 0.  This is a temporary workaround until we come
+       * up with a better solution.
+       */
+      tile_x &= ~7;
+      tile_y &= ~7;
+
+      intel_emit_depth_stall_flushes(intel);
+
+      BEGIN_BATCH(7);
+      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
+      OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) |
+                depth_format << 18 |
+                1 << 22 | /* hiz enable */
+                1 << 28 | /* depth write */
+                BRW_SURFACE_2D << 29);
+      OUT_RELOC(mt->region->bo,
+                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                offset);
+      OUT_BATCH((width + tile_x - 1) << 4 |
+                (height + tile_y - 1) << 18);
+      OUT_BATCH(0);
+      OUT_BATCH(tile_x |
+                tile_y << 16);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_HIER_DEPTH_BUFFER */
+   {
+      struct intel_region *hiz_region = mt->hiz_mt->region;
+      uint32_t hiz_offset =
+         intel_region_get_aligned_offset(hiz_region,
+                                         draw_x & ~tile_mask_x,
+                                         (draw_y & ~tile_mask_y) / 2);
+
+      BEGIN_BATCH(3);
+      OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
+      OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
+      OUT_RELOC(hiz_region->bo,
+                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                hiz_offset);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_STENCIL_BUFFER */
+   {
+      BEGIN_BATCH(3);
+      OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_CLEAR_PARAMS
+    *
+    * From the BSpec, Volume 2a.11 Windower, Section 1.5.6.3.2
+    * 3DSTATE_CLEAR_PARAMS:
+    *    [DevIVB] 3DSTATE_CLEAR_PARAMS must always be programmed along
+    *    with the other Depth/Stencil state commands (i.e. 3DSTATE_DEPTH_BUFFER,
+    *    3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER).
+    */
+   {
+      BEGIN_BATCH(3);
+      OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DRAWING_RECTANGLE */
+   {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(((mt->level[level].width - 1) & 0xffff) |
+                ((mt->level[level].height - 1) << 16));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DPRIMITIVE */
+   {
+     BEGIN_BATCH(7);
+     OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
+     OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL |
+               _3DPRIM_RECTLIST);
+     OUT_BATCH(3); /* vertex count per instance */
+     OUT_BATCH(0);
+     OUT_BATCH(1); /* instance count */
+     OUT_BATCH(0);
+     OUT_BATCH(0);
+     ADVANCE_BATCH();
+   }
+
+   /* See comments above at first invocation of intel_flush() in
+    * gen6_hiz_emit_batch_head().
+    */
+   intel_flush(ctx);
+
+   /* Be safe. */
+   brw->state.dirty.brw = ~0;
+   brw->state.dirty.cache = ~0;
+}
+
+/** \copydoc gen6_resolve_hiz_slice() */
+void
+gen7_resolve_hiz_slice(struct intel_context *intel,
+                       struct intel_mipmap_tree *mt,
+                       uint32_t level,
+                       uint32_t layer)
+{
+   gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
+}
+
+/** \copydoc gen6_resolve_depth_slice() */
+void
+gen7_resolve_depth_slice(struct intel_context *intel,
+                         struct intel_mipmap_tree *mt,
+                         uint32_t level,
+                         uint32_t layer)
+{
+   gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
+}
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.h b/src/mesa/drivers/dri/i965/gen7_blorp.h

new file mode 100644 (file)

index 0000000..6c0fc84
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct intel_context;
+struct intel_mipmap_tree;
+
+/** \copydoc gen6_resolve_hiz_slice() */
+void
+gen7_resolve_hiz_slice(struct intel_context *intel,
+                       struct intel_mipmap_tree *mt,
+                       uint32_t level,
+                       uint32_t layer);
+
+/** \copydoc gen6_resolve_depth_slice() */
+void
+gen7_resolve_depth_slice(struct intel_context *intel,
+                         struct intel_mipmap_tree *mt,
+                         uint32_t level,
+                         uint32_t layer);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c b/src/mesa/drivers/dri/i965/gen7_clip_state.c

index f78b089484d01120b65a8c8a625e4756e3e1a021..5fede36a20d20016e20218286040c1db0fea82ff 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c
@@ -26,6 +26,7 @@
  #include "brw_defines.h"
  #include "brw_util.h"
  #include "intel_batchbuffer.h"
+#include "main/fbobject.h"
  
  static void
  upload_clip_state(struct brw_context *brw)
@@ -38,7 +39,7 @@ upload_clip_state(struct brw_context *brw)
     uint32_t nonperspective_barycentric_enable_flag = 0;
  
     /* _NEW_BUFFERS */
-   bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+   bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer);
  
     /* CACHE_NEW_WM_PROG */
     if (brw->wm.prog_data->barycentric_interp_modes &
diff --git a/src/mesa/drivers/dri/i965/gen7_hiz.c b/src/mesa/drivers/dri/i965/gen7_hiz.c

deleted file mode 100644 (file)

index 18c178e..0000000
--- a/src/mesa/drivers/dri/i965/gen7_hiz.c
+++ /dev/null
@@ -1,455 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <assert.h>
-
-#include "intel_batchbuffer.h"
-#include "intel_fbo.h"
-#include "intel_mipmap_tree.h"
-
-#include "brw_context.h"
-#include "brw_defines.h"
-#include "brw_state.h"
-
-#include "gen6_hiz.h"
-#include "gen7_hiz.h"
-
-/**
- * \copydoc gen6_hiz_exec()
- */
-static void
-gen7_hiz_exec(struct intel_context *intel,
-              struct intel_mipmap_tree *mt,
-              unsigned int level,
-              unsigned int layer,
-              enum gen6_hiz_op op)
-{
-   struct gl_context *ctx = &intel->ctx;
-   struct brw_context *brw = brw_context(ctx);
-
-   assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */
-   assert(mt->hiz_mt != NULL);
-   intel_miptree_check_level_layer(mt, level, layer);
-
-   uint32_t depth_format;
-   switch (mt->format) {
-   case MESA_FORMAT_Z16:       depth_format = BRW_DEPTHFORMAT_D16_UNORM; break;
-   case MESA_FORMAT_Z32_FLOAT: depth_format = BRW_DEPTHFORMAT_D32_FLOAT; break;
-   case MESA_FORMAT_X8_Z24:    depth_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break;
-   default:                    assert(0); break;
-   }
-
-   gen6_hiz_emit_batch_head(brw);
-   gen6_hiz_emit_vertices(brw, mt, level, layer);
-
-   /* 3DSTATE_URB_VS
-    * 3DSTATE_URB_HS
-    * 3DSTATE_URB_DS
-    * 3DSTATE_URB_GS
-    *
-    * If the 3DSTATE_URB_VS is emitted, than the others must be also. From the
-    * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS:
-    *     3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
-    *     programmed in order for the programming of this state to be
-    *     valid.
-    */
-   {
-      /* The minimum valid value is 32. See 3DSTATE_URB_VS,
-       * Dword 1.15:0 "VS Number of URB Entries".
-       */
-      int num_vs_entries = 32;
-
-      BEGIN_BATCH(2);
-      OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2));
-      OUT_BATCH(1 << GEN7_URB_ENTRY_SIZE_SHIFT |
-                0 << GEN7_URB_STARTING_ADDRESS_SHIFT |
-                num_vs_entries);
-      ADVANCE_BATCH();
-
-      BEGIN_BATCH(2);
-      OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2));
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-
-      BEGIN_BATCH(2);
-      OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2));
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-
-      BEGIN_BATCH(2);
-      OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2));
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS
-    *
-    * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
-    */
-   {
-      uint32_t depthstencil_offset;
-      gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset);
-
-      BEGIN_BATCH(2);
-      OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2));
-      OUT_BATCH(depthstencil_offset | 1);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_VS
-    *
-    * Disable vertex shader.
-    */
-   {
-      BEGIN_BATCH(6);
-      OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_HS
-    *
-    * Disable the hull shader.
-    */
-   {
-      BEGIN_BATCH(7);
-      OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_TE
-    *
-    * Disable the tesselation engine.
-    */
-   {
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_DS
-    *
-    * Disable the domain shader.
-    */
-   {
-      BEGIN_BATCH(6);
-      OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_GS
-    *
-    * Disable the geometry shader.
-    */
-   {
-      BEGIN_BATCH(7);
-      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_STREAMOUT
-    *
-    * Disable streamout.
-    */
-   {
-      BEGIN_BATCH(3);
-      OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_CLIP
-    *
-    * Disable the clipper.
-    *
-    * The HiZ op emits a rectangle primitive, which requires clipping to
-    * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
-    * Section 1.3 "3D Primitives Overview":
-    *    RECTLIST:
-    *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
-    *    Mode should be set to a value other than CLIPMODE_NORMAL.
-    *
-    * Also disable perspective divide. This doesn't change the clipper's
-    * output, but does spare a few electrons.
-    */
-   {
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_SF
-    *
-    * Disable ViewportTransformEnable (dw1.1)
-    *
-    * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
-    * Primitives Overview":
-    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
-    *     use of screen- space coordinates).
-    *
-    * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5)
-    * and BackFaceFillMode (dw1.4:3) to SOLID(0).
-    *
-    * From the Sandy Bridge PRM, Volume 2, Part 1, Section
-    * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
-    *     SOLID: Any triangle or rectangle object found to be front-facing
-    *     is rendered as a solid object. This setting is required when
-    *     (rendering rectangle (RECTLIST) objects.
-    */
-   {
-      BEGIN_BATCH(7);
-      OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
-      OUT_BATCH(depth_format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_SBE */
-   {
-      BEGIN_BATCH(14);
-      OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
-      OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */
-                1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
-                0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
-      for (int i = 0; i < 12; ++i)
-         OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_WM
-    *
-    * Disable PS thread dispatch (dw1.29) and enable the HiZ op.
-    */
-   {
-      uint32_t dw1 = 0;
-
-      switch (op) {
-      case GEN6_HIZ_OP_DEPTH_CLEAR:
-         assert(!"not implemented");
-         dw1 |= GEN7_WM_DEPTH_CLEAR;
-         break;
-      case GEN6_HIZ_OP_DEPTH_RESOLVE:
-         dw1 |= GEN7_WM_DEPTH_RESOLVE;
-         break;
-      case GEN6_HIZ_OP_HIZ_RESOLVE:
-         dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
-         break;
-      default:
-         assert(0);
-         break;
-      }
-
-      BEGIN_BATCH(3);
-      OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
-      OUT_BATCH(dw1);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_PS
-    *
-    * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
-    * that, thread dispatch info must still be specified.
-    *     - Maximum Number of Threads (dw4.24:31) must be nonzero, as the BSpec
-    *       states that the valid range for this field is [0x3, 0x2f].
-    *     - A dispatch mode must be given; that is, at least one of the
-    *       "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
-    *       discovered through simulator error messages.
-    */
-   {
-      BEGIN_BATCH(8);
-      OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(((brw->max_wm_threads - 1) << IVB_PS_MAX_THREADS_SHIFT) |
-               GEN7_PS_32_DISPATCH_ENABLE);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_DEPTH_BUFFER */
-   {
-      uint32_t width = mt->level[level].width;
-      uint32_t height = mt->level[level].height;
-
-      uint32_t tile_x;
-      uint32_t tile_y;
-      uint32_t offset;
-      {
-         /* Construct a dummy renderbuffer just to extract tile offsets. */
-         struct intel_renderbuffer rb;
-         rb.mt = mt;
-         rb.mt_level = level;
-         rb.mt_layer = layer;
-         intel_renderbuffer_set_draw_offset(&rb);
-         offset = intel_renderbuffer_tile_offsets(&rb, &tile_x, &tile_y);
-      }
-
-      intel_emit_depth_stall_flushes(intel);
-
-      BEGIN_BATCH(7);
-      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
-      OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) |
-                depth_format << 18 |
-                1 << 22 | /* hiz enable */
-                1 << 28 | /* depth write */
-                BRW_SURFACE_2D << 29);
-      OUT_RELOC(mt->region->bo,
-                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                offset);
-      OUT_BATCH((width + tile_x - 1) << 4 |
-                (height + tile_y - 1) << 18);
-      OUT_BATCH(0);
-      OUT_BATCH(tile_x |
-                tile_y << 16);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_HIER_DEPTH_BUFFER */
-   {
-      struct intel_region *hiz_region = mt->hiz_mt->region;
-
-      BEGIN_BATCH(3);
-      OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
-      OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
-      OUT_RELOC(hiz_region->bo,
-                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_STENCIL_BUFFER */
-   {
-      BEGIN_BATCH(3);
-      OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_CLEAR_PARAMS
-    *
-    * From the BSpec, Volume 2a.11 Windower, Section 1.5.6.3.2
-    * 3DSTATE_CLEAR_PARAMS:
-    *    [DevIVB] 3DSTATE_CLEAR_PARAMS must always be programmed in the along
-    *    with the other Depth/Stencil state commands(i.e.  3DSTATE_DEPTH_BUFFER,
-    *    3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER).
-    */
-   {
-      BEGIN_BATCH(3);
-      OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DSTATE_DRAWING_RECTANGLE */
-   {
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(((mt->level[level].width - 1) & 0xffff) |
-                ((mt->level[level].height - 1) << 16));
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   /* 3DPRIMITIVE */
-   {
-     BEGIN_BATCH(7);
-     OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
-     OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL |
-               _3DPRIM_RECTLIST);
-     OUT_BATCH(3); /* vertex count per instance */
-     OUT_BATCH(0);
-     OUT_BATCH(1); /* instance count */
-     OUT_BATCH(0);
-     OUT_BATCH(0);
-     ADVANCE_BATCH();
-   }
-
-   /* See comments above at first invocation of intel_flush() in
-    * gen6_hiz_emit_batch_head().
-    */
-   intel_flush(ctx);
-
-   /* Be safe. */
-   brw->state.dirty.brw = ~0;
-   brw->state.dirty.cache = ~0;
-}
-
-/** \copydoc gen6_resolve_hiz_slice() */
-void
-gen7_resolve_hiz_slice(struct intel_context *intel,
-                       struct intel_mipmap_tree *mt,
-                       uint32_t level,
-                       uint32_t layer)
-{
-   gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
-}
-
-/** \copydoc gen6_resolve_depth_slice() */
-void
-gen7_resolve_depth_slice(struct intel_context *intel,
-                         struct intel_mipmap_tree *mt,
-                         uint32_t level,
-                         uint32_t layer)
-{
-   gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
-}
diff --git a/src/mesa/drivers/dri/i965/gen7_hiz.h b/src/mesa/drivers/dri/i965/gen7_hiz.h

deleted file mode 100644 (file)

index b89ffb0..0000000
--- a/src/mesa/drivers/dri/i965/gen7_hiz.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#pragma once
-
-#include <stdint.h>
-
-struct intel_context;
-struct intel_mipmap_tree;
-
-/** \copydoc gen6_resolve_hiz_slice() */
-void
-gen7_resolve_hiz_slice(struct intel_context *intel,
-                       struct intel_mipmap_tree *mt,
-                       uint32_t level,
-                       uint32_t layer);
-
-/** \copydoc gen6_resolve_depth_slice() */
-void
-gen7_resolve_depth_slice(struct intel_context *intel,
-                         struct intel_mipmap_tree *mt,
-                         uint32_t level,
-                         uint32_t layer);
diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c

index 3a6144f2838c102f0d0eeae97e453a59817cc666..4a5b5a6ae2eaef31263fd41e07708ea11763b05a 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -42,18 +42,58 @@ static void emit_depthbuffer(struct brw_context *brw)
                             *stencil_mt = NULL,
                             *hiz_mt = NULL;
  
+   /* Amount by which drawing should be offset in order to draw to the
+    * appropriate miplevel/zoffset/cubeface.  We will extract these values
+    * from depth_irb or stencil_irb once we determine which is present.
+    */
+   uint32_t draw_x = 0, draw_y = 0;
+
+   /* Masks used to determine how much of the draw_x and draw_y offsets should
+    * be performed using the fine adjustment of "depth coordinate offset X/Y"
+    * (dw5 of 3DSTATE_DEPTH_BUFFER).  Any remaining coarse adjustment will be
+    * performed by changing the base addresses of the buffers.
+    *
+    * Since the HiZ, depth, and stencil buffers all use the same "depth
+    * coordinate offset X/Y" values, we need to make sure that the coarse
+    * adjustment will be possible to apply to all three buffers.  Since coarse
+    * adjustment can only be applied in multiples of the tile size, we will OR
+    * together the tile masks of all the buffers to determine which offsets to
+    * perform as fine adjustments.
+    */
+   uint32_t tile_mask_x = 0, tile_mask_y = 0;
+
     if (drb)
        depth_mt = drb->mt;
  
-   if (depth_mt)
+   if (depth_mt) {
        hiz_mt = depth_mt->hiz_mt;
  
+      intel_region_get_tile_masks(depth_mt->region,
+                                  &tile_mask_x, &tile_mask_y);
+
+      if (hiz_mt) {
+         uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
+         intel_region_get_tile_masks(hiz_mt->region,
+                                     &hiz_tile_mask_x, &hiz_tile_mask_y);
+
+         /* Each HiZ row represents 2 rows of pixels */
+         hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
+
+         tile_mask_x |= hiz_tile_mask_x;
+         tile_mask_y |= hiz_tile_mask_y;
+      }
+   }
+
     if (srb) {
        stencil_mt = srb->mt;
        if (stencil_mt->stencil_mt)
          stencil_mt = stencil_mt->stencil_mt;
  
        assert(stencil_mt->format == MESA_FORMAT_S8);
+
+      /* Stencil buffer uses 64x64 tiles. */
+      tile_mask_x |= 63;
+      tile_mask_y |= 63;
     }
  
     /* Gen7 doesn't support packed depth/stencil */
@@ -65,6 +105,7 @@ static void emit_depthbuffer(struct brw_context *brw)
     if (depth_mt == NULL) {
        uint32_t dw1 = BRW_DEPTHFORMAT_D32_FLOAT << 18;
        uint32_t dw3 = 0;
+      /* Default to a zero offset: these values are written into the batch
+       * below even when there is no stencil buffer either, in which case
+       * nothing else assigns them.
+       */
+      uint32_t tile_x = 0, tile_y = 0;
  
        if (stencil_mt == NULL) {
          dw1 |= (BRW_SURFACE_NULL << 29);
@@ -72,10 +113,33 @@ static void emit_depthbuffer(struct brw_context *brw)
          /* _NEW_STENCIL: enable stencil buffer writes */
          dw1 |= ((ctx->Stencil.WriteMask != 0) << 27);
  
+         draw_x = srb->draw_x;
+         draw_y = srb->draw_y;
+         tile_x = draw_x & tile_mask_x;
+         tile_y = draw_y & tile_mask_y;
+
+         /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
+          * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
+          * Coordinate Offset X/Y":
+          *
+          *   "The 3 LSBs of both offsets must be zero to ensure correct
+          *   alignment"
+          *
+          * We have no guarantee that tile_x and tile_y are correctly aligned,
+          * since they are determined by the mipmap layout, which is only
+          * aligned to multiples of 4.
+          *
+          * So, to avoid hanging the GPU, just smash the low order 3 bits of
+          * tile_x and tile_y to 0.  This is a temporary workaround until we
+          * come up with a better solution.
+          */
+         tile_x &= ~7;
+         tile_y &= ~7;
+
          /* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */
          dw1 |= (BRW_SURFACE_2D << 29);
-        dw3 = ((srb->Base.Base.Width - 1) << 4) |
-              ((srb->Base.Base.Height - 1) << 18);
+        dw3 = ((srb->Base.Base.Width + tile_x - 1) << 4) |
+              ((srb->Base.Base.Height + tile_y - 1) << 18);
        }
  
        BEGIN_BATCH(7);
@@ -84,14 +148,39 @@ static void emit_depthbuffer(struct brw_context *brw)
        OUT_BATCH(0);
        OUT_BATCH(dw3);
        OUT_BATCH(0);
-      OUT_BATCH(0);
+      OUT_BATCH(tile_x | (tile_y << 16));
        OUT_BATCH(0);
        ADVANCE_BATCH();
     } else {
        struct intel_region *region = depth_mt->region;
        uint32_t tile_x, tile_y, offset;
  
-      offset = intel_renderbuffer_tile_offsets(drb, &tile_x, &tile_y);
+      draw_x = drb->draw_x;
+      draw_y = drb->draw_y;
+      tile_x = draw_x & tile_mask_x;
+      tile_y = draw_y & tile_mask_y;
+
+      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
+       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
+       * Coordinate Offset X/Y":
+       *
+       *   "The 3 LSBs of both offsets must be zero to ensure correct
+       *   alignment"
+       *
+       * We have no guarantee that tile_x and tile_y are correctly aligned,
+       * since they are determined by the mipmap layout, which is only aligned
+       * to multiples of 4.
+       *
+       * So, to avoid hanging the GPU, just smash the low order 3 bits of
+       * tile_x and tile_y to 0.  This is a temporary workaround until we come
+       * up with a better solution.
+       */
+      tile_x &= ~7;
+      tile_y &= ~7;
+
+      offset = intel_region_get_aligned_offset(region,
+                                               draw_x & ~tile_mask_x,
+                                               draw_y & ~tile_mask_y);
  
        assert(region->tiling == I915_TILING_Y);
  
@@ -122,13 +211,17 @@ static void emit_depthbuffer(struct brw_context *brw)
        OUT_BATCH(0);
        ADVANCE_BATCH();
     } else {
+      uint32_t hiz_offset =
+         intel_region_get_aligned_offset(hiz_mt->region,
+                                         draw_x & ~tile_mask_x,
+                                         (draw_y & ~tile_mask_y) / 2);
        BEGIN_BATCH(3);
        OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
        OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1);
        OUT_RELOC(hiz_mt->region->bo,
                  I915_GEM_DOMAIN_RENDER,
                  I915_GEM_DOMAIN_RENDER,
-                0);
+                hiz_offset);
        ADVANCE_BATCH();
     }
  
@@ -141,6 +234,14 @@ static void emit_depthbuffer(struct brw_context *brw)
     } else {
        const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0;
  
+      /* Note: We can't compute the stencil offset using
+       * intel_region_get_aligned_offset(), because the stencil region claims
+       * that the region is untiled; in fact it's W tiled.
+       */
+      uint32_t stencil_offset =
+         (draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
+         (draw_x & ~tile_mask_x) * 64;
+
        BEGIN_BATCH(3);
        OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
        /* The stencil buffer has quirky pitch requirements.  From the Graphics
@@ -161,7 +262,7 @@ static void emit_depthbuffer(struct brw_context *brw)
                 (2 * stencil_mt->region->pitch * stencil_mt->region->cpp - 1));
        OUT_RELOC(stencil_mt->region->bo,
                 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-               0);
+               stencil_offset);
        ADVANCE_BATCH();
     }
  
diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c

index 5c51abc5a0e980b5eafecc345564167c7049ee11..5c6fcedcedf2898f093cc5d7aa5c98d7936e62b7 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -26,6 +26,7 @@
  #include "brw_defines.h"
  #include "brw_util.h"
  #include "main/macros.h"
+#include "main/fbobject.h"
  #include "intel_batchbuffer.h"
  
  static void
@@ -44,7 +45,7 @@ upload_sbe_state(struct brw_context *brw)
     int urb_entry_read_offset = 1;
     uint16_t attr_overrides[FRAG_ATTRIB_MAX];
     /* _NEW_BUFFERS */
-   bool render_to_fbo = ctx->DrawBuffer->Name != 0;
+   bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
     uint32_t point_sprite_origin;
  
     /* CACHE_NEW_VS_PROG */
@@ -159,7 +160,7 @@ upload_sf_state(struct brw_context *brw)
     uint32_t dw1, dw2, dw3;
     float point_size;
     /* _NEW_BUFFERS */
-   bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+   bool render_to_fbo = _mesa_is_user_fbo(brw->intel.ctx.DrawBuffer);
  
     dw1 = GEN6_SF_STATISTICS_ENABLE |
           GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c

index d0b89d5fe57120ac0e0097b3175ab54364795b11..2bcf338b85d65da63567a2770bf72c674f3001bb 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c
@@ -25,6 +25,7 @@
  #include "brw_state.h"
  #include "brw_defines.h"
  #include "intel_batchbuffer.h"
+#include "main/fbobject.h"
  
  static void
  gen7_upload_sf_clip_viewport(struct brw_context *brw)
@@ -33,7 +34,7 @@ gen7_upload_sf_clip_viewport(struct brw_context *brw)
     struct gl_context *ctx = &intel->ctx;
     const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
     GLfloat y_scale, y_bias;
-   const bool render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
     const GLfloat *v = ctx->Viewport._WindowMap.m;
     struct gen7_sf_clip_viewport *vp;
  
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c

index cbccd2b852d6b52d6dd1e3dec0673f5c84a8f6a9..8f62c040b6a94fc0aa3f704b93a43f2da231c090 100644 (file)
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -289,8 +289,11 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
  
     switch (rb_format) {
     case MESA_FORMAT_SARGB8:
-      /* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB
-        surfaces to the blend/update as sRGB */
+      /* _NEW_BUFFERS
+       *
+       * Without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB surfaces to the
+       * blend/update as sRGB.
+       */
        if (ctx->Color.sRGBEnabled)
          surf->ss0.surface_format = brw_format_for_mesa_format(rb_format);
        else
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h

index 751ec992c97aa4770a304971320e34a4c56b5184..d2744e44ac22a9d7f7fe42596134c3276de8a589 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -7,8 +7,14 @@
  #include "intel_bufmgr.h"
  #include "intel_reg.h"
  
+#ifdef __cplusplus
+extern "C" {
+#endif
+
  #define BATCH_RESERVED 16
  
+struct intel_batchbuffer;
+
  void intel_batchbuffer_init(struct intel_context *intel);
  void intel_batchbuffer_reset(struct intel_context *intel);
  void intel_batchbuffer_free(struct intel_context *intel);
@@ -152,4 +158,8 @@ void intel_batchbuffer_cached_advance(struct intel_context *intel);
  #define ADVANCE_BATCH() intel_batchbuffer_advance(intel);
  #define CACHED_BATCH() intel_batchbuffer_cached_advance(intel);
  
+#ifdef __cplusplus
+}
+#endif
+
  #endif
diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c

index 9809f79bbeefa737172c6f9109cf106b360aa768..2b0276327d20342ca8ad1b668494063fe499eff0 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_buffers.c
+++ b/src/mesa/drivers/dri/intel/intel_buffers.c
@@ -30,6 +30,7 @@
  #include "intel_fbo.h"
  #include "intel_mipmap_tree.h"
  
+#include "main/fbobject.h"
  #include "main/framebuffer.h"
  #include "main/renderbuffer.h"
  
@@ -82,7 +83,7 @@ intel_check_front_buffer_rendering(struct intel_context *intel)
  static void
  intelDrawBuffer(struct gl_context * ctx, GLenum mode)
  {
-   if ((ctx->DrawBuffer != NULL) && (ctx->DrawBuffer->Name == 0)) {
+   if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
        struct intel_context *const intel = intel_context(ctx);
        const bool was_front_buffer_rendering =
         intel->is_front_buffer_rendering;
@@ -105,7 +106,7 @@ intelDrawBuffer(struct gl_context * ctx, GLenum mode)
  static void
  intelReadBuffer(struct gl_context * ctx, GLenum mode)
  {
-   if ((ctx->DrawBuffer != NULL) && (ctx->DrawBuffer->Name == 0)) {
+   if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
        struct intel_context *const intel = intel_context(ctx);
        const bool was_front_buffer_reading =
         intel->is_front_buffer_reading;
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c

index b8472b6fd3872270254571255ef704244891936f..f572f3828821c114214114ec4af40a9df9a024f6 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -215,7 +215,7 @@ intel_flush_front(struct gl_context *ctx)
      __DRIcontext *driContext = intel->driContext;
      __DRIscreen *const screen = intel->intelScreen->driScrnPriv;
  
-   if ((ctx->DrawBuffer->Name == 0) && intel->front_buffer_dirty) {
+    if (_mesa_is_winsys_fbo(ctx->DrawBuffer) && intel->front_buffer_dirty) {
        if (screen->dri2.loader &&
            (screen->dri2.loader->base.version >= 2)
           && (screen->dri2.loader->flushFrontBuffer != NULL) &&
@@ -454,7 +454,7 @@ intel_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
      if (intel->saved_viewport)
         intel->saved_viewport(ctx, x, y, w, h);
  
-    if (ctx->DrawBuffer->Name == 0) {
+    if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
         dri2InvalidateDrawable(driContext->driDrawablePriv);
         dri2InvalidateDrawable(driContext->driReadablePriv);
      }
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h

index 8ba727030fceb21e5c7c10601aef7cef287444c0..065f1d6d01a01f3aba33ee2a544c678197084a92 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -49,7 +49,6 @@ extern "C" {
  
  #ifdef __cplusplus
         #undef virtual
-}
  #endif
  
  #include "tnl/t_vertex.h"
@@ -117,6 +116,32 @@ struct intel_sync_object {
  
  struct brw_context;
  
+/**
+ * Batchbuffer bookkeeping, embedded in struct intel_context as its "batch"
+ * member.  It is defined at file scope (instead of nested inside
+ * intel_context) so that the struct tag can be forward-declared on its own.
+ */
+struct intel_batchbuffer {
+   /** Current batchbuffer being queued up. */
+   drm_intel_bo *bo;
+   /** Last BO submitted to the hardware.  Used for glFinish(). */
+   drm_intel_bo *last_bo;
+   /** BO for post-sync nonzero writes for gen6 workaround. */
+   drm_intel_bo *workaround_bo;
+   bool need_workaround_flush;
+
+   struct cached_batch_item *cached_items;
+
+   /* NOTE(review): presumably these count dwords within map[] -- confirm
+    * against the intel_batchbuffer_* helpers.
+    */
+   uint16_t emit, total;
+   uint16_t used, reserved_space;
+   /** Storage for the batch contents: 8192 dwords, i.e. BATCH_SZ bytes. */
+   uint32_t map[8192];
+#define BATCH_SZ (8192*sizeof(uint32_t))
+
+   uint32_t state_batch_offset;
+   bool is_blit;
+   bool needs_sol_reset;
+
+   /* NOTE(review): looks like a snapshot of "used" and the relocation count
+    * taken so a partially built batch can be rolled back -- confirm.
+    */
+   struct {
+      uint16_t used;
+      int reloc_count;
+   } saved;
+};
+
  /**
   * intel_context is derived from Mesa's context class: struct gl_context.
   */
@@ -219,31 +244,7 @@ struct intel_context
  
     int urb_size;
  
-   struct intel_batchbuffer {
-      /** Current batchbuffer being queued up. */
-      drm_intel_bo *bo;
-      /** Last BO submitted to the hardware.  Used for glFinish(). */
-      drm_intel_bo *last_bo;
-      /** BO for post-sync nonzero writes for gen6 workaround. */
-      drm_intel_bo *workaround_bo;
-      bool need_workaround_flush;
-
-      struct cached_batch_item *cached_items;
-
-      uint16_t emit, total;
-      uint16_t used, reserved_space;
-      uint32_t map[8192];
-#define BATCH_SZ (8192*sizeof(uint32_t))
-
-      uint32_t state_batch_offset;
-      bool is_blit;
-      bool needs_sol_reset;
-
-      struct {
-        uint16_t used;
-        int reloc_count;
-      } saved;
-   } batch;
+   struct intel_batchbuffer batch;
  
     drm_intel_bo *first_post_swapbuffers_batch;
     bool need_throttle;
@@ -607,4 +608,8 @@ is_power_of_two(uint32_t value)
     return (value & (value - 1)) == 0;
  }
  
+#ifdef __cplusplus
+}
+#endif
+
  #endif
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c

index a50ab777e9bb00e790d184f35e6a11f6ea390721..d4713c985898be95f612788edc8e5cbb5797ae02 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -100,6 +100,10 @@ intelInitExtensions(struct gl_context *ctx)
         (intel->gen == 7 && intel->intelScreen->kernel_has_gen7_sol_reset))
        ctx->Extensions.EXT_transform_feedback = true;
  
+   if (intel->gen >= 6) {
+      ctx->Extensions.ARB_draw_buffers_blend = true;
+   }
+
     if (intel->gen >= 5)
        ctx->Extensions.EXT_timer_query = true;
  
diff --git a/src/mesa/drivers/dri/intel/intel_extensions_es.c b/src/mesa/drivers/dri/intel/intel_extensions_es.c

index 29eb8eab529983849db1dd3fb1766bf558172287..b42907c89bbc896d29ad2d9f5dd2c28de225f911 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_extensions_es.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions_es.c
@@ -66,7 +66,6 @@ static const char *es1_extensions[] = {
     "GL_EXT_blend_func_separate",
     "GL_EXT_blend_subtract",
     "GL_OES_draw_texture",
-   "GL_ARB_framebuffer_object",
     "GL_EXT_framebuffer_object",
     "GL_ARB_point_sprite",
     "GL_EXT_stencil_wrap",
@@ -92,7 +91,6 @@ static const char *es2_extensions[] = {
     "GL_NV_blend_square",
  
     /* Optional GLES2 */
-   "GL_ARB_framebuffer_object",
     "GL_ARB_depth_texture",
     "GL_EXT_framebuffer_object",
  
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c

index 2d1a79856024d75e2c1074b42c98c19b7340dcbf..2f95ad060252bafdffa7ec3e61848710f855de24 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -237,6 +237,9 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer
         _mesa_lookup_enum_by_nr(internalFormat),
         _mesa_get_format_name(rb->Format), width, height);
  
+   if (width == 0 || height == 0)
+      return true;
+
     irb->mt = intel_miptree_create_for_renderbuffer(intel, rb->Format,
                                                    width, height);
     if (!irb->mt)
@@ -532,25 +535,14 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb,
                                 uint32_t *tile_y)
  {
     struct intel_region *region = irb->mt->region;
-   int cpp = region->cpp;
-   uint32_t pitch = region->pitch * cpp;
-
-   if (region->tiling == I915_TILING_NONE) {
-      *tile_x = 0;
-      *tile_y = 0;
-      return irb->draw_x * cpp + irb->draw_y * pitch;
-   } else if (region->tiling == I915_TILING_X) {
-      *tile_x = irb->draw_x % (512 / cpp);
-      *tile_y = irb->draw_y % 8;
-      return ((irb->draw_y / 8) * (8 * pitch) +
-             (irb->draw_x - *tile_x) / (512 / cpp) * 4096);
-   } else {
-      assert(region->tiling == I915_TILING_Y);
-      *tile_x = irb->draw_x % (128 / cpp);
-      *tile_y = irb->draw_y % 32;
-      return ((irb->draw_y / 32) * (32 * pitch) +
-             (irb->draw_x - *tile_x) / (128 / cpp) * 4096);
-   }
+   uint32_t mask_x, mask_y;
+
+   intel_region_get_tile_masks(region, &mask_x, &mask_y);
+
+   *tile_x = irb->draw_x & mask_x;
+   *tile_y = irb->draw_y & mask_y;
+   return intel_region_get_aligned_offset(region, irb->draw_x & ~mask_x,
+                                          irb->draw_y & ~mask_y);
  }
  
  /**
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h

index 724f141535cf29e5c0318cae6afc33f876db6744..758b4a0ec323e4f9c03557cd28c10f1b0e5dfa4c 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -34,6 +34,10 @@
  #include "intel_context.h"
  #include "intel_screen.h"
  
+#ifdef __cplusplus
+extern "C" {
+#endif
+
  struct intel_context;
  struct intel_mipmap_tree;
  struct intel_texture_image;
@@ -153,6 +157,11 @@ intel_flip_renderbuffers(struct gl_framebuffer *fb);
  void
  intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb);
  
+void
+intel_renderbuffer_fine_offset_masks(struct intel_renderbuffer *irb,
+                                     uint32_t *fine_offset_mask_x,
+                                     uint32_t *fine_offset_mask_y);
+
  uint32_t
  intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb,
                                 uint32_t *tile_x,
@@ -192,4 +201,8 @@ bool
  intel_renderbuffer_resolve_depth(struct intel_context *intel,
                                  struct intel_renderbuffer *irb);
  
+#ifdef __cplusplus
+}
+#endif
+
  #endif /* INTEL_FBO_H */
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h

index 9082864201941416e55d4e99f67452b2a7380ba7..0886c95f234c9cbec4fccf54bb85cd48c62787a6 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
@@ -33,6 +33,10 @@
  #include "intel_regions.h"
  #include "intel_resolve_map.h"
  
+#ifdef __cplusplus
+extern "C" {
+#endif
+
  /* A layer on top of the intel_regions code which adds:
   *
   * - Code to size and layout a region to hold a set of mipmaps.
@@ -413,4 +417,8 @@ intel_miptree_unmap(struct intel_context *intel,
                     unsigned int level,
                     unsigned int slice);
  
+#ifdef __cplusplus
+}
+#endif
+
  #endif
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c

index bdfb9abd27d081d10ecf750ba0ee4f84aca2a25d..6821b69f0fc6b71a30e8cdef756c6b212210b931 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -337,9 +337,5 @@ intelBitmap(struct gl_context * ctx,
                            unpack, pixels))
        return;
  
-   /* FIXME */
-   if (intel->gen == 6)
-       return _swrast_Bitmap(ctx, x, y, width, height, unpack, pixels);
-
     _mesa_meta_Bitmap(ctx, x, y, width, height, unpack, pixels);
  }
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c b/src/mesa/drivers/dri/intel/intel_pixel_read.c

index 34fed3d1f449f2fe276cfb2f6ed0df126a2a2f39..ab4e581c40001749b10074d8a1f08fc35f302f7f 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c
@@ -29,6 +29,7 @@
  #include "main/enums.h"
  #include "main/mtypes.h"
  #include "main/macros.h"
+#include "main/fbobject.h"
  #include "main/image.h"
  #include "main/bufferobj.h"
  #include "main/readpix.h"
@@ -116,7 +117,7 @@ do_blit_readpixels(struct gl_context * ctx,
        return false;
     }
     else {
-      if (ctx->ReadBuffer->Name == 0)
+      if (_mesa_is_winsys_fbo(ctx->ReadBuffer))
          rowLength = -rowLength;
     }
  
@@ -145,7 +146,7 @@ do_blit_readpixels(struct gl_context * ctx,
                                        all ? INTEL_WRITE_FULL :
                                        INTEL_WRITE_PART);
  
-   if (ctx->ReadBuffer->Name == 0)
+   if (_mesa_is_winsys_fbo(ctx->ReadBuffer))
        y = ctx->ReadBuffer->Height - (y + height);
  
     if (!intelEmitCopyBlit(intel,
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c

index abea2bd0f710fdfd12840f238a0a2c59f00e4e1b..1ef1ac663c5bb5477b27ebb96e067dbbcc3c489d 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -390,3 +390,59 @@ intel_region_copy(struct intel_context *intel,
                             srcx, srcy, dstx, dsty, width, height,
                             logicop);
  }
+
+/**
+ * Report, for each axis, the bitmask that extracts the within-tile part of
+ * an X or Y coordinate in \p region.  Both masks are 0 for an untiled
+ * region.
+ */
+void
+intel_region_get_tile_masks(struct intel_region *region,
+                            uint32_t *mask_x, uint32_t *mask_y)
+{
+   const int bytes_per_pixel = region->cpp;
+
+   if (region->tiling == I915_TILING_X) {
+      /* X tiles are 512 bytes wide and 8 rows tall. */
+      *mask_x = 512 / bytes_per_pixel - 1;
+      *mask_y = 7;
+      return;
+   }
+
+   if (region->tiling == I915_TILING_Y) {
+      /* Y tiles are 128 bytes wide and 32 rows tall. */
+      *mask_x = 128 / bytes_per_pixel - 1;
+      *mask_y = 31;
+      return;
+   }
+
+   /* Any tiling other than "none" is unexpected here.  When assertions are
+    * compiled out it is still reported as untiled.
+    */
+   if (region->tiling != I915_TILING_NONE) {
+      assert(false);
+   }
+   *mask_x = 0;
+   *mask_y = 0;
+}
+
+/**
+ * Return the byte offset, measured from the start of \p region, of the
+ * pixel at (\p x, \p y).
+ *
+ * For a tiled region the caller is responsible for passing coordinates that
+ * are multiples of the tile size; this is asserted.
+ */
+uint32_t
+intel_region_get_aligned_offset(struct intel_region *region, uint32_t x,
+                                uint32_t y)
+{
+   const int cpp = region->cpp;
+   const uint32_t row_bytes = region->pitch * cpp;
+   const uint32_t row_offset = y * row_bytes;
+
+   if (region->tiling == I915_TILING_X) {
+      assert((x % (512 / cpp)) == 0);
+      assert((y % 8) == 0);
+      /* Each step of one tile width in x advances by 4096 bytes. */
+      return row_offset + x / (512 / cpp) * 4096;
+   }
+
+   if (region->tiling == I915_TILING_Y) {
+      assert((x % (128 / cpp)) == 0);
+      assert((y % 32) == 0);
+      /* Each step of one tile width in x advances by 4096 bytes. */
+      return row_offset + x / (128 / cpp) * 4096;
+   }
+
+   /* Any tiling other than "none" is unexpected here.  When assertions are
+    * compiled out it is addressed as if it were untiled.
+    */
+   if (region->tiling != I915_TILING_NONE) {
+      assert(false);
+   }
+   return row_offset + x * cpp;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h

index 4ea970ad6d139201347e5f86093219e25f953b36..2fb30eb48f1b421e32d4587edef61ef52a6fc810 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_regions.h
+++ b/src/mesa/drivers/dri/intel/intel_regions.h
@@ -41,6 +41,10 @@
  #include "main/mtypes.h"
  #include "intel_bufmgr.h"
  
+#ifdef __cplusplus
+extern "C" {
+#endif
+
  struct intel_context;
  struct intel_buffer_object;
  
@@ -129,13 +133,26 @@ void _mesa_copy_rect(GLubyte * dst,
                  const GLubyte * src,
                  GLuint src_pitch, GLuint src_x, GLuint src_y);
  
+/* Compute the masks that select the within-tile bits of an X and Y
+ * coordinate in the region; both are set to 0 if the region is untiled.
+ */
+void
+intel_region_get_tile_masks(struct intel_region *region,
+                            uint32_t *mask_x, uint32_t *mask_y);
+
+/* Byte offset from the start of the region to coordinate (x, y).  For tiled
+ * regions, x and y must be multiples of the tile size.
+ */
+uint32_t
+intel_region_get_aligned_offset(struct intel_region *region, uint32_t x,
+                                uint32_t y);
+
  struct __DRIimageRec {
     struct intel_region *region;
     GLenum internal_format;
+   uint32_t usage;
     uint32_t dri_format;
     GLuint format;
     GLenum data_type;
     void *data;
  };
  
+#ifdef __cplusplus
+}
+#endif
+
  #endif
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c

index e823792ff93252ede67ab2071c3781fdfc5a2b57..458178fe927a0066db4b69da5060a7859df58f48 100644 (file)
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -188,6 +188,8 @@ intel_create_image_from_name(__DRIscreen *screen,
      if (image == NULL)
         return NULL;
  
+    image->dri_format = format;
+
      switch (format) {
      case __DRI_IMAGE_FORMAT_RGB565:
         image->format = MESA_FORMAT_RGB565;
@@ -209,6 +211,11 @@ intel_create_image_from_name(__DRIscreen *screen,
         image->internal_format = GL_RGBA;
         image->data_type = GL_UNSIGNED_BYTE;
         break;
+    case __DRI_IMAGE_FORMAT_XBGR8888:
+       image->format = MESA_FORMAT_RGBX8888_REV;
+       image->internal_format = GL_RGB;
+       image->data_type = GL_UNSIGNED_BYTE;
+       break;
      default:
         free(image);
         return NULL;
@@ -255,6 +262,21 @@ intel_create_image_from_renderbuffer(__DRIcontext *context,
     image->data = loaderPrivate;
     intel_region_reference(&image->region, irb->mt->region);
  
+   switch (image->format) {
+   case MESA_FORMAT_RGB565:
+      image->dri_format = __DRI_IMAGE_FORMAT_RGB565;
+      break;
+   case MESA_FORMAT_XRGB8888:
+      image->dri_format = __DRI_IMAGE_FORMAT_XRGB8888;
+      break;
+   case MESA_FORMAT_ARGB8888:
+      image->dri_format = __DRI_IMAGE_FORMAT_ARGB8888;
+      break;
+   case MESA_FORMAT_RGBA8888_REV:
+      image->dri_format = __DRI_IMAGE_FORMAT_ABGR8888;
+      break;
+   }
+
     return image;
  }
  
@@ -283,10 +305,16 @@ intel_create_image(__DRIscreen *screen,
        tiling = I915_TILING_NONE;
     }
  
+   /* We only support write for cursor drm images */
+   if ((use & __DRI_IMAGE_USE_WRITE) &&
+       use != (__DRI_IMAGE_USE_WRITE | __DRI_IMAGE_USE_CURSOR))
+      return NULL;
+
     image = CALLOC(sizeof *image);
     if (image == NULL)
        return NULL;
  
+   image->usage = use;
     image->dri_format = format;
  
     switch (format) {
@@ -310,6 +338,11 @@ intel_create_image(__DRIscreen *screen,
         image->internal_format = GL_RGBA;
         image->data_type = GL_UNSIGNED_BYTE;
         break;
+    case __DRI_IMAGE_FORMAT_XBGR8888:
+       image->format = MESA_FORMAT_RGBX8888_REV;
+       image->internal_format = GL_RGB;
+       image->data_type = GL_UNSIGNED_BYTE;
+       break;
     default:
        free(image);
        return NULL;
@@ -342,7 +375,8 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
     case __DRI_IMAGE_ATTRIB_NAME:
        return intel_region_flink(image->region, (uint32_t *) value);
     case __DRI_IMAGE_ATTRIB_FORMAT:
-      return image->dri_format;
+      *value = image->dri_format;
+      return true;
     default:
        return false;
     }
@@ -364,6 +398,8 @@ intel_dup_image(__DRIimage *orig_image, void *loaderPrivate)
     }
  
     image->internal_format = orig_image->internal_format;
+   image->usage           = orig_image->usage;
+   image->dri_format      = orig_image->dri_format;
     image->format          = orig_image->format;
     image->data_type       = orig_image->data_type;
     image->data            = loaderPrivate;
@@ -379,18 +415,39 @@ intel_validate_usage(__DRIimage *image, unsigned int use)
          return GL_FALSE;
     }
  
+   /* We only support write for cursor drm images */
+   if ((use & __DRI_IMAGE_USE_WRITE) &&
+       use != (__DRI_IMAGE_USE_WRITE | __DRI_IMAGE_USE_CURSOR))
+      return GL_FALSE;
+
     return GL_TRUE;
  }
  
+/* Copy count bytes from buf into the image's BO; 0 on success, -1 on failure. */
+static int
+intel_image_write(__DRIimage *image, const void *buf, size_t count)
+{
+   if (image->region->map_refcount || !(image->usage & __DRI_IMAGE_USE_WRITE))
+      return -1; /* nonzero map_refcount, or image lacks the WRITE usage bit */
+   if (count > image->region->bo->size)
+      return -1; /* the copy below would run past the end of the BO */
+   if (drm_intel_bo_map(image->region->bo, true) != 0)
+      return -1; /* bo->virtual must not be written if the map failed */
+   memcpy(image->region->bo->virtual, buf, count);
+   drm_intel_bo_unmap(image->region->bo);
+   return 0;
+}
+
  static struct __DRIimageExtensionRec intelImageExtension = {
-    { __DRI_IMAGE, 3 },
+    { __DRI_IMAGE, 4 },
      intel_create_image_from_name,
      intel_create_image_from_renderbuffer,
      intel_destroy_image,
      intel_create_image,
      intel_query_image,
      intel_dup_image,
-    intel_validate_usage
+    intel_validate_usage,
+    intel_image_write
  };
  
  static const __DRIextension *intelScreenExtensions[] = {
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c

index 48457675fc83c6648c7ed814397daa36dc7541ad..2625b76d14fb6e596c659b95daa80f4a143321fe 100644 (file)
--- a/src/mesa/drivers/dri/nouveau/nouveau_context.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c
@@ -33,6 +33,7 @@
  
  #include "main/dd.h"
  #include "main/framebuffer.h"
+#include "main/fbobject.h"
  #include "main/light.h"
  #include "main/state.h"
  #include "main/version.h"
@@ -396,11 +397,11 @@ nouveau_validate_framebuffer(struct gl_context *ctx)
         __DRIdrawable *dri_draw = dri_ctx->driDrawablePriv;
         __DRIdrawable *dri_read = dri_ctx->driReadablePriv;
  
-       if (ctx->DrawBuffer->Name == 0)
+       if (_mesa_is_winsys_fbo(ctx->DrawBuffer))
                 validate_framebuffer(dri_ctx, dri_draw,
                                      &dri_ctx->dri2.draw_stamp);
  
-       if (ctx->ReadBuffer->Name == 0)
+       if (_mesa_is_winsys_fbo(ctx->ReadBuffer))
                 validate_framebuffer(dri_ctx, dri_read,
                                      &dri_ctx->dri2.read_stamp);
  
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c

index 7222f68b439f8b3b1d78036d04df9a98a596912b..69e5cac426afe922a05b6f12c2d052aa2e708351 100644 (file)
--- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c
@@ -25,6 +25,8 @@
   */
  
  #include "main/mfeatures.h"
+#include "main/mtypes.h"
+#include "main/fbobject.h"
  
  #include "nouveau_driver.h"
  #include "nouveau_context.h"
@@ -61,7 +63,7 @@ nouveau_flush(struct gl_context *ctx)
  
         PUSH_KICK(push);
  
-       if (ctx->DrawBuffer->Name == 0 &&
+       if (_mesa_is_winsys_fbo(ctx->DrawBuffer) &&
             ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
                 __DRIscreen *screen = nctx->screen->dri_screen;
                 __DRIdri2LoaderExtension *dri2 = screen->dri2.loader;
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_screen.c b/src/mesa/drivers/dri/nouveau/nouveau_screen.c

index 7e51b94a2ff5f0e9315b866ada782e03230346f4..2a15c08c6793e048058a493fdd55b5c5b6b2a3b0 100644 (file)
--- a/src/mesa/drivers/dri/nouveau/nouveau_screen.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_screen.c
@@ -33,6 +33,7 @@
  #include "nv20_driver.h"
  
  #include "main/framebuffer.h"
+#include "main/fbobject.h"
  #include "main/renderbuffer.h"
  #include "swrast/s_renderbuffer.h"
  
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_surface.c b/src/mesa/drivers/dri/nouveau/nouveau_surface.c

index f25211490889e83edf0f3f5b1080fd6540a596d5..ffac309b9ef93a67e9ea934682879b4408c16de8 100644 (file)
--- a/src/mesa/drivers/dri/nouveau/nouveau_surface.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_surface.c
@@ -28,6 +28,8 @@
  #include "nouveau_context.h"
  #include "nouveau_util.h"
  
+#include "main/formats.h"
+
  void
  nouveau_surface_alloc(struct gl_context *ctx, struct nouveau_surface *s,
                       enum nouveau_surface_layout layout,
@@ -45,7 +47,7 @@ nouveau_surface_alloc(struct gl_context *ctx, struct nouveau_surface *s,
                 .width = width,
                 .height = height,
                 .cpp = cpp,
-               .pitch = width * cpp,
+               .pitch = _mesa_format_row_stride(format, width),
         };
  
         if (layout == TILED) {
@@ -64,7 +66,8 @@ nouveau_surface_alloc(struct gl_context *ctx, struct nouveau_surface *s,
                 s->pitch = align(s->pitch, 64);
         }
  
-       ret = nouveau_bo_new(context_dev(ctx), flags, 0, s->pitch * height,
+       ret = nouveau_bo_new(context_dev(ctx), flags, 0,
+                            get_format_blocksy(format, height) * s->pitch,
                              &config, &s->bo);
         assert(!ret);
  }
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c

index eadbeb45d7a1d4931d2707d55e67ab20ae139395..a2e96aa168449be3aeb723df5197811dfebfd4fd 100644 (file)
--- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
@@ -91,6 +91,7 @@ nouveau_teximage_map(struct gl_context *ctx, struct gl_texture_image *ti,
         if (s->bo) {
                 if (!(access & GL_MAP_READ_BIT) &&
                     nouveau_pushbuf_refd(context_push(ctx), s->bo)) {
+                       unsigned size;
                         /*
                          * Heuristic: use a bounce buffer to pipeline
                          * teximage transfers.
@@ -104,7 +105,8 @@ nouveau_teximage_map(struct gl_context *ctx, struct gl_texture_image *ti,
                         nti->transfer.x = x;
                         nti->transfer.y = y;
  
-                       nti->base.Map = nouveau_get_scratch(ctx, st->pitch * h,
+                       size = get_format_blocksy(st->format, h) * st->pitch;
+                       nti->base.Map = nouveau_get_scratch(ctx, size,
                                                        &st->bo, &st->offset);
  
                 } else {
@@ -120,7 +122,10 @@ nouveau_teximage_map(struct gl_context *ctx, struct gl_texture_image *ti,
                                 assert(!ret);
                         }
  
-                       nti->base.Map = s->bo->map + y * s->pitch + x * s->cpp;
+                       nti->base.Map = s->bo->map +
+                               get_format_blocksy(s->format, y) * s->pitch +
+                               get_format_blocksx(s->format, x) * s->cpp;
+
                 }
         }
  }
@@ -163,6 +168,7 @@ nouveau_map_texture_image(struct gl_context *ctx,
         if (s->bo) {
                 if (!(mode & GL_MAP_READ_BIT) &&
                     nouveau_pushbuf_refd(context_push(ctx), s->bo)) {
+                       unsigned size;
                         /*
                          * Heuristic: use a bounce buffer to pipeline
                          * teximage transfers.
@@ -176,8 +182,9 @@ nouveau_map_texture_image(struct gl_context *ctx,
                         nti->transfer.x = x;
                         nti->transfer.y = y;
  
-                       *map = nouveau_get_scratch(ctx, st->pitch * h,
-                                                  &st->bo, &st->offset);
+                       size = get_format_blocksy(st->format, h) * st->pitch;
+                       *map = nouveau_get_scratch(ctx, size,
+                                         &st->bo, &st->offset);
                         *stride = st->pitch;
                 } else {
                         int ret, flags = 0;
@@ -192,11 +199,15 @@ nouveau_map_texture_image(struct gl_context *ctx,
                                 assert(!ret);
                         }
  
-                       *map = s->bo->map + y * s->pitch + x * s->cpp;
+                       *map = s->bo->map +
+                               get_format_blocksy(s->format, y) * s->pitch +
+                               get_format_blocksx(s->format, x) * s->cpp;
                         *stride = s->pitch;
                 }
         } else {
-               *map = nti->base.Map + y * s->pitch + x * s->cpp;
+               *map = nti->base.Map +
+                       get_format_blocksy(s->format, y) * s->pitch +
+                       get_format_blocksx(s->format, x) * s->cpp;
                 *stride = s->pitch;
         }
  }
@@ -286,6 +297,22 @@ nouveau_choose_tex_format(struct gl_context *ctx, GLint internalFormat,
         case GL_INTENSITY8:
                 return MESA_FORMAT_I8;
  
+       case GL_RGB_S3TC:
+       case GL_RGB4_S3TC:
+       case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+               return MESA_FORMAT_RGB_DXT1;
+
+       case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+               return MESA_FORMAT_RGBA_DXT1;
+
+       case GL_RGBA_S3TC:
+       case GL_RGBA4_S3TC:
+       case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+               return MESA_FORMAT_RGBA_DXT3;
+
+       case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+               return MESA_FORMAT_RGBA_DXT5;
+
         default:
                 assert(0);
         }
@@ -353,7 +380,9 @@ relayout_texture(struct gl_context *ctx, struct gl_texture_object *t)
                 struct nouveau_surface *ss = to_nouveau_texture(t)->surfaces;
                 struct nouveau_surface *s = &to_nouveau_teximage(base)->surface;
                 int i, ret, last = get_last_level(t);
-               unsigned size, offset = 0,
+               enum nouveau_surface_layout layout =
+                       (_mesa_is_format_compressed(s->format) ? LINEAR : SWIZZLED);
+               unsigned size, pitch, offset = 0,
                         width = s->width,
                         height = s->height;
  
@@ -363,7 +392,8 @@ relayout_texture(struct gl_context *ctx, struct gl_texture_object *t)
  
                 /* Relayout the mipmap tree. */
                 for (i = t->BaseLevel; i <= last; i++) {
-                       size = width * height * s->cpp;
+                       pitch = _mesa_format_row_stride(s->format, width);
+                       size = get_format_blocksy(s->format, height) * pitch;
  
                         /* Images larger than 16B have to be aligned. */
                         if (size > 16)
@@ -371,12 +401,12 @@ relayout_texture(struct gl_context *ctx, struct gl_texture_object *t)
  
                         ss[i] = (struct nouveau_surface) {
                                 .offset = offset,
-                               .layout = SWIZZLED,
+                               .layout = layout,
                                 .format = s->format,
                                 .width = width,
                                 .height = height,
                                 .cpp = s->cpp,
-                               .pitch = width * s->cpp,
+                               .pitch = pitch,
                         };
  
                         offset += size;
@@ -453,8 +483,10 @@ nouveau_teximage(struct gl_context *ctx, GLint dims,
                  struct gl_texture_image *ti,
                  GLint internalFormat,
                  GLint width, GLint height, GLint depth, GLint border,
+                GLsizei imageSize,
                  GLenum format, GLenum type, const GLvoid *pixels,
-                const struct gl_pixelstore_attrib *packing)
+                const struct gl_pixelstore_attrib *packing,
+                GLboolean compressed)
  {
         struct gl_texture_object *t = ti->TexObject;
         const GLuint level = ti->Level;
@@ -467,9 +499,15 @@ nouveau_teximage(struct gl_context *ctx, GLint dims,
                               ti->TexFormat, width, height);
         nti->base.RowStride = s->pitch / s->cpp;
  
-       pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth,
-                                            format, type, pixels, packing,
-                                            "glTexImage");
+       if (compressed)
+               pixels = _mesa_validate_pbo_compressed_teximage(ctx,
+                       imageSize,
+                       pixels, packing, "glCompressedTexImage");
+       else
+               pixels = _mesa_validate_pbo_teximage(ctx,
+                       dims, width, height, depth, format, type,
+                       pixels, packing, "glTexImage");
+
         if (pixels) {
                 /* Store the pixel data. */
                 nouveau_teximage_map(ctx, ti, GL_MAP_WRITE_BIT,
@@ -511,8 +549,8 @@ nouveau_teximage_1d(struct gl_context *ctx,
                     const struct gl_pixelstore_attrib *packing)
  {
         nouveau_teximage(ctx, 1, ti, internalFormat,
-                        width, 1, 1, border, format, type, pixels,
-                        packing);
+                        width, 1, 1, border, 0, format, type, pixels,
+                        packing, GL_FALSE);
  }
  
  static void
@@ -524,8 +562,8 @@ nouveau_teximage_2d(struct gl_context *ctx,
                     const struct gl_pixelstore_attrib *packing)
  {
         nouveau_teximage(ctx, 2, ti, internalFormat,
-                        width, height, 1, border, format, type, pixels,
-                        packing);
+                        width, height, 1, border, 0, format, type, pixels,
+                        packing, GL_FALSE);
  }
  
  static void
@@ -537,8 +575,20 @@ nouveau_teximage_3d(struct gl_context *ctx,
                     const struct gl_pixelstore_attrib *packing)
  {
         nouveau_teximage(ctx, 3, ti, internalFormat,
-                        width, height, depth, border, format, type, pixels,
-                        packing);
+                        width, height, depth, border, 0, format, type, pixels,
+                        packing, GL_FALSE);
+}
+
+/* CompressedTexImage2D driver hook: forwards to nouveau_teximage() as 2D. */
+static void
+nouveau_compressed_teximage_2d(struct gl_context *ctx,
+                   struct gl_texture_image *ti, GLint internalFormat,
+                   GLint width, GLint height, GLint border,
+                   GLsizei imageSize, const GLvoid *data)
+{
+       /* Depth is fixed at 1; format and type are both passed as 0. */
+       nouveau_teximage(ctx, 2, ti, internalFormat, width, height, 1, border,
+                        imageSize, 0, 0, data, &ctx->Unpack, GL_TRUE);
  }
  
  static void
@@ -546,21 +596,29 @@ nouveau_texsubimage(struct gl_context *ctx, GLint dims,
                     struct gl_texture_image *ti,
                     GLint xoffset, GLint yoffset, GLint zoffset,
                     GLint width, GLint height, GLint depth,
+                   GLsizei imageSize,
                     GLenum format, GLenum type, const void *pixels,
-                   const struct gl_pixelstore_attrib *packing)
+                   const struct gl_pixelstore_attrib *packing,
+                   GLboolean compressed)
  {
         struct nouveau_surface *s = &to_nouveau_teximage(ti)->surface;
         struct nouveau_teximage *nti = to_nouveau_teximage(ti);
         int ret;
  
-       pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth,
-                                            format, type, pixels, packing,
-                                            "glTexSubImage");
+       if (compressed)
+               pixels = _mesa_validate_pbo_compressed_teximage(ctx,
+                               imageSize,
+                               pixels, packing, "glCompressedTexSubImage");
+       else
+               pixels = _mesa_validate_pbo_teximage(ctx,
+                               dims, width, height, depth, format, type,
+                               pixels, packing, "glTexSubImage");
+
         if (pixels) {
                 nouveau_teximage_map(ctx, ti, GL_MAP_WRITE_BIT,
                                      xoffset, yoffset, width, height);
  
-               ret = _mesa_texstore(ctx, 3, ti->_BaseFormat, ti->TexFormat,
+               ret = _mesa_texstore(ctx, dims, ti->_BaseFormat, ti->TexFormat,
                                       s->pitch,
                                      &nti->base.Map,
                                       width, height, depth,
@@ -586,8 +644,8 @@ nouveau_texsubimage_3d(struct gl_context *ctx,
                        const struct gl_pixelstore_attrib *packing)
  {
         nouveau_texsubimage(ctx, 3, ti, xoffset, yoffset, zoffset,
-                           width, height, depth, format, type, pixels,
-                           packing);
+                           width, height, depth, 0, format, type, pixels,
+                           packing, GL_FALSE);
  }
  
  static void
@@ -599,8 +657,8 @@ nouveau_texsubimage_2d(struct gl_context *ctx,
                        const struct gl_pixelstore_attrib *packing)
  {
         nouveau_texsubimage(ctx, 2, ti, xoffset, yoffset, 0,
-                           width, height, 1, format, type, pixels,
-                           packing);
+                           width, height, 1, 0, format, type, pixels,
+                           packing, GL_FALSE);
  }
  
  static void
@@ -611,8 +669,21 @@ nouveau_texsubimage_1d(struct gl_context *ctx,
                        const struct gl_pixelstore_attrib *packing)
  {
         nouveau_texsubimage(ctx, 1, ti, xoffset, 0, 0,
-                           width, 1, 1, format, type, pixels,
-                           packing);
+                           width, 1, 1, 0, format, type, pixels,
+                           packing, GL_FALSE);
+}
+
+/* CompressedTexSubImage2D driver hook: forwards to nouveau_texsubimage(). */
+static void
+nouveau_compressed_texsubimage_2d(struct gl_context *ctx,
+                      struct gl_texture_image *ti,
+                      GLint xoffset, GLint yoffset,
+                      GLsizei width, GLint height,
+                      GLenum format, GLint imageSize, const void *data)
+{
+       /* zoffset is 0 and depth is 1; the type argument is passed as 0. */
+       nouveau_texsubimage(ctx, 2, ti, xoffset, yoffset, 0, width, height, 1,
+                           imageSize, format, 0, data, &ctx->Unpack, GL_TRUE);
  }
  
  static void
@@ -691,6 +762,8 @@ nouveau_texture_functions_init(struct dd_function_table *functions)
         functions->TexSubImage1D = nouveau_texsubimage_1d;
         functions->TexSubImage2D = nouveau_texsubimage_2d;
         functions->TexSubImage3D = nouveau_texsubimage_3d;
+       functions->CompressedTexImage2D = nouveau_compressed_teximage_2d;
+       functions->CompressedTexSubImage2D = nouveau_compressed_texsubimage_2d;
         functions->BindTexture = nouveau_bind_texture;
         functions->MapTextureImage = nouveau_map_texture_image;
         functions->UnmapTextureImage = nouveau_unmap_texture_image;
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_util.h b/src/mesa/drivers/dri/nouveau/nouveau_util.h

index d4cc5c4fb9c53c13fb94ebd18ac7430dea574fe6..17d6965ee2c2753c0535cb6362b8ee4743096a1d 100644 (file)
--- a/src/mesa/drivers/dri/nouveau/nouveau_util.h
+++ b/src/mesa/drivers/dri/nouveau/nouveau_util.h
@@ -207,4 +207,24 @@ get_texgen_coeff(struct gl_texgen *c)
                 return NULL;
  }
  
+/* Number of block-width units needed to cover x, rounding up. */
+static inline unsigned
+get_format_blocksx(gl_format format, unsigned x)
+{
+       GLuint bw, bh;
+
+       _mesa_get_format_block_size(format, &bw, &bh);
+       return (x + bw - 1) / bw;
+}
+
+/* Number of block-height units needed to cover y, rounding up. */
+static inline unsigned
+get_format_blocksy(gl_format format, unsigned y)
+{
+       GLuint bw, bh;
+
+       _mesa_get_format_block_size(format, &bw, &bh);
+       return (y + bh - 1) / bh;
+}
+
  #endif
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c

index 62fee2e1e27d337b4219c0d6f39b8e56b74f9ba1..a9a56e78f2d7022e8a37d636fcd60cc279d2cff3 100644 (file)
--- a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
@@ -216,7 +216,7 @@ get_max_client_stride(struct gl_context *ctx, const struct gl_client_array **arr
  }
  
  static void
-TAG(vbo_render_prims)(struct gl_context *ctx, const struct gl_client_array **arrays,
+TAG(vbo_render_prims)(struct gl_context *ctx,
                       const struct _mesa_prim *prims, GLuint nr_prims,
                       const struct _mesa_index_buffer *ib,
                       GLboolean index_bounds_valid,
@@ -448,7 +448,6 @@ vbo_draw_imm(struct gl_context *ctx, const struct gl_client_array **arrays,
  
  static void
  TAG(vbo_render_prims)(struct gl_context *ctx,
-                     const struct gl_client_array **arrays,
                       const struct _mesa_prim *prims, GLuint nr_prims,
                       const struct _mesa_index_buffer *ib,
                       GLboolean index_bounds_valid,
@@ -456,6 +455,7 @@ TAG(vbo_render_prims)(struct gl_context *ctx,
                       struct gl_transform_feedback_object *tfb_vertcount)
  {
         struct nouveau_render_state *render = to_render_state(ctx);
+       const struct gl_client_array **arrays = ctx->Array._DrawArrays;
  
         if (!index_bounds_valid)
                 vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index,
@@ -484,7 +484,6 @@ TAG(vbo_render_prims)(struct gl_context *ctx,
  
  static void
  TAG(vbo_check_render_prims)(struct gl_context *ctx,
-                           const struct gl_client_array **arrays,
                             const struct _mesa_prim *prims, GLuint nr_prims,
                             const struct _mesa_index_buffer *ib,
                             GLboolean index_bounds_valid,
@@ -496,12 +495,12 @@ TAG(vbo_check_render_prims)(struct gl_context *ctx,
         nouveau_validate_framebuffer(ctx);
  
         if (nctx->fallback == HWTNL)
-               TAG(vbo_render_prims)(ctx, arrays, prims, nr_prims, ib,
+               TAG(vbo_render_prims)(ctx, prims, nr_prims, ib,
                                       index_bounds_valid, min_index, max_index,
                                       tfb_vertcount);
  
         if (nctx->fallback == SWTNL)
-               _tnl_vbo_draw_prims(ctx, arrays, prims, nr_prims, ib,
+               _tnl_vbo_draw_prims(ctx, prims, nr_prims, ib,
                                     index_bounds_valid, min_index, max_index,
                                     tfb_vertcount);
  }
diff --git a/src/mesa/drivers/dri/nouveau/nv04_surface.c b/src/mesa/drivers/dri/nouveau/nv04_surface.c

index 522c94819c076fafdb71fa7cd409aacae803e14c..103453f1b9ad5fe630a83935492e13478c5bf406 100644 (file)
--- a/src/mesa/drivers/dri/nouveau/nv04_surface.c
+++ b/src/mesa/drivers/dri/nouveau/nv04_surface.c
@@ -393,6 +393,15 @@ nv04_surface_copy(struct gl_context *ctx,
                   int dx, int dy, int sx, int sy,
                   int w, int h)
  {
+       if (_mesa_is_format_compressed(src->format)) {
+               sx = get_format_blocksx(src->format, sx);
+               sy = get_format_blocksy(src->format, sy);
+               dx = get_format_blocksx(dst->format, dx);
+               dy = get_format_blocksy(dst->format, dy);
+               w = get_format_blocksx(src->format, w);
+               h = get_format_blocksy(src->format, h);
+       }
+
         /* Linear texture copy. */
         if ((src->layout == LINEAR && dst->layout == LINEAR) ||
             dst->width <= 2 || dst->height <= 1) {
diff --git a/src/mesa/drivers/dri/nouveau/nv10_context.c b/src/mesa/drivers/dri/nouveau/nv10_context.c

index 90c1545ce37240b81cd3e68d9a047554682dfc0a..a2448caaf81b0961c2d377237fcab12d6f65bc1d 100644 (file)
--- a/src/mesa/drivers/dri/nouveau/nv10_context.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_context.c
@@ -449,6 +449,10 @@ nv10_context_create(struct nouveau_screen *screen, const struct gl_config *visua
         ctx->Extensions.ARB_texture_env_dot3 = true;
         ctx->Extensions.NV_fog_distance = true;
         ctx->Extensions.NV_texture_rectangle = true;
+       if (ctx->Mesa_DXTn) {
+               ctx->Extensions.EXT_texture_compression_s3tc = true;
+               ctx->Extensions.S3_s3tc = true;
+       }
  
         /* GL constants. */
         ctx->Const.MaxTextureLevels = 12;
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_tex.c b/src/mesa/drivers/dri/nouveau/nv10_state_tex.c

index 0254341151b5fe5d213692d704fe419371f87f7b..b467bb33142037019997985b8d1824d0f9f90657 100644 (file)
--- a/src/mesa/drivers/dri/nouveau/nv10_state_tex.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_tex.c
@@ -111,6 +111,16 @@ get_tex_format_pot(struct gl_texture_image *ti)
         case MESA_FORMAT_L8:
                 return NV10_3D_TEX_FORMAT_FORMAT_L8;
  
+       case MESA_FORMAT_RGB_DXT1:
+       case MESA_FORMAT_RGBA_DXT1:
+               return NV10_3D_TEX_FORMAT_FORMAT_DXT1;
+
+       case MESA_FORMAT_RGBA_DXT3:
+               return NV10_3D_TEX_FORMAT_FORMAT_DXT3;
+
+       case MESA_FORMAT_RGBA_DXT5:
+               return NV10_3D_TEX_FORMAT_FORMAT_DXT5;
+
         default:
                 assert(0);
         }
diff --git a/src/mesa/drivers/dri/nouveau/nv20_context.c b/src/mesa/drivers/dri/nouveau/nv20_context.c

index c91171791ccc17aead6531c4e5ecbb20800ba635..5a36c8766d77b90637f5497c04792b79e649ad50 100644 (file)
--- a/src/mesa/drivers/dri/nouveau/nv20_context.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_context.c
@@ -460,6 +460,10 @@ nv20_context_create(struct nouveau_screen *screen, const struct gl_config *visua
         ctx->Extensions.ARB_texture_env_dot3 = true;
         ctx->Extensions.NV_fog_distance = true;
         ctx->Extensions.NV_texture_rectangle = true;
+       if (ctx->Mesa_DXTn) {
+               ctx->Extensions.EXT_texture_compression_s3tc = true;
+               ctx->Extensions.S3_s3tc = true;
+       }
  
         /* GL constants. */
         ctx->Const.MaxTextureCoordUnits = NV20_TEXTURE_UNITS;
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c

index 799510daa69f1e087f5d2f0219006e9a6d29feaf..d8bfdf2e58fd6b0332992ab8f75fbfe1562bcf97 100644 (file)
--- a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
@@ -108,6 +108,16 @@ get_tex_format_pot(struct gl_texture_image *ti)
         case MESA_FORMAT_L8:
                 return NV20_3D_TEX_FORMAT_FORMAT_L8;
  
+       case MESA_FORMAT_RGB_DXT1:
+       case MESA_FORMAT_RGBA_DXT1:
+               return NV20_3D_TEX_FORMAT_FORMAT_DXT1;
+
+       case MESA_FORMAT_RGBA_DXT3:
+               return NV20_3D_TEX_FORMAT_FORMAT_DXT3;
+
+       case MESA_FORMAT_RGBA_DXT5:
+               return NV20_3D_TEX_FORMAT_FORMAT_DXT5;
+
         default:
                 assert(0);
         }
diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c

index 3131007517b0cfba6667ec9a6e2e2cc437ac28b3..0f7b564024e882fcad452fe9b2637770d080f994 100644 (file)
--- a/src/mesa/drivers/dri/r200/r200_state.c
+++ b/src/mesa/drivers/dri/r200/r200_state.c
@@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #include "main/colormac.h"
  #include "main/light.h"
  #include "main/framebuffer.h"
+#include "main/fbobject.h"
  
  #include "swrast/swrast.h"
  #include "vbo/vbo.h"
@@ -536,7 +537,7 @@ static void r200FrontFace( struct gl_context *ctx, GLenum mode )
     rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_CULL_FRONT_IS_CCW;
  
     /* Winding is inverted when rendering to FBO */
-   if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+   if (ctx->DrawBuffer && _mesa_is_user_fbo(ctx->DrawBuffer))
        mode = (mode == GL_CW) ? GL_CCW : GL_CW;
  
     switch ( mode ) {
@@ -1547,7 +1548,7 @@ void r200UpdateWindow( struct gl_context *ctx )
     GLfloat xoffset = 0;
     GLfloat yoffset = dPriv ? (GLfloat) dPriv->h : 0;
     const GLfloat *v = ctx->Viewport._WindowMap.m;
-   const GLboolean render_to_fbo = (ctx->DrawBuffer ? (ctx->DrawBuffer->Name != 0) : 0);
+   const GLboolean render_to_fbo = (ctx->DrawBuffer ? _mesa_is_user_fbo(ctx->DrawBuffer) : 0);
     const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
     GLfloat y_scale, y_bias;
  
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c

index b64ff8160d0fe362f40fd51661042a6c154fa326..a8dfae0192356ab8c7749c53c621469d5711f2bf 100644 (file)
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #include "main/imports.h"
  #include "main/context.h"
  #include "main/enums.h"
+#include "main/fbobject.h"
  #include "main/framebuffer.h"
  #include "main/renderbuffer.h"
  #include "drivers/common/meta.h"
@@ -168,7 +169,7 @@ void radeonUpdateScissor( struct gl_context *ctx )
         max_x = ctx->DrawBuffer->Width - 1;
         max_y = ctx->DrawBuffer->Height - 1;
  
-       if ( !ctx->DrawBuffer->Name ) {
+       if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
                 x1 = x;
                 y1 = ctx->DrawBuffer->Height - (y + h);
                 x2 = x + w - 1;
@@ -407,7 +408,7 @@ void radeonDrawBuffer( struct gl_context *ctx, GLenum mode )
                 fprintf(stderr, "%s %s\n", __FUNCTION__,
                         _mesa_lookup_enum_by_nr( mode ));
  
-       if (ctx->DrawBuffer->Name == 0) {
+       if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
                 radeonContextPtr radeon = RADEON_CONTEXT(ctx);
  
                 const GLboolean was_front_buffer_rendering =
@@ -430,7 +431,7 @@ void radeonDrawBuffer( struct gl_context *ctx, GLenum mode )
  
  void radeonReadBuffer( struct gl_context *ctx, GLenum mode )
  {
-       if ((ctx->DrawBuffer != NULL) && (ctx->DrawBuffer->Name == 0)) {
+       if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
                 struct radeon_context *const rmesa = RADEON_CONTEXT(ctx);
                 const GLboolean was_front_buffer_reading = rmesa->is_front_buffer_reading;
                 rmesa->is_front_buffer_reading = (mode == GL_FRONT_LEFT)
@@ -465,7 +466,7 @@ void radeon_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GL
         void (*old_viewport)(struct gl_context *ctx, GLint x, GLint y,
                              GLsizei w, GLsizei h);
  
-       if (ctx->DrawBuffer->Name == 0) {
+       if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
                 if (radeon->is_front_buffer_rendering) {
                         ctx->Driver.Flush(ctx);
                 }
@@ -656,7 +657,7 @@ void radeonFlush(struct gl_context *ctx)
                 rcommonFlushCmdBuf(radeon, __FUNCTION__);
  
  flush_front:
-       if ((ctx->DrawBuffer->Name == 0) && radeon->front_buffer_dirty) {
+       if (_mesa_is_winsys_fbo(ctx->DrawBuffer) && radeon->front_buffer_dirty) {
                 __DRIscreen *const screen = radeon->radeonScreen->driScreen;
  
                 if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2)
diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c

index 3a14cc69af4c6b21514fad72e8fe9b5b03cc45a1..db5e01da49b3b88c92fd97495a70997b11424159 100644 (file)
--- a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
+++ b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
@@ -28,6 +28,7 @@
  #include "stdint.h"
  #include "main/bufferobj.h"
  #include "main/enums.h"
+#include "main/fbobject.h"
  #include "main/image.h"
  #include "main/readpix.h"
  #include "main/state.h"
@@ -148,7 +149,7 @@ do_blit_readpixels(struct gl_context * ctx,
      }
  
      /* Disable source Y flipping for FBOs */
-    flip_y = (ctx->ReadBuffer->Name == 0);
+    flip_y = _mesa_is_winsys_fbo(ctx->ReadBuffer);
      if (pack->Invert) {
          y = rrb->base.Base.Height - height - y;
          flip_y = !flip_y;
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c

index a3b4e54edc46421302fb5f1f2272be5691df1abc..6e2bb5bce620c57b2fb404edbb25e4881549d683 100644 (file)
--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -39,6 +39,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  #include "main/light.h"
  #include "main/context.h"
  #include "main/framebuffer.h"
+#include "main/fbobject.h"
  #include "main/simple_list.h"
  #include "main/state.h"
  
@@ -444,7 +445,7 @@ static void radeonFrontFace( struct gl_context *ctx, GLenum mode )
     rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_CULL_FRONT_IS_CCW;
  
     /* Winding is inverted when rendering to FBO */
-   if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+   if (ctx->DrawBuffer && _mesa_is_user_fbo(ctx->DrawBuffer))
        mode = (mode == GL_CW) ? GL_CCW : GL_CW;
  
     switch ( mode ) {
@@ -1354,7 +1355,7 @@ void radeonUpdateWindow( struct gl_context *ctx )
     GLfloat xoffset = 0.0;
     GLfloat yoffset = dPriv ? (GLfloat) dPriv->h : 0;
     const GLfloat *v = ctx->Viewport._WindowMap.m;
-   const GLboolean render_to_fbo = (ctx->DrawBuffer ? (ctx->DrawBuffer->Name != 0) : 0);
+   const GLboolean render_to_fbo = (ctx->DrawBuffer ? _mesa_is_user_fbo(ctx->DrawBuffer) : 0);
     const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
     GLfloat y_scale, y_bias;
  
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c

index 7748298d49f40f6328b18b5725c8339b7c2d6499..12004ba71df5db9a029fe9ba61a5334682868d3d 100644 (file)
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -32,6 +32,7 @@
  #include "main/bufferobj.h"
  #include "main/context.h"
  #include "main/colormac.h"
+#include "main/fbobject.h"
  #include "main/macros.h"
  #include "main/image.h"
  #include "main/imports.h"
@@ -69,7 +70,7 @@ color_mask(struct gl_context *ctx,
     const int xclass = xmesa->xm_visual->visualType;
     (void) amask;
  
-   if (ctx->DrawBuffer->Name != 0)
+   if (_mesa_is_user_fbo(ctx->DrawBuffer))
        return;
  
     xmbuf = XMESA_BUFFER(ctx->DrawBuffer);
@@ -240,7 +241,7 @@ clear_nbit_ximage(struct gl_context *ctx, struct xmesa_renderbuffer *xrb,
  static void
  clear_buffers(struct gl_context *ctx, GLbitfield buffers)
  {
-   if (ctx->DrawBuffer->Name == 0) {
+   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
        /* this is a window system framebuffer */
        const GLuint *colorMask = (GLuint *) &ctx->Color.ColorMask[0];
        const XMesaContext xmesa = XMESA_CONTEXT(ctx);
@@ -304,7 +305,7 @@ can_do_DrawPixels_8R8G8B(struct gl_context *ctx, GLenum format, GLenum type)
     if (format == GL_BGRA &&
         type == GL_UNSIGNED_BYTE &&
         ctx->DrawBuffer &&
-       ctx->DrawBuffer->Name == 0 &&
+       _mesa_is_winsys_fbo(ctx->DrawBuffer) &&
         ctx->Pixel.ZoomX == 1.0 &&        /* no zooming */
         ctx->Pixel.ZoomY == 1.0 &&
         ctx->_ImageTransferState == 0 /* no color tables, scale/bias, etc */) {
@@ -437,7 +438,7 @@ can_do_DrawPixels_5R6G5B(struct gl_context *ctx, GLenum format, GLenum type)
         type == GL_UNSIGNED_SHORT_5_6_5 &&
         !ctx->Color.DitherFlag &&  /* no dithering */
         ctx->DrawBuffer &&
-       ctx->DrawBuffer->Name == 0 &&
+       _mesa_is_winsys_fbo(ctx->DrawBuffer) &&
         ctx->Pixel.ZoomX == 1.0 &&        /* no zooming */
         ctx->Pixel.ZoomY == 1.0 &&
         ctx->_ImageTransferState == 0 /* no color tables, scale/bias, etc */) {
@@ -693,7 +694,7 @@ xmesa_update_state( struct gl_context *ctx, GLbitfield new_state )
     _vbo_InvalidateState( ctx, new_state );
     _swsetup_InvalidateState( ctx, new_state );
  
-   if (ctx->DrawBuffer->Name != 0)
+   if (_mesa_is_user_fbo(ctx->DrawBuffer))
        return;
  
     /*
diff --git a/src/mesa/main/api_arrayelt.c b/src/mesa/main/api_arrayelt.c

index 7bf55f3198b7f8d10f2b8bccaea6ec3992ada986..6de6de2b765df8f41cec9504a80d0774f1475025 100644 (file)
--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -1643,12 +1643,20 @@ void GLAPIENTRY _ae_ArrayElement( GLint elt )
     const struct _glapi_table * const disp = GET_DISPATCH();
     GLboolean do_map;
  
+   /* If PrimitiveRestart is enabled and the index is the RestartIndex
+    * then we call PrimitiveRestartNV and return.
+    */
+   if (ctx->Array.PrimitiveRestart && (elt == ctx->Array.RestartIndex)) {
+      CALL_PrimitiveRestartNV((struct _glapi_table *)disp, ());
+      return;
+   }
+
     if (actx->NewState) {
        assert(!actx->mapped_vbos);
        _ae_update_state( ctx );
     }
  
-   /* Determine if w need to map/unmap VBOs */
+   /* Determine if we need to map/unmap VBOs */
     do_map = actx->nr_vbos && !actx->mapped_vbos;
  
     if (do_map)
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c

index d75351c8598429636fdb8b1a11ee43152c72c723..7e2ac98b9a0d5d872f2b02f31cd77f412fa05209 100644 (file)
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -792,6 +792,7 @@ init_attrib_groups(struct gl_context *ctx)
  
     /* Miscellaneous */
     ctx->NewState = _NEW_ALL;
+   ctx->NewDriverState = ~0;
     ctx->ErrorValue = (GLenum) GL_NO_ERROR;
     ctx->ResetStatus = (GLenum) GL_NO_ERROR;
     ctx->varying_vp_inputs = VERT_BIT_ALL;
@@ -1290,6 +1291,7 @@ _mesa_copy_context( const struct gl_context *src, struct gl_context *dst,
     /* XXX FIXME:  Call callbacks?
      */
     dst->NewState = _NEW_ALL;
+   dst->NewDriverState = ~0;
  }
  #endif
  
diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c

index 8cfb97c266e66225d3e3d4f6341c46e7e72866a6..c1c65ea25d2814a7262eec105634f2f9254f1715 100644 (file)
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -134,7 +134,7 @@ do {                                                \
  do {                                                                   \
     if (ctx->Driver.CurrentSavePrimitive <= GL_POLYGON ||               \
         ctx->Driver.CurrentSavePrimitive == PRIM_INSIDE_UNKNOWN_PRIM) { \
-      _mesa_compile_error( ctx, GL_INVALID_OPERATION, "begin/end" );   \
+      _mesa_compile_error( ctx, GL_INVALID_OPERATION, "glBegin/End" ); \
        return retval;                                                   \
     }                                                                   \
  } while (0)
@@ -149,7 +149,7 @@ do {                                                                        \
  do {                                                                   \
     if (ctx->Driver.CurrentSavePrimitive <= GL_POLYGON ||               \
         ctx->Driver.CurrentSavePrimitive == PRIM_INSIDE_UNKNOWN_PRIM) { \
-      _mesa_compile_error( ctx, GL_INVALID_OPERATION, "begin/end" );   \
+      _mesa_compile_error( ctx, GL_INVALID_OPERATION, "glBegin/End" ); \
        return;                                                          \
     }                                                                   \
  } while (0)
@@ -5673,17 +5673,25 @@ save_Indexfv(const GLfloat * v)
  static void GLAPIENTRY
  save_EdgeFlag(GLboolean x)
  {
-   save_Attr1fNV(VERT_ATTRIB_EDGEFLAG, x ? (GLfloat)1.0 : (GLfloat)0.0);
+   save_Attr1fNV(VERT_ATTRIB_EDGEFLAG, x ? 1.0f : 0.0f);
  }
  
-static inline GLboolean compare4fv( const GLfloat *a,
-                                    const GLfloat *b,
-                                    GLuint count )
+
+/**
+ * Compare 'count' elements of vectors 'a' and 'b'.
+ * \return GL_TRUE if equal, GL_FALSE if different.
+ */
+static inline GLboolean
+compare_vec(const GLfloat *a, const GLfloat *b, GLuint count)
  {
     return memcmp( a, b, count * sizeof(GLfloat) ) == 0;
  }
-                              
  
+
+/**
+ * This glMaterial function is used for glMaterial calls that are outside
+ * a glBegin/End pair.  For glMaterial inside glBegin/End, see the VBO code.
+ */
  static void GLAPIENTRY
  save_Materialfv(GLenum face, GLenum pname, const GLfloat * param)
  {
@@ -5698,7 +5706,7 @@ save_Materialfv(GLenum face, GLenum pname, const GLfloat * param)
     case GL_FRONT_AND_BACK:
        break;
     default:
-      _mesa_compile_error(ctx, GL_INVALID_ENUM, "material(face)");
+      _mesa_compile_error(ctx, GL_INVALID_ENUM, "glMaterial(face)");
        return;
     }
  
@@ -5717,7 +5725,7 @@ save_Materialfv(GLenum face, GLenum pname, const GLfloat * param)
        args = 3;
        break;
     default:
-      _mesa_compile_error(ctx, GL_INVALID_ENUM, "material(pname)");
+      _mesa_compile_error(ctx, GL_INVALID_ENUM, "glMaterial(pname)");
        return;
     }
     
@@ -5734,7 +5742,8 @@ save_Materialfv(GLenum face, GLenum pname, const GLfloat * param)
     for (i = 0; i < MAT_ATTRIB_MAX; i++) {
        if (bitmask & (1 << i)) {
           if (ctx->ListState.ActiveMaterialSize[i] == args &&
-             compare4fv(ctx->ListState.CurrentMaterial[i], param, args)) {
+             compare_vec(ctx->ListState.CurrentMaterial[i], param, args)) {
+            /* no change in material value */
              bitmask &= ~(1 << i);
           }
           else {
@@ -5744,8 +5753,7 @@ save_Materialfv(GLenum face, GLenum pname, const GLfloat * param)
        }
     }
  
-   /* If this call has effect, return early:
-    */
+   /* If this call has no effect, return early */
     if (bitmask == 0)
        return;
  
diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c

index fcf873f18c285cb89cabd9e2ce075e9735cbbd98..4a187b7b0f285f5085cc7e6749bbfba984c068dd 100644 (file)
--- a/src/mesa/main/errors.c
+++ b/src/mesa/main/errors.c
@@ -1062,4 +1062,47 @@ _mesa_debug( const struct gl_context *ctx, const char *fmtString, ... )
     (void) fmtString;
  }
  
+
+/**
+ * Report debug information from the shader compiler via GL_ARB_debug_output.
+ *
+ * \param ctx GL context.
+ * \param type The GL_DEBUG_TYPE_*_ARB type of the message. Only GL_DEBUG_TYPE_ERROR_ARB is implemented; any other value is reported through _mesa_problem() and the message is dropped.
+ * \param id The message ID within the given namespace. Asserted to be below SHADER_ERROR_COUNT for error messages.
+ * \param msg The message to output. Need not be null-terminated.
+ * \param len The length of 'msg'. If negative, 'msg' must be null-terminated. Lengths of MAX_DEBUG_MESSAGE_LENGTH or more are clamped to MAX_DEBUG_MESSAGE_LENGTH - 1.
+ */
+void
+_mesa_shader_debug( struct gl_context *ctx, GLenum type, GLuint id,
+                    const char *msg, int len )
+{
+   GLenum source = GL_DEBUG_SOURCE_SHADER_COMPILER_ARB,
+          severity; /* assigned by the switch for every type that reaches _mesa_log_msg() */
+
+   switch (type) {
+   case GL_DEBUG_TYPE_ERROR_ARB:
+      assert(id < SHADER_ERROR_COUNT);
+      severity = GL_DEBUG_SEVERITY_HIGH_ARB;
+      break;
+   case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB:
+   case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB:
+   case GL_DEBUG_TYPE_PORTABILITY_ARB:
+   case GL_DEBUG_TYPE_PERFORMANCE_ARB:
+   case GL_DEBUG_TYPE_OTHER_ARB:
+      assert(0 && "other categories not implemented yet"); /* if the assert does not abort, control falls through to default */
+   default:
+      _mesa_problem(ctx, "bad enum in _mesa_shader_debug()");
+      return;
+   }
+
+   if (len < 0)
+      len = strlen(msg); /* negative length: the caller promised a null-terminated string */
+
+   /* Truncate the message if necessary. */
+   if (len >= MAX_DEBUG_MESSAGE_LENGTH)
+      len = MAX_DEBUG_MESSAGE_LENGTH - 1;
+
+   _mesa_log_msg(ctx, source, type, id, severity, len, msg);
+}
+
  /*@}*/
diff --git a/src/mesa/main/errors.h b/src/mesa/main/errors.h

index ed1c6fc7fb55120a437a58c57564fde0a25c85d6..b4490fac9dd6a58cd8419bcf0710268db3051b2a 100644 (file)
--- a/src/mesa/main/errors.h
+++ b/src/mesa/main/errors.h
@@ -68,6 +68,9 @@ _mesa_error( struct gl_context *ctx, GLenum error, const char *fmtString, ... )
  extern void
  _mesa_debug( const struct gl_context *ctx, const char *fmtString, ... ) PRINTFLIKE(2, 3);
  
+extern void
+_mesa_shader_debug( struct gl_context *ctx, GLenum type, GLuint id, const char *msg, int len );
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c

index 5f2c74a758e7681624f2988ba2c94a8a35a1e9db..a843a40925c95fb584f50ab1b8e4144759eda616 100644 (file)
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -754,7 +754,7 @@ get_extension_override( struct gl_context *ctx )
  
     /* Remove trailing space. */
     len = strlen(extra_exts);
-   if (extra_exts[len - 1] == ' ')
+   if (len > 0 && extra_exts[len - 1] == ' ')
        extra_exts[len - 1] = '\0';
  
     return extra_exts;
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c

index 26ae1087c6e542a811b3b4a61d0bb7b661253cc5..f56369483973c2e70eca6fe557c3680f6dd94453 100644 (file)
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -2023,7 +2023,11 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target,
     _glthread_LOCK_MUTEX(fb->Mutex);
     if (texObj) {
        if (attachment == GL_DEPTH_ATTACHMENT &&
-          texObj == fb->Attachment[BUFFER_STENCIL].Texture) {
+          texObj == fb->Attachment[BUFFER_STENCIL].Texture &&
+          level == fb->Attachment[BUFFER_STENCIL].TextureLevel &&
+          _mesa_tex_target_to_face(textarget) ==
+          fb->Attachment[BUFFER_STENCIL].CubeMapFace &&
+          zoffset == fb->Attachment[BUFFER_STENCIL].Zoffset) {
          /* The texture object is already attached to the stencil attachment
           * point. Don't create a new renderbuffer; just reuse the stencil
           * attachment's. This is required to prevent a GL error in
@@ -2032,8 +2036,12 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target,
          reuse_framebuffer_texture_attachment(fb, BUFFER_DEPTH,
                                               BUFFER_STENCIL);
        } else if (attachment == GL_STENCIL_ATTACHMENT &&
-                texObj == fb->Attachment[BUFFER_DEPTH].Texture) {
-        /* As above, but with depth and stencil juxtaposed. */
+                texObj == fb->Attachment[BUFFER_DEPTH].Texture &&
+                 level == fb->Attachment[BUFFER_DEPTH].TextureLevel &&
+                 _mesa_tex_target_to_face(textarget) ==
+                 fb->Attachment[BUFFER_DEPTH].CubeMapFace &&
+                 zoffset == fb->Attachment[BUFFER_DEPTH].Zoffset) {
+        /* As above, but with depth and stencil transposed. */
          reuse_framebuffer_texture_attachment(fb, BUFFER_STENCIL,
                                               BUFFER_DEPTH);
        } else {
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h

index 2b709ded683f350d6fefb57511abbd6d5f53961b..06ca0d5df169bf73da37c762df9872e85f53e97e 100644 (file)
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1636,6 +1636,11 @@ struct gl_array_attrib
  
     /* GL_ARB_vertex_buffer_object */
     struct gl_buffer_object *ArrayBufferObj;
+
+   /**
+    * Vertex arrays as consumed by a driver.
+    * The array pointer is set up only by the VBO module. */
+   const struct gl_client_array **_DrawArrays; /**< 0..VERT_ATTRIB_MAX-1 */
  };
  
  
@@ -3252,6 +3257,17 @@ typedef enum
     API_OPENGLES2
  } gl_api;
  
+/**
+ * Driver-specific state flags.
+ *
+ * These are or'd with gl_context::NewDriverState to notify a driver about
+ * a state change. The driver sets the flags at context creation and
+ * the meaning of the bits set is opaque to core Mesa.
+ */
+struct gl_driver_flags
+{
+   GLbitfield NewArray;             /**< Vertex array state; or'd into NewDriverState when gl_array_attrib::_DrawArrays is switched */
+};
  
  /**
   * Mesa rendering context.
@@ -3411,6 +3427,9 @@ struct gl_context
  
     GLenum RenderMode;        /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */
     GLbitfield NewState;      /**< bitwise-or of _NEW_* flags */
+   GLbitfield NewDriverState;/**< bitwise-or of flags from DriverFlags */
+
+   struct gl_driver_flags DriverFlags;
  
     GLboolean ViewportInitialized;  /**< has viewport size been initialized? */
  
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c

index 23486467dbc089346774aa08dd0dd4f03a24202c..694f6fa00105864173a4c064637fae78b5bb007c 100644 (file)
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -530,6 +530,32 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
  }
  
  
+/** Is the given texture format a generic compressed format?
+ * \return GL_TRUE for the GL_COMPRESSED_* enums listed in the switch below, GL_FALSE for any other value.
+ */
+static GLboolean
+is_generic_compressed_format(GLenum format)
+{
+   switch (format) {
+   case GL_COMPRESSED_RED:
+   case GL_COMPRESSED_RG:
+   case GL_COMPRESSED_RGB:
+   case GL_COMPRESSED_RGBA:
+   case GL_COMPRESSED_ALPHA:
+   case GL_COMPRESSED_LUMINANCE:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+   case GL_COMPRESSED_INTENSITY:
+   case GL_COMPRESSED_SRGB:
+   case GL_COMPRESSED_SRGB_ALPHA:
+   case GL_COMPRESSED_SLUMINANCE:
+   case GL_COMPRESSED_SLUMINANCE_ALPHA:
+      return GL_TRUE;
+   default: /* anything not listed above */
+      return GL_FALSE;
+   }
+}
+
+
  /**
   * For cube map faces, return a face index in [0,5].
   * For other targets return 0;
@@ -1705,7 +1731,8 @@ texture_error_check( struct gl_context *ctx,
     }
  
     /* additional checks for compressed textures */
-   if (_mesa_is_compressed_format(ctx, internalFormat)) {
+   if (_mesa_is_compressed_format(ctx, internalFormat) ||
+       is_generic_compressed_format(internalFormat)) {
        if (!target_can_be_compressed(ctx, target, internalFormat)) {
           if (!isProxy)
              _mesa_error(ctx, GL_INVALID_ENUM,
@@ -2036,7 +2063,8 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
        return GL_TRUE;
     }
  
-   if (_mesa_is_compressed_format(ctx, internalFormat)) {
+   if (_mesa_is_compressed_format(ctx, internalFormat) ||
+       is_generic_compressed_format(internalFormat)) {
        if (!target_can_be_compressed(ctx, target, internalFormat)) {
           _mesa_error(ctx, GL_INVALID_ENUM,
                       "glCopyTexImage%dD(target)", dimensions);
@@ -2692,6 +2720,13 @@ _mesa_EGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image)
     texObj = _mesa_get_current_tex_object(ctx, target);
     _mesa_lock_texture(ctx, texObj);
  
+   if (texObj->Immutable) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                 "glEGLImageTargetTexture2D(texture is immutable)");
+      _mesa_unlock_texture(ctx, texObj);
+      return;
+   }
+
     texImage = _mesa_get_tex_image(ctx, texObj, target, 0);
     if (!texImage) {
        _mesa_error(ctx, GL_OUT_OF_MEMORY, "glEGLImageTargetTexture2D");
diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h

index e2bdaca01505e01b735f738bb0e1778c53431564..66a0c88950b9cdaaf0c1b827fdbbe32d8e5d1d91 100644 (file)
--- a/src/mesa/main/teximage.h
+++ b/src/mesa/main/teximage.h
@@ -35,6 +35,9 @@
  #include "mtypes.h"
  #include "formats.h"
  
+#ifdef __cplusplus
+extern "C" {
+#endif
  
  /** Is the given value one of the 6 cube faces? */
  static inline GLboolean
@@ -287,4 +290,8 @@ _mesa_TexBuffer(GLenum target, GLenum internalFormat, GLuint buffer);
  
  /*@}*/
  
+#ifdef __cplusplus
+}
+#endif
+
  #endif
diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c

index 2c21dc9a77685d6e809f20ee60fb0aecc36a5e72..8337f4624186fd0abe1a503dc56c2988f263efcf 100644 (file)
--- a/src/mesa/state_tracker/st_cb_rasterpos.c
+++ b/src/mesa/state_tracker/st_cb_rasterpos.c
@@ -225,6 +225,7 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4])
     struct st_context *st = st_context(ctx);
     struct draw_context *draw = st->draw;
     struct rastpos_stage *rs;
+   const struct gl_client_array **saved_arrays = ctx->Array._DrawArrays;
  
     if (st->rastpos_stage) {
        /* get rastpos stage info */
@@ -250,9 +251,14 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4])
      */
     rs->array[0].Ptr = (GLubyte *) v;
  
-   /* draw the point */
-   st_feedback_draw_vbo(ctx, rs->arrays, &rs->prim, 1, NULL, GL_TRUE, 0, 1,
+   /* Draw the point.
+    *
+    * Don't set DriverFlags.NewArray.
+    * st_feedback_draw_vbo doesn't check for that flag. */
+   ctx->Array._DrawArrays = rs->arrays;
+   st_feedback_draw_vbo(ctx, &rs->prim, 1, NULL, GL_TRUE, 0, 1,
                          NULL);
+   ctx->Array._DrawArrays = saved_arrays;
  
     /* restore draw's rasterization stage depending on rendermode */
     if (ctx->RenderMode == GL_FEEDBACK) {
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c

index 93f4216f34eca2b33b3a8cf972da8c0616c68ff5..b44976525398c62c9d101baa22c47f14cd3ed965 100644 (file)
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -195,6 +195,10 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe )
     return st;
  }
  
+static void st_init_driver_flags(struct gl_driver_flags *f)
+{ /* Choose the state-tracker dirty bits that are raised through ctx->NewDriverState. */
+   f->NewArray = ST_NEW_VERTEX_ARRAYS; /* st_draw_vbo() merges NewDriverState into st->dirty.st */
+}
  
  struct st_context *st_create_context(gl_api api, struct pipe_context *pipe,
                                       const struct gl_config *visual,
@@ -217,6 +221,8 @@ struct st_context *st_create_context(gl_api api, struct pipe_context *pipe,
        return NULL;
     }
  
+   st_init_driver_flags(&ctx->DriverFlags);
+
     /* XXX: need a capability bit in gallium to query if the pipe
      * driver prefers DP4 or MUL/MAD for vertex transformation.
      */
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h

index 49b8316842ee2b6452f7c57726825c305da009c1..00a405b69f9828dda9c2e368e54d90a05e5ef7cf 100644 (file)
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -49,6 +49,7 @@ struct u_upload_mgr;
  #define ST_NEW_FRAMEBUFFER             (1 << 3)
  #define ST_NEW_EDGEFLAGS_DATA          (1 << 4)
  #define ST_NEW_GEOMETRY_PROGRAM        (1 << 5)
+#define ST_NEW_VERTEX_ARRAYS           (1 << 6)
  
  
  struct st_state_flags {
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c

index ab2290f2c9da747c87eb7f1b2a46852b9a5fb283..a8c20f45acda0f58e9a20347a5a85b84eefb2058 100644 (file)
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -892,7 +892,6 @@ st_validate_varrays(struct gl_context *ctx,
   */
  void
  st_draw_vbo(struct gl_context *ctx,
-            const struct gl_client_array **arrays,
              const struct _mesa_prim *prims,
              GLuint nr_prims,
              const struct _mesa_index_buffer *ib,
@@ -905,14 +904,21 @@ st_draw_vbo(struct gl_context *ctx,
     struct pipe_context *pipe = st->pipe;
     struct pipe_index_buffer ibuffer = {0};
     struct pipe_draw_info info;
+   const struct gl_client_array **arrays = ctx->Array._DrawArrays;
     unsigned i;
-   GLboolean new_array =
-      st->dirty.st &&
-      (st->dirty.mesa & (_NEW_ARRAY | _NEW_PROGRAM | _NEW_BUFFER_OBJECT)) != 0;
+   GLboolean new_array;
  
     /* Mesa core state should have been validated already */
     assert(ctx->NewState == 0x0);
  
+   /* Get Mesa driver state. */
+   st->dirty.st |= ctx->NewDriverState;
+   ctx->NewDriverState = 0;
+
+   new_array =
+      (st->dirty.st & (ST_NEW_VERTEX_ARRAYS | ST_NEW_VERTEX_PROGRAM)) ||
+      (st->dirty.mesa & (_NEW_PROGRAM | _NEW_BUFFER_OBJECT)) != 0;
+
     /* Validate state. */
     if (st->dirty.st) {
        GLboolean vertDataEdgeFlags;
diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h

index 47bdb11f81cdda4746ac343a1fe6b744eeab2835..c608051eba17f9e69d24a84002d3e4e6ed70a6c6 100644 (file)
--- a/src/mesa/state_tracker/st_draw.h
+++ b/src/mesa/state_tracker/st_draw.h
@@ -49,7 +49,6 @@ void st_destroy_draw( struct st_context *st );
  
  extern void
  st_draw_vbo(struct gl_context *ctx,
-            const struct gl_client_array **arrays,
              const struct _mesa_prim *prims,
              GLuint nr_prims,
              const struct _mesa_index_buffer *ib,
@@ -60,7 +59,6 @@ st_draw_vbo(struct gl_context *ctx,
  
  extern void
  st_feedback_draw_vbo(struct gl_context *ctx,
-                     const struct gl_client_array **arrays,
                       const struct _mesa_prim *prims,
                       GLuint nr_prims,
                       const struct _mesa_index_buffer *ib,
diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c

index ee19898b3ea1b1f907c997203912af8306087c1b..4209fb214f8c659a4f2943d188f541f1ee207e05 100644 (file)
--- a/src/mesa/state_tracker/st_draw_feedback.c
+++ b/src/mesa/state_tracker/st_draw_feedback.c
@@ -91,7 +91,6 @@ set_feedback_vertex_format(struct gl_context *ctx)
   */
  void
  st_feedback_draw_vbo(struct gl_context *ctx,
-                     const struct gl_client_array **arrays,
                       const struct _mesa_prim *prims,
                       GLuint nr_prims,
                       const struct _mesa_index_buffer *ib,
@@ -110,6 +109,7 @@ st_feedback_draw_vbo(struct gl_context *ctx,
     struct pipe_index_buffer ibuffer;
     struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {NULL};
     struct pipe_transfer *ib_transfer = NULL;
+   const struct gl_client_array **arrays = ctx->Array._DrawArrays;
     GLuint attr, i;
     const GLubyte *low_addr = NULL;
     const void *mapped_indices = NULL;
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp

index 9e68deb3471724874dbf6ffb55809237ae92afd1..60a4e2831a415e4337b4359d95c77ece5d82cbfe 100644 (file)
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1775,9 +1775,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
        break;
     case ir_unop_ceil:
-      op[0].negate = ~op[0].negate;
-      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
-      result_src.negate = ~result_src.negate;
+      emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
        break;
     case ir_unop_floor:
        emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
@@ -3963,7 +3961,7 @@ dst_register(struct st_translate *t,
  
     case PROGRAM_TEMPORARY:
        if (ureg_dst_is_undef(t->temps[index]))
-         t->temps[index] = ureg_DECL_temporary(t->ureg);
+         t->temps[index] = ureg_DECL_local_temporary(t->ureg);
  
        return t->temps[index];
  
@@ -4004,7 +4002,7 @@ src_register(struct st_translate *t,
        assert(index >= 0);
        assert(index < Elements(t->temps));
        if (ureg_dst_is_undef(t->temps[index]))
-         t->temps[index] = ureg_DECL_temporary(t->ureg);
+         t->temps[index] = ureg_DECL_local_temporary(t->ureg);
        return ureg_src(t->temps[index]);
  
     case PROGRAM_NAMED_PARAM:
@@ -4261,7 +4259,7 @@ emit_wpos_adjustment( struct st_translate *t,
            * or not, which is determined by testing against the inversion
            * state variable used below, which will be either +1 or -1.
            */
-         struct ureg_dst adj_temp = ureg_DECL_temporary(ureg);
+         struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);
  
           ureg_CMP(ureg, adj_temp,
                    ureg_scalar(wpostrans, invert ? 2 : 0),
@@ -4624,7 +4622,7 @@ st_translate_program(
         */
        for (i = 0; i < (unsigned)program->next_temp; i++) {
           /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
-         t->temps[i] = ureg_DECL_temporary(t->ureg);
+         t->temps[i] = ureg_DECL_local_temporary(t->ureg);
        }
     }
  
diff --git a/src/mesa/tnl/t_draw.c b/src/mesa/tnl/t_draw.c

index 17042cf8f375a526f026d9e78b525c53881b2a1d..6a3c9662e96fe5fd423636b9f18a94a9e2655e8c 100644 (file)
--- a/src/mesa/tnl/t_draw.c
+++ b/src/mesa/tnl/t_draw.c
@@ -408,7 +408,6 @@ static void unmap_vbos( struct gl_context *ctx,
  
  
  void _tnl_vbo_draw_prims(struct gl_context *ctx,
-                        const struct gl_client_array *arrays[],
                          const struct _mesa_prim *prim,
                          GLuint nr_prims,
                          const struct _mesa_index_buffer *ib,
@@ -417,6 +416,8 @@ void _tnl_vbo_draw_prims(struct gl_context *ctx,
                          GLuint max_index,
                          struct gl_transform_feedback_object *tfb_vertcount)
  {
+   const struct gl_client_array **arrays = ctx->Array._DrawArrays;
+
     if (!index_bounds_valid)
        vbo_get_minmax_indices(ctx, prim, ib, &min_index, &max_index, nr_prims);
  
diff --git a/src/mesa/tnl/tnl.h b/src/mesa/tnl/tnl.h

index 434bd7fcd95e9b5a1155a35a1acc8887fe01177d..f6b70e3236fbc78c844cbbb636a5cb867c0c8f1e 100644 (file)
--- a/src/mesa/tnl/tnl.h
+++ b/src/mesa/tnl/tnl.h
@@ -86,7 +86,6 @@ _tnl_draw_prims( struct gl_context *ctx,
  
  void
  _tnl_vbo_draw_prims( struct gl_context *ctx,
-                    const struct gl_client_array *arrays[],
                      const struct _mesa_prim *prim,
                      GLuint nr_prims,
                      const struct _mesa_index_buffer *ib,
diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h

index 2d01d9823ccbd69b0457f45cbc85843bdc021585..3cff8987e7862ed820381c82d5281b5f25e575cb 100644 (file)
--- a/src/mesa/vbo/vbo.h
+++ b/src/mesa/vbo/vbo.h
@@ -72,7 +72,6 @@ void _vbo_InvalidateState( struct gl_context *ctx, GLuint new_state );
  
  
  typedef void (*vbo_draw_func)( struct gl_context *ctx,
-                              const struct gl_client_array **arrays,
                                const struct _mesa_prim *prims,
                                GLuint nr_prims,
                                const struct _mesa_index_buffer *ib,
diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h

index b9a8affa52099f95e2762ad27edabfaef80435ec..1c49de0ca2ffc6b2813bc6ff6a553ebdbd10c3dc 100644 (file)
--- a/src/mesa/vbo/vbo_context.h
+++ b/src/mesa/vbo/vbo_context.h
@@ -58,6 +58,18 @@
  #include "vbo_save.h"
  
  
+/** Used to signal when transitioning from one kind of drawing method
+ * to another; vbo_draw_method() selects the draw arrays from this value.
+ */
+enum draw_method
+{
+   DRAW_NONE,          /**< Initial value only */
+   DRAW_BEGIN_END,     /**< glBegin/glEnd; arrays come from vbo->exec.vtx.inputs */
+   DRAW_DISPLAY_LIST,  /**< arrays come from vbo->save.inputs */
+   DRAW_ARRAYS         /**< glDrawArrays/glDrawElements; arrays come from vbo->exec.array.inputs */
+};
+
+
  struct vbo_context {
     struct gl_client_array currval[VBO_ATTRIB_MAX];
     
@@ -74,6 +86,8 @@ struct vbo_context {
      * is responsible for initiating any fallback actions required:
      */
     vbo_draw_func draw_prims;
+
+   enum draw_method last_draw_method;
  };
  
  
@@ -101,4 +115,40 @@ get_program_mode( struct gl_context *ctx )
  }
  
  
+/**
+ * This is called by glBegin, glDrawArrays and glDrawElements (and
+ * variations of those calls).  When we transition from immediate mode
+ * drawing to array drawing we need to invalidate the array state.
+ *
+ * glBegin/End builds vertex arrays.  Those arrays may look identical
+ * to glDrawArrays arrays except that the position of the elements may
+ * be different.  For example, arrays of (position3f, normal3f) vs. arrays
+ * of (normal3f, position3f).  So we need to make sure we notify drivers
+ * that arrays may be changing.  Does nothing if 'method' equals vbo->last_draw_method.
+ */
+static inline void
+vbo_draw_method(struct vbo_context *vbo, enum draw_method method)
+{
+   if (vbo->last_draw_method != method) {
+      struct gl_context *ctx = vbo->exec.ctx;
+
+      switch (method) { /* point the driver-visible array list at the inputs of the new method */
+      case DRAW_ARRAYS:
+         ctx->Array._DrawArrays = vbo->exec.array.inputs;
+         break;
+      case DRAW_BEGIN_END:
+         ctx->Array._DrawArrays = vbo->exec.vtx.inputs;
+         break;
+      case DRAW_DISPLAY_LIST:
+         ctx->Array._DrawArrays = vbo->save.inputs;
+         break;
+      default:
+         ASSERT(0); /* DRAW_NONE is an initial value only, never a valid request */
+      }
+
+      ctx->NewDriverState |= ctx->DriverFlags.NewArray; /* driver-chosen bit; its meaning is opaque to core Mesa */
+      vbo->last_draw_method = method;
+   }
+}
+
  #endif
diff --git a/src/mesa/vbo/vbo_exec.h b/src/mesa/vbo/vbo_exec.h

index be9f3d78d2b0a045e0c5adeefd9c72794cef80a5..4ac7d168dc7d15b1a196435b2ccf302aa24956eb 100644 (file)
--- a/src/mesa/vbo/vbo_exec.h
+++ b/src/mesa/vbo/vbo_exec.h
@@ -78,26 +78,12 @@ struct vbo_exec_copied_vtx {
  };
  
  
-/** Used to signal when transitioning from one kind of drawing method
- * to another.
- */
-enum draw_method
-{
-   DRAW_NONE,          /**< Initial value only */
-   DRAW_BEGIN_END,
-   DRAW_DISPLAY_LIST,
-   DRAW_ARRAYS
-};
-
-
  struct vbo_exec_context
  {
     struct gl_context *ctx;   
     GLvertexformat vtxfmt;
     GLvertexformat vtxfmt_noop;
  
-   enum draw_method last_draw_method;
-
     struct {
        struct gl_buffer_object *bufferobj;
  
@@ -174,28 +160,6 @@ void vbo_exec_vtx_init( struct vbo_exec_context *exec );
  void vbo_exec_vtx_destroy( struct vbo_exec_context *exec );
  
  
-/**
- * This is called by glBegin, glDrawArrays and glDrawElements (and
- * variations of those calls).  When we transition from immediate mode
- * drawing to array drawing we need to invalidate the array state.
- *
- * glBegin/End builds vertex arrays.  Those arrays may look identical
- * to glDrawArrays arrays except that the position of the elements may
- * be different.  For example, arrays of (position3v, normal3f) vs. arrays
- * of (normal3f, position3f).  So we need to make sure we notify drivers
- * that arrays may be changing.
- */
-static inline void
-vbo_draw_method(struct vbo_exec_context *exec, enum draw_method method)
-{
-   if (exec->last_draw_method != method) {
-      struct gl_context *ctx = exec->ctx;
-      ctx->Driver.UpdateState(ctx, _NEW_ARRAY);
-      exec->last_draw_method = method;
-   }
-}
-
-
  #if FEATURE_beginend
  
  void vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap );
diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c

index 3f95410892e1295ccc8bc557f7dc2a3d8af3fa1e..b87da18f3ebdf55b3ccb4b52da4754c4f3788e1d 100644 (file)
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -700,7 +700,7 @@ static void GLAPIENTRY vbo_exec_Begin( GLenum mode )
           return;
        }
  
-      vbo_draw_method(exec, DRAW_BEGIN_END);
+      vbo_draw_method(vbo_context(ctx), DRAW_BEGIN_END);
  
        if (ctx->Driver.PrepareExecBegin)
          ctx->Driver.PrepareExecBegin(ctx);
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c

index 2dcfb8e5b8cd687c39f602b989250ff8e2bbb274..cc94e761bc189aa397737b84f51011657d63762c 100644 (file)
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -506,7 +506,7 @@ recalculate_input_bindings(struct gl_context *ctx)
     }
  
     _mesa_set_varying_vp_inputs( ctx, VERT_BIT_ALL & (~const_inputs) );
-   ctx->Driver.UpdateState(ctx, _NEW_ARRAY);
+   ctx->NewDriverState |= ctx->DriverFlags.NewArray;
  }
  
  
@@ -523,7 +523,7 @@ vbo_bind_arrays(struct gl_context *ctx)
     struct vbo_context *vbo = vbo_context(ctx);
     struct vbo_exec_context *exec = &vbo->exec;
  
-   vbo_draw_method(exec, DRAW_ARRAYS);
+   vbo_draw_method(vbo, DRAW_ARRAYS);
  
     if (exec->array.recalculate_inputs) {
        recalculate_input_bindings(ctx);
@@ -600,7 +600,7 @@ vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start,
        if (primCount > 0) {
           /* draw one or two prims */
           check_buffers_are_unmapped(exec->array.inputs);
-         vbo->draw_prims(ctx, exec->array.inputs, prim, primCount, NULL,
+         vbo->draw_prims(ctx, prim, primCount, NULL,
                           GL_TRUE, start, start + count - 1, NULL);
        }
     }
@@ -610,7 +610,7 @@ vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start,
        prim[0].count = count;
  
        check_buffers_are_unmapped(exec->array.inputs);
-      vbo->draw_prims(ctx, exec->array.inputs, prim, 1, NULL,
+      vbo->draw_prims(ctx, prim, 1, NULL,
                        GL_TRUE, start, start + count - 1,
                        NULL);
     }
@@ -801,7 +801,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
      */
  
     check_buffers_are_unmapped(exec->array.inputs);
-   vbo->draw_prims( ctx, exec->array.inputs, prim, 1, &ib,
+   vbo->draw_prims( ctx, prim, 1, &ib,
                     index_bounds_valid, start, end, NULL );
  }
  
@@ -1096,7 +1096,7 @@ vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode,
        }
  
        check_buffers_are_unmapped(exec->array.inputs);
-      vbo->draw_prims(ctx, exec->array.inputs, prim, primcount, &ib,
+      vbo->draw_prims(ctx, prim, primcount, &ib,
                       GL_FALSE, ~0, ~0, NULL);
     } else {
        /* render one prim at a time */
@@ -1121,7 +1121,7 @@ vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode,
             prim[0].basevertex = 0;
  
           check_buffers_are_unmapped(exec->array.inputs);
-         vbo->draw_prims(ctx, exec->array.inputs, prim, 1, &ib,
+         vbo->draw_prims(ctx, prim, 1, &ib,
                           GL_FALSE, ~0, ~0, NULL);
        }
     }
@@ -1199,7 +1199,7 @@ vbo_draw_transform_feedback(struct gl_context *ctx, GLenum mode,
      * will be rendered. */
  
     check_buffers_are_unmapped(exec->array.inputs);
-   vbo->draw_prims(ctx, exec->array.inputs, prim, 1, NULL,
+   vbo->draw_prims(ctx, prim, 1, NULL,
                     GL_TRUE, 0, 0, obj);
  }
  
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c

index da5ca695eaf46db59176d7b70a98bc0e450fef55..77db8ec7f3eb92161d4f84b0746cca1b709e631b 100644 (file)
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -257,7 +257,7 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
     }
  
     _mesa_set_varying_vp_inputs( ctx, varying_inputs );
-   ctx->Driver.UpdateState(ctx, _NEW_ARRAY);
+   ctx->NewDriverState |= ctx->DriverFlags.NewArray;
  }
  
  
@@ -407,8 +407,7 @@ vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped)
                    exec->vtx.vert_count);
  
          vbo_context(ctx)->draw_prims( ctx, 
-                                      exec->vtx.inputs, 
-                                      exec->vtx.prim, 
+                                      exec->vtx.prim,
                                        exec->vtx.prim_count,
                                        NULL,
                                        GL_TRUE,
diff --git a/src/mesa/vbo/vbo_rebase.c b/src/mesa/vbo/vbo_rebase.c

index 597a8f46994626ccc41a75774375cd7a9df850da..fff9df0c29dbafc3409ab0281c6819fad2a16173 100644 (file)
--- a/src/mesa/vbo/vbo_rebase.c
+++ b/src/mesa/vbo/vbo_rebase.c
@@ -129,6 +129,7 @@ void vbo_rebase_prims( struct gl_context *ctx,
  
     struct _mesa_index_buffer tmp_ib;
     struct _mesa_prim *tmp_prims = NULL;
+   const struct gl_client_array **saved_arrays = ctx->Array._DrawArrays;
     void *tmp_indices = NULL;
     GLuint i;
  
@@ -226,15 +227,20 @@ void vbo_rebase_prims( struct gl_context *ctx,
     
     /* Re-issue the draw call.
      */
+   ctx->Array._DrawArrays = tmp_array_pointers;
+   ctx->NewDriverState |= ctx->DriverFlags.NewArray;
+
     draw( ctx, 
-        tmp_array_pointers, 
-        prim, 
+        prim,
          nr_prims, 
          ib, 
          GL_TRUE,
          0, 
          max_index - min_index,
          NULL );
+
+   ctx->Array._DrawArrays = saved_arrays;
+   ctx->NewDriverState |= ctx->DriverFlags.NewArray;
     
     if (tmp_indices)
        free(tmp_indices);
diff --git a/src/mesa/vbo/vbo_save.c b/src/mesa/vbo/vbo_save.c

index 040c9b7a95a2664aa592b636a060e4b549865c79..7a97d3c65677eca70b411f6be7a047044a730907 100644 (file)
--- a/src/mesa/vbo/vbo_save.c
+++ b/src/mesa/vbo/vbo_save.c
@@ -49,6 +49,9 @@ static void vbo_save_callback_init( struct gl_context *ctx )
  
  
  
+/**
+ * Called at context creation time.
+ */
  void vbo_save_init( struct gl_context *ctx )
  {
     struct vbo_context *vbo = vbo_context(ctx);
diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c

index 13604333e953dc3b0e353b6eb7e38f4453bfa7cd..74f5dc9ce68bcc25f298f3fd52edb0ab950ab292 100644 (file)
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -1506,6 +1506,9 @@ vbo_print_vertex_list(struct gl_context *ctx, void *data)
  }
  
  
+/**
+ * Called during context creation/init.
+ */
  static void
  _save_current_init(struct gl_context *ctx)
  {
@@ -1529,7 +1532,7 @@ _save_current_init(struct gl_context *ctx)
  
  
  /**
- * Initialize the display list compiler
+ * Initialize the display list compiler.  Called during context creation.
   */
  void
  vbo_save_api_init(struct vbo_save_context *save)
diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c

index 88a9a7e346292c3e69a344bdc3f4907e18e4030e..c6425ab1b79022c8a9cb8667012df02ce5b1678d 100644 (file)
--- a/src/mesa/vbo/vbo_save_draw.c
+++ b/src/mesa/vbo/vbo_save_draw.c
@@ -213,7 +213,7 @@ static void vbo_bind_vertex_list(struct gl_context *ctx,
     }
  
     _mesa_set_varying_vp_inputs( ctx, varying_inputs );
-   ctx->Driver.UpdateState(ctx, _NEW_ARRAY);
+   ctx->NewDriverState |= ctx->DriverFlags.NewArray;
  }
  
  
@@ -250,7 +250,6 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data)
     const struct vbo_save_vertex_list *node =
        (const struct vbo_save_vertex_list *) data;
     struct vbo_save_context *save = &vbo_context(ctx)->save;
-   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
     GLboolean remap_vertex_store = GL_FALSE;
  
     if (save->vertex_store->buffer) {
@@ -304,7 +303,7 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data)
  
        vbo_bind_vertex_list( ctx, node );
  
-      vbo_draw_method(exec, DRAW_DISPLAY_LIST);
+      vbo_draw_method(vbo_context(ctx), DRAW_DISPLAY_LIST);
  
        /* Again...
         */
@@ -313,8 +312,7 @@ vbo_save_playback_vertex_list(struct gl_context *ctx, void *data)
  
        if (node->count > 0) {
           vbo_context(ctx)->draw_prims(ctx, 
-                                      save->inputs, 
-                                      node->prim, 
+                                      node->prim,
                                        node->prim_count,
                                        NULL,
                                        GL_TRUE,
diff --git a/src/mesa/vbo/vbo_split_copy.c b/src/mesa/vbo/vbo_split_copy.c

index b53293c3120b8bad94403fc593ed5d6bdff8450b..528fcfd7f802a10a29cf0873a4f40ff8de5bfd4e 100644 (file)
--- a/src/mesa/vbo/vbo_split_copy.c
+++ b/src/mesa/vbo/vbo_split_copy.c
@@ -171,6 +171,8 @@ dump_draw_info(struct gl_context *ctx,
  static void
  flush( struct copy_context *copy )
  {
+   struct gl_context *ctx = copy->ctx;
+   const struct gl_client_array **saved_arrays = ctx->Array._DrawArrays;
     GLuint i;
  
     /* Set some counters: 
@@ -189,8 +191,10 @@ flush( struct copy_context *copy )
     (void) dump_draw_info;
  #endif
  
-   copy->draw( copy->ctx,
-              copy->dstarray_ptr,
+   ctx->Array._DrawArrays = copy->dstarray_ptr;
+   ctx->NewDriverState |= ctx->DriverFlags.NewArray;
+
+   copy->draw( ctx,
                copy->dstprim,
                copy->dstprim_nr,
                &copy->dstib,
@@ -199,6 +203,9 @@ flush( struct copy_context *copy )
                copy->dstbuf_nr - 1,
                NULL );
  
+   ctx->Array._DrawArrays = saved_arrays;
+   ctx->NewDriverState |= ctx->DriverFlags.NewArray;
+
     /* Reset all pointers: 
      */
     copy->dstprim_nr = 0;
diff --git a/src/mesa/vbo/vbo_split_inplace.c b/src/mesa/vbo/vbo_split_inplace.c

index 9e596f668913fa2aa4f5e189a3dbbbb8f08335bd..00464049dddf62a77890cfa3fa838178745ab956 100644 (file)
--- a/src/mesa/vbo/vbo_split_inplace.c
+++ b/src/mesa/vbo/vbo_split_inplace.c
@@ -62,6 +62,8 @@ struct split_context {
  
  static void flush_vertex( struct split_context *split )
  {
+   struct gl_context *ctx = split->ctx;
+   const struct gl_client_array **saved_arrays = ctx->Array._DrawArrays;
     struct _mesa_index_buffer ib;
     GLuint i;
  
@@ -82,8 +84,10 @@ static void flush_vertex( struct split_context *split )
  
     assert(split->max_index >= split->min_index);
  
-   split->draw(split->ctx,
-              split->array,
+   ctx->Array._DrawArrays = split->array;
+   ctx->NewDriverState |= ctx->DriverFlags.NewArray;
+
+   split->draw(ctx,
                split->dstprim,
                split->dstprim_nr,
                split->ib ? &ib : NULL,
@@ -92,6 +96,9 @@ static void flush_vertex( struct split_context *split )
                split->max_index,
                NULL);
  
+   ctx->Array._DrawArrays = saved_arrays;
+   ctx->NewDriverState |= ctx->DriverFlags.NewArray;
+
     split->dstprim_nr = 0;
     split->min_index = ~0;
     split->max_index = 0;
author	Marek Olšák <maraeo@gmail.com>
	Fri, 11 May 2012 14:38:13 +0000 (16:38 +0200)
committer	Marek Olšák <maraeo@gmail.com>
	Fri, 11 May 2012 14:38:13 +0000 (16:38 +0200)