gallium: add a cap to determine whether the driver supports offset_clamp

[mesa.git] / src / gallium / drivers / ilo / ilo_screen.c
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c

index 5d652e4ad73ebfdf87eaaff8d72e9e3fdfe49a3f..5048ba1ac22b23b0e91c31d9d976b38956be078e 100644 (file)
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -25,29 +25,36 @@
   *    Chia-I Wu <olv@lunarg.com>
   */
  
+#include "pipe/p_state.h"
+#include "os/os_misc.h"
  #include "util/u_format_s3tc.h"
  #include "vl/vl_decoder.h"
  #include "vl/vl_video_buffer.h"
-#include "intel_chipset.h"
-#include "intel_reg.h" /* for TIMESTAMP */
+#include "genhw/genhw.h" /* for GEN6_REG_TIMESTAMP */
  #include "intel_winsys.h"
  
  #include "ilo_context.h"
  #include "ilo_format.h"
  #include "ilo_resource.h"
+#include "ilo_transfer.h" /* for ILO_TRANSFER_MAP_BUFFER_ALIGNMENT */
  #include "ilo_public.h"
  #include "ilo_screen.h"
  
+struct ilo_fence {
+   struct pipe_reference reference; /* kept at offset 0; ilo_fence_reference() statically asserts it */
+   struct intel_bo *bo; /* may be NULL; the fence holds its own reference when set */
+};
+
  int ilo_debug;
  
  static const struct debug_named_value ilo_debug_flags[] = {
-   { "3d",        ILO_DEBUG_3D,       "Dump 3D commands and states" },
+   { "batch",     ILO_DEBUG_BATCH,    "Dump batch/state/surface/instruction buffers" },
     { "vs",        ILO_DEBUG_VS,       "Dump vertex shaders" },
     { "gs",        ILO_DEBUG_GS,       "Dump geometry shaders" },
     { "fs",        ILO_DEBUG_FS,       "Dump fragment shaders" },
     { "cs",        ILO_DEBUG_CS,       "Dump compute shaders" },
     { "draw",      ILO_DEBUG_DRAW,     "Show draw information" },
-   { "flush",     ILO_DEBUG_FLUSH,    "Show batch buffer flushes" },
+   { "submit",    ILO_DEBUG_SUBMIT,   "Show batch buffer submissions" },
     { "nohw",      ILO_DEBUG_NOHW,     "Do not send commands to HW" },
     { "nocache",   ILO_DEBUG_NOCACHE,  "Always invalidate HW caches" },
     { "nohiz",     ILO_DEBUG_NOHIZ,    "Disable HiZ" },
@@ -114,16 +121,15 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
     case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
        return UINT_MAX;
     case PIPE_SHADER_CAP_MAX_INPUTS:
+   case PIPE_SHADER_CAP_MAX_OUTPUTS:
        /* this is limited by how many attributes SF can remap */
        return 16;
-   case PIPE_SHADER_CAP_MAX_CONSTS:
-      return 1024;
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+      return 1024 * sizeof(float[4]);
     case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
        return ILO_MAX_CONST_BUFFERS;
     case PIPE_SHADER_CAP_MAX_TEMPS:
        return 256;
-   case PIPE_SHADER_CAP_MAX_ADDRS:
-      return (shader == PIPE_SHADER_FRAGMENT) ? 0 : 1;
     case PIPE_SHADER_CAP_MAX_PREDS:
        return 0;
     case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@@ -188,6 +194,7 @@ ilo_get_compute_param(struct pipe_screen *screen,
                        enum pipe_compute_cap param,
                        void *ret)
  {
+   struct ilo_screen *is = ilo_screen(screen);
     union {
        const char *ir_target;
        uint64_t grid_dimension;
@@ -199,11 +206,13 @@ ilo_get_compute_param(struct pipe_screen *screen,
        uint64_t max_private_size;
        uint64_t max_input_size;
        uint64_t max_mem_alloc_size;
+      uint32_t max_clock_frequency;
+      uint32_t max_compute_units;
+      uint32_t images_supported;
     } val;
     const void *ptr;
     int size;
  
-   /* XXX some randomly chosen values */
     switch (param) {
     case PIPE_COMPUTE_CAP_IR_TARGET:
        val.ir_target = "ilog";
@@ -218,58 +227,79 @@ ilo_get_compute_param(struct pipe_screen *screen,
        size = sizeof(val.grid_dimension);
        break;
     case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
-      val.max_grid_size[0] = 65535;
-      val.max_grid_size[1] = 65535;
-      val.max_grid_size[2] = 1;
+      val.max_grid_size[0] = 0xffffffffu;
+      val.max_grid_size[1] = 0xffffffffu;
+      val.max_grid_size[2] = 0xffffffffu;
  
        ptr = &val.max_grid_size;
        size = sizeof(val.max_grid_size);
        break;
     case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
-      val.max_block_size[0] = 512;
-      val.max_block_size[1] = 512;
-      val.max_block_size[2] = 512;
+      val.max_block_size[0] = 1024;
+      val.max_block_size[1] = 1024;
+      val.max_block_size[2] = 1024;
  
        ptr = &val.max_block_size;
        size = sizeof(val.max_block_size);
        break;
  
     case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
-      val.max_threads_per_block = 512;
+      val.max_threads_per_block = 1024;
  
        ptr = &val.max_threads_per_block;
        size = sizeof(val.max_threads_per_block);
        break;
     case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
-      val.max_global_size = 4;
+      /* \see ilo_max_resource_size */
+      val.max_global_size = 1u << 31;
  
        ptr = &val.max_global_size;
        size = sizeof(val.max_global_size);
        break;
     case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+      /* Shared Local Memory Size of INTERFACE_DESCRIPTOR_DATA */
        val.max_local_size = 64 * 1024;
  
        ptr = &val.max_local_size;
        size = sizeof(val.max_local_size);
        break;
     case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
-      val.max_private_size = 32768;
+      /* scratch size */
+      val.max_private_size = 12 * 1024;
  
        ptr = &val.max_private_size;
        size = sizeof(val.max_private_size);
        break;
     case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
-      val.max_input_size = 256;
+      val.max_input_size = 1024;
  
        ptr = &val.max_input_size;
        size = sizeof(val.max_input_size);
        break;
     case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
-      val.max_mem_alloc_size = 128 * 1024 * 1024;
+      val.max_mem_alloc_size = 1u << 31;
  
        ptr = &val.max_mem_alloc_size;
        size = sizeof(val.max_mem_alloc_size);
        break;
+   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+      val.max_clock_frequency = 1000;
+
+      ptr = &val.max_clock_frequency;
+      size = sizeof(val.max_clock_frequency);
+      break;
+   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+      val.max_compute_units = is->dev.eu_count;
+
+      ptr = &val.max_compute_units;
+      size = sizeof(val.max_compute_units);
+      break;
+   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+      val.images_supported = 1;
+
+      ptr = &val.images_supported;
+      size = sizeof(val.images_supported);
+      break;
     default:
        ptr = NULL;
        size = 0;
@@ -310,30 +340,19 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
         *           Max WxHxD for 2D and CUBE     Max WxHxD for 3D
         *  GEN6           8192x8192x512            2048x2048x2048
         *  GEN7         16384x16384x2048           2048x2048x2048
-       *
-       * However, when the texutre size is large, things become unstable.  We
-       * require the maximum texture size to be 2^30 bytes in
-       * screen->can_create_resource().  Since the maximum pixel size is 2^4
-       * bytes (PIPE_FORMAT_R32G32B32A32_FLOAT), textures should not have more
-       * than 2^26 pixels.
-       *
-       * For 3D textures, we have to set the maximum number of levels to 9,
-       * which has at most 2^24 pixels.  For 2D textures, we set it to 14,
-       * which has at most 2^26 pixels.  And for cube textures, we has to set
-       * it to 12.
         */
-      return 14;
+      return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 15 : 14;
     case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
-      return 9;
-   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
        return 12;
+   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+      return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 15 : 14;
     case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
        return false;
     case PIPE_CAP_BLEND_EQUATION_SEPARATE:
     case PIPE_CAP_SM3:
        return true;
     case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
-      if (is->dev.gen >= ILO_GEN(7) && !is->dev.has_gen7_sol_reset)
+      if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) && !is->dev.has_gen7_sol_reset)
           return 0;
        return ILO_MAX_SO_BUFFERS;
     case PIPE_CAP_PRIMITIVE_RESTART:
@@ -342,7 +361,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
     case PIPE_CAP_INDEP_BLEND_FUNC:
        return true;
     case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
-      return (is->dev.gen >= ILO_GEN(7)) ? 2048 : 512;
+      return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 2048 : 512;
     case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
     case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
     case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
@@ -361,8 +380,10 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
     case PIPE_CAP_SEAMLESS_CUBE_MAP:
     case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
        return true;
+   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
     case PIPE_CAP_MIN_TEXEL_OFFSET:
        return -8;
+   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
     case PIPE_CAP_MAX_TEXEL_OFFSET:
        return 7;
     case PIPE_CAP_CONDITIONAL_RENDER:
@@ -372,11 +393,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
        return ILO_MAX_SO_BINDINGS / ILO_MAX_SO_BUFFERS;
     case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
        return ILO_MAX_SO_BINDINGS;
-   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
-   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
-      return 0;
     case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
-      if (is->dev.gen >= ILO_GEN(7))
+      if (ilo_dev_gen(&is->dev) >= ILO_GEN(7))
           return is->dev.has_gen7_sol_reset;
        else
           return false; /* TODO */
@@ -395,6 +413,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
     case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
     case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
        return false;
+   case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+      return 2048;
     case PIPE_CAP_COMPUTE:
        return false; /* TODO */
     case PIPE_CAP_USER_INDEX_BUFFERS:
@@ -410,7 +430,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
     case PIPE_CAP_TEXTURE_MULTISAMPLE:
        return false; /* TODO */
     case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
-      return 64;
+      return ILO_TRANSFER_MAP_BUFFER_ALIGNMENT;
     case PIPE_CAP_CUBE_MAP_ARRAY:
     case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
        return true;
@@ -419,13 +439,12 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
     case PIPE_CAP_TGSI_TEXCOORD:
        return false;
     case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
-      return true;
     case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
-      return false; /* TODO */
+      return true;
     case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
        return 0;
     case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-      /* a BRW_SURFACE_BUFFER can have up to 2^27 elements */
+      /* a GEN6_SURFTYPE_BUFFER can have up to 2^27 elements */
        return 1 << 27;
     case PIPE_CAP_MAX_VIEWPORTS:
        return ILO_MAX_VIEWPORTS;
@@ -433,12 +452,54 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
        return PIPE_ENDIAN_LITTLE;
     case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
        return true;
-   case PIPE_CAP_TGSI_VS_LAYER:
+   case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
     case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
     case PIPE_CAP_TEXTURE_GATHER_SM5:
+      return 0;
     case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+      return true;
+   case PIPE_CAP_FAKE_SW_MSAA:
+   case PIPE_CAP_TEXTURE_QUERY_LOD:
+   case PIPE_CAP_SAMPLE_SHADING:
+   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+   case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+   case PIPE_CAP_MAX_VERTEX_STREAMS:
+   case PIPE_CAP_DRAW_INDIRECT:
+   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+   case PIPE_CAP_SAMPLER_VIEW_TARGET:
        return 0;
  
+   case PIPE_CAP_VENDOR_ID:
+      return 0x8086;
+   case PIPE_CAP_DEVICE_ID:
+      return is->dev.devid;
+   case PIPE_CAP_ACCELERATED:
+      return true;
+   case PIPE_CAP_VIDEO_MEMORY: {
+      /* Once a batch uses more than 75% of the maximum mappable size, we
+       * assume that there's some fragmentation, and we start doing extra
+       * flushing, etc.  That's the big cliff apps will care about.
+       */
+      const uint64_t gpu_memory = is->dev.aperture_total * 3 / 4;
+      uint64_t system_memory;
+
+      if (!os_get_total_physical_memory(&system_memory))
+         return 0;
+
+      return (int) (MIN2(gpu_memory, system_memory) >> 20);
+   }
+   case PIPE_CAP_UMA:
+      return true;
+   case PIPE_CAP_CLIP_HALFZ:
+      return true;
+   case PIPE_CAP_VERTEXID_NOBASE:
+      return false;
+   case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+      return true;
+
     default:
        return 0;
     }
@@ -454,89 +515,39 @@ static const char *
  ilo_get_name(struct pipe_screen *screen)
  {
     struct ilo_screen *is = ilo_screen(screen);
-   const char *chipset;
-
-   /* stolen from classic i965 */
-   switch (is->dev.devid) {
-   case PCI_CHIP_SANDYBRIDGE_GT1:
-   case PCI_CHIP_SANDYBRIDGE_GT2:
-   case PCI_CHIP_SANDYBRIDGE_GT2_PLUS:
-      chipset = "Intel(R) Sandybridge Desktop";
-      break;
-   case PCI_CHIP_SANDYBRIDGE_M_GT1:
-   case PCI_CHIP_SANDYBRIDGE_M_GT2:
-   case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS:
-      chipset = "Intel(R) Sandybridge Mobile";
-      break;
-   case PCI_CHIP_SANDYBRIDGE_S:
-      chipset = "Intel(R) Sandybridge Server";
-      break;
-   case PCI_CHIP_IVYBRIDGE_GT1:
-   case PCI_CHIP_IVYBRIDGE_GT2:
-      chipset = "Intel(R) Ivybridge Desktop";
-      break;
-   case PCI_CHIP_IVYBRIDGE_M_GT1:
-   case PCI_CHIP_IVYBRIDGE_M_GT2:
-      chipset = "Intel(R) Ivybridge Mobile";
-      break;
-   case PCI_CHIP_IVYBRIDGE_S_GT1:
-   case PCI_CHIP_IVYBRIDGE_S_GT2:
-      chipset = "Intel(R) Ivybridge Server";
-      break;
-   case PCI_CHIP_BAYTRAIL_M_1:
-   case PCI_CHIP_BAYTRAIL_M_2:
-   case PCI_CHIP_BAYTRAIL_M_3:
-   case PCI_CHIP_BAYTRAIL_M_4:
-   case PCI_CHIP_BAYTRAIL_D:
+   const char *chipset = NULL;
+
+   if (gen_is_vlv(is->dev.devid)) {
        chipset = "Intel(R) Bay Trail";
-      break;
-   case PCI_CHIP_HASWELL_GT1:
-   case PCI_CHIP_HASWELL_GT2:
-   case PCI_CHIP_HASWELL_GT3:
-   case PCI_CHIP_HASWELL_SDV_GT1:
-   case PCI_CHIP_HASWELL_SDV_GT2:
-   case PCI_CHIP_HASWELL_SDV_GT3:
-   case PCI_CHIP_HASWELL_ULT_GT1:
-   case PCI_CHIP_HASWELL_ULT_GT2:
-   case PCI_CHIP_HASWELL_ULT_GT3:
-   case PCI_CHIP_HASWELL_CRW_GT1:
-   case PCI_CHIP_HASWELL_CRW_GT2:
-   case PCI_CHIP_HASWELL_CRW_GT3:
-      chipset = "Intel(R) Haswell Desktop";
-      break;
-   case PCI_CHIP_HASWELL_M_GT1:
-   case PCI_CHIP_HASWELL_M_GT2:
-   case PCI_CHIP_HASWELL_M_GT3:
-   case PCI_CHIP_HASWELL_SDV_M_GT1:
-   case PCI_CHIP_HASWELL_SDV_M_GT2:
-   case PCI_CHIP_HASWELL_SDV_M_GT3:
-   case PCI_CHIP_HASWELL_ULT_M_GT1:
-   case PCI_CHIP_HASWELL_ULT_M_GT2:
-   case PCI_CHIP_HASWELL_ULT_M_GT3:
-   case PCI_CHIP_HASWELL_CRW_M_GT1:
-   case PCI_CHIP_HASWELL_CRW_M_GT2:
-   case PCI_CHIP_HASWELL_CRW_M_GT3:
-      chipset = "Intel(R) Haswell Mobile";
-      break;
-   case PCI_CHIP_HASWELL_S_GT1:
-   case PCI_CHIP_HASWELL_S_GT2:
-   case PCI_CHIP_HASWELL_S_GT3:
-   case PCI_CHIP_HASWELL_SDV_S_GT1:
-   case PCI_CHIP_HASWELL_SDV_S_GT2:
-   case PCI_CHIP_HASWELL_SDV_S_GT3:
-   case PCI_CHIP_HASWELL_ULT_S_GT1:
-   case PCI_CHIP_HASWELL_ULT_S_GT2:
-   case PCI_CHIP_HASWELL_ULT_S_GT3:
-   case PCI_CHIP_HASWELL_CRW_S_GT1:
-   case PCI_CHIP_HASWELL_CRW_S_GT2:
-   case PCI_CHIP_HASWELL_CRW_S_GT3:
-      chipset = "Intel(R) Haswell Server";
-      break;
-   default:
-      chipset = "Unknown Intel Chipset";
-      break;
+   }
+   else if (gen_is_hsw(is->dev.devid)) {
+      if (gen_is_desktop(is->dev.devid))
+         chipset = "Intel(R) Haswell Desktop";
+      else if (gen_is_mobile(is->dev.devid))
+         chipset = "Intel(R) Haswell Mobile";
+      else if (gen_is_server(is->dev.devid))
+         chipset = "Intel(R) Haswell Server";
+   }
+   else if (gen_is_ivb(is->dev.devid)) {
+      if (gen_is_desktop(is->dev.devid))
+         chipset = "Intel(R) Ivybridge Desktop";
+      else if (gen_is_mobile(is->dev.devid))
+         chipset = "Intel(R) Ivybridge Mobile";
+      else if (gen_is_server(is->dev.devid))
+         chipset = "Intel(R) Ivybridge Server";
+   }
+   else if (gen_is_snb(is->dev.devid)) {
+      if (gen_is_desktop(is->dev.devid))
+         chipset = "Intel(R) Sandybridge Desktop";
+      else if (gen_is_mobile(is->dev.devid))
+         chipset = "Intel(R) Sandybridge Mobile";
+      else if (gen_is_server(is->dev.devid))
+         chipset = "Intel(R) Sandybridge Server";
     }
  
+   if (!chipset)
+      chipset = "Unknown Intel Chipset";
+
     return chipset;
  }
  
@@ -549,7 +560,7 @@ ilo_get_timestamp(struct pipe_screen *screen)
        uint32_t dw[2];
     } timestamp;
  
-   intel_winsys_read_reg(is->winsys, TIMESTAMP, &timestamp.val);
+   intel_winsys_read_reg(is->winsys, GEN6_REG_TIMESTAMP, &timestamp.val);
  
     /*
      * From the Ivy Bridge PRM, volume 1 part 3, page 107:
@@ -573,26 +584,23 @@ ilo_fence_reference(struct pipe_screen *screen,
                      struct pipe_fence_handle **p,
                      struct pipe_fence_handle *f)
  {
-   struct ilo_fence **ptr = (struct ilo_fence **) p;
     struct ilo_fence *fence = ilo_fence(f);
+   struct ilo_fence *old;
  
-   if (!ptr) {
-      /* still need to reference fence */
-      if (fence)
-         pipe_reference(NULL, &fence->reference);
-      return;
+   if (likely(p)) {
+      old = ilo_fence(*p);
+      *p = f;
+   }
+   else {
+      old = NULL;
     }
  
-   /* reference fence and dereference the one pointed to by ptr */
-   if (*ptr && pipe_reference(&(*ptr)->reference, &fence->reference)) {
-      struct ilo_fence *old = *ptr;
-
+   STATIC_ASSERT(&((struct ilo_fence *) NULL)->reference == NULL);
+   if (pipe_reference(&old->reference, &fence->reference)) {
        if (old->bo)
           intel_bo_unreference(old->bo);
        FREE(old);
     }
-
-   *ptr = fence;
  }
  
  static boolean
@@ -633,6 +641,28 @@ ilo_fence_finish(struct pipe_screen *screen,
     return true;
  }
  
+/**
+ * Create a fence for \p bo; returns NULL if allocation fails.  When \p bo is
+ * not NULL, it must be submitted before the fence is waited on or checked.
+ */
+struct ilo_fence *
+ilo_fence_create(struct pipe_screen *screen, struct intel_bo *bo)
+{
+   struct ilo_fence *fence;
+
+   fence = CALLOC_STRUCT(ilo_fence);
+   if (!fence)
+      return NULL;
+
+   pipe_reference_init(&fence->reference, 1); /* starts with a single reference */
+
+   if (bo)
+      intel_bo_reference(bo); /* dropped again when the fence is freed */
+   fence->bo = bo;
+
+   return fence;
+}
+
  static void
  ilo_screen_destroy(struct pipe_screen *screen)
  {
@@ -648,10 +678,12 @@ static bool
  init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info)
  {
     dev->devid = info->devid;
-   dev->max_batch_size = info->max_batch_size;
+   dev->aperture_total = info->aperture_total;
+   dev->aperture_mappable = info->aperture_mappable;
     dev->has_llc = info->has_llc;
     dev->has_address_swizzling = info->has_address_swizzling;
     dev->has_logical_context = info->has_logical_context;
+   dev->has_ppgtt = info->has_ppgtt;
     dev->has_timestamp = info->has_timestamp;
     dev->has_gen7_sol_reset = info->has_gen7_sol_reset;
  
@@ -661,62 +693,102 @@ init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info)
     }
  
     /*
-    * From the Sandy Bridge PRM, volume 4 part 2, page 18:
+    * PIPE_CONTROL and MI_* use PPGTT writes on GEN7+ and privileged GGTT
+    * writes on GEN6.
      *
-    *     "[DevSNB]: The GT1 product's URB provides 32KB of storage, arranged
-    *      as 1024 256-bit rows. The GT2 product's URB provides 64KB of
-    *      storage, arranged as 2048 256-bit rows. A row corresponds in size
-    *      to an EU GRF register. Read/write access to the URB is generally
-    *      supported on a row-granular basis."
+    * From the Sandy Bridge PRM, volume 1 part 3, page 101:
      *
-    * From the Ivy Bridge PRM, volume 4 part 2, page 17:
+    *     "[DevSNB] When Per-Process GTT Enable is set, it is assumed that all
+    *      code is in a secure environment, independent of address space.
+    *      Under this condition, this bit only specifies the address space
+    *      (GGTT or PPGTT). All commands are executed "as-is""
      *
-    *     "URB Size    URB Rows    URB Rows when SLM Enabled
-    *      128k        4096        2048
-    *      256k        8096        4096"
+    * We need PPGTT to be enabled on GEN6 too.
      */
+   if (!dev->has_ppgtt) {
+      /* experiments show that it does not really matter... */
+      ilo_warn("PPGTT disabled\n");
+   }
  
-   if (IS_HASWELL(info->devid)) {
-      dev->gen = ILO_GEN(7.5);
-
-      if (IS_HSW_GT3(info->devid)) {
-         dev->gt = 3;
+   if (gen_is_hsw(info->devid)) {
+      /*
+       * From the Haswell PRM, volume 4, page 8:
+       *
+       *     "Description                    GT3      GT2      GT1.5    GT1
+       *      (...)
+       *      EUs (Total)                    40       20       12       10
+       *      Threads (Total)                280      140      84       70
+       *      (...)
+       *      URB Size (max, within L3$)     512KB    256KB    256KB    128KB"
+       */
+      dev->gen_opaque = ILO_GEN(7.5);
+      dev->gt = gen_get_hsw_gt(info->devid);
+      if (dev->gt == 3) {
+         dev->eu_count = 40;
+         dev->thread_count = 280;
           dev->urb_size = 512 * 1024;
-      }
-      else if (IS_HSW_GT2(info->devid)) {
-         dev->gt = 2;
+      } else if (dev->gt == 2) {
+         dev->eu_count = 20;
+         dev->thread_count = 140;
           dev->urb_size = 256 * 1024;
-      }
-      else {
-         dev->gt = 1;
+      } else {
+         dev->eu_count = 10;
+         dev->thread_count = 70;
           dev->urb_size = 128 * 1024;
        }
-   }
-   else if (IS_GEN7(info->devid)) {
-      dev->gen = ILO_GEN(7);
-
-      if (IS_IVB_GT2(info->devid)) {
-         dev->gt = 2;
+   } else if (gen_is_ivb(info->devid) || gen_is_vlv(info->devid)) {
+      /*
+       * From the Ivy Bridge PRM, volume 1 part 1, page 18:
+       *
+       *     "Device             # of EUs        #Threads/EU
+       *      Ivy Bridge (GT2)   16              8
+       *      Ivy Bridge (GT1)   6               6"
+       *
+       * From the Ivy Bridge PRM, volume 4 part 2, page 17:
+       *
+       *     "URB Size    URB Rows    URB Rows when SLM Enabled
+       *      128k        4096        2048
+       *      256k        8096        4096"
+       */
+      dev->gen_opaque = ILO_GEN(7);
+      dev->gt = (gen_is_ivb(info->devid)) ? gen_get_ivb_gt(info->devid) : 1;
+      if (dev->gt == 2) {
+         dev->eu_count = 16;
+         dev->thread_count = 128;
           dev->urb_size = 256 * 1024;
-      }
-      else {
-         dev->gt = 1;
+      } else {
+         dev->eu_count = 6;
+         dev->thread_count = 36;
           dev->urb_size = 128 * 1024;
        }
-   }
-   else if (IS_GEN6(info->devid)) {
-      dev->gen = ILO_GEN(6);
-
-      if (IS_SNB_GT2(info->devid)) {
-         dev->gt = 2;
+   } else if (gen_is_snb(info->devid)) {
+      /*
+       * From the Sandy Bridge PRM, volume 1 part 1, page 22:
+       *
+       *     "Device             # of EUs        #Threads/EU
+       *      SNB GT2            12              5
+       *      SNB GT1            6               4"
+       *
+       * From the Sandy Bridge PRM, volume 4 part 2, page 18:
+       *
+       *     "[DevSNB]: The GT1 product's URB provides 32KB of storage,
+       *      arranged as 1024 256-bit rows. The GT2 product's URB provides
+       *      64KB of storage, arranged as 2048 256-bit rows. A row
+       *      corresponds in size to an EU GRF register. Read/write access to
+       *      the URB is generally supported on a row-granular basis."
+       */
+      dev->gen_opaque = ILO_GEN(6);
+      dev->gt = gen_get_snb_gt(info->devid);
+      if (dev->gt == 2) {
+         dev->eu_count = 12;
+         dev->thread_count = 60;
           dev->urb_size = 64 * 1024;
-      }
-      else {
-         dev->gt = 1;
+      } else {
+         dev->eu_count = 6;
+         dev->thread_count = 24;
           dev->urb_size = 32 * 1024;
        }
-   }
-   else {
+   } else {
        ilo_err("unknown GPU generation\n");
        return false;
     }