gallium: add a cap to determine whether the driver supports offset_clamp
[mesa.git] / src / gallium / drivers / ilo / ilo_screen.c
index 66f2e8e6a0652eb11f99e615fe6280fb0250cf27..5048ba1ac22b23b0e91c31d9d976b38956be078e 100644 (file)
  *    Chia-I Wu <olv@lunarg.com>
  */
 
+#include "pipe/p_state.h"
+#include "os/os_misc.h"
 #include "util/u_format_s3tc.h"
 #include "vl/vl_decoder.h"
 #include "vl/vl_video_buffer.h"
-#include "intel_chipset.h"
-#include "intel_reg.h" /* for TIMESTAMP */
+#include "genhw/genhw.h" /* for GEN6_REG_TIMESTAMP */
 #include "intel_winsys.h"
 
 #include "ilo_context.h"
 #include "ilo_format.h"
 #include "ilo_resource.h"
+#include "ilo_transfer.h" /* for ILO_TRANSFER_MAP_BUFFER_ALIGNMENT */
 #include "ilo_public.h"
 #include "ilo_screen.h"
 
+struct ilo_fence {
+   struct pipe_reference reference;
+   struct intel_bo *bo;
+};
+
 int ilo_debug;
 
 static const struct debug_named_value ilo_debug_flags[] = {
-   { "3d",        ILO_DEBUG_3D,       "Dump 3D commands and states" },
+   { "batch",     ILO_DEBUG_BATCH,    "Dump batch/state/surface/instruction buffers" },
    { "vs",        ILO_DEBUG_VS,       "Dump vertex shaders" },
    { "gs",        ILO_DEBUG_GS,       "Dump geometry shaders" },
    { "fs",        ILO_DEBUG_FS,       "Dump fragment shaders" },
    { "cs",        ILO_DEBUG_CS,       "Dump compute shaders" },
+   { "draw",      ILO_DEBUG_DRAW,     "Show draw information" },
+   { "submit",    ILO_DEBUG_SUBMIT,   "Show batch buffer submissions" },
    { "nohw",      ILO_DEBUG_NOHW,     "Do not send commands to HW" },
    { "nocache",   ILO_DEBUG_NOCACHE,  "Always invalidate HW caches" },
+   { "nohiz",     ILO_DEBUG_NOHIZ,    "Disable HiZ" },
    DEBUG_NAMED_VALUE_END
 };
 
@@ -111,16 +121,15 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
       return UINT_MAX;
    case PIPE_SHADER_CAP_MAX_INPUTS:
+   case PIPE_SHADER_CAP_MAX_OUTPUTS:
       /* this is limited by how many attributes SF can remap */
       return 16;
-   case PIPE_SHADER_CAP_MAX_CONSTS:
-      return 1024;
+   case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+      return 1024 * sizeof(float[4]);
    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
       return ILO_MAX_CONST_BUFFERS;
    case PIPE_SHADER_CAP_MAX_TEMPS:
       return 256;
-   case PIPE_SHADER_CAP_MAX_ADDRS:
-      return (shader == PIPE_SHADER_FRAGMENT) ? 0 : 1;
    case PIPE_SHADER_CAP_MAX_PREDS:
       return 0;
    case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@@ -132,13 +141,15 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
       return (shader == PIPE_SHADER_FRAGMENT) ? 0 : 1;
    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
-      return (shader == PIPE_SHADER_FRAGMENT) ? 0 : 1;
+      return 1;
    case PIPE_SHADER_CAP_SUBROUTINES:
       return 0;
    case PIPE_SHADER_CAP_INTEGERS:
       return 1;
    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
       return ILO_MAX_SAMPLERS;
+   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+      return ILO_MAX_SAMPLER_VIEWS;
    case PIPE_SHADER_CAP_PREFERRED_IR:
       return PIPE_SHADER_IR_TGSI;
    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
@@ -152,11 +163,12 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
 static int
 ilo_get_video_param(struct pipe_screen *screen,
                     enum pipe_video_profile profile,
+                    enum pipe_video_entrypoint entrypoint,
                     enum pipe_video_cap param)
 {
    switch (param) {
    case PIPE_VIDEO_CAP_SUPPORTED:
-      return vl_profile_supported(screen, profile);
+      return vl_profile_supported(screen, profile, entrypoint);
    case PIPE_VIDEO_CAP_NPOT_TEXTURES:
       return 1;
    case PIPE_VIDEO_CAP_MAX_WIDTH:
@@ -170,7 +182,8 @@ ilo_get_video_param(struct pipe_screen *screen,
       return 1;
    case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
       return 0;
-
+   case PIPE_VIDEO_CAP_MAX_LEVEL:
+      return vl_level_supported(screen, profile);
    default:
       return 0;
    }
@@ -181,6 +194,7 @@ ilo_get_compute_param(struct pipe_screen *screen,
                       enum pipe_compute_cap param,
                       void *ret)
 {
+   struct ilo_screen *is = ilo_screen(screen);
    union {
       const char *ir_target;
       uint64_t grid_dimension;
@@ -192,11 +206,13 @@ ilo_get_compute_param(struct pipe_screen *screen,
       uint64_t max_private_size;
       uint64_t max_input_size;
       uint64_t max_mem_alloc_size;
+      uint32_t max_clock_frequency;
+      uint32_t max_compute_units;
+      uint32_t images_supported;
    } val;
    const void *ptr;
    int size;
 
-   /* XXX some randomly chosen values */
    switch (param) {
    case PIPE_COMPUTE_CAP_IR_TARGET:
       val.ir_target = "ilog";
@@ -211,58 +227,79 @@ ilo_get_compute_param(struct pipe_screen *screen,
       size = sizeof(val.grid_dimension);
       break;
    case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
-      val.max_grid_size[0] = 65535;
-      val.max_grid_size[1] = 65535;
-      val.max_grid_size[2] = 1;
+      val.max_grid_size[0] = 0xffffffffu;
+      val.max_grid_size[1] = 0xffffffffu;
+      val.max_grid_size[2] = 0xffffffffu;
 
       ptr = &val.max_grid_size;
       size = sizeof(val.max_grid_size);
       break;
    case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
-      val.max_block_size[0] = 512;
-      val.max_block_size[1] = 512;
-      val.max_block_size[2] = 512;
+      val.max_block_size[0] = 1024;
+      val.max_block_size[1] = 1024;
+      val.max_block_size[2] = 1024;
 
       ptr = &val.max_block_size;
       size = sizeof(val.max_block_size);
       break;
 
    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
-      val.max_threads_per_block = 512;
+      val.max_threads_per_block = 1024;
 
       ptr = &val.max_threads_per_block;
       size = sizeof(val.max_threads_per_block);
       break;
    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
-      val.max_global_size = 4;
+      /* \see ilo_max_resource_size */
+      val.max_global_size = 1u << 31;
 
       ptr = &val.max_global_size;
       size = sizeof(val.max_global_size);
       break;
    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+      /* Shared Local Memory Size of INTERFACE_DESCRIPTOR_DATA */
       val.max_local_size = 64 * 1024;
 
       ptr = &val.max_local_size;
       size = sizeof(val.max_local_size);
       break;
    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
-      val.max_private_size = 32768;
+      /* scratch size */
+      val.max_private_size = 12 * 1024;
 
       ptr = &val.max_private_size;
       size = sizeof(val.max_private_size);
       break;
    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
-      val.max_input_size = 256;
+      val.max_input_size = 1024;
 
       ptr = &val.max_input_size;
       size = sizeof(val.max_input_size);
       break;
    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
-      val.max_mem_alloc_size = 128 * 1024 * 1024;
+      val.max_mem_alloc_size = 1u << 31;
 
       ptr = &val.max_mem_alloc_size;
       size = sizeof(val.max_mem_alloc_size);
       break;
+   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+      val.max_clock_frequency = 1000;
+
+      ptr = &val.max_clock_frequency;
+      size = sizeof(val.max_clock_frequency);
+      break;
+   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+      val.max_compute_units = is->dev.eu_count;
+
+      ptr = &val.max_compute_units;
+      size = sizeof(val.max_compute_units);
+      break;
+   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+      val.images_supported = 1;
+
+      ptr = &val.images_supported;
+      size = sizeof(val.images_supported);
+      break;
    default:
       ptr = NULL;
       size = 0;
@@ -303,40 +340,28 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
        *           Max WxHxD for 2D and CUBE     Max WxHxD for 3D
        *  GEN6           8192x8192x512            2048x2048x2048
        *  GEN7         16384x16384x2048           2048x2048x2048
-       *
-       * However, when the texutre size is large, things become unstable.  We
-       * require the maximum texture size to be 2^30 bytes in
-       * screen->can_create_resource().  Since the maximum pixel size is 2^4
-       * bytes (PIPE_FORMAT_R32G32B32A32_FLOAT), textures should not have more
-       * than 2^26 pixels.
-       *
-       * For 3D textures, we have to set the maximum number of levels to 9,
-       * which has at most 2^24 pixels.  For 2D textures, we set it to 14,
-       * which has at most 2^26 pixels.
        */
-      return 14;
+      return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 15 : 14;
    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
-      return 9;
+      return 12;
    case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
-      return 14;
+      return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 15 : 14;
    case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
       return false;
    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
    case PIPE_CAP_SM3:
       return true;
    case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
-      if (is->dev.gen >= ILO_GEN(7) && !is->dev.has_gen7_sol_reset)
+      if (ilo_dev_gen(&is->dev) >= ILO_GEN(7) && !is->dev.has_gen7_sol_reset)
          return 0;
       return ILO_MAX_SO_BUFFERS;
    case PIPE_CAP_PRIMITIVE_RESTART:
       return true;
-   case PIPE_CAP_MAX_COMBINED_SAMPLERS:
-      return ILO_MAX_SAMPLERS * 2;
    case PIPE_CAP_INDEP_BLEND_ENABLE:
    case PIPE_CAP_INDEP_BLEND_FUNC:
       return true;
    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
-      return (is->dev.gen >= ILO_GEN(7)) ? 2048 : 512;
+      return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 2048 : 512;
    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
@@ -347,17 +372,18 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
       return false;
    case PIPE_CAP_TGSI_INSTANCEID:
    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
-      return false; /* TODO */
+      return true;
    case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
       return false;
    case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
       return true;
    case PIPE_CAP_SEAMLESS_CUBE_MAP:
    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
-   case PIPE_CAP_SCALED_RESOLVE:
       return true;
+   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
    case PIPE_CAP_MIN_TEXEL_OFFSET:
       return -8;
+   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
    case PIPE_CAP_MAX_TEXEL_OFFSET:
       return 7;
    case PIPE_CAP_CONDITIONAL_RENDER:
@@ -368,11 +394,10 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
       return ILO_MAX_SO_BINDINGS;
    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
-      if (is->dev.gen >= ILO_GEN(7))
+      if (ilo_dev_gen(&is->dev) >= ILO_GEN(7))
          return is->dev.has_gen7_sol_reset;
       else
          return false; /* TODO */
-   case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS:
    case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
       return false;
    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
@@ -380,7 +405,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_VERTEX_COLOR_CLAMPED:
       return false;
    case PIPE_CAP_GLSL_FEATURE_LEVEL:
-      return 130;
+      return 140;
    case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
    case PIPE_CAP_USER_VERTEX_BUFFERS:
       return false;
@@ -388,34 +413,92 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
    case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
       return false;
+   case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+      return 2048;
    case PIPE_CAP_COMPUTE:
       return false; /* TODO */
    case PIPE_CAP_USER_INDEX_BUFFERS:
-      return false;
    case PIPE_CAP_USER_CONSTANT_BUFFERS:
-      return false; /* TODO push constants */
+      return true;
    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+      /* imposed by OWord (Dual) Block Read */
       return 16;
    case PIPE_CAP_START_INSTANCE:
-   case PIPE_CAP_QUERY_TIMESTAMP:
       return true;
+   case PIPE_CAP_QUERY_TIMESTAMP:
+      return is->dev.has_timestamp;
    case PIPE_CAP_TEXTURE_MULTISAMPLE:
       return false; /* TODO */
    case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
-      return 0; /* TODO */
+      return ILO_TRANSFER_MAP_BUFFER_ALIGNMENT;
    case PIPE_CAP_CUBE_MAP_ARRAY:
    case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
-      return false; /* TODO */
+      return true;
    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
-      return 0; /* TODO */
+      return 1;
    case PIPE_CAP_TGSI_TEXCOORD:
       return false;
    case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
-      return true;
    case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
-      return false; /* TODO */
+      return true;
    case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
       return 0;
+   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+      /* a GEN6_SURFTYPE_BUFFER can have up to 2^27 elements */
+      return 1 << 27;
+   case PIPE_CAP_MAX_VIEWPORTS:
+      return ILO_MAX_VIEWPORTS;
+   case PIPE_CAP_ENDIANNESS:
+      return PIPE_ENDIAN_LITTLE;
+   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+      return true;
+   case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+   case PIPE_CAP_TEXTURE_GATHER_SM5:
+      return 0;
+   case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+      return true;
+   case PIPE_CAP_FAKE_SW_MSAA:
+   case PIPE_CAP_TEXTURE_QUERY_LOD:
+   case PIPE_CAP_SAMPLE_SHADING:
+   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+   case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+   case PIPE_CAP_MAX_VERTEX_STREAMS:
+   case PIPE_CAP_DRAW_INDIRECT:
+   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+   case PIPE_CAP_SAMPLER_VIEW_TARGET:
+      return 0;
+
+   case PIPE_CAP_VENDOR_ID:
+      return 0x8086;
+   case PIPE_CAP_DEVICE_ID:
+      return is->dev.devid;
+   case PIPE_CAP_ACCELERATED:
+      return true;
+   case PIPE_CAP_VIDEO_MEMORY: {
+      /* Once a batch uses more than 75% of the maximum mappable size, we
+       * assume that there's some fragmentation, and we start doing extra
+       * flushing, etc.  That's the big cliff apps will care about.
+       */
+      const uint64_t gpu_memory = is->dev.aperture_total * 3 / 4;
+      uint64_t system_memory;
+
+      if (!os_get_total_physical_memory(&system_memory))
+         return 0;
+
+      return (int) (MIN2(gpu_memory, system_memory) >> 20);
+   }
+   case PIPE_CAP_UMA:
+      return true;
+   case PIPE_CAP_CLIP_HALFZ:
+      return true;
+   case PIPE_CAP_VERTEXID_NOBASE:
+      return false;
+   case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+      return true;
 
    default:
       return 0;
@@ -432,88 +515,38 @@ static const char *
 ilo_get_name(struct pipe_screen *screen)
 {
    struct ilo_screen *is = ilo_screen(screen);
-   const char *chipset;
-
-   /* stolen from classic i965 */
-   switch (is->dev.devid) {
-   case PCI_CHIP_SANDYBRIDGE_GT1:
-   case PCI_CHIP_SANDYBRIDGE_GT2:
-   case PCI_CHIP_SANDYBRIDGE_GT2_PLUS:
-      chipset = "Intel(R) Sandybridge Desktop";
-      break;
-   case PCI_CHIP_SANDYBRIDGE_M_GT1:
-   case PCI_CHIP_SANDYBRIDGE_M_GT2:
-   case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS:
-      chipset = "Intel(R) Sandybridge Mobile";
-      break;
-   case PCI_CHIP_SANDYBRIDGE_S:
-      chipset = "Intel(R) Sandybridge Server";
-      break;
-   case PCI_CHIP_IVYBRIDGE_GT1:
-   case PCI_CHIP_IVYBRIDGE_GT2:
-      chipset = "Intel(R) Ivybridge Desktop";
-      break;
-   case PCI_CHIP_IVYBRIDGE_M_GT1:
-   case PCI_CHIP_IVYBRIDGE_M_GT2:
-      chipset = "Intel(R) Ivybridge Mobile";
-      break;
-   case PCI_CHIP_IVYBRIDGE_S_GT1:
-   case PCI_CHIP_IVYBRIDGE_S_GT2:
-      chipset = "Intel(R) Ivybridge Server";
-      break;
-   case PCI_CHIP_BAYTRAIL_M_1:
-   case PCI_CHIP_BAYTRAIL_M_2:
-   case PCI_CHIP_BAYTRAIL_M_3:
-   case PCI_CHIP_BAYTRAIL_M_4:
-   case PCI_CHIP_BAYTRAIL_D:
+   const char *chipset = NULL;
+
+   if (gen_is_vlv(is->dev.devid)) {
       chipset = "Intel(R) Bay Trail";
-      break;
-   case PCI_CHIP_HASWELL_GT1:
-   case PCI_CHIP_HASWELL_GT2:
-   case PCI_CHIP_HASWELL_GT3:
-   case PCI_CHIP_HASWELL_SDV_GT1:
-   case PCI_CHIP_HASWELL_SDV_GT2:
-   case PCI_CHIP_HASWELL_SDV_GT3:
-   case PCI_CHIP_HASWELL_ULT_GT1:
-   case PCI_CHIP_HASWELL_ULT_GT2:
-   case PCI_CHIP_HASWELL_ULT_GT3:
-   case PCI_CHIP_HASWELL_CRW_GT1:
-   case PCI_CHIP_HASWELL_CRW_GT2:
-   case PCI_CHIP_HASWELL_CRW_GT3:
-      chipset = "Intel(R) Haswell Desktop";
-      break;
-   case PCI_CHIP_HASWELL_M_GT1:
-   case PCI_CHIP_HASWELL_M_GT2:
-   case PCI_CHIP_HASWELL_M_GT3:
-   case PCI_CHIP_HASWELL_SDV_M_GT1:
-   case PCI_CHIP_HASWELL_SDV_M_GT2:
-   case PCI_CHIP_HASWELL_SDV_M_GT3:
-   case PCI_CHIP_HASWELL_ULT_M_GT1:
-   case PCI_CHIP_HASWELL_ULT_M_GT2:
-   case PCI_CHIP_HASWELL_ULT_M_GT3:
-   case PCI_CHIP_HASWELL_CRW_M_GT1:
-   case PCI_CHIP_HASWELL_CRW_M_GT2:
-   case PCI_CHIP_HASWELL_CRW_M_GT3:
-      chipset = "Intel(R) Haswell Mobile";
-      break;
-   case PCI_CHIP_HASWELL_S_GT1:
-   case PCI_CHIP_HASWELL_S_GT2:
-   case PCI_CHIP_HASWELL_S_GT3:
-   case PCI_CHIP_HASWELL_SDV_S_GT1:
-   case PCI_CHIP_HASWELL_SDV_S_GT2:
-   case PCI_CHIP_HASWELL_SDV_S_GT3:
-   case PCI_CHIP_HASWELL_ULT_S_GT1:
-   case PCI_CHIP_HASWELL_ULT_S_GT2:
-   case PCI_CHIP_HASWELL_ULT_S_GT3:
-   case PCI_CHIP_HASWELL_CRW_S_GT1:
-   case PCI_CHIP_HASWELL_CRW_S_GT2:
-   case PCI_CHIP_HASWELL_CRW_S_GT3:
-      chipset = "Intel(R) Haswell Server";
-      break;
-   default:
-      chipset = "Unknown Intel Chipset";
-      break;
    }
+   else if (gen_is_hsw(is->dev.devid)) {
+      if (gen_is_desktop(is->dev.devid))
+         chipset = "Intel(R) Haswell Desktop";
+      else if (gen_is_mobile(is->dev.devid))
+         chipset = "Intel(R) Haswell Mobile";
+      else if (gen_is_server(is->dev.devid))
+         chipset = "Intel(R) Haswell Server";
+   }
+   else if (gen_is_ivb(is->dev.devid)) {
+      if (gen_is_desktop(is->dev.devid))
+         chipset = "Intel(R) Ivybridge Desktop";
+      else if (gen_is_mobile(is->dev.devid))
+         chipset = "Intel(R) Ivybridge Mobile";
+      else if (gen_is_server(is->dev.devid))
+         chipset = "Intel(R) Ivybridge Server";
+   }
+   else if (gen_is_snb(is->dev.devid)) {
+      if (gen_is_desktop(is->dev.devid))
+         chipset = "Intel(R) Sandybridge Desktop";
+      else if (gen_is_mobile(is->dev.devid))
+         chipset = "Intel(R) Sandybridge Mobile";
+      else if (gen_is_server(is->dev.devid))
+         chipset = "Intel(R) Sandybridge Server";
+   }
+
+   if (!chipset)
+      chipset = "Unknown Intel Chipset";
 
    return chipset;
 }
@@ -527,7 +560,7 @@ ilo_get_timestamp(struct pipe_screen *screen)
       uint32_t dw[2];
    } timestamp;
 
-   is->winsys->read_reg(is->winsys, TIMESTAMP, &timestamp.val);
+   intel_winsys_read_reg(is->winsys, GEN6_REG_TIMESTAMP, &timestamp.val);
 
    /*
     * From the Ivy Bridge PRM, volume 1 part 3, page 107:
@@ -551,26 +584,23 @@ ilo_fence_reference(struct pipe_screen *screen,
                     struct pipe_fence_handle **p,
                     struct pipe_fence_handle *f)
 {
-   struct ilo_fence **ptr = (struct ilo_fence **) p;
    struct ilo_fence *fence = ilo_fence(f);
+   struct ilo_fence *old;
 
-   if (!ptr) {
-      /* still need to reference fence */
-      if (fence)
-         pipe_reference(NULL, &fence->reference);
-      return;
+   if (likely(p)) {
+      old = ilo_fence(*p);
+      *p = f;
+   }
+   else {
+      old = NULL;
    }
 
-   /* reference fence and dereference the one pointed to by ptr */
-   if (*ptr && pipe_reference(&(*ptr)->reference, &fence->reference)) {
-      struct ilo_fence *old = *ptr;
-
+   STATIC_ASSERT(&((struct ilo_fence *) NULL)->reference == NULL);
+   if (pipe_reference(&old->reference, &fence->reference)) {
       if (old->bo)
-         old->bo->unreference(old->bo);
+         intel_bo_unreference(old->bo);
       FREE(old);
    }
-
-   *ptr = fence;
 }
 
 static boolean
@@ -581,7 +611,7 @@ ilo_fence_signalled(struct pipe_screen *screen,
 
    /* mark signalled if the bo is idle */
    if (fence->bo && !intel_bo_is_busy(fence->bo)) {
-      fence->bo->unreference(fence->bo);
+      intel_bo_unreference(fence->bo);
       fence->bo = NULL;
    }
 
@@ -601,23 +631,45 @@ ilo_fence_finish(struct pipe_screen *screen,
       return true;
 
    /* wait and see if it returns error */
-   if (fence->bo->wait(fence->bo, wait_timeout))
+   if (intel_bo_wait(fence->bo, wait_timeout))
       return false;
 
    /* mark signalled */
-   fence->bo->unreference(fence->bo);
+   intel_bo_unreference(fence->bo);
    fence->bo = NULL;
 
    return true;
 }
 
+/**
+ * Create a fence for \p bo.  When \p bo is not NULL, it must be submitted
+ * before waited on or checked.
+ */
+struct ilo_fence *
+ilo_fence_create(struct pipe_screen *screen, struct intel_bo *bo)
+{
+   struct ilo_fence *fence;
+
+   fence = CALLOC_STRUCT(ilo_fence);
+   if (!fence)
+      return NULL;
+
+   pipe_reference_init(&fence->reference, 1);
+
+   if (bo)
+      intel_bo_reference(bo);
+   fence->bo = bo;
+
+   return fence;
+}
+
 static void
 ilo_screen_destroy(struct pipe_screen *screen)
 {
    struct ilo_screen *is = ilo_screen(screen);
 
    /* as it seems, winsys is owned by the screen */
-   is->winsys->destroy(is->winsys);
+   intel_winsys_destroy(is->winsys);
 
    FREE(is);
 }
@@ -626,67 +678,117 @@ static bool
 init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info)
 {
    dev->devid = info->devid;
+   dev->aperture_total = info->aperture_total;
+   dev->aperture_mappable = info->aperture_mappable;
    dev->has_llc = info->has_llc;
-   dev->has_gen7_sol_reset = info->has_gen7_sol_reset;
    dev->has_address_swizzling = info->has_address_swizzling;
+   dev->has_logical_context = info->has_logical_context;
+   dev->has_ppgtt = info->has_ppgtt;
+   dev->has_timestamp = info->has_timestamp;
+   dev->has_gen7_sol_reset = info->has_gen7_sol_reset;
+
+   if (!dev->has_logical_context) {
+      ilo_err("missing hardware logical context support\n");
+      return false;
+   }
 
    /*
-    * From the Sandy Bridge PRM, volume 4 part 2, page 18:
+    * PIPE_CONTROL and MI_* use PPGTT writes on GEN7+ and privileged GGTT
+    * writes on GEN6.
     *
-    *     "[DevSNB]: The GT1 product's URB provides 32KB of storage, arranged
-    *      as 1024 256-bit rows. The GT2 product's URB provides 64KB of
-    *      storage, arranged as 2048 256-bit rows. A row corresponds in size
-    *      to an EU GRF register. Read/write access to the URB is generally
-    *      supported on a row-granular basis."
+    * From the Sandy Bridge PRM, volume 1 part 3, page 101:
     *
-    * From the Ivy Bridge PRM, volume 4 part 2, page 17:
+    *     "[DevSNB] When Per-Process GTT Enable is set, it is assumed that all
+    *      code is in a secure environment, independent of address space.
+    *      Under this condition, this bit only specifies the address space
+    *      (GGTT or PPGTT). All commands are executed "as-is""
     *
-    *     "URB Size    URB Rows    URB Rows when SLM Enabled
-    *      128k        4096        2048
-    *      256k        8096        4096"
+    * We need PPGTT to be enabled on GEN6 too.
     */
+   if (!dev->has_ppgtt) {
+      /* experiments show that it does not really matter... */
+      ilo_warn("PPGTT disabled\n");
+   }
 
-   if (IS_HASWELL(info->devid)) {
-      dev->gen = ILO_GEN(7.5);
-
-      if (IS_HSW_GT3(info->devid)) {
-         dev->gt = 3;
+   if (gen_is_hsw(info->devid)) {
+      /*
+       * From the Haswell PRM, volume 4, page 8:
+       *
+       *     "Description                    GT3      GT2      GT1.5    GT1
+       *      (...)
+       *      EUs (Total)                    40       20       12       10
+       *      Threads (Total)                280      140      84       70
+       *      (...)
+       *      URB Size (max, within L3$)     512KB    256KB    256KB    128KB
+       */
+      dev->gen_opaque = ILO_GEN(7.5);
+      dev->gt = gen_get_hsw_gt(info->devid);
+      if (dev->gt == 3) {
+         dev->eu_count = 40;
+         dev->thread_count = 280;
          dev->urb_size = 512 * 1024;
-      }
-      else if (IS_HSW_GT2(info->devid)) {
-         dev->gt = 2;
+      } else if (dev->gt == 2) {
+         dev->eu_count = 20;
+         dev->thread_count = 140;
          dev->urb_size = 256 * 1024;
-      }
-      else {
-         dev->gt = 1;
+      } else {
+         dev->eu_count = 10;
+         dev->thread_count = 70;
          dev->urb_size = 128 * 1024;
       }
-   }
-   else if (IS_GEN7(info->devid)) {
-      dev->gen = ILO_GEN(7);
-
-      if (IS_IVB_GT2(info->devid)) {
-         dev->gt = 2;
+   } else if (gen_is_ivb(info->devid) || gen_is_vlv(info->devid)) {
+      /*
+       * From the Ivy Bridge PRM, volume 1 part 1, page 18:
+       *
+       *     "Device             # of EUs        #Threads/EU
+       *      Ivy Bridge (GT2)   16              8
+       *      Ivy Bridge (GT1)   6               6"
+       *
+       * From the Ivy Bridge PRM, volume 4 part 2, page 17:
+       *
+       *     "URB Size    URB Rows    URB Rows when SLM Enabled
+       *      128k        4096        2048
+       *      256k        8096        4096"
+       */
+      dev->gen_opaque = ILO_GEN(7);
+      dev->gt = (gen_is_ivb(info->devid)) ? gen_get_ivb_gt(info->devid) : 1;
+      if (dev->gt == 2) {
+         dev->eu_count = 16;
+         dev->thread_count = 128;
          dev->urb_size = 256 * 1024;
-      }
-      else {
-         dev->gt = 1;
+      } else {
+         dev->eu_count = 6;
+         dev->thread_count = 36;
          dev->urb_size = 128 * 1024;
       }
-   }
-   else if (IS_GEN6(info->devid)) {
-      dev->gen = ILO_GEN(6);
-
-      if (IS_SNB_GT2(info->devid)) {
-         dev->gt = 2;
+   } else if (gen_is_snb(info->devid)) {
+      /*
+       * From the Sandy Bridge PRM, volume 1 part 1, page 22:
+       *
+       *     "Device             # of EUs        #Threads/EU
+       *      SNB GT2            12              5
+       *      SNB GT1            6               4"
+       *
+       * From the Sandy Bridge PRM, volume 4 part 2, page 18:
+       *
+       *     "[DevSNB]: The GT1 product's URB provides 32KB of storage,
+       *      arranged as 1024 256-bit rows. The GT2 product's URB provides
+       *      64KB of storage, arranged as 2048 256-bit rows. A row
+       *      corresponds in size to an EU GRF register. Read/write access to
+       *      the URB is generally supported on a row-granular basis."
+       */
+      dev->gen_opaque = ILO_GEN(6);
+      dev->gt = gen_get_snb_gt(info->devid);
+      if (dev->gt == 2) {
+         dev->eu_count = 12;
+         dev->thread_count = 60;
          dev->urb_size = 64 * 1024;
-      }
-      else {
-         dev->gt = 1;
+      } else {
+         dev->eu_count = 6;
+         dev->thread_count = 24;
          dev->urb_size = 32 * 1024;
       }
-   }
-   else {
+   } else {
       ilo_err("unknown GPU generation\n");
       return false;
    }
@@ -708,7 +810,7 @@ ilo_screen_create(struct intel_winsys *ws)
 
    is->winsys = ws;
 
-   info = is->winsys->get_info(is->winsys);
+   info = intel_winsys_get_info(is->winsys);
    if (!init_dev(&is->dev, info)) {
       FREE(is);
       return NULL;