gallium: add PIPE_CAP_TGSI_FS_FBFETCH
[mesa.git] / src / gallium / drivers / ilo / ilo_screen.c
index 3be134863923d6f5d5f60aa647af9de05c4166f9..20a4ee31e45ac39f62bb52e57f09a6c80a0ea58a 100644 (file)
@@ -40,9 +40,9 @@
 #include "ilo_public.h"
 #include "ilo_screen.h"
 
-struct ilo_fence {
+struct pipe_fence_handle {
    struct pipe_reference reference;
-   struct intel_bo *bo;
+   struct intel_bo *seqno_bo;
 };
 
 static float
@@ -136,8 +136,12 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
       return ILO_MAX_SAMPLER_VIEWS;
    case PIPE_SHADER_CAP_PREFERRED_IR:
       return PIPE_SHADER_IR_TGSI;
+   case PIPE_SHADER_CAP_SUPPORTED_IRS:
+      return 0;
    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
       return 1;
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+      return 32;
 
    default:
       return 0;
@@ -175,6 +179,7 @@ ilo_get_video_param(struct pipe_screen *screen,
 
 static int
 ilo_get_compute_param(struct pipe_screen *screen,
+                      enum pipe_shader_ir ir_type,
                       enum pipe_compute_cap param,
                       void *ret)
 {
@@ -193,6 +198,8 @@ ilo_get_compute_param(struct pipe_screen *screen,
       uint32_t max_clock_frequency;
       uint32_t max_compute_units;
       uint32_t images_supported;
+      uint32_t subgroup_size;
+      uint32_t address_bits;
    } val;
    const void *ptr;
    int size;
@@ -205,7 +212,7 @@ ilo_get_compute_param(struct pipe_screen *screen,
       size = strlen(val.ir_target) + 1;
       break;
    case PIPE_COMPUTE_CAP_GRID_DIMENSION:
-      val.grid_dimension = Elements(val.max_grid_size);
+      val.grid_dimension = ARRAY_SIZE(val.max_grid_size);
 
       ptr = &val.grid_dimension;
       size = sizeof(val.grid_dimension);
@@ -260,6 +267,11 @@ ilo_get_compute_param(struct pipe_screen *screen,
       ptr = &val.max_input_size;
       size = sizeof(val.max_input_size);
       break;
+   case PIPE_COMPUTE_CAP_ADDRESS_BITS:
+      val.address_bits = 32;
+      ptr = &val.address_bits;
+      size = sizeof(val.address_bits);
+      break;
    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
       val.max_mem_alloc_size = 1u << 31;
 
@@ -284,6 +296,15 @@ ilo_get_compute_param(struct pipe_screen *screen,
       ptr = &val.images_supported;
       size = sizeof(val.images_supported);
       break;
+   case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+      /* best case is actually SIMD32 */
+      val.subgroup_size = 16;
+
+      ptr = &val.subgroup_size;
+      size = sizeof(val.subgroup_size);
+      break;
+   case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+      /* fallthrough */
    default:
       ptr = NULL;
       size = 0;
@@ -345,7 +366,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_INDEP_BLEND_FUNC:
       return true;
    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
-      return (ilo_dev_gen(&is->dev) >= ILO_GEN(7)) ? 2048 : 512;
+      return (ilo_dev_gen(&is->dev) >= ILO_GEN(7.5)) ? 2048 : 512;
    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
@@ -378,6 +399,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
       return ILO_MAX_SO_BINDINGS;
    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+   case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
       if (ilo_dev_gen(&is->dev) >= ILO_GEN(7))
          return is->dev.has_gen7_sol_reset;
       else
@@ -418,6 +440,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_CUBE_MAP_ARRAY:
    case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
       return true;
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+      return 0;
    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
       return 1;
    case PIPE_CAP_TGSI_TEXCOORD:
@@ -435,6 +459,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_ENDIANNESS:
       return PIPE_ENDIAN_LITTLE;
    case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+   case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
       return true;
    case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
    case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
@@ -443,6 +468,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TEXTURE_GATHER_SM5:
       return 0;
    case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
       return true;
    case PIPE_CAP_FAKE_SW_MSAA:
    case PIPE_CAP_TEXTURE_QUERY_LOD:
@@ -451,11 +478,49 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
    case PIPE_CAP_MAX_VERTEX_STREAMS:
    case PIPE_CAP_DRAW_INDIRECT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
    case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
    case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
    case PIPE_CAP_SAMPLER_VIEW_TARGET:
    case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+   case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
+   case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+   case PIPE_CAP_SHAREABLE_SHADERS:
+   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+   case PIPE_CAP_CLEAR_TEXTURE:
+   case PIPE_CAP_DRAW_PARAMETERS:
+   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+   case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+   case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+   case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+   case PIPE_CAP_INVALIDATE_BUFFER:
+   case PIPE_CAP_GENERATE_MIPMAP:
+   case PIPE_CAP_STRING_MARKER:
+   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+   case PIPE_CAP_QUERY_BUFFER_OBJECT:
+   case PIPE_CAP_QUERY_MEMORY_INFO:
+   case PIPE_CAP_PCI_GROUP:
+   case PIPE_CAP_PCI_BUS:
+   case PIPE_CAP_PCI_DEVICE:
+   case PIPE_CAP_PCI_FUNCTION:
+   case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+   case PIPE_CAP_CULL_DISTANCE:
+   case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
+   case PIPE_CAP_TGSI_VOTE:
+   case PIPE_CAP_MAX_WINDOW_RECTANGLES:
+   case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
+   case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
+   case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
+   case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
+   case PIPE_CAP_NATIVE_FENCE_FD:
+   case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
+   case PIPE_CAP_TGSI_FS_FBFETCH:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -559,7 +624,7 @@ ilo_get_timestamp(struct pipe_screen *screen)
       uint32_t dw[2];
    } timestamp;
 
-   intel_winsys_read_reg(is->winsys, GEN6_REG_TIMESTAMP, &timestamp.val);
+   intel_winsys_read_reg(is->dev.winsys, GEN6_REG_TIMESTAMP, &timestamp.val);
 
    /*
     * From the Ivy Bridge PRM, volume 1 part 3, page 107:
@@ -578,82 +643,109 @@ ilo_get_timestamp(struct pipe_screen *screen)
    return (uint64_t) timestamp.dw[1] * 80;
 }
 
-static void
-ilo_fence_reference(struct pipe_screen *screen,
-                    struct pipe_fence_handle **p,
-                    struct pipe_fence_handle *f)
+static boolean
+ilo_is_format_supported(struct pipe_screen *screen,
+                        enum pipe_format format,
+                        enum pipe_texture_target target,
+                        unsigned sample_count,
+                        unsigned bindings)
 {
-   struct ilo_fence *fence = ilo_fence(f);
-   struct ilo_fence *old;
+   struct ilo_screen *is = ilo_screen(screen);
+   const struct ilo_dev *dev = &is->dev;
 
-   if (likely(p)) {
-      old = ilo_fence(*p);
-      *p = f;
-   } else {
-      old = NULL;
-   }
+   if (!util_format_is_supported(format, bindings))
+      return false;
 
-   STATIC_ASSERT(&((struct ilo_fence *) NULL)->reference == NULL);
-   if (pipe_reference(&old->reference, &fence->reference)) {
-      intel_bo_unref(old->bo);
-      FREE(old);
-   }
+   /* no MSAA support yet */
+   if (sample_count > 1)
+      return false;
+
+   if ((bindings & PIPE_BIND_DEPTH_STENCIL) &&
+       !ilo_format_support_zs(dev, format))
+      return false;
+
+   if ((bindings & PIPE_BIND_RENDER_TARGET) &&
+       !ilo_format_support_rt(dev, format))
+      return false;
+
+   if ((bindings & PIPE_BIND_SAMPLER_VIEW) &&
+       !ilo_format_support_sampler(dev, format))
+      return false;
+
+   if ((bindings & PIPE_BIND_VERTEX_BUFFER) &&
+       !ilo_format_support_vb(dev, format))
+      return false;
+
+   return true;
 }
 
 static boolean
-ilo_fence_signalled(struct pipe_screen *screen,
-                    struct pipe_fence_handle *f)
+ilo_is_video_format_supported(struct pipe_screen *screen,
+                              enum pipe_format format,
+                              enum pipe_video_profile profile,
+                              enum pipe_video_entrypoint entrypoint)
+{
+   return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
+}
+
+static void
+ilo_screen_fence_reference(struct pipe_screen *screen,
+                           struct pipe_fence_handle **ptr,
+                           struct pipe_fence_handle *fence)
 {
-   struct ilo_fence *fence = ilo_fence(f);
+   struct pipe_fence_handle *old;
 
-   /* mark signalled if the bo is idle */
-   if (fence->bo && !intel_bo_is_busy(fence->bo)) {
-      intel_bo_unref(fence->bo);
-      fence->bo = NULL;
+   if (likely(ptr)) {
+      old = *ptr;
+      *ptr = fence;
+   } else {
+      old = NULL;
    }
 
-   return (fence->bo == NULL);
+   STATIC_ASSERT(&((struct pipe_fence_handle *) NULL)->reference == NULL);
+   if (pipe_reference(&old->reference, &fence->reference)) {
+      intel_bo_unref(old->seqno_bo);
+      FREE(old);
+   }
 }
 
 static boolean
-ilo_fence_finish(struct pipe_screen *screen,
-                 struct pipe_fence_handle *f,
-                 uint64_t timeout)
+ilo_screen_fence_finish(struct pipe_screen *screen,
+                        struct pipe_context *ctx,
+                        struct pipe_fence_handle *fence,
+                        uint64_t timeout)
 {
-   struct ilo_fence *fence = ilo_fence(f);
    const int64_t wait_timeout = (timeout > INT64_MAX) ? -1 : timeout;
+   bool signaled;
 
-   /* already signalled */
-   if (!fence->bo)
-      return true;
+   signaled = (!fence->seqno_bo ||
+         intel_bo_wait(fence->seqno_bo, wait_timeout) == 0);
 
-   /* wait and see if it returns error */
-   if (intel_bo_wait(fence->bo, wait_timeout))
-      return false;
-
-   /* mark signalled */
-   intel_bo_unref(fence->bo);
-   fence->bo = NULL;
+   /* XXX not thread safe */
+   if (signaled && fence->seqno_bo) {
+      intel_bo_unref(fence->seqno_bo);
+      fence->seqno_bo = NULL;
+   }
 
-   return true;
+   return signaled;
 }
 
 /**
  * Create a fence for \p bo.  When \p bo is not NULL, it must be submitted
  * before waited on or checked.
  */
-struct ilo_fence *
-ilo_fence_create(struct pipe_screen *screen, struct intel_bo *bo)
+struct pipe_fence_handle *
+ilo_screen_fence_create(struct pipe_screen *screen, struct intel_bo *bo)
 {
-   struct ilo_fence *fence;
+   struct pipe_fence_handle *fence;
 
-   fence = CALLOC_STRUCT(ilo_fence);
+   fence = CALLOC_STRUCT(pipe_fence_handle);
    if (!fence)
       return NULL;
 
    pipe_reference_init(&fence->reference, 1);
 
-   fence->bo = intel_bo_ref(bo);
+   fence->seqno_bo = intel_bo_ref(bo);
 
    return fence;
 }
@@ -663,156 +755,15 @@ ilo_screen_destroy(struct pipe_screen *screen)
 {
    struct ilo_screen *is = ilo_screen(screen);
 
-   /* as it seems, winsys is owned by the screen */
-   intel_winsys_destroy(is->winsys);
+   intel_winsys_destroy(is->dev.winsys);
 
    FREE(is);
 }
 
-static bool
-init_dev(struct ilo_dev_info *dev, const struct intel_winsys_info *info)
-{
-   dev->devid = info->devid;
-   dev->aperture_total = info->aperture_total;
-   dev->aperture_mappable = info->aperture_mappable;
-   dev->has_llc = info->has_llc;
-   dev->has_address_swizzling = info->has_address_swizzling;
-   dev->has_logical_context = info->has_logical_context;
-   dev->has_ppgtt = info->has_ppgtt;
-   dev->has_timestamp = info->has_timestamp;
-   dev->has_gen7_sol_reset = info->has_gen7_sol_reset;
-
-   if (!dev->has_logical_context) {
-      ilo_err("missing hardware logical context support\n");
-      return false;
-   }
-
-   /*
-    * PIPE_CONTROL and MI_* use PPGTT writes on GEN7+ and privileged GGTT
-    * writes on GEN6.
-    *
-    * From the Sandy Bridge PRM, volume 1 part 3, page 101:
-    *
-    *     "[DevSNB] When Per-Process GTT Enable is set, it is assumed that all
-    *      code is in a secure environment, independent of address space.
-    *      Under this condition, this bit only specifies the address space
-    *      (GGTT or PPGTT). All commands are executed "as-is""
-    *
-    * We need PPGTT to be enabled on GEN6 too.
-    */
-   if (!dev->has_ppgtt) {
-      /* experiments show that it does not really matter... */
-      ilo_warn("PPGTT disabled\n");
-   }
-
-   if (gen_is_bdw(info->devid) || gen_is_chv(info->devid)) {
-      dev->gen_opaque = ILO_GEN(8);
-      dev->gt = (gen_is_bdw(info->devid)) ? gen_get_bdw_gt(info->devid) : 1;
-      /* XXX random values */
-      if (dev->gt == 3) {
-         dev->eu_count = 48;
-         dev->thread_count = 336;
-         dev->urb_size = 384 * 1024;
-      } else if (dev->gt == 2) {
-         dev->eu_count = 24;
-         dev->thread_count = 168;
-         dev->urb_size = 384 * 1024;
-      } else {
-         dev->eu_count = 12;
-         dev->thread_count = 84;
-         dev->urb_size = 192 * 1024;
-      }
-   } else if (gen_is_hsw(info->devid)) {
-      /*
-       * From the Haswell PRM, volume 4, page 8:
-       *
-       *     "Description                    GT3      GT2      GT1.5    GT1
-       *      (...)
-       *      EUs (Total)                    40       20       12       10
-       *      Threads (Total)                280      140      84       70
-       *      (...)
-       *      URB Size (max, within L3$)     512KB    256KB    256KB    128KB
-       */
-      dev->gen_opaque = ILO_GEN(7.5);
-      dev->gt = gen_get_hsw_gt(info->devid);
-      if (dev->gt == 3) {
-         dev->eu_count = 40;
-         dev->thread_count = 280;
-         dev->urb_size = 512 * 1024;
-      } else if (dev->gt == 2) {
-         dev->eu_count = 20;
-         dev->thread_count = 140;
-         dev->urb_size = 256 * 1024;
-      } else {
-         dev->eu_count = 10;
-         dev->thread_count = 70;
-         dev->urb_size = 128 * 1024;
-      }
-   } else if (gen_is_ivb(info->devid) || gen_is_vlv(info->devid)) {
-      /*
-       * From the Ivy Bridge PRM, volume 1 part 1, page 18:
-       *
-       *     "Device             # of EUs        #Threads/EU
-       *      Ivy Bridge (GT2)   16              8
-       *      Ivy Bridge (GT1)   6               6"
-       *
-       * From the Ivy Bridge PRM, volume 4 part 2, page 17:
-       *
-       *     "URB Size    URB Rows    URB Rows when SLM Enabled
-       *      128k        4096        2048
-       *      256k        8096        4096"
-       */
-      dev->gen_opaque = ILO_GEN(7);
-      dev->gt = (gen_is_ivb(info->devid)) ? gen_get_ivb_gt(info->devid) : 1;
-      if (dev->gt == 2) {
-         dev->eu_count = 16;
-         dev->thread_count = 128;
-         dev->urb_size = 256 * 1024;
-      } else {
-         dev->eu_count = 6;
-         dev->thread_count = 36;
-         dev->urb_size = 128 * 1024;
-      }
-   } else if (gen_is_snb(info->devid)) {
-      /*
-       * From the Sandy Bridge PRM, volume 1 part 1, page 22:
-       *
-       *     "Device             # of EUs        #Threads/EU
-       *      SNB GT2            12              5
-       *      SNB GT1            6               4"
-       *
-       * From the Sandy Bridge PRM, volume 4 part 2, page 18:
-       *
-       *     "[DevSNB]: The GT1 product's URB provides 32KB of storage,
-       *      arranged as 1024 256-bit rows. The GT2 product's URB provides
-       *      64KB of storage, arranged as 2048 256-bit rows. A row
-       *      corresponds in size to an EU GRF register. Read/write access to
-       *      the URB is generally supported on a row-granular basis."
-       */
-      dev->gen_opaque = ILO_GEN(6);
-      dev->gt = gen_get_snb_gt(info->devid);
-      if (dev->gt == 2) {
-         dev->eu_count = 12;
-         dev->thread_count = 60;
-         dev->urb_size = 64 * 1024;
-      } else {
-         dev->eu_count = 6;
-         dev->thread_count = 24;
-         dev->urb_size = 32 * 1024;
-      }
-   } else {
-      ilo_err("unknown GPU generation\n");
-      return false;
-   }
-
-   return true;
-}
-
 struct pipe_screen *
 ilo_screen_create(struct intel_winsys *ws)
 {
    struct ilo_screen *is;
-   const struct intel_winsys_info *info;
 
    ilo_debug_init("ILO_DEBUG");
 
@@ -820,10 +771,7 @@ ilo_screen_create(struct intel_winsys *ws)
    if (!is)
       return NULL;
 
-   is->winsys = ws;
-
-   info = intel_winsys_get_info(is->winsys);
-   if (!init_dev(&is->dev, info)) {
+   if (!ilo_dev_init(&is->dev, ws)) {
       FREE(is);
       return NULL;
    }
@@ -842,15 +790,16 @@ ilo_screen_create(struct intel_winsys *ws)
 
    is->base.get_timestamp = ilo_get_timestamp;
 
+   is->base.is_format_supported = ilo_is_format_supported;
+   is->base.is_video_format_supported = ilo_is_video_format_supported;
+
    is->base.flush_frontbuffer = NULL;
 
-   is->base.fence_reference = ilo_fence_reference;
-   is->base.fence_signalled = ilo_fence_signalled;
-   is->base.fence_finish = ilo_fence_finish;
+   is->base.fence_reference = ilo_screen_fence_reference;
+   is->base.fence_finish = ilo_screen_fence_finish;
 
    is->base.get_driver_query_info = NULL;
 
-   ilo_init_format_functions(is);
    ilo_init_context_functions(is);
    ilo_init_resource_functions(is);