i965/fs: Allow constant propagation into logical send sources.

[mesa.git] / src / gallium / winsys / svga / drm / vmw_context.c
diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c

index 1234a5edce322006e778c050d49ab9b7d68c2c05..8d23bff5d748b5ff442377bf5a2a88265314ecae 100644 (file)
--- a/src/gallium/winsys/svga/drm/vmw_context.c
+++ b/src/gallium/winsys/svga/drm/vmw_context.c
@@ -1,5 +1,5 @@
  /**********************************************************
- * Copyright 2009 VMware, Inc.  All rights reserved.
+ * Copyright 2009-2015 VMware, Inc.  All rights reserved.
   *
   * Permission is hereby granted, free of charge, to any person
   * obtaining a copy of this software and associated documentation
@@ -41,6 +41,7 @@
  #include "vmw_surface.h"
  #include "vmw_fence.h"
  #include "vmw_shader.h"
+#include "vmw_query.h"
  
  #define VMW_COMMAND_SIZE (64*1024)
  #define VMW_SURFACE_RELOCS (1024)
@@ -49,6 +50,21 @@
  
  #define VMW_MUST_FLUSH_STACK 8
  
+/*
+ * A factor applied to the maximum mob memory size to determine
+ * the optimal time to preemptively flush the command buffer.
+ * The constant is based on some performance trials with SpecViewperf.
+ */
+#define VMW_MAX_MOB_MEM_FACTOR  2
+
+/*
+ * A factor applied to the maximum surface memory size to determine
+ * the optimal time to preemptively flush the command buffer.
+ * The constant is based on some performance trials with SpecViewperf.
+ */
+#define VMW_MAX_SURF_MEM_FACTOR 2
+
+
  struct vmw_buffer_relocation
  {
     struct pb_buffer *buffer;
@@ -137,7 +153,7 @@ struct vmw_svga_winsys_context
  };
  
  
-static INLINE struct vmw_svga_winsys_context *
+static inline struct vmw_svga_winsys_context *
  vmw_svga_winsys_context(struct svga_winsys_context *swc)
  {
     assert(swc);
@@ -145,7 +161,7 @@ vmw_svga_winsys_context(struct svga_winsys_context *swc)
  }
  
  
-static INLINE unsigned
+static inline unsigned
  vmw_translate_to_pb_flags(unsigned flags)
  {
     unsigned f = 0;
@@ -235,6 +251,7 @@ vmw_swc_flush(struct svga_winsys_context *swc,
     vswc->must_flush = FALSE;
     debug_flush_flush(vswc->fctx);
  #endif
+   swc->hints &= ~SVGA_HINT_FLAG_CAN_PRE_FLUSH;
     vswc->preemptive_flush = FALSE;
     vswc->seen_surfaces = 0;
     vswc->seen_regions = 0;
@@ -298,6 +315,13 @@ vmw_swc_reserve(struct svga_winsys_context *swc,
     return vswc->command.buffer + vswc->command.used;
  }
  
+static unsigned
+vmw_swc_get_command_buffer_size(struct svga_winsys_context *swc)
+{
+   const struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   return vswc->command.used;
+}
+
  static void
  vmw_swc_context_relocation(struct svga_winsys_context *swc,
                            uint32 *cid)
@@ -356,7 +380,8 @@ vmw_swc_region_relocation(struct svga_winsys_context *swc,
  
     if (vmw_swc_add_validate_buffer(vswc, reloc->buffer, flags)) {
        vswc->seen_regions += reloc->buffer->size;
-      if(vswc->seen_regions >= VMW_GMR_POOL_SIZE/5)
+      if ((swc->hints & SVGA_HINT_FLAG_CAN_PRE_FLUSH) &&
+          vswc->seen_regions >= VMW_GMR_POOL_SIZE/5)
           vswc->preemptive_flush = TRUE;
     }
  
@@ -376,26 +401,31 @@ vmw_swc_mob_relocation(struct svga_winsys_context *swc,
  {
     struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
     struct vmw_buffer_relocation *reloc;
+   struct pb_buffer *pb_buffer = vmw_pb_buffer(buffer);
  
-   assert(vswc->region.staged < vswc->region.reserved);
+   if (id) {
+      assert(vswc->region.staged < vswc->region.reserved);
  
-   reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged];
-   reloc->mob.id = id;
-   reloc->mob.offset_into_mob = offset_into_mob;
+      reloc = &vswc->region.relocs[vswc->region.used + vswc->region.staged];
+      reloc->mob.id = id;
+      reloc->mob.offset_into_mob = offset_into_mob;
  
-   /*
-    * pb_validate holds a refcount to the buffer, so no need to
-    * refcount it again in the relocation.
-    */
-   reloc->buffer = vmw_pb_buffer(buffer);
-   reloc->offset = offset;
-   reloc->is_mob = TRUE;
-   ++vswc->region.staged;
+      /*
+       * pb_validate holds a refcount to the buffer, so no need to
+       * refcount it again in the relocation.
+       */
+      reloc->buffer = pb_buffer;
+      reloc->offset = offset;
+      reloc->is_mob = TRUE;
+      ++vswc->region.staged;
+   }
  
-   if (vmw_swc_add_validate_buffer(vswc, reloc->buffer, flags)) {
-      vswc->seen_mobs += reloc->buffer->size;
-      /* divide by 5, tested for best performance */
-      if (vswc->seen_mobs >= vswc->vws->ioctl.max_mob_memory / 5)
+   if (vmw_swc_add_validate_buffer(vswc, pb_buffer, flags)) {
+      vswc->seen_mobs += pb_buffer->size;
+
+      if ((swc->hints & SVGA_HINT_FLAG_CAN_PRE_FLUSH) &&
+          vswc->seen_mobs >=
+            vswc->vws->ioctl.max_mob_memory / VMW_MAX_MOB_MEM_FACTOR)
           vswc->preemptive_flush = TRUE;
     }
  
@@ -456,8 +486,9 @@ vmw_swc_surface_only_relocation(struct svga_winsys_context *swc,
        ++vswc->surface.staged;
  
        vswc->seen_surfaces += vsurf->size;
-      /* divide by 5 not well tuned for performance */
-      if (vswc->seen_surfaces >= vswc->vws->ioctl.max_surface_memory / 5)
+      if ((swc->hints & SVGA_HINT_FLAG_CAN_PRE_FLUSH) &&
+          vswc->seen_surfaces >=
+            vswc->vws->ioctl.max_surface_memory / VMW_MAX_SURF_MEM_FACTOR)
           vswc->preemptive_flush = TRUE;
     }
  
@@ -466,7 +497,8 @@ vmw_swc_surface_only_relocation(struct svga_winsys_context *swc,
        p_atomic_inc(&vsurf->validated);
     }
  
-   *where = vsurf->sid;
+   if (where)
+      *where = vsurf->sid;
  }
  
  static void
@@ -480,7 +512,7 @@ vmw_swc_surface_relocation(struct svga_winsys_context *swc,
  
     assert(swc->have_gb_objects || mobid == NULL);
  
-   if(!surface) {
+   if (!surface) {
        *where = SVGA3D_INVALID_ID;
        if (mobid)
           *mobid = SVGA3D_INVALID_ID;
@@ -510,51 +542,67 @@ vmw_swc_shader_relocation(struct svga_winsys_context *swc,
                           uint32 *shid,
                           uint32 *mobid,
                           uint32 *offset,
-                         struct svga_winsys_gb_shader *shader)
+                         struct svga_winsys_gb_shader *shader,
+                          unsigned flags)
  {
     struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct vmw_winsys_screen *vws = vswc->vws;
     struct vmw_svga_winsys_shader *vshader;
     struct vmw_ctx_validate_item *ishader;
+
     if(!shader) {
        *shid = SVGA3D_INVALID_ID;
        return;
     }
  
-   assert(vswc->shader.staged < vswc->shader.reserved);
     vshader = vmw_svga_winsys_shader(shader);
-   ishader = util_hash_table_get(vswc->hash, vshader);
  
-   if (ishader == NULL) {
-      ishader = &vswc->shader.items[vswc->shader.used + vswc->shader.staged];
-      vmw_svga_winsys_shader_reference(&ishader->vshader, vshader);
-      ishader->referenced = FALSE;
-      /*
-       * Note that a failure here may just fall back to unhashed behavior
-       * and potentially cause unnecessary flushing, so ignore the
-       * return code.
-       */
-      (void) util_hash_table_set(vswc->hash, vshader, ishader);
-      ++vswc->shader.staged;
-   }
+   if (!vws->base.have_vgpu10) {
+      assert(vswc->shader.staged < vswc->shader.reserved);
+      ishader = util_hash_table_get(vswc->hash, vshader);
+
+      if (ishader == NULL) {
+         ishader = &vswc->shader.items[vswc->shader.used + vswc->shader.staged];
+         vmw_svga_winsys_shader_reference(&ishader->vshader, vshader);
+         ishader->referenced = FALSE;
+         /*
+          * Note that a failure here may just fall back to unhashed behavior
+          * and potentially cause unnecessary flushing, so ignore the
+          * return code.
+          */
+         (void) util_hash_table_set(vswc->hash, vshader, ishader);
+         ++vswc->shader.staged;
+      }
  
-   if (!ishader->referenced) {
-      ishader->referenced = TRUE;
-      p_atomic_inc(&vshader->validated);
+      if (!ishader->referenced) {
+         ishader->referenced = TRUE;
+         p_atomic_inc(&vshader->validated);
+      }
     }
  
-   *shid = vshader->shid;
+   if (shid)
+      *shid = vshader->shid;
  
-   if (mobid != NULL && vshader->buf)
+   if (vshader->buf)
        vmw_swc_mob_relocation(swc, mobid, offset, vshader->buf,
                              0, SVGA_RELOC_READ);
  }
  
+static void
+vmw_swc_query_relocation(struct svga_winsys_context *swc,
+                         SVGAMobId *id,
+                         struct svga_winsys_gb_query *query)
+{
+   /* Queries are backed by one big MOB */
+   vmw_swc_mob_relocation(swc, id, NULL, query->buf, 0,
+                          SVGA_RELOC_READ | SVGA_RELOC_WRITE);
+}
+
  static void
  vmw_swc_commit(struct svga_winsys_context *swc)
  {
     struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
  
-   assert(vswc->command.reserved);
     assert(vswc->command.used + vswc->command.reserved <= vswc->command.size);
     vswc->command.used += vswc->command.reserved;
     vswc->command.reserved = 0;
@@ -618,6 +666,96 @@ static int vmw_ptr_compare(void *key1, void *key2)
     return (key1 == key2) ? 0 : 1;
  }
  
+
+/**
+ * vmw_svga_winsys_vgpu10_shader_create - The winsys shader_create callback
+ *
+ * @swc: The winsys context.
+ * @shaderId: Previously allocated shader id.
+ * @shaderType: The shader type.
+ * @bytecode: The shader bytecode
+ * @bytecodeLen: The length of the bytecode.
+ *
+ * Creates an svga_winsys_gb_shader structure and allocates a buffer for the
+ * shader code and copies the shader code into the buffer. Shader
+ * resource creation is not done.
+ */
+static struct svga_winsys_gb_shader *
+vmw_svga_winsys_vgpu10_shader_create(struct svga_winsys_context *swc,
+                                     uint32 shaderId,
+                                     SVGA3dShaderType shaderType,
+                                     const uint32 *bytecode,
+                                     uint32 bytecodeLen)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct vmw_svga_winsys_shader *shader;
+   struct svga_winsys_gb_shader *gb_shader =
+      vmw_svga_winsys_shader_create(&vswc->vws->base, shaderType, bytecode,
+                                    bytecodeLen);
+   if (!gb_shader)
+      return NULL;
+
+   shader = vmw_svga_winsys_shader(gb_shader);
+   shader->shid = shaderId;
+
+   return gb_shader;
+}
+
+/**
+ * vmw_svga_winsys_vgpu10_shader_destroy - The winsys shader_destroy callback.
+ *
+ * @swc: The winsys context.
+ * @shader: A shader structure previously allocated by shader_create.
+ *
+ * Frees the shader structure and the buffer holding the shader code.
+ */
+static void
+vmw_svga_winsys_vgpu10_shader_destroy(struct svga_winsys_context *swc,
+                                      struct svga_winsys_gb_shader *shader)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+
+   vmw_svga_winsys_shader_destroy(&vswc->vws->base, shader);
+}
+
+/**
+ * vmw_svga_winsys_resource_rebind - The winsys resource_rebind callback
+ *
+ * @swc: The winsys context.
+ * @surface: The surface to be referenced.
+ * @shader: The shader to be referenced.
+ * @flags: Relocation flags.
+ *
+ * This callback is needed because shader backing buffers are sub-allocated, and
+ * hence the kernel fencing is not sufficient. The buffers need to be put on
+ * the context's validation list and fenced after command submission to avoid
+ * reuse of busy shader buffers. In addition, surfaces need to be put on the
+ * validation list in order for the driver to regard them as referenced
+ * by the command stream.
+ */
+static enum pipe_error
+vmw_svga_winsys_resource_rebind(struct svga_winsys_context *swc,
+                                struct svga_winsys_surface *surface,
+                                struct svga_winsys_gb_shader *shader,
+                                unsigned flags)
+{
+   /**
+    * Need to reserve one validation item for either the surface or
+    * the shader.
+    */
+   if (!vmw_swc_reserve(swc, 0, 1))
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   if (surface)
+      vmw_swc_surface_relocation(swc, NULL, NULL, surface, flags);
+   else if (shader)
+      vmw_swc_shader_relocation(swc, NULL, NULL, NULL, shader, flags);
+
+   vmw_swc_commit(swc);
+
+   return PIPE_OK;
+}
+
  struct svga_winsys_context *
  vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
  {
@@ -630,9 +768,12 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
  
     vswc->base.destroy = vmw_swc_destroy;
     vswc->base.reserve = vmw_swc_reserve;
+   vswc->base.get_command_buffer_size = vmw_swc_get_command_buffer_size;
     vswc->base.surface_relocation = vmw_swc_surface_relocation;
     vswc->base.region_relocation = vmw_swc_region_relocation;
     vswc->base.mob_relocation = vmw_swc_mob_relocation;
+   vswc->base.query_relocation = vmw_swc_query_relocation;
+   vswc->base.query_bind = vmw_swc_query_bind;
     vswc->base.context_relocation = vmw_swc_context_relocation;
     vswc->base.shader_relocation = vmw_swc_shader_relocation;
     vswc->base.commit = vmw_swc_commit;
@@ -640,7 +781,19 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
     vswc->base.surface_map = vmw_svga_winsys_surface_map;
     vswc->base.surface_unmap = vmw_svga_winsys_surface_unmap;
  
-   vswc->base.cid = vmw_ioctl_context_create(vws);
+  vswc->base.shader_create = vmw_svga_winsys_vgpu10_shader_create;
+  vswc->base.shader_destroy = vmw_svga_winsys_vgpu10_shader_destroy;
+
+  vswc->base.resource_rebind = vmw_svga_winsys_resource_rebind;
+
+   if (sws->have_vgpu10)
+      vswc->base.cid = vmw_ioctl_extended_context_create(vws, sws->have_vgpu10);
+   else
+      vswc->base.cid = vmw_ioctl_context_create(vws);
+
+   if (vswc->base.cid == -1)
+      goto out_no_context;
+
     vswc->base.have_gb_objects = sws->have_gb_objects;
  
     vswc->vws = vws;
@@ -667,6 +820,8 @@ vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
  out_no_hash:
     pb_validate_destroy(vswc->validate);
  out_no_validate:
+   vmw_ioctl_context_destroy(vws, vswc->base.cid);
+out_no_context:
     FREE(vswc);
     return NULL;
  }