Merge commit '8b0fb1c152fe191768953aa8c77b89034a377f83' into vulkan

[mesa.git] / src / mesa / drivers / dri / i965 / brw_program.c
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c

index d782b4fdafb17baf9062312e6d91452716153f7f..b093a87bb82da8d6456ff419a449f4019efd5696 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -31,18 +31,19 @@
  
  #include <pthread.h>
  #include "main/imports.h"
-#include "main/enums.h"
-#include "main/shaderobj.h"
  #include "program/prog_parameter.h"
  #include "program/prog_print.h"
  #include "program/program.h"
  #include "program/programopt.h"
  #include "tnl/tnl.h"
  #include "util/ralloc.h"
-#include "glsl/ir.h"
+#include "compiler/glsl/ir.h"
  
+#include "brw_program.h"
  #include "brw_context.h"
-#include "brw_wm.h"
+#include "brw_shader.h"
+#include "brw_nir.h"
+#include "intel_batchbuffer.h"
  
  static unsigned
  get_new_program_id(struct intel_screen *screen)
@@ -54,25 +55,6 @@ get_new_program_id(struct intel_screen *screen)
     return id;
  }
  
-static void brwBindProgram( struct gl_context *ctx,
-                           GLenum target,
-                           struct gl_program *prog )
-{
-   struct brw_context *brw = brw_context(ctx);
-
-   switch (target) {
-   case GL_VERTEX_PROGRAM_ARB:
-      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
-      break;
-   case MESA_GEOMETRY_PROGRAM:
-      brw->state.dirty.brw |= BRW_NEW_GEOMETRY_PROGRAM;
-      break;
-   case GL_FRAGMENT_PROGRAM_ARB:
-      brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
-      break;
-   }
-}
-
  static struct gl_program *brwNewProgram( struct gl_context *ctx,
                                       GLenum target,
                                       GLuint id )
@@ -85,8 +67,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
        if (prog) {
          prog->id = get_new_program_id(brw->intelScreen);
  
-        return _mesa_init_vertex_program( ctx, &prog->program,
-                                            target, id );
+        return _mesa_init_gl_program(&prog->program.Base, target, id);
        }
        else
          return NULL;
@@ -97,19 +78,40 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
        if (prog) {
          prog->id = get_new_program_id(brw->intelScreen);
  
-        return _mesa_init_fragment_program( ctx, &prog->program,
-                                            target, id );
+        return _mesa_init_gl_program(&prog->program.Base, target, id);
        }
        else
          return NULL;
     }
  
-   case MESA_GEOMETRY_PROGRAM: {
+   case GL_GEOMETRY_PROGRAM_NV: {
        struct brw_geometry_program *prog = CALLOC_STRUCT(brw_geometry_program);
        if (prog) {
           prog->id = get_new_program_id(brw->intelScreen);
  
-         return _mesa_init_geometry_program(ctx, &prog->program, target, id);
+         return _mesa_init_gl_program(&prog->program.Base, target, id);
+      } else {
+         return NULL;
+      }
+   }
+
+   case GL_TESS_CONTROL_PROGRAM_NV: {
+      struct brw_tess_ctrl_program *prog = CALLOC_STRUCT(brw_tess_ctrl_program);
+      if (prog) {
+         prog->id = get_new_program_id(brw->intelScreen);
+
+         return _mesa_init_gl_program(&prog->program.Base, target, id);
+      } else {
+         return NULL;
+      }
+   }
+
+   case GL_TESS_EVALUATION_PROGRAM_NV: {
+      struct brw_tess_eval_program *prog = CALLOC_STRUCT(brw_tess_eval_program);
+      if (prog) {
+         prog->id = get_new_program_id(brw->intelScreen);
+
+         return _mesa_init_gl_program(&prog->program.Base, target, id);
        } else {
           return NULL;
        }
@@ -120,7 +122,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
        if (prog) {
           prog->id = get_new_program_id(brw->intelScreen);
  
-         return _mesa_init_compute_program(ctx, &prog->program, target, id);
+         return _mesa_init_gl_program(&prog->program.Base, target, id);
        } else {
           return NULL;
        }
@@ -138,20 +140,13 @@ static void brwDeleteProgram( struct gl_context *ctx,
  }
  
  
-static GLboolean
-brwIsProgramNative(struct gl_context *ctx,
-                  GLenum target,
-                  struct gl_program *prog)
-{
-   return true;
-}
-
  static GLboolean
  brwProgramStringNotify(struct gl_context *ctx,
                        GLenum target,
                        struct gl_program *prog)
  {
     struct brw_context *brw = brw_context(ctx);
+   const struct brw_compiler *compiler = brw->intelScreen->compiler;
  
     switch (target) {
     case GL_FRAGMENT_PROGRAM_ARB: {
@@ -161,8 +156,14 @@ brwProgramStringNotify(struct gl_context *ctx,
           brw_fragment_program_const(brw->fragment_program);
  
        if (newFP == curFP)
-        brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+        brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
        newFP->id = get_new_program_id(brw->intelScreen);
+
+      brw_add_texrect_params(prog);
+
+      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
+
+      brw_fs_precompile(ctx, NULL, prog);
        break;
     }
     case GL_VERTEX_PROGRAM_ARB: {
@@ -172,7 +173,7 @@ brwProgramStringNotify(struct gl_context *ctx,
           brw_vertex_program_const(brw->vertex_program);
  
        if (newVP == curVP)
-        brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+        brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
        if (newVP->program.IsPositionInvariant) {
          _mesa_insert_mvp_code(ctx, &newVP->program);
        }
@@ -181,6 +182,13 @@ brwProgramStringNotify(struct gl_context *ctx,
        /* Also tell tnl about it:
         */
        _tnl_program_string(ctx, target, prog);
+
+      brw_add_texrect_params(prog);
+
+      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
+                                 compiler->scalar_stage[MESA_SHADER_VERTEX]);
+
+      brw_vs_precompile(ctx, NULL, prog);
        break;
     }
     default:
@@ -194,11 +202,46 @@ brwProgramStringNotify(struct gl_context *ctx,
        unreachable("Unexpected target in brwProgramStringNotify");
     }
  
-   brw_add_texrect_params(prog);
-
     return true;
  }
  
+static void
+brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
+{
+   struct brw_context *brw = brw_context(ctx);
+   unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
+                    PIPE_CONTROL_NO_WRITE |
+                    PIPE_CONTROL_CS_STALL);
+   assert(brw->gen >= 7 && brw->gen <= 9);
+
+   if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
+                   GL_ELEMENT_ARRAY_BARRIER_BIT |
+                   GL_COMMAND_BARRIER_BIT))
+      bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+
+   if (barriers & GL_UNIFORM_BARRIER_BIT)
+      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+               PIPE_CONTROL_CONST_CACHE_INVALIDATE);
+
+   if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
+      bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+
+   if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
+      bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
+
+   if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
+      bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+               PIPE_CONTROL_RENDER_TARGET_FLUSH);
+
+   /* Typed surface messages are handled by the render cache on IVB, so we
+    * need to flush it too.
+    */
+   if (brw->gen == 7 && !brw->is_haswell)
+      bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
+
+   brw_emit_pipe_control_flush(brw, bits);
+}
+
  void
  brw_add_texrect_params(struct gl_program *prog)
  {
@@ -218,18 +261,6 @@ brw_add_texrect_params(struct gl_program *prog)
     }
  }
  
-/* Per-thread scratch space is a power-of-two multiple of 1KB. */
-int
-brw_get_scratch_size(int size)
-{
-   int i;
-
-   for (i = 1024; i < size; i *= 2)
-      ;
-
-   return i;
-}
-
  void
  brw_get_scratch_bo(struct brw_context *brw,
                    drm_intel_bo **scratch_bo, int size)
@@ -248,33 +279,36 @@ brw_get_scratch_bo(struct brw_context *brw,
  
  void brwInitFragProgFuncs( struct dd_function_table *functions )
  {
-   assert(functions->ProgramStringNotify == _tnl_program_string);
+   /* assert(functions->ProgramStringNotify == _tnl_program_string); */
  
-   functions->BindProgram = brwBindProgram;
     functions->NewProgram = brwNewProgram;
     functions->DeleteProgram = brwDeleteProgram;
-   functions->IsProgramNative = brwIsProgramNative;
     functions->ProgramStringNotify = brwProgramStringNotify;
  
     functions->NewShader = brw_new_shader;
-   functions->NewShaderProgram = brw_new_shader_program;
     functions->LinkShader = brw_link_shader;
+
+   functions->MemoryBarrier = brw_memory_barrier;
  }
  
+struct shader_times {
+   uint64_t time;
+   uint64_t written;
+   uint64_t reset;
+};
+
  void
  brw_init_shader_time(struct brw_context *brw)
  {
-   const int max_entries = 4096;
-   brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time",
-                                            max_entries * SHADER_TIME_STRIDE,
-                                            4096);
-   brw->shader_time.shader_programs = rzalloc_array(brw, struct gl_shader_program *,
-                                                    max_entries);
-   brw->shader_time.programs = rzalloc_array(brw, struct gl_program *,
-                                             max_entries);
+   const int max_entries = 2048;
+   brw->shader_time.bo =
+      drm_intel_bo_alloc(brw->bufmgr, "shader time",
+                         max_entries * SHADER_TIME_STRIDE * 3, 4096);
+   brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
+   brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
     brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
                                            max_entries);
-   brw->shader_time.cumulative = rzalloc_array(brw, uint64_t,
+   brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
                                                 max_entries);
     brw->shader_time.max_entries = max_entries;
  }
@@ -294,33 +328,13 @@ compare_time(const void *a, const void *b)
        return 1;
  }
  
-static void
-get_written_and_reset(struct brw_context *brw, int i,
-                      uint64_t *written, uint64_t *reset)
-{
-   enum shader_time_shader_type type = brw->shader_time.types[i];
-   assert(type == ST_VS || type == ST_GS || type == ST_FS8 || type == ST_FS16);
-
-   /* Find where we recorded written and reset. */
-   int wi, ri;
-
-   for (wi = i; brw->shader_time.types[wi] != type + 1; wi++)
-      ;
-
-   for (ri = i; brw->shader_time.types[ri] != type + 2; ri++)
-      ;
-
-   *written = brw->shader_time.cumulative[wi];
-   *reset = brw->shader_time.cumulative[ri];
-}
-
  static void
  print_shader_time_line(const char *stage, const char *name,
                         int shader_num, uint64_t time, uint64_t total)
  {
     fprintf(stderr, "%-6s%-18s", stage, name);
  
-   if (shader_num != -1)
+   if (shader_num != 0)
        fprintf(stderr, "%4d: ", shader_num);
     else
        fprintf(stderr, "    : ");
@@ -339,7 +353,7 @@ brw_report_shader_time(struct brw_context *brw)
  
     uint64_t scaled[brw->shader_time.num_entries];
     uint64_t *sorted[brw->shader_time.num_entries];
-   uint64_t total_by_type[ST_FS16 + 1];
+   uint64_t total_by_type[ST_CS + 1];
     memset(total_by_type, 0, sizeof(total_by_type));
     double total = 0;
     for (int i = 0; i < brw->shader_time.num_entries; i++) {
@@ -349,23 +363,15 @@ brw_report_shader_time(struct brw_context *brw)
        sorted[i] = &scaled[i];
  
        switch (type) {
-      case ST_VS_WRITTEN:
-      case ST_VS_RESET:
-      case ST_GS_WRITTEN:
-      case ST_GS_RESET:
-      case ST_FS8_WRITTEN:
-      case ST_FS8_RESET:
-      case ST_FS16_WRITTEN:
-      case ST_FS16_RESET:
-         /* We'll handle these when along with the time. */
-         scaled[i] = 0;
-         continue;
-
        case ST_VS:
+      case ST_TCS:
+      case ST_TES:
        case ST_GS:
        case ST_FS8:
        case ST_FS16:
-         get_written_and_reset(brw, i, &written, &reset);
+      case ST_CS:
+         written = brw->shader_time.cumulative[i].written;
+         reset = brw->shader_time.cumulative[i].reset;
           break;
  
        default:
@@ -377,7 +383,7 @@ brw_report_shader_time(struct brw_context *brw)
           break;
        }
  
-      uint64_t time = brw->shader_time.cumulative[i];
+      uint64_t time = brw->shader_time.cumulative[i].time;
        if (written) {
           scaled[i] = time / written * (written + reset);
        } else {
@@ -386,9 +392,12 @@ brw_report_shader_time(struct brw_context *brw)
  
        switch (type) {
        case ST_VS:
+      case ST_TCS:
+      case ST_TES:
        case ST_GS:
        case ST_FS8:
        case ST_FS16:
+      case ST_CS:
           total_by_type[type] += scaled[i];
           break;
        default:
@@ -408,48 +417,26 @@ brw_report_shader_time(struct brw_context *brw)
     fprintf(stderr, "\n");
     fprintf(stderr, "type          ID                  cycles spent                   %% of total\n");
     for (int s = 0; s < brw->shader_time.num_entries; s++) {
-      const char *shader_name;
        const char *stage;
        /* Work back from the sorted pointers times to a time to print. */
        int i = sorted[s] - scaled;
-      struct gl_shader_program *prog = brw->shader_time.shader_programs[i];
  
        if (scaled[i] == 0)
           continue;
  
-      int shader_num = -1;
-      if (prog) {
-         shader_num = prog->Name;
-
-         /* The fixed function fragment shader generates GLSL IR with a Name
-          * of 0, and nothing else does.
-          */
-         if (prog->Label) {
-            shader_name = prog->Label;
-         } else if (shader_num == 0 &&
-             (brw->shader_time.types[i] == ST_FS8 ||
-              brw->shader_time.types[i] == ST_FS16)) {
-            shader_name = "ff";
-            shader_num = -1;
-         } else {
-            shader_name = "glsl";
-         }
-      } else if (brw->shader_time.programs[i]) {
-         shader_num = brw->shader_time.programs[i]->Id;
-         if (shader_num == 0) {
-            shader_name = "ff";
-            shader_num = -1;
-         } else {
-            shader_name = "prog";
-         }
-      } else {
-         shader_name = "other";
-      }
+      int shader_num = brw->shader_time.ids[i];
+      const char *shader_name = brw->shader_time.names[i];
  
        switch (brw->shader_time.types[i]) {
        case ST_VS:
           stage = "vs";
           break;
+      case ST_TCS:
+         stage = "tcs";
+         break;
+      case ST_TES:
+         stage = "tes";
+         break;
        case ST_GS:
           stage = "gs";
           break;
@@ -459,6 +446,9 @@ brw_report_shader_time(struct brw_context *brw)
        case ST_FS16:
           stage = "fs16";
           break;
+      case ST_CS:
+         stage = "cs";
+         break;
        default:
           stage = "other";
           break;
@@ -469,10 +459,13 @@ brw_report_shader_time(struct brw_context *brw)
     }
  
     fprintf(stderr, "\n");
-   print_shader_time_line("total", "vs", -1, total_by_type[ST_VS], total);
-   print_shader_time_line("total", "gs", -1, total_by_type[ST_GS], total);
-   print_shader_time_line("total", "fs8", -1, total_by_type[ST_FS8], total);
-   print_shader_time_line("total", "fs16", -1, total_by_type[ST_FS16], total);
+   print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
+   print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
+   print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
+   print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
+   print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
+   print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
+   print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
  }
  
  static void
@@ -486,16 +479,19 @@ brw_collect_shader_time(struct brw_context *brw)
      * overhead compared to the cost of tracking the time in the first place.
      */
     drm_intel_bo_map(brw->shader_time.bo, true);
-
-   uint32_t *times = brw->shader_time.bo->virtual;
+   void *bo_map = brw->shader_time.bo->virtual;
  
     for (int i = 0; i < brw->shader_time.num_entries; i++) {
-      brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4];
+      uint32_t *times = bo_map + i * 3 * SHADER_TIME_STRIDE;
+
+      brw->shader_time.cumulative[i].time += times[SHADER_TIME_STRIDE * 0 / 4];
+      brw->shader_time.cumulative[i].written += times[SHADER_TIME_STRIDE * 1 / 4];
+      brw->shader_time.cumulative[i].reset += times[SHADER_TIME_STRIDE * 2 / 4];
     }
  
     /* Zero the BO out to clear it out for our next collection.
      */
-   memset(times, 0, brw->shader_time.bo->size);
+   memset(bo_map, 0, brw->shader_time.bo->size);
     drm_intel_bo_unmap(brw->shader_time.bo);
  }
  
@@ -524,19 +520,24 @@ brw_get_shader_time_index(struct brw_context *brw,
                            struct gl_program *prog,
                            enum shader_time_shader_type type)
  {
-   struct gl_context *ctx = &brw->ctx;
-
     int shader_time_index = brw->shader_time.num_entries++;
     assert(shader_time_index < brw->shader_time.max_entries);
     brw->shader_time.types[shader_time_index] = type;
  
-   _mesa_reference_shader_program(ctx,
-                                  &brw->shader_time.shader_programs[shader_time_index],
-                                  shader_prog);
+   int id = shader_prog ? shader_prog->Name : prog->Id;
+   const char *name;
+   if (id == 0) {
+      name = "ff";
+   } else if (!shader_prog) {
+      name = "prog";
+   } else if (shader_prog->Label) {
+      name = ralloc_strdup(brw->shader_time.names, shader_prog->Label);
+   } else {
+      name = "glsl";
+   }
  
-   _mesa_reference_program(ctx,
-                           &brw->shader_time.programs[shader_time_index],
-                           prog);
+   brw->shader_time.names[shader_time_index] = name;
+   brw->shader_time.ids[shader_time_index] = id;
  
     return shader_time_index;
  }
@@ -548,33 +549,6 @@ brw_destroy_shader_time(struct brw_context *brw)
     brw->shader_time.bo = NULL;
  }
  
-void
-brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
-                      unsigned surf_index)
-{
-   assert(surf_index < BRW_MAX_SURFACES);
-
-   prog_data->binding_table.size_bytes =
-      MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
-}
-
-bool
-brw_stage_prog_data_compare(const struct brw_stage_prog_data *a,
-                            const struct brw_stage_prog_data *b)
-{
-   /* Compare all the struct up to the pointers. */
-   if (memcmp(a, b, offsetof(struct brw_stage_prog_data, param)))
-      return false;
-
-   if (memcmp(a->param, b->param, a->nr_params * sizeof(void *)))
-      return false;
-
-   if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void *)))
-      return false;
-
-   return true;
-}
-
  void
  brw_stage_prog_data_free(const void *p)
  {
@@ -582,21 +556,43 @@ brw_stage_prog_data_free(const void *p)
  
     ralloc_free(prog_data->param);
     ralloc_free(prog_data->pull_param);
+   ralloc_free(prog_data->image_param);
  }
  
  void
-brw_dump_ir(struct brw_context *brw, const char *stage,
-            struct gl_shader_program *shader_prog,
+brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
              struct gl_shader *shader, struct gl_program *prog)
  {
     if (shader_prog) {
-      fprintf(stderr,
-              "GLSL IR for native %s shader %d:\n", stage, shader_prog->Name);
-      _mesa_print_ir(stderr, shader->ir, NULL);
-      fprintf(stderr, "\n\n");
+      if (shader->ir) {
+         fprintf(stderr,
+                 "GLSL IR for native %s shader %d:\n",
+                 stage, shader_prog->Name);
+         _mesa_print_ir(stderr, shader->ir, NULL);
+         fprintf(stderr, "\n\n");
+      }
     } else {
        fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
                stage, prog->Id, stage);
        _mesa_print_program(prog);
     }
  }
+
+void
+brw_setup_tex_for_precompile(struct brw_context *brw,
+                             struct brw_sampler_prog_key_data *tex,
+                             struct gl_program *prog)
+{
+   const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
+   unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
+   for (unsigned i = 0; i < sampler_count; i++) {
+      if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
+         /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
+         tex->swizzles[i] =
+            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+      } else {
+         /* Color sampler: assume no swizzling. */
+         tex->swizzles[i] = SWIZZLE_XYZW;
+      }
+   }
+}