i965: Move brw_create_nir to brw_program.c
[mesa.git] / src / mesa / drivers / dri / i965 / brw_program.c
index 414eab9c0029fae3f42a76ec016027cbbbfa0de2..792f81b80aae857781cae80b0e918f14f8b9072e 100644 (file)
 
 #include <pthread.h>
 #include "main/imports.h"
-#include "main/enums.h"
-#include "main/shaderobj.h"
 #include "program/prog_parameter.h"
 #include "program/prog_print.h"
+#include "program/prog_to_nir.h"
 #include "program/program.h"
 #include "program/programopt.h"
 #include "tnl/tnl.h"
 #include "util/ralloc.h"
-#include "glsl/ir.h"
+#include "compiler/glsl/ir.h"
+#include "compiler/glsl/glsl_to_nir.h"
 
+#include "brw_program.h"
 #include "brw_context.h"
 #include "brw_shader.h"
 #include "brw_nir.h"
-#include "brw_wm.h"
 #include "intel_batchbuffer.h"
 
+static void
+brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
+{
+   if (is_scalar) {
+      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
+                               type_size_scalar_bytes);
+      nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes);
+   } else {
+      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
+                               type_size_vec4_bytes);
+      nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes);
+   }
+}
+
+nir_shader *
+brw_create_nir(struct brw_context *brw,
+               const struct gl_shader_program *shader_prog,
+               const struct gl_program *prog,
+               gl_shader_stage stage,
+               bool is_scalar)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const nir_shader_compiler_options *options =
+      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
+   bool progress;
+   nir_shader *nir;
+
+   /* First, lower the GLSL IR or Mesa IR to NIR */
+   if (shader_prog) {
+      nir = glsl_to_nir(shader_prog, stage, options);
+      nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
+      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
+                 nir_shader_get_entrypoint(nir), true, false);
+   } else {
+      nir = prog_to_nir(prog, options);
+      NIR_PASS_V(nir, nir_convert_to_ssa); /* turn registers into SSA */
+   }
+   nir_validate_shader(nir);
+
+   (void)progress;
+
+   nir = brw_preprocess_nir(brw->intelScreen->compiler, nir);
+
+   if (stage == MESA_SHADER_FRAGMENT) {
+      static const struct nir_lower_wpos_ytransform_options wpos_options = {
+         .state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0},
+         .fs_coord_pixel_center_integer = 1,
+         .fs_coord_origin_upper_left = 1,
+      };
+      _mesa_add_state_reference(prog->Parameters,
+                                (gl_state_index *) wpos_options.state_tokens);
+
+      NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
+   }
+
+   NIR_PASS(progress, nir, nir_lower_system_values);
+   NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
+
+   if (shader_prog) {
+      NIR_PASS_V(nir, nir_lower_samplers, shader_prog);
+      NIR_PASS_V(nir, nir_lower_atomics, shader_prog);
+   }
+
+   return nir;
+}
+
 static unsigned
 get_new_program_id(struct intel_screen *screen)
 {
@@ -69,8 +135,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
       if (prog) {
         prog->id = get_new_program_id(brw->intelScreen);
 
-        return _mesa_init_vertex_program( ctx, &prog->program,
-                                            target, id );
+        return _mesa_init_gl_program(&prog->program.Base, target, id);
       }
       else
         return NULL;
@@ -81,8 +146,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
       if (prog) {
         prog->id = get_new_program_id(brw->intelScreen);
 
-        return _mesa_init_fragment_program( ctx, &prog->program,
-                                            target, id );
+        return _mesa_init_gl_program(&prog->program.Base, target, id);
       }
       else
         return NULL;
@@ -93,7 +157,29 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
       if (prog) {
          prog->id = get_new_program_id(brw->intelScreen);
 
-         return _mesa_init_geometry_program(ctx, &prog->program, target, id);
+         return _mesa_init_gl_program(&prog->program.Base, target, id);
+      } else {
+         return NULL;
+      }
+   }
+
+   case GL_TESS_CONTROL_PROGRAM_NV: {
+      struct brw_tess_ctrl_program *prog = CALLOC_STRUCT(brw_tess_ctrl_program);
+      if (prog) {
+         prog->id = get_new_program_id(brw->intelScreen);
+
+         return _mesa_init_gl_program(&prog->program.Base, target, id);
+      } else {
+         return NULL;
+      }
+   }
+
+   case GL_TESS_EVALUATION_PROGRAM_NV: {
+      struct brw_tess_eval_program *prog = CALLOC_STRUCT(brw_tess_eval_program);
+      if (prog) {
+         prog->id = get_new_program_id(brw->intelScreen);
+
+         return _mesa_init_gl_program(&prog->program.Base, target, id);
       } else {
          return NULL;
       }
@@ -104,7 +190,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
       if (prog) {
          prog->id = get_new_program_id(brw->intelScreen);
 
-         return _mesa_init_compute_program(ctx, &prog->program, target, id);
+         return _mesa_init_gl_program(&prog->program.Base, target, id);
       } else {
          return NULL;
       }
@@ -128,6 +214,7 @@ brwProgramStringNotify(struct gl_context *ctx,
                       struct gl_program *prog)
 {
    struct brw_context *brw = brw_context(ctx);
+   const struct brw_compiler *compiler = brw->intelScreen->compiler;
 
    switch (target) {
    case GL_FRAGMENT_PROGRAM_ARB: {
@@ -142,9 +229,7 @@ brwProgramStringNotify(struct gl_context *ctx,
 
       brw_add_texrect_params(prog);
 
-      if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) {
-         prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT);
-      }
+      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
 
       brw_fs_precompile(ctx, NULL, prog);
       break;
@@ -168,9 +253,8 @@ brwProgramStringNotify(struct gl_context *ctx,
 
       brw_add_texrect_params(prog);
 
-      if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) {
-         prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX);
-      }
+      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
+                                 compiler->scalar_stage[MESA_SHADER_VERTEX]);
 
       brw_vs_precompile(ctx, NULL, prog);
       break;
@@ -193,10 +277,10 @@ static void
 brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
 {
    struct brw_context *brw = brw_context(ctx);
-   unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE |
+   unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
                     PIPE_CONTROL_NO_WRITE |
                     PIPE_CONTROL_CS_STALL);
-   assert(brw->gen >= 7 && brw->gen <= 8);
+   assert(brw->gen >= 7 && brw->gen <= 9);
 
    if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
                    GL_ELEMENT_ARRAY_BARRIER_BIT |
@@ -245,18 +329,6 @@ brw_add_texrect_params(struct gl_program *prog)
    }
 }
 
-/* Per-thread scratch space is a power-of-two multiple of 1KB. */
-int
-brw_get_scratch_size(int size)
-{
-   int i;
-
-   for (i = 1024; i < size; i *= 2)
-      ;
-
-   return i;
-}
-
 void
 brw_get_scratch_bo(struct brw_context *brw,
                   drm_intel_bo **scratch_bo, int size)
@@ -287,18 +359,24 @@ void brwInitFragProgFuncs( struct dd_function_table *functions )
    functions->MemoryBarrier = brw_memory_barrier;
 }
 
+struct shader_times {
+   uint64_t time;
+   uint64_t written;
+   uint64_t reset;
+};
+
 void
 brw_init_shader_time(struct brw_context *brw)
 {
-   const int max_entries = 4096;
-   brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time",
-                                            max_entries * SHADER_TIME_STRIDE,
-                                            4096);
+   const int max_entries = 2048;
+   brw->shader_time.bo =
+      drm_intel_bo_alloc(brw->bufmgr, "shader time",
+                         max_entries * SHADER_TIME_STRIDE * 3, 4096);
    brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
    brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
    brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
                                           max_entries);
-   brw->shader_time.cumulative = rzalloc_array(brw, uint64_t,
+   brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
                                                max_entries);
    brw->shader_time.max_entries = max_entries;
 }
@@ -318,27 +396,6 @@ compare_time(const void *a, const void *b)
       return 1;
 }
 
-static void
-get_written_and_reset(struct brw_context *brw, int i,
-                      uint64_t *written, uint64_t *reset)
-{
-   enum shader_time_shader_type type = brw->shader_time.types[i];
-   assert(type == ST_VS || type == ST_GS || type == ST_FS8 ||
-          type == ST_FS16 || type == ST_CS);
-
-   /* Find where we recorded written and reset. */
-   int wi, ri;
-
-   for (wi = i; brw->shader_time.types[wi] != type + 1; wi++)
-      ;
-
-   for (ri = i; brw->shader_time.types[ri] != type + 2; ri++)
-      ;
-
-   *written = brw->shader_time.cumulative[wi];
-   *reset = brw->shader_time.cumulative[ri];
-}
-
 static void
 print_shader_time_line(const char *stage, const char *name,
                        int shader_num, uint64_t time, uint64_t total)
@@ -374,26 +431,15 @@ brw_report_shader_time(struct brw_context *brw)
       sorted[i] = &scaled[i];
 
       switch (type) {
-      case ST_VS_WRITTEN:
-      case ST_VS_RESET:
-      case ST_GS_WRITTEN:
-      case ST_GS_RESET:
-      case ST_FS8_WRITTEN:
-      case ST_FS8_RESET:
-      case ST_FS16_WRITTEN:
-      case ST_FS16_RESET:
-      case ST_CS_WRITTEN:
-      case ST_CS_RESET:
-         /* We'll handle these when along with the time. */
-         scaled[i] = 0;
-         continue;
-
       case ST_VS:
+      case ST_TCS:
+      case ST_TES:
       case ST_GS:
       case ST_FS8:
       case ST_FS16:
       case ST_CS:
-         get_written_and_reset(brw, i, &written, &reset);
+         written = brw->shader_time.cumulative[i].written;
+         reset = brw->shader_time.cumulative[i].reset;
          break;
 
       default:
@@ -405,7 +451,7 @@ brw_report_shader_time(struct brw_context *brw)
          break;
       }
 
-      uint64_t time = brw->shader_time.cumulative[i];
+      uint64_t time = brw->shader_time.cumulative[i].time;
       if (written) {
          scaled[i] = time / written * (written + reset);
       } else {
@@ -414,6 +460,8 @@ brw_report_shader_time(struct brw_context *brw)
 
       switch (type) {
       case ST_VS:
+      case ST_TCS:
+      case ST_TES:
       case ST_GS:
       case ST_FS8:
       case ST_FS16:
@@ -451,6 +499,12 @@ brw_report_shader_time(struct brw_context *brw)
       case ST_VS:
          stage = "vs";
          break;
+      case ST_TCS:
+         stage = "tcs";
+         break;
+      case ST_TES:
+         stage = "tes";
+         break;
       case ST_GS:
          stage = "gs";
          break;
@@ -474,6 +528,8 @@ brw_report_shader_time(struct brw_context *brw)
 
    fprintf(stderr, "\n");
    print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
+   print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
+   print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
    print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
    print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
    print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
@@ -491,16 +547,19 @@ brw_collect_shader_time(struct brw_context *brw)
     * overhead compared to the cost of tracking the time in the first place.
     */
    drm_intel_bo_map(brw->shader_time.bo, true);
-
-   uint32_t *times = brw->shader_time.bo->virtual;
+   void *bo_map = brw->shader_time.bo->virtual;
 
    for (int i = 0; i < brw->shader_time.num_entries; i++) {
-      brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4];
+      uint32_t *times = bo_map + i * 3 * SHADER_TIME_STRIDE;
+
+      brw->shader_time.cumulative[i].time += times[SHADER_TIME_STRIDE * 0 / 4];
+      brw->shader_time.cumulative[i].written += times[SHADER_TIME_STRIDE * 1 / 4];
+      brw->shader_time.cumulative[i].reset += times[SHADER_TIME_STRIDE * 2 / 4];
    }
 
    /* Zero the BO out to clear it out for our next collection.
     */
-   memset(times, 0, brw->shader_time.bo->size);
+   memset(bo_map, 0, brw->shader_time.bo->size);
    drm_intel_bo_unmap(brw->shader_time.bo);
 }
 
@@ -558,33 +617,6 @@ brw_destroy_shader_time(struct brw_context *brw)
    brw->shader_time.bo = NULL;
 }
 
-void
-brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
-                      unsigned surf_index)
-{
-   assert(surf_index < BRW_MAX_SURFACES);
-
-   prog_data->binding_table.size_bytes =
-      MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
-}
-
-bool
-brw_stage_prog_data_compare(const struct brw_stage_prog_data *a,
-                            const struct brw_stage_prog_data *b)
-{
-   /* Compare all the struct up to the pointers. */
-   if (memcmp(a, b, offsetof(struct brw_stage_prog_data, param)))
-      return false;
-
-   if (memcmp(a->param, b->param, a->nr_params * sizeof(void *)))
-      return false;
-
-   if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void *)))
-      return false;
-
-   return true;
-}
-
 void
 brw_stage_prog_data_free(const void *p)
 {
@@ -592,6 +624,7 @@ brw_stage_prog_data_free(const void *p)
 
    ralloc_free(prog_data->param);
    ralloc_free(prog_data->pull_param);
+   ralloc_free(prog_data->image_param);
 }
 
 void
@@ -599,13 +632,35 @@ brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
             struct gl_shader *shader, struct gl_program *prog)
 {
    if (shader_prog) {
-      fprintf(stderr,
-              "GLSL IR for native %s shader %d:\n", stage, shader_prog->Name);
-      _mesa_print_ir(stderr, shader->ir, NULL);
-      fprintf(stderr, "\n\n");
+      if (shader->ir) {
+         fprintf(stderr,
+                 "GLSL IR for native %s shader %d:\n",
+                 stage, shader_prog->Name);
+         _mesa_print_ir(stderr, shader->ir, NULL);
+         fprintf(stderr, "\n\n");
+      }
    } else {
       fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
               stage, prog->Id, stage);
       _mesa_print_program(prog);
    }
 }
+
+void
+brw_setup_tex_for_precompile(struct brw_context *brw,
+                             struct brw_sampler_prog_key_data *tex,
+                             struct gl_program *prog)
+{
+   const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
+   unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
+   for (unsigned i = 0; i < sampler_count; i++) {
+      if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
+         /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
+         tex->swizzles[i] =
+            MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
+      } else {
+         /* Color sampler: assume no swizzling. */
+         tex->swizzles[i] = SWIZZLE_XYZW;
+      }
+   }
+}