i965: call brw_nir_lower_uniforms() after uniform linking is complete
[mesa.git] / src / mesa / drivers / dri / i965 / brw_program.c
index 1038d9a47a0a7d2de0ed3c43dd3d7311f270c358..6bb6f84182d8cadff57cb4bf086f6f9cef7d322b 100644 (file)
@@ -30,7 +30,6 @@
   */
 
 #include <pthread.h>
-#include "main/imports.h"
 #include "main/glspirv.h"
 #include "program/prog_parameter.h"
 #include "program/prog_print.h"
@@ -41,8 +40,8 @@
 #include "util/ralloc.h"
 #include "compiler/glsl/ir.h"
 #include "compiler/glsl/program.h"
+#include "compiler/glsl/gl_nir.h"
 #include "compiler/glsl/glsl_to_nir.h"
-#include "glsl/float64_glsl.h"
 
 #include "brw_program.h"
 #include "brw_context.h"
@@ -54,6 +53,7 @@
 #include "brw_gs.h"
 #include "brw_vs.h"
 #include "brw_wm.h"
+#include "brw_state.h"
 
 #include "main/shaderapi.h"
 #include "main/shaderobj.h"
@@ -72,54 +72,10 @@ brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
    }
 }
 
-static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
+static struct gl_program *brwNewProgram(struct gl_context *ctx,
+                                        gl_shader_stage stage,
                                         GLuint id, bool is_arb_asm);
 
-static nir_shader *
-compile_fp64_funcs(struct gl_context *ctx,
-                   const nir_shader_compiler_options *options,
-                   void *mem_ctx,
-                   gl_shader_stage stage)
-{
-   const GLuint name = ~0;
-   struct gl_shader *sh;
-
-   sh = _mesa_new_shader(name, stage);
-
-   sh->Source = float64_source;
-   sh->CompileStatus = COMPILE_FAILURE;
-   _mesa_glsl_compile_shader(ctx, sh, false, false, true);
-
-   if (!sh->CompileStatus) {
-      if (sh->InfoLog) {
-         _mesa_problem(ctx,
-                       "fp64 software impl compile failed:\n%s\nsource:\n%s\n",
-                       sh->InfoLog, float64_source);
-      }
-   }
-
-   struct gl_shader_program *sh_prog;
-   sh_prog = _mesa_new_shader_program(name);
-   sh_prog->Label = NULL;
-   sh_prog->NumShaders = 1;
-   sh_prog->Shaders = malloc(sizeof(struct gl_shader *));
-   sh_prog->Shaders[0] = sh;
-
-   struct gl_linked_shader *linked = rzalloc(NULL, struct gl_linked_shader);
-   linked->Stage = stage;
-   linked->Program =
-      brwNewProgram(ctx,
-                    _mesa_shader_stage_to_program(stage),
-                    name, false);
-
-   linked->ir = sh->ir;
-   sh_prog->_LinkedShaders[stage] = linked;
-
-   nir_shader *nir = glsl_to_nir(sh_prog, stage, options);
-
-   return nir_shader_clone(mem_ctx, nir);
-}
-
 nir_shader *
 brw_create_nir(struct brw_context *brw,
                const struct gl_shader_program *shader_prog,
@@ -138,14 +94,21 @@ brw_create_nir(struct brw_context *brw,
       if (shader_prog->data->spirv) {
          nir = _mesa_spirv_to_nir(ctx, shader_prog, stage, options);
       } else {
-         nir = glsl_to_nir(shader_prog, stage, options);
+         nir = glsl_to_nir(ctx, shader_prog, stage, options);
+
+         /* Remap the locations to slots so that attributes requiring two
+          * slots actually occupy two of them. For instance, if the IR has a
+          * dvec3 attr0 in location 0 and a vec4 attr1 in location 1, in NIR
+          * attr0 uses locations/slots 0 and 1, and attr1 uses location/slot 2.
+          */
+         if (nir->info.stage == MESA_SHADER_VERTEX)
+            nir_remap_dual_slot_attributes(nir, &prog->DualSlotInputs);
       }
       assert (nir);
 
-      nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
-      nir_lower_returns(nir);
-      nir_validate_shader(nir, "after glsl_to_nir or spirv_to_nir and "
-                               "return lowering");
+      nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out,
+                                NULL);
+      nir_validate_shader(nir, "after glsl_to_nir or spirv_to_nir");
       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                  nir_shader_get_entrypoint(nir), true, false);
    } else {
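
The dual-slot remap is easiest to see with a worked example. Below is a minimal
plain-C sketch, using a hypothetical remapped_slot() helper rather than the real
nir_remap_dual_slot_attributes() pass; it only illustrates how the DualSlotInputs
bitmask pushes later attributes down.

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical helper: given a bitmask of vertex attributes that need
     * two slots (64-bit vec3/vec4 inputs), compute the slot an attribute
     * location lands on after remapping.  Each dual-slot attribute in a
     * lower location pushes later attributes one extra slot down.
     */
    static unsigned
    remapped_slot(uint64_t dual_slot_inputs, unsigned location)
    {
       uint64_t lower_dual = dual_slot_inputs & ((1ull << location) - 1);
       return location + (unsigned)__builtin_popcountll(lower_dual);
    }

    int
    main(void)
    {
       /* The example from the comment above: dvec3 in location 0, vec4 in
        * location 1.  dual_slot_inputs = 0x1, so the dvec3 keeps slot 0
        * (occupying slots 0 and 1) and the vec4 moves to slot 2.
        */
       assert(remapped_slot(0x1, 0) == 0);
       assert(remapped_slot(0x1, 1) == 2);
       return 0;
    }
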
@@ -156,16 +119,12 @@ brw_create_nir(struct brw_context *brw,
 
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 
-   if (!devinfo->has_64bit_types && nir->info.uses_64bit) {
-      nir_shader *fp64 = compile_fp64_funcs(ctx, options, ralloc_parent(nir), stage);
-
-      nir_validate_shader(fp64, "fp64");
-      exec_list_append(&nir->functions, &fp64->functions);
+   if (!ctx->SoftFP64 && nir->info.uses_64bit &&
+       (options->lower_doubles_options & nir_lower_fp64_full_software)) {
+      ctx->SoftFP64 = glsl_float64_funcs_to_nir(ctx, options);
    }
 
-   nir = brw_preprocess_nir(brw->screen->compiler, nir);
-
-   NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
+   brw_preprocess_nir(brw->screen->compiler, nir, ctx->SoftFP64);
 
    if (stage == MESA_SHADER_TESS_CTRL) {
       /* Lower gl_PatchVerticesIn from a sys. value to a uniform on Gen8+. */
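
The SoftFP64 change above also reflects a design choice: instead of the removed
compile_fp64_funcs() building a throwaway gl_shader_program for every shader
that touches doubles, the fp64 software routines are now compiled at most once
and cached on the GL context. A minimal sketch of that lazy caching, reusing
only names that appear in this diff (the wrapper function itself is
hypothetical, and the usual Mesa/NIR headers are assumed):

    /* Hypothetical wrapper around the logic above: build the fp64 software
     * library lazily and cache it on the context so every later 64-bit
     * shader links against the same copy instead of recompiling it.
     */
    static const nir_shader *
    get_softfp64(struct gl_context *ctx, const nir_shader *nir,
                 const nir_shader_compiler_options *options)
    {
       if (!ctx->SoftFP64 && nir->info.uses_64bit &&
           (options->lower_doubles_options & nir_lower_fp64_full_software))
          ctx->SoftFP64 = glsl_float64_funcs_to_nir(ctx, options);

       return ctx->SoftFP64;
    }
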
@@ -202,11 +161,46 @@ brw_create_nir(struct brw_context *brw,
       }
    }
 
-   NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
-
    return nir;
 }
 
+static void
+shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
+{
+   assert(glsl_type_is_vector_or_scalar(type));
+
+   uint32_t comp_size = glsl_type_is_boolean(type)
+      ? 4 : glsl_get_bit_size(type) / 8;
+   unsigned length = glsl_get_vector_elements(type);
+   *size = comp_size * length;
+   *align = comp_size * (length == 3 ? 4 : length);
+}
+
+void
+brw_nir_lower_resources(nir_shader *nir, struct gl_shader_program *shader_prog,
+                        struct gl_program *prog,
+                        const struct gen_device_info *devinfo)
+{
+   NIR_PASS_V(nir, brw_nir_lower_uniforms, nir->options->lower_to_scalar);
+   NIR_PASS_V(prog->nir, gl_nir_lower_samplers, shader_prog);
+   prog->info.textures_used = prog->nir->info.textures_used;
+   prog->info.textures_used_by_txf = prog->nir->info.textures_used_by_txf;
+
+   NIR_PASS_V(prog->nir, brw_nir_lower_image_load_store, devinfo, NULL);
+
+   if (prog->nir->info.stage == MESA_SHADER_COMPUTE &&
+       shader_prog->data->spirv) {
+      NIR_PASS_V(prog->nir, nir_lower_vars_to_explicit_types,
+                 nir_var_mem_shared, shared_type_info);
+      NIR_PASS_V(prog->nir, nir_lower_explicit_io,
+                 nir_var_mem_shared, nir_address_format_32bit_offset);
+   }
+
+   NIR_PASS_V(prog->nir, gl_nir_lower_buffers, shader_prog);
+   /* Do a round of constant folding to clean up address calculations */
+   NIR_PASS_V(prog->nir, nir_opt_constant_folding);
+}
+
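
The shared_type_info() callback above encodes an std430-style rule for placing
compute-shader shared variables: a three-component vector is aligned as if it
had four components. nir_lower_vars_to_explicit_types() uses the callback to
assign explicit offsets, and nir_lower_explicit_io() then turns the accesses
into 32-bit-offset intrinsics. A small stand-alone sketch with worked values
(the helper name is hypothetical; the arithmetic matches the callback):

    #include <assert.h>

    /* Hypothetical stand-alone copy of the size/align rule used by
     * shared_type_info(), taking the component size and vector length
     * directly instead of a glsl_type.
     */
    static void
    shared_size_align(unsigned comp_size, unsigned length,
                      unsigned *size, unsigned *align)
    {
       *size = comp_size * length;
       *align = comp_size * (length == 3 ? 4 : length);  /* vec3 gets vec4 alignment */
    }

    int
    main(void)
    {
       unsigned size, align;

       shared_size_align(4, 3, &size, &align);   /* vec3  */
       assert(size == 12 && align == 16);

       shared_size_align(8, 3, &size, &align);   /* dvec3 */
       assert(size == 24 && align == 32);

       shared_size_align(4, 1, &size, &align);   /* float; bools are forced to 4 bytes */
       assert(size == 4 && align == 4);

       return 0;
    }
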
 void
 brw_shader_gather_info(nir_shader *nir, struct gl_program *prog)
 {
@@ -226,7 +220,8 @@ get_new_program_id(struct intel_screen *screen)
    return p_atomic_inc_return(&screen->program_id);
 }
 
-static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
+static struct gl_program *brwNewProgram(struct gl_context *ctx,
+                                        gl_shader_stage stage,
                                         GLuint id, bool is_arb_asm)
 {
    struct brw_context *brw = brw_context(ctx);
@@ -235,7 +230,7 @@ static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
    if (prog) {
       prog->id = get_new_program_id(brw->screen);
 
-      return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
+      return _mesa_init_gl_program(&prog->program, stage, id, is_arb_asm);
    }
 
    return NULL;
@@ -296,10 +291,13 @@ brwProgramStringNotify(struct gl_context *ctx,
 
       if (newFP == curFP)
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
+      _mesa_program_fragment_position_to_sysval(&newFP->program);
       newFP->id = get_new_program_id(brw->screen);
 
       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
 
+      brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);
+
       brw_shader_gather_info(prog->nir, prog);
 
       brw_fs_precompile(ctx, prog);
@@ -324,6 +322,8 @@ brwProgramStringNotify(struct gl_context *ctx,
       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
                                  compiler->scalar_stage[MESA_SHADER_VERTEX]);
 
+      brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);
+
       brw_shader_gather_info(prog->nir, prog);
 
       brw_vs_precompile(ctx, prog);
@@ -468,12 +468,29 @@ brw_alloc_stage_scratch(struct brw_context *brw,
        * brw->screen->subslice_total is the TOTAL number of subslices
        * and we wish to view that there are 4 subslices per slice
        * instead of the actual number of subslices per slice.
+       *
+       * For ICL, scratch space allocation is based on the number of threads
+       * in the base configuration.
        */
-      if (devinfo->gen >= 9 && devinfo->gen < 11)
+      if (devinfo->gen == 11)
+         subslices = 8;
+      else if (devinfo->gen >= 9 && devinfo->gen < 11)
          subslices = 4 * brw->screen->devinfo.num_slices;
 
       unsigned scratch_ids_per_subslice;
-      if (devinfo->is_haswell) {
+      if (devinfo->gen >= 11) {
+         /* The MEDIA_VFE_STATE docs say:
+          *
+          *    "Starting with this configuration, the Maximum Number of
+          *     Threads must be set to (#EU * 8) for GPGPU dispatches.
+          *
+          *     Although there are only 7 threads per EU in the configuration,
+          *     the FFTID is calculated as if there are 8 threads per EU,
+          *     which in turn requires a larger amount of Scratch Space to be
+          *     allocated by the driver."
+          */
+         scratch_ids_per_subslice = 8 * 8;
+      } else if (devinfo->is_haswell) {
          /* WaCSScratchSize:hsw
           *
           * Haswell's scratch space address calculation appears to be sparse
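
The Gen11 numbers above size the scratch space as if every subslice ran 8 EUs
with 8 threads each, even though the base configuration only has 7 threads per
EU. A quick sketch of the resulting thread count (hypothetical helper; the
per-stage adjustments and the actual buffer allocation are omitted):

    /* Hypothetical helper showing the Gen11 arithmetic from the change
     * above: 8 subslices in the base configuration, and 8 EUs * 8 threads
     * worth of scratch IDs per subslice as required by MEDIA_VFE_STATE.
     */
    static unsigned
    icl_scratch_thread_count(void)
    {
       const unsigned subslices = 8;
       const unsigned scratch_ids_per_subslice = 8 * 8;  /* #EU * 8 threads */

       return subslices * scratch_ids_per_subslice;      /* 512 scratch IDs */
    }

The scratch buffer is then roughly this thread count multiplied by the
per-thread scratch size requested by the compiler.
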
@@ -806,7 +823,7 @@ brw_dump_arb_asm(const char *stage, struct gl_program *prog)
 void
 brw_setup_tex_for_precompile(const struct gen_device_info *devinfo,
                              struct brw_sampler_prog_key_data *tex,
-                             struct gl_program *prog)
+                             const struct gl_program *prog)
 {
    const bool has_shader_channel_select = devinfo->is_haswell || devinfo->gen >= 8;
    unsigned sampler_count = util_last_bit(prog->SamplersUsed);
@@ -907,47 +924,49 @@ brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
 }
 
 void
-brw_prog_key_set_id(union brw_any_prog_key *key, gl_shader_stage stage,
-                    unsigned id)
-{
-   static const unsigned stage_offsets[] = {
-      offsetof(struct brw_vs_prog_key, program_string_id),
-      offsetof(struct brw_tcs_prog_key, program_string_id),
-      offsetof(struct brw_tes_prog_key, program_string_id),
-      offsetof(struct brw_gs_prog_key, program_string_id),
-      offsetof(struct brw_wm_prog_key, program_string_id),
-      offsetof(struct brw_cs_prog_key, program_string_id),
-   };
-   assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_offsets));
-   *(unsigned*)((uint8_t*)key + stage_offsets[stage]) = id;
-}
-
-void
-brw_populate_default_key(const struct gen_device_info *devinfo,
+brw_populate_default_key(const struct brw_compiler *compiler,
                          union brw_any_prog_key *prog_key,
                          struct gl_shader_program *sh_prog,
                          struct gl_program *prog)
 {
    switch (prog->info.stage) {
    case MESA_SHADER_VERTEX:
-      brw_vs_populate_default_key(devinfo, &prog_key->vs, prog);
+      brw_vs_populate_default_key(compiler, &prog_key->vs, prog);
       break;
    case MESA_SHADER_TESS_CTRL:
-      brw_tcs_populate_default_key(devinfo, &prog_key->tcs, sh_prog, prog);
+      brw_tcs_populate_default_key(compiler, &prog_key->tcs, sh_prog, prog);
       break;
    case MESA_SHADER_TESS_EVAL:
-      brw_tes_populate_default_key(devinfo, &prog_key->tes, sh_prog, prog);
+      brw_tes_populate_default_key(compiler, &prog_key->tes, sh_prog, prog);
       break;
    case MESA_SHADER_GEOMETRY:
-      brw_gs_populate_default_key(devinfo, &prog_key->gs, prog);
+      brw_gs_populate_default_key(compiler, &prog_key->gs, prog);
       break;
    case MESA_SHADER_FRAGMENT:
-      brw_wm_populate_default_key(devinfo, &prog_key->wm, prog);
+      brw_wm_populate_default_key(compiler, &prog_key->wm, prog);
       break;
    case MESA_SHADER_COMPUTE:
-      brw_cs_populate_default_key(devinfo, &prog_key->cs, prog);
+      brw_cs_populate_default_key(compiler, &prog_key->cs, prog);
       break;
    default:
       unreachable("Unsupported stage!");
    }
 }
+
+void
+brw_debug_recompile(struct brw_context *brw,
+                    gl_shader_stage stage,
+                    unsigned api_id,
+                    struct brw_base_prog_key *key)
+{
+   const struct brw_compiler *compiler = brw->screen->compiler;
+   enum brw_cache_id cache_id = brw_stage_cache_id(stage);
+
+   compiler->shader_perf_log(brw, "Recompiling %s shader for program %d\n",
+                             _mesa_shader_stage_to_string(stage), api_id);
+
+   const void *old_key =
+      brw_find_previous_compile(&brw->cache, cache_id, key->program_string_id);
+
+   brw_debug_key_recompile(compiler, brw, stage, old_key, key);
+}