X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_vec4_gs_visitor.cpp;h=05cbab26620669494d23d212e0c586f55e27b51e;hb=e5f735a986576a7634dfb8fed23c969bf4260f45;hp=ca59927cd3ab75de88104f8a0f6aa1b500f76a7e;hpb=95c917668ca887432b8a7a299555c6c2ca449e04;p=mesa.git

diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp
index ca59927cd3a..05cbab26620 100644
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -32,7 +32,7 @@
 #include "brw_cfg.h"
 #include "brw_fs.h"
 #include "brw_nir.h"
-#include "common/gen_debug.h"
+#include "dev/gen_debug.h"
 
 namespace brw {
 
@@ -44,7 +44,7 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
                                  void *mem_ctx,
                                  bool no_spills,
                                  int shader_time_index)
-   : vec4_visitor(compiler, log_data, &c->key.tex,
+   : vec4_visitor(compiler, log_data, &c->key.base.tex,
                   &prog_data->base, shader,  mem_ctx,
                   no_spills, shader_time_index),
      c(c),
@@ -610,15 +610,17 @@ static const GLuint gl_prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = {
    [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
 };
 
+} /* namespace brw */
+
 extern "C" const unsigned *
 brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
                void *mem_ctx,
                const struct brw_gs_prog_key *key,
                struct brw_gs_prog_data *prog_data,
-               const nir_shader *src_shader,
+               nir_shader *nir,
                struct gl_program *prog,
                int shader_time_index,
-               unsigned *final_assembly_size,
+               struct brw_compile_stats *stats,
                char **error_str)
 {
    struct brw_gs_compile c;
@@ -626,7 +628,6 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
    c.key = *key;
 
    const bool is_scalar = compiler->scalar_stage[MESA_SHADER_GEOMETRY];
-   nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
 
    /* The GLSL linker will have already matched up GS inputs and the outputs
     * of prior stages.  The driver does extend VS outputs in some cases, but
@@ -636,40 +637,40 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
     * For SSO pipelines, we use a fixed VUE map layout based on variable
     * locations, so we can rely on rendezvous-by-location making this work.
     */
-   GLbitfield64 inputs_read = shader->info.inputs_read;
+   GLbitfield64 inputs_read = nir->info.inputs_read;
    brw_compute_vue_map(compiler->devinfo,
                        &c.input_vue_map, inputs_read,
-                       shader->info.separate_shader);
+                       nir->info.separate_shader, 1);
 
-   shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, is_scalar);
-   brw_nir_lower_vue_inputs(shader, &c.input_vue_map);
-   brw_nir_lower_vue_outputs(shader, is_scalar);
-   shader = brw_postprocess_nir(shader, compiler, is_scalar);
+   brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
+   brw_nir_lower_vue_inputs(nir, &c.input_vue_map);
+   brw_nir_lower_vue_outputs(nir);
+   brw_postprocess_nir(nir, compiler, is_scalar);
 
    prog_data->base.clip_distance_mask =
-      ((1 << shader->info.clip_distance_array_size) - 1);
+      ((1 << nir->info.clip_distance_array_size) - 1);
    prog_data->base.cull_distance_mask =
-      ((1 << shader->info.cull_distance_array_size) - 1) <<
-      shader->info.clip_distance_array_size;
+      ((1 << nir->info.cull_distance_array_size) - 1) <<
+      nir->info.clip_distance_array_size;
 
    prog_data->include_primitive_id =
-      (shader->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID)) != 0;
+      (nir->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID)) != 0;
 
-   prog_data->invocations = shader->info.gs.invocations;
+   prog_data->invocations = nir->info.gs.invocations;
 
    if (compiler->devinfo->gen >= 8)
-      prog_data->static_vertex_count = nir_gs_count_vertices(shader);
+      prog_data->static_vertex_count = nir_gs_count_vertices(nir);
 
    if (compiler->devinfo->gen >= 7) {
-      if (shader->info.gs.output_primitive == GL_POINTS) {
+      if (nir->info.gs.output_primitive == GL_POINTS) {
          /* When the output type is points, the geometry shader may output data
           * to multiple streams, and EndPrimitive() has no effect.  So we
           * configure the hardware to interpret the control data as stream ID.
           */
          prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
 
-         /* We only have to emit control bits if we are using streams */
-         if (prog && prog->info.gs.uses_streams)
+         /* We only have to emit control bits if we are using non-zero streams */
+         if (nir->info.gs.active_stream_mask != (1 << 0))
             c.control_data_bits_per_vertex = 2;
          else
             c.control_data_bits_per_vertex = 0;
@@ -686,14 +687,14 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
           * EndPrimitive().
           */
          c.control_data_bits_per_vertex =
-            shader->info.gs.uses_end_primitive ? 1 : 0;
+            nir->info.gs.uses_end_primitive ? 1 : 0;
       }
    } else {
       /* There are no control data bits in gen6. */
       c.control_data_bits_per_vertex = 0;
    }
    c.control_data_header_size_bits =
-      shader->info.gs.vertices_out * c.control_data_bits_per_vertex;
+      nir->info.gs.vertices_out * c.control_data_bits_per_vertex;
 
    /* 1 HWORD = 32 bytes = 256 bits */
    prog_data->control_data_header_size_hwords =
@@ -788,7 +789,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
    unsigned output_size_bytes;
    if (compiler->devinfo->gen >= 7) {
       output_size_bytes =
-         prog_data->output_vertex_size_hwords * 32 * shader->info.gs.vertices_out;
+         prog_data->output_vertex_size_hwords * 32 * nir->info.gs.vertices_out;
       output_size_bytes += 32 * prog_data->control_data_header_size_hwords;
    } else {
       output_size_bytes = prog_data->output_vertex_size_hwords * 32;
@@ -829,11 +830,11 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
       prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
    }
 
-   assert(shader->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim));
+   assert(nir->info.gs.output_primitive < ARRAY_SIZE(brw::gl_prim_to_hw_prim));
    prog_data->output_topology =
-      gl_prim_to_hw_prim[shader->info.gs.output_primitive];
+      brw::gl_prim_to_hw_prim[nir->info.gs.output_primitive];
 
-   prog_data->vertices_in = shader->info.gs.vertices_in;
+   prog_data->vertices_in = nir->info.gs.vertices_in;
 
    /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
     * need to program a URB read length of ceiling(num_slots / 2).
@@ -851,24 +852,25 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
    }
 
    if (is_scalar) {
-      fs_visitor v(compiler, log_data, mem_ctx, &c, prog_data, shader,
+      fs_visitor v(compiler, log_data, mem_ctx, &c, prog_data, nir,
                    shader_time_index);
       if (v.run_gs()) {
          prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
          prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
 
-         fs_generator g(compiler, log_data, mem_ctx, &c.key,
-                        &prog_data->base.base, v.promoted_constants,
-                        false, MESA_SHADER_GEOMETRY);
+         fs_generator g(compiler, log_data, mem_ctx,
+                        &prog_data->base.base, false, MESA_SHADER_GEOMETRY);
          if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
             const char *label =
-               shader->info.label ? shader->info.label : "unnamed";
+               nir->info.label ? nir->info.label : "unnamed";
             char *name = ralloc_asprintf(mem_ctx, "%s geometry shader %s",
-                                         label, shader->info.name);
+                                         label, nir->info.name);
             g.enable_debug(name);
          }
-         g.generate_code(v.cfg, 8);
-         return g.get_assembly(final_assembly_size);
+         g.generate_code(v.cfg, 8, v.shader_stats,
+                         v.performance_analysis.require(), stats);
+         g.add_const_data(nir->constant_data, nir->constant_data_size);
+         return g.get_assembly();
       }
    }
 
@@ -881,7 +883,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
           likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
          prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
 
-         vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
+         brw::vec4_gs_visitor v(compiler, log_data, &c, prog_data, nir,
                            mem_ctx, true /* no_spills */, shader_time_index);
 
          /* Backup 'nr_params' and 'param' as they can be modified by the
@@ -890,17 +892,18 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
           * values.
           */
          const unsigned param_count = prog_data->base.base.nr_params;
-         gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
-                                                  param_count);
+         uint32_t *param = ralloc_array(NULL, uint32_t, param_count);
          memcpy(param, prog_data->base.base.param,
-                sizeof(gl_constant_value*) * param_count);
+                sizeof(uint32_t) * param_count);
 
          if (v.run()) {
             /* Success! Backup is not needed */
             ralloc_free(param);
             return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
-                                              shader, &prog_data->base, v.cfg,
-                                              final_assembly_size);
+                                              nir, &prog_data->base,
+                                              v.cfg,
+                                              v.performance_analysis.require(),
+                                              stats);
          } else {
             /* These variables could be modified by the execution of the GS
              * visitor if it packed the uniforms in the push constant buffer.
@@ -910,8 +913,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
              * FIXME: Could more variables be modified by this execution?
              */
             memcpy(prog_data->base.base.param, param,
-                   sizeof(gl_constant_value*) * param_count);
+                   sizeof(uint32_t) * param_count);
             prog_data->base.base.nr_params = param_count;
+            prog_data->base.base.nr_pull_params = 0;
             ralloc_free(param);
          }
       }
@@ -945,30 +949,28 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
    else
       prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
 
-   vec4_gs_visitor *gs = NULL;
+   brw::vec4_gs_visitor *gs = NULL;
    const unsigned *ret = NULL;
 
    if (compiler->devinfo->gen >= 7)
-      gs = new vec4_gs_visitor(compiler, log_data, &c, prog_data,
-                               shader, mem_ctx, false /* no_spills */,
+      gs = new brw::vec4_gs_visitor(compiler, log_data, &c, prog_data,
+                               nir, mem_ctx, false /* no_spills */,
                                shader_time_index);
    else
-      gs = new gen6_gs_visitor(compiler, log_data, &c, prog_data, prog,
-                               shader, mem_ctx, false /* no_spills */,
+      gs = new brw::gen6_gs_visitor(compiler, log_data, &c, prog_data, prog,
+                               nir, mem_ctx, false /* no_spills */,
                                shader_time_index);
 
    if (!gs->run()) {
       if (error_str)
          *error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
    } else {
-      ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader,
+      ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
                                        &prog_data->base, gs->cfg,
-                                       final_assembly_size);
+                                       gs->performance_analysis.require(),
+                                       stats);
    }
 
    delete gs;
    return ret;
 }
-
-
-} /* namespace brw */