i965: Allow creating planar YUV __DRIimages

[mesa.git] / src / mesa / drivers / dri / i965 / brw_vec4_gs_visitor.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp

index 1a09f76a20cccfc03e5b7d70af31f58805d8f4aa..76a80a54f44113c9067c5ae3cadb8e693af6c60e 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -30,6 +30,7 @@
  #include "brw_vec4_gs_visitor.h"
  #include "gen6_gs_visitor.h"
  #include "brw_fs.h"
+#include "brw_nir.h"
  
  namespace brw {
  
@@ -51,10 +52,9 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
  
  
  dst_reg *
-vec4_gs_visitor::make_reg_for_system_value(int location,
-                                           const glsl_type *type)
+vec4_gs_visitor::make_reg_for_system_value(int location)
  {
-   dst_reg *reg = new(mem_ctx) dst_reg(this, type);
+   dst_reg *reg = new(mem_ctx) dst_reg(this, glsl_type::int_type);
  
     switch (location) {
     case SYSTEM_VALUE_INVOCATION_ID:
@@ -153,7 +153,7 @@ vec4_gs_visitor::emit_prolog()
      */
     this->current_annotation = "clear r0.2";
     dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
-   vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, 0u);
+   vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, brw_imm_ud(0u));
     inst->force_writemask_all = true;
  
     /* Create a virtual register to hold the vertex count */
@@ -161,7 +161,7 @@ vec4_gs_visitor::emit_prolog()
  
     /* Initialize the vertex_count register to 0 */
     this->current_annotation = "initialize vertex_count";
-   inst = emit(MOV(dst_reg(this->vertex_count), 0u));
+   inst = emit(MOV(dst_reg(this->vertex_count), brw_imm_ud(0u)));
     inst->force_writemask_all = true;
  
     if (c->control_data_header_size_bits > 0) {
@@ -176,30 +176,7 @@ vec4_gs_visitor::emit_prolog()
         */
        if (c->control_data_header_size_bits <= 32) {
           this->current_annotation = "initialize control data bits";
-         inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
-         inst->force_writemask_all = true;
-      }
-   }
-
-   /* If the geometry shader uses the gl_PointSize input, we need to fix it up
-    * to account for the fact that the vertex shader stored it in the w
-    * component of VARYING_SLOT_PSIZ.
-    */
-   if (nir->info.inputs_read & VARYING_BIT_PSIZ) {
-      this->current_annotation = "swizzle gl_PointSize input";
-      for (int vertex = 0; vertex < (int)nir->info.gs.vertices_in; vertex++) {
-         dst_reg dst(ATTR,
-                     BRW_VARYING_SLOT_COUNT * vertex + VARYING_SLOT_PSIZ);
-         dst.type = BRW_REGISTER_TYPE_F;
-         src_reg src(dst);
-         dst.writemask = WRITEMASK_X;
-         src.swizzle = BRW_SWIZZLE_WWWW;
-         inst = emit(MOV(dst, src));
-
-         /* In dual instanced dispatch mode, dst has a width of 4, so we need
-          * to make sure the MOV happens regardless of which channels are
-          * enabled.
-          */
+         inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
           inst->force_writemask_all = true;
        }
     }
@@ -274,7 +251,7 @@ vec4_gs_visitor::emit_urb_write_header(int mrf)
     vec4_instruction *inst = emit(MOV(mrf_reg, r0));
     inst->force_writemask_all = true;
     emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count,
-        (uint32_t) gs_prog_data->output_vertex_size_hwords);
+        brw_imm_ud(gs_prog_data->output_vertex_size_hwords));
  }
  
  
@@ -354,11 +331,12 @@ vec4_gs_visitor::emit_control_data_bits()
     src_reg dword_index(this, glsl_type::uint_type);
     if (urb_write_flags) {
        src_reg prev_count(this, glsl_type::uint_type);
-      emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
+      emit(ADD(dst_reg(prev_count), this->vertex_count,
+               brw_imm_ud(0xffffffffu)));
        unsigned log2_bits_per_vertex =
           _mesa_fls(c->control_data_bits_per_vertex);
        emit(SHR(dst_reg(dword_index), prev_count,
-               (uint32_t) (6 - log2_bits_per_vertex)));
+               brw_imm_ud(6 - log2_bits_per_vertex)));
     }
  
     /* Start building the URB write message.  The first MRF gets a copy of
@@ -375,8 +353,9 @@ vec4_gs_visitor::emit_control_data_bits()
         * the appropriate OWORD within the control data header.
         */
        src_reg per_slot_offset(this, glsl_type::uint_type);
-      emit(SHR(dst_reg(per_slot_offset), dword_index, 2u));
-      emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u);
+      emit(SHR(dst_reg(per_slot_offset), dword_index, brw_imm_ud(2u)));
+      emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset,
+           brw_imm_ud(1u));
     }
  
     if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
@@ -388,10 +367,10 @@ vec4_gs_visitor::emit_control_data_bits()
         * together.
         */
        src_reg channel(this, glsl_type::uint_type);
-      inst = emit(AND(dst_reg(channel), dword_index, 3u));
+      inst = emit(AND(dst_reg(channel), dword_index, brw_imm_ud(3u)));
        inst->force_writemask_all = true;
        src_reg one(this, glsl_type::uint_type);
-      inst = emit(MOV(dst_reg(one), 1u));
+      inst = emit(MOV(dst_reg(one), brw_imm_ud(1u)));
        inst->force_writemask_all = true;
        src_reg channel_mask(this, glsl_type::uint_type);
        inst = emit(SHL(dst_reg(channel_mask), one, channel));
@@ -441,11 +420,11 @@ vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id)
  
     /* reg::sid = stream_id */
     src_reg sid(this, glsl_type::uint_type);
-   emit(MOV(dst_reg(sid), stream_id));
+   emit(MOV(dst_reg(sid), brw_imm_ud(stream_id)));
  
     /* reg:shift_count = 2 * (vertex_count - 1) */
     src_reg shift_count(this, glsl_type::uint_type);
-   emit(SHL(dst_reg(shift_count), this->vertex_count, 1u));
+   emit(SHL(dst_reg(shift_count), this->vertex_count, brw_imm_ud(1u)));
  
     /* Note: we're relying on the fact that the GEN SHL instruction only pays
      * attention to the lower 5 bits of its second source argument, so on this
@@ -503,8 +482,8 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
         *     vertex_count & (32 / bits_per_vertex - 1) == 0
         */
        vec4_instruction *inst =
-         emit(AND(dst_null_d(), this->vertex_count,
-                  (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
+         emit(AND(dst_null_ud(), this->vertex_count,
+                  brw_imm_ud(32 / c->control_data_bits_per_vertex - 1)));
        inst->conditional_mod = BRW_CONDITIONAL_Z;
  
        emit(IF(BRW_PREDICATE_NORMAL));
@@ -512,7 +491,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
           /* If vertex_count is 0, then no control data bits have been
            * accumulated yet, so we skip emitting them.
            */
-         emit(CMP(dst_null_d(), this->vertex_count, 0u,
+         emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u),
                    BRW_CONDITIONAL_NEQ));
           emit(IF(BRW_PREDICATE_NORMAL));
           emit_control_data_bits();
@@ -525,7 +504,7 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id)
            * effect of any call to EndPrimitive() that the shader may have
            * made before outputting its first vertex.
            */
-         inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
+         inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
           inst->force_writemask_all = true;
        }
        emit(BRW_OPCODE_ENDIF);
@@ -586,9 +565,9 @@ vec4_gs_visitor::gs_end_primitive()
  
     /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
     src_reg one(this, glsl_type::uint_type);
-   emit(MOV(dst_reg(one), 1u));
+   emit(MOV(dst_reg(one), brw_imm_ud(1u)));
     src_reg prev_count(this, glsl_type::uint_type);
-   emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
+   emit(ADD(dst_reg(prev_count), this->vertex_count, brw_imm_ud(0xffffffffu)));
     src_reg mask(this, glsl_type::uint_type);
     /* Note: we're relying on the fact that the GEN SHL instruction only pays
      * attention to the lower 5 bits of its second source argument, so on this
@@ -604,7 +583,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
                 void *mem_ctx,
                 const struct brw_gs_prog_key *key,
                 struct brw_gs_prog_data *prog_data,
-               const nir_shader *shader,
+               const nir_shader *src_shader,
                 struct gl_shader_program *shader_prog,
                 int shader_time_index,
                 unsigned *final_assembly_size,
@@ -614,6 +593,32 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
     memset(&c, 0, sizeof(c));
     c.key = *key;
  
+   const bool is_scalar = compiler->scalar_stage[MESA_SHADER_GEOMETRY];
+   nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+
+   /* The GLSL linker will have already matched up GS inputs and the outputs
+    * of prior stages.  The driver does extend VS outputs in some cases, but
+    * only for legacy OpenGL or Gen4-5 hardware, neither of which offer
+    * geometry shader support.  So we can safely ignore that.
+    *
+    * For SSO pipelines, we use a fixed VUE map layout based on variable
+    * locations, so we can rely on rendezvous-by-location making this work.
+    *
+    * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
+    * written by previous stages and shows up via payload magic.
+    */
+   GLbitfield64 inputs_read =
+      shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
+   brw_compute_vue_map(compiler->devinfo,
+                       &c.input_vue_map, inputs_read,
+                       shader->info.separate_shader);
+
+   shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
+                                      is_scalar);
+   brw_nir_lower_vue_inputs(shader, is_scalar, &c.input_vue_map);
+   brw_nir_lower_vue_outputs(shader, is_scalar);
+   shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar);
+
     prog_data->include_primitive_id =
        (shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) != 0;
  
@@ -773,7 +778,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
     if (compiler->devinfo->gen == 6)
        max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
     if (output_size_bytes > max_output_size_bytes)
-      return false;
+      return NULL;
  
  
     /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
@@ -787,22 +792,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
     prog_data->output_topology =
        get_hw_prim_for_gl_prim(shader->info.gs.output_primitive);
  
-   /* The GLSL linker will have already matched up GS inputs and the outputs
-    * of prior stages.  The driver does extend VS outputs in some cases, but
-    * only for legacy OpenGL or Gen4-5 hardware, neither of which offer
-    * geometry shader support.  So we can safely ignore that.
-    *
-    * For SSO pipelines, we use a fixed VUE map layout based on variable
-    * locations, so we can rely on rendezvous-by-location making this work.
-    *
-    * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
-    * written by previous stages and shows up via payload magic.
-    */
-   GLbitfield64 inputs_read =
-      shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
-   brw_compute_vue_map(compiler->devinfo,
-                       &c.input_vue_map, inputs_read,
-                       shader->info.separate_shader);
+   prog_data->vertices_in = shader->info.gs.vertices_in;
  
     /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
      * need to program a URB read length of ceiling(num_slots / 2).
@@ -819,18 +809,16 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
        brw_print_vue_map(stderr, &prog_data->base.vue_map);
     }
  
-   if (compiler->scalar_gs) {
-      /* TODO: Support instanced GS.  We have basically no tests... */
-      assert(prog_data->invocations == 1);
-
+   if (is_scalar) {
        fs_visitor v(compiler, log_data, mem_ctx, &c, prog_data, shader,
                     shader_time_index);
        if (v.run_gs()) {
           prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
+         prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
  
           fs_generator g(compiler, log_data, mem_ctx, &c.key,
                          &prog_data->base.base, v.promoted_constants,
-                        false, "GS");
+                        false, MESA_SHADER_GEOMETRY);
           if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
              const char *label =
                 shader->info.label ? shader->info.label : "unnamed";