#include "brw_vec4_gs_visitor.h"
#include "gen6_gs_visitor.h"
#include "brw_fs.h"
+#include "brw_nir.h"
namespace brw {
dst_reg *
-vec4_gs_visitor::make_reg_for_system_value(int location,
- const glsl_type *type)
+vec4_gs_visitor::make_reg_for_system_value(int location)
{
- dst_reg *reg = new(mem_ctx) dst_reg(this, type);
+ dst_reg *reg = new(mem_ctx) dst_reg(this, glsl_type::int_type);
switch (location) {
case SYSTEM_VALUE_INVOCATION_ID:
*/
this->current_annotation = "clear r0.2";
dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD));
- vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, 0u);
+ vec4_instruction *inst = emit(GS_OPCODE_SET_DWORD_2, r0, brw_imm_ud(0u));
inst->force_writemask_all = true;
/* Create a virtual register to hold the vertex count */
/* Initialize the vertex_count register to 0 */
this->current_annotation = "initialize vertex_count";
- inst = emit(MOV(dst_reg(this->vertex_count), 0u));
+ inst = emit(MOV(dst_reg(this->vertex_count), brw_imm_ud(0u)));
inst->force_writemask_all = true;
if (c->control_data_header_size_bits > 0) {
*/
if (c->control_data_header_size_bits <= 32) {
this->current_annotation = "initialize control data bits";
- inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
- inst->force_writemask_all = true;
- }
- }
-
- /* If the geometry shader uses the gl_PointSize input, we need to fix it up
- * to account for the fact that the vertex shader stored it in the w
- * component of VARYING_SLOT_PSIZ.
- */
- if (nir->info.inputs_read & VARYING_BIT_PSIZ) {
- this->current_annotation = "swizzle gl_PointSize input";
- for (int vertex = 0; vertex < (int)nir->info.gs.vertices_in; vertex++) {
- dst_reg dst(ATTR,
- BRW_VARYING_SLOT_COUNT * vertex + VARYING_SLOT_PSIZ);
- dst.type = BRW_REGISTER_TYPE_F;
- src_reg src(dst);
- dst.writemask = WRITEMASK_X;
- src.swizzle = BRW_SWIZZLE_WWWW;
- inst = emit(MOV(dst, src));
-
- /* In dual instanced dispatch mode, dst has a width of 4, so we need
- * to make sure the MOV happens regardless of which channels are
- * enabled.
- */
+ inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
inst->force_writemask_all = true;
}
}
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
inst->force_writemask_all = true;
emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count,
- (uint32_t) gs_prog_data->output_vertex_size_hwords);
+ brw_imm_ud(gs_prog_data->output_vertex_size_hwords));
}
src_reg dword_index(this, glsl_type::uint_type);
if (urb_write_flags) {
src_reg prev_count(this, glsl_type::uint_type);
- emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
+ emit(ADD(dst_reg(prev_count), this->vertex_count,
+ brw_imm_ud(0xffffffffu)));
unsigned log2_bits_per_vertex =
_mesa_fls(c->control_data_bits_per_vertex);
emit(SHR(dst_reg(dword_index), prev_count,
- (uint32_t) (6 - log2_bits_per_vertex)));
+ brw_imm_ud(6 - log2_bits_per_vertex)));
}
/* Start building the URB write message. The first MRF gets a copy of
* the appropriate OWORD within the control data header.
*/
src_reg per_slot_offset(this, glsl_type::uint_type);
- emit(SHR(dst_reg(per_slot_offset), dword_index, 2u));
- emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u);
+ emit(SHR(dst_reg(per_slot_offset), dword_index, brw_imm_ud(2u)));
+ emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset,
+ brw_imm_ud(1u));
}
if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) {
* together.
*/
src_reg channel(this, glsl_type::uint_type);
- inst = emit(AND(dst_reg(channel), dword_index, 3u));
+ inst = emit(AND(dst_reg(channel), dword_index, brw_imm_ud(3u)));
inst->force_writemask_all = true;
src_reg one(this, glsl_type::uint_type);
- inst = emit(MOV(dst_reg(one), 1u));
+ inst = emit(MOV(dst_reg(one), brw_imm_ud(1u)));
inst->force_writemask_all = true;
src_reg channel_mask(this, glsl_type::uint_type);
inst = emit(SHL(dst_reg(channel_mask), one, channel));
/* reg::sid = stream_id */
src_reg sid(this, glsl_type::uint_type);
- emit(MOV(dst_reg(sid), stream_id));
+ emit(MOV(dst_reg(sid), brw_imm_ud(stream_id)));
/* reg:shift_count = 2 * (vertex_count - 1) */
src_reg shift_count(this, glsl_type::uint_type);
- emit(SHL(dst_reg(shift_count), this->vertex_count, 1u));
+ emit(SHL(dst_reg(shift_count), this->vertex_count, brw_imm_ud(1u)));
/* Note: we're relying on the fact that the GEN SHL instruction only pays
* attention to the lower 5 bits of its second source argument, so on this
* vertex_count & (32 / bits_per_vertex - 1) == 0
*/
vec4_instruction *inst =
- emit(AND(dst_null_d(), this->vertex_count,
- (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
+ emit(AND(dst_null_ud(), this->vertex_count,
+ brw_imm_ud(32 / c->control_data_bits_per_vertex - 1)));
inst->conditional_mod = BRW_CONDITIONAL_Z;
emit(IF(BRW_PREDICATE_NORMAL));
/* If vertex_count is 0, then no control data bits have been
* accumulated yet, so we skip emitting them.
*/
- emit(CMP(dst_null_d(), this->vertex_count, 0u,
+ emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u),
BRW_CONDITIONAL_NEQ));
emit(IF(BRW_PREDICATE_NORMAL));
emit_control_data_bits();
* effect of any call to EndPrimitive() that the shader may have
* made before outputting its first vertex.
*/
- inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
+ inst = emit(MOV(dst_reg(this->control_data_bits), brw_imm_ud(0u)));
inst->force_writemask_all = true;
}
emit(BRW_OPCODE_ENDIF);
/* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
src_reg one(this, glsl_type::uint_type);
- emit(MOV(dst_reg(one), 1u));
+ emit(MOV(dst_reg(one), brw_imm_ud(1u)));
src_reg prev_count(this, glsl_type::uint_type);
- emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
+ emit(ADD(dst_reg(prev_count), this->vertex_count, brw_imm_ud(0xffffffffu)));
src_reg mask(this, glsl_type::uint_type);
/* Note: we're relying on the fact that the GEN SHL instruction only pays
* attention to the lower 5 bits of its second source argument, so on this
void *mem_ctx,
const struct brw_gs_prog_key *key,
struct brw_gs_prog_data *prog_data,
- const nir_shader *shader,
+ const nir_shader *src_shader,
struct gl_shader_program *shader_prog,
int shader_time_index,
unsigned *final_assembly_size,
memset(&c, 0, sizeof(c));
c.key = *key;
+ const bool is_scalar = compiler->scalar_stage[MESA_SHADER_GEOMETRY];
+ nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+
+ /* The GLSL linker will have already matched up GS inputs and the outputs
+ * of prior stages. The driver does extend VS outputs in some cases, but
+ * only for legacy OpenGL or Gen4-5 hardware, neither of which offers
+ * geometry shader support. So we can safely ignore that.
+ *
+ * For SSO pipelines, we use a fixed VUE map layout based on variable
+ * locations, so we can rely on rendezvous-by-location making this work.
+ *
+ * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
+ * written by previous stages and shows up via payload magic.
+ */
+ GLbitfield64 inputs_read =
+ shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
+ brw_compute_vue_map(compiler->devinfo,
+ &c.input_vue_map, inputs_read,
+ shader->info.separate_shader);
+
+ shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
+ is_scalar);
+ brw_nir_lower_vue_inputs(shader, is_scalar, &c.input_vue_map);
+ brw_nir_lower_vue_outputs(shader, is_scalar);
+ shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar);
+
prog_data->include_primitive_id =
(shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) != 0;
if (compiler->devinfo->gen == 6)
max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES;
if (output_size_bytes > max_output_size_bytes)
- return false;
+ return NULL;
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
prog_data->output_topology =
get_hw_prim_for_gl_prim(shader->info.gs.output_primitive);
- /* The GLSL linker will have already matched up GS inputs and the outputs
- * of prior stages. The driver does extend VS outputs in some cases, but
- * only for legacy OpenGL or Gen4-5 hardware, neither of which offer
- * geometry shader support. So we can safely ignore that.
- *
- * For SSO pipelines, we use a fixed VUE map layout based on variable
- * locations, so we can rely on rendezvous-by-location making this work.
- *
- * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
- * written by previous stages and shows up via payload magic.
- */
- GLbitfield64 inputs_read =
- shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
- brw_compute_vue_map(compiler->devinfo,
- &c.input_vue_map, inputs_read,
- shader->info.separate_shader);
+ prog_data->vertices_in = shader->info.gs.vertices_in;
/* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
* need to program a URB read length of ceiling(num_slots / 2).
brw_print_vue_map(stderr, &prog_data->base.vue_map);
}
- if (compiler->scalar_gs) {
- /* TODO: Support instanced GS. We have basically no tests... */
- assert(prog_data->invocations == 1);
-
+ if (is_scalar) {
fs_visitor v(compiler, log_data, mem_ctx, &c, prog_data, shader,
shader_time_index);
if (v.run_gs()) {
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
+ prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
fs_generator g(compiler, log_data, mem_ctx, &c.key,
&prog_data->base.base, v.promoted_constants,
- false, "GS");
+ false, MESA_SHADER_GEOMETRY);
if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
const char *label =
shader->info.label ? shader->info.label : "unnamed";