X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vec4_gs_visitor.cpp;h=704644e7429c4b29a7558f7ea155d88131412aa8;hb=b38fcd0aea8d17919ecd9cc7afc518cfb2c01c27;hp=eacb2f5be7b38e0bac0ee1819420f5f4d08adc42;hpb=0f8ec779ddff4126837a7d4216ecf1d4b97e93d2;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index eacb2f5be7b..704644e7429 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -34,26 +34,29 @@ const unsigned MAX_GS_INPUT_VERTICES = 6; namespace brw { -vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw, +vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler, + void *log_data, struct brw_gs_compile *c, struct gl_shader_program *prog, void *mem_ctx, - bool no_spills) - : vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base, + bool no_spills, + int shader_time_index) + : vec4_visitor(compiler, log_data, + &c->gp->program.Base, &c->key.base, &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx, - no_spills, - ST_GS, ST_GS_WRITTEN, ST_GS_RESET), + no_spills, shader_time_index), c(c) { } dst_reg * -vec4_gs_visitor::make_reg_for_system_value(ir_variable *ir) +vec4_gs_visitor::make_reg_for_system_value(int location, + const glsl_type *type) { - dst_reg *reg = new(mem_ctx) dst_reg(this, ir->type); + dst_reg *reg = new(mem_ctx) dst_reg(this, type); - switch (ir->data.location) { + switch (location) { case SYSTEM_VALUE_INVOCATION_ID: this->current_annotation = "initialize gl_InvocationID"; emit(GS_OPCODE_GET_INSTANCE_ID, *reg); @@ -346,90 +349,82 @@ vec4_gs_visitor::emit_control_data_bits() if (c->control_data_header_size_bits > 128) urb_write_flags = urb_write_flags | BRW_URB_WRITE_PER_SLOT_OFFSET; - /* If vertex_count is 0, then no control data bits have been accumulated - * yet, so we should do nothing. 
+ /* If we are using either channel masks or a per-slot offset, then we + * need to figure out which DWORD we are trying to write to, using the + * formula: + * + * dword_index = (vertex_count - 1) * bits_per_vertex / 32 + * + * Since bits_per_vertex is a power of two, and is known at compile + * time, this can be optimized to: + * + * dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex)) */ - emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_NEQ)); - emit(IF(BRW_PREDICATE_NORMAL)); - { - /* If we are using either channel masks or a per-slot offset, then we - * need to figure out which DWORD we are trying to write to, using the - * formula: - * - * dword_index = (vertex_count - 1) * bits_per_vertex / 32 - * - * Since bits_per_vertex is a power of two, and is known at compile - * time, this can be optimized to: - * - * dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex)) + src_reg dword_index(this, glsl_type::uint_type); + if (urb_write_flags) { + src_reg prev_count(this, glsl_type::uint_type); + emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu)); + unsigned log2_bits_per_vertex = + _mesa_fls(c->control_data_bits_per_vertex); + emit(SHR(dst_reg(dword_index), prev_count, + (uint32_t) (6 - log2_bits_per_vertex))); + } + + /* Start building the URB write message. The first MRF gets a copy of + * R0. + */ + int base_mrf = 1; + dst_reg mrf_reg(MRF, base_mrf); + src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + vec4_instruction *inst = emit(MOV(mrf_reg, r0)); + inst->force_writemask_all = true; + + if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) { + /* Set the per-slot offset to dword_index / 4, so that we'll write to + * the appropriate OWORD within the control data header. 
*/ - src_reg dword_index(this, glsl_type::uint_type); - if (urb_write_flags) { - src_reg prev_count(this, glsl_type::uint_type); - emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu)); - unsigned log2_bits_per_vertex = - _mesa_fls(c->control_data_bits_per_vertex); - emit(SHR(dst_reg(dword_index), prev_count, - (uint32_t) (6 - log2_bits_per_vertex))); - } + src_reg per_slot_offset(this, glsl_type::uint_type); + emit(SHR(dst_reg(per_slot_offset), dword_index, 2u)); + emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u); + } - /* Start building the URB write message. The first MRF gets a copy of - * R0. + if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) { + /* Set the channel masks to 1 << (dword_index % 4), so that we'll + * write to the appropriate DWORD within the OWORD. We need to do + * this computation with force_writemask_all, otherwise garbage data + * from invocation 0 might clobber the mask for invocation 1 when + * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks + * together. */ - int base_mrf = 1; - dst_reg mrf_reg(MRF, base_mrf); - src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); - vec4_instruction *inst = emit(MOV(mrf_reg, r0)); + src_reg channel(this, glsl_type::uint_type); + inst = emit(AND(dst_reg(channel), dword_index, 3u)); inst->force_writemask_all = true; - - if (urb_write_flags & BRW_URB_WRITE_PER_SLOT_OFFSET) { - /* Set the per-slot offset to dword_index / 4, to that we'll write to - * the appropriate OWORD within the control data header. - */ - src_reg per_slot_offset(this, glsl_type::uint_type); - emit(SHR(dst_reg(per_slot_offset), dword_index, 2u)); - emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u); - } - - if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) { - /* Set the channel masks to 1 << (dword_index % 4), so that we'll - * write to the appropriate DWORD within the OWORD. 
We need to do - * this computation with force_writemask_all, otherwise garbage data - * from invocation 0 might clobber the mask for invocation 1 when - * GS_OPCODE_PREPARE_CHANNEL_MASKS tries to OR the two masks - * together. - */ - src_reg channel(this, glsl_type::uint_type); - inst = emit(AND(dst_reg(channel), dword_index, 3u)); - inst->force_writemask_all = true; - src_reg one(this, glsl_type::uint_type); - inst = emit(MOV(dst_reg(one), 1u)); - inst->force_writemask_all = true; - src_reg channel_mask(this, glsl_type::uint_type); - inst = emit(SHL(dst_reg(channel_mask), one, channel)); - inst->force_writemask_all = true; - emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask), - channel_mask); - emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask); - } - - /* Store the control data bits in the message payload and send it. */ - dst_reg mrf_reg2(MRF, base_mrf + 1); - inst = emit(MOV(mrf_reg2, this->control_data_bits)); + src_reg one(this, glsl_type::uint_type); + inst = emit(MOV(dst_reg(one), 1u)); inst->force_writemask_all = true; - inst = emit(GS_OPCODE_URB_WRITE); - inst->urb_write_flags = urb_write_flags; - /* We need to increment Global Offset by 256-bits to make room for - * Broadwell's extra "Vertex Count" payload at the beginning of the - * URB entry. Since this is an OWord message, Global Offset is counted - * in 128-bit units, so we must set it to 2. - */ - if (devinfo->gen >= 8) - inst->offset = 2; - inst->base_mrf = base_mrf; - inst->mlen = 2; + src_reg channel_mask(this, glsl_type::uint_type); + inst = emit(SHL(dst_reg(channel_mask), one, channel)); + inst->force_writemask_all = true; + emit(GS_OPCODE_PREPARE_CHANNEL_MASKS, dst_reg(channel_mask), + channel_mask); + emit(GS_OPCODE_SET_CHANNEL_MASKS, mrf_reg, channel_mask); } - emit(BRW_OPCODE_ENDIF); + + /* Store the control data bits in the message payload and send it. 
*/ + dst_reg mrf_reg2(MRF, base_mrf + 1); + inst = emit(MOV(mrf_reg2, this->control_data_bits)); + inst->force_writemask_all = true; + inst = emit(GS_OPCODE_URB_WRITE); + inst->urb_write_flags = urb_write_flags; + /* We need to increment Global Offset by 256-bits to make room for + * Broadwell's extra "Vertex Count" payload at the beginning of the + * URB entry. Since this is an OWord message, Global Offset is counted + * in 128-bit units, so we must set it to 2. + */ + if (devinfo->gen >= 8) + inst->offset = 2; + inst->base_mrf = base_mrf; + inst->mlen = 2; } void @@ -529,9 +524,17 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir) emit(AND(dst_null_d(), this->vertex_count, (uint32_t) (32 / c->control_data_bits_per_vertex - 1))); inst->conditional_mod = BRW_CONDITIONAL_Z; + emit(IF(BRW_PREDICATE_NORMAL)); { + /* If vertex_count is 0, then no control data bits have been + * accumulated yet, so we skip emitting them. + */ + emit(CMP(dst_null_d(), this->vertex_count, 0u, + BRW_CONDITIONAL_NEQ)); + emit(IF(BRW_PREDICATE_NORMAL)); emit_control_data_bits(); + emit(BRW_OPCODE_ENDIF); /* Reset control_data_bits to 0 so we can start accumulating a new * batch. @@ -629,7 +632,8 @@ generate_assembly(struct brw_context *brw, const cfg_t *cfg, unsigned *final_assembly_size) { - vec4_generator g(brw, shader_prog, prog, prog_data, mem_ctx, + vec4_generator g(brw->intelScreen->compiler, brw, + shader_prog, prog, prog_data, mem_ctx, INTEL_DEBUG & DEBUG_GS, "geometry", "GS"); return g.generate_assembly(cfg, final_assembly_size); } @@ -648,6 +652,10 @@ brw_gs_emit(struct brw_context *brw, brw_dump_ir("geometry", prog, &shader->base, NULL); } + int st_index = -1; + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + st_index = brw_get_shader_time_index(brw, prog, NULL, ST_GS); + if (brw->gen >= 7) { /* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do * so without spilling. 
If the GS invocations count > 1, then we can't use @@ -657,8 +665,9 @@ brw_gs_emit(struct brw_context *brw, likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) { c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_gs_visitor v(brw, c, prog, mem_ctx, true /* no_spills */); - if (v.run()) { + vec4_gs_visitor v(brw->intelScreen->compiler, brw, + c, prog, mem_ctx, true /* no_spills */, st_index); + if (v.run(NULL /* clip planes */)) { return generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base, mem_ctx, v.cfg, final_assembly_size); @@ -698,11 +707,15 @@ brw_gs_emit(struct brw_context *brw, const unsigned *ret = NULL; if (brw->gen >= 7) - gs = new vec4_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */); + gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw, + c, prog, mem_ctx, false /* no_spills */, + st_index); else - gs = new gen6_gs_visitor(brw, c, prog, mem_ctx, false /* no_spills */); + gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw, + c, prog, mem_ctx, false /* no_spills */, + st_index); - if (!gs->run()) { + if (!gs->run(NULL /* clip planes */)) { prog->LinkStatus = false; ralloc_strcat(&prog->InfoLog, gs->fail_msg); } else {