From f182e5eafc31ebc7c140e9a369d5f747948733ae Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 17 Oct 2016 11:14:10 -0700 Subject: [PATCH] i965/vec4: Handle component qualifiers on non-generic varyings. ARB_enhanced_layouts only requires component qualifier support for generic varyings, so this is all the vec4 backend knew how to handle. This patch extends the backend to handle it for all varyings, so we can use store_output intrinsics with a component set for things like clip/cull distances. We may want to use that for other VUE header fields in the future as well. Signed-off-by: Kenneth Graunke Reviewed-by: Timothy Arceri --- src/mesa/drivers/dri/i965/brw_vec4.h | 10 +-- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 11 +-- .../drivers/dri/i965/brw_vec4_visitor.cpp | 83 ++++++++----------- .../drivers/dri/i965/brw_vec4_vs_visitor.cpp | 18 ++-- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 4 +- 5 files changed, 53 insertions(+), 73 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 62c6007129a..dc69ea97174 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -113,10 +113,9 @@ public: /* Regs for vertex results. Generated at ir_variable visiting time * for the ir->location's used. */ - dst_reg output_reg[BRW_VARYING_SLOT_COUNT]; - dst_reg output_generic_reg[MAX_VARYINGS_INCL_PATCH][4]; - unsigned output_generic_num_components[MAX_VARYINGS_INCL_PATCH][4]; - const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; + dst_reg output_reg[VARYING_SLOT_TESS_MAX][4]; + unsigned output_num_components[VARYING_SLOT_TESS_MAX][4]; + const char *output_reg_annotation[VARYING_SLOT_TESS_MAX]; int uniforms; src_reg shader_start_time; @@ -270,8 +269,7 @@ public: void emit_ndc_computation(); void emit_psiz_and_flags(dst_reg reg); - vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying); - void emit_generic_urb_slot(dst_reg reg, int varying, int component); + vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp); virtual void emit_urb_slot(dst_reg reg, int varying); void emit_shader_time_begin(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index fc4eb3ac1d7..0d54907cadf 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -416,14 +416,9 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F, instr->num_components); - if (varying >= VARYING_SLOT_VAR0) { - unsigned c = nir_intrinsic_component(instr); - unsigned v = varying - VARYING_SLOT_VAR0; - output_generic_reg[v][c] = dst_reg(src); - output_generic_num_components[v][c] = instr->num_components; - } else { - output_reg[varying] = dst_reg(src); - } + unsigned c = nir_intrinsic_component(instr); + output_reg[varying][c] = dst_reg(src); + output_num_components[varying][c] = instr->num_components; break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 954f147fe7e..8c7901f2d41 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1154,15 +1154,16 @@ vec4_visitor::gs_end_primitive() void vec4_visitor::emit_ndc_computation() { - if (output_reg[VARYING_SLOT_POS].file == BAD_FILE) + if (output_reg[VARYING_SLOT_POS][0].file == BAD_FILE) return; /* Get the position */ - src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]); + src_reg pos = src_reg(output_reg[VARYING_SLOT_POS][0]); /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */ dst_reg ndc = dst_reg(this, glsl_type::vec4_type); - output_reg[BRW_VARYING_SLOT_NDC] = ndc; + output_reg[BRW_VARYING_SLOT_NDC][0] = ndc; + output_num_components[BRW_VARYING_SLOT_NDC][0] = 4; current_annotation = "NDC"; dst_reg ndc_w = ndc; @@ -1182,7 +1183,7 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) { if (devinfo->gen < 6 && ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) || - output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE || + output_reg[VARYING_SLOT_CLIP_DIST0][0].file != BAD_FILE || devinfo->has_negative_rhw_bug)) { dst_reg header1 = dst_reg(this, glsl_type::uvec4_type); dst_reg header1_w = header1; @@ -1191,23 +1192,23 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) emit(MOV(header1, brw_imm_ud(0u))); if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) { - src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]); + src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ][0]); current_annotation = "Point size"; emit(MUL(header1_w, psiz, brw_imm_f((float)(1 << 11)))); emit(AND(header1_w, src_reg(header1_w), brw_imm_d(0x7ff << 8))); } - if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) { + if (output_reg[VARYING_SLOT_CLIP_DIST0][0].file != BAD_FILE) { current_annotation = "Clipping flags"; dst_reg flags0 = dst_reg(this, glsl_type::uint_type); dst_reg flags1 = dst_reg(this, glsl_type::uint_type); - emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); + emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0)); emit(OR(header1_w, src_reg(header1_w), src_reg(flags0))); - emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); + emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L)); emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0)); emit(SHL(flags1, src_reg(flags1), brw_imm_d(4))); emit(OR(header1_w, src_reg(header1_w), src_reg(flags1))); @@ -1223,15 +1224,15 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) * clipped against all fixed planes. */ if (devinfo->has_negative_rhw_bug && - output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) { - src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]); + output_reg[BRW_VARYING_SLOT_NDC][0].file != BAD_FILE) { + src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC][0]); ndc_w.swizzle = BRW_SWIZZLE_WWWW; emit(CMP(dst_null_f(), ndc_w, brw_imm_f(0.0f), BRW_CONDITIONAL_L)); vec4_instruction *inst; inst = emit(OR(header1_w, src_reg(header1_w), brw_imm_ud(1u << 6))); inst->predicate = BRW_PREDICATE_NORMAL; - output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F; - inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], brw_imm_f(0.0f))); + output_reg[BRW_VARYING_SLOT_NDC][0].type = BRW_REGISTER_TYPE_F; + inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC][0], brw_imm_f(0.0f))); inst->predicate = BRW_PREDICATE_NORMAL; } @@ -1243,7 +1244,7 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) { dst_reg reg_w = reg; reg_w.writemask = WRITEMASK_W; - src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ]); + src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ][0]); reg_as_src.type = reg_w.type; reg_as_src.swizzle = brw_swizzle_for_size(1); emit(MOV(reg_w, reg_as_src)); @@ -1252,58 +1253,45 @@ vec4_visitor::emit_psiz_and_flags(dst_reg reg) dst_reg reg_y = reg; reg_y.writemask = WRITEMASK_Y; reg_y.type = BRW_REGISTER_TYPE_D; - output_reg[VARYING_SLOT_LAYER].type = reg_y.type; - emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER]))); + output_reg[VARYING_SLOT_LAYER][0].type = reg_y.type; + emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER][0]))); } if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) { dst_reg reg_z = reg; reg_z.writemask = WRITEMASK_Z; reg_z.type = BRW_REGISTER_TYPE_D; - output_reg[VARYING_SLOT_VIEWPORT].type = reg_z.type; - emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT]))); + output_reg[VARYING_SLOT_VIEWPORT][0].type = reg_z.type; + emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT][0]))); } } } vec4_instruction * -vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying) -{ - assert(varying < VARYING_SLOT_MAX); - assert(output_reg[varying].type == reg.type); - current_annotation = output_reg_annotation[varying]; - if (output_reg[varying].file != BAD_FILE) { - return emit(MOV(reg, src_reg(output_reg[varying]))); - } else - return NULL; -} - -void vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying, int component) { assert(varying < VARYING_SLOT_MAX); - assert(varying >= VARYING_SLOT_VAR0); - varying = varying - VARYING_SLOT_VAR0; - unsigned num_comps = output_generic_num_components[varying][component]; + unsigned num_comps = output_num_components[varying][component]; if (num_comps == 0) - return; + return NULL; - assert(output_generic_reg[varying][component].type == reg.type); + assert(output_reg[varying][component].type == reg.type); current_annotation = output_reg_annotation[varying]; - if (output_generic_reg[varying][component].file != BAD_FILE) { - src_reg src = src_reg(output_generic_reg[varying][component]); + if (output_reg[varying][component].file != BAD_FILE) { + src_reg src = src_reg(output_reg[varying][component]); src.swizzle = BRW_SWZ_COMP_OUTPUT(component); reg.writemask = brw_writemask_for_component_packing(num_comps, component); - emit(MOV(reg, src)); + return emit(MOV(reg, src)); } + return NULL; } void vec4_visitor::emit_urb_slot(dst_reg reg, int varying) { reg.type = BRW_REGISTER_TYPE_F; - output_reg[varying].type = reg.type; + output_reg[varying][0].type = reg.type; switch (varying) { case VARYING_SLOT_PSIZ: @@ -1315,13 +1303,13 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying) } case BRW_VARYING_SLOT_NDC: current_annotation = "NDC"; - if (output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) - emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC]))); + if (output_reg[BRW_VARYING_SLOT_NDC][0].file != BAD_FILE) + emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC][0]))); break; case VARYING_SLOT_POS: current_annotation = "gl_Position"; - if (output_reg[VARYING_SLOT_POS].file != BAD_FILE) - emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS]))); + if (output_reg[VARYING_SLOT_POS][0].file != BAD_FILE) + emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS][0]))); break; case VARYING_SLOT_EDGE: /* This is present when doing unfilled polygons. We're supposed to copy @@ -1338,12 +1326,8 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying) /* No need to write to this slot */ break; default: - if (varying >= VARYING_SLOT_VAR0) { - for (int i = 0; i < 4; i++) { - emit_generic_urb_slot(reg, varying, i); - } - } else { - emit_generic_urb_slot(reg, varying); + for (int i = 0; i < 4; i++) { + emit_generic_urb_slot(reg, varying, i); } break; } @@ -1795,8 +1779,7 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, this->current_annotation = NULL; memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation)); - memset(this->output_generic_num_components, 0, - sizeof(this->output_generic_num_components)); + memset(this->output_num_components, 0, sizeof(this->output_num_components)); this->virtual_grf_start = NULL; this->virtual_grf_end = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index 058ee3a7dad..a300f768767 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -104,7 +104,7 @@ void vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying) { reg.type = BRW_REGISTER_TYPE_F; - output_reg[varying].type = reg.type; + output_reg[varying][0].type = reg.type; switch (varying) { case VARYING_SLOT_COL0: @@ -115,7 +115,7 @@ vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying) * and we only support GS in core profile. So, this must be a vertex * shader. */ - vec4_instruction *inst = emit_generic_urb_slot(reg, varying); + vec4_instruction *inst = emit_generic_urb_slot(reg, varying, 0); if (inst && key->clamp_vertex_color) inst->saturate = true; break; @@ -150,7 +150,7 @@ vec4_vs_visitor::emit_clip_distances(dst_reg reg, int offset) ++i) { reg.writemask = 1 << i; emit(DP4(reg, - src_reg(output_reg[clip_vertex]), + src_reg(output_reg[clip_vertex][0]), src_reg(this->userplane[i + offset]))); } } @@ -180,11 +180,15 @@ vec4_vs_visitor::emit_thread_end() if (key->nr_userclip_plane_consts > 0) { current_annotation = "user clip distances"; - output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type); - output_reg[VARYING_SLOT_CLIP_DIST1] = dst_reg(this, glsl_type::vec4_type); + output_reg[VARYING_SLOT_CLIP_DIST0][0] = + dst_reg(this, glsl_type::vec4_type); + output_reg[VARYING_SLOT_CLIP_DIST1][0] = + dst_reg(this, glsl_type::vec4_type); + output_num_components[VARYING_SLOT_CLIP_DIST0][0] = 4; + output_num_components[VARYING_SLOT_CLIP_DIST1][0] = 4; - emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0], 0); - emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4); + emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0][0], 0); + emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1][0], 4); } /* For VS, we always end the thread by emitting a single vertex. diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index 329a1119f3f..7254729f41c 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -412,7 +412,7 @@ gen6_gs_visitor::emit_thread_end() /* Copy this slot to the appropriate message register */ dst_reg reg = dst_reg(MRF, mrf); - reg.type = output_reg[varying].type; + reg.type = output_reg[varying][0].type; data.type = reg.type; vec4_instruction *inst = emit(MOV(reg, data)); inst->force_writemask_all = true; @@ -688,7 +688,7 @@ gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts) int offset = get_vertex_output_offset_for_varying(vertex, varying); emit(MOV(dst_reg(this->vertex_output_offset), brw_imm_d(offset))); memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg)); - data.type = output_reg[varying].type; + data.type = output_reg[varying][0].type; /* PSIZ, LAYER and VIEWPORT are packed in different channels of the * same slot, so make sure we write the appropriate channel -- 2.30.2