From: Jason Ekstrand
Date: Sat, 20 Jul 2019 13:17:59 +0000 (-0500)
Subject: intel/vec4: Drop all of the 64-bit varying code
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b539157504367c8b89733e3799da41ab99a78f6d;p=mesa.git

intel/vec4: Drop all of the 64-bit varying code

Reviewed-by: Matt Turner
---

diff --git a/src/intel/compiler/brw_vec4_gs_nir.cpp b/src/intel/compiler/brw_vec4_gs_nir.cpp
index 8f9c1fd4047..d4b5582010b 100644
--- a/src/intel/compiler/brw_vec4_gs_nir.cpp
+++ b/src/intel/compiler/brw_vec4_gs_nir.cpp
@@ -38,6 +38,7 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
    switch (instr->intrinsic) {
    case nir_intrinsic_load_per_vertex_input: {
+      assert(nir_dest_bit_size(instr->dest) == 32);
       /* The EmitNoIndirectInput flag guarantees our vertex index will
        * be constant. We should handle indirects someday.
        */
@@ -46,34 +47,17 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
       const unsigned input_array_stride = prog_data->urb_read_length * 2;
 
-      if (nir_dest_bit_size(instr->dest) == 64) {
-         src = src_reg(ATTR, input_array_stride * vertex +
-                             instr->const_index[0] + offset_reg,
-                       glsl_type::dvec4_type);
-
-         dst_reg tmp = dst_reg(this, glsl_type::dvec4_type);
-         shuffle_64bit_data(tmp, src, false);
-
-         src = src_reg(tmp);
-         src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr) / 2);
-
-         /* Write to dst reg taking into account original writemask */
-         dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
-         dest.writemask = brw_writemask_for_size(instr->num_components);
-         emit(MOV(dest, src));
-      } else {
-         /* Make up a type...we have no way of knowing... */
-         const glsl_type *const type = glsl_type::ivec(instr->num_components);
-
-         src = src_reg(ATTR, input_array_stride * vertex +
-                             instr->const_index[0] + offset_reg,
-                       type);
-         src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
-
-         dest = get_nir_dest(instr->dest, src.type);
-         dest.writemask = brw_writemask_for_size(instr->num_components);
-         emit(MOV(dest, src));
-      }
+      /* Make up a type...we have no way of knowing... */
+      const glsl_type *const type = glsl_type::ivec(instr->num_components);
+
+      src = src_reg(ATTR, input_array_stride * vertex +
+                          instr->const_index[0] + offset_reg,
+                    type);
+      src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
+
+      dest = get_nir_dest(instr->dest, src.type);
+      dest.writemask = brw_writemask_for_size(instr->num_components);
+      emit(MOV(dest, src));
 
       break;
    }
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index 168a27536ea..dcf0e2b7bab 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -407,6 +407,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    switch (instr->intrinsic) {
 
    case nir_intrinsic_load_input: {
+      assert(nir_dest_bit_size(instr->dest) == 32);
       /* We set EmitNoIndirectInput for VS */
       unsigned load_offset = nir_src_as_uint(instr->src[0]);
 
@@ -417,53 +418,22 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
                     glsl_type::uvec4_type);
       src = retype(src, dest.type);
 
-      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
-      if (is_64bit) {
-         dst_reg tmp = dst_reg(this, glsl_type::dvec4_type);
-         src.swizzle = BRW_SWIZZLE_XYZW;
-         shuffle_64bit_data(tmp, src, false);
-         emit(MOV(dest, src_reg(tmp)));
-      } else {
-         /* Swizzle source based on component layout qualifier */
-         src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
-         emit(MOV(dest, src));
-      }
+      /* Swizzle source based on component layout qualifier */
+      src.swizzle = BRW_SWZ_COMP_INPUT(nir_intrinsic_component(instr));
+      emit(MOV(dest, src));
       break;
    }
 
    case nir_intrinsic_store_output: {
+      assert(nir_src_bit_size(instr->src[0]) == 32);
       unsigned store_offset = nir_src_as_uint(instr->src[1]);
       int varying = instr->const_index[0] + store_offset;
-
-      bool is_64bit = nir_src_bit_size(instr->src[0]) == 64;
-      if (is_64bit) {
-         src_reg data;
-         src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_DF,
-                           instr->num_components);
-         data = src_reg(this, glsl_type::dvec4_type);
-         shuffle_64bit_data(dst_reg(data), src, true);
-         src = retype(data, BRW_REGISTER_TYPE_F);
-      } else {
-         src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
-                           instr->num_components);
-      }
+      src = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_F,
+                        instr->num_components);
 
       unsigned c = nir_intrinsic_component(instr);
       output_reg[varying][c] = dst_reg(src);
       output_num_components[varying][c] = instr->num_components;
-
-      unsigned num_components = instr->num_components;
-      if (is_64bit)
-         num_components *= 2;
-
-      output_reg[varying][c] = dst_reg(src);
-      output_num_components[varying][c] = MIN2(4, num_components);
-
-      if (is_64bit && num_components > 4) {
-         assert(num_components <= 8);
-         output_reg[varying + 1][c] = byte_offset(dst_reg(src), REG_SIZE);
-         output_num_components[varying + 1][c] = num_components - 4;
-      }
       break;
    }
 
diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp
index 712766ca89a..734be075d6a 100644
--- a/src/intel/compiler/brw_vec4_tcs.cpp
+++ b/src/intel/compiler/brw_vec4_tcs.cpp
@@ -257,6 +257,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
                brw_imm_d(key->input_vertices)));
       break;
    case nir_intrinsic_load_per_vertex_input: {
+      assert(nir_dest_bit_size(instr->dest) == 32);
       src_reg indirect_offset = get_indirect_offset(instr);
       unsigned imm_offset = instr->const_index[0];
 
@@ -264,36 +265,10 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
                                     BRW_REGISTER_TYPE_UD);
 
       unsigned first_component = nir_intrinsic_component(instr);
-      if (nir_dest_bit_size(instr->dest) == 64) {
-         /* We need to emit up to two 32-bit URB reads, then shuffle
-          * the result into a temporary, then move to the destination
-          * honoring the writemask
-          *
-          * We don't need to divide first_component by 2 because
-          * emit_input_urb_read takes a 32-bit type.
-          */
-         dst_reg tmp = dst_reg(this, glsl_type::dvec4_type);
-         dst_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
-         emit_input_urb_read(tmp_d, vertex_index, imm_offset,
-                             first_component, indirect_offset);
-         if (instr->num_components > 2) {
-            emit_input_urb_read(byte_offset(tmp_d, REG_SIZE), vertex_index,
-                                imm_offset + 1, 0, indirect_offset);
-         }
-
-         src_reg tmp_src = retype(src_reg(tmp_d), BRW_REGISTER_TYPE_DF);
-         dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type);
-         shuffle_64bit_data(shuffled, tmp_src, false);
-
-         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
-         dst.writemask = brw_writemask_for_size(instr->num_components);
-         emit(MOV(dst, src_reg(shuffled)));
-      } else {
-         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
-         dst.writemask = brw_writemask_for_size(instr->num_components);
-         emit_input_urb_read(dst, vertex_index, imm_offset,
-                             first_component, indirect_offset);
-      }
+      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+      dst.writemask = brw_writemask_for_size(instr->num_components);
+      emit_input_urb_read(dst, vertex_index, imm_offset,
+                          first_component, indirect_offset);
       break;
    }
    case nir_intrinsic_load_input:
@@ -313,6 +288,7 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    }
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_per_vertex_output: {
+      assert(nir_src_bit_size(instr->src[0]) == 32);
      src_reg value = get_nir_src(instr->src[0]);
      unsigned mask = instr->const_index[1];
      unsigned swiz = BRW_SWIZZLE_XYZW;
@@ -322,40 +298,13 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
      unsigned first_component = nir_intrinsic_component(instr);
      if (first_component) {
-         if (nir_src_bit_size(instr->src[0]) == 64)
-            first_component /= 2;
         assert(swiz == BRW_SWIZZLE_XYZW);
         swiz = BRW_SWZ_COMP_OUTPUT(first_component);
         mask = mask << first_component;
      }
 
-      if (nir_src_bit_size(instr->src[0]) == 64) {
-         /* For 64-bit data we need to shuffle the data before we write and
-          * emit two messages. Also, since each channel is twice as large we
-          * need to fix the writemask in each 32-bit message to account for it.
-          */
-         value = swizzle(retype(value, BRW_REGISTER_TYPE_DF), swiz);
-         dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type);
-         shuffle_64bit_data(shuffled, value, true);
-         src_reg shuffled_float = src_reg(retype(shuffled, BRW_REGISTER_TYPE_F));
-
-         for (int n = 0; n < 2; n++) {
-            unsigned fixed_mask = 0;
-            if (mask & WRITEMASK_X)
-               fixed_mask |= WRITEMASK_XY;
-            if (mask & WRITEMASK_Y)
-               fixed_mask |= WRITEMASK_ZW;
-            emit_urb_write(shuffled_float, fixed_mask,
-                           imm_offset, indirect_offset);
-
-            shuffled_float = byte_offset(shuffled_float, REG_SIZE);
-            mask >>= 2;
-            imm_offset++;
-         }
-      } else {
-         emit_urb_write(swizzle(value, swiz), mask,
-                        imm_offset, indirect_offset);
-      }
+      emit_urb_write(swizzle(value, swiz), mask,
+                     imm_offset, indirect_offset);
      break;
   }
 
diff --git a/src/intel/compiler/brw_vec4_tes.cpp b/src/intel/compiler/brw_vec4_tes.cpp
index 51840b75ee6..a5e5d966531 100644
--- a/src/intel/compiler/brw_vec4_tes.cpp
+++ b/src/intel/compiler/brw_vec4_tes.cpp
@@ -63,33 +63,13 @@ vec4_tes_visitor::setup_payload()
          if (inst->src[i].file != ATTR)
             continue;
 
-         bool is_64bit = type_sz(inst->src[i].type) == 8;
-
          unsigned slot = inst->src[i].nr + inst->src[i].offset / 16;
          struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2));
-         grf = stride(grf, 0, is_64bit ? 2 : 4, 1);
+         grf = stride(grf, 0, 4, 1);
          grf.swizzle = inst->src[i].swizzle;
          grf.type = inst->src[i].type;
          grf.abs = inst->src[i].abs;
         grf.negate = inst->src[i].negate;
-
-         /* For 64-bit attributes we can end up with components XY in the
-          * second half of a register and components ZW in the first half
-          * of the next. Fix it up here.
-          */
-         if (is_64bit && grf.subnr > 0) {
-            /* We can't do swizzles that mix XY and ZW channels in this case.
-             * Such cases should have been handled by the scalarization pass.
-             */
-            assert((brw_mask_for_swizzle(grf.swizzle) & 0x3) ^
-                   (brw_mask_for_swizzle(grf.swizzle) & 0xc));
-            if (brw_mask_for_swizzle(grf.swizzle) & 0xc) {
-               grf.subnr = 0;
-               grf.nr++;
-               grf.swizzle -= BRW_SWIZZLE_ZZZZ;
-            }
-         }
-
         inst->src[i] = grf;
      }
   }
@@ -176,13 +156,11 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
 
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input: {
+      assert(nir_dest_bit_size(instr->dest) == 32);
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];
      src_reg header = input_read_header;
-      bool is_64bit = nir_dest_bit_size(instr->dest) == 64;
      unsigned first_component = nir_intrinsic_component(instr);
-      if (is_64bit)
-         first_component /= 2;
 
      if (indirect_offset.file != BAD_FILE) {
         src_reg clamped_indirect_offset = src_reg(this, glsl_type::uvec4_type);
@@ -204,67 +182,33 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          */
         const unsigned max_push_slots = 24;
         if (imm_offset < max_push_slots) {
-            const glsl_type *src_glsl_type =
-               is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type;
-            src_reg src = src_reg(ATTR, imm_offset, src_glsl_type);
+            src_reg src = src_reg(ATTR, imm_offset, glsl_type::ivec4_type);
            src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
-            const brw_reg_type dst_reg_type =
-               is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D;
-            emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src));
+            emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), src));
 
            prog_data->urb_read_length =
              MAX2(prog_data->urb_read_length,
-                  DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2));
+                  DIV_ROUND_UP(imm_offset + 1, 2));
           break;
        }
     }
 
-      if (!is_64bit) {
-         dst_reg temp(this, glsl_type::ivec4_type);
-         vec4_instruction *read =
-            emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
-         read->offset = imm_offset;
-         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
-
-         src_reg src = src_reg(temp);
-         src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
-
-         /* Copy to target. We might end up with some funky writemasks landing
-          * in here, but we really don't want them in the above pseudo-ops.
-          */
-         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
-         dst.writemask = brw_writemask_for_size(instr->num_components);
-         emit(MOV(dst, src));
-      } else {
-         /* For 64-bit we need to load twice as many 32-bit components, and for
-          * dvec3/4 we need to emit 2 URB Read messages
-          */
-         dst_reg temp(this, glsl_type::dvec4_type);
-         dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D);
-
-         vec4_instruction *read =
-            emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header));
-         read->offset = imm_offset;
-         read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
-
-         if (instr->num_components > 2) {
-            read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE),
-                        src_reg(header));
-            read->offset = imm_offset + 1;
-            read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
-         }
-
-         src_reg temp_as_src = src_reg(temp);
-         temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
-
-         dst_reg shuffled(this, glsl_type::dvec4_type);
-         shuffle_64bit_data(shuffled, temp_as_src, false);
-
-         dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
-         dst.writemask = brw_writemask_for_size(instr->num_components);
-         emit(MOV(dst, src_reg(shuffled)));
-      }
+      dst_reg temp(this, glsl_type::ivec4_type);
+      vec4_instruction *read =
+         emit(VEC4_OPCODE_URB_READ, temp, src_reg(header));
+      read->offset = imm_offset;
+      read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
+
+      src_reg src = src_reg(temp);
+      src.swizzle = BRW_SWZ_COMP_INPUT(first_component);
+
+      /* Copy to target. We might end up with some funky writemasks landing
+       * in here, but we really don't want them in the above pseudo-ops.
+       */
+      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+      dst.writemask = brw_writemask_for_size(instr->num_components);
+      emit(MOV(dst, src));
      break;
   }
 
   default:
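
Note on the removed TCS store path above: since each 64-bit channel occupies two
32-bit channels after shuffling, the deleted loop expanded the original writemask
so that channel X of the 64-bit value covered XY of a 32-bit URB write and channel
Y covered ZW, once per message. The following is a standalone sketch of that
expansion for illustration only; it is not code from this patch, and the
WRITEMASK_* values simply mirror the driver's channel-mask macros.

/* Sketch of the writemask expansion performed by the deleted 64-bit TCS store
 * loop: message n (n = 0 or 1) writes the low or high pair of 64-bit channels,
 * and each surviving channel turns into a two-channel 32-bit writemask.
 */
#include <cassert>
#include <cstdio>

enum {
   WRITEMASK_X  = 0x1,
   WRITEMASK_Y  = 0x2,
   WRITEMASK_XY = 0x3,
   WRITEMASK_ZW = 0xc,
};

static unsigned
fixed_writemask_for_64bit_write(unsigned mask64, int n)
{
   assert(n == 0 || n == 1);
   /* Message 1 covers the z/w channels of the 64-bit value, so shift them
    * down into the x/y positions first, just as the removed loop did with
    * "mask >>= 2" between iterations.
    */
   unsigned mask = mask64 >> (2 * n);
   unsigned fixed_mask = 0;
   if (mask & WRITEMASK_X)
      fixed_mask |= WRITEMASK_XY;
   if (mask & WRITEMASK_Y)
      fixed_mask |= WRITEMASK_ZW;
   return fixed_mask;
}

int
main()
{
   /* A dvec writemask of .xz (0x5) expands to an xy write in both messages. */
   printf("%x %x\n", fixed_writemask_for_64bit_write(0x5, 0),
          fixed_writemask_for_64bit_write(0x5, 1));
   return 0;
}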