brw_imm_d(key->input_vertices)));
break;
case nir_intrinsic_load_per_vertex_input: {
+ assert(nir_dest_bit_size(instr->dest) == 32);
src_reg indirect_offset = get_indirect_offset(instr);
unsigned imm_offset = instr->const_index[0];
BRW_REGISTER_TYPE_UD);
unsigned first_component = nir_intrinsic_component(instr);
- if (nir_dest_bit_size(instr->dest) == 64) {
- /* We need to emit up to two 32-bit URB reads, then shuffle
- * the result into a temporary, then move to the destination
- * honoring the writemask
- *
- * We don't need to divide first_component by 2 because
- * emit_input_urb_read takes a 32-bit type.
- */
- dst_reg tmp = dst_reg(this, glsl_type::dvec4_type);
- dst_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
- emit_input_urb_read(tmp_d, vertex_index, imm_offset,
- first_component, indirect_offset);
- if (instr->num_components > 2) {
- emit_input_urb_read(byte_offset(tmp_d, REG_SIZE), vertex_index,
- imm_offset + 1, 0, indirect_offset);
- }
-
- src_reg tmp_src = retype(src_reg(tmp_d), BRW_REGISTER_TYPE_DF);
- dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type);
- shuffle_64bit_data(shuffled, tmp_src, false);
-
- dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF);
- dst.writemask = brw_writemask_for_size(instr->num_components);
- emit(MOV(dst, src_reg(shuffled)));
- } else {
- dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
- dst.writemask = brw_writemask_for_size(instr->num_components);
- emit_input_urb_read(dst, vertex_index, imm_offset,
- first_component, indirect_offset);
- }
+ dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+ dst.writemask = brw_writemask_for_size(instr->num_components);
+ emit_input_urb_read(dst, vertex_index, imm_offset,
+ first_component, indirect_offset);
break;
}
case nir_intrinsic_load_input:
}
case nir_intrinsic_store_output:
case nir_intrinsic_store_per_vertex_output: {
+ assert(nir_src_bit_size(instr->src[0]) == 32);
src_reg value = get_nir_src(instr->src[0]);
unsigned mask = instr->const_index[1];
unsigned swiz = BRW_SWIZZLE_XYZW;
unsigned first_component = nir_intrinsic_component(instr);
if (first_component) {
- if (nir_src_bit_size(instr->src[0]) == 64)
- first_component /= 2;
assert(swiz == BRW_SWIZZLE_XYZW);
swiz = BRW_SWZ_COMP_OUTPUT(first_component);
mask = mask << first_component;
}
- if (nir_src_bit_size(instr->src[0]) == 64) {
- /* For 64-bit data we need to shuffle the data before we write and
- * emit two messages. Also, since each channel is twice as large we
- * need to fix the writemask in each 32-bit message to account for it.
- */
- value = swizzle(retype(value, BRW_REGISTER_TYPE_DF), swiz);
- dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type);
- shuffle_64bit_data(shuffled, value, true);
- src_reg shuffled_float = src_reg(retype(shuffled, BRW_REGISTER_TYPE_F));
-
- for (int n = 0; n < 2; n++) {
- unsigned fixed_mask = 0;
- if (mask & WRITEMASK_X)
- fixed_mask |= WRITEMASK_XY;
- if (mask & WRITEMASK_Y)
- fixed_mask |= WRITEMASK_ZW;
- emit_urb_write(shuffled_float, fixed_mask,
- imm_offset, indirect_offset);
-
- shuffled_float = byte_offset(shuffled_float, REG_SIZE);
- mask >>= 2;
- imm_offset++;
- }
- } else {
- emit_urb_write(swizzle(value, swiz), mask,
- imm_offset, indirect_offset);
- }
+ emit_urb_write(swizzle(value, swiz), mask,
+ imm_offset, indirect_offset);
break;
}
break;
}
+ case nir_intrinsic_memory_barrier_tcs_patch:
+ break;
+
default:
vec4_visitor::nir_emit_intrinsic(instr);
}
struct brw_tcs_prog_data *prog_data,
nir_shader *nir,
int shader_time_index,
+ struct brw_compile_stats *stats,
char **error_str)
{
const struct gen_device_info *devinfo = compiler->devinfo;
nir->info.outputs_written,
nir->info.patch_outputs_written);
- brw_nir_apply_key(nir, compiler, &key->base, is_scalar);
+ brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
brw_nir_lower_vue_inputs(nir, &input_vue_map);
brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map,
key->tes_primitive_mode);
nir->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID);
if (compiler->use_tcs_8_patch &&
- nir->info.tess.tcs_vertices_out <= 16 &&
+ nir->info.tess.tcs_vertices_out <= (devinfo->gen >= 12 ? 32 : 16) &&
2 + has_primitive_id + key->input_vertices <= 31) {
- /* 3DSTATE_HS imposes two constraints on using 8_PATCH mode. First,
- * the "Instance" field limits the number of output vertices to [1, 16].
- * Secondly, the "Dispatch GRF Start Register for URB Data" field is
- * limited to [0, 31] - which imposes a limit on the input vertices.
+ /* 3DSTATE_HS imposes two constraints on using 8_PATCH mode. First, the
+ * "Instance" field limits the number of output vertices to [1, 16] on
+ * gen11 and below, or [1, 32] on gen12 and above. Secondly, the
+ * "Dispatch GRF Start Register for URB Data" field is limited to [0,
+ * 31] - which imposes a limit on the input vertices.
*/
vue_prog_data->dispatch_mode = DISPATCH_MODE_TCS_8_PATCH;
prog_data->instances = nir->info.tess.tcs_vertices_out;
if (is_scalar) {
fs_visitor v(compiler, log_data, mem_ctx, &key->base,
- &prog_data->base.base, NULL, nir, 8,
+ &prog_data->base.base, nir, 8,
shader_time_index, &input_vue_map);
if (!v.run_tcs()) {
if (error_str)
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
fs_generator g(compiler, log_data, mem_ctx,
- &prog_data->base.base, v.promoted_constants, false,
+ &prog_data->base.base, v.shader_stats, false,
MESA_SHADER_TESS_CTRL);
if (unlikely(INTEL_DEBUG & DEBUG_TCS)) {
g.enable_debug(ralloc_asprintf(mem_ctx,
nir->info.name));
}
- g.generate_code(v.cfg, 8);
+ g.generate_code(v.cfg, 8, stats);
assembly = g.get_assembly();
} else {
assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
- &prog_data->base, v.cfg);
+ &prog_data->base, v.cfg, stats);
}
return assembly;