From 3f263ffbb37d77f97a86686e1d2d5eeabf4ecae6 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 16 Jan 2015 02:12:17 -0800 Subject: [PATCH] i965/nir: Replace fs_reg(GRF, virtual_grf_alloc(...)) with vgrf(...). brw_fs_nir.cpp creates almost all of its registers via: fs_reg reg = fs_reg(GRF, virtual_grf_alloc(num_components)); When we add SIMD16 support, we'll need to set reg->width = 16 and double the VGRF size...on pretty much every VGRF it allocates. This patch replaces that pattern with a new "vgrf" helper method: fs_reg reg = vgrf(num_components); The new function correctly takes reg_width into account. For now, reg_width is always 1, so this should have no functional change. v2: Just make vgrf() account for reg_width right away, rather than changing the behavior in the next patch. v3: Replace one last virtual_grf_alloc I missed. It's used in code that only runs for dispatch_width == 8, so it doesn't matter, but consistency is nice. Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs.cpp | 8 +++++++ src/mesa/drivers/dri/i965/brw_fs.h | 1 + src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 27 ++++++++++++------------ 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 817ca2e860f..6490788118b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1040,6 +1040,14 @@ fs_visitor::vgrf(const glsl_type *const type) brw_type_for_base_type(type), dispatch_width); } +fs_reg +fs_visitor::vgrf(int num_components) +{ + int reg_width = dispatch_width / 8; + return fs_reg(GRF, virtual_grf_alloc(num_components * reg_width), + BRW_REGISTER_TYPE_F, dispatch_width); +} + /** Fixed HW reg constructor. */ fs_reg::fs_reg(enum register_file file, int reg) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 466f90f476c..1de10bb4b85 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -323,6 +323,7 @@ public: fs_reg *variable_storage(ir_variable *var); int virtual_grf_alloc(int size); fs_reg vgrf(const glsl_type *const type); + fs_reg vgrf(int num_components); void import_uniforms(fs_visitor *v); void setup_uniform_clipplane_values(); void compute_clip_distance(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 7d7c04cedc7..c4c7d60608b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -95,12 +95,12 @@ fs_visitor::emit_nir_code() */ if (nir->num_inputs > 0) { - nir_inputs = fs_reg(GRF, virtual_grf_alloc(nir->num_inputs)); + nir_inputs = vgrf(nir->num_inputs); nir_setup_inputs(nir); } if (nir->num_outputs > 0) { - nir_outputs = fs_reg(GRF, virtual_grf_alloc(nir->num_outputs)); + nir_outputs = vgrf(nir->num_outputs); nir_setup_outputs(nir); } @@ -116,7 +116,7 @@ fs_visitor::emit_nir_code() unsigned array_elems = reg->num_array_elems == 0 ? 1 : reg->num_array_elems; unsigned size = array_elems * reg->num_components; - nir_globals[reg->index] = fs_reg(GRF, virtual_grf_alloc(size)); + nir_globals[reg->index] = vgrf(size); } /* get the main function and emit it */ @@ -356,7 +356,7 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl) unsigned array_elems = reg->num_array_elems == 0 ? 1 : reg->num_array_elems; unsigned size = array_elems * reg->num_components; - nir_locals[reg->index] = fs_reg(GRF, virtual_grf_alloc(size)); + nir_locals[reg->index] = vgrf(size); } nir_emit_cf_list(&impl->body); @@ -730,7 +730,7 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_ball_fequal4: case nir_op_ball_iequal4: { unsigned num_components = nir_op_infos[instr->op].input_sizes[0]; - fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components)); + fs_reg temp = vgrf(num_components); emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_Z), (1 << num_components) - 1); emit_reduction(BRW_OPCODE_AND, result, temp, num_components); @@ -744,7 +744,7 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_bany_fnequal4: case nir_op_bany_inequal4: { unsigned num_components = nir_op_infos[instr->op].input_sizes[0]; - fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components)); + fs_reg temp = vgrf(num_components); temp.type = BRW_REGISTER_TYPE_UD; emit_percomp(CMP(temp, op[0], op[1], BRW_CONDITIONAL_NZ), (1 << num_components) - 1); @@ -769,7 +769,7 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_fdot3: case nir_op_fdot4: { unsigned num_components = nir_op_infos[instr->op].input_sizes[0]; - fs_reg temp = fs_reg(GRF, virtual_grf_alloc(num_components)); + fs_reg temp = vgrf(num_components); emit_percomp(MUL(temp, op[0], op[1]), (1 << num_components) - 1); emit_reduction(BRW_OPCODE_ADD, result, temp, num_components); if (instr->dest.saturate) { @@ -1038,8 +1038,8 @@ fs_visitor::get_nir_src(nir_src src) if (src.is_ssa) { assert(src.ssa->parent_instr->type == nir_instr_type_load_const); nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); - fs_reg reg(GRF, virtual_grf_alloc(src.ssa->num_components), - BRW_REGISTER_TYPE_D); + fs_reg reg = vgrf(src.ssa->num_components); + reg.type = BRW_REGISTER_TYPE_D; for (unsigned i = 0; i < src.ssa->num_components; ++i) emit(MOV(offset(reg, i), fs_reg(load->value.i[i]))); @@ -1091,7 +1091,8 @@ fs_visitor::get_nir_alu_src(nir_alu_instr *instr, unsigned src) if (needs_swizzle) { /* resolve the swizzle through MOV's */ - fs_reg new_reg = fs_reg(GRF, virtual_grf_alloc(num_components), reg.type); + fs_reg new_reg = vgrf(num_components); + new_reg.type = reg.type; for (unsigned i = 0; i < 4; i++) { if (!nir_alu_instr_channel_used(instr, src, i)) @@ -1244,7 +1245,7 @@ fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src, return; } - fs_reg temp1 = fs_reg(GRF, virtual_grf_alloc(1)); + fs_reg temp1 = vgrf(1); temp1.type = src.type; emit(op, temp1, src0, src1); @@ -1260,7 +1261,7 @@ fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src, fs_reg src3 = src; src3.reg_offset += 3; - fs_reg temp2 = fs_reg(GRF, virtual_grf_alloc(1)); + fs_reg temp2 = vgrf(1); temp2.type = src.type; emit(op, temp2, src2, src3); @@ -1487,7 +1488,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ no16("interpolate_at_* not yet supported in SIMD16 mode."); - fs_reg dst_x(GRF, virtual_grf_alloc(2), BRW_REGISTER_TYPE_F); + fs_reg dst_x = vgrf(2); fs_reg dst_y = offset(dst_x, 1); /* For most messages, we need one reg of ignored data; the hardware -- 2.30.2