From: Daniel Schürmann Date: Wed, 19 Feb 2020 08:39:42 +0000 (+0100) Subject: aco: refactor regClass setup for subdword VGPRs X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f01bf51a2b5769aa7bb71f3c76f700c3b4257ac1;p=mesa.git aco: refactor regClass setup for subdword VGPRs Reviewed-by: Rhys Perry Reviewed-By: Timur Kristóf Part-of: --- diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 462cd48d960..6caedc4347f 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -225,6 +225,25 @@ sanitize_cf_list(nir_function_impl *impl, bool *divergent, struct exec_list *cf_ return progress; } +RegClass get_reg_class(isel_context *ctx, RegType type, unsigned components, unsigned bitsize) +{ + switch (bitsize) { + case 1: + return RegClass(RegType::sgpr, ctx->program->lane_mask.size() * components); + case 8: + return type == RegType::sgpr ? s1 : RegClass(type, components).as_subdword(); + case 16: + return type == RegType::sgpr ? RegClass(type, DIV_ROUND_UP(components, 2)) : + RegClass(type, 2 * components).as_subdword(); + case 32: + return RegClass(type, components); + case 64: + return RegClass(type, components * 2); + default: + unreachable("Unsupported bit size"); + } +} + void init_context(isel_context *ctx, nir_shader *shader) { nir_function_impl *impl = nir_shader_get_entrypoint(shader); @@ -260,9 +279,6 @@ void init_context(isel_context *ctx, nir_shader *shader) switch(instr->type) { case nir_instr_type_alu: { nir_alu_instr *alu_instr = nir_instr_as_alu(instr); - unsigned size = alu_instr->dest.dest.ssa.num_components; - if (alu_instr->dest.dest.ssa.bit_size == 64) - size *= 2; RegType type = RegType::sgpr; switch(alu_instr->op) { case nir_op_fmul: @@ -312,20 +328,6 @@ void init_context(isel_context *ctx, nir_shader *shader) case nir_op_cube_face_coord: type = RegType::vgpr; break; - case nir_op_flt: - case nir_op_fge: - case nir_op_feq: - case nir_op_fne: - case nir_op_ilt: - case nir_op_ige: - case nir_op_ult: - case nir_op_uge: - case nir_op_ieq: - case nir_op_ine: - case nir_op_i2b1: - case nir_op_b2b1: - size = lane_mask_size; - break; case nir_op_f2i64: case nir_op_f2u64: case nir_op_b2i32: @@ -333,45 +335,22 @@ void init_context(isel_context *ctx, nir_shader *shader) case nir_op_b2f32: case nir_op_f2i32: case nir_op_f2u32: + case nir_op_mov: type = ctx->divergent_vals[alu_instr->dest.dest.ssa.index] ? RegType::vgpr : RegType::sgpr; break; case nir_op_bcsel: - if (alu_instr->dest.dest.ssa.bit_size == 1) { - size = lane_mask_size; - } else { - if (ctx->divergent_vals[alu_instr->dest.dest.ssa.index]) { - type = RegType::vgpr; - } else { - if (allocated[alu_instr->src[1].src.ssa->index].type() == RegType::vgpr || - allocated[alu_instr->src[2].src.ssa->index].type() == RegType::vgpr) { - type = RegType::vgpr; - } - } - if (alu_instr->src[1].src.ssa->num_components == 1 && alu_instr->src[2].src.ssa->num_components == 1) { - assert(allocated[alu_instr->src[1].src.ssa->index].size() == allocated[alu_instr->src[2].src.ssa->index].size()); - size = allocated[alu_instr->src[1].src.ssa->index].size(); - } - } - break; - case nir_op_mov: - if (alu_instr->dest.dest.ssa.bit_size == 1) { - size = lane_mask_size; - } else { - type = ctx->divergent_vals[alu_instr->dest.dest.ssa.index] ? RegType::vgpr : RegType::sgpr; - } - break; + type = ctx->divergent_vals[alu_instr->dest.dest.ssa.index] ? RegType::vgpr : RegType::sgpr; + /* fallthrough */ default: - if (alu_instr->dest.dest.ssa.bit_size == 1) { - size = lane_mask_size; - } else { - for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++) { - if (allocated[alu_instr->src[i].src.ssa->index].type() == RegType::vgpr) - type = RegType::vgpr; - } + for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++) { + if (allocated[alu_instr->src[i].src.ssa->index].type() == RegType::vgpr) + type = RegType::vgpr; } break; } - allocated[alu_instr->dest.dest.ssa.index] = Temp(0, RegClass(type, size)); + + RegClass rc = get_reg_class(ctx, type, alu_instr->dest.dest.ssa.num_components, alu_instr->dest.dest.ssa.bit_size); + allocated[alu_instr->dest.dest.ssa.index] = Temp(0, rc); break; } case nir_instr_type_load_const: { @@ -387,9 +366,6 @@ void init_context(isel_context *ctx, nir_shader *shader) nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr); if (!nir_intrinsic_infos[intrinsic->intrinsic].has_dest) break; - unsigned size = intrinsic->dest.ssa.num_components; - if (intrinsic->dest.ssa.bit_size == 64) - size *= 2; RegType type = RegType::sgpr; switch(intrinsic->intrinsic) { case nir_intrinsic_load_push_constant: @@ -405,10 +381,6 @@ void init_context(isel_context *ctx, nir_shader *shader) case nir_intrinsic_read_first_invocation: case nir_intrinsic_read_invocation: case nir_intrinsic_first_invocation: - type = RegType::sgpr; - if (intrinsic->dest.ssa.bit_size == 1) - size = lane_mask_size; - break; case nir_intrinsic_ballot: type = RegType::sgpr; break; @@ -493,46 +465,16 @@ void init_context(isel_context *ctx, nir_shader *shader) case nir_intrinsic_masked_swizzle_amd: case nir_intrinsic_inclusive_scan: case nir_intrinsic_exclusive_scan: - if (intrinsic->dest.ssa.bit_size == 1) { - size = lane_mask_size; - type = RegType::sgpr; - } else if (!ctx->divergent_vals[intrinsic->dest.ssa.index]) { - type = RegType::sgpr; - } else { - type = RegType::vgpr; - } - break; - case nir_intrinsic_load_view_index: - type = ctx->stage == fragment_fs ? RegType::vgpr : RegType::sgpr; - break; - case nir_intrinsic_load_front_face: - case nir_intrinsic_load_helper_invocation: - case nir_intrinsic_is_helper_invocation: - type = RegType::sgpr; - size = lane_mask_size; - break; case nir_intrinsic_reduce: - if (intrinsic->dest.ssa.bit_size == 1) { - size = lane_mask_size; - type = RegType::sgpr; - } else if (!ctx->divergent_vals[intrinsic->dest.ssa.index]) { - type = RegType::sgpr; - } else { - type = RegType::vgpr; - } - break; case nir_intrinsic_load_ubo: case nir_intrinsic_load_ssbo: case nir_intrinsic_load_global: case nir_intrinsic_vulkan_resource_index: + case nir_intrinsic_load_shared: type = ctx->divergent_vals[intrinsic->dest.ssa.index] ? RegType::vgpr : RegType::sgpr; break; - /* due to copy propagation, the swizzled imov is removed if num dest components == 1 */ - case nir_intrinsic_load_shared: - if (ctx->divergent_vals[intrinsic->dest.ssa.index]) - type = RegType::vgpr; - else - type = RegType::sgpr; + case nir_intrinsic_load_view_index: + type = ctx->stage == fragment_fs ? RegType::vgpr : RegType::sgpr; break; default: for (unsigned i = 0; i < nir_intrinsic_infos[intrinsic->intrinsic].num_srcs; i++) { @@ -541,7 +483,8 @@ void init_context(isel_context *ctx, nir_shader *shader) } break; } - allocated[intrinsic->dest.ssa.index] = Temp(0, RegClass(type, size)); + RegClass rc = get_reg_class(ctx, type, intrinsic->dest.ssa.num_components, intrinsic->dest.ssa.bit_size); + allocated[intrinsic->dest.ssa.index] = Temp(0, rc); switch(intrinsic->intrinsic) { case nir_intrinsic_load_barycentric_sample: @@ -635,8 +578,7 @@ void init_context(isel_context *ctx, nir_shader *shader) } } - size *= phi->dest.ssa.bit_size == 64 ? 2 : 1; - RegClass rc = RegClass(type, size); + RegClass rc = get_reg_class(ctx, type, phi->dest.ssa.num_components, phi->dest.ssa.bit_size); if (rc != allocated[phi->dest.ssa.index].regClass()) { done = false; } else { diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 7e010db7cdf..b05f809d21f 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -227,6 +227,7 @@ struct RegClass { constexpr unsigned size() const { return (bytes() + 3) >> 2; } constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); } constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); } + constexpr RegClass as_subdword() const { return RegClass((RC) (rc | 1 << 7)); } private: RC rc;