}
break;
}
- case nir_op_fmax3: {
- if (dst.regClass() == v2b) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_f16, dst, false);
- } else if (dst.regClass() == v1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_f32, dst, ctx->block->fp_mode.must_flush_denorms32);
- } else {
- isel_err(&instr->instr, "Unimplemented NIR instr bit size");
- }
- break;
- }
- case nir_op_fmin3: {
- if (dst.regClass() == v2b) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_f16, dst, false);
- } else if (dst.regClass() == v1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_f32, dst, ctx->block->fp_mode.must_flush_denorms32);
- } else {
- isel_err(&instr->instr, "Unimplemented NIR instr bit size");
- }
- break;
- }
- case nir_op_fmed3: {
- if (dst.regClass() == v2b) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_f16, dst, false);
- } else if (dst.regClass() == v1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_f32, dst, ctx->block->fp_mode.must_flush_denorms32);
- } else {
- isel_err(&instr->instr, "Unimplemented NIR instr bit size");
- }
- break;
- }
- case nir_op_umax3: {
- if (dst.size() == 1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_u32, dst);
- } else {
- isel_err(&instr->instr, "Unimplemented NIR instr bit size");
- }
- break;
- }
- case nir_op_umin3: {
- if (dst.size() == 1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_u32, dst);
- } else {
- isel_err(&instr->instr, "Unimplemented NIR instr bit size");
- }
- break;
- }
- case nir_op_umed3: {
- if (dst.size() == 1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_u32, dst);
- } else {
- isel_err(&instr->instr, "Unimplemented NIR instr bit size");
- }
- break;
- }
- case nir_op_imax3: {
- if (dst.size() == 1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_max3_i32, dst);
- } else {
- isel_err(&instr->instr, "Unimplemented NIR instr bit size");
- }
- break;
- }
- case nir_op_imin3: {
- if (dst.size() == 1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_min3_i32, dst);
- } else {
- isel_err(&instr->instr, "Unimplemented NIR instr bit size");
- }
- break;
- }
- case nir_op_imed3: {
- if (dst.size() == 1) {
- emit_vop3a_instruction(ctx, instr, aco_opcode::v_med3_i32, dst);
- } else {
- isel_err(&instr->instr, "Unimplemented NIR instr bit size");
- }
- break;
- }
case nir_op_cube_face_coord: {
Temp in = get_alu_src(ctx, instr->src[0], 3);
Temp src[3] = { emit_extract_vector(ctx, in, 0, v1),
Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]);
Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]);
sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1),
- bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, ma), Operand(0x3f000000u/*0.5*/));
+ Operand(0x3f000000u/*0.5*/),
+ bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, ma));
tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1),
- bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, ma), Operand(0x3f000000u/*0.5*/));
+ Operand(0x3f000000u/*0.5*/),
+ bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, ma));
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), sc, tc);
break;
}
Temp src0 = bld.tmp(v1);
Temp src1 = bld.tmp(v1);
bld.pseudo(aco_opcode::p_split_vector, Definition(src0), Definition(src1), src);
- if (!ctx->block->fp_mode.care_about_round32 || ctx->block->fp_mode.round32 == fp_round_tz)
+ if (0 && (!ctx->block->fp_mode.care_about_round32 || ctx->block->fp_mode.round32 == fp_round_tz)) {
bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src0, src1);
- else
- bld.vop3(aco_opcode::v_cvt_pk_u16_u32, Definition(dst),
- bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src0),
- bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), src1));
+ } else {
+ src0 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v1), src0);
+ src1 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v1), src1);
+ if (ctx->program->chip_class >= GFX10) {
+ /* the high bits of v_cvt_f16_f32 aren't zeroed on GFX10 */
+ bld.vop3(aco_opcode::v_pack_b32_f16, Definition(dst), src0, src1);
+ } else {
+ bld.vop3(aco_opcode::v_cvt_pk_u16_u32, Definition(dst), src0, src1);
+ }
+ }
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
}
case nir_op_ubfe:
case nir_op_ibfe: {
- Temp base = get_alu_src(ctx, instr->src[0]);
- Temp offset = get_alu_src(ctx, instr->src[1]);
- Temp bits = get_alu_src(ctx, instr->src[2]);
+ if (dst.bytes() != 4)
+ unreachable("Unsupported BFE bit size");
if (dst.type() == RegType::sgpr) {
- Operand extract;
+ Temp base = get_alu_src(ctx, instr->src[0]);
+
nir_const_value* const_offset = nir_src_as_const_value(instr->src[1].src);
nir_const_value* const_bits = nir_src_as_const_value(instr->src[2].src);
if (const_offset && const_bits) {
- uint32_t const_extract = (const_bits->u32 << 16) | const_offset->u32;
- extract = Operand(const_extract);
- } else {
- Operand width;
- if (const_bits) {
- width = Operand(const_bits->u32 << 16);
- } else {
- width = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), bits, Operand(16u));
- }
- extract = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), offset, width);
+ uint32_t extract = (const_bits->u32 << 16) | (const_offset->u32 & 0x1f);
+ aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32;
+ bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand(extract));
+ break;
}
- aco_opcode opcode;
- if (dst.regClass() == s1) {
- if (instr->op == nir_op_ubfe)
- opcode = aco_opcode::s_bfe_u32;
- else
- opcode = aco_opcode::s_bfe_i32;
- } else if (dst.regClass() == s2) {
- if (instr->op == nir_op_ubfe)
- opcode = aco_opcode::s_bfe_u64;
- else
- opcode = aco_opcode::s_bfe_i64;
+ Temp offset = get_alu_src(ctx, instr->src[1]);
+ Temp bits = get_alu_src(ctx, instr->src[2]);
+ if (instr->op == nir_op_ubfe) {
+ Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset);
+ Temp masked = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask);
+ bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), masked, offset);
} else {
- unreachable("Unsupported BFE bit size");
- }
-
- bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, extract);
+ Operand bits_op = const_bits ? Operand(const_bits->u32 << 16) :
+ bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), bits, Operand(16u));
+ Operand offset_op = const_offset ? Operand(const_offset->u32 & 0x1fu) :
+ bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), offset, Operand(0x1fu));
- } else {
- aco_opcode opcode;
- if (dst.regClass() == v1) {
- if (instr->op == nir_op_ubfe)
- opcode = aco_opcode::v_bfe_u32;
- else
- opcode = aco_opcode::v_bfe_i32;
- } else {
- unreachable("Unsupported BFE bit size");
+ Temp extract = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), bits_op, offset_op);
+ bld.sop2(aco_opcode::s_bfe_i32, Definition(dst), bld.def(s1, scc), base, extract);
}
+ } else {
+ aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::v_bfe_u32 : aco_opcode::v_bfe_i32;
emit_vop3a_instruction(ctx, instr, opcode, dst);
}
break;
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_eq_f16, aco_opcode::v_cmp_eq_f32, aco_opcode::v_cmp_eq_f64);
break;
}
- case nir_op_fne: {
+ case nir_op_fneu: {
emit_comparison(ctx, instr, dst, aco_opcode::v_cmp_neq_f16, aco_opcode::v_cmp_neq_f32, aco_opcode::v_cmp_neq_f64);
break;
}
assert(nir_instr_is_last(&instr->instr));
ctx->block->kind |= block_kind_uniform;
ctx->cf_info.has_branch = true;
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_linear_edge(ctx->block->index, linear_target);
return;
}
ctx->cf_info.nir_to_aco[instr->instr.block->index] = idx;
/* remove critical edges from linear CFG */
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
Block* break_block = ctx->program->create_and_insert_block();
break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
break_block->kind |= block_kind_uniform;
add_linear_edge(idx, break_block);
add_linear_edge(break_block->index, linear_target);
bld.reset(break_block);
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
Block* continue_block = ctx->program->create_and_insert_block();
continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
/* uniform break - directly jump out of the loop */
ctx->block->kind |= block_kind_uniform;
ctx->cf_info.has_branch = true;
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_linear_edge(idx, logical_target);
return;
}
/* uniform continue - directly jump to the loop header */
ctx->block->kind |= block_kind_uniform;
ctx->cf_info.has_branch = true;
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_linear_edge(idx, logical_target);
return;
}
}
/* remove critical edges from linear CFG */
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
Block* break_block = ctx->program->create_and_insert_block();
break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
break_block->kind |= block_kind_uniform;
logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx];
add_linear_edge(break_block->index, logical_target);
bld.reset(break_block);
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
Block* continue_block = ctx->program->create_and_insert_block();
continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
append_logical_end(ctx->block);
ctx->block->kind |= block_kind_loop_preheader | block_kind_uniform;
Builder bld(ctx->program, ctx->block);
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
unsigned loop_preheader_idx = ctx->block->index;
Block loop_exit = Block();
break_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
break_block->kind = block_kind_uniform;
bld.reset(break_block);
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_linear_edge(block_idx, break_block);
add_linear_edge(break_block->index, &loop_exit);
continue_block->loop_nest_depth = ctx->cf_info.loop_nest_depth;
continue_block->kind = block_kind_uniform;
bld.reset(continue_block);
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_linear_edge(block_idx, continue_block);
add_linear_edge(continue_block->index, &ctx->program->blocks[loop_header_idx]);
}
bld.reset(ctx->block);
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
}
/* Fixup phis in loop header from unreachable blocks.
/* branch to linear then block */
assert(cond.regClass() == ctx->program->lane_mask);
aco_ptr<Pseudo_branch_instruction> branch;
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
branch->operands[0] = Operand(cond);
ctx->block->instructions.push_back(std::move(branch));
append_logical_end(BB_then_logical);
/* branch from logical then block to invert block */
aco_ptr<Pseudo_branch_instruction> branch;
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
BB_then_logical->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_then_logical->index, &ic->BB_invert);
if (!ctx->cf_info.parent_loop.has_divergent_branch)
BB_then_linear->kind |= block_kind_uniform;
add_linear_edge(ic->BB_if_idx, BB_then_linear);
/* branch from linear then block to invert block */
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
BB_then_linear->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_then_linear->index, &ic->BB_invert);
ic->invert_idx = ctx->block->index;
/* branch to linear else block (skip else) */
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_nz, Format::PSEUDO_BRANCH, 1, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_cbranch_nz, Format::PSEUDO_BRANCH, 1, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
branch->operands[0] = Operand(ic->cond);
ctx->block->instructions.push_back(std::move(branch));
/* branch from logical else block to endif block */
aco_ptr<Pseudo_branch_instruction> branch;
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
BB_else_logical->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_else_logical->index, &ic->BB_endif);
if (!ctx->cf_info.parent_loop.has_divergent_branch)
add_linear_edge(ic->invert_idx, BB_else_linear);
/* branch from linear else block to endif block */
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
BB_else_linear->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_else_linear->index, &ic->BB_endif);
aco_ptr<Pseudo_branch_instruction> branch;
aco_opcode branch_opcode = aco_opcode::p_cbranch_z;
- branch.reset(create_instruction<Pseudo_branch_instruction>(branch_opcode, Format::PSEUDO_BRANCH, 1, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(branch_opcode, Format::PSEUDO_BRANCH, 1, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
branch->operands[0] = Operand(cond);
branch->operands[0].setFixed(scc);
ctx->block->instructions.emplace_back(std::move(branch));
append_logical_end(BB_then);
/* branch from then block to endif block */
aco_ptr<Pseudo_branch_instruction> branch;
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
BB_then->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_then->index, &ic->BB_endif);
if (!ic->then_branch_divergent)
append_logical_end(BB_else);
/* branch from then block to endif block */
aco_ptr<Pseudo_branch_instruction> branch;
- branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
+ branch.reset(create_instruction<Pseudo_branch_instruction>(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
+ branch->definitions[0] = {ctx->program->allocateId(), s2};
+ branch->definitions[0].setHint(vcc);
BB_else->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_else->index, &ic->BB_endif);
if (!ctx->cf_info.parent_loop.has_divergent_branch)
Temp cond = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), stream_id, Operand(stream));
append_logical_end(ctx.block);
ctx.block->kind |= block_kind_uniform;
- bld.branch(aco_opcode::p_cbranch_z, cond);
+ bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), cond);
BB_endif.kind |= ctx.block->kind & block_kind_top_level;
append_logical_end(ctx.block);
/* branch from then block to endif block */
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_edge(ctx.block->index, &BB_endif);
ctx.block->kind |= block_kind_uniform;
append_logical_end(BB_else);
/* branch from else block to endif block */
- bld.branch(aco_opcode::p_branch);
+ bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
add_edge(BB_else->index, &BB_endif);
BB_else->kind |= block_kind_uniform;
cleanup_cfg(program);
}
+ /* Builds the trap handler program (GFX8 only, see the assert below).
+  *
+  * The emitted code loads a buffer descriptor from the address held in TMA
+  * into ttmp4, stores ttmp0-ttmp1 at offset 0 of that buffer, then reads four
+  * hardware registers with s_getreg_b32 and stores one dword each starting at
+  * offset 8, and finally ends the program with s_endpgm.
+  *
+  * program: program that is initialized and filled with the handler code.
+  * shader:  not referenced anywhere in this function body.
+  * config:  forwarded to init_program().
+  * args:    supplies options (chip class, family) and shader_info.
+  */
+void select_trap_handler_shader(Program *program, struct nir_shader *shader,
+ ac_shader_config* config,
+ struct radv_shader_args *args)
+{
+ assert(args->options->chip_class == GFX8); /* no other chip class is handled here */
+
+ init_program(program, compute_cs, args->shader_info,
+ args->options->chip_class, args->options->family, config); /* initialized with the compute_cs stage */
+
+ isel_context ctx = {}; /* zero-initialized; only the fields set below are filled in */
+ ctx.program = program;
+ ctx.args = args;
+ ctx.options = args->options;
+ ctx.stage = program->stage;
+
+ ctx.block = ctx.program->create_and_insert_block(); /* the single block that receives all emitted code */
+ ctx.block->loop_nest_depth = 0;
+ ctx.block->kind = block_kind_top_level;
+
+ program->workgroup_size = 1; /* XXX */
+
+ add_startpgm(&ctx);
+ append_logical_start(ctx.block);
+
+ Builder bld(ctx.program, ctx.block);
+
+ /* Load the buffer descriptor from TMA.
+  * It is read with s_load_dwordx4 at offset 0 into the s4 register range
+  * starting at ttmp4. */
+ bld.smem(aco_opcode::s_load_dwordx4, Definition(PhysReg{ttmp4}, s4),
+ Operand(PhysReg{tma}, s2), Operand(0u));
+
+ /* Store TTMP0-TTMP1.
+  * They are written with s_buffer_store_dwordx2 at offset 0 through the
+  * descriptor in ttmp4.
+  * NOTE(review): the meaning of the trailing `true` argument is not visible
+  * here - confirm against the Builder::smem signature. */
+ bld.smem(aco_opcode::s_buffer_store_dwordx2, Operand(PhysReg{ttmp4}, s4),
+ Operand(0u), Operand(PhysReg{ttmp0}, s2), memory_sync_info(), true);
+
+ uint32_t hw_regs_idx[] = { /* register ids OR'ed into the s_getreg_b32 immediate below */
+ 2, /* HW_REG_STATUS */
+ 3, /* HW_REG_TRAP_STS */
+ 4, /* HW_REG_HW_ID */
+ 7, /* HW_REG_IB_STS */
+ };
+
+ /* Store some hardware registers.
+  * Each one is read into ttmp8 and written as one dword at byte offset
+  * 8 + i * 4, i.e. starting right after the two dwords stored above. */
+ for (unsigned i = 0; i < ARRAY_SIZE(hw_regs_idx); i++) {
+ /* "((size - 1) << 11) | register"
+  * Here size is 20. NOTE(review): presumably this means only 20 bits of
+  * each register are captured - confirm that this is intended. */
+ bld.sopk(aco_opcode::s_getreg_b32, Definition(PhysReg{ttmp8}, s1),
+ ((20 - 1) << 11) | hw_regs_idx[i]);
+
+ bld.smem(aco_opcode::s_buffer_store_dword, Operand(PhysReg{ttmp4}, s4),
+ Operand(8u + i * 4), Operand(PhysReg{ttmp8}, s1), memory_sync_info(), true);
+ }
+
+ program->config->float_mode = program->blocks[0].fp_mode.val; /* taken from the first block's fp_mode */
+
+ append_logical_end(ctx.block);
+ ctx.block->kind |= block_kind_uniform;
+ bld.sopp(aco_opcode::s_endpgm); /* the handler ends the program here */
+
+ cleanup_cfg(program);
+}
}