-# ACO specific issues.
-dEQP-VK.transform_feedback.simple.multistreams_1
-dEQP-VK.transform_feedback.simple.multistreams_3
-
dEQP-VK.rasterization.flatshading.line_strip_wide
dEQP-VK.rasterization.flatshading.non_strict_line_strip_wide
dEQP-VK.rasterization.flatshading.non_strict_lines_wide
("vopc_e64", [Format.VOPC, Format.VOP3A], 'VOP3A_instruction', itertools.product([1, 2], [2])),
("flat", [Format.FLAT], 'FLAT_instruction', [(0, 3), (1, 2)]),
("global", [Format.GLOBAL], 'FLAT_instruction', [(0, 3), (1, 2)])]
+formats = [(f if len(f) == 5 else f + ('',)) for f in formats]
%>\\
-% for name, formats, struct, shapes in formats:
+% for name, formats, struct, shapes, extra_field_setup in formats:
% for num_definitions, num_operands in shapes:
<%
args = ['aco_opcode opcode']
% endfor
${f.get_builder_initialization(num_operands)}
% endfor
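+      /* extra per-format field initialization supplied by the formats table (empty for most formats) */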
+ ${extra_field_setup}
return insert(instr);
}
mubuf->barrier = info->barrier;
mubuf->can_reorder = info->can_reorder;
mubuf->offset = const_offset;
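+      /* A non-zero swizzle component size means the buffer uses swizzled addressing. */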
+ mubuf->swizzled = info->swizzle_component_size != 0;
RegClass rc = RegClass::get(RegType::vgpr, align(bytes_size, 4));
Temp val = dst_hint.id() && rc == dst_hint.regClass() ? dst_hint : bld.tmp(rc);
mubuf->definitions[0] = Definition(val);
}
void emit_single_mubuf_store(isel_context *ctx, Temp descriptor, Temp voffset, Temp soffset, Temp vdata,
- unsigned const_offset = 0u, bool allow_reorder = true, bool slc = false)
+ unsigned const_offset = 0u, bool allow_reorder = true, bool slc = false,
+ bool swizzled = false)
{
assert(vdata.id());
assert(vdata.size() != 3 || ctx->program->chip_class != GFX6);
Operand voffset_op = voffset.id() ? Operand(as_vgpr(ctx, voffset)) : Operand(v1);
Operand soffset_op = soffset.id() ? Operand(soffset) : Operand(0u);
Builder::Result r = bld.mubuf(op, Operand(descriptor), voffset_op, soffset_op, Operand(vdata), const_offset,
- /* offen */ !voffset_op.isUndefined(), /* idxen*/ false, /* addr64 */ false,
- /* disable_wqm */ false, /* glc */ true, /* dlc*/ false, /* slc */ slc);
+ /* offen */ !voffset_op.isUndefined(), /* swizzled */ swizzled,
+ /* idxen*/ false, /* addr64 */ false, /* disable_wqm */ false, /* glc */ true,
+ /* dlc*/ false, /* slc */ slc);
static_cast<MUBUF_instruction *>(r.instr)->can_reorder = allow_reorder;
}
for (unsigned i = 0; i < write_count; i++) {
unsigned const_offset = offsets[i] + base_const_offset;
- emit_single_mubuf_store(ctx, descriptor, voffset, soffset, write_datas[i], const_offset, reorder, slc);
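+      /* Stores that may not be combined target swizzled buffers, so !allow_combining doubles as the swizzled flag. */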
+ emit_single_mubuf_store(ctx, descriptor, voffset, soffset, write_datas[i], const_offset, reorder, slc, !allow_combining);
}
}
if (use_mubuf) {
Instruction *mubuf = bld.mubuf(opcode,
Definition(fetch_dst), list, fetch_index, soffset,
- fetch_offset, false, true).instr;
+                                     fetch_offset, /* offen */ false, /* swizzled */ false, /* idxen */ true).instr;
static_cast<MUBUF_instruction*>(mubuf)->can_reorder = true;
} else {
Instruction *mtbuf = bld.mtbuf(opcode,
for (unsigned i = 0; i < write_count; i++) {
aco_opcode op = get_buffer_store_op(false, write_datas[i].bytes());
- bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_datas[i], offsets[i], true);
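+      /* Scratch buffers use per-lane swizzled addressing, so mark the store swizzled. */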
+      bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_datas[i], offsets[i],
+                /* offen */ true, /* swizzled */ true);
}
}
Temp control_word = bld.copy(bld.def(v1), Operand(0x80000000u));
bld.mubuf(aco_opcode::buffer_store_dword,
/* SRSRC */ hs_ring_tess_factor, /* VADDR */ Operand(v1), /* SOFFSET */ tf_base, /* VDATA */ control_word,
- /* immediate OFFSET */ 0, /* OFFEN */ false, /* idxen*/ false, /* addr64 */ false,
- /* disable_wqm */ false, /* glc */ true);
+ /* immediate OFFSET */ 0, /* OFFEN */ false, /* swizzled */ false, /* idxen*/ false,
+ /* addr64 */ false, /* disable_wqm */ false, /* glc */ true);
tf_const_offset += 4;
begin_divergent_if_else(ctx, &ic_rel_patch_id_is_zero);
bool lds : 1; /* Return read-data to LDS instead of VGPRs */
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
bool can_reorder : 1;
- uint8_t padding : 2;
+   bool swizzled : 1; /* Whether the buffer access uses swizzled addressing */
+   uint8_t padding : 1;
barrier_interaction barrier;
};
static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
elif self == Format.MUBUF:
return [('unsigned', 'offset', None),
('bool', 'offen', None),
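+                # swizzled buffer addressing (see MUBUF_instruction::swizzled)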
+ ('bool', 'swizzled', 'false'),
('bool', 'idxen', 'false'),
('bool', 'addr64', 'false'),
('bool', 'disable_wqm', 'false'),
return true;
}
-bool parse_base_offset(opt_ctx &ctx, Instruction* instr, unsigned op_index, Temp *base, uint32_t *offset)
+bool parse_base_offset(opt_ctx &ctx, Instruction* instr, unsigned op_index, Temp *base, uint32_t *offset, bool prevent_overflow)
{
+ if (prevent_overflow)
+      return false; /* TODO: allow additions that provably cannot overflow */
+
Operand op = instr->operands[op_index];
if (!op.isTemp())
continue;
uint32_t offset2 = 0;
- if (parse_base_offset(ctx, add_instr, !i, base, &offset2)) {
+ if (parse_base_offset(ctx, add_instr, !i, base, &offset2, prevent_overflow)) {
*offset += offset2;
} else {
*base = add_instr->operands[!i].getTemp();
while (info.is_temp())
info = ctx.info[info.temp.id()];
+   /* According to AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(), vaddr
+    * overflow for scratch accesses only works on GFX9+, and saddr overflow
+    * never works. Since swizzling is the only thing that separates scratch
+    * accesses from other accesses, and swizzling significantly changes how
+    * addressing works, this probably applies to all swizzled MUBUF
+    * accesses. */
+ bool vaddr_prevent_overflow = mubuf->swizzled && ctx.program->chip_class < GFX9;
+ bool saddr_prevent_overflow = mubuf->swizzled;
+
if (mubuf->offen && i == 1 && info.is_constant_or_literal(32) && mubuf->offset + info.val < 4096) {
assert(!mubuf->idxen);
instr->operands[1] = Operand(v1);
instr->operands[2] = Operand((uint32_t) 0);
mubuf->offset += info.val;
continue;
- } else if (mubuf->offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == v1 && mubuf->offset + offset < 4096) {
+ } else if (mubuf->offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, vaddr_prevent_overflow) &&
+ base.regClass() == v1 && mubuf->offset + offset < 4096) {
assert(!mubuf->idxen);
instr->operands[1].setTemp(base);
mubuf->offset += offset;
continue;
- } else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == s1 && mubuf->offset + offset < 4096) {
+ } else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset, saddr_prevent_overflow) &&
+ base.regClass() == s1 && mubuf->offset + offset < 4096) {
instr->operands[i].setTemp(base);
mubuf->offset += offset;
continue;
uint32_t offset;
bool has_usable_ds_offset = ctx.program->chip_class >= GFX7;
if (has_usable_ds_offset &&
- i == 0 && parse_base_offset(ctx, instr.get(), i, &base, &offset) &&
+ i == 0 && parse_base_offset(ctx, instr.get(), i, &base, &offset, false) &&
base.regClass() == instr->operands[i].regClass() &&
instr->opcode != aco_opcode::ds_swizzle_b32) {
if (instr->opcode == aco_opcode::ds_write2_b32 || instr->opcode == aco_opcode::ds_read2_b32 ||
(ctx.program->chip_class >= GFX8 && info.val <= 0xFFFFF))) {
instr->operands[i] = Operand(info.val);
continue;
- } else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) {
+   } else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, true) &&
+              base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) {
bool soe = smem->operands.size() >= (!smem->definitions.empty() ? 3 : 4);
if (soe &&
(!ctx.info[smem->operands.back().tempId()].is_constant_or_literal(32) ||
split->definitions[i] = bld.def(v1);
bld.insert(split);
for (unsigned i = 0; i < temp.size(); i++)
- bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false);
+            bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, split->definitions[i].getTemp(),
+                      offset + i * 4, /* offen */ false, /* swizzled */ true);
} else {
- bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, temp, offset, false);
+         bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, temp, offset,
+                   /* offen */ false, /* swizzled */ true);
}
} else {
ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
for (unsigned i = 0; i < def.size(); i++) {
Temp tmp = bld.tmp(v1);
vec->operands[i] = Operand(tmp);
- bld.mubuf(opcode, Definition(tmp), scratch_rsrc, Operand(v1), scratch_offset, offset + i * 4, false);
+            bld.mubuf(opcode, Definition(tmp), scratch_rsrc, Operand(v1), scratch_offset,
+                      offset + i * 4, /* offen */ false, /* swizzled */ true);
}
bld.insert(vec);
} else {
- bld.mubuf(opcode, def, scratch_rsrc, Operand(v1), scratch_offset, offset, false);
+         bld.mubuf(opcode, def, scratch_rsrc, Operand(v1), scratch_offset, offset,
+                   /* offen */ false, /* swizzled */ true);
}
} else {
uint32_t spill_slot = slots[spill_id];