return 0;
}
+/*
+ * Append a copy of *output to bc->pending_outputs.
+ *
+ * The copy is stored at index bc->n_pending_outputs, which is then
+ * incremented.  The capacity is checked at runtime in addition to the
+ * assert(): assert() is compiled out when NDEBUG is defined, and without
+ * the explicit check the store would run past the end of the array.
+ *
+ * Returns 0 on success, or -1 (nothing stored) when the array is full.
+ */
+int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
+		const struct r600_bytecode_output *output)
+{
+	/* Index n_pending_outputs is valid as long as it is below the array size. */
+	assert(bc->n_pending_outputs < ARRAY_SIZE(bc->pending_outputs));
+	if (bc->n_pending_outputs >= ARRAY_SIZE(bc->pending_outputs))
+		return -1;
+
+	bc->pending_outputs[bc->n_pending_outputs++] = *output;
+
+	return 0;
+}
+
+/* Store need_wait_ack in bc->need_wait_ack, replacing the previous value. */
+void r600_bytecode_need_wait_ack(struct r600_bytecode *bc, boolean need_wait_ack)
+{
+	struct r600_bytecode *const target = bc;
+
+	target->need_wait_ack = need_wait_ack;
+}
+
+/* Return the current value of bc->need_wait_ack. */
+boolean r600_bytecode_get_need_wait_ack(struct r600_bytecode *bc)
+{
+	const boolean pending = bc->need_wait_ack;
+
+	return pending;
+}
+
/* alu instructions that can only exist once per group */
static int is_alu_once_inst(struct r600_bytecode_alu *alu)
{
continue;
bank = alu->src[i].kc_bank;
+ assert(bank < R600_MAX_HW_CONST_BUFFERS);
line = (sel-512)>>4;
index_mode = alu->src[i].kc_rel ? 1 : 0; // V_SQ_CF_INDEX_0 / V_SQ_CF_INDEX_NONE
if (nalu->dst.rel && bc->r6xx_nop_after_rel_dst)
insert_nop_r6xx(bc);
+ /* Might need to insert spill write ops after current clause */
+ if (nalu->last && bc->n_pending_outputs) {
+ while (bc->n_pending_outputs) {
+ r = r600_bytecode_add_output(bc, &bc->pending_outputs[--bc->n_pending_outputs]);
+ if (r)
+ return r;
+ }
+ }
+
return 0;
}
int r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op)
{
int r;
+
+ /* Emit WAIT_ACK before control flow to ensure pending writes are always acked. */
+ if (op != CF_OP_MEM_SCRATCH && bc->need_wait_ack) {
+ bc->need_wait_ack = false;
+ r = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK);
+ }
+
r = r600_bytecode_add_cf(bc);
if (r)
return r;
/* common to all 3 families */
static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id)
{
- bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
+ if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
+ return r700_bytecode_fetch_mem_build(bc, vtx, id);
+ bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(r600_isa_fetch_opcode(bc->isa->hw_class, vtx->op)) |
+ S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x);
*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) |
S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+ S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)|
+ S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
/* common for r600/r700 - eg in eg_asm.c */
o += fprintf(stderr, "LDS_OQ_B_POP");
need_chan = 1;
break;
+ case EG_V_SQ_ALU_SRC_TIME_LO:
+ o += fprintf(stderr, "TIME_LO");
+ break;
+ case EG_V_SQ_ALU_SRC_TIME_HI:
+ o += fprintf(stderr, "TIME_HI");
+ break;
+ case EG_V_SQ_ALU_SRC_SE_ID:
+ o += fprintf(stderr, "SE_ID");
+ break;
+ case EG_V_SQ_ALU_SRC_SIMD_ID:
+ o += fprintf(stderr, "SIMD_ID");
+ break;
+ case EG_V_SQ_ALU_SRC_HW_WAVE_ID:
+ o += fprintf(stderr, "HW_WAVE_ID");
+ break;
case V_SQ_ALU_SRC_PS:
o += fprintf(stderr, "PS");
break;
fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
bc->bytecode[id + 1], cfop->name);
fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
+ if (cf->vpm)
+ fprintf(stderr, "VPM ");
+ if (cf->end_of_program)
+ fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
+
} else if (cfop->flags & CF_EXP) {
int o = 0;
const char *exp_type[] = {"PIXEL", "POS ", "PARAM"};
print_indent(o, 67);
fprintf(stderr, " ES:%X ", cf->output.elem_size);
+ if (cf->mark)
+ fprintf(stderr, "MARK ");
if (!cf->barrier)
fprintf(stderr, "NO_BARRIER ");
if (cf->end_of_program)
bc->bytecode[id], bc->bytecode[id + 1], cfop->name);
o += print_indent(o, 43);
o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
+
+ if (r600_isa_cf(cf->op)->flags & CF_RAT) {
+ o += fprintf(stderr, "RAT%d", cf->rat.id);
+ if (cf->rat.index_mode) {
+ o += fprintf(stderr, "[IDX%d]", cf->rat.index_mode - 1);
+ }
+ o += fprintf(stderr, " INST: %d ", cf->rat.inst);
+ }
+
if (cf->output.burst_count > 1) {
o += fprintf(stderr, "%d-%d ", cf->output.array_base,
cf->output.array_base + cf->output.burst_count - 1);
fprintf(stderr, " ES:%i ", cf->output.elem_size);
if (cf->output.array_size != 0xFFF)
fprintf(stderr, "AS:%i ", cf->output.array_size);
+ if (cf->mark)
+ fprintf(stderr, "MARK ");
if (!cf->barrier)
fprintf(stderr, "NO_BARRIER ");
if (cf->end_of_program)
fprintf(stderr, "EOP ");
+
+ if (cf->output.mark)
+ fprintf(stderr, "MARK ");
+
fprintf(stderr, "\n");
} else {
fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
fprintf(stderr, "POP:%X ", cf->pop_count);
if (cf->count && (cfop->flags & CF_EMIT))
fprintf(stderr, "STREAM%d ", cf->count);
+ if (cf->vpm)
+ fprintf(stderr, "VPM ");
if (cf->end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
o += fprintf(stderr, ", R%d.", vtx->src_gpr);
o += print_swizzle(vtx->src_sel_x);
+ if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
+ o += print_swizzle(vtx->src_sel_y);
if (vtx->offset)
fprintf(stderr, " +%db", vtx->offset);
if (bc->chip_class >= EVERGREEN && vtx->buffer_index_mode)
fprintf(stderr, "SQ_%s ", index_mode[vtx->buffer_index_mode]);
+ if (r600_isa_fetch(vtx->op)->flags & FF_MEM) {
+ if (vtx->uncached)
+ fprintf(stderr, "UNCACHED ");
+ if (vtx->indexed)
+ fprintf(stderr, "INDEXED:%d ", vtx->indexed);
+
+ fprintf(stderr, "ELEM_SIZE:%d ", vtx->elem_size);
+ if (vtx->burst_count)
+ fprintf(stderr, "BURST_COUNT:%d ", vtx->burst_count);
+ fprintf(stderr, "ARRAY_BASE:%d ", vtx->array_base);
+ fprintf(stderr, "ARRAY_SIZE:%d ", vtx->array_size);
+ }
+
fprintf(stderr, "UCF:%d ", vtx->use_const_fields);
fprintf(stderr, "FMT(DTA:%d ", vtx->data_format);
fprintf(stderr, "NUM:%d ", vtx->num_format_all);
return;
}
+ if (pformat == PIPE_FORMAT_A1B5G5R5_UNORM) {
+ *format = FMT_5_5_5_1;
+ return;
+ }
+
desc = util_format_description(pformat);
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
goto out_unknown;
/* Signed ints */
case UTIL_FORMAT_TYPE_SIGNED:
switch (desc->channel[i].size) {
+ case 4:
+ switch (desc->nr_channels) {
+ case 2:
+ *format = FMT_4_4;
+ break;
+ case 4:
+ *format = FMT_4_4_4_4;
+ break;
+ }
+ break;
case 8:
switch (desc->nr_channels) {
case 1:
return NULL;
}
- bytecode = r600_buffer_map_sync_with_rings(&rctx->b, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
+ bytecode = r600_buffer_map_sync_with_rings
+ (&rctx->b, shader->buffer,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY);
bytecode += shader->offset / 4;
if (R600_BIG_ENDIAN) {