if (!cf)
return NULL;
- LIST_INITHEAD(&cf->list);
- LIST_INITHEAD(&cf->alu);
- LIST_INITHEAD(&cf->vtx);
- LIST_INITHEAD(&cf->tex);
- LIST_INITHEAD(&cf->gds);
+ list_inithead(&cf->list);
+ list_inithead(&cf->alu);
+ list_inithead(&cf->vtx);
+ list_inithead(&cf->tex);
+ list_inithead(&cf->gds);
return cf;
}
if (!alu)
return NULL;
- LIST_INITHEAD(&alu->list);
+ list_inithead(&alu->list);
return alu;
}
if (!vtx)
return NULL;
- LIST_INITHEAD(&vtx->list);
+ list_inithead(&vtx->list);
return vtx;
}
if (!tex)
return NULL;
- LIST_INITHEAD(&tex->list);
+ list_inithead(&tex->list);
return tex;
}
if (gds == NULL)
return NULL;
- LIST_INITHEAD(&gds->list);
+ list_inithead(&gds->list);
return gds;
}
bc->r6xx_nop_after_rel_dst = 0;
}
- LIST_INITHEAD(&bc->cf);
+ list_inithead(&bc->cf);
bc->chip_class = chip_class;
bc->family = family;
bc->has_compressed_msaa_texturing = has_compressed_msaa_texturing;
if (!cf)
return -ENOMEM;
- LIST_ADDTAIL(&cf->list, &bc->cf);
+ list_addtail(&cf->list, &bc->cf);
if (bc->cf_last) {
cf->id = bc->cf_last->id + 2;
if (bc->cf_last->eg_alu_extended) {
return 0;
}
+int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
+ const struct r600_bytecode_output *output)
+{ /* Queue an output for later emission.
+   *
+   * Copies *output by value into the next free slot of
+   * bc->pending_outputs and increments bc->n_pending_outputs.
+   * Always returns 0.
+   *
+   * Queued entries are drained from the tail (last in, first out)
+   * through r600_bytecode_add_output() once an ALU instruction with
+   * `last` set has been appended.
+   *
+   * NOTE(review): the assert below only passes while
+   * n_pending_outputs <= ARRAY_SIZE(pending_outputs) - 2, so the final
+   * array slot is never filled while asserts are enabled. When asserts
+   * are compiled out (NDEBUG) nothing bounds the write -- confirm that
+   * callers cannot queue more entries than the array holds.
+   */
+ assert(bc->n_pending_outputs + 1 < ARRAY_SIZE(bc->pending_outputs));
+ bc->pending_outputs[bc->n_pending_outputs++] = *output; /* struct copy, then bump count */
+
+ return 0;
+}
+
+void r600_bytecode_need_wait_ack(struct r600_bytecode *bc, boolean need_wait_ack)
+{ /* Set (or clear) bc->need_wait_ack.
+   *
+   * r600_bytecode_add_cfinst() reads this flag: when it is set and the
+   * requested op is not CF_OP_MEM_SCRATCH, it clears the flag and emits
+   * a CF_OP_WAIT_ACK instruction before the requested one.
+   */
+ bc->need_wait_ack = need_wait_ack;
+}
+
+boolean r600_bytecode_get_need_wait_ack(struct r600_bytecode *bc)
+{ /* Return the current value of bc->need_wait_ack without modifying it. */
+ return bc->need_wait_ack;
+}
+
/* alu instructions that can ony exits once per group */
static int is_alu_once_inst(struct r600_bytecode_alu *alu)
{
for (i = 0; i < max_slots; ++i) {
slots[i] = result[i];
if (result[i]) {
- LIST_DEL(&result[i]->list);
+ list_del(&result[i]->list);
result[i]->last = 0;
- LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu);
+ list_addtail(&result[i]->list, &bc->cf_last->alu);
}
}
continue;
bank = alu->src[i].kc_bank;
+ assert(bank < R600_MAX_HW_CONST_BUFFERS);
line = (sel-512)>>4;
index_mode = alu->src[i].kc_rel ? 1 : 0; // V_SQ_CF_INDEX_0 / V_SQ_CF_INDEX_NONE
if (nalu->dst.sel >= bc->ngpr) {
bc->ngpr = nalu->dst.sel + 1;
}
- LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
+ list_addtail(&nalu->list, &bc->cf_last->alu);
/* each alu use 2 dwords */
bc->cf_last->ndw += 2;
bc->ndw += 2;
if (nalu->dst.rel && bc->r6xx_nop_after_rel_dst)
insert_nop_r6xx(bc);
+ /* Might need to insert spill write ops after current clause */
+ if (nalu->last && bc->n_pending_outputs) {
+ while (bc->n_pending_outputs) {
+ r = r600_bytecode_add_output(bc, &bc->pending_outputs[--bc->n_pending_outputs]);
+ if (r)
+ return r;
+ }
+ }
+
return 0;
}
return -EINVAL;
}
}
- LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx);
+ list_addtail(&nvtx->list, &bc->cf_last->vtx);
/* each fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
bc->cf_last->op == CF_OP_TEX) {
struct r600_bytecode_tex *ttex;
LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
- if (ttex->dst_gpr == ntex->src_gpr) {
+ if (ttex->dst_gpr == ntex->src_gpr &&
+ (ttex->dst_sel_x < 4 || ttex->dst_sel_y < 4 ||
+ ttex->dst_sel_z < 4 || ttex->dst_sel_w < 4)) {
bc->force_add_cf = 1;
break;
}
if (ntex->dst_gpr >= bc->ngpr) {
bc->ngpr = ntex->dst_gpr + 1;
}
- LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex);
+ list_addtail(&ntex->list, &bc->cf_last->tex);
/* each texture fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
bc->cf_last->op = CF_OP_GDS;
}
- LIST_ADDTAIL(&ngds->list, &bc->cf_last->gds);
+ list_addtail(&ngds->list, &bc->cf_last->gds);
bc->cf_last->ndw += 4; /* each GDS uses 4 dwords */
if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
bc->force_add_cf = 1;
int r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op)
{
int r;
+
+ /* Emit WAIT_ACK before control flow to ensure pending writes are always acked. */
+ if (op != CF_OP_MEM_SCRATCH && bc->need_wait_ack) {
+ bc->need_wait_ack = false;
+ r = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK);
+ }
+
r = r600_bytecode_add_cf(bc);
if (r)
return r;
/* common to all 3 families */
static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id)
{
- bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
+ if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
+ return r700_bytecode_fetch_mem_build(bc, vtx, id);
+ bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(r600_isa_fetch_opcode(bc->isa->hw_class, vtx->op)) |
+ S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x);
*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) |
S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+ S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)|
+ S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
/* common for r600/r700 - eg in eg_asm.c */
free(alu);
}
- LIST_INITHEAD(&cf->alu);
+ list_inithead(&cf->alu);
LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) {
free(tex);
}
- LIST_INITHEAD(&cf->tex);
+ list_inithead(&cf->tex);
LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) {
free(vtx);
}
- LIST_INITHEAD(&cf->vtx);
+ list_inithead(&cf->vtx);
LIST_FOR_EACH_ENTRY_SAFE(gds, next_gds, &cf->gds, list) {
free(gds);
}
- LIST_INITHEAD(&cf->gds);
+ list_inithead(&cf->gds);
free(cf);
}
- LIST_INITHEAD(&cf->list);
+ list_inithead(&cf->list);
}
static int print_swizzle(unsigned swz)
o += fprintf(stderr, "LDS_OQ_B_POP");
need_chan = 1;
break;
+ case EG_V_SQ_ALU_SRC_TIME_LO:
+ o += fprintf(stderr, "TIME_LO");
+ break;
+ case EG_V_SQ_ALU_SRC_TIME_HI:
+ o += fprintf(stderr, "TIME_HI");
+ break;
+ case EG_V_SQ_ALU_SRC_SE_ID:
+ o += fprintf(stderr, "SE_ID");
+ break;
+ case EG_V_SQ_ALU_SRC_SIMD_ID:
+ o += fprintf(stderr, "SIMD_ID");
+ break;
+ case EG_V_SQ_ALU_SRC_HW_WAVE_ID:
+ o += fprintf(stderr, "HW_WAVE_ID");
+ break;
case V_SQ_ALU_SRC_PS:
o += fprintf(stderr, "PS");
break;
fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
bc->bytecode[id + 1], cfop->name);
fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
+ if (cf->vpm)
+ fprintf(stderr, "VPM ");
+ if (cf->end_of_program)
+ fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
+
} else if (cfop->flags & CF_EXP) {
int o = 0;
const char *exp_type[] = {"PIXEL", "POS ", "PARAM"};
print_indent(o, 67);
fprintf(stderr, " ES:%X ", cf->output.elem_size);
+ if (cf->mark)
+ fprintf(stderr, "MARK ");
if (!cf->barrier)
fprintf(stderr, "NO_BARRIER ");
if (cf->end_of_program)
bc->bytecode[id], bc->bytecode[id + 1], cfop->name);
o += print_indent(o, 43);
o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
+
+ if (r600_isa_cf(cf->op)->flags & CF_RAT) {
+ o += fprintf(stderr, "RAT%d", cf->rat.id);
+ if (cf->rat.index_mode) {
+ o += fprintf(stderr, "[IDX%d]", cf->rat.index_mode - 1);
+ }
+ o += fprintf(stderr, " INST: %d ", cf->rat.inst);
+ }
+
if (cf->output.burst_count > 1) {
o += fprintf(stderr, "%d-%d ", cf->output.array_base,
cf->output.array_base + cf->output.burst_count - 1);
fprintf(stderr, " ES:%i ", cf->output.elem_size);
if (cf->output.array_size != 0xFFF)
fprintf(stderr, "AS:%i ", cf->output.array_size);
+ if (cf->mark)
+ fprintf(stderr, "MARK ");
if (!cf->barrier)
fprintf(stderr, "NO_BARRIER ");
if (cf->end_of_program)
fprintf(stderr, "EOP ");
+
+ if (cf->output.mark)
+ fprintf(stderr, "MARK ");
+
fprintf(stderr, "\n");
} else {
fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
fprintf(stderr, "POP:%X ", cf->pop_count);
if (cf->count && (cfop->flags & CF_EMIT))
fprintf(stderr, "STREAM%d ", cf->count);
+ if (cf->vpm)
+ fprintf(stderr, "VPM ");
if (cf->end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
o += fprintf(stderr, ", R%d.", vtx->src_gpr);
o += print_swizzle(vtx->src_sel_x);
+ if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
+ o += print_swizzle(vtx->src_sel_y);
if (vtx->offset)
fprintf(stderr, " +%db", vtx->offset);
if (bc->chip_class >= EVERGREEN && vtx->buffer_index_mode)
fprintf(stderr, "SQ_%s ", index_mode[vtx->buffer_index_mode]);
+ if (r600_isa_fetch(vtx->op)->flags & FF_MEM) {
+ if (vtx->uncached)
+ fprintf(stderr, "UNCACHED ");
+ if (vtx->indexed)
+ fprintf(stderr, "INDEXED:%d ", vtx->indexed);
+
+ fprintf(stderr, "ELEM_SIZE:%d ", vtx->elem_size);
+ if (vtx->burst_count)
+ fprintf(stderr, "BURST_COUNT:%d ", vtx->burst_count);
+ fprintf(stderr, "ARRAY_BASE:%d ", vtx->array_base);
+ fprintf(stderr, "ARRAY_SIZE:%d ", vtx->array_size);
+ }
+
fprintf(stderr, "UCF:%d ", vtx->use_const_fields);
fprintf(stderr, "FMT(DTA:%d ", vtx->data_format);
fprintf(stderr, "NUM:%d ", vtx->num_format_all);
return;
}
+ if (pformat == PIPE_FORMAT_A1B5G5R5_UNORM) {
+ *format = FMT_5_5_5_1;
+ return;
+ }
+
desc = util_format_description(pformat);
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
goto out_unknown;
/* Signed ints */
case UTIL_FORMAT_TYPE_SIGNED:
switch (desc->channel[i].size) {
+ case 4:
+ switch (desc->nr_channels) {
+ case 2:
+ *format = FMT_4_4;
+ break;
+ case 4:
+ *format = FMT_4_4_4_4;
+ break;
+ }
+ break;
case 8:
switch (desc->nr_channels) {
case 1:
return NULL;
}
- bytecode = r600_buffer_map_sync_with_rings(&rctx->b, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
+ bytecode = r600_buffer_map_sync_with_rings
+ (&rctx->b, shader->buffer,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY);
bytecode += shader->offset / 4;
if (R600_BIG_ENDIAN) {