return alu->dst.write || alu->is_op3;
}
-static inline unsigned int r600_bytecode_get_num_operands(
- struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static inline unsigned int r600_bytecode_get_num_operands(const struct r600_bytecode_alu *alu)
{
return r600_isa_alu(alu->op)->src_count;
}
-int r700_bytecode_alu_build(struct r600_bytecode *bc,
- struct r600_bytecode_alu *alu, unsigned id);
-
static struct r600_bytecode_cf *r600_bytecode_cf(void)
{
struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf);
if (!cf)
return NULL;
- LIST_INITHEAD(&cf->list);
- LIST_INITHEAD(&cf->alu);
- LIST_INITHEAD(&cf->vtx);
- LIST_INITHEAD(&cf->tex);
- LIST_INITHEAD(&cf->gds);
+ list_inithead(&cf->list);
+ list_inithead(&cf->alu);
+ list_inithead(&cf->vtx);
+ list_inithead(&cf->tex);
+ list_inithead(&cf->gds);
return cf;
}
if (!alu)
return NULL;
- LIST_INITHEAD(&alu->list);
+ list_inithead(&alu->list);
return alu;
}
if (!vtx)
return NULL;
- LIST_INITHEAD(&vtx->list);
+ list_inithead(&vtx->list);
return vtx;
}
if (!tex)
return NULL;
- LIST_INITHEAD(&tex->list);
+ list_inithead(&tex->list);
return tex;
}
if (gds == NULL)
return NULL;
- LIST_INITHEAD(&gds->list);
+ list_inithead(&gds->list);
return gds;
}
bc->r6xx_nop_after_rel_dst = 0;
}
- LIST_INITHEAD(&bc->cf);
+ list_inithead(&bc->cf);
bc->chip_class = chip_class;
bc->family = family;
bc->has_compressed_msaa_texturing = has_compressed_msaa_texturing;
if (!cf)
return -ENOMEM;
- LIST_ADDTAIL(&cf->list, &bc->cf);
+ list_addtail(&cf->list, &bc->cf);
if (bc->cf_last) {
cf->id = bc->cf_last->id + 2;
if (bc->cf_last->eg_alu_extended) {
return 0;
}
+int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
+ const struct r600_bytecode_output *output)
+{
+ assert(bc->n_pending_outputs + 1 < ARRAY_SIZE(bc->pending_outputs));
+ bc->pending_outputs[bc->n_pending_outputs++] = *output;
+
+ return 0;
+}
+
+void r600_bytecode_need_wait_ack(struct r600_bytecode *bc, boolean need_wait_ack)
+{
+ bc->need_wait_ack = need_wait_ack;
+}
+
+boolean r600_bytecode_get_need_wait_ack(struct r600_bytecode *bc)
+{
+ return bc->need_wait_ack;
+}
+
/* alu instructions that can ony exits once per group */
-static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int is_alu_once_inst(struct r600_bytecode_alu *alu)
{
return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER;
}
(r600_isa_alu_slots(bc->isa->hw_class, alu->op) == AF_4V);
}
-static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int is_alu_mova_inst(struct r600_bytecode_alu *alu)
{
return r600_isa_alu(alu->op)->flags & AF_MOVA;
}
-static int alu_uses_rel(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int alu_uses_rel(struct r600_bytecode_alu *alu)
{
- unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
+ unsigned num_src = r600_bytecode_get_num_operands(alu);
unsigned src;
if (alu->dst.rel) {
return sel == EG_V_SQ_ALU_SRC_LDS_OQ_A_POP || sel == EG_V_SQ_ALU_SRC_LDS_OQ_B_POP;
}
-static int alu_uses_lds(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int alu_uses_lds(struct r600_bytecode_alu *alu)
{
- unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
+ unsigned num_src = r600_bytecode_get_num_operands(alu);
unsigned src;
for (src = 0; src < num_src; ++src) {
return 0;
}
-static int is_alu_64bit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int is_alu_64bit_inst(struct r600_bytecode_alu *alu)
{
const struct alu_op_info *op = r600_isa_alu(alu->op);
return (op->flags & AF_64);
return slots == AF_VS;
}
-static int is_nop_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int is_nop_inst(struct r600_bytecode_alu *alu)
{
return alu->op == ALU_OP0_NOP;
-}
+}
static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first,
struct r600_bytecode_alu *assignment[5])
return 0;
}
-static int reserve_cfile(struct r600_bytecode *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
+static int reserve_cfile(const struct r600_bytecode *bc,
+ struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
{
int res, num_res = 4;
if (bc->chip_class >= R700) {
sel <= V_SQ_ALU_SRC_LITERAL);
}
-static int check_vector(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
+static int check_vector(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
struct alu_bank_swizzle *bs, int bank_swizzle)
{
int r, src, num_src, sel, elem, cycle;
- num_src = r600_bytecode_get_num_operands(bc, alu);
+ num_src = r600_bytecode_get_num_operands(alu);
for (src = 0; src < num_src; src++) {
sel = alu->src[src].sel;
elem = alu->src[src].chan;
return 0;
}
-static int check_scalar(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
+static int check_scalar(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
struct alu_bank_swizzle *bs, int bank_swizzle)
{
int r, src, num_src, const_count, sel, elem, cycle;
- num_src = r600_bytecode_get_num_operands(bc, alu);
+ num_src = r600_bytecode_get_num_operands(alu);
for (const_count = 0, src = 0; src < num_src; ++src) {
sel = alu->src[src].sel;
elem = alu->src[src].chan;
return 0;
}
-static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
+static int check_and_set_bank_swizzle(const struct r600_bytecode *bc,
struct r600_bytecode_alu *slots[5])
{
struct alu_bank_swizzle bs;
for (i = 0; i < max_slots; ++i) {
if (prev[i] && alu_writes(prev[i]) && !prev[i]->dst.rel) {
- if (is_alu_64bit_inst(bc, prev[i])) {
+ if (is_alu_64bit_inst(prev[i])) {
gpr[i] = -1;
continue;
}
if (!alu)
continue;
- if (is_alu_64bit_inst(bc, alu))
+ if (is_alu_64bit_inst(alu))
continue;
- num_src = r600_bytecode_get_num_operands(bc, alu);
+ num_src = r600_bytecode_get_num_operands(alu);
for (src = 0; src < num_src; ++src) {
if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
continue;
}
/* compute how many literal are needed */
-static int r600_bytecode_alu_nliterals(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
+static int r600_bytecode_alu_nliterals(struct r600_bytecode_alu *alu,
uint32_t literal[4], unsigned *nliteral)
{
- unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
+ unsigned num_src = r600_bytecode_get_num_operands(alu);
unsigned i, j;
for (i = 0; i < num_src; ++i) {
return 0;
}
-static void r600_bytecode_alu_adjust_literals(struct r600_bytecode *bc,
- struct r600_bytecode_alu *alu,
- uint32_t literal[4], unsigned nliteral)
+static void r600_bytecode_alu_adjust_literals(struct r600_bytecode_alu *alu,
+ uint32_t literal[4], unsigned nliteral)
{
- unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
+ unsigned num_src = r600_bytecode_get_num_operands(alu);
unsigned i, j;
for (i = 0; i < num_src; ++i) {
if (prev[i]) {
if (prev[i]->pred_sel)
return 0;
- if (is_alu_once_inst(bc, prev[i]))
+ if (is_alu_once_inst(prev[i]))
return 0;
}
if (slots[i]) {
if (slots[i]->pred_sel)
return 0;
- if (is_alu_once_inst(bc, slots[i]))
+ if (is_alu_once_inst(slots[i]))
return 0;
}
}
/* check number of literals */
if (prev[i]) {
- if (r600_bytecode_alu_nliterals(bc, prev[i], literal, &nliteral))
+ if (r600_bytecode_alu_nliterals(prev[i], literal, &nliteral))
return 0;
- if (r600_bytecode_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral))
+ if (r600_bytecode_alu_nliterals(prev[i], prev_literal, &prev_nliteral))
return 0;
- if (is_alu_mova_inst(bc, prev[i])) {
+ if (is_alu_mova_inst(prev[i])) {
if (have_rel)
return 0;
have_mova = 1;
}
- if (alu_uses_rel(bc, prev[i])) {
+ if (alu_uses_rel(prev[i])) {
if (have_mova) {
return 0;
}
have_rel = 1;
}
- if (alu_uses_lds(bc, prev[i]))
+ if (alu_uses_lds(prev[i]))
return 0;
- num_once_inst += is_alu_once_inst(bc, prev[i]);
+ num_once_inst += is_alu_once_inst(prev[i]);
}
- if (slots[i] && r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral))
+ if (slots[i] && r600_bytecode_alu_nliterals(slots[i], literal, &nliteral))
return 0;
/* Let's check used slots. */
} else if (prev[i] && slots[i]) {
if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
/* Trans unit is still free try to use it. */
- if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(bc, slots[i])) {
+ if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(slots[i])) {
result[i] = prev[i];
result[4] = slots[i];
} else if (is_alu_any_unit_inst(bc, prev[i])) {
}
alu = slots[i];
- num_once_inst += is_alu_once_inst(bc, alu);
+ num_once_inst += is_alu_once_inst(alu);
/* don't reschedule NOPs */
- if (is_nop_inst(bc, alu))
+ if (is_nop_inst(alu))
return 0;
- if (is_alu_mova_inst(bc, alu)) {
+ if (is_alu_mova_inst(alu)) {
if (have_rel) {
return 0;
}
have_mova = 1;
}
- if (alu_uses_rel(bc, alu)) {
+ if (alu_uses_rel(alu)) {
if (have_mova) {
return 0;
}
return 0; /* data hazard with MOVA */
/* Let's check source gprs */
- num_src = r600_bytecode_get_num_operands(bc, alu);
+ num_src = r600_bytecode_get_num_operands(alu);
for (src = 0; src < num_src; ++src) {
/* Constants don't matter. */
for (i = 0; i < max_slots; ++i) {
slots[i] = result[i];
if (result[i]) {
- LIST_DEL(&result[i]->list);
+ list_del(&result[i]->list);
result[i]->last = 0;
- LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu);
+ list_addtail(&result[i]->list, &bc->cf_last->alu);
}
}
continue;
bank = alu->src[i].kc_bank;
+ assert(bank < R600_MAX_HW_CONST_BUFFERS);
line = (sel-512)>>4;
index_mode = alu->src[i].kc_rel ? 1 : 0; // V_SQ_CF_INDEX_0 / V_SQ_CF_INDEX_NONE
return 0;
}
-static int r600_bytecode_assign_kcache_banks(struct r600_bytecode *bc,
+static int r600_bytecode_assign_kcache_banks(
struct r600_bytecode_alu *alu,
struct r600_bytecode_kcache * kcache)
{
if (nalu->dst.sel >= bc->ngpr) {
bc->ngpr = nalu->dst.sel + 1;
}
- LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
+ list_addtail(&nalu->list, &bc->cf_last->alu);
/* each alu use 2 dwords */
bc->cf_last->ndw += 2;
bc->ndw += 2;
for (i = 0, nliteral = 0; i < max_slots; i++) {
if (slots[i]) {
- r = r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral);
+ r = r600_bytecode_alu_nliterals(slots[i], literal, &nliteral);
if (r)
return r;
}
if (nalu->dst.rel && bc->r6xx_nop_after_rel_dst)
insert_nop_r6xx(bc);
+ /* Might need to insert spill write ops after current clause */
+ if (nalu->last && bc->n_pending_outputs) {
+ while (bc->n_pending_outputs) {
+ r = r600_bytecode_add_output(bc, &bc->pending_outputs[--bc->n_pending_outputs]);
+ if (r)
+ return r;
+ }
+ }
+
return 0;
}
static inline boolean last_inst_was_not_vtx_fetch(struct r600_bytecode *bc)
{
return !((r600_isa_cf(bc->cf_last->op)->flags & CF_FETCH) &&
- (bc->chip_class == CAYMAN ||
- bc->cf_last->op != CF_OP_TEX));
+ bc->cf_last->op != CF_OP_GDS &&
+ (bc->chip_class == CAYMAN ||
+ bc->cf_last->op != CF_OP_TEX));
}
-int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
+static int r600_bytecode_add_vtx_internal(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx,
+ bool use_tc)
{
struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx();
int r;
/* Load index register if required */
if (bc->chip_class >= EVERGREEN) {
if (vtx->buffer_index_mode)
- egcm_load_index_reg(bc, 0, false);
+ egcm_load_index_reg(bc, vtx->buffer_index_mode - 1, false);
}
/* cf can contains only alu or only vtx or only tex */
switch (bc->chip_class) {
case R600:
case R700:
- case EVERGREEN:
bc->cf_last->op = CF_OP_VTX;
break;
+ case EVERGREEN:
+ if (use_tc)
+ bc->cf_last->op = CF_OP_TEX;
+ else
+ bc->cf_last->op = CF_OP_VTX;
+ break;
case CAYMAN:
bc->cf_last->op = CF_OP_TEX;
break;
return -EINVAL;
}
}
- LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx);
+ list_addtail(&nvtx->list, &bc->cf_last->vtx);
/* each fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
return 0;
}
+int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
+{
+ return r600_bytecode_add_vtx_internal(bc, vtx, false);
+}
+
+int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
+{
+ return r600_bytecode_add_vtx_internal(bc, vtx, true);
+}
+
int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex)
{
struct r600_bytecode_tex *ntex = r600_bytecode_tex();
bc->cf_last->op == CF_OP_TEX) {
struct r600_bytecode_tex *ttex;
LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
- if (ttex->dst_gpr == ntex->src_gpr) {
+ if (ttex->dst_gpr == ntex->src_gpr &&
+ (ttex->dst_sel_x < 4 || ttex->dst_sel_y < 4 ||
+ ttex->dst_sel_z < 4 || ttex->dst_sel_w < 4)) {
bc->force_add_cf = 1;
break;
}
if (ntex->dst_gpr >= bc->ngpr) {
bc->ngpr = ntex->dst_gpr + 1;
}
- LIST_ADDTAIL(&ntex->list, &bc->cf_last->tex);
+ list_addtail(&ntex->list, &bc->cf_last->tex);
/* each texture fetch use 4 dwords */
bc->cf_last->ndw += 4;
bc->ndw += 4;
return -ENOMEM;
memcpy(ngds, gds, sizeof(struct r600_bytecode_gds));
+ if (bc->chip_class >= EVERGREEN) {
+ if (gds->uav_index_mode)
+ egcm_load_index_reg(bc, gds->uav_index_mode - 1, false);
+ }
+
if (bc->cf_last == NULL ||
bc->cf_last->op != CF_OP_GDS ||
bc->force_add_cf) {
bc->cf_last->op = CF_OP_GDS;
}
- LIST_ADDTAIL(&ngds->list, &bc->cf_last->gds);
+ list_addtail(&ngds->list, &bc->cf_last->gds);
bc->cf_last->ndw += 4; /* each GDS uses 4 dwords */
if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
bc->force_add_cf = 1;
int r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op)
{
int r;
+
+ /* Emit WAIT_ACK before control flow to ensure pending writes are always acked. */
+ if (op != CF_OP_MEM_SCRATCH && bc->need_wait_ack) {
+ bc->need_wait_ack = false;
+ r = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK);
+ }
+
r = r600_bytecode_add_cf(bc);
if (r)
return r;
/* common to all 3 families */
static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id)
{
- bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
+ if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
+ return r700_bytecode_fetch_mem_build(bc, vtx, id);
+ bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(r600_isa_fetch_opcode(bc->isa->hw_class, vtx->op)) |
+ S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x);
*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) |
S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+ S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)|
+ S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
/* common for r600/r700 - eg in eg_asm.c */
unsigned addr;
int i, r;
- if (!bc->nstack) // If not 0, Stack_size already provided by llvm
- bc->nstack = bc->stack.max_entries;
-
- if ((bc->type == PIPE_SHADER_VERTEX || bc->type == PIPE_SHADER_TESS_EVAL || bc->type == PIPE_SHADER_TESS_CTRL) && !bc->nstack) {
- bc->nstack = 1;
+ if (!bc->nstack) { // If not 0, Stack_size already provided by llvm
+ if (bc->stack.max_entries)
+ bc->nstack = bc->stack.max_entries;
+ else if (bc->type == PIPE_SHADER_VERTEX ||
+ bc->type == PIPE_SHADER_TESS_EVAL ||
+ bc->type == PIPE_SHADER_TESS_CTRL)
+ bc->nstack = 1;
}
/* first path compute addr of each CF block */
nliteral = 0;
memset(literal, 0, sizeof(literal));
LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
- r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
+ r = r600_bytecode_alu_nliterals(alu, literal, &nliteral);
if (r)
return r;
- r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral);
- r600_bytecode_assign_kcache_banks(bc, alu, cf->kcache);
+ r600_bytecode_alu_adjust_literals(alu, literal, nliteral);
+ r600_bytecode_assign_kcache_banks(alu, cf->kcache);
switch(bc->chip_class) {
case R600:
free(alu);
}
- LIST_INITHEAD(&cf->alu);
+ list_inithead(&cf->alu);
LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) {
free(tex);
}
- LIST_INITHEAD(&cf->tex);
+ list_inithead(&cf->tex);
LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) {
free(vtx);
}
- LIST_INITHEAD(&cf->vtx);
+ list_inithead(&cf->vtx);
LIST_FOR_EACH_ENTRY_SAFE(gds, next_gds, &cf->gds, list) {
free(gds);
}
- LIST_INITHEAD(&cf->gds);
+ list_inithead(&cf->gds);
free(cf);
}
- LIST_INITHEAD(&cf->list);
+ list_inithead(&cf->list);
}
static int print_swizzle(unsigned swz)
o += fprintf(stderr, "LDS_OQ_B_POP");
need_chan = 1;
break;
+ case EG_V_SQ_ALU_SRC_TIME_LO:
+ o += fprintf(stderr, "TIME_LO");
+ break;
+ case EG_V_SQ_ALU_SRC_TIME_HI:
+ o += fprintf(stderr, "TIME_HI");
+ break;
+ case EG_V_SQ_ALU_SRC_SE_ID:
+ o += fprintf(stderr, "SE_ID");
+ break;
+ case EG_V_SQ_ALU_SRC_SIMD_ID:
+ o += fprintf(stderr, "SIMD_ID");
+ break;
+ case EG_V_SQ_ALU_SRC_HW_WAVE_ID:
+ o += fprintf(stderr, "HW_WAVE_ID");
+ break;
case V_SQ_ALU_SRC_PS:
o += fprintf(stderr, "PS");
break;
fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
bc->bytecode[id + 1], cfop->name);
fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
+ if (cf->vpm)
+ fprintf(stderr, "VPM ");
+ if (cf->end_of_program)
+ fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
+
} else if (cfop->flags & CF_EXP) {
int o = 0;
const char *exp_type[] = {"PIXEL", "POS ", "PARAM"};
print_indent(o, 67);
fprintf(stderr, " ES:%X ", cf->output.elem_size);
+ if (cf->mark)
+ fprintf(stderr, "MARK ");
if (!cf->barrier)
fprintf(stderr, "NO_BARRIER ");
if (cf->end_of_program)
bc->bytecode[id], bc->bytecode[id + 1], cfop->name);
o += print_indent(o, 43);
o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
+
+ if (r600_isa_cf(cf->op)->flags & CF_RAT) {
+ o += fprintf(stderr, "RAT%d", cf->rat.id);
+ if (cf->rat.index_mode) {
+ o += fprintf(stderr, "[IDX%d]", cf->rat.index_mode - 1);
+ }
+ o += fprintf(stderr, " INST: %d ", cf->rat.inst);
+ }
+
if (cf->output.burst_count > 1) {
o += fprintf(stderr, "%d-%d ", cf->output.array_base,
cf->output.array_base + cf->output.burst_count - 1);
o += print_swizzle(7);
}
- if (cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND)
+ if (cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND ||
+ cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND)
o += fprintf(stderr, " R%d", cf->output.index_gpr);
o += print_indent(o, 67);
fprintf(stderr, " ES:%i ", cf->output.elem_size);
if (cf->output.array_size != 0xFFF)
fprintf(stderr, "AS:%i ", cf->output.array_size);
+ if (cf->mark)
+ fprintf(stderr, "MARK ");
if (!cf->barrier)
fprintf(stderr, "NO_BARRIER ");
if (cf->end_of_program)
fprintf(stderr, "EOP ");
+
+ if (cf->output.mark)
+ fprintf(stderr, "MARK ");
+
fprintf(stderr, "\n");
} else {
fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
fprintf(stderr, "POP:%X ", cf->pop_count);
if (cf->count && (cfop->flags & CF_EMIT))
fprintf(stderr, "STREAM%d ", cf->count);
+ if (cf->vpm)
+ fprintf(stderr, "VPM ");
if (cf->end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
const struct alu_op_info *aop = r600_isa_alu(alu->op);
int o = 0;
- r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
+ r600_bytecode_alu_nliterals(alu, literal, &nliteral);
o += fprintf(stderr, " %04d %08X %08X ", id, bc->bytecode[id], bc->bytecode[id+1]);
if (last)
o += fprintf(stderr, "%4d ", ++ngr);
o += fprintf(stderr, ", R%d.", vtx->src_gpr);
o += print_swizzle(vtx->src_sel_x);
+ if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
+ o += print_swizzle(vtx->src_sel_y);
if (vtx->offset)
fprintf(stderr, " +%db", vtx->offset);
if (bc->chip_class >= EVERGREEN && vtx->buffer_index_mode)
fprintf(stderr, "SQ_%s ", index_mode[vtx->buffer_index_mode]);
+ if (r600_isa_fetch(vtx->op)->flags & FF_MEM) {
+ if (vtx->uncached)
+ fprintf(stderr, "UNCACHED ");
+ if (vtx->indexed)
+ fprintf(stderr, "INDEXED:%d ", vtx->indexed);
+
+ fprintf(stderr, "ELEM_SIZE:%d ", vtx->elem_size);
+ if (vtx->burst_count)
+ fprintf(stderr, "BURST_COUNT:%d ", vtx->burst_count);
+ fprintf(stderr, "ARRAY_BASE:%d ", vtx->array_base);
+ fprintf(stderr, "ARRAY_SIZE:%d ", vtx->array_size);
+ }
+
fprintf(stderr, "UCF:%d ", vtx->use_const_fields);
fprintf(stderr, "FMT(DTA:%d ", vtx->data_format);
fprintf(stderr, "NUM:%d ", vtx->num_format_all);
if (gds->op != FETCH_OP_TF_WRITE) {
o += fprintf(stderr, ", R%d.", gds->src_gpr2);
}
+ if (gds->alloc_consume) {
+ o += fprintf(stderr, " UAV: %d", gds->uav_id);
+ if (gds->uav_index_mode)
+ o += fprintf(stderr, "[%s]", index_mode[gds->uav_index_mode]);
+ }
fprintf(stderr, "\n");
id += 4;
}
return;
}
+ if (pformat == PIPE_FORMAT_B5G5R5A1_UNORM) {
+ *format = FMT_1_5_5_5;
+ *endian = r600_endian_swap(16);
+ return;
+ }
+
+ if (pformat == PIPE_FORMAT_A1B5G5R5_UNORM) {
+ *format = FMT_5_5_5_1;
+ return;
+ }
+
desc = util_format_description(pformat);
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
goto out_unknown;
/* Signed ints */
case UTIL_FORMAT_TYPE_SIGNED:
switch (desc->channel[i].size) {
+ case 4:
+ switch (desc->nr_channels) {
+ case 2:
+ *format = FMT_4_4;
+ break;
+ case 4:
+ *format = FMT_4_4_4_4;
+ break;
+ }
+ break;
case 8:
switch (desc->nr_channels) {
case 1:
return NULL;
}
- bytecode = r600_buffer_map_sync_with_rings(&rctx->b, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
+ bytecode = r600_buffer_map_sync_with_rings
+ (&rctx->b, shader->buffer,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY);
bytecode += shader->offset / 4;
if (R600_BIG_ENDIAN) {