#include "r600d.h"
#include <errno.h>
+#include "util/u_bitcast.h"
#include "util/u_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
return alu->dst.write || alu->is_op3;
}
-static inline unsigned int r600_bytecode_get_num_operands(
- struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+/* Number of source operands of alu->op, read from the src_count field of
+ * the entry r600_isa_alu() returns for that opcode. */
+static inline unsigned int r600_bytecode_get_num_operands(const struct r600_bytecode_alu *alu)
{
return r600_isa_alu(alu->op)->src_count;
}
-int r700_bytecode_alu_build(struct r600_bytecode *bc,
- struct r600_bytecode_alu *alu, unsigned id);
-
+/* Allocate a CF node with CALLOC_STRUCT and initialise its five list heads
+ * (list, alu, vtx, tex, gds). Returns NULL if the allocation fails. */
static struct r600_bytecode_cf *r600_bytecode_cf(void)
{
struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf);
if (!cf)
return NULL;
- LIST_INITHEAD(&cf->list);
- LIST_INITHEAD(&cf->alu);
- LIST_INITHEAD(&cf->vtx);
- LIST_INITHEAD(&cf->tex);
- LIST_INITHEAD(&cf->gds);
+ list_inithead(&cf->list);
+ list_inithead(&cf->alu);
+ list_inithead(&cf->vtx);
+ list_inithead(&cf->tex);
+ list_inithead(&cf->gds);
return cf;
}
if (!alu)
return NULL;
- LIST_INITHEAD(&alu->list);
+ list_inithead(&alu->list);
return alu;
}
if (!vtx)
return NULL;
- LIST_INITHEAD(&vtx->list);
+ list_inithead(&vtx->list);
return vtx;
}
if (!tex)
return NULL;
- LIST_INITHEAD(&tex->list);
+ list_inithead(&tex->list);
return tex;
}
if (gds == NULL)
return NULL;
- LIST_INITHEAD(&gds->list);
+ list_inithead(&gds->list);
return gds;
}
bc->r6xx_nop_after_rel_dst = 0;
}
- LIST_INITHEAD(&bc->cf);
+ list_inithead(&bc->cf);
bc->chip_class = chip_class;
bc->family = family;
bc->has_compressed_msaa_texturing = has_compressed_msaa_texturing;
return 0;
}
+/*
+ * Append a copy of *output to bc->pending_outputs and increment
+ * bc->n_pending_outputs. Always returns 0.
+ *
+ * NOTE(review): the assert demands n_pending_outputs + 1 to be strictly
+ * less than the array size, so the final array slot can never be filled
+ * while assertions are enabled -- confirm that this slack is intended.
+ * If assertions are compiled out, nothing else bounds this write.
+ */
+int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
+ const struct r600_bytecode_output *output)
+{
+ assert(bc->n_pending_outputs + 1 < ARRAY_SIZE(bc->pending_outputs));
+ bc->pending_outputs[bc->n_pending_outputs++] = *output;
+
+ return 0;
+}
+
+/* Store need_wait_ack in bc->need_wait_ack. */
+void r600_bytecode_need_wait_ack(struct r600_bytecode *bc, boolean need_wait_ack)
+{
+ bc->need_wait_ack = need_wait_ack;
+}
+
+/* Return the current value of bc->need_wait_ack; bc is not modified. */
+boolean r600_bytecode_get_need_wait_ack(struct r600_bytecode *bc)
+{
+ return bc->need_wait_ack;
+}
+
/* alu instructions that can only exist once per group */
-static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int is_alu_once_inst(struct r600_bytecode_alu *alu)
{
- return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED);
+ /* True for ops flagged AF_KILL or AF_PRED, for LDS index ops, and for
+  * ALU_OP0_GROUP_BARRIER. Because of the || chain the result is exactly
+  * 0 or 1, not the raw flag mask. */
+ return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER;
}
static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
(r600_isa_alu_slots(bc->isa->hw_class, alu->op) == AF_4V);
}
-static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+/* Nonzero when the ISA table entry for alu->op carries the AF_MOVA flag. */
+static int is_alu_mova_inst(struct r600_bytecode_alu *alu)
{
return r600_isa_alu(alu->op)->flags & AF_MOVA;
}
-static int alu_uses_rel(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+static int alu_uses_rel(struct r600_bytecode_alu *alu)
{
- unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
+ unsigned num_src = r600_bytecode_get_num_operands(alu);
unsigned src;
if (alu->dst.rel) {
return 0;
}
-static int is_alu_64bit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+/* Return 1 if sel is EG_V_SQ_ALU_SRC_LDS_OQ_A_POP or
+ * EG_V_SQ_ALU_SRC_LDS_OQ_B_POP, otherwise 0. */
+static int is_lds_read(int sel)
+{
+ return sel == EG_V_SQ_ALU_SRC_LDS_OQ_A_POP || sel == EG_V_SQ_ALU_SRC_LDS_OQ_B_POP;
+}
+
+/* Return 1 if any source operand of alu has a sel accepted by
+ * is_lds_read(), otherwise 0. Only the operands counted by
+ * r600_bytecode_get_num_operands() are examined. */
+static int alu_uses_lds(struct r600_bytecode_alu *alu)
+{
+ unsigned num_src = r600_bytecode_get_num_operands(alu);
+ unsigned src;
+
+ for (src = 0; src < num_src; ++src) {
+ if (is_lds_read(alu->src[src].sel)) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int is_alu_64bit_inst(struct r600_bytecode_alu *alu)
{
const struct alu_op_info *op = r600_isa_alu(alu->op);
return (op->flags & AF_64);
return slots == AF_VS;
}
-static int is_nop_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
+/* Return 1 if alu->op is ALU_OP0_NOP, otherwise 0. */
+static int is_nop_inst(struct r600_bytecode_alu *alu)
{
return alu->op == ALU_OP0_NOP;
-}
+}
static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first,
struct r600_bytecode_alu *assignment[5])
return 0;
}
-static int reserve_cfile(struct r600_bytecode *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
+static int reserve_cfile(const struct r600_bytecode *bc,
+ struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
{
int res, num_res = 4;
if (bc->chip_class >= R700) {
sel <= V_SQ_ALU_SRC_LITERAL);
}
-static int check_vector(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
+static int check_vector(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
struct alu_bank_swizzle *bs, int bank_swizzle)
{
int r, src, num_src, sel, elem, cycle;
- num_src = r600_bytecode_get_num_operands(bc, alu);
+ num_src = r600_bytecode_get_num_operands(alu);
for (src = 0; src < num_src; src++) {
sel = alu->src[src].sel;
elem = alu->src[src].chan;
return 0;
}
-static int check_scalar(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
+static int check_scalar(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
struct alu_bank_swizzle *bs, int bank_swizzle)
{
int r, src, num_src, const_count, sel, elem, cycle;
- num_src = r600_bytecode_get_num_operands(bc, alu);
+ num_src = r600_bytecode_get_num_operands(alu);
for (const_count = 0, src = 0; src < num_src; ++src) {
sel = alu->src[src].sel;
elem = alu->src[src].chan;
return 0;
}
-static int check_and_set_bank_swizzle(struct r600_bytecode *bc,
+static int check_and_set_bank_swizzle(const struct r600_bytecode *bc,
struct r600_bytecode_alu *slots[5])
{
struct alu_bank_swizzle bs;
for (i = 0; i < max_slots; ++i) {
if (prev[i] && alu_writes(prev[i]) && !prev[i]->dst.rel) {
- if (is_alu_64bit_inst(bc, prev[i])) {
+ if (is_alu_64bit_inst(prev[i])) {
gpr[i] = -1;
continue;
}
if (!alu)
continue;
- if (is_alu_64bit_inst(bc, alu))
+ if (is_alu_64bit_inst(alu))
continue;
- num_src = r600_bytecode_get_num_operands(bc, alu);
+ num_src = r600_bytecode_get_num_operands(alu);
for (src = 0; src < num_src; ++src) {
if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
continue;
}
/* compute how many literal are needed */
-static int r600_bytecode_alu_nliterals(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
+static int r600_bytecode_alu_nliterals(struct r600_bytecode_alu *alu,
uint32_t literal[4], unsigned *nliteral)
{
- unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
+ unsigned num_src = r600_bytecode_get_num_operands(alu);
unsigned i, j;
for (i = 0; i < num_src; ++i) {
return 0;
}
-static void r600_bytecode_alu_adjust_literals(struct r600_bytecode *bc,
- struct r600_bytecode_alu *alu,
- uint32_t literal[4], unsigned nliteral)
+static void r600_bytecode_alu_adjust_literals(struct r600_bytecode_alu *alu,
+ uint32_t literal[4], unsigned nliteral)
{
- unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
+ unsigned num_src = r600_bytecode_get_num_operands(alu);
unsigned i, j;
for (i = 0; i < num_src; ++i) {
if (prev[i]) {
if (prev[i]->pred_sel)
return 0;
- if (is_alu_once_inst(bc, prev[i]))
+ if (is_alu_once_inst(prev[i]))
return 0;
}
if (slots[i]) {
if (slots[i]->pred_sel)
return 0;
- if (is_alu_once_inst(bc, slots[i]))
+ if (is_alu_once_inst(slots[i]))
return 0;
}
}
/* check number of literals */
if (prev[i]) {
- if (r600_bytecode_alu_nliterals(bc, prev[i], literal, &nliteral))
+ if (r600_bytecode_alu_nliterals(prev[i], literal, &nliteral))
return 0;
- if (r600_bytecode_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral))
+ if (r600_bytecode_alu_nliterals(prev[i], prev_literal, &prev_nliteral))
return 0;
- if (is_alu_mova_inst(bc, prev[i])) {
+ if (is_alu_mova_inst(prev[i])) {
if (have_rel)
return 0;
have_mova = 1;
}
- if (alu_uses_rel(bc, prev[i])) {
+ if (alu_uses_rel(prev[i])) {
if (have_mova) {
return 0;
}
have_rel = 1;
}
+ if (alu_uses_lds(prev[i]))
+ return 0;
- num_once_inst += is_alu_once_inst(bc, prev[i]);
+ num_once_inst += is_alu_once_inst(prev[i]);
}
- if (slots[i] && r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral))
+ if (slots[i] && r600_bytecode_alu_nliterals(slots[i], literal, &nliteral))
return 0;
/* Let's check used slots. */
} else if (prev[i] && slots[i]) {
if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
/* Trans unit is still free try to use it. */
- if (is_alu_any_unit_inst(bc, slots[i])) {
+ if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(slots[i])) {
result[i] = prev[i];
result[4] = slots[i];
} else if (is_alu_any_unit_inst(bc, prev[i])) {
}
alu = slots[i];
- num_once_inst += is_alu_once_inst(bc, alu);
+ num_once_inst += is_alu_once_inst(alu);
/* don't reschedule NOPs */
- if (is_nop_inst(bc, alu))
+ if (is_nop_inst(alu))
return 0;
- if (is_alu_mova_inst(bc, alu)) {
+ if (is_alu_mova_inst(alu)) {
if (have_rel) {
return 0;
}
have_mova = 1;
}
- if (alu_uses_rel(bc, alu)) {
+ if (alu_uses_rel(alu)) {
if (have_mova) {
return 0;
}
return 0; /* data hazard with MOVA */
/* Let's check source gprs */
- num_src = r600_bytecode_get_num_operands(bc, alu);
+ num_src = r600_bytecode_get_num_operands(alu);
for (src = 0; src < num_src; ++src) {
/* Constants don't matter. */
continue;
bank = alu->src[i].kc_bank;
+ assert(bank < R600_MAX_HW_CONST_BUFFERS);
line = (sel-512)>>4;
index_mode = alu->src[i].kc_rel ? 1 : 0; // V_SQ_CF_INDEX_0 / V_SQ_CF_INDEX_NONE
return 0;
}
-static int r600_bytecode_assign_kcache_banks(struct r600_bytecode *bc,
+static int r600_bytecode_assign_kcache_banks(
struct r600_bytecode_alu *alu,
struct r600_bytecode_kcache * kcache)
{
for (i = 0, nliteral = 0; i < max_slots; i++) {
if (slots[i]) {
- r = r600_bytecode_alu_nliterals(bc, slots[i], literal, &nliteral);
+ r = r600_bytecode_alu_nliterals(slots[i], literal, &nliteral);
if (r)
return r;
}
if (nalu->dst.rel && bc->r6xx_nop_after_rel_dst)
insert_nop_r6xx(bc);
+ /* Might need to insert spill write ops after current clause */
+ if (nalu->last && bc->n_pending_outputs) {
+ while (bc->n_pending_outputs) {
+ r = r600_bytecode_add_output(bc, &bc->pending_outputs[--bc->n_pending_outputs]);
+ if (r)
+ return r;
+ }
+ }
+
return 0;
}
+/* Returns false only when bc->cf_last is flagged CF_FETCH, is not CF_OP_GDS
+ * and, unless the chip class is CAYMAN, is also not CF_OP_TEX; returns true
+ * in every other case. bc->cf_last is dereferenced without a NULL check. */
static inline boolean last_inst_was_not_vtx_fetch(struct r600_bytecode *bc)
{
return !((r600_isa_cf(bc->cf_last->op)->flags & CF_FETCH) &&
- (bc->chip_class == CAYMAN ||
- bc->cf_last->op != CF_OP_TEX));
+ bc->cf_last->op != CF_OP_GDS &&
+ (bc->chip_class == CAYMAN ||
+ bc->cf_last->op != CF_OP_TEX));
}
-int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
+static int r600_bytecode_add_vtx_internal(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx,
+ bool use_tc)
{
struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx();
int r;
/* Load index register if required */
if (bc->chip_class >= EVERGREEN) {
if (vtx->buffer_index_mode)
- egcm_load_index_reg(bc, 0, false);
+ egcm_load_index_reg(bc, vtx->buffer_index_mode - 1, false);
}
/* cf can contains only alu or only vtx or only tex */
switch (bc->chip_class) {
case R600:
case R700:
- case EVERGREEN:
bc->cf_last->op = CF_OP_VTX;
break;
+ case EVERGREEN:
+ if (use_tc)
+ bc->cf_last->op = CF_OP_TEX;
+ else
+ bc->cf_last->op = CF_OP_VTX;
+ break;
case CAYMAN:
bc->cf_last->op = CF_OP_TEX;
break;
return 0;
}
+/* Add a vertex fetch by forwarding to r600_bytecode_add_vtx_internal() with
+ * use_tc = false; the callee's return value is passed through unchanged. */
+int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
+{
+ return r600_bytecode_add_vtx_internal(bc, vtx, false);
+}
+
+/* Same as r600_bytecode_add_vtx() but forwards use_tc = true to
+ * r600_bytecode_add_vtx_internal(); the callee's return value is passed
+ * through unchanged. */
+int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
+{
+ return r600_bytecode_add_vtx_internal(bc, vtx, true);
+}
+
int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex)
{
struct r600_bytecode_tex *ntex = r600_bytecode_tex();
return -ENOMEM;
memcpy(ngds, gds, sizeof(struct r600_bytecode_gds));
+ if (bc->chip_class >= EVERGREEN) {
+ if (gds->uav_index_mode)
+ egcm_load_index_reg(bc, gds->uav_index_mode - 1, false);
+ }
+
if (bc->cf_last == NULL ||
bc->cf_last->op != CF_OP_GDS ||
bc->force_add_cf) {
int r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op)
{
int r;
+
+ /* Emit WAIT_ACK before control flow to ensure pending writes are always acked. */
+ if (op != CF_OP_MEM_SCRATCH && bc->need_wait_ack) {
+ bc->need_wait_ack = false;
+ r = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK);
+ }
+
r = r600_bytecode_add_cf(bc);
if (r)
return r;
/* common to all 3 families */
static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id)
{
- bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
+ if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
+ return r700_bytecode_fetch_mem_build(bc, vtx, id);
+ bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(r600_isa_fetch_opcode(bc->isa->hw_class, vtx->op)) |
+ S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x);
*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) |
S_SQ_CF_WORD1_BARRIER(1) |
- S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+ S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)|
+ S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
/* common for r600/r700 - eg in eg_asm.c */
unsigned addr;
int i, r;
- if (!bc->nstack) // If not 0, Stack_size already provided by llvm
- bc->nstack = bc->stack.max_entries;
-
- if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) {
- bc->nstack = 1;
+ if (!bc->nstack) { // If not 0, Stack_size already provided by llvm
+ if (bc->stack.max_entries)
+ bc->nstack = bc->stack.max_entries;
+ else if (bc->type == PIPE_SHADER_VERTEX ||
+ bc->type == PIPE_SHADER_TESS_EVAL ||
+ bc->type == PIPE_SHADER_TESS_CTRL)
+ bc->nstack = 1;
}
/* first path compute addr of each CF block */
nliteral = 0;
memset(literal, 0, sizeof(literal));
LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
- r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
+ r = r600_bytecode_alu_nliterals(alu, literal, &nliteral);
if (r)
return r;
- r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral);
- r600_bytecode_assign_kcache_banks(bc, alu, cf->kcache);
+ r600_bytecode_alu_adjust_literals(alu, literal, nliteral);
+ r600_bytecode_assign_kcache_banks(alu, cf->kcache);
switch(bc->chip_class) {
case R600:
r = r600_bytecode_alu_build(bc, alu, addr);
break;
case R700:
- case EVERGREEN: /* eg alu is same encoding as r700 */
- case CAYMAN:
r = r700_bytecode_alu_build(bc, alu, addr);
break;
+ case EVERGREEN:
+ case CAYMAN:
+ r = eg_bytecode_alu_build(bc, alu, addr);
+ break;
default:
R600_ERR("unknown chip class %d.\n", bc->chip_class);
return -EINVAL;
free(alu);
}
- LIST_INITHEAD(&cf->alu);
+ list_inithead(&cf->alu);
LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) {
free(tex);
}
- LIST_INITHEAD(&cf->tex);
+ list_inithead(&cf->tex);
LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) {
free(vtx);
}
- LIST_INITHEAD(&cf->vtx);
+ list_inithead(&cf->vtx);
LIST_FOR_EACH_ENTRY_SAFE(gds, next_gds, &cf->gds, list) {
free(gds);
}
- LIST_INITHEAD(&cf->gds);
+ list_inithead(&cf->gds);
free(cf);
}
- LIST_INITHEAD(&cf->list);
+ list_inithead(&cf->list);
}
static int print_swizzle(unsigned swz)
need_sel = 0;
need_chan = 0;
switch (sel) {
+ case EG_V_SQ_ALU_SRC_LDS_DIRECT_A:
+ o += fprintf(stderr, "LDS_A[0x%08X]", src->value);
+ break;
+ case EG_V_SQ_ALU_SRC_LDS_DIRECT_B:
+ o += fprintf(stderr, "LDS_B[0x%08X]", src->value);
+ break;
+ case EG_V_SQ_ALU_SRC_LDS_OQ_A:
+ o += fprintf(stderr, "LDS_OQ_A");
+ need_chan = 1;
+ break;
+ case EG_V_SQ_ALU_SRC_LDS_OQ_B:
+ o += fprintf(stderr, "LDS_OQ_B");
+ need_chan = 1;
+ break;
+ case EG_V_SQ_ALU_SRC_LDS_OQ_A_POP:
+ o += fprintf(stderr, "LDS_OQ_A_POP");
+ need_chan = 1;
+ break;
+ case EG_V_SQ_ALU_SRC_LDS_OQ_B_POP:
+ o += fprintf(stderr, "LDS_OQ_B_POP");
+ need_chan = 1;
+ break;
+ case EG_V_SQ_ALU_SRC_TIME_LO:
+ o += fprintf(stderr, "TIME_LO");
+ break;
+ case EG_V_SQ_ALU_SRC_TIME_HI:
+ o += fprintf(stderr, "TIME_HI");
+ break;
+ case EG_V_SQ_ALU_SRC_SE_ID:
+ o += fprintf(stderr, "SE_ID");
+ break;
+ case EG_V_SQ_ALU_SRC_SIMD_ID:
+ o += fprintf(stderr, "SIMD_ID");
+ break;
+ case EG_V_SQ_ALU_SRC_HW_WAVE_ID:
+ o += fprintf(stderr, "HW_WAVE_ID");
+ break;
case V_SQ_ALU_SRC_PS:
o += fprintf(stderr, "PS");
break;
need_chan = 1;
break;
case V_SQ_ALU_SRC_LITERAL:
- o += fprintf(stderr, "[0x%08X %f]", src->value, *(float*)&src->value);
+ o += fprintf(stderr, "[0x%08X %f]", src->value, u_bitcast_u2f(src->value));
break;
case V_SQ_ALU_SRC_0_5:
o += fprintf(stderr, "0.5");
fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
bc->bytecode[id + 1], cfop->name);
fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
+ if (cf->vpm)
+ fprintf(stderr, "VPM ");
+ if (cf->end_of_program)
+ fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
+
} else if (cfop->flags & CF_EXP) {
int o = 0;
const char *exp_type[] = {"PIXEL", "POS ", "PARAM"};
print_indent(o, 67);
fprintf(stderr, " ES:%X ", cf->output.elem_size);
+ if (cf->mark)
+ fprintf(stderr, "MARK ");
if (!cf->barrier)
fprintf(stderr, "NO_BARRIER ");
if (cf->end_of_program)
bc->bytecode[id], bc->bytecode[id + 1], cfop->name);
o += print_indent(o, 43);
o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
+
+ if (r600_isa_cf(cf->op)->flags & CF_RAT) {
+ o += fprintf(stderr, "RAT%d", cf->rat.id);
+ if (cf->rat.index_mode) {
+ o += fprintf(stderr, "[IDX%d]", cf->rat.index_mode - 1);
+ }
+ o += fprintf(stderr, " INST: %d ", cf->rat.inst);
+ }
+
if (cf->output.burst_count > 1) {
o += fprintf(stderr, "%d-%d ", cf->output.array_base,
cf->output.array_base + cf->output.burst_count - 1);
o += print_swizzle(7);
}
- if (cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND)
+ if (cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND ||
+ cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND)
o += fprintf(stderr, " R%d", cf->output.index_gpr);
o += print_indent(o, 67);
fprintf(stderr, " ES:%i ", cf->output.elem_size);
if (cf->output.array_size != 0xFFF)
fprintf(stderr, "AS:%i ", cf->output.array_size);
+ if (cf->mark)
+ fprintf(stderr, "MARK ");
if (!cf->barrier)
fprintf(stderr, "NO_BARRIER ");
if (cf->end_of_program)
fprintf(stderr, "EOP ");
+
+ if (cf->output.mark)
+ fprintf(stderr, "MARK ");
+
fprintf(stderr, "\n");
} else {
fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
fprintf(stderr, "POP:%X ", cf->pop_count);
if (cf->count && (cfop->flags & CF_EMIT))
fprintf(stderr, "STREAM%d ", cf->count);
+ if (cf->vpm)
+ fprintf(stderr, "VPM ");
if (cf->end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
const struct alu_op_info *aop = r600_isa_alu(alu->op);
int o = 0;
- r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
+ r600_bytecode_alu_nliterals(alu, literal, &nliteral);
o += fprintf(stderr, " %04d %08X %08X ", id, bc->bytecode[id], bc->bytecode[id+1]);
if (last)
o += fprintf(stderr, "%4d ", ++ngr);
o += fprintf(stderr, ", R%d.", vtx->src_gpr);
o += print_swizzle(vtx->src_sel_x);
+ if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
+ o += print_swizzle(vtx->src_sel_y);
if (vtx->offset)
fprintf(stderr, " +%db", vtx->offset);
if (bc->chip_class >= EVERGREEN && vtx->buffer_index_mode)
fprintf(stderr, "SQ_%s ", index_mode[vtx->buffer_index_mode]);
+ if (r600_isa_fetch(vtx->op)->flags & FF_MEM) {
+ if (vtx->uncached)
+ fprintf(stderr, "UNCACHED ");
+ if (vtx->indexed)
+ fprintf(stderr, "INDEXED:%d ", vtx->indexed);
+
+ fprintf(stderr, "ELEM_SIZE:%d ", vtx->elem_size);
+ if (vtx->burst_count)
+ fprintf(stderr, "BURST_COUNT:%d ", vtx->burst_count);
+ fprintf(stderr, "ARRAY_BASE:%d ", vtx->array_base);
+ fprintf(stderr, "ARRAY_SIZE:%d ", vtx->array_size);
+ }
+
fprintf(stderr, "UCF:%d ", vtx->use_const_fields);
fprintf(stderr, "FMT(DTA:%d ", vtx->data_format);
fprintf(stderr, "NUM:%d ", vtx->num_format_all);
if (gds->op != FETCH_OP_TF_WRITE) {
o += fprintf(stderr, ", R%d.", gds->src_gpr2);
}
+ if (gds->alloc_consume) {
+ o += fprintf(stderr, " UAV: %d", gds->uav_id);
+ if (gds->uav_index_mode)
+ o += fprintf(stderr, "[%s]", index_mode[gds->uav_index_mode]);
+ }
fprintf(stderr, "\n");
id += 4;
}
return;
}
+ if (pformat == PIPE_FORMAT_B5G6R5_UNORM) {
+ *format = FMT_5_6_5;
+ *endian = r600_endian_swap(16);
+ return;
+ }
+
+ if (pformat == PIPE_FORMAT_B5G5R5A1_UNORM) {
+ *format = FMT_1_5_5_5;
+ *endian = r600_endian_swap(16);
+ return;
+ }
+
+ if (pformat == PIPE_FORMAT_A1B5G5R5_UNORM) {
+ *format = FMT_5_5_5_1;
+ return;
+ }
+
desc = util_format_description(pformat);
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
goto out_unknown;
/* Signed ints */
case UTIL_FORMAT_TYPE_SIGNED:
switch (desc->channel[i].size) {
+ case 4:
+ switch (desc->nr_channels) {
+ case 2:
+ *format = FMT_4_4;
+ break;
+ case 4:
+ *format = FMT_4_4_4_4;
+ break;
+ }
+ break;
case 8:
switch (desc->nr_channels) {
case 1:
return NULL;
}
- u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, &shader->offset,
+ u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, 256,
+ &shader->offset,
(struct pipe_resource**)&shader->buffer);
if (!shader->buffer) {
r600_bytecode_clear(&bc);
return NULL;
}
- bytecode = r600_buffer_map_sync_with_rings(&rctx->b, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED);
+ bytecode = r600_buffer_map_sync_with_rings
+ (&rctx->b, shader->buffer,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY);
bytecode += shader->offset / 4;
if (R600_BIG_ENDIAN) {
} else {
memcpy(bytecode, bc.bytecode, fs_size);
}
- rctx->b.ws->buffer_unmap(shader->buffer->cs_buf);
+ rctx->b.ws->buffer_unmap(shader->buffer->buf);
r600_bytecode_clear(&bc);
return shader;