X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fr600%2Fr600_asm.c;h=3dcbde0fe5b311638a1676b9b5cf45673373854b;hb=8260c4648ac48c1b46f123e19f5e0e74943a3287;hp=762cc7fac441168882c683de312629331aa420eb;hpb=d80701df8af4a5d74c4f4eb09a4b3cef6970104b;p=mesa.git diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 762cc7fac44..3dcbde0fe5b 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -27,6 +27,7 @@ #include "r600d.h" #include +#include "util/u_bitcast.h" #include "util/u_dump.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -37,6 +38,11 @@ #define NUM_OF_CYCLES 3 #define NUM_OF_COMPONENTS 4 +static inline bool alu_writes(struct r600_bytecode_alu *alu) +{ + return alu->dst.write || alu->is_op3; +} + static inline unsigned int r600_bytecode_get_num_operands( struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { @@ -50,12 +56,13 @@ static struct r600_bytecode_cf *r600_bytecode_cf(void) { struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf); - if (cf == NULL) + if (!cf) return NULL; LIST_INITHEAD(&cf->list); LIST_INITHEAD(&cf->alu); LIST_INITHEAD(&cf->vtx); LIST_INITHEAD(&cf->tex); + LIST_INITHEAD(&cf->gds); return cf; } @@ -63,7 +70,7 @@ static struct r600_bytecode_alu *r600_bytecode_alu(void) { struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu); - if (alu == NULL) + if (!alu) return NULL; LIST_INITHEAD(&alu->list); return alu; @@ -73,7 +80,7 @@ static struct r600_bytecode_vtx *r600_bytecode_vtx(void) { struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx); - if (vtx == NULL) + if (!vtx) return NULL; LIST_INITHEAD(&vtx->list); return vtx; @@ -83,12 +90,22 @@ static struct r600_bytecode_tex *r600_bytecode_tex(void) { struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex); - if (tex == NULL) + if (!tex) return NULL; LIST_INITHEAD(&tex->list); return tex; } +static struct r600_bytecode_gds *r600_bytecode_gds(void) +{ + struct r600_bytecode_gds *gds = CALLOC_STRUCT(r600_bytecode_gds); + + if (gds == NULL) + return NULL; + LIST_INITHEAD(&gds->list); + return gds; +} + static unsigned stack_entry_size(enum radeon_family chip) { /* Wavefront size: * 64: R600/RV670/RV770/Cypress/R740/Barts/Turks/Caicos/ @@ -152,7 +169,7 @@ int r600_bytecode_add_cf(struct r600_bytecode *bc) { struct r600_bytecode_cf *cf = r600_bytecode_cf(); - if (cf == NULL) + if (!cf) return -ENOMEM; LIST_ADDTAIL(&cf->list, &bc->cf); if (bc->cf_last) { @@ -221,7 +238,7 @@ int r600_bytecode_add_output(struct r600_bytecode *bc, /* alu instructions that can ony exits once per group */ static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { - return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED); + return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER; } static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) @@ -252,6 +269,30 @@ static int alu_uses_rel(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) return 0; } +static int is_lds_read(int sel) +{ + return sel == EG_V_SQ_ALU_SRC_LDS_OQ_A_POP || sel == EG_V_SQ_ALU_SRC_LDS_OQ_B_POP; +} + +static int alu_uses_lds(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) +{ + unsigned num_src = r600_bytecode_get_num_operands(bc, alu); + unsigned src; + + for (src = 0; src < num_src; ++src) { + if (is_lds_read(alu->src[src].sel)) { + return 1; + } + } + return 0; +} + +static int is_alu_64bit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) +{ + const struct alu_op_info *op = r600_isa_alu(alu->op); + return (op->flags & AF_64); +} + static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op); @@ -274,7 +315,7 @@ static int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_a static int is_nop_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu) { return alu->op == ALU_OP0_NOP; -} +} static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first, struct r600_bytecode_alu *assignment[5]) @@ -575,7 +616,13 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc, return r; for (i = 0; i < max_slots; ++i) { - if (prev[i] && (prev[i]->dst.write || prev[i]->is_op3) && !prev[i]->dst.rel) { + if (prev[i] && alu_writes(prev[i]) && !prev[i]->dst.rel) { + + if (is_alu_64bit_inst(bc, prev[i])) { + gpr[i] = -1; + continue; + } + gpr[i] = prev[i]->dst.sel; /* cube writes more than PV.X */ if (is_alu_reduction_inst(bc, prev[i])) @@ -588,9 +635,11 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc, for (i = 0; i < max_slots; ++i) { struct r600_bytecode_alu *alu = slots[i]; - if(!alu) + if (!alu) continue; + if (is_alu_64bit_inst(bc, alu)) + continue; num_src = r600_bytecode_get_num_operands(bc, alu); for (src = 0; src < num_src; ++src) { if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) @@ -621,7 +670,7 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc, return 0; } -void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg) +void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg, unsigned abs) { switch(value) { case 0: @@ -641,11 +690,11 @@ void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *ne break; case 0xBF800000: /* -1.0f */ *sel = V_SQ_ALU_SRC_1; - *neg ^= 1; + *neg ^= !abs; break; case 0xBF000000: /* -0.5f */ *sel = V_SQ_ALU_SRC_0_5; - *neg ^= 1; + *neg ^= !abs; break; default: *sel = V_SQ_ALU_SRC_LITERAL; @@ -757,6 +806,8 @@ static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu } have_rel = 1; } + if (alu_uses_lds(bc, prev[i])) + return 0; num_once_inst += is_alu_once_inst(bc, prev[i]); } @@ -770,13 +821,13 @@ static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu } else if (prev[i] && slots[i]) { if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { /* Trans unit is still free try to use it. */ - if (is_alu_any_unit_inst(bc, slots[i])) { + if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(bc, slots[i])) { result[i] = prev[i]; result[4] = slots[i]; } else if (is_alu_any_unit_inst(bc, prev[i])) { if (slots[i]->dst.sel == prev[i]->dst.sel && - (slots[i]->dst.write == 1 || slots[i]->is_op3) && - (prev[i]->dst.write == 1 || prev[i]->is_op3)) + alu_writes(slots[i]) && + alu_writes(prev[i])) return 0; result[i] = slots[i]; @@ -791,8 +842,8 @@ static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu if (max_slots == 5 && slots[i] && prev[4] && slots[i]->dst.sel == prev[4]->dst.sel && slots[i]->dst.chan == prev[4]->dst.chan && - (slots[i]->dst.write == 1 || slots[i]->is_op3) && - (prev[4]->dst.write == 1 || prev[4]->is_op3)) + alu_writes(slots[i]) && + alu_writes(prev[4])) return 0; result[i] = slots[i]; @@ -832,7 +883,7 @@ static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu continue; for (j = 0; j < max_slots; ++j) { - if (!prev[j] || !(prev[j]->dst.write || prev[j]->is_op3)) + if (!prev[j] || !alu_writes(prev[j])) continue; /* If it's relative then we can't determin which gpr is really used. */ @@ -1129,7 +1180,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc, struct r600_bytecode_alu *lalu; int i, r; - if (nalu == NULL) + if (!nalu) return -ENOMEM; memcpy(nalu, alu, sizeof(struct r600_bytecode_alu)); @@ -1194,7 +1245,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc, } if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL) r600_bytecode_special_constants(nalu->src[i].value, - &nalu->src[i].sel, &nalu->src[i].neg); + &nalu->src[i].sel, &nalu->src[i].neg, nalu->src[i].abs); } if (nalu->dst.sel >= bc->ngpr) { bc->ngpr = nalu->dst.sel + 1; @@ -1290,7 +1341,7 @@ int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_v struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx(); int r; - if (nvtx == NULL) + if (!nvtx) return -ENOMEM; memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx)); @@ -1342,7 +1393,7 @@ int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_t struct r600_bytecode_tex *ntex = r600_bytecode_tex(); int r; - if (ntex == NULL) + if (!ntex) return -ENOMEM; memcpy(ntex, tex, sizeof(struct r600_bytecode_tex)); @@ -1393,6 +1444,33 @@ int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_t return 0; } +int r600_bytecode_add_gds(struct r600_bytecode *bc, const struct r600_bytecode_gds *gds) +{ + struct r600_bytecode_gds *ngds = r600_bytecode_gds(); + int r; + + if (ngds == NULL) + return -ENOMEM; + memcpy(ngds, gds, sizeof(struct r600_bytecode_gds)); + + if (bc->cf_last == NULL || + bc->cf_last->op != CF_OP_GDS || + bc->force_add_cf) { + r = r600_bytecode_add_cf(bc); + if (r) { + free(ngds); + return r; + } + bc->cf_last->op = CF_OP_GDS; + } + + LIST_ADDTAIL(&ngds->list, &bc->cf_last->gds); + bc->cf_last->ndw += 4; /* each GDS uses 4 dwords */ + if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc)) + bc->force_add_cf = 1; + return 0; +} + int r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op) { int r; @@ -1604,16 +1682,19 @@ int r600_bytecode_build(struct r600_bytecode *bc) struct r600_bytecode_alu *alu; struct r600_bytecode_vtx *vtx; struct r600_bytecode_tex *tex; + struct r600_bytecode_gds *gds; uint32_t literal[4]; unsigned nliteral; unsigned addr; int i, r; - if (!bc->nstack) // If not 0, Stack_size already provided by llvm - bc->nstack = bc->stack.max_entries; - - if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) { - bc->nstack = 1; + if (!bc->nstack) { // If not 0, Stack_size already provided by llvm + if (bc->stack.max_entries) + bc->nstack = bc->stack.max_entries; + else if (bc->type == PIPE_SHADER_VERTEX || + bc->type == PIPE_SHADER_TESS_EVAL || + bc->type == PIPE_SHADER_TESS_CTRL) + bc->nstack = 1; } /* first path compute addr of each CF block */ @@ -1656,10 +1737,12 @@ int r600_bytecode_build(struct r600_bytecode *bc) r = r600_bytecode_alu_build(bc, alu, addr); break; case R700: - case EVERGREEN: /* eg alu is same encoding as r700 */ - case CAYMAN: r = r700_bytecode_alu_build(bc, alu, addr); break; + case EVERGREEN: + case CAYMAN: + r = eg_bytecode_alu_build(bc, alu, addr); + break; default: R600_ERR("unknown chip class %d.\n", bc->chip_class); return -EINVAL; @@ -1682,6 +1765,14 @@ int r600_bytecode_build(struct r600_bytecode *bc) return r; addr += 4; } + } else if (cf->op == CF_OP_GDS) { + assert(bc->chip_class >= EVERGREEN); + LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) { + r = eg_bytecode_gds_build(bc, gds, addr); + if (r) + return r; + addr += 4; + } } else if (cf->op == CF_OP_TEX) { LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { assert(bc->chip_class >= EVERGREEN); @@ -1712,6 +1803,7 @@ void r600_bytecode_clear(struct r600_bytecode *bc) struct r600_bytecode_alu *alu = NULL, *next_alu; struct r600_bytecode_tex *tex = NULL, *next_tex; struct r600_bytecode_tex *vtx = NULL, *next_vtx; + struct r600_bytecode_gds *gds = NULL, *next_gds; LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) { free(alu); @@ -1731,6 +1823,12 @@ void r600_bytecode_clear(struct r600_bytecode *bc) LIST_INITHEAD(&cf->vtx); + LIST_FOR_EACH_ENTRY_SAFE(gds, next_gds, &cf->gds, list) { + free(gds); + } + + LIST_INITHEAD(&cf->gds); + free(cf); } @@ -1776,7 +1874,7 @@ static int print_dst(struct r600_bytecode_alu *alu) reg_char = 'T'; } - if (alu->dst.write || alu->is_op3) { + if (alu_writes(alu)) { o += fprintf(stderr, "%c", reg_char); o += print_sel(alu->dst.sel, alu->dst.rel, alu->index_mode, 0); } else { @@ -1831,6 +1929,28 @@ static int print_src(struct r600_bytecode_alu *alu, unsigned idx) need_sel = 0; need_chan = 0; switch (sel) { + case EG_V_SQ_ALU_SRC_LDS_DIRECT_A: + o += fprintf(stderr, "LDS_A[0x%08X]", src->value); + break; + case EG_V_SQ_ALU_SRC_LDS_DIRECT_B: + o += fprintf(stderr, "LDS_B[0x%08X]", src->value); + break; + case EG_V_SQ_ALU_SRC_LDS_OQ_A: + o += fprintf(stderr, "LDS_OQ_A"); + need_chan = 1; + break; + case EG_V_SQ_ALU_SRC_LDS_OQ_B: + o += fprintf(stderr, "LDS_OQ_B"); + need_chan = 1; + break; + case EG_V_SQ_ALU_SRC_LDS_OQ_A_POP: + o += fprintf(stderr, "LDS_OQ_A_POP"); + need_chan = 1; + break; + case EG_V_SQ_ALU_SRC_LDS_OQ_B_POP: + o += fprintf(stderr, "LDS_OQ_B_POP"); + need_chan = 1; + break; case V_SQ_ALU_SRC_PS: o += fprintf(stderr, "PS"); break; @@ -1839,7 +1959,7 @@ static int print_src(struct r600_bytecode_alu *alu, unsigned idx) need_chan = 1; break; case V_SQ_ALU_SRC_LITERAL: - o += fprintf(stderr, "[0x%08X %f]", src->value, *(float*)&src->value); + o += fprintf(stderr, "[0x%08X %f]", src->value, u_bitcast_u2f(src->value)); break; case V_SQ_ALU_SRC_0_5: o += fprintf(stderr, "0.5"); @@ -1892,6 +2012,7 @@ void r600_bytecode_disasm(struct r600_bytecode *bc) struct r600_bytecode_alu *alu = NULL; struct r600_bytecode_vtx *vtx = NULL; struct r600_bytecode_tex *tex = NULL; + struct r600_bytecode_gds *gds = NULL; unsigned i, id, ngr = 0, last; uint32_t literal[4]; @@ -2029,6 +2150,10 @@ void r600_bytecode_disasm(struct r600_bytecode *bc) fprintf(stderr, "CND:%X ", cf->cond); if (cf->pop_count) fprintf(stderr, "POP:%X ", cf->pop_count); + if (cf->count && (cfop->flags & CF_EMIT)) + fprintf(stderr, "STREAM%d ", cf->count); + if (cf->end_of_program) + fprintf(stderr, "EOP "); fprintf(stderr, "\n"); } } @@ -2171,6 +2296,33 @@ void r600_bytecode_disasm(struct r600_bytecode *bc) id += 4; } + + LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) { + int o = 0; + o += fprintf(stderr, " %04d %08X %08X %08X ", id, bc->bytecode[id], + bc->bytecode[id + 1], bc->bytecode[id + 2]); + + o += fprintf(stderr, "%s ", r600_isa_fetch(gds->op)->name); + + if (gds->op != FETCH_OP_TF_WRITE) { + o += fprintf(stderr, "R%d.", gds->dst_gpr); + o += print_swizzle(gds->dst_sel_x); + o += print_swizzle(gds->dst_sel_y); + o += print_swizzle(gds->dst_sel_z); + o += print_swizzle(gds->dst_sel_w); + } + + o += fprintf(stderr, ", R%d.", gds->src_gpr); + o += print_swizzle(gds->src_sel_x); + o += print_swizzle(gds->src_sel_y); + o += print_swizzle(gds->src_sel_z); + + if (gds->op != FETCH_OP_TF_WRITE) { + o += fprintf(stderr, ", R%d.", gds->src_gpr2); + } + fprintf(stderr, "\n"); + id += 4; + } } fprintf(stderr, "--------------------------------------\n"); @@ -2194,6 +2346,12 @@ void r600_vertex_data_type(enum pipe_format pformat, return; } + if (pformat == PIPE_FORMAT_B5G6R5_UNORM) { + *format = FMT_5_6_5; + *endian = r600_endian_swap(16); + return; + } + desc = util_format_description(pformat); if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) { goto out_unknown; @@ -2397,7 +2555,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, &format, &num_format, &format_comp, &endian); desc = util_format_description(elements[i].src_format); - if (desc == NULL) { + if (!desc) { r600_bytecode_clear(&bc); R600_ERR("unknown format %d\n", elements[i].src_format); return NULL; @@ -2466,7 +2624,8 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, return NULL; } - u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, &shader->offset, + u_suballocator_alloc(rctx->allocator_fetch_shader, fs_size, 256, + &shader->offset, (struct pipe_resource**)&shader->buffer); if (!shader->buffer) { r600_bytecode_clear(&bc); @@ -2484,7 +2643,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, } else { memcpy(bytecode, bc.bytecode, fs_size); } - rctx->b.ws->buffer_unmap(shader->buffer->cs_buf); + rctx->b.ws->buffer_unmap(shader->buffer->buf); r600_bytecode_clear(&bc); return shader;