From 552aae7e47e93ada58649a92ae9e7da37afb05c3 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Wed, 31 Jul 2013 20:02:22 +0400 Subject: [PATCH] r600g: move barrier and end_of_program bits from output to cf struct (v2) v2: fix regression on r600 NOP instructions. Signed-off-by: Vadim Girlin Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/eg_asm.c | 10 ++++++---- src/gallium/drivers/r600/r600_asm.c | 24 +++++++++++++----------- src/gallium/drivers/r600/r600_asm.h | 4 ++-- src/gallium/drivers/r600/r600_shader.c | 26 +++++++++++++------------- 4 files changed, 34 insertions(+), 30 deletions(-) diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index fffc436e823..42e78c0f069 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -86,11 +86,11 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | - S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | + S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) | S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode); if (bc->chip_class == EVERGREEN) /* no EOP on cayman */ - bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); + bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program); id++; } else if (cfop->flags & CF_STRM) { /* MEM_STREAM instructions */ @@ -99,12 +99,12 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | - S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | + S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) | S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) | S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask) | S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size); if (bc->chip_class == EVERGREEN) /* no EOP on cayman */ - bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); + bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program); id++; } else { /* branch, loop, call, return instructions */ @@ -118,6 +118,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) return 0; } +#if 0 void eg_bytecode_export_read(struct r600_bytecode *bc, struct r600_bytecode_output *output, uint32_t word0, uint32_t word1) { @@ -138,3 +139,4 @@ void eg_bytecode_export_read(struct r600_bytecode *bc, output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1); output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1); } +#endif diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index c5922a8fa5b..e062fcb6be8 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -193,7 +193,6 @@ int r600_bytecode_add_output(struct r600_bytecode *bc, if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr && (output->array_base + output->burst_count) == bc->cf_last->output.array_base) { - bc->cf_last->output.end_of_program |= output->end_of_program; bc->cf_last->op = bc->cf_last->output.op = output->op; bc->cf_last->output.gpr = output->gpr; bc->cf_last->output.array_base = output->array_base; @@ -203,7 +202,6 @@ int r600_bytecode_add_output(struct r600_bytecode *bc, } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) && output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) { - bc->cf_last->output.end_of_program |= output->end_of_program; bc->cf_last->op = bc->cf_last->output.op = output->op; bc->cf_last->output.burst_count += output->burst_count; return 0; @@ -215,6 +213,7 @@ int r600_bytecode_add_output(struct r600_bytecode *bc, return r; bc->cf_last->op = output->op; memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output)); + bc->cf_last->barrier = 1; return 0; } @@ -1532,18 +1531,18 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | - S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | + S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) | S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) | - S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); + S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program); } else if (cfop->flags & CF_STRM) { bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) | S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | - S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | + S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) | S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) | - S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program) | + S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program) | S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) | S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask); } else { @@ -1551,7 +1550,8 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) | S_SQ_CF_WORD1_BARRIER(1) | S_SQ_CF_WORD1_COND(cf->cond) | - S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); + S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) | + S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program); } return 0; } @@ -1932,9 +1932,9 @@ void r600_bytecode_disasm(struct r600_bytecode *bc) print_indent(o, 67); fprintf(stderr, " ES:%X ", cf->output.elem_size); - if (!cf->output.barrier) + if (!cf->barrier) fprintf(stderr, "NO_BARRIER "); - if (cf->output.end_of_program) + if (cf->end_of_program) fprintf(stderr, "EOP "); fprintf(stderr, "\n"); } else if (r600_isa_cf(cf->op)->flags & CF_STRM) { @@ -1968,9 +1968,9 @@ void r600_bytecode_disasm(struct r600_bytecode *bc) fprintf(stderr, " ES:%i ", cf->output.elem_size); if (cf->output.array_size != 0xFFF) fprintf(stderr, "AS:%i ", cf->output.array_size); - if (!cf->output.barrier) + if (!cf->barrier) fprintf(stderr, "NO_BARRIER "); - if (cf->output.end_of_program) + if (cf->end_of_program) fprintf(stderr, "EOP "); fprintf(stderr, "\n"); } else { @@ -2486,6 +2486,7 @@ void r600_bytecode_alu_read(struct r600_bytecode *bc, } } +#if 0 void r600_bytecode_export_read(struct r600_bytecode *bc, struct r600_bytecode_output *output, uint32_t word0, uint32_t word1) { @@ -2506,3 +2507,4 @@ void r600_bytecode_export_read(struct r600_bytecode *bc, output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1); output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1); } +#endif diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 82c6c8d1c7e..3bfbcb282a3 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -115,7 +115,6 @@ struct r600_bytecode_output { unsigned array_size; unsigned comp_mask; unsigned type; - unsigned end_of_program; unsigned op; @@ -126,7 +125,6 @@ struct r600_bytecode_output { unsigned swizzle_z; unsigned swizzle_w; unsigned burst_count; - unsigned barrier; }; struct r600_bytecode_kcache { @@ -148,6 +146,8 @@ struct r600_bytecode_cf { struct r600_bytecode_kcache kcache[4]; unsigned r6xx_uses_waterfall; unsigned eg_alu_extended; + unsigned barrier; + unsigned end_of_program; struct list_head alu; struct list_head tex; struct list_head vtx; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 5fd445e5147..32d2aa73bef 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -939,7 +939,6 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output output.array_base = so->output[i].dst_offset - so->output[i].start_component; output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; output.burst_count = 1; - output.barrier = 1; /* array_size is an upper limit for the burst_count * with MEM_STREAM instructions */ output.array_size = 0xFFF; @@ -1384,7 +1383,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, output[j].swizzle_z = 2; output[j].swizzle_w = 3; output[j].burst_count = 1; - output[j].barrier = 1; output[j].type = -1; output[j].op = CF_OP_EXPORT; switch (ctx.type) { @@ -1445,7 +1443,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, output[j].swizzle_z = 2; output[j].swizzle_w = key.alpha_to_one ? 5 : 3; output[j].burst_count = 1; - output[j].barrier = 1; output[j].array_base = k; output[j].op = CF_OP_EXPORT; output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; @@ -1492,7 +1489,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, output[j].swizzle_z = 7; output[j].swizzle_w = 7; output[j].burst_count = 1; - output[j].barrier = 1; output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; output[j].array_base = next_pos_base; output[j].op = CF_OP_EXPORT; @@ -1509,7 +1505,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, output[j].swizzle_z = 7; output[j].swizzle_w = 7; output[j].burst_count = 1; - output[j].barrier = 1; output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[j].array_base = 0; output[j].op = CF_OP_EXPORT; @@ -1526,7 +1521,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, output[j].swizzle_z = 7; output[j].swizzle_w = 7; output[j].burst_count = 1; - output[j].barrier = 1; output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; output[j].array_base = 0; output[j].op = CF_OP_EXPORT; @@ -1537,11 +1531,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, /* set export done on last export of each type */ for (i = noutput - 1, output_done = 0; i >= 0; i--) { - if (ctx.bc->chip_class < CAYMAN) { - if (i == (noutput - 1)) { - output[i].end_of_program = 1; - } - } if (!(output_done & (1 << output[i].type))) { output_done |= (1 << output[i].type); output[i].op = CF_OP_EXPORT_DONE; @@ -1555,9 +1544,20 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, goto out_err; } } + /* add program end */ - if (!use_llvm && ctx.bc->chip_class == CAYMAN) - cm_bytecode_add_cf_end(ctx.bc); + if (!use_llvm) { + if (ctx.bc->chip_class == CAYMAN) + cm_bytecode_add_cf_end(ctx.bc); + else { + const struct cf_op_info *last = r600_isa_cf(ctx.bc->cf_last->op); + + if (last->flags & CF_CLAUSE) + r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); + + ctx.bc->cf_last->end_of_program = 1; + } + } /* check GPR limit - we have 124 = 128 - 4 * (4 are reserved as alu clause temporary registers) */ -- 2.30.2