r600: Emit EOP for more CF instruction types
author Gert Wollny <gw.fossdev@gmail.com>
Fri, 17 Nov 2017 11:13:40 +0000 (12:13 +0100)
committer Dave Airlie <airlied@redhat.com>
Wed, 22 Nov 2017 22:39:42 +0000 (22:39 +0000)
So far, on pre-cayman chipsets, when the last CF instruction was
CF_OP_LOOP_END, CF_OP_CALL_FS, CF_OP_POP, or CF_OP_GDS, an extra CF_NOP
instruction was added to carry the EOP flag, even though this is not
actually needed, because all these instructions support the EOP flag.

This patch removes the fixup code, sets the EOP flag on the
corresponding instructions as well as on others like CF_OP_TEX and CF_OP_VTX,
and makes the disassembler print EOP for this type of instruction.

This also fixes a bug where shaders were created that didn't actually have
the EOP flag set in the last CF instruction, which might have resulted
in GPU lockups.

[airlied: cleaned up a little]
Signed-off-by: Gert Wollny <gw.fossdev@gmail.com>
Cc: <mesa-stable@lists.freedesktop.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/eg_asm.c
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r700_asm.c

index ce7e861b7394d18c71a5bd251e24cb0657c4426e..8f9d1b85f231b658b5c17949a960b783a35fcd59 100644 (file)
@@ -71,10 +71,13 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
                } else if (cfop->flags & CF_CLAUSE) {
                        /* CF_TEX/VTX (CF_ALU already handled above) */
                        bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
-                       bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
+                       bc->bytecode[id] = S_SQ_CF_WORD1_CF_INST(opcode) |
                                        S_SQ_CF_WORD1_BARRIER(1) |
                                        S_SQ_CF_WORD1_VALID_PIXEL_MODE(cf->vpm) |
                                        S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+                       if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
+                               bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
+                       id++;
                } else if (cfop->flags & CF_EXP) {
                        /* EXPORT instructions */
                        bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
@@ -133,12 +136,14 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
                } else {
                        /* other instructions */
                        bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
-                       bc->bytecode[id++] =  S_SQ_CF_WORD1_CF_INST(opcode)|
+                       bc->bytecode[id] = S_SQ_CF_WORD1_CF_INST(opcode) |
                                        S_SQ_CF_WORD1_BARRIER(1) |
                                        S_SQ_CF_WORD1_COND(cf->cond) |
                                        S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
-                                       S_SQ_CF_WORD1_COUNT(cf->count) |
-                                       S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
+                                       S_SQ_CF_WORD1_COUNT(cf->count);
+                       if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
+                               bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
+                       id++;
                }
        }
        return 0;
index 96bc33727857b144ed112921e7d2067e2b2361b2..69b2d142c15aa2e3d510945aeec2e26f52b3bcaa 100644 (file)
@@ -1625,7 +1625,8 @@ static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_byt
        *bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
        *bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) |
                        S_SQ_CF_WORD1_BARRIER(1) |
-                       S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+                       S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)|
+                       S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
 }
 
 /* common for r600/r700 - eg in eg_asm.c */
@@ -2097,6 +2098,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
                                                bc->bytecode[id + 1], cfop->name);
                                fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
                                fprintf(stderr, "\n");
+                               if (cf->end_of_program)
+                                       fprintf(stderr, "EOP ");
                        } else if (cfop->flags & CF_EXP) {
                                int o = 0;
                                const char *exp_type[] = {"PIXEL", "POS  ", "PARAM"};
index 1422abf6fa955c47e00961af0f3cd0d19c79b667..82b45b6d5afdebdd628b4314b534016dc1f5f26e 100644 (file)
@@ -3809,7 +3809,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                        last = r600_isa_cf(ctx.bc->cf_last->op);
 
                /* alu clause instructions don't have EOP bit, so add NOP */
-               if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS)
+               if (!last || last->flags & CF_ALU)
                        r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
 
                ctx.bc->cf_last->end_of_program = 1;
index 04f8c6288f0b681798e05730ebdd5a3eb4b512ba..395059cfeaa3c53485f0ba6c51f1258d86384bc6 100644 (file)
@@ -30,7 +30,8 @@ void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_c
        *bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R700, cf->op)) |
                        S_SQ_CF_WORD1_BARRIER(1) |
                        S_SQ_CF_WORD1_COUNT(count) |
-                       S_SQ_CF_WORD1_COUNT_3(count >> 3);
+                       S_SQ_CF_WORD1_COUNT_3(count >> 3)|
+                       S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
 }
 
 int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)