r600g: move barrier and end_of_program bits from output to cf struct (v2)
author Vadim Girlin <vadimgirlin@gmail.com>
Wed, 31 Jul 2013 16:02:22 +0000 (20:02 +0400)
committer Dave Airlie <airlied@redhat.com>
Wed, 5 Feb 2014 00:40:23 +0000 (10:40 +1000)
v2: fix regression on r600 NOP instructions.

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/eg_asm.c
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_shader.c

index fffc436e823e016d5ff21e2500eba418f12147c0..42e78c0f06997c0de07997e0c3fba7789799ca1b 100644 (file)
@@ -86,11 +86,11 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
                                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
                                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
                                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
-                                       S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+                                       S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
                                        S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode);
 
                        if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
-                               bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+                               bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
                        id++;
                } else if (cfop->flags & CF_STRM) {
                        /* MEM_STREAM instructions */
@@ -99,12 +99,12 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
                                        S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
                                        S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
                        bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
-                                       S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+                                       S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
                                        S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
                                        S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask) |
                                        S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size);
                        if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
-                               bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+                               bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
                        id++;
                } else {
                        /* branch, loop, call, return instructions */
@@ -118,6 +118,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
        return 0;
 }
 
+#if 0
 void eg_bytecode_export_read(struct r600_bytecode *bc,
                struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
 {
@@ -138,3 +139,4 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
        output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
        output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
 }
+#endif
index c5922a8fa5bdbc27c39280358375f975374f174d..e062fcb6be8c3338ba027a94a44ef67a281e8776 100644 (file)
@@ -193,7 +193,6 @@ int r600_bytecode_add_output(struct r600_bytecode *bc,
                if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
                        (output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
 
-                       bc->cf_last->output.end_of_program |= output->end_of_program;
                        bc->cf_last->op = bc->cf_last->output.op = output->op;
                        bc->cf_last->output.gpr = output->gpr;
                        bc->cf_last->output.array_base = output->array_base;
@@ -203,7 +202,6 @@ int r600_bytecode_add_output(struct r600_bytecode *bc,
                } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
                        output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
 
-                       bc->cf_last->output.end_of_program |= output->end_of_program;
                        bc->cf_last->op = bc->cf_last->output.op = output->op;
                        bc->cf_last->output.burst_count += output->burst_count;
                        return 0;
@@ -215,6 +213,7 @@ int r600_bytecode_add_output(struct r600_bytecode *bc,
                return r;
        bc->cf_last->op = output->op;
        memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output));
+       bc->cf_last->barrier = 1;
        return 0;
 }
 
@@ -1532,18 +1531,18 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
-                       S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+                       S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
-                       S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+                       S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
        } else if (cfop->flags & CF_STRM) {
                bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
                        S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
                bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
-                       S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+                       S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
-                       S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program) |
+                       S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask);
        } else {
@@ -1551,7 +1550,8 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode
                bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
                                        S_SQ_CF_WORD1_BARRIER(1) |
                                        S_SQ_CF_WORD1_COND(cf->cond) |
-                                       S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+                                       S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
+                                       S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
        }
        return 0;
 }
@@ -1932,9 +1932,9 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
                                print_indent(o, 67);
 
                                fprintf(stderr, " ES:%X ", cf->output.elem_size);
-                               if (!cf->output.barrier)
+                               if (!cf->barrier)
                                        fprintf(stderr, "NO_BARRIER ");
-                               if (cf->output.end_of_program)
+                               if (cf->end_of_program)
                                        fprintf(stderr, "EOP ");
                                fprintf(stderr, "\n");
                        } else if (r600_isa_cf(cf->op)->flags & CF_STRM) {
@@ -1968,9 +1968,9 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
                                fprintf(stderr, " ES:%i ", cf->output.elem_size);
                                if (cf->output.array_size != 0xFFF)
                                        fprintf(stderr, "AS:%i ", cf->output.array_size);
-                               if (!cf->output.barrier)
+                               if (!cf->barrier)
                                        fprintf(stderr, "NO_BARRIER ");
-                               if (cf->output.end_of_program)
+                               if (cf->end_of_program)
                                        fprintf(stderr, "EOP ");
                                fprintf(stderr, "\n");
                        } else {
@@ -2486,6 +2486,7 @@ void r600_bytecode_alu_read(struct r600_bytecode *bc,
        }
 }
 
+#if 0
 void r600_bytecode_export_read(struct r600_bytecode *bc,
                struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
 {
@@ -2506,3 +2507,4 @@ void r600_bytecode_export_read(struct r600_bytecode *bc,
        output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
        output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
 }
+#endif
index 82c6c8d1c7e05e426b22ca78e00455561f3bb217..3bfbcb282a34058f0307b49ca529381de32ddd6e 100644 (file)
@@ -115,7 +115,6 @@ struct r600_bytecode_output {
        unsigned                        array_size;
        unsigned                        comp_mask;
        unsigned                        type;
-       unsigned                        end_of_program;
 
        unsigned                        op;
 
@@ -126,7 +125,6 @@ struct r600_bytecode_output {
        unsigned                        swizzle_z;
        unsigned                        swizzle_w;
        unsigned                        burst_count;
-       unsigned                        barrier;
 };
 
 struct r600_bytecode_kcache {
@@ -148,6 +146,8 @@ struct r600_bytecode_cf {
        struct r600_bytecode_kcache             kcache[4];
        unsigned                        r6xx_uses_waterfall;
        unsigned                        eg_alu_extended;
+       unsigned                        barrier;
+       unsigned                        end_of_program;
        struct list_head                alu;
        struct list_head                tex;
        struct list_head                vtx;
index 5fd445e5147cedfba334eab01cbfff2e3024ac42..32d2aa73bef203834857dcee6efbbaf3fc5202d9 100644 (file)
@@ -939,7 +939,6 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
                output.array_base = so->output[i].dst_offset - so->output[i].start_component;
                output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
                output.burst_count = 1;
-               output.barrier = 1;
                /* array_size is an upper limit for the burst_count
                 * with MEM_STREAM instructions */
                output.array_size = 0xFFF;
@@ -1384,7 +1383,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                output[j].swizzle_z = 2;
                output[j].swizzle_w = 3;
                output[j].burst_count = 1;
-               output[j].barrier = 1;
                output[j].type = -1;
                output[j].op = CF_OP_EXPORT;
                switch (ctx.type) {
@@ -1445,7 +1443,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                                                output[j].swizzle_z = 2;
                                                output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
                                                output[j].burst_count = 1;
-                                               output[j].barrier = 1;
                                                output[j].array_base = k;
                                                output[j].op = CF_OP_EXPORT;
                                                output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
@@ -1492,7 +1489,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                        output[j].swizzle_z = 7;
                        output[j].swizzle_w = 7;
                        output[j].burst_count = 1;
-                       output[j].barrier = 1;
                        output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
                        output[j].array_base = next_pos_base;
                        output[j].op = CF_OP_EXPORT;
@@ -1509,7 +1505,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                        output[j].swizzle_z = 7;
                        output[j].swizzle_w = 7;
                        output[j].burst_count = 1;
-                       output[j].barrier = 1;
                        output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
                        output[j].array_base = 0;
                        output[j].op = CF_OP_EXPORT;
@@ -1526,7 +1521,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                output[j].swizzle_z = 7;
                output[j].swizzle_w = 7;
                output[j].burst_count = 1;
-               output[j].barrier = 1;
                output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
                output[j].array_base = 0;
                output[j].op = CF_OP_EXPORT;
@@ -1537,11 +1531,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 
        /* set export done on last export of each type */
        for (i = noutput - 1, output_done = 0; i >= 0; i--) {
-               if (ctx.bc->chip_class < CAYMAN) {
-                       if (i == (noutput - 1)) {
-                               output[i].end_of_program = 1;
-                       }
-               }
                if (!(output_done & (1 << output[i].type))) {
                        output_done |= (1 << output[i].type);
                        output[i].op = CF_OP_EXPORT_DONE;
@@ -1555,9 +1544,20 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                                goto out_err;
                }
        }
+
        /* add program end */
-       if (!use_llvm && ctx.bc->chip_class == CAYMAN)
-               cm_bytecode_add_cf_end(ctx.bc);
+       if (!use_llvm) {
+               if (ctx.bc->chip_class == CAYMAN)
+                       cm_bytecode_add_cf_end(ctx.bc);
+               else {
+                       const struct cf_op_info *last = r600_isa_cf(ctx.bc->cf_last->op);
+
+                       if (last->flags & CF_CLAUSE)
+                               r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
+
+                       ctx.bc->cf_last->end_of_program = 1;
+               }
+       }
 
        /* check GPR limit - we have 124 = 128 - 4
         * (4 are reserved as alu clause temporary registers) */