r600g: use burst exports in shaders
author Christian König <deathsimple@vodafone.de>
Tue, 1 Feb 2011 23:24:34 +0000 (00:24 +0100)
committer Christian König <deathsimple@vodafone.de>
Wed, 2 Feb 2011 00:33:03 +0000 (01:33 +0100)
Join multiple exports into just one instruction
instead of exporting each register separately.

src/gallium/drivers/r600/eg_asm.c
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_shader.c

index 67d742b376098890b75c7a4a923fd77dba63197b..80c5de39750bc441269328854ed9bc5635aecb34 100644 (file)
@@ -62,7 +62,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
                        S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
-               bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
+               bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+                       S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
index 13bf7644e7e59dc80f5c75bcf8deb74732f9ab39..e910d1cc73fb2a08444b73d4886293972d7cc3a7 100644 (file)
@@ -246,6 +246,37 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 {
        int r;
 
+       if (bc->cf_last && (bc->cf_last->inst == output->inst ||
+               (bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) &&
+               output->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE))) &&
+               output->type == bc->cf_last->output.type &&
+               output->elem_size == bc->cf_last->output.elem_size &&
+               output->swizzle_x == bc->cf_last->output.swizzle_x &&
+               output->swizzle_y == bc->cf_last->output.swizzle_y &&
+               output->swizzle_z == bc->cf_last->output.swizzle_z &&
+               output->swizzle_w == bc->cf_last->output.swizzle_w &&
+               (output->burst_count + bc->cf_last->output.burst_count) <= 16) {
+
+               if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
+                       (output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
+
+                       bc->cf_last->output.end_of_program |= output->end_of_program;
+                       bc->cf_last->output.inst = output->inst;
+                       bc->cf_last->output.gpr = output->gpr;
+                       bc->cf_last->output.array_base = output->array_base;
+                       bc->cf_last->output.burst_count += output->burst_count;
+                       return 0;
+
+               } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
+                       output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
+
+                       bc->cf_last->output.end_of_program |= output->end_of_program;
+                       bc->cf_last->output.inst = output->inst;
+                       bc->cf_last->output.burst_count += output->burst_count;
+                       return 0;
+               }
+       }
+
        r = r600_bc_add_cf(bc);
        if (r)
                return r;
@@ -1443,7 +1474,8 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
                        S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
-               bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
+               bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+                       S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
@@ -1725,9 +1757,9 @@ void r600_bc_dump(struct r600_bc *bc)
                        fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
                        fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
                        fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
-                       fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
                        fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
                        fprintf(stderr, "INST:%d ", cf->output.inst);
+                       fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count);
                        fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
                        break;
                case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
index 278b4466cb03e0e76f68b97c3d43673168924f64..b91d9b5b41b8444730cd590faaf91feec47c9924 100644 (file)
@@ -116,6 +116,7 @@ struct r600_bc_output {
        unsigned                        swizzle_y;
        unsigned                        swizzle_z;
        unsigned                        swizzle_w;
+       unsigned                        burst_count;
        unsigned                        barrier;
 };
 
index 41849875074c94bd434274feb977cfd4de2f292f..643c47d4bf611f8932df8fffaa6f2323fb74bd8b 100644 (file)
@@ -632,6 +632,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
                output[i].swizzle_y = 1;
                output[i].swizzle_z = 2;
                output[i].swizzle_w = 3;
+               output[i].burst_count = 1;
                output[i].barrier = 1;
                output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
                output[i].array_base = i - pos0;
@@ -695,6 +696,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
                        output[i].swizzle_y = 1;
                        output[i].swizzle_z = 2;
                        output[i].swizzle_w = 3;
+                       output[i].burst_count = 1;
                        output[i].barrier = 1;
                        output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
                        output[i].array_base = 0;
@@ -711,6 +713,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
                output[0].swizzle_y = 7;
                output[0].swizzle_z = 7;
                output[0].swizzle_w = 7;
+               output[0].burst_count = 1;
                output[0].barrier = 1;
                output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
                output[0].array_base = 0;