r600g: split streamout emit code into a separate function
authorDave Airlie <airlied@redhat.com>
Wed, 29 Jan 2014 01:33:14 +0000 (01:33 +0000)
committerDave Airlie <airlied@redhat.com>
Wed, 5 Feb 2014 00:40:17 +0000 (10:40 +1000)
For geometry shaders we need to call this code from a second place.

Just move it out for now to keep future patches cleaner.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/r600_shader.c

index 1ea4ae6c05617de5c0e9318668d283199bee38cb..5fd445e5147cedfba334eab01cbfff2e3024ac42 100644 (file)
@@ -875,6 +875,114 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so)
+{
+       unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
+       int i, j, r;
+
+       /* Sanity checking. */
+       if (so->num_outputs > PIPE_MAX_SHADER_OUTPUTS) {
+               R600_ERR("Too many stream outputs: %d\n", so->num_outputs);
+               r = -EINVAL;
+               goto out_err;
+       }
+       for (i = 0; i < so->num_outputs; i++) {
+               if (so->output[i].output_buffer >= 4) {
+                       R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
+                                so->output[i].output_buffer);
+                       r = -EINVAL;
+                       goto out_err;
+               }
+       }
+
+       /* Initialize locations where the outputs are stored. */
+       for (i = 0; i < so->num_outputs; i++) {
+               so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
+
+               /* Lower outputs with dst_offset < start_component.
+                *
+                * We can only output 4D vectors with a write mask, e.g. we can
+                * only output the W component at offset 3, etc. If we want
+                * to store Y, Z, or W at buffer offset 0, we need to use MOV
+                * to move it to X and output X. */
+               if (so->output[i].dst_offset < so->output[i].start_component) {
+                       unsigned tmp = r600_get_temp(ctx);
+
+                       for (j = 0; j < so->output[i].num_components; j++) {
+                               struct r600_bytecode_alu alu;
+                               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                               alu.op = ALU_OP1_MOV;
+                               alu.src[0].sel = so_gpr[i];
+                               alu.src[0].chan = so->output[i].start_component + j;
+
+                               alu.dst.sel = tmp;
+                               alu.dst.chan = j;
+                               alu.dst.write = 1;
+                               if (j == so->output[i].num_components - 1)
+                                       alu.last = 1;
+                               r = r600_bytecode_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+                       so->output[i].start_component = 0;
+                       so_gpr[i] = tmp;
+               }
+       }
+
+       /* Write outputs to buffers. */
+       for (i = 0; i < so->num_outputs; i++) {
+               struct r600_bytecode_output output;
+
+               memset(&output, 0, sizeof(struct r600_bytecode_output));
+               output.gpr = so_gpr[i];
+               output.elem_size = so->output[i].num_components;
+               output.array_base = so->output[i].dst_offset - so->output[i].start_component;
+               output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
+               output.burst_count = 1;
+               output.barrier = 1;
+               /* array_size is an upper limit for the burst_count
+                * with MEM_STREAM instructions */
+               output.array_size = 0xFFF;
+               output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component;
+               if (ctx->bc->chip_class >= EVERGREEN) {
+                       switch (so->output[i].output_buffer) {
+                       case 0:
+                               output.op = CF_OP_MEM_STREAM0_BUF0;
+                               break;
+                       case 1:
+                               output.op = CF_OP_MEM_STREAM0_BUF1;
+                               break;
+                       case 2:
+                               output.op = CF_OP_MEM_STREAM0_BUF2;
+                               break;
+                       case 3:
+                               output.op = CF_OP_MEM_STREAM0_BUF3;
+                               break;
+                       }
+               } else {
+                       switch (so->output[i].output_buffer) {
+                       case 0:
+                               output.op = CF_OP_MEM_STREAM0;
+                               break;
+                       case 1:
+                               output.op = CF_OP_MEM_STREAM1;
+                               break;
+                       case 2:
+                               output.op = CF_OP_MEM_STREAM2;
+                               break;
+                       case 3:
+                               output.op = CF_OP_MEM_STREAM3;
+                                       break;
+                       }
+               }
+               r = r600_bytecode_add_output(ctx->bc, &output);
+               if (r)
+                       goto out_err;
+       }
+       return 0;
+out_err:
+       return r;
+}
 
 static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                                 struct r600_pipe_shader *pipeshader,
@@ -1263,109 +1371,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
        }
 
        /* Add stream outputs. */
-       if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs && !use_llvm) {
-               unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
-
-               /* Sanity checking. */
-               if (so.num_outputs > PIPE_MAX_SHADER_OUTPUTS) {
-                       R600_ERR("Too many stream outputs: %d\n", so.num_outputs);
-                       r = -EINVAL;
-                       goto out_err;
-               }
-               for (i = 0; i < so.num_outputs; i++) {
-                       if (so.output[i].output_buffer >= 4) {
-                               R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
-                                        so.output[i].output_buffer);
-                               r = -EINVAL;
-                               goto out_err;
-                       }
-               }
-
-               /* Initialize locations where the outputs are stored. */
-               for (i = 0; i < so.num_outputs; i++) {
-                       so_gpr[i] = shader->output[so.output[i].register_index].gpr;
-
-                       /* Lower outputs with dst_offset < start_component.
-                        *
-                        * We can only output 4D vectors with a write mask, e.g. we can
-                        * only output the W component at offset 3, etc. If we want
-                        * to store Y, Z, or W at buffer offset 0, we need to use MOV
-                        * to move it to X and output X. */
-                       if (so.output[i].dst_offset < so.output[i].start_component) {
-                               unsigned tmp = r600_get_temp(&ctx);
-
-                               for (j = 0; j < so.output[i].num_components; j++) {
-                                       struct r600_bytecode_alu alu;
-                                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-                                       alu.op = ALU_OP1_MOV;
-                                       alu.src[0].sel = so_gpr[i];
-                                       alu.src[0].chan = so.output[i].start_component + j;
-
-                                       alu.dst.sel = tmp;
-                                       alu.dst.chan = j;
-                                       alu.dst.write = 1;
-                                       if (j == so.output[i].num_components - 1)
-                                               alu.last = 1;
-                                       r = r600_bytecode_add_alu(ctx.bc, &alu);
-                                       if (r)
-                                               return r;
-                               }
-                               so.output[i].start_component = 0;
-                               so_gpr[i] = tmp;
-                       }
-               }
-
-               /* Write outputs to buffers. */
-               for (i = 0; i < so.num_outputs; i++) {
-                       struct r600_bytecode_output output;
-
-                       memset(&output, 0, sizeof(struct r600_bytecode_output));
-                       output.gpr = so_gpr[i];
-                       output.elem_size = so.output[i].num_components;
-                       output.array_base = so.output[i].dst_offset - so.output[i].start_component;
-                       output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
-                       output.burst_count = 1;
-                       output.barrier = 1;
-                       /* array_size is an upper limit for the burst_count
-                        * with MEM_STREAM instructions */
-                       output.array_size = 0xFFF;
-                       output.comp_mask = ((1 << so.output[i].num_components) - 1) << so.output[i].start_component;
-                       if (ctx.bc->chip_class >= EVERGREEN) {
-                               switch (so.output[i].output_buffer) {
-                               case 0:
-                                       output.op = CF_OP_MEM_STREAM0_BUF0;
-                                       break;
-                               case 1:
-                                       output.op = CF_OP_MEM_STREAM0_BUF1;
-                                       break;
-                               case 2:
-                                       output.op = CF_OP_MEM_STREAM0_BUF2;
-                                       break;
-                               case 3:
-                                       output.op = CF_OP_MEM_STREAM0_BUF3;
-                                       break;
-                               }
-                       } else {
-                               switch (so.output[i].output_buffer) {
-                               case 0:
-                                       output.op = CF_OP_MEM_STREAM0;
-                                       break;
-                               case 1:
-                                       output.op = CF_OP_MEM_STREAM1;
-                                       break;
-                               case 2:
-                                       output.op = CF_OP_MEM_STREAM2;
-                                       break;
-                               case 3:
-                                       output.op = CF_OP_MEM_STREAM3;
-                                       break;
-                               }
-                       }
-                       r = r600_bytecode_add_output(ctx.bc, &output);
-                       if (r)
-                               goto out_err;
-               }
-       }
+       if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs && !use_llvm)
+               emit_streamout(&ctx, &so);
 
        /* export output */
        for (i = 0, j = 0; i < noutput; i++, j++) {