r600: initial attempt at gl_HelperInvocation (v3)
authorDave Airlie <airlied@redhat.com>
Wed, 31 Jan 2018 04:28:26 +0000 (14:28 +1000)
committerDave Airlie <airlied@redhat.com>
Thu, 1 Feb 2018 23:46:05 +0000 (09:46 +1000)
This passes the CTS and piglit tests.

This also disables sb for shaders that use helper invocations until
sb no longer messes up the VPM flags.

Thanks to Ilia and Glenn for advice, and Roland for working
out the working evergreen path.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_isa.c
src/gallium/drivers/r600/r600_isa.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_shader.h
src/gallium/drivers/r600/r600_sq.h

index 21d069d81b46e9b867b19fb3fb38b423bbe1dcf2..ec2d34e95049b8a0bd0e0bf504fafea5928721cf 100644 (file)
@@ -2099,9 +2099,12 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
                                fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
                                                bc->bytecode[id + 1], cfop->name);
                                fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
-                               fprintf(stderr, "\n");
+                               if (cf->vpm)
+                                       fprintf(stderr, "VPM ");
                                if (cf->end_of_program)
                                        fprintf(stderr, "EOP ");
+                               fprintf(stderr, "\n");
+
                        } else if (cfop->flags & CF_EXP) {
                                int o = 0;
                                const char *exp_type[] = {"PIXEL", "POS  ", "PARAM"};
@@ -2198,6 +2201,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
                                        fprintf(stderr, "POP:%X ", cf->pop_count);
                                if (cf->count && (cfop->flags & CF_EMIT))
                                        fprintf(stderr, "STREAM%d ", cf->count);
+                               if (cf->vpm)
+                                       fprintf(stderr, "VPM ");
                                if (cf->end_of_program)
                                        fprintf(stderr, "EOP ");
                                fprintf(stderr, "\n");
index 2633cdcdb9f35c1d3ca2ece5a55da39ede9fe212..611b370bf58ad24e7d14ea153f9f1c1e698e77f8 100644 (file)
@@ -506,6 +506,7 @@ static const struct cf_op_info cf_op_table[] = {
                {"ALU_EXT",                       {   -1,   -1, 0x0C, 0x0C },  CF_CLAUSE | CF_ALU | CF_ALU_EXT  },
                {"ALU_CONTINUE",                  { 0x0D, 0x0D, 0x0D,   -1 },  CF_CLAUSE | CF_ALU  },
                {"ALU_BREAK",                     { 0x0E, 0x0E, 0x0E,   -1 },  CF_CLAUSE | CF_ALU  },
+               {"ALU_VALID_PIXEL_MODE",          {   -1,   -1,   -1, 0x0E },  CF_CLAUSE | CF_ALU  },
                {"ALU_ELSE_AFTER",                { 0x0F, 0x0F, 0x0F, 0x0F },  CF_CLAUSE | CF_ALU  },
                {"CF_NATIVE",                     { 0x00, 0x00, 0x00, 0x00 },  0  }
 };
index f6e26976c5fcbb57599d1a0f40e65404aec64282..fcaf1f766b36b143308ac417f57ceb61e2abd0bd 100644 (file)
@@ -646,10 +646,11 @@ struct cf_op_info
 #define CF_OP_ALU_EXT                      84
 #define CF_OP_ALU_CONTINUE                 85
 #define CF_OP_ALU_BREAK                    86
-#define CF_OP_ALU_ELSE_AFTER               87
+#define CF_OP_ALU_VALID_PIXEL_MODE         87
+#define CF_OP_ALU_ELSE_AFTER               88
 
 /* CF_NATIVE means that r600_bytecode_cf contains pre-encoded native data */
-#define CF_NATIVE                          88
+#define CF_NATIVE                          89
 
 enum r600_chip_class {
        ISA_CC_R600,
index a462691f7aa7af0a1744f9a135bc4a5507b6154a..13aa681049c66011a04bfc0437170c232fa71e39 100644 (file)
@@ -197,6 +197,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 
        use_sb &= !shader->shader.uses_atomics;
        use_sb &= !shader->shader.uses_images;
+       use_sb &= !shader->shader.uses_helper_invocation;
 
        /* Check if the bytecode has already been built. */
        if (!shader->shader.bc.bytecode) {
@@ -346,6 +347,7 @@ struct r600_shader_ctx {
        boolean                 clip_vertex_write;
        unsigned                cv_output;
        unsigned                edgeflag_output;
+       int                                     helper_invoc_reg;
        int                                     cs_block_size_reg;
        int                                     cs_grid_size_reg;
        bool cs_block_size_loaded, cs_grid_size_loaded;
@@ -1295,6 +1297,75 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
        return t1;
 }
 
+/*
+ * Evergreen path: emit the bytecode that fills ctx->helper_invoc_reg.x.
+ *
+ * A regular ALU MOV first writes 0xffffffff to the register.  A
+ * GET_BUFFER_RESINFO fetch targeting the same register is then added and
+ * the CF instruction holding it is flagged VPM (valid pixel mode).
+ * NOTE(review): presumably dst_sel 4 writes constant 0 and dst_sel 7 masks
+ * the channel, so only valid (non-helper) pixels end up with 0 - confirm
+ * against the ISA documentation.
+ *
+ * Returns 0 on success, otherwise the non-zero error code reported by the
+ * bytecode helper that failed.
+ */
+static int eg_load_helper_invocation(struct r600_shader_ctx *ctx)
+{
+       struct r600_bytecode_alu mov;
+       struct r600_bytecode_vtx fetch;
+       int ret;
+
+       /* seed helper_invoc_reg.x with ~0 using a normal ALU instruction */
+       memset(&mov, 0, sizeof(mov));
+       mov.op = ALU_OP1_MOV;
+       mov.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+       mov.src[0].value = 0xffffffff;
+       mov.dst.sel = ctx->helper_invoc_reg;
+       mov.dst.chan = 0;
+       mov.dst.write = 1;
+       mov.last = 1;
+       ret = r600_bytecode_add_alu(ctx->bc, &mov);
+       if (ret)
+               return ret;
+
+       /* do a vtx fetch in VPM mode */
+       memset(&fetch, 0, sizeof(fetch));
+       fetch.op = FETCH_OP_GET_BUFFER_RESINFO;
+       fetch.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
+       fetch.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
+       fetch.src_gpr = 0;
+       fetch.mega_fetch_count = 16; /* no idea here really... */
+       fetch.data_format = FMT_32;
+       fetch.dst_gpr = ctx->helper_invoc_reg;
+       fetch.dst_sel_x = 4;
+       fetch.dst_sel_y = 7;            /* SEL_Y */
+       fetch.dst_sel_z = 7;            /* SEL_Z */
+       fetch.dst_sel_w = 7;            /* SEL_W */
+       ret = r600_bytecode_add_vtx_tc(ctx->bc, &fetch);
+       if (ret)
+               return ret;
+
+       /* flag the CF instruction that now holds the fetch as VPM */
+       ctx->bc->cf_last->vpm = 1;
+       return 0;
+}
+
+/*
+ * Cayman path: emit the bytecode that fills ctx->helper_invoc_reg.x.
+ *
+ * A regular ALU MOV first writes 0xffffffff to the register; a second MOV
+ * of constant 0 to the same channel is then emitted in an
+ * ALU_VALID_PIXEL_MODE clause.
+ * NOTE(review): presumably the second MOV only executes for valid
+ * (non-helper) pixels, leaving ~0 in helper invocations - confirm against
+ * the ISA documentation.
+ *
+ * Returns 0 on success, otherwise the non-zero error code reported by the
+ * bytecode helper that failed.  The caller treats any non-zero value as a
+ * failure, so the register index must not be returned here; it is
+ * available to callers as ctx->helper_invoc_reg.
+ */
+static int cm_load_helper_invocation(struct r600_shader_ctx *ctx)
+{
+       int r;
+       struct r600_bytecode_alu alu;
+
+       /* seed helper_invoc_reg.x with ~0 using a normal ALU clause */
+       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+       alu.op = ALU_OP1_MOV;
+       alu.dst.sel = ctx->helper_invoc_reg;
+       alu.dst.chan = 0;
+       alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+       alu.src[0].value = 0xffffffff;
+       alu.dst.write = 1;
+       alu.last = 1;
+       r = r600_bytecode_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
+
+       /* overwrite it with 0 from a valid-pixel-mode ALU clause */
+       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+       alu.op = ALU_OP1_MOV;
+       alu.dst.sel = ctx->helper_invoc_reg;
+       alu.dst.chan = 0;
+       alu.src[0].sel = V_SQ_ALU_SRC_0;
+       alu.dst.write = 1;
+       alu.last = 1;
+       r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_VALID_PIXEL_MODE);
+       if (r)
+               return r;
+
+       /* success: match eg_load_helper_invocation() and report 0 */
+       return 0;
+}
+
 static int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block)
 {
        struct r600_bytecode_vtx vtx;
@@ -1458,6 +1529,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
                        r600_src->sel = load_block_grid_size(ctx, false);
                } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_SIZE) {
                        r600_src->sel = load_block_grid_size(ctx, true);
+               } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_HELPER_INVOCATION) {
+                       r600_src->sel = ctx->helper_invoc_reg;
+                       r600_src->swizzle[0] = 0;
+                       r600_src->swizzle[1] = 0;
+                       r600_src->swizzle[2] = 0;
+                       r600_src->swizzle[3] = 0;
                }
        } else {
                if (tgsi_src->Register.Indirect)
@@ -3120,6 +3197,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        tgsi_scan_shader(tokens, &ctx.info);
        shader->indirect_files = ctx.info.indirect_files;
 
+       shader->uses_helper_invocation = false;
        shader->uses_doubles = ctx.info.uses_doubles;
        shader->uses_atomics = ctx.info.file_mask[TGSI_FILE_HW_ATOMIC];
        shader->nsys_inputs = 0;
@@ -3193,6 +3271,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        ctx.clip_vertex_write = 0;
        ctx.thread_id_gpr_loaded = false;
 
+       ctx.helper_invoc_reg = -1;
        ctx.cs_block_size_reg = -1;
        ctx.cs_grid_size_reg = -1;
        ctx.cs_block_size_loaded = false;
@@ -3238,6 +3317,13 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                        ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
                else
                        ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]);
+
+               for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
+                       if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_HELPER_INVOCATION) {
+                               ctx.helper_invoc_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
+                               shader->uses_helper_invocation = true;
+                       }
+               }
        }
        if (ctx.type == PIPE_SHADER_GEOMETRY) {
                /* FIXME 1 would be enough in some cases (3 or less input vertices) */
@@ -3439,6 +3525,15 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
        if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN)
                shader->nr_ps_max_color_exports = 8;
 
+       if (ctx.shader->uses_helper_invocation) {
+               if (ctx.bc->chip_class == CAYMAN)
+                       r = cm_load_helper_invocation(&ctx);
+               else
+                       r = eg_load_helper_invocation(&ctx);
+               if (r)
+                       return r;
+
+       }
        if (ctx.fragcoord_input >= 0) {
                if (ctx.bc->chip_class == CAYMAN) {
                        for (j = 0 ; j < 4; j++) {
index 844490788396e5b1831b50a131164b4bb3fba032..da96688e544598cca67cbe6664922db90f79cfc5 100644 (file)
@@ -119,6 +119,7 @@ struct r600_shader {
        boolean                 uses_doubles;
        boolean                 uses_atomics;
        boolean                 uses_images;
+       boolean                 uses_helper_invocation;
        uint8_t                 atomic_base;
        uint8_t                 rat_base;
        uint8_t                 image_size_const_offset;
index f51ffcf9e23a2a1bf7631d89440502b9bc44b980..6b07dc1ecfc8aed83118c12b90ec0e3259144d2d 100644 (file)
 #define     EG_V_SQ_ALU_SRC_LDS_DIRECT_B                             0x000000E0
 #define     EG_V_SQ_ALU_SRC_TIME_HI                                  0x000000E3
 #define     EG_V_SQ_ALU_SRC_TIME_LO                                  0x000000E4
+#define     EG_V_SQ_ALU_SRC_MASK_HI                                  0x000000E5
+#define     EG_V_SQ_ALU_SRC_MASK_LO                                  0x000000E6
 #define     EG_V_SQ_ALU_SRC_HW_WAVE_ID                               0x000000E7
 #define     EG_V_SQ_ALU_SRC_SIMD_ID                                  0x000000E8
 #define     EG_V_SQ_ALU_SRC_SE_ID                                    0x000000E9