radeonsi/gfx9: add si_shader::previous_stage for merged shaders
authorMarek Olšák <marek.olsak@amd.com>
Fri, 7 Apr 2017 19:41:10 +0000 (21:41 +0200)
committerMarek Olšák <marek.olsak@amd.com>
Fri, 28 Apr 2017 19:47:35 +0000 (21:47 +0200)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_debug.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h

index 9d0c0c554cf0c0a9b754b78a07ba816e848add78..038c8b479311216e118beaf8b800ac0d5cef7ea1 100644 (file)
@@ -639,6 +639,10 @@ static void si_print_annotated_shader(struct si_shader *shader,
                si_add_split_disasm(shader->prolog->binary.disasm_string,
                                    start_addr, &num_inst, instructions);
        }
+       if (shader->previous_stage) {
+               si_add_split_disasm(shader->previous_stage->binary.disasm_string,
+                                   start_addr, &num_inst, instructions);
+       }
        si_add_split_disasm(shader->binary.disasm_string,
                            start_addr, &num_inst, instructions);
        if (shader->epilog) {
index 446c8119677de4bd209bd49d918afe6180dff6c3..9640d8a187e17b84859114e54a91231cf2ea26c1 100644 (file)
@@ -6149,6 +6149,8 @@ static unsigned si_get_shader_binary_size(struct si_shader *shader)
 
        if (shader->prolog)
                size += shader->prolog->binary.code_size;
+       if (shader->previous_stage)
+               size += shader->previous_stage->binary.code_size;
        if (shader->epilog)
                size += shader->epilog->binary.code_size;
        return size;
@@ -6158,6 +6160,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
 {
        const struct ac_shader_binary *prolog =
                shader->prolog ? &shader->prolog->binary : NULL;
+       const struct ac_shader_binary *previous_stage =
+               shader->previous_stage ? &shader->previous_stage->binary : NULL;
        const struct ac_shader_binary *epilog =
                shader->epilog ? &shader->epilog->binary : NULL;
        const struct ac_shader_binary *mainb = &shader->binary;
@@ -6166,7 +6170,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
        unsigned char *ptr;
 
        assert(!prolog || !prolog->rodata_size);
-       assert((!prolog && !epilog) || !mainb->rodata_size);
+       assert(!previous_stage || !previous_stage->rodata_size);
+       assert((!prolog && !previous_stage && !epilog) || !mainb->rodata_size);
        assert(!epilog || !epilog->rodata_size);
 
        /* GFX9 can fetch at most 128 bytes past the end of the shader.
@@ -6192,6 +6197,11 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
                util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size);
                ptr += prolog->code_size;
        }
+       if (previous_stage) {
+               util_memcpy_cpu_to_le32(ptr, previous_stage->code,
+                                       previous_stage->code_size);
+               ptr += previous_stage->code_size;
+       }
 
        util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size);
        ptr += mainb->code_size;
@@ -6399,6 +6409,9 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
                if (shader->prolog)
                        si_shader_dump_disassembly(&shader->prolog->binary,
                                                   debug, "prolog", file);
+               if (shader->previous_stage)
+                       si_shader_dump_disassembly(&shader->previous_stage->binary,
+                                                  debug, "previous stage", file);
 
                si_shader_dump_disassembly(&shader->binary, debug, "main", file);
 
@@ -8573,6 +8586,26 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                        shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
                                                        shader->prolog->config.num_vgprs);
                }
+               if (shader->previous_stage) {
+                       shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
+                                                       shader->previous_stage->config.num_sgprs);
+                       shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
+                                                       shader->previous_stage->config.num_vgprs);
+                       shader->config.spilled_sgprs =
+                               MAX2(shader->config.spilled_sgprs,
+                                    shader->previous_stage->config.spilled_sgprs);
+                       shader->config.spilled_vgprs =
+                               MAX2(shader->config.spilled_vgprs,
+                                    shader->previous_stage->config.spilled_vgprs);
+                       shader->config.private_mem_vgprs =
+                               MAX2(shader->config.private_mem_vgprs,
+                                    shader->previous_stage->config.private_mem_vgprs);
+                       shader->config.scratch_bytes_per_wave =
+                               MAX2(shader->config.scratch_bytes_per_wave,
+                                    shader->previous_stage->config.scratch_bytes_per_wave);
+                       shader->info.uses_instanceid |=
+                               shader->previous_stage->info.uses_instanceid;
+               }
                if (shader->epilog) {
                        shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
                                                        shader->epilog->config.num_sgprs);
index 0673c6cfcf1c8032823156794b3247532fa2e2eb..65da65469b99f7d475174000c4017559f4a743da 100644 (file)
@@ -505,6 +505,7 @@ struct si_shader {
        struct si_shader                *next_variant;
 
        struct si_shader_part           *prolog;
+       struct si_shader                *previous_stage; /* for GFX9 */
        struct si_shader_part           *epilog;
 
        struct si_pm4_state             *pm4;