r600g: initial support for geometry shaders on evergreen (v2)
author Vadim Girlin <vadimgirlin@gmail.com>
Fri, 2 Aug 2013 02:38:23 +0000 (06:38 +0400)
committer Dave Airlie <airlied@redhat.com>
Wed, 5 Feb 2014 00:49:11 +0000 (10:49 +1000)
This is Vadim's initial work with a few regression fixes squashed in.

v2: (airlied)
fix regression in glsl-max-varyings - need to use vs and ps_dirty
fix regression in shader exports from rebasing.
whitespace fixing.
v2.1: squash in the assert fix

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
15 files changed:
src/gallium/drivers/r600/eg_asm.c
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_blit.c
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_shader.h
src/gallium/drivers/r600/r600_state_common.c
src/gallium/drivers/r600/sb/sb_bc.h
src/gallium/drivers/r600/sb/sb_bc_dump.cpp
src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
src/gallium/drivers/r600/sb/sb_bc_parser.cpp
src/gallium/drivers/r600/sb/sb_shader.cpp

index 5ad3d7719744ffb6f26502b1ef1e5f66a0c02431..acb30409428322835fe29eeeaba7e337e2ab3713 100644 (file)
@@ -93,8 +93,8 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
                        if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
                                bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
                        id++;
-               } else if (cfop->flags & CF_STRM) {
-                       /* MEM_STREAM instructions */
+               } else if (cfop->flags & CF_MEM) {
+                       /* MEM_STREAM, MEM_RING instructions */
                        bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
                                        S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
                                        S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
@@ -109,12 +109,13 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
                                bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
                        id++;
                } else {
-                       /* branch, loop, call, return instructions */
+                       /* other instructions */
                        bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
                        bc->bytecode[id++] =  S_SQ_CF_WORD1_CF_INST(opcode)|
                                        S_SQ_CF_WORD1_BARRIER(1) |
                                        S_SQ_CF_WORD1_COND(cf->cond) |
-                                       S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+                                       S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
+                                       S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
                }
        }
        return 0;
index 48bea1fcfe4fbcfce1203f9a0b6dceb4bd8b56ef..6896617ce938afe438944d18277273aa8bc31651 100644 (file)
@@ -2518,6 +2518,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
                struct r600_resource *rbuffer;
                uint64_t va;
                unsigned buffer_index = ffs(dirty_mask) - 1;
+               unsigned gs_ring_buffer = (buffer_index == R600_GS_RING_CONST_BUFFER);
 
                cb = &state->cb[buffer_index];
                rbuffer = (struct r600_resource*)cb->buffer;
@@ -2526,10 +2527,12 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
                va = r600_resource_va(&rctx->screen->b.b, &rbuffer->b.b);
                va += cb->buffer_offset;
 
-               r600_write_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
-                                      ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags);
-               r600_write_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
-                                               pkt_flags);
+               if (!gs_ring_buffer) {
+                       r600_write_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
+                                                   ALIGN_DIVUP(cb->buffer_size >> 4, 16), pkt_flags);
+                       r600_write_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
+                                                   pkt_flags);
+               }
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
                radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ));
@@ -2539,10 +2542,12 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
                radeon_emit(cs, va); /* RESOURCEi_WORD0 */
                radeon_emit(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
                radeon_emit(cs, /* RESOURCEi_WORD2 */
-                                S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
-                                S_030008_STRIDE(16) |
-                                S_030008_BASE_ADDRESS_HI(va >> 32UL));
+                           S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
+                           S_030008_STRIDE(gs_ring_buffer ? 4 : 16) |
+                           S_030008_BASE_ADDRESS_HI(va >> 32UL) |
+                           S_030008_DATA_FORMAT(FMT_32_32_32_32_FLOAT));
                radeon_emit(cs, /* RESOURCEi_WORD3 */
+                                S_03000C_UNCACHED(gs_ring_buffer ? 1 : 0) |
                                 S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
                                 S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
                                 S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
@@ -2550,7 +2555,8 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
                radeon_emit(cs, 0); /* RESOURCEi_WORD4 */
                radeon_emit(cs, 0); /* RESOURCEi_WORD5 */
                radeon_emit(cs, 0); /* RESOURCEi_WORD6 */
-               radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD7 */
+               radeon_emit(cs, /* RESOURCEi_WORD7 */
+                           S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
 
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
                radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READ));
@@ -2714,6 +2720,63 @@ static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct
        radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->buffer, RADEON_USAGE_READ));
 }
 
+/* Emit the VGT shader-stage configuration registers.
+ *
+ * 'a' is cast to struct r600_shader_stages_state, so it must be the atom
+ * embedded at the start of that structure.  Both VGT_SHADER_STAGES_EN and
+ * VGT_GS_MODE are always written: with the geometry-shader values when
+ * state->geom_enable is set, and with 0 otherwise.
+ */
+static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
+{
+       struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+       struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a;
+
+       /* v -> VGT_SHADER_STAGES_EN, v2 -> VGT_GS_MODE; both stay 0 without a GS */
+       uint32_t v = 0, v2 = 0;
+
+       if (state->geom_enable) {
+               /* real ES stage, GS enabled, VS stage running the copy shader */
+               v = S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
+                       S_028B54_GS_EN(1) |
+                       S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
+
+               v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
+                       S_028A40_CUT_MODE(V_028A40_GS_CUT_128);
+       }
+
+       r600_write_context_reg(cs, R_028B54_VGT_SHADER_STAGES_EN, v);
+       r600_write_context_reg(cs, R_028A40_VGT_GS_MODE, v2);
+}
+
+/* Emit the ESGS / GSVS ring buffer setup.
+ *
+ * 'a' is cast to struct r600_gs_rings_state, so it must be the atom embedded
+ * at the start of that structure.  The ring registers are bracketed by a
+ * WAIT_UNTIL(3D idle) write plus a VGT_FLUSH event, both before and after.
+ * When state->enable is set, the base and size of both rings are programmed
+ * (each base write followed by a NOP relocation packet for the buffer);
+ * otherwise only the two ring sizes are written, as 0.
+ */
+static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
+{
+       struct pipe_screen *screen = rctx->b.b.screen;
+       struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+       struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
+       struct r600_resource *rbuffer;
+
+       /* wait for 3D idle and flush the VGT before touching the ring registers */
+       r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
+
+       if (state->enable) {
+               /* ESGS ring: base address and size are both written shifted right by 8
+                * (presumably 256-byte units -- confirm against the register spec) */
+               rbuffer =(struct r600_resource*)state->esgs_ring.buffer;
+               r600_write_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
+                               (r600_resource_va(screen, &rbuffer->b.b)) >> 8);
+               /* relocation for the base address written just above */
+               radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+               radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE));
+               r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
+                               state->esgs_ring.buffer_size >> 8);
+
+               /* GSVS ring: same sequence as for the ESGS ring */
+               rbuffer =(struct r600_resource*)state->gsvs_ring.buffer;
+               r600_write_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
+                               (r600_resource_va(screen, &rbuffer->b.b)) >> 8);
+               radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+               radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, rbuffer, RADEON_USAGE_READWRITE));
+               r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
+                               state->gsvs_ring.buffer_size >> 8);
+       } else {
+               /* rings disabled: only the sizes are reset, the bases are left alone */
+               r600_write_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE, 0);
+               r600_write_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE, 0);
+       }
+
+       /* same wait + flush again after the ring registers have been updated */
+       r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
+       radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+       radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_VGT_FLUSH));
+}
+
 void cayman_init_common_regs(struct r600_command_buffer *cb,
                             enum chip_class ctx_chip_class,
                             enum radeon_family ctx_family,
@@ -3509,6 +3572,77 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
                shader->flatshade = rctx->rasterizer->flatshade;
 }
 
+/* Build the command buffer for a shader that runs as the export shader (ES).
+ *
+ * (Re)initialises shader->command_buffer and stores SQ_PGM_RESOURCES_ES
+ * (GPR count and stack size taken from the compiled bytecode) followed by
+ * SQ_PGM_START_ES (GPU address of shader->bo, shifted right by 8).
+ * shader->bo must already hold the uploaded bytecode.
+ */
+void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+{
+       struct r600_command_buffer *cb = &shader->command_buffer;
+       struct r600_shader *rshader = &shader->shader;
+
+       /* 32: presumably the command buffer capacity in dwords -- confirm */
+       r600_init_command_buffer(cb, 32);
+
+       r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
+                              S_028890_NUM_GPRS(rshader->bc.ngpr) |
+                              S_028890_STACK_SIZE(rshader->bc.nstack));
+       r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES,
+                              r600_resource_va(ctx->screen, (void *)shader->bo) >> 8);
+       /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
+}
+
+/* Build the command buffer for a geometry shader.
+ *
+ * (Re)initialises shader->command_buffer and stores the GS-related VGT and
+ * SQ context registers: vertex count enable, maximum output vertices, ring
+ * item sizes and offsets, the GS/ES/VS batching limits, and finally the GS
+ * program resources and start address.
+ *
+ * shader->gs_copy_shader is dereferenced unconditionally, so the copy shader
+ * must already exist; shader->bo must already hold the uploaded bytecode.
+ */
+void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+{
+       struct r600_command_buffer *cb = &shader->command_buffer;
+       struct r600_shader *rshader = &shader->shader;
+       struct r600_shader *cp_shader = &shader->gs_copy_shader->shader;
+       /* copy-shader item size times the maximum number of emitted vertices;
+        * the ">> 2" presumably converts bytes to dwords -- confirm */
+       unsigned gsvs_itemsize =
+                       (cp_shader->ring_item_size * rshader->gs_max_out_vertices) >> 2;
+
+       r600_init_command_buffer(cb, 64);
+
+       /* VGT_GS_OUT_PRIM_TYPE is written by r600_draw_vbo */
+       /* VGT_GS_MODE is written by evergreen_emit_shader_stages */
+
+       r600_store_context_reg(cb, R_028AB8_VGT_VTX_CNT_EN, 1);
+
+       r600_store_context_reg(cb, R_028B38_VGT_GS_MAX_VERT_OUT,
+                              S_028B38_MAX_VERT_OUT(rshader->gs_max_out_vertices));
+
+
+/* XXX kernel checker fails
+       r600_store_context_reg(cb, R_028B90_VGT_GS_INSTANCE_CNT,
+             S_028B90_CNT(0) |
+                      S_028B90_ENABLE(0));
+*/
+       /* four consecutive registers starting at SQ_GS_VERT_ITEMSIZE:
+        * only the first gets a non-zero item size */
+       r600_store_context_reg_seq(cb, R_02891C_SQ_GS_VERT_ITEMSIZE, 4);
+       r600_store_value(cb, cp_shader->ring_item_size >> 2);
+       r600_store_value(cb, 0);
+       r600_store_value(cb, 0);
+       r600_store_value(cb, 0);
+
+       r600_store_context_reg(cb, R_028900_SQ_ESGS_RING_ITEMSIZE,
+                              (rshader->ring_item_size) >> 2);
+
+       r600_store_context_reg(cb, R_028904_SQ_GSVS_RING_ITEMSIZE,
+                              gsvs_itemsize);
+
+       /* three consecutive offset registers, all set to the GSVS item size */
+       r600_store_context_reg_seq(cb, R_02892C_SQ_GSVS_RING_OFFSET_1, 3);
+       r600_store_value(cb, gsvs_itemsize);
+       r600_store_value(cb, gsvs_itemsize);
+       r600_store_value(cb, gsvs_itemsize);
+
+       /* FIXME calculate these values somehow ??? */
+       r600_store_context_reg_seq(cb, R_028A54_GS_PER_ES, 3);
+       r600_store_value(cb, 0x80); /* GS_PER_ES */
+       r600_store_value(cb, 0x100); /* ES_PER_GS */
+       r600_store_value(cb, 0x2); /* GS_PER_VS */
+
+       r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS,
+                              S_028878_NUM_GPRS(rshader->bc.ngpr) |
+                              S_028878_STACK_SIZE(rshader->bc.nstack));
+       r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS,
+                              r600_resource_va(ctx->screen, (void *)shader->bo) >> 8);
+       /* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
+}
+
+
 void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
 {
        struct r600_command_buffer *cb = &shader->command_buffer;
@@ -3918,6 +4052,10 @@ void evergreen_init_state_functions(struct r600_context *rctx)
        rctx->atoms[id++] = &rctx->b.streamout.begin_atom;
        r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
        r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
+       r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0);
+       r600_init_atom(rctx, &rctx->export_shader.atom, id++, r600_emit_shader, 0);
+       r600_init_atom(rctx, &rctx->shader_stages.atom, id++, evergreen_emit_shader_stages, 6);
+       r600_init_atom(rctx, &rctx->gs_rings.atom, id++, evergreen_emit_gs_rings, 26);
 
        rctx->b.b.create_blend_state = evergreen_create_blend_state;
        rctx->b.b.create_depth_stencil_alpha_state = evergreen_create_dsa_state;
index 14ee2c210b5e1c18b9ac9c6e619e2569dcae4d87..899a8efd21d8338c16b5d1529ac90ec376205dfa 100644 (file)
@@ -1939,7 +1939,7 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
                                if (cf->end_of_program)
                                        fprintf(stderr, "EOP ");
                                fprintf(stderr, "\n");
-                       } else if (r600_isa_cf(cf->op)->flags & CF_STRM) {
+                       } else if (r600_isa_cf(cf->op)->flags & CF_MEM) {
                                int o = 0;
                                const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
                                                "WRITE_IND_ACK"};
index c2ae2f6b28e86fc9a041119b624bad3f572fd92c..6bb7cfe9b9ee2e88f0cd5ed2957efed5e94981da 100644 (file)
@@ -59,6 +59,7 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
        util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer_state.vb);
        util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_fetch_shader.cso);
        util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
+       util_blitter_save_geometry_shader(rctx->blitter, rctx->gs_shader);
        util_blitter_save_so_targets(rctx->blitter, rctx->b.streamout.num_targets,
                                     (struct pipe_stream_output_target**)rctx->b.streamout.targets);
        util_blitter_save_rasterizer(rctx->blitter, rctx->rasterizer_state.cso);
index 9b0c558c29e01113412497426ca653e873fa6e4c..fc81e95accdd74486eb943419e0314b2af7691cd 100644 (file)
@@ -301,6 +301,12 @@ void r600_begin_new_cs(struct r600_context *ctx)
        ctx->config_state.atom.dirty = true;
        ctx->stencil_ref.atom.dirty = true;
        ctx->vertex_fetch_shader.atom.dirty = true;
+       ctx->export_shader.atom.dirty = true;
+       if (ctx->gs_shader) {
+               ctx->geometry_shader.atom.dirty = true;
+               ctx->shader_stages.atom.dirty = true;
+               ctx->gs_rings.atom.dirty = true;
+       }
        ctx->vertex_shader.atom.dirty = true;
        ctx->viewport.atom.dirty = true;
 
index 49521e0eb5a6c21c8a47ab1030432465aa3140f2..6c80f85f9e3f1dfe803144eb22f3bbbb8fa5ec2e 100644 (file)
@@ -447,15 +447,18 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 
 static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param)
 {
+       struct r600_screen *rscreen = (struct r600_screen *)pscreen;
+
        switch(shader)
        {
        case PIPE_SHADER_FRAGMENT:
        case PIPE_SHADER_VERTEX:
-        case PIPE_SHADER_COMPUTE:
+       case PIPE_SHADER_COMPUTE:
                break;
        case PIPE_SHADER_GEOMETRY:
-               /* XXX: support and enable geometry programs */
-               return 0;
+               if (rscreen->b.chip_class < EVERGREEN)
+                       return 0;
+               break;
        default:
                /* XXX: support tessellation on Evergreen */
                return 0;
index 31e27f282e5967afbf50e42bb4b445e22c788827..2d2c79b84e3ce8fea6591bb5ac77100207cfc8fb 100644 (file)
@@ -38,7 +38,7 @@
 #include "util/u_double_list.h"
 #include "util/u_transfer.h"
 
-#define R600_NUM_ATOMS 41
+#define R600_NUM_ATOMS 42
 
 /* the number of CS dwords for flushing and drawing */
 #define R600_MAX_FLUSH_CS_DWORDS       16
 #define R600_TRACE_CS_DWORDS           7
 
 #define R600_MAX_USER_CONST_BUFFERS 13
-#define R600_MAX_DRIVER_CONST_BUFFERS 3
+#define R600_MAX_DRIVER_CONST_BUFFERS 4
 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
 
 /* start driver buffers after user buffers */
 #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
 #define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
 #define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 3)
 
 #define R600_MAX_CONST_BUFFER_SIZE 4096
 
@@ -179,6 +180,18 @@ struct r600_viewport_state {
        struct pipe_viewport_state state;
 };
 
+/* State behind the shader-stages atom (see evergreen_emit_shader_stages). */
+struct r600_shader_stages_state {
+       /* must stay the first member: the emit callback casts the atom
+        * pointer back to this structure */
+       struct r600_atom atom;
+       /* non-zero: emit the ES/GS/copy-shader stage setup; zero: emit 0 */
+       unsigned geom_enable;
+};
+
+/* State behind the GS rings atom (see evergreen_emit_gs_rings). */
+struct r600_gs_rings_state {
+       /* must stay the first member: the emit callback casts the atom
+        * pointer back to this structure */
+       struct r600_atom atom;
+       /* non-zero: program ring bases and sizes; zero: write ring sizes of 0 */
+       unsigned enable;
+       /* ESGS ring; the emit code reads .buffer and .buffer_size */
+       struct pipe_constant_buffer esgs_ring;
+       /* GSVS ring; the emit code reads .buffer and .buffer_size */
+       struct pipe_constant_buffer gsvs_ring;
+};
+
 /* This must start from 16. */
 /* features */
 #define DBG_NO_LLVM            (1 << 17)
@@ -353,7 +366,7 @@ struct r600_fetch_shader {
 
 struct r600_shader_state {
        struct r600_atom                atom;
-       struct r600_pipe_shader_selector *shader;
+       struct r600_pipe_shader *shader;
 };
 
 struct r600_context {
@@ -415,7 +428,11 @@ struct r600_context {
        struct r600_cso_state           vertex_fetch_shader;
        struct r600_shader_state        vertex_shader;
        struct r600_shader_state        pixel_shader;
+       struct r600_shader_state        geometry_shader;
+       struct r600_shader_state        export_shader;
        struct r600_cs_shader_state     cs_shader_state;
+       struct r600_shader_stages_state shader_stages;
+       struct r600_gs_rings_state      gs_rings;
        struct r600_constbuf_state      constbuf_state[PIPE_SHADER_TYPES];
        struct r600_textures_info       samplers[PIPE_SHADER_TYPES];
        /** Vertex buffers for fetch shaders */
@@ -427,6 +444,7 @@ struct r600_context {
        unsigned                        compute_cb_target_mask;
        struct r600_pipe_shader_selector *ps_shader;
        struct r600_pipe_shader_selector *vs_shader;
+       struct r600_pipe_shader_selector *gs_shader;
        struct r600_rasterizer_state    *rasterizer;
        bool                            alpha_to_one;
        bool                            force_blend_disable;
@@ -506,6 +524,8 @@ void cayman_init_common_regs(struct r600_command_buffer *cb,
 void evergreen_init_state_functions(struct r600_context *rctx);
 void evergreen_init_atom_start_cs(struct r600_context *rctx);
 void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
 void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
 void *evergreen_create_db_flush_dsa(struct r600_context *rctx);
 void *evergreen_create_resolve_blend(struct r600_context *rctx);
index 32d2aa73bef203834857dcee6efbbaf3fc5202d9..560672468749991ce838c57f6b8b54e0e4320403 100644 (file)
@@ -60,7 +60,7 @@ issued in the w slot as well.
 The compiler must issue the source argument to slots z, y, and x
 */
 
-static int r600_shader_from_tgsi(struct r600_screen *rscreen,
+static int r600_shader_from_tgsi(struct r600_context *rctx,
                                 struct r600_pipe_shader *pipeshader,
                                 struct r600_shader_key key);
 
@@ -104,17 +104,43 @@ static void r600_dump_streamout(struct pipe_stream_output_info *so)
        }
 }
 
+/* Upload the compiled bytecode of 'shader' into a newly created buffer.
+ *
+ * Nothing is done when shader->bo already exists.  Otherwise a buffer of
+ * bc.ndw dwords is created, mapped for writing, filled with the bytecode
+ * (byte-swapped dword by dword when R600_BIG_ENDIAN is set) and unmapped.
+ *
+ * Returns 0 on success or -ENOMEM when the buffer cannot be created.
+ */
+static int store_shader(struct pipe_context *ctx,
+                       struct r600_pipe_shader *shader)
+{
+       struct r600_context *rctx = (struct r600_context *)ctx;
+       struct r600_bytecode *bc = &shader->shader.bc;
+       uint32_t *dst, dw;
+
+       /* the bytecode has already been uploaded */
+       if (shader->bo != NULL)
+               return 0;
+
+       shader->bo = (struct r600_resource*)
+               pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, bc->ndw * 4);
+       if (shader->bo == NULL)
+               return -ENOMEM;
+
+       dst = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
+       if (!R600_BIG_ENDIAN) {
+               memcpy(dst, bc->bytecode, bc->ndw * sizeof(*dst));
+       } else {
+               for (dw = 0; dw < bc->ndw; ++dw)
+                       dst[dw] = util_bswap32(bc->bytecode[dw]);
+       }
+       rctx->b.ws->buffer_unmap(shader->bo->cs_buf);
+
+       return 0;
+}
+
 int r600_pipe_shader_create(struct pipe_context *ctx,
                            struct r600_pipe_shader *shader,
                            struct r600_shader_key key)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct r600_pipe_shader_selector *sel = shader->selector;
-       int r, i;
-       uint32_t *ptr;
+       int r;
        bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
        unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
        unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
+       unsigned export_shader = key.vs_as_es;
 
        shader->shader.bc.isa = rctx->isa;
 
@@ -126,7 +152,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
                        r600_dump_streamout(&sel->so);
                }
        }
-       r = r600_shader_from_tgsi(rctx->screen, shader, key);
+       r = r600_shader_from_tgsi(rctx, shader, key);
        if (r) {
                R600_ERR("translation from TGSI failed !\n");
                return r;
@@ -157,29 +183,39 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
                }
        }
 
-       /* Store the shader in a buffer. */
-       if (shader->bo == NULL) {
-               shader->bo = (struct r600_resource*)
-                       pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
-               if (shader->bo == NULL) {
-                       return -ENOMEM;
-               }
-               ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
-               if (R600_BIG_ENDIAN) {
-                       for (i = 0; i < shader->shader.bc.ndw; ++i) {
-                               ptr[i] = util_bswap32(shader->shader.bc.bytecode[i]);
-                       }
-               } else {
-                       memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
+       if (shader->gs_copy_shader) {
+               if (dump) {
+                       // dump copy shader
+                       r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
+                                                    &shader->gs_copy_shader->shader, dump, 0);
+                       if (r)
+                               return r;
                }
-               rctx->b.ws->buffer_unmap(shader->bo->cs_buf);
+
+               if ((r = store_shader(ctx, shader->gs_copy_shader)))
+                       return r;
        }
 
+       /* Store the shader in a buffer. */
+       if ((r = store_shader(ctx, shader)))
+               return r;
+
        /* Build state. */
        switch (shader->shader.processor_type) {
+       case TGSI_PROCESSOR_GEOMETRY:
+               if (rctx->b.chip_class >= EVERGREEN) {
+                       evergreen_update_gs_state(ctx, shader);
+                       evergreen_update_vs_state(ctx, shader->gs_copy_shader);
+               } else {
+                       assert(!"not suported yet");
+               }
+               break;
        case TGSI_PROCESSOR_VERTEX:
                if (rctx->b.chip_class >= EVERGREEN) {
-                       evergreen_update_vs_state(ctx, shader);
+                       if (export_shader)
+                               evergreen_update_es_state(ctx, shader);
+                       else
+                               evergreen_update_vs_state(ctx, shader);
                } else {
                        r600_update_vs_state(ctx, shader);
                }
@@ -245,6 +281,9 @@ struct r600_shader_ctx {
        unsigned                cv_output;
        int                                     fragcoord_input;
        int                                     native_integers;
+       int                                     next_ring_offset;
+       int                                     gs_next_vertex;
+       struct r600_shader      *gs_for_vs;
 };
 
 struct r600_shader_tgsi_instruction {
@@ -254,6 +293,7 @@ struct r600_shader_tgsi_instruction {
        int (*process)(struct r600_shader_ctx *ctx);
 };
 
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx);
 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
 static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
@@ -285,7 +325,13 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
 #endif
        for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
                if (i->Src[j].Register.Dimension) {
-                  if (i->Src[j].Register.File != TGSI_FILE_CONSTANT) {
+                  switch (i->Src[j].Register.File) {
+                  case TGSI_FILE_CONSTANT:
+                          break;
+                  case TGSI_FILE_INPUT:
+                          if (ctx->type == TGSI_PROCESSOR_GEOMETRY)
+                                  break;
+                  default:
                           R600_ERR("unsupported src %d (dimension %d)\n", j,
                                    i->Src[j].Register.Dimension);
                           return -EINVAL;
@@ -536,6 +582,10 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                                if ((r = evergreen_interp_input(ctx, i)))
                                        return r;
                        }
+               } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+                       /* FIXME probably skip inputs if they aren't passed in the ring */
+                       ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
+                       ctx->next_ring_offset += 16;
                }
                for (j = 1; j < count; ++j) {
                        ctx->shader->input[i + j] = ctx->shader->input[i];
@@ -550,7 +600,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
                ctx->shader->output[i].interpolate = d->Interp.Interpolate;
                ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
-               if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+               if (ctx->type == TGSI_PROCESSOR_VERTEX ||
+                               ctx->type == TGSI_PROCESSOR_GEOMETRY) {
                        ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
                        switch (d->Semantic.Name) {
                        case TGSI_SEMANTIC_CLIPDIST:
@@ -773,6 +824,59 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int cb_idx
        return 0;
 }
 
+/* Emit a vertex fetch that loads one GS input vector from the ESGS ring.
+ *
+ * index   - input slot; the fetch offset is index * 16 bytes
+ * vtx_id  - which vertex of the primitive to read
+ * dst_reg - GPR that receives the four fetched components
+ *
+ * Returns the result of r600_bytecode_add_vtx() (0 on success).
+ */
+static int fetch_gs_input(struct r600_shader_ctx *ctx, unsigned index, unsigned vtx_id, unsigned int dst_reg)
+{
+       struct r600_bytecode_vtx vtx;
+       int ring_off_gpr = vtx_id / 3;
+       int ring_off_chan = vtx_id % 3;
+
+       /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y,
+        * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID),
+        * so the third offset of R0 lives in channel w instead of z */
+       ring_off_chan = (ring_off_gpr == 0 && ring_off_chan == 2) ? 3 : ring_off_chan;
+
+       memset(&vtx, 0, sizeof(vtx));
+
+       /* source: ring offset register/channel, read through the GS ring buffer slot */
+       vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
+       vtx.fetch_type = 2;             /* VTX_FETCH_NO_INDEX_OFFSET */
+       vtx.src_gpr = ring_off_gpr;
+       vtx.src_sel_x = ring_off_chan;
+       vtx.offset = index * 16; /*bytes*/
+       vtx.mega_fetch_count = 16;
+       vtx.use_const_fields = 1;
+
+       /* destination: all four components, unswizzled */
+       vtx.dst_gpr = dst_reg;
+       vtx.dst_sel_x = 0;              /* SEL_X */
+       vtx.dst_sel_y = 1;              /* SEL_Y */
+       vtx.dst_sel_z = 2;              /* SEL_Z */
+       vtx.dst_sel_w = 3;              /* SEL_W */
+
+       return r600_bytecode_add_vtx(ctx->bc, &vtx);
+}
+
+/* Redirect two-dimensional INPUT operands of the current instruction to
+ * temporaries loaded from the ESGS ring.
+ *
+ * For every source register whose file is TGSI_FILE_INPUT and which carries a
+ * dimension (per-vertex index), a temporary GPR is allocated, a ring fetch
+ * for (input index, vertex index) is emitted into it, and ctx->src[i].sel is
+ * pointed at that temporary.
+ *
+ * Returns 0 on success, or the error reported by fetch_gs_input() if a fetch
+ * could not be emitted (previously such failures were silently ignored and
+ * the operand was redirected to a register that was never loaded).
+ */
+static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       int i, r;
+
+       for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+               struct tgsi_full_src_register *src = &inst->Src[i];
+
+               if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) {
+                       int treg = r600_get_temp(ctx);
+                       int index = src->Register.Index;
+                       int vtx_id = src->Dimension.Index;
+
+                       /* do not redirect the operand unless the fetch was emitted */
+                       r = fetch_gs_input(ctx, index, vtx_id, treg);
+                       if (r)
+                               return r;
+                       ctx->src[i].sel = treg;
+               }
+       }
+       return 0;
+}
+
 static int tgsi_split_constant(struct r600_shader_ctx *ctx)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -983,10 +1087,247 @@ out_err:
        return r;
 }
 
-static int r600_shader_from_tgsi(struct r600_screen *rscreen,
+/* Build the GS copy shader: a vertex-type shader that fetches the outputs
+ * described by a geometry shader from the GSVS ring and re-exports them as
+ * position and parameter exports.
+ *
+ * rctx - context supplying chip class, family and ISA for the bytecode
+ * gs   - geometry shader whose output declarations are copied;
+ *        gs->gs_copy_shader is set to the newly allocated copy shader
+ *        before the bytecode is built
+ *
+ * Returns -ENOMEM if the copy shader cannot be allocated, otherwise the
+ * result of r600_bytecode_build().
+ *
+ * NOTE(review): the results of the individual r600_bytecode_add_*() calls
+ * are not checked here. */
+static int generate_gs_copy_shader(struct r600_context *rctx,
+                                  struct r600_pipe_shader *gs)
+{
+       struct r600_shader_ctx ctx = {};
+       struct r600_shader *gs_shader = &gs->shader;
+       struct r600_pipe_shader *cshader;
+       int ocnt = gs_shader->noutput;
+       struct r600_bytecode_alu alu;
+       struct r600_bytecode_vtx vtx;
+       struct r600_bytecode_output output;
+       struct r600_bytecode_cf *cf_jump, *cf_pop,
+               *last_exp_pos = NULL, *last_exp_param = NULL;
+       int i, next_pos = 60, next_param = 0;
+
+       cshader = calloc(1, sizeof(*cshader));
+       if (!cshader)
+               return -ENOMEM; /* report the failure instead of claiming success */
+
+       /* the copy shader exports exactly what the GS declared as outputs */
+       memcpy(cshader->shader.output, gs_shader->output, ocnt *
+              sizeof(struct r600_shader_io));
+
+       cshader->shader.noutput = ocnt;
+
+       ctx.shader = &cshader->shader;
+       ctx.bc = &ctx.shader->bc;
+       ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX;
+
+       r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family,
+                          rctx->screen->has_compressed_msaa_texturing);
+
+       ctx.bc->isa = rctx->isa;
+
+       /* R0.x = R0.x & 0x3fffffff */
+       memset(&alu, 0, sizeof(alu));
+       alu.op = ALU_OP2_AND_INT;
+       alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+       alu.src[1].value = 0x3fffffff;
+       alu.dst.write = 1;
+       r600_bytecode_add_alu(ctx.bc, &alu);
+
+       /* R0.y = R0.x >> 30 */
+       memset(&alu, 0, sizeof(alu));
+       alu.op = ALU_OP2_LSHR_INT;
+       alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+       alu.src[1].value = 0x1e;
+       alu.dst.chan = 1;
+       alu.dst.write = 1;
+       alu.last = 1;
+       r600_bytecode_add_alu(ctx.bc, &alu);
+
+       /* PRED_SETE_INT __, R0.y, 0 */
+       memset(&alu, 0, sizeof(alu));
+       alu.op = ALU_OP2_PRED_SETE_INT;
+       alu.src[0].chan = 1;
+       alu.src[1].sel = V_SQ_ALU_SRC_0;
+       alu.execute_mask = 1;
+       alu.update_pred = 1;
+       alu.last = 1;
+       r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);
+
+       /* jump target is patched below, once the POP instruction exists */
+       r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
+       cf_jump = ctx.bc->cf_last;
+
+       /* fetch vertex data from GSVS ring */
+       for (i = 0; i < ocnt; ++i) {
+               struct r600_shader_io *out = &ctx.shader->output[i];
+
+               /* output i is loaded into GPR i + 1 from ring offset i * 16 */
+               out->gpr = i + 1;
+               out->ring_offset = i * 16;
+
+               memset(&vtx, 0, sizeof(vtx));
+               vtx.op = FETCH_OP_VFETCH;
+               vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
+               vtx.fetch_type = 2;
+               vtx.offset = out->ring_offset;
+               vtx.dst_gpr = out->gpr;
+               vtx.dst_sel_x = 0;
+               vtx.dst_sel_y = 1;
+               vtx.dst_sel_z = 2;
+               vtx.dst_sel_w = 3;
+               vtx.use_const_fields = 1;
+
+               r600_bytecode_add_vtx(ctx.bc, &vtx);
+       }
+
+       /* XXX handle clipvertex, streamout? */
+
+       /* export vertex data */
+       /* XXX factor out common code with r600_shader_from_tgsi ? */
+       for (i = 0; i < ocnt; ++i) {
+               struct r600_shader_io *out = &ctx.shader->output[i];
+
+               if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
+                       continue;
+
+               /* default: an unswizzled PARAM export; the switch overrides it */
+               memset(&output, 0, sizeof(output));
+               output.gpr = out->gpr;
+               output.elem_size = 3;
+               output.swizzle_x = 0;
+               output.swizzle_y = 1;
+               output.swizzle_z = 2;
+               output.swizzle_w = 3;
+               output.burst_count = 1;
+               output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+               output.op = CF_OP_EXPORT;
+               switch (out->name) {
+               case TGSI_SEMANTIC_POSITION:
+                       output.array_base = next_pos++;
+                       output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+                       break;
+
+               case TGSI_SEMANTIC_PSIZE:
+                       output.array_base = next_pos++;
+                       output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+                       break;
+               case TGSI_SEMANTIC_CLIPDIST:
+                       /* spi_sid is 0 for clipdistance outputs that were generated
+                        * for clipvertex - we don't need to pass them to PS */
+                       if (out->spi_sid) {
+                               /* duplicate it as PARAM to pass to the pixel shader */
+                               output.array_base = next_param++;
+                               r600_bytecode_add_output(ctx.bc, &output);
+                               last_exp_param = ctx.bc->cf_last;
+                       }
+                       output.array_base = next_pos++;
+                       output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+                       break;
+               case TGSI_SEMANTIC_FOG:
+                       output.swizzle_y = 4; /* 0 */
+                       output.swizzle_z = 4; /* 0 */
+                       output.swizzle_w = 5; /* 1 */
+                       break;
+               }
+               r600_bytecode_add_output(ctx.bc, &output);
+
+               /* remember the last export of each type for EXPORT_DONE below */
+               if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM)
+                       last_exp_param = ctx.bc->cf_last;
+               else
+                       last_exp_pos = ctx.bc->cf_last;
+       }
+
+       /* no position export was emitted: add a fully masked one */
+       if (!last_exp_pos) {
+               memset(&output, 0, sizeof(output));
+               output.gpr = 0;
+               output.elem_size = 3;
+               output.swizzle_x = 7;
+               output.swizzle_y = 7;
+               output.swizzle_z = 7;
+               output.swizzle_w = 7;
+               output.burst_count = 1;
+               output.op = CF_OP_EXPORT;
+               output.array_base = next_pos++;
+               output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+               r600_bytecode_add_output(ctx.bc, &output);
+               last_exp_pos = ctx.bc->cf_last;
+       }
+
+       /* no param export was emitted: add a fully masked one */
+       if (!last_exp_param) {
+               memset(&output, 0, sizeof(output));
+               output.gpr = 0;
+               output.elem_size = 3;
+               output.swizzle_x = 7;
+               output.swizzle_y = 7;
+               output.swizzle_z = 7;
+               output.swizzle_w = 7;
+               output.burst_count = 1;
+               output.op = CF_OP_EXPORT;
+               output.array_base = next_param++;
+               output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+               r600_bytecode_add_output(ctx.bc, &output);
+               last_exp_param = ctx.bc->cf_last;
+       }
+
+       /* both pointers are non-NULL here thanks to the fallbacks above */
+       last_exp_pos->op = CF_OP_EXPORT_DONE;
+       last_exp_param->op = CF_OP_EXPORT_DONE;
+
+       r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
+       cf_pop = ctx.bc->cf_last;
+
+       /* JUMP and POP both continue at the same address, relative to POP */
+       cf_jump->cf_addr = cf_pop->id + 2;
+       cf_jump->pop_count = 1;
+       cf_pop->cf_addr = cf_pop->id + 2;
+       cf_pop->pop_count = 1;
+
+       r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
+       ctx.bc->cf_last->end_of_program = 1;
+
+       gs->gs_copy_shader = cshader;
+
+       ctx.bc->nstack = 1;
+       cshader->shader.ring_item_size = ocnt * 16;
+
+       return r600_bytecode_build(ctx.bc);
+}
+
+/* Emit one MEM_RING write per shader output for the current vertex, then
+ * advance ctx->gs_next_vertex.
+ *
+ * When ctx->gs_for_vs is set, each output is matched by name and sid to an
+ * input of that geometry shader and written at the ring offset the input
+ * records; otherwise output i is written at offset i * 16.
+ *
+ * Returns 0 on success, -1 if an output has no matching GS input, or the
+ * error reported by r600_bytecode_add_output(). On failure gs_next_vertex
+ * is left unchanged. */
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx)
+{
+       struct r600_bytecode_output output;
+       int i, k, r, ring_offset;
+
+       for (i = 0; i < ctx->shader->noutput; i++) {
+               struct r600_shader_io *out = &ctx->shader->output[i];
+
+               if (ctx->gs_for_vs) {
+                       /* for ES we need to lookup corresponding ring offset expected by GS
+                        * (map this output to GS input by name and sid) */
+                       /* FIXME precompute offsets */
+                       ring_offset = -1;
+                       for (k = 0; k < ctx->gs_for_vs->ninput; ++k) {
+                               struct r600_shader_io *in = &ctx->gs_for_vs->input[k];
+
+                               if (in->name == out->name && in->sid == out->sid)
+                                       ring_offset = in->ring_offset;
+                       }
+                       if (ring_offset == -1) {
+                               R600_ERR("error mapping VS->GS outputs\n");
+                               return -1;
+                       }
+               } else {
+                       ring_offset = i * 16;
+               }
+
+               /* next_ring_offset after parsing input decls contains total size of
+                * single vertex data, gs_next_vertex - current vertex index */
+               ring_offset += ctx->next_ring_offset * ctx->gs_next_vertex;
+
+               memset(&output, 0, sizeof(struct r600_bytecode_output));
+               output.gpr = out->gpr;
+               output.elem_size = 3;
+               output.comp_mask = 0xF;
+               output.burst_count = 1;
+               output.op = CF_OP_MEM_RING;
+               output.array_base = ring_offset >> 2; /* in dwords */
+
+               /* surface bytecode errors instead of silently dropping the write */
+               r = r600_bytecode_add_output(ctx->bc, &output);
+               if (r)
+                       return r;
+       }
+       ++ctx->gs_next_vertex;
+       return 0;
+}
+
+static int r600_shader_from_tgsi(struct r600_context *rctx,
                                 struct r600_pipe_shader *pipeshader,
                                 struct r600_shader_key key)
 {
+       struct r600_screen *rscreen = rctx->screen;
        struct r600_shader *shader = &pipeshader->shader;
        struct tgsi_token *tokens = pipeshader->selector->tokens;
        struct pipe_stream_output_info so = pipeshader->selector->so;
@@ -1002,6 +1343,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
        /* Declarations used by llvm code */
        bool use_llvm = false;
        bool indirect_gprs;
+       bool ring_outputs = false;
 
 #ifdef R600_USE_LLVM
        use_llvm = !(rscreen->b.debug_flags & DBG_NO_LLVM);
@@ -1010,6 +1352,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
        ctx.shader = shader;
        ctx.native_integers = true;
 
+       shader->vs_as_es = key.vs_as_es;
+
        r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
                           rscreen->has_compressed_msaa_texturing);
        ctx.tokens = tokens;
@@ -1021,6 +1365,17 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
        shader->processor_type = ctx.type;
        ctx.bc->type = shader->processor_type;
 
+       ring_outputs = key.vs_as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
+
+       if (key.vs_as_es) {
+               ctx.gs_for_vs = &rctx->gs_shader->current->shader;
+       } else {
+               ctx.gs_for_vs = NULL;
+       }
+
+       ctx.next_ring_offset = 0;
+       ctx.gs_next_vertex = 0;
+
        ctx.face_gpr = -1;
        ctx.fragcoord_input = -1;
        ctx.colors_used = 0;
@@ -1073,6 +1428,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
        if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
                ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
        }
+       if (ctx.type == TGSI_PROCESSOR_GEOMETRY && ctx.bc->chip_class >= EVERGREEN) {
+               /* FIXME 1 would be enough in some cases (3 or less input vertices) */
+               ctx.file_offset[TGSI_FILE_INPUT] = 2;
+       }
        ctx.use_llvm = use_llvm;
 
        if (use_llvm) {
@@ -1149,6 +1508,15 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                        case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
                                /* we don't need this one */
                                break;
+                       case TGSI_PROPERTY_GS_INPUT_PRIM:
+                               shader->gs_input_prim = property->u[0].Data;
+                               break;
+                       case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+                               shader->gs_output_prim = property->u[0].Data;
+                               break;
+                       case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+                               shader->gs_max_out_vertices = property->u[0].Data;
+                               break;
                        }
                        break;
                default:
@@ -1158,6 +1526,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                }
        }
        
+       shader->ring_item_size = ctx.next_ring_offset;
+
        /* Process two side if needed */
        if (shader->two_side && ctx.colors_used) {
                int i, count = ctx.shader->ninput;
@@ -1298,6 +1668,9 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                                        goto out_err;
                                if ((r = tgsi_split_literal_constant(&ctx)))
                                        goto out_err;
+                               if (ctx.type == TGSI_PROCESSOR_GEOMETRY)
+                                       if ((r = tgsi_split_gs_inputs(&ctx)))
+                                               goto out_err;
                                if (ctx.bc->chip_class == CAYMAN)
                                        ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
                                else if (ctx.bc->chip_class >= EVERGREEN)
@@ -1319,7 +1692,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 
        noutput = shader->noutput;
 
-       if (ctx.clip_vertex_write) {
+       if (!ring_outputs && ctx.clip_vertex_write) {
                unsigned clipdist_temp[2];
 
                clipdist_temp[0] = r600_get_temp(&ctx);
@@ -1370,117 +1743,122 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
        }
 
        /* Add stream outputs. */
-       if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs && !use_llvm)
+       if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX &&
+           so.num_outputs && !use_llvm)
                emit_streamout(&ctx, &so);
 
-       /* export output */
-       for (i = 0, j = 0; i < noutput; i++, j++) {
-               memset(&output[j], 0, sizeof(struct r600_bytecode_output));
-               output[j].gpr = shader->output[i].gpr;
-               output[j].elem_size = 3;
-               output[j].swizzle_x = 0;
-               output[j].swizzle_y = 1;
-               output[j].swizzle_z = 2;
-               output[j].swizzle_w = 3;
-               output[j].burst_count = 1;
-               output[j].type = -1;
-               output[j].op = CF_OP_EXPORT;
-               switch (ctx.type) {
-               case TGSI_PROCESSOR_VERTEX:
-                       switch (shader->output[i].name) {
-                       case TGSI_SEMANTIC_POSITION:
-                               output[j].array_base = next_pos_base++;
-                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
-                               break;
+       if (ring_outputs) {
+               if (key.vs_as_es)
+                       emit_gs_ring_writes(&ctx);
+       } else {
+               /* export output */
+               for (i = 0, j = 0; i < noutput; i++, j++) {
+                       memset(&output[j], 0, sizeof(struct r600_bytecode_output));
+                       output[j].gpr = shader->output[i].gpr;
+                       output[j].elem_size = 3;
+                       output[j].swizzle_x = 0;
+                       output[j].swizzle_y = 1;
+                       output[j].swizzle_z = 2;
+                       output[j].swizzle_w = 3;
+                       output[j].burst_count = 1;
+                       output[j].type = -1;
+                       output[j].op = CF_OP_EXPORT;
+                       switch (ctx.type) {
+                       case TGSI_PROCESSOR_VERTEX:
+                               switch (shader->output[i].name) {
+                               case TGSI_SEMANTIC_POSITION:
+                                       output[j].array_base = next_pos_base++;
+                                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+                                       break;
 
-                       case TGSI_SEMANTIC_PSIZE:
-                               output[j].array_base = next_pos_base++;
-                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
-                               break;
-                       case TGSI_SEMANTIC_CLIPVERTEX:
-                               j--;
-                               break;
-                       case TGSI_SEMANTIC_CLIPDIST:
-                               output[j].array_base = next_pos_base++;
-                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
-                               /* spi_sid is 0 for clipdistance outputs that were generated
-                                * for clipvertex - we don't need to pass them to PS */
-                               if (shader->output[i].spi_sid) {
-                                       j++;
-                                       /* duplicate it as PARAM to pass to the pixel shader */
-                                       memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
-                                       output[j].array_base = next_param_base++;
-                                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
-                               }
-                               break;
-                       case TGSI_SEMANTIC_FOG:
-                               output[j].swizzle_y = 4; /* 0 */
-                               output[j].swizzle_z = 4; /* 0 */
-                               output[j].swizzle_w = 5; /* 1 */
-                               break;
-                       }
-                       break;
-               case TGSI_PROCESSOR_FRAGMENT:
-                       if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
-                               /* never export more colors than the number of CBs */
-                               if (shader->output[i].sid >= max_color_exports) {
-                                       /* skip export */
+                               case TGSI_SEMANTIC_PSIZE:
+                                       output[j].array_base = next_pos_base++;
+                                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+                                       break;
+                               case TGSI_SEMANTIC_CLIPVERTEX:
                                        j--;
-                                       continue;
-                               }
-                               output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
-                               output[j].array_base = shader->output[i].sid;
-                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
-                               shader->nr_ps_color_exports++;
-                               if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) {
-                                       for (k = 1; k < max_color_exports; k++) {
+                                       break;
+                               case TGSI_SEMANTIC_CLIPDIST:
+                                       output[j].array_base = next_pos_base++;
+                                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+                                       /* spi_sid is 0 for clipdistance outputs that were generated
+                                        * for clipvertex - we don't need to pass them to PS */
+                                       if (shader->output[i].spi_sid) {
                                                j++;
-                                               memset(&output[j], 0, sizeof(struct r600_bytecode_output));
-                                               output[j].gpr = shader->output[i].gpr;
-                                               output[j].elem_size = 3;
-                                               output[j].swizzle_x = 0;
-                                               output[j].swizzle_y = 1;
-                                               output[j].swizzle_z = 2;
-                                               output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
-                                               output[j].burst_count = 1;
-                                               output[j].array_base = k;
-                                               output[j].op = CF_OP_EXPORT;
-                                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
-                                               shader->nr_ps_color_exports++;
+                                               /* duplicate it as PARAM to pass to the pixel shader */
+                                               memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
+                                               output[j].array_base = next_param_base++;
+                                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
                                        }
+                                       break;
+                               case TGSI_SEMANTIC_FOG:
+                                       output[j].swizzle_y = 4; /* 0 */
+                                       output[j].swizzle_z = 4; /* 0 */
+                                       output[j].swizzle_w = 5; /* 1 */
+                                       break;
                                }
-                       } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
-                               output[j].array_base = 61;
-                               output[j].swizzle_x = 2;
-                               output[j].swizzle_y = 7;
-                               output[j].swizzle_z = output[j].swizzle_w = 7;
-                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
-                       } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
-                               output[j].array_base = 61;
-                               output[j].swizzle_x = 7;
-                               output[j].swizzle_y = 1;
-                               output[j].swizzle_z = output[j].swizzle_w = 7;
-                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
-                       } else {
-                               R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
+                               break;
+                       case TGSI_PROCESSOR_FRAGMENT:
+                               if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
+                                       /* never export more colors than the number of CBs */
+                                       if (shader->output[i].sid >= max_color_exports) {
+                                               /* skip export */
+                                               j--;
+                                               continue;
+                                       }
+                                       output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
+                                       output[j].array_base = shader->output[i].sid;
+                                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                                       shader->nr_ps_color_exports++;
+                                       if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) {
+                                               for (k = 1; k < max_color_exports; k++) {
+                                                       j++;
+                                                       memset(&output[j], 0, sizeof(struct r600_bytecode_output));
+                                                       output[j].gpr = shader->output[i].gpr;
+                                                       output[j].elem_size = 3;
+                                                       output[j].swizzle_x = 0;
+                                                       output[j].swizzle_y = 1;
+                                                       output[j].swizzle_z = 2;
+                                                       output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
+                                                       output[j].burst_count = 1;
+                                                       output[j].array_base = k;
+                                                       output[j].op = CF_OP_EXPORT;
+                                                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                                                       shader->nr_ps_color_exports++;
+                                               }
+                                       }
+                               } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
+                                       output[j].array_base = 61;
+                                       output[j].swizzle_x = 2;
+                                       output[j].swizzle_y = 7;
+                                       output[j].swizzle_z = output[j].swizzle_w = 7;
+                                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                               } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
+                                       output[j].array_base = 61;
+                                       output[j].swizzle_x = 7;
+                                       output[j].swizzle_y = 1;
+                                       output[j].swizzle_z = output[j].swizzle_w = 7;
+                                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                               } else {
+                                       R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
+                                       r = -EINVAL;
+                                       goto out_err;
+                               }
+                               break;
+                       default:
+                               R600_ERR("unsupported processor type %d\n", ctx.type);
                                r = -EINVAL;
                                goto out_err;
                        }
-                       break;
-               default:
-                       R600_ERR("unsupported processor type %d\n", ctx.type);
-                       r = -EINVAL;
-                       goto out_err;
-               }
 
-               if (output[j].type==-1) {
-                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
-                       output[j].array_base = next_param_base++;
+                       if (output[j].type==-1) {
+                               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+                               output[j].array_base = next_param_base++;
+                       }
                }
-       }
 
-        /* add fake position export */
-       if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) {
+               /* add fake position export */
+               if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) {
                        memset(&output[j], 0, sizeof(struct r600_bytecode_output));
                        output[j].gpr = 0;
                        output[j].elem_size = 3;
@@ -1493,10 +1871,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                        output[j].array_base = next_pos_base;
                        output[j].op = CF_OP_EXPORT;
                        j++;
-       }
+               }
 
-       /* add fake param output for vertex shader if no param is exported */
-       if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
+               /* add fake param output for vertex shader if no param is exported */
+               if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
                        memset(&output[j], 0, sizeof(struct r600_bytecode_output));
                        output[j].gpr = 0;
                        output[j].elem_size = 3;
@@ -1509,39 +1887,40 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                        output[j].array_base = 0;
                        output[j].op = CF_OP_EXPORT;
                        j++;
-       }
+               }
+
+               /* add fake pixel export */
+               if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) {
+                       memset(&output[j], 0, sizeof(struct r600_bytecode_output));
+                       output[j].gpr = 0;
+                       output[j].elem_size = 3;
+                       output[j].swizzle_x = 7;
+                       output[j].swizzle_y = 7;
+                       output[j].swizzle_z = 7;
+                       output[j].swizzle_w = 7;
+                       output[j].burst_count = 1;
+                       output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                       output[j].array_base = 0;
+                       output[j].op = CF_OP_EXPORT;
+                       j++;
+               }
+
+               noutput = j;
 
-       /* add fake pixel export */
-       if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) {
-               memset(&output[j], 0, sizeof(struct r600_bytecode_output));
-               output[j].gpr = 0;
-               output[j].elem_size = 3;
-               output[j].swizzle_x = 7;
-               output[j].swizzle_y = 7;
-               output[j].swizzle_z = 7;
-               output[j].swizzle_w = 7;
-               output[j].burst_count = 1;
-               output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
-               output[j].array_base = 0;
-               output[j].op = CF_OP_EXPORT;
-               j++;
-       }
-
-       noutput = j;
-
-       /* set export done on last export of each type */
-       for (i = noutput - 1, output_done = 0; i >= 0; i--) {
-               if (!(output_done & (1 << output[i].type))) {
-                       output_done |= (1 << output[i].type);
-                       output[i].op = CF_OP_EXPORT_DONE;
+               /* set export done on last export of each type */
+               for (i = noutput - 1, output_done = 0; i >= 0; i--) {
+                       if (!(output_done & (1 << output[i].type))) {
+                               output_done |= (1 << output[i].type);
+                               output[i].op = CF_OP_EXPORT_DONE;
+                       }
                }
-       }
-       /* add output to bytecode */
-       if (!use_llvm) {
-               for (i = 0; i < noutput; i++) {
-                       r = r600_bytecode_add_output(ctx.bc, &output[i]);
-                       if (r)
-                               goto out_err;
+               /* add output to bytecode */
+               if (!use_llvm) {
+                       for (i = 0; i < noutput; i++) {
+                               r = r600_bytecode_add_output(ctx.bc, &output[i]);
+                               if (r)
+                                       goto out_err;
+                       }
                }
        }
 
@@ -1552,7 +1931,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                else {
                        const struct cf_op_info *last = r600_isa_cf(ctx.bc->cf_last->op);
 
-                       if (last->flags & CF_CLAUSE)
+                       /* alu clause instructions don't have EOP bit, so add NOP */
+                       if (last->flags & CF_ALU)
                                r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
 
                        ctx.bc->cf_last->end_of_program = 1;
@@ -1567,6 +1947,11 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                goto out_err;
        }
 
+       if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+               /* Fail through out_err like the other error paths of this
+                * function instead of returning directly: a bare return would
+                * skip the free(ctx.literals) / tgsi_parse_free() cleanup that
+                * the success path performs just below. */
+               if ((r = generate_gs_copy_shader(rctx, pipeshader)))
+                       goto out_err;
+       }
+
        free(ctx.literals);
        tgsi_parse_free(&ctx.parse);
        return 0;
@@ -5561,6 +5946,14 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+/*
+ * Handler shared by TGSI_OPCODE_EMIT and TGSI_OPCODE_ENDPRIM (see the
+ * instruction tables, which map them to CF_OP_EMIT_VERTEX and
+ * CF_OP_CUT_VERTEX respectively).
+ *
+ * Returns the result of adding the CF instruction to the bytecode.
+ */
+static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
+{
+       const int is_vertex_emit = (ctx->inst_info->op == CF_OP_EMIT_VERTEX);
+
+       /* Only a vertex emit is preceded by the ring writes; a cut is not.
+        * NOTE(review): any status from emit_gs_ring_writes() is not checked
+        * here - confirm whether it can fail. */
+       if (is_vertex_emit) {
+               emit_gs_ring_writes(ctx);
+       }
+
+       return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
+}
+
 static int tgsi_umad(struct r600_shader_ctx *ctx)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -5934,8 +6327,8 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_TXF,       0, FETCH_OP_LD, tgsi_tex},
        {TGSI_OPCODE_TXQ,       0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
        {TGSI_OPCODE_CONT,      0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
-       {TGSI_OPCODE_EMIT,      0, ALU_OP0_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_ENDPRIM,   0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_EMIT,      0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
+       {TGSI_OPCODE_ENDPRIM,   0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
        {TGSI_OPCODE_BGNLOOP,   0, ALU_OP0_NOP, tgsi_bgnloop},
        {TGSI_OPCODE_BGNSUB,    0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_ENDLOOP,   0, ALU_OP0_NOP, tgsi_endloop},
@@ -6126,8 +6519,8 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_TXF,       0, FETCH_OP_LD, tgsi_tex},
        {TGSI_OPCODE_TXQ,       0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
        {TGSI_OPCODE_CONT,      0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
-       {TGSI_OPCODE_EMIT,      0, ALU_OP0_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_ENDPRIM,   0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_EMIT,      0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
+       {TGSI_OPCODE_ENDPRIM,   0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
        {TGSI_OPCODE_BGNLOOP,   0, ALU_OP0_NOP, tgsi_bgnloop},
        {TGSI_OPCODE_BGNSUB,    0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_ENDLOOP,   0, ALU_OP0_NOP, tgsi_endloop},
index d989ce436497e64311310b8ed9eaee3a495019fa..0bf8b51fff322c0012dd92a747b29e7a8ad2ab8a 100644 (file)
@@ -37,6 +37,7 @@ struct r600_shader_io {
        unsigned                lds_pos; /* for evergreen */
        unsigned                back_color_input;
        unsigned                write_mask;
+       int                             ring_offset;
 };
 
 struct r600_shader {
@@ -64,9 +65,17 @@ struct r600_shader {
        boolean                 has_txq_cube_array_z_comp;
        boolean                 uses_tex_buffers;
 
+       /* geometry shader properties */
+       unsigned                gs_input_prim;
+       unsigned                gs_output_prim;
+       unsigned                gs_max_out_vertices;
+       /* size in bytes of a data item in the ring (single vertex data) */
+       unsigned                ring_item_size;
+
        unsigned                indirect_files;
        unsigned                max_arrays;
        unsigned                num_arrays;
+       unsigned                vs_as_es;
        struct r600_shader_array * arrays;
 };
 
@@ -74,6 +83,7 @@ struct r600_shader_key {
        unsigned color_two_side:1;
        unsigned alpha_to_one:1;
        unsigned nr_cbufs:4;
+       unsigned vs_as_es:1;
 };
 
 struct r600_shader_array {
@@ -85,6 +95,8 @@ struct r600_shader_array {
 struct r600_pipe_shader {
        struct r600_pipe_shader_selector *selector;
        struct r600_pipe_shader *next_variant;
+       /* for GS - corresponding copy shader (installed as VS) */
+       struct r600_pipe_shader *gs_copy_shader;
        struct r600_shader      shader;
        struct r600_command_buffer command_buffer; /* register writes */
        struct r600_resource    *bo;
index c05b74d42cd2bd3d4260f9b54af26fa32535eb14..3221a8e24e84ef0f5f69804e9d001b4e1da26203 100644 (file)
@@ -693,6 +693,8 @@ static INLINE struct r600_shader_key r600_shader_selector_key(struct pipe_contex
                /* Dual-source blending only makes sense with nr_cbufs == 1. */
                if (key.nr_cbufs == 1 && rctx->dual_src_blend)
                        key.nr_cbufs = 2;
+       } else if (sel->type == PIPE_SHADER_VERTEX) {
+               key.vs_as_es = (rctx->gs_shader != NULL);
        }
        return key;
 }
@@ -792,6 +794,12 @@ static void *r600_create_vs_state(struct pipe_context *ctx,
        return r600_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
 }
 
+/* create_gs_state hook: builds a shader state object of type
+ * PIPE_SHADER_GEOMETRY through the common r600_create_shader_state() path. */
+static void *r600_create_gs_state(struct pipe_context *ctx,
+                                        const struct pipe_shader_state *state)
+{
+       void *gs_state = r600_create_shader_state(ctx, state, PIPE_SHADER_GEOMETRY);
+
+       return gs_state;
+}
+
 static void r600_bind_ps_state(struct pipe_context *ctx, void *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
@@ -813,6 +821,13 @@ static void r600_bind_vs_state(struct pipe_context *ctx, void *state)
        rctx->b.streamout.stride_in_dw = rctx->vs_shader->so.stride;
 }
 
+/* bind_gs_state hook: records the given selector as the context's current
+ * geometry shader. A NULL state leaves the context without a GS, which
+ * r600_update_derived_state() treats as "geometry block disabled". */
+static void r600_bind_gs_state(struct pipe_context *ctx, void *state)
+{
+       struct r600_pipe_shader_selector *sel = state;
+       struct r600_context *rctx = (struct r600_context *)ctx;
+
+       rctx->gs_shader = sel;
+}
+
 static void r600_delete_shader_selector(struct pipe_context *ctx,
                struct r600_pipe_shader_selector *sel)
 {
@@ -853,6 +868,20 @@ static void r600_delete_vs_state(struct pipe_context *ctx, void *state)
        r600_delete_shader_selector(ctx, sel);
 }
 
+
+/* delete_gs_state hook: unbinds the selector if it is the currently bound
+ * geometry shader, then passes it to r600_delete_shader_selector(). */
+static void r600_delete_gs_state(struct pipe_context *ctx, void *state)
+{
+       struct r600_pipe_shader_selector *sel = state;
+       struct r600_context *rctx = (struct r600_context *)ctx;
+
+       /* do not keep the context pointing at a selector that is being
+        * handed over for deletion */
+       if (sel == rctx->gs_shader)
+               rctx->gs_shader = NULL;
+
+       r600_delete_shader_selector(ctx, sel);
+}
+
+
 void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state)
 {
        if (state->dirty_mask) {
@@ -1046,10 +1075,65 @@ static void r600_setup_txq_cube_array_constants(struct r600_context *rctx, int s
        pipe_resource_reference(&cb.buffer, NULL);
 }
 
+/*
+ * Point a shader-state atom at a shader variant, or clear it.
+ *
+ * With a shader: the atom takes the size of the shader's command buffer,
+ * is marked dirty, and the shader BO is added to the context's resource
+ * size accounting. With NULL: the atom becomes empty and clean.
+ */
+static void update_shader_atom(struct pipe_context *ctx,
+                              struct r600_shader_state *state,
+                              struct r600_pipe_shader *shader)
+{
+       state->shader = shader;
+
+       if (!shader) {
+               /* nothing bound to this stage */
+               state->atom.num_dw = 0;
+               state->atom.dirty = false;
+               return;
+       }
+
+       state->atom.num_dw = shader->command_buffer.num_dw;
+       state->atom.dirty = true;
+       r600_context_add_resource_size(ctx, (struct pipe_resource *)shader->bo);
+}
+
+/*
+ * Switch the geometry shader block on or off.
+ *
+ * Marks the shader_stages and gs_rings atoms dirty when their enable flag
+ * changes. When the rings become enabled, the ESGS and GSVS ring buffers
+ * are created if they do not exist yet, and are bound to the
+ * R600_GS_RING_CONST_BUFFER slot of the geometry (ESGS) and vertex (GSVS)
+ * stages; when disabled, both slots are unbound.
+ */
+static void update_gs_block_state(struct r600_context *rctx, unsigned enable)
+{
+       if (rctx->shader_stages.geom_enable != enable) {
+               rctx->shader_stages.geom_enable = enable;
+               rctx->shader_stages.atom.dirty = true;
+       }
+
+       /* ring setup only has to be redone when the enable state flips */
+       if (rctx->gs_rings.enable == enable)
+               return;
+
+       rctx->gs_rings.enable = enable;
+       rctx->gs_rings.atom.dirty = true;
+
+       if (!enable) {
+               r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY,
+                               R600_GS_RING_CONST_BUFFER, NULL);
+               r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
+                               R600_GS_RING_CONST_BUFFER, NULL);
+               return;
+       }
+
+       /* Check each ring on its own: testing only the ESGS buffer would
+        * never retry a GSVS allocation that failed while ESGS succeeded. */
+       if (!rctx->gs_rings.esgs_ring.buffer) {
+               const unsigned esgs_size = 0x1C000;
+
+               rctx->gs_rings.esgs_ring.buffer =
+                               pipe_buffer_create(rctx->b.b.screen, PIPE_BIND_CUSTOM,
+                                               PIPE_USAGE_STATIC, esgs_size);
+               rctx->gs_rings.esgs_ring.buffer_size = esgs_size;
+       }
+
+       if (!rctx->gs_rings.gsvs_ring.buffer) {
+               const unsigned gsvs_size = 0x4000000;
+
+               rctx->gs_rings.gsvs_ring.buffer =
+                               pipe_buffer_create(rctx->b.b.screen, PIPE_BIND_CUSTOM,
+                                               PIPE_USAGE_STATIC, gsvs_size);
+               rctx->gs_rings.gsvs_ring.buffer_size = gsvs_size;
+       }
+
+       r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_GEOMETRY,
+                       R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.esgs_ring);
+       r600_set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
+                       R600_GS_RING_CONST_BUFFER, &rctx->gs_rings.gsvs_ring);
+}
+
 static bool r600_update_derived_state(struct r600_context *rctx)
 {
        struct pipe_context * ctx = (struct pipe_context*)rctx;
-       bool ps_dirty = false, vs_dirty = false;
+       bool ps_dirty = false, vs_dirty = false, gs_dirty = false;
        bool blend_disable;
 
        if (!rctx->blitter->running) {
@@ -1067,22 +1151,54 @@ static bool r600_update_derived_state(struct r600_context *rctx)
                }
        }
 
-       if (unlikely(rctx->vertex_shader.shader != rctx->vs_shader)) {
+       update_gs_block_state(rctx, rctx->gs_shader != NULL);
+
+       if (rctx->gs_shader) {
+               r600_shader_select(ctx, rctx->gs_shader, &gs_dirty);
+               if (unlikely(!rctx->gs_shader->current))
+                       return false;
+
+               if (rctx->b.chip_class >= EVERGREEN && !rctx->shader_stages.geom_enable) {
+                       rctx->shader_stages.geom_enable = true;
+                       rctx->shader_stages.atom.dirty = true;
+               }
+
+               /* gs_shader provides GS and VS (copy shader) */
+               if (unlikely(rctx->geometry_shader.shader != rctx->gs_shader->current)) {
+                       update_shader_atom(ctx, &rctx->geometry_shader, rctx->gs_shader->current);
+                       update_shader_atom(ctx, &rctx->vertex_shader, rctx->gs_shader->current->gs_copy_shader);
+               }
+
                r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
+               if (unlikely(!rctx->vs_shader->current))
+                       return false;
+
+               /* vs_shader is used as ES */
+               if (unlikely(vs_dirty || rctx->export_shader.shader != rctx->vs_shader->current)) {
+                       update_shader_atom(ctx, &rctx->export_shader, rctx->vs_shader->current);
+               }
+       } else {
+               if (unlikely(rctx->geometry_shader.shader)) {
+                       update_shader_atom(ctx, &rctx->geometry_shader, NULL);
+                       update_shader_atom(ctx, &rctx->export_shader, NULL);
+                       rctx->shader_stages.geom_enable = false;
+                       rctx->shader_stages.atom.dirty = true;
+               }
 
+               r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
                if (unlikely(!rctx->vs_shader->current))
                        return false;
 
-               rctx->vertex_shader.shader = rctx->vs_shader;
-               rctx->vertex_shader.atom.dirty = true;
-               r600_context_add_resource_size(ctx, (struct pipe_resource *)rctx->vs_shader->current->bo);
+               if (unlikely(vs_dirty || rctx->vertex_shader.shader != rctx->vs_shader->current)) {
+                       update_shader_atom(ctx, &rctx->vertex_shader, rctx->vs_shader->current);
 
-               /* Update clip misc state. */
-               if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
-                               rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) {
-                       rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl;
-                       rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write;
-                       rctx->clip_misc_state.atom.dirty = true;
+                       /* Update clip misc state. */
+                       if (rctx->vs_shader->current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
+                                       rctx->vs_shader->current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write) {
+                               rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl;
+                               rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write;
+                               rctx->clip_misc_state.atom.dirty = true;
+                       }
                }
        }
 
@@ -1090,7 +1206,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
        if (unlikely(!rctx->ps_shader->current))
                return false;
 
-       if (unlikely(ps_dirty || rctx->pixel_shader.shader != rctx->ps_shader)) {
+       if (unlikely(ps_dirty || rctx->pixel_shader.shader != rctx->ps_shader->current)) {
 
                if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) {
                        rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs;
@@ -1112,9 +1228,9 @@ static bool r600_update_derived_state(struct r600_context *rctx)
                        r600_update_db_shader_control(rctx);
                }
 
-               if (!ps_dirty && rctx->ps_shader && rctx->rasterizer &&
+               if (unlikely(!ps_dirty && rctx->ps_shader && rctx->rasterizer &&
                                ((rctx->rasterizer->sprite_coord_enable != rctx->ps_shader->current->sprite_coord_enable) ||
-                                               (rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade))) {
+                                               (rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade)))) {
 
                        if (rctx->b.chip_class >= EVERGREEN)
                                evergreen_update_ps_state(ctx, rctx->ps_shader->current);
@@ -1122,11 +1238,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
                                r600_update_ps_state(ctx, rctx->ps_shader->current);
                }
 
-               rctx->pixel_shader.shader = rctx->ps_shader;
-               rctx->pixel_shader.atom.num_dw = rctx->ps_shader->current->command_buffer.num_dw;
-               rctx->pixel_shader.atom.dirty = true;
-               r600_context_add_resource_size(ctx,
-                                              (struct pipe_resource *)rctx->ps_shader->current->bo);
+               update_shader_atom(ctx, &rctx->pixel_shader, rctx->ps_shader->current);
        }
 
        /* on R600 we stuff masks + txq info into one constant buffer */
@@ -1165,6 +1277,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
                                               rctx->blend_state.cso,
                                               blend_disable);
        }
+
        return true;
 }
 
@@ -1606,11 +1719,14 @@ bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
 
 void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a)
 {
+       /* Emits the shader held by atom `a` (cast to r600_shader_state): its
+        * command buffer, then a NOP packet followed by the relocation for
+        * the shader BO. Nothing is emitted when no shader is set. */
        struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
-       struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader->current;
+       struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader;
 
-       r600_emit_command_buffer(cs, &shader->command_buffer);
+       if (!shader) /* update_shader_atom() may have cleared this stage */
+               return;
 
+       r600_emit_command_buffer(cs, &shader->command_buffer);
        radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
        radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, shader->bo, RADEON_USAGE_READ));
 }
@@ -2139,6 +2255,7 @@ void r600_init_common_state_functions(struct r600_context *rctx)
 {
        rctx->b.b.create_fs_state = r600_create_ps_state;
        rctx->b.b.create_vs_state = r600_create_vs_state;
+       rctx->b.b.create_gs_state = r600_create_gs_state;
        rctx->b.b.create_vertex_elements_state = r600_create_vertex_fetch_shader;
        rctx->b.b.bind_blend_state = r600_bind_blend_state;
        rctx->b.b.bind_depth_stencil_alpha_state = r600_bind_dsa_state;
@@ -2147,6 +2264,7 @@ void r600_init_common_state_functions(struct r600_context *rctx)
        rctx->b.b.bind_rasterizer_state = r600_bind_rs_state;
        rctx->b.b.bind_vertex_elements_state = r600_bind_vertex_elements;
        rctx->b.b.bind_vs_state = r600_bind_vs_state;
+       rctx->b.b.bind_gs_state = r600_bind_gs_state;
        rctx->b.b.delete_blend_state = r600_delete_blend_state;
        rctx->b.b.delete_depth_stencil_alpha_state = r600_delete_dsa_state;
        rctx->b.b.delete_fs_state = r600_delete_ps_state;
@@ -2154,6 +2272,7 @@ void r600_init_common_state_functions(struct r600_context *rctx)
        rctx->b.b.delete_sampler_state = r600_delete_sampler_state;
        rctx->b.b.delete_vertex_elements_state = r600_delete_vertex_elements;
        rctx->b.b.delete_vs_state = r600_delete_vs_state;
+       rctx->b.b.delete_gs_state = r600_delete_gs_state;
        rctx->b.b.set_blend_color = r600_set_blend_color;
        rctx->b.b.set_clip_state = r600_set_clip_state;
        rctx->b.b.set_constant_buffer = r600_set_constant_buffer;
index 73b8b08ba3957347bcf84765e09a2a597ec7b662..d03da98777d838783f8d0b773aea7b8a10557307 100644 (file)
@@ -169,8 +169,10 @@ enum shader_target
 {
        TARGET_UNKNOWN,
        TARGET_VS,
+       TARGET_ES,
        TARGET_PS,
        TARGET_GS,
+       TARGET_GS_COPY,
        TARGET_COMPUTE,
        TARGET_FETCH,
 
index 9b1420d38954215c8c4ec1c67ad26f877aa9d2ef..f79dff16ee3fee32b4b4916187487896215d6241 100644 (file)
@@ -137,7 +137,7 @@ void bc_dump::dump(cf_node& n) {
                for (int k = 0; k < 4; ++k)
                        s << chans[n.bc.sel[k]];
 
-       } else if (n.bc.op_ptr->flags & (CF_STRM | CF_RAT)) {
+       } else if (n.bc.op_ptr->flags & CF_MEM) {
                static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
                                "WRITE_IND_ACK"};
                fill_to(s, 18);
index 355eb63810c4b6c82be75f58b6efc3e83a56a705..d96548322593358f933dc89605380f4f19f6729d 100644 (file)
@@ -63,7 +63,7 @@ int bc_finalizer::run() {
 
        // workaround for some problems on r6xx/7xx
        // add ALU NOP to each vertex shader
-       if (!ctx.is_egcm() && sh.target == TARGET_VS) {
+       if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
                cf_node *c = sh.create_clause(NST_ALU_CLAUSE);
 
                alu_group_node *g = sh.create_alu_group();
index 67e6c3a582df4535bf48e82fb4d417168cae007e..24c4854225dbbf6268cddbfd19961d785bb1d39b 100644 (file)
@@ -58,7 +58,10 @@ int bc_parser::decode() {
        if (pshader) {
                switch (bc->type) {
                case TGSI_PROCESSOR_FRAGMENT: t = TARGET_PS; break;
-               case TGSI_PROCESSOR_VERTEX: t = TARGET_VS; break;
+               case TGSI_PROCESSOR_VERTEX:
+                       t = pshader->vs_as_es ? TARGET_ES : TARGET_VS;
+                       break;
+               case TGSI_PROCESSOR_GEOMETRY: t = TARGET_GS; break;
                case TGSI_PROCESSOR_COMPUTE: t = TARGET_COMPUTE; break;
                default: assert(!"unknown shader target"); return -1; break;
                }
@@ -134,8 +137,12 @@ int bc_parser::parse_decls() {
                }
        }
 
-       if (sh->target == TARGET_VS)
+       if (sh->target == TARGET_VS || sh->target == TARGET_ES)
                sh->add_input(0, 1, 0x0F);
+       else if (sh->target == TARGET_GS) {
+               sh->add_input(0, 1, 0x0F);
+               sh->add_input(1, 1, 0x0F);
+       }
 
        bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
                        && sh->target == TARGET_PS;
index 38617a8533041e3477d658856760f18dc3baa5cb..f996c0786d123bd51cb71dccb089f774dcbc3597 100644 (file)
@@ -215,7 +215,7 @@ void shader::init() {
 void shader::init_call_fs(cf_node* cf) {
        unsigned gpr = 0;
 
-       assert(target == TARGET_VS);
+       assert(target == TARGET_VS || target == TARGET_ES);
 
        for(inputs_vec::const_iterator I = inputs.begin(),
                        E = inputs.end(); I != E; ++I, ++gpr) {
@@ -433,6 +433,7 @@ std::string shader::get_full_target_name() {
 const char* shader::get_shader_target_name() {
        switch (target) {
                case TARGET_VS: return "VS";
+               case TARGET_ES: return "ES";
                case TARGET_PS: return "PS";
                case TARGET_GS: return "GS";
                case TARGET_COMPUTE: return "COMPUTE";