r600g: build fetch shader from vertex elements
author Jerome Glisse <jglisse@redhat.com>
Sat, 4 Dec 2010 01:47:02 +0000 (20:47 -0500)
committer Jerome Glisse <jglisse@redhat.com>
Mon, 6 Dec 2010 20:50:50 +0000 (15:50 -0500)
Vertex elements change less frequently than draw calls are issued, so
to avoid rebuilding the fetch shader too often, build the fetch shader
along with the vertex elements. This also allows moving vertex buffer
setup out of the draw path and makes updates to it less frequent.

Shader updates can still be improved to only update the SPI registers
(based on rasterizer state such as flat shading, point sprites, ...).

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
src/gallium/drivers/r600/eg_asm.c
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_buffer.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c
src/gallium/drivers/r600/r600_translate.c

index 21d66fa9564e79097c43b6f45f45a802ef35d700..b79875c7c7580eeea593968c96eab577582759c6 100644 (file)
@@ -27,6 +27,7 @@
 #include "r600_asm.h"
 #include "eg_sq.h"
 #include "r600_opcodes.h"
+#include "evergreend.h"
 
 int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 {
@@ -89,3 +90,37 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
        }
        return 0;
 }
+
+void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+       struct r600_pipe_state *rstate;
+       unsigned i = 0;
+
+       if (count > 8) {
+               bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+               bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+                               S_SQ_CF_WORD1_BARRIER(1) |
+                               S_SQ_CF_WORD1_COUNT(8 - 1);
+               bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+               bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+                               S_SQ_CF_WORD1_BARRIER(1) |
+                               S_SQ_CF_WORD1_COUNT(count - 8 - 1);
+       } else {
+               bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+               bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+                               S_SQ_CF_WORD1_BARRIER(1) |
+                               S_SQ_CF_WORD1_COUNT(count - 1);
+       }
+       bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+       bytecode[i++] = S_SQ_CF_WORD1_CF_INST(EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+                       S_SQ_CF_WORD1_BARRIER(1);
+
+       rstate = &ve->rstate;
+       rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+       rstate->nregs = 0;
+       r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
+                               0x00000000, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
+                               (r600_bo_offset(ve->fetch_shader)) >> 8,
+                               0xFFFFFFFF, ve->fetch_shader);
+}
index ebd541d5f8d79913db43b877338a865f1a81122f..b313d5250129da7d38c2fe5869799e973f64c7f4 100644 (file)
@@ -1259,6 +1259,90 @@ void evergreen_polygon_offset_update(struct r600_pipe_context *rctx)
        }
 }
 
+void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx)
+{
+       struct r600_pipe_state *rstate;
+       struct r600_resource *rbuffer;
+       struct pipe_vertex_buffer *vertex_buffer;
+       unsigned i, offset;
+
+       /* we don't update until we know vertex elements */
+       if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
+               return;
+
+       /* delete previous translated vertex elements */
+       if (rctx->tran.new_velems) {
+               r600_end_vertex_translate(rctx);
+       }
+
+       if (rctx->vertex_elements->incompatible_layout) {
+               /* translation rebinds new vertex elements, so
+                * return once translated
+                */
+               r600_begin_vertex_translate(rctx);
+               return;
+       }
+
+       if (rctx->any_user_vbs) {
+               r600_upload_user_buffers(rctx);
+               rctx->any_user_vbs = FALSE;
+       }
+
+       if (rctx->vertex_elements->vbuffer_need_offset) {
+               /* one resource per vertex elements */
+               rctx->nvs_resource = rctx->vertex_elements->count;
+       } else {
+               /* bind vertex buffer once */
+               rctx->nvs_resource = rctx->nvertex_buffer;
+       }
+
+       for (i = 0 ; i < rctx->nvs_resource; i++) {
+               rstate = &rctx->vs_resource[i];
+               rstate->id = R600_PIPE_STATE_RESOURCE;
+               rstate->nregs = 0;
+
+               if (rctx->vertex_elements->vbuffer_need_offset) {
+                       /* one resource per vertex elements */
+                       unsigned vbuffer_index;
+                       vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
+                       vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
+                       rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+                       offset = rctx->vertex_elements->vbuffer_offset[i] +
+                               vertex_buffer->buffer_offset +
+                               r600_bo_offset(rbuffer->bo);
+               } else {
+                       /* bind vertex buffer once */
+                       vertex_buffer = &rctx->vertex_buffer[i];
+                       rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+                       offset = vertex_buffer->buffer_offset +
+                               r600_bo_offset(rbuffer->bo);
+               }
+
+               r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0,
+                                       offset, 0xFFFFFFFF, rbuffer->bo);
+               r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1,
+                                       rbuffer->size - offset - 1, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2,
+                                       S_030008_STRIDE(vertex_buffer->stride),
+                                       0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3,
+                                       S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) |
+                                       S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) |
+                                       S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) |
+                                       S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W),
+                                       0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
+                                       0x00000000, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
+                                       0x00000000, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6,
+                                       0x00000000, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7,
+                                       0xC0000000, 0xFFFFFFFF, NULL);
+               evergreen_fs_resource_set(&rctx->ctx, rstate, i);
+       }
+}
+
 int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
 void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
 {
@@ -1273,6 +1357,7 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
        struct r600_drawl draw;
        boolean translate = FALSE;
 
+#if 0
        if (rctx->vertex_elements->incompatible_layout) {
                r600_begin_vertex_translate(rctx);
                translate = TRUE;
@@ -1282,6 +1367,7 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
                r600_upload_user_buffers(rctx);
                rctx->any_user_vbs = FALSE;
        }
+#endif
 
        memset(&draw, 0, sizeof(struct r600_drawl));
        draw.ctx = ctx;
@@ -1338,6 +1424,7 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
        if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader))
                return;
 
+#if 0
        for (i = 0 ; i < rctx->vertex_elements->count; i++) {
                uint32_t word3, word2;
                uint32_t format;
@@ -1372,6 +1459,7 @@ void evergreen_draw(struct pipe_context *ctx, const struct pipe_draw_info *info)
                r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL);
                evergreen_fs_resource_set(&rctx->ctx, rstate, i);
        }
+#endif
 
        mask = 0;
        for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
@@ -1587,15 +1675,18 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
        r600_pipe_state_add_reg(rstate,
                                R_028864_SQ_PGM_RESOURCES_2_VS,
                                0x0, 0xFFFFFFFF, NULL);
-       r600_pipe_state_add_reg(rstate,
-                       R_0288A8_SQ_PGM_RESOURCES_FS,
-                       0x00000000, 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate,
                        R_02885C_SQ_PGM_START_VS,
                        (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo);
+
+#if 0
+       r600_pipe_state_add_reg(rstate,
+                       R_0288A8_SQ_PGM_RESOURCES_FS,
+                       0x00000000, 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate,
                        R_0288A4_SQ_PGM_START_FS,
                        (r600_bo_offset(shader->bo)) >> 8, 0xFFFFFFFF, shader->bo_fetch);
+#endif
 
        r600_pipe_state_add_reg(rstate,
                                R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
index 73daa000809a54945e53e68464a48d6b69ac0283..e13c606434fbd6223830c46ec4e924fd9cb7867b 100644 (file)
  */
 #include <stdio.h>
 #include <errno.h>
+#include "util/u_format.h"
 #include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"
 #include "r600_pipe.h"
 #include "r600_sq.h"
 #include "r600_opcodes.h"
 #include "r600_asm.h"
+#include "r600_formats.h"
+#include "r600d.h"
 
 static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
 {
@@ -972,3 +975,317 @@ void r600_bc_dump(struct r600_bc *bc)
        }
        fprintf(stderr, "--------------------------------------\n");
 }
+
+void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+       struct r600_pipe_state *rstate;
+       unsigned i = 0;
+
+       if (count > 8) {
+               bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+               bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+                                               S_SQ_CF_WORD1_BARRIER(1) |
+                                               S_SQ_CF_WORD1_COUNT(8 - 1);
+               bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+               bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+                                               S_SQ_CF_WORD1_BARRIER(1) |
+                                               S_SQ_CF_WORD1_COUNT(count - 8 - 1);
+       } else {
+               bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+               bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
+                                               S_SQ_CF_WORD1_BARRIER(1) |
+                                               S_SQ_CF_WORD1_COUNT(count - 1);
+       }
+       bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+       bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+                       S_SQ_CF_WORD1_BARRIER(1);
+
+       rstate = &ve->rstate;
+       rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+       rstate->nregs = 0;
+       r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
+                               0x00000000, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
+                               0x00000000, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
+                               r600_bo_offset(ve->fetch_shader) >> 8,
+                               0xFFFFFFFF, ve->fetch_shader);
+}
+
+void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
+{
+       struct r600_pipe_state *rstate;
+       unsigned i = 0;
+
+       if (count > 8) {
+               bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+               bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+                                               S_SQ_CF_WORD1_BARRIER(1) |
+                                               S_SQ_CF_WORD1_COUNT(8 - 1);
+               bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
+               bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+                                               S_SQ_CF_WORD1_BARRIER(1) |
+                                               S_SQ_CF_WORD1_COUNT((count - 8) - 1);
+       } else {
+               bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
+               bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
+                                               S_SQ_CF_WORD1_BARRIER(1) |
+                                               S_SQ_CF_WORD1_COUNT(count - 1);
+       }
+       bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
+       bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
+                       S_SQ_CF_WORD1_BARRIER(1);
+
+       rstate = &ve->rstate;
+       rstate->id = R600_PIPE_STATE_FETCH_SHADER;
+       rstate->nregs = 0;
+       r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_RESOURCES_FS,
+                               0x00000000, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_0288DC_SQ_PGM_CF_OFFSET_FS,
+                               0x00000000, 0xFFFFFFFF, NULL);
+       r600_pipe_state_add_reg(rstate, R_028894_SQ_PGM_START_FS,
+                               r600_bo_offset(ve->fetch_shader) >> 8,
+                               0xFFFFFFFF, ve->fetch_shader);
+}
+
+static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
+                               unsigned *num_format, unsigned *format_comp)
+{
+       const struct util_format_description *desc;
+       unsigned i;
+
+       *format = 0;
+       *num_format = 0;
+       *format_comp = 0;
+
+       desc = util_format_description(pformat);
+       if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+               goto out_unknown;
+       }
+
+       /* Find the first non-VOID channel. */
+       for (i = 0; i < 4; i++) {
+               if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
+                       break;
+               }
+       }
+
+       switch (desc->channel[i].type) {
+               /* Half-floats, floats, doubles */
+       case UTIL_FORMAT_TYPE_FLOAT:
+               switch (desc->channel[i].size) {
+               case 16:
+                       switch (desc->nr_channels) {
+                       case 1:
+                               *format = FMT_16_FLOAT;
+                               break;
+                       case 2:
+                               *format = FMT_16_16_FLOAT;
+                               break;
+                       case 3:
+                               *format = FMT_16_16_16_FLOAT;
+                               break;
+                       case 4:
+                               *format = FMT_16_16_16_16_FLOAT;
+                               break;
+                       }
+                       break;
+               case 32:
+                       switch (desc->nr_channels) {
+                       case 1:
+                               *format = FMT_32_FLOAT;
+                               break;
+                       case 2:
+                               *format = FMT_32_32_FLOAT;
+                               break;
+                       case 3:
+                               *format = FMT_32_32_32_FLOAT;
+                               break;
+                       case 4:
+                               *format = FMT_32_32_32_32_FLOAT;
+                               break;
+                       }
+                       break;
+               default:
+                       goto out_unknown;
+               }
+               break;
+               /* Unsigned ints */
+       case UTIL_FORMAT_TYPE_UNSIGNED:
+               /* Signed ints */
+       case UTIL_FORMAT_TYPE_SIGNED:
+               switch (desc->channel[i].size) {
+               case 8:
+                       switch (desc->nr_channels) {
+                       case 1:
+                               *format = FMT_8;
+                               break;
+                       case 2:
+                               *format = FMT_8_8;
+                               break;
+                       case 3:
+                       //      *format = FMT_8_8_8; /* fails piglit draw-vertices test */
+                       //      break;
+                       case 4:
+                               *format = FMT_8_8_8_8;
+                               break;
+                       }
+                       break;
+               case 16:
+                       switch (desc->nr_channels) {
+                       case 1:
+                               *format = FMT_16;
+                               break;
+                       case 2:
+                               *format = FMT_16_16;
+                               break;
+                       case 3:
+                       //      *format = FMT_16_16_16; /* fails piglit draw-vertices test */
+                       //      break;
+                       case 4:
+                               *format = FMT_16_16_16_16;
+                               break;
+                       }
+                       break;
+               case 32:
+                       switch (desc->nr_channels) {
+                       case 1:
+                               *format = FMT_32;
+                               break;
+                       case 2:
+                               *format = FMT_32_32;
+                               break;
+                       case 3:
+                               *format = FMT_32_32_32;
+                               break;
+                       case 4:
+                               *format = FMT_32_32_32_32;
+                               break;
+                       }
+                       break;
+               default:
+                       goto out_unknown;
+               }
+               break;
+       default:
+               goto out_unknown;
+       }
+
+       if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+               *format_comp = 1;
+       }
+       if (desc->channel[i].normalized) {
+               *num_format = 0;
+       } else {
+               *num_format = 2;
+       }
+       return;
+out_unknown:
+       R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
+}
+
+void r600_bc(unsigned ndw, unsigned chiprev, u32 *bytecode)
+{
+       unsigned i;
+       char chip = '6';
+
+       switch (chiprev) {
+       case 1:
+               chip = '7';
+               break;
+       case 2:
+               chip = 'E';
+               break;
+       case 0:
+       default:
+               chip = '6';
+               break;
+       }
+       fprintf(stderr, "bytecode %d dw -----------------------\n", ndw);
+       fprintf(stderr, "    %c\n", chip);
+       for (i = 0; i < ndw; i++) {
+               fprintf(stderr, "0x%08X\n", bytecode[i]);
+       }
+       fprintf(stderr, "--------------------------------------\n");
+}
+
+int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve)
+{
+       unsigned ndw, i;
+       u32 *bytecode;
+       unsigned fetch_resource_start = 0, format, num_format, format_comp;
+       struct pipe_vertex_element *elements = ve->elements;
+       const struct util_format_description *desc;
+
+       /* 2 dwords for cf aligned to 4 + 4 dwords per input */
+       ndw = 8 + ve->count * 4;
+       ve->fs_size = ndw * 4;
+
+       /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
+       ve->fetch_shader = r600_bo(rctx->radeon, ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
+       if (ve->fetch_shader == NULL) {
+               return -ENOMEM;
+       }
+
+       bytecode = r600_bo_map(rctx->radeon, ve->fetch_shader, 0, NULL);
+       if (bytecode == NULL) {
+               r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+               return -ENOMEM;
+       }
+
+       if (rctx->family >= CHIP_CEDAR) {
+               eg_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
+       } else {
+               r600_cf_vtx(ve, &bytecode[0], (ndw - 8) / 4);
+               fetch_resource_start = 160;
+       }
+
+       /* Vertex element offsets need special handling: if an offset is bigger
+        * than what we can put in the fetch instruction, then we need to alter
+        * the vertex resource offset. In that case, to simplify the code, we
+        * bind one resource per element. It's a worst case scenario.
+        */
+       for (i = 0; i < ve->count; i++) {
+               ve->vbuffer_offset[i] = C_SQ_VTX_WORD2_OFFSET & elements[i].src_offset;
+               if (ve->vbuffer_offset[i]) {
+                       ve->vbuffer_need_offset = 1;
+               }
+       }
+
+       for (i = 0; i < ve->count; i++) {
+               unsigned vbuffer_index;
+               r600_vertex_data_type(ve->hw_format[i], &format, &num_format, &format_comp);
+               desc = util_format_description(ve->hw_format[i]);
+               if (desc == NULL) {
+                       R600_ERR("unknown format %d\n", ve->hw_format[i]);
+                       r600_bo_reference(rctx->radeon, &ve->fetch_shader, NULL);
+                       return -EINVAL;
+               }
+
+               /* see above for vbuffer_need_offset explanation */
+               vbuffer_index = elements[i].vertex_buffer_index;
+               if (ve->vbuffer_need_offset) {
+                       bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(i + fetch_resource_start);
+               } else {
+                       bytecode[8 + i * 4 + 0] = S_SQ_VTX_WORD0_BUFFER_ID(vbuffer_index + fetch_resource_start);
+               }
+               bytecode[8 + i * 4 + 0] |= S_SQ_VTX_WORD0_SRC_GPR(0) |
+                                       S_SQ_VTX_WORD0_SRC_SEL_X(0) |
+                                       S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(0x1F);
+               bytecode[8 + i * 4 + 1] = S_SQ_VTX_WORD1_DST_SEL_X(desc->swizzle[0]) |
+                                       S_SQ_VTX_WORD1_DST_SEL_Y(desc->swizzle[1]) |
+                                       S_SQ_VTX_WORD1_DST_SEL_Z(desc->swizzle[2]) |
+                                       S_SQ_VTX_WORD1_DST_SEL_W(desc->swizzle[3]) |
+                                       S_SQ_VTX_WORD1_USE_CONST_FIELDS(0) |
+                                       S_SQ_VTX_WORD1_DATA_FORMAT(format) |
+                                       S_SQ_VTX_WORD1_NUM_FORMAT_ALL(num_format) |
+                                       S_SQ_VTX_WORD1_FORMAT_COMP_ALL(format_comp) |
+                                       S_SQ_VTX_WORD1_SRF_MODE_ALL(1) |
+                                       S_SQ_VTX_WORD1_GPR_DST_GPR(i + 1);
+               bytecode[8 + i * 4 + 2] = S_SQ_VTX_WORD2_OFFSET(elements[i].src_offset) |
+                                       S_SQ_VTX_WORD2_MEGA_FETCH(1);
+               bytecode[8 + i * 4 + 3] = 0;
+       }
+       r600_bo_unmap(rctx->radeon, ve->fetch_shader);
+       return 0;
+}
index 1be5e4a396ae1abc3c8d125473ca77fe2f8abb3d..b147f0f5c88d47d50b056844185050a7d62e578d 100644 (file)
@@ -28,6 +28,9 @@
 #define NUM_OF_CYCLES 3
 #define NUM_OF_COMPONENTS 4
 
+struct r600_vertex_element;
+struct r600_pipe_context;
+
 struct r600_bc_alu_src {
        unsigned                        sel;
        unsigned                        chan;
@@ -188,6 +191,7 @@ struct r600_bc {
 
 /* eg_asm.c */
 int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf);
+void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
 
 /* r600_asm.c */
 int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
@@ -201,6 +205,10 @@ int r600_bc_build(struct r600_bc *bc);
 int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
 int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
 void r600_bc_dump(struct r600_bc *bc);
+void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
+void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
+
+int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve);
 
 /* r700_asm.c */
 int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id);
index 51b8abaaa12d133b9b03ca4c99738a1f94c565d7..03a61a3213c5cffda9327082961159858e5c32c4 100644 (file)
@@ -267,10 +267,11 @@ int r600_upload_user_buffers(struct r600_pipe_context *rctx)
        int i, nr;
 
        nr = rctx->vertex_elements->count;
+       nr = rctx->nvertex_buffer;
 
        for (i = 0; i < nr; i++) {
-               struct pipe_vertex_buffer *vb =
-                       &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index];
+//             struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[rctx->vertex_elements->elements[i].vertex_buffer_index];
+               struct pipe_vertex_buffer *vb = &rctx->vertex_buffer[i];
 
                if (r600_buffer_is_user_buffer(vb->buffer)) {
                        struct pipe_resource *upload_buffer = NULL;
index fa0b635636b88fdb4e0a2cbbb293e1ec6cadc212..ea57fba8e470833b020da2af33415f5c32b0b61d 100644 (file)
@@ -90,6 +90,8 @@ static void r600_destroy_context(struct pipe_context *context)
        u_upload_destroy(rctx->upload_vb);
        u_upload_destroy(rctx->upload_ib);
 
+       r600_end_vertex_translate(rctx);
+
        if (rctx->tran.translate_cache)
                translate_cache_destroy(rctx->tran.translate_cache);
 
index deec946e5d460881808cacf2431e3e1918ca3162..ce9f99a7667fd123fdb8ca98e6a80072612d5145 100644 (file)
@@ -54,6 +54,7 @@ enum r600_pipe_state_id {
        R600_PIPE_STATE_SAMPLER,
        R600_PIPE_STATE_RESOURCE,
        R600_PIPE_STATE_POLYGON_OFFSET,
+       R600_PIPE_STATE_FETCH_SHADER,
        R600_PIPE_NSTATES
 };
 
@@ -87,7 +88,15 @@ struct r600_vertex_element
        struct pipe_vertex_element      elements[PIPE_MAX_ATTRIBS];
        enum pipe_format                hw_format[PIPE_MAX_ATTRIBS];
        unsigned                        hw_format_size[PIPE_MAX_ATTRIBS];
-       boolean incompatible_layout;
+       boolean                         incompatible_layout;
+       struct r600_bo                  *fetch_shader;
+       unsigned                        fs_size;
+       struct r600_pipe_state          rstate;
+       /* if the offset is too big for the fetch instruction we need to alter
+        * the offset of the vertex buffer; record here the offset we need to add
+        */
+       unsigned                        vbuffer_need_offset;
+       unsigned                        vbuffer_offset[PIPE_MAX_ATTRIBS];
 };
 
 struct r600_pipe_shader {
@@ -108,14 +117,14 @@ struct r600_textures_info {
        unsigned                        n_samplers;
 };
 
+/* vertex buffer translation context, used to translate vertex input that
+ * hw doesn't natively support, so far only FLOAT64 is unsupported.
+ */
 struct r600_translate_context {
        /* Translate cache for incompatible vertex offset/stride/format fallback. */
        struct translate_cache          *translate_cache;
-
        /* The vertex buffer slot containing the translated buffer. */
        unsigned                        vb_slot;
-       /* Saved and new vertex element state. */
-       void                            *saved_velems;
        void                            *new_velems;
 };
 
@@ -142,6 +151,7 @@ struct r600_pipe_context {
        struct pipe_stencil_ref         stencil_ref;
        struct pipe_viewport_state      viewport;
        struct pipe_clip_state          clip;
+       unsigned                        nvs_resource;
        struct r600_pipe_state          *vs_resource;
        struct r600_pipe_state          *ps_resource;
        struct r600_pipe_state          config;
@@ -182,6 +192,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
 void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader);
 void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx);
 void evergreen_polygon_offset_update(struct r600_pipe_context *rctx);
+void evergreen_vertex_buffer_update(struct r600_pipe_context *rctx);
 
 /* r600_blit.c */
 void r600_init_blit_functions(struct r600_pipe_context *rctx);
@@ -220,6 +231,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
 void r600_init_config(struct r600_pipe_context *rctx);
 void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx);
 void r600_polygon_offset_update(struct r600_pipe_context *rctx);
+void r600_vertex_buffer_update(struct r600_pipe_context *rctx);
 
 /* r600_helper.h */
 int r600_conv_pipe_prim(unsigned pprim, unsigned *prim);
index f53124d100929c24c3ced19e8f2427db21254aba..e40cd1dbcf13e7eb1268f47d05d328cab4300a58 100644 (file)
@@ -67,21 +67,23 @@ static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shade
                        S_028868_STACK_SIZE(rshader->bc.nstack),
                        0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate,
-                       R_0288A4_SQ_PGM_RESOURCES_FS,
+                       R_0288D0_SQ_PGM_CF_OFFSET_VS,
                        0x00000000, 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate,
-                       R_0288D0_SQ_PGM_CF_OFFSET_VS,
+                       R_028858_SQ_PGM_START_VS,
+                       r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
+
+#if 0
+       r600_pipe_state_add_reg(rstate,
+                       R_0288A4_SQ_PGM_RESOURCES_FS,
                        0x00000000, 0xFFFFFFFF, NULL);
        r600_pipe_state_add_reg(rstate,
                        R_0288DC_SQ_PGM_CF_OFFSET_FS,
                        0x00000000, 0xFFFFFFFF, NULL);
-       r600_pipe_state_add_reg(rstate,
-                       R_028858_SQ_PGM_START_VS,
-                       r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
        r600_pipe_state_add_reg(rstate,
                        R_028894_SQ_PGM_START_FS,
                        r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch);
-
+#endif
        r600_pipe_state_add_reg(rstate,
                                R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
                                0xFFFFFFFF, NULL);
@@ -261,6 +263,7 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s
 
 static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
 {
+#if 0
        struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
        struct r600_shader *shader = &rshader->shader;
        const struct util_format_description *desc;
@@ -304,6 +307,9 @@ static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader
                }
        }
        return r600_bc_build(&shader->bc_fetch);
+#else
+       return 0;
+#endif
 }
 
 int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
index c592ef2bd051544d80c79af686be3723e0ae832a..9b70942eebff20fe50549c27f7369a708f50d43d 100644 (file)
@@ -94,6 +94,84 @@ void r600_polygon_offset_update(struct r600_pipe_context *rctx)
        }
 }
 
+void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
+{
+       struct r600_pipe_state *rstate;
+       struct r600_resource *rbuffer;
+       struct pipe_vertex_buffer *vertex_buffer;
+       unsigned i, offset;
+       /* Fill rctx->vs_resource[] from the bound vertex buffers; nothing to
+        * do until vertex elements and at least one vertex buffer are set. */
+       if (rctx->vertex_elements == NULL || !rctx->nvertex_buffer)
+               return;
+
+       /* delete previously translated vertex elements */
+       if (rctx->tran.new_velems) {
+               r600_end_vertex_translate(rctx);
+       }
+
+       if (rctx->vertex_elements->incompatible_layout) {
+               /* translation rebinds new vertex elements, so
+                * return once translated
+                */
+               r600_begin_vertex_translate(rctx);
+               return;
+       }
+
+       if (rctx->any_user_vbs) {
+               r600_upload_user_buffers(rctx);
+               rctx->any_user_vbs = FALSE;
+       }
+
+       if (rctx->vertex_elements->vbuffer_need_offset) {
+               /* one resource per vertex element */
+               rctx->nvs_resource = rctx->vertex_elements->count;
+       } else {
+               /* bind each vertex buffer once */
+               rctx->nvs_resource = rctx->nvertex_buffer;
+       }
+
+       for (i = 0 ; i < rctx->nvs_resource; i++) {
+               rstate = &rctx->vs_resource[i];
+               rstate->id = R600_PIPE_STATE_RESOURCE;
+               rstate->nregs = 0; /* reset before the add_reg calls below */
+
+               if (rctx->vertex_elements->vbuffer_need_offset) {
+                       /* one resource per vertex element */
+                       unsigned vbuffer_index;
+                       vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
+                       vertex_buffer = &rctx->vertex_buffer[vbuffer_index];
+                       rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+                       offset = rctx->vertex_elements->vbuffer_offset[i] +
+                               vertex_buffer->buffer_offset +
+                               r600_bo_offset(rbuffer->bo);
+               } else {
+                       /* bind each vertex buffer once */
+                       vertex_buffer = &rctx->vertex_buffer[i];
+                       rbuffer = (struct r600_resource*)vertex_buffer->buffer;
+                       offset = vertex_buffer->buffer_offset +
+                               r600_bo_offset(rbuffer->bo);
+               }
+
+               r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0,
+                                       offset, 0xFFFFFFFF, rbuffer->bo);
+               r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1,
+                                       rbuffer->size - offset - 1, 0xFFFFFFFF, NULL); /* NOTE(review): offset is unsigned; confirm offset < rbuffer->size always holds */
+               r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2,
+                                       S_038008_STRIDE(vertex_buffer->stride),
+                                       0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3,
+                                       0x00000000, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
+                                       0x00000000, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5,
+                                       0x00000000, 0xFFFFFFFF, NULL);
+               r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6,
+                                       0xC0000000, 0xFFFFFFFF, NULL);
+               r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
+       }
+}
+
 static void r600_draw_common(struct r600_drawl *draw)
 {
        struct r600_pipe_context *rctx = (struct r600_pipe_context *)draw->ctx;
@@ -132,6 +210,7 @@ static void r600_draw_common(struct r600_drawl *draw)
        if (r600_pipe_shader_update(&rctx->context, rctx->ps_shader))
                return;
 
+#if 0
        for (i = 0 ; i < rctx->vertex_elements->count; i++) {
                uint32_t word2, format;
 
@@ -159,6 +238,7 @@ static void r600_draw_common(struct r600_drawl *draw)
                r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL);
                r600_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
        }
+#endif
 
        mask = 0;
        for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
@@ -195,6 +275,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
        struct r600_drawl draw;
        boolean translate = FALSE;
 
+#if 0
        if (rctx->vertex_elements->incompatible_layout) {
                r600_begin_vertex_translate(rctx);
                translate = TRUE;
@@ -204,6 +285,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                r600_upload_user_buffers(rctx);
                rctx->any_user_vbs = FALSE;
        }
+#endif
+
        memset(&draw, 0, sizeof(struct r600_drawl));
        draw.ctx = ctx;
        draw.mode = info->mode;
index 856f79158c0042a93feb7e6c771372242ccf5592..889432732cf3c8b810fc4d6b704708de8c675038 100644 (file)
@@ -120,6 +120,16 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
        struct r600_vertex_element *v = (struct r600_vertex_element*)state;
 
        rctx->vertex_elements = v;
+       if (v) {
+               rctx->states[v->rstate.id] = &v->rstate;
+               r600_context_pipe_state_set(&rctx->ctx, &v->rstate);
+               if (rctx->family >= CHIP_CEDAR) {
+                       evergreen_vertex_buffer_update(rctx);
+               } else {
+                       r600_vertex_buffer_update(rctx);
+               }
+       }
+
        if (v) {
 //             rctx->vs_rebuild = TRUE;
        }
@@ -128,11 +138,16 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
 void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
 {
        struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+       struct r600_vertex_element *v = (struct r600_vertex_element*)state; /* NOTE(review): v is dereferenced below without a NULL check - confirm state is never NULL */
 
-       FREE(state);
-
+       if (rctx->states[v->rstate.id] == &v->rstate) { /* unhook our rstate if it is the one currently tracked */
+               rctx->states[v->rstate.id] = NULL;
+       }
        if (rctx->vertex_elements == state)
                rctx->vertex_elements = NULL;
+
+       r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL); /* presumably drops the fetch shader bo reference - confirm */
+       FREE(state); /* freed last, after the context pointers to it are cleared */
 }
 
 
@@ -182,6 +197,11 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
        }
        rctx->nvertex_buffer = count;
        rctx->vb_max_index = max_index;
+       if (rctx->family >= CHIP_CEDAR) {
+               evergreen_vertex_buffer_update(rctx);
+       } else {
+               r600_vertex_buffer_update(rctx);
+       }
 }
 
 
@@ -192,9 +212,10 @@ void *r600_create_vertex_elements(struct pipe_context *ctx,
                                  unsigned count,
                                  const struct pipe_vertex_element *elements)
 {
+       struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
        struct r600_vertex_element *v = CALLOC_STRUCT(r600_vertex_element);
-       int i;
        enum pipe_format *format;
+       int i;
 
        assert(count < 32);
        if (!v)
@@ -216,12 +237,16 @@ void *r600_create_vertex_elements(struct pipe_context *ctx,
                }
                v->incompatible_layout =
                        v->incompatible_layout ||
-                       v->elements[i].src_format != v->hw_format[i] ||
-                       v->elements[i].src_offset % 4 != 0;
+                       v->elements[i].src_format != v->hw_format[i];
 
                v->hw_format_size[i] = align(util_format_get_blocksize(v->hw_format[i]), 4);
        }
 
+       if (r600_vertex_elements_build_fetch_shader(rctx, v)) {
+               FREE(v);
+               return NULL;
+       }
+
        return v;
 }
 
index 2e082f1cff07edaba3403db5e8bce9b814764ba9..d927f53398d37aa1d3ff60324562bce832ce43ab 100644 (file)
@@ -41,6 +41,7 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
        struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer;
        struct pipe_resource *out_buffer;
        unsigned i, num_verts;
+       struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
 
        /* Initialize the translate key, i.e. the recipe how vertices should be
         * translated. */
@@ -51,9 +52,7 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
                unsigned output_format_size = ve->hw_format_size[i];
 
                /* Check for support. */
-               if (ve->elements[i].src_format == ve->hw_format[i] &&
-                   (vb->buffer_offset + ve->elements[i].src_offset) % 4 == 0 &&
-                   vb->stride % 4 == 0) {
+               if (ve->elements[i].src_format == ve->hw_format[i]) {
                        continue;
                }
 
@@ -147,29 +146,22 @@ void r600_begin_vertex_translate(struct r600_pipe_context *rctx)
        }
 
        /* Save and replace vertex elements. */
-       {
-               struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
-
-               rctx->tran.saved_velems = rctx->vertex_elements;
-
-               for (i = 0; i < ve->count; i++) {
-                       if (vb_translated[ve->elements[i].vertex_buffer_index]) {
-                               te = &key.element[tr_elem_index[i]];
-                               new_velems[i].instance_divisor = ve->elements[i].instance_divisor;
-                               new_velems[i].src_format = te->output_format;
-                               new_velems[i].src_offset = te->output_offset;
-                               new_velems[i].vertex_buffer_index = rctx->tran.vb_slot;
-                       } else {
-                               memcpy(&new_velems[i], &ve->elements[i],
-                                      sizeof(struct pipe_vertex_element));
-                       }
+       for (i = 0; i < ve->count; i++) {
+               if (vb_translated[ve->elements[i].vertex_buffer_index]) {
+                       te = &key.element[tr_elem_index[i]];
+                       new_velems[i].instance_divisor = ve->elements[i].instance_divisor;
+                       new_velems[i].src_format = te->output_format;
+                       new_velems[i].src_offset = te->output_offset;
+                       new_velems[i].vertex_buffer_index = rctx->tran.vb_slot;
+               } else {
+                       memcpy(&new_velems[i], &ve->elements[i],
+                                       sizeof(struct pipe_vertex_element));
                }
-
-               rctx->tran.new_velems =
-                       pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
-               pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems);
        }
 
+       rctx->tran.new_velems = pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
+       pipe->bind_vertex_elements_state(pipe, rctx->tran.new_velems);
+
        pipe_resource_reference(&out_buffer, NULL);
 }
 
@@ -178,8 +170,11 @@ void r600_end_vertex_translate(struct r600_pipe_context *rctx)
        struct pipe_context *pipe = &rctx->context;
 
        /* Restore vertex elements. */
-       pipe->bind_vertex_elements_state(pipe, rctx->tran.saved_velems);
+       if (rctx->vertex_elements == rctx->tran.new_velems) {
+               pipe->bind_vertex_elements_state(pipe, NULL);
+       }
        pipe->delete_vertex_elements_state(pipe, rctx->tran.new_velems);
+       rctx->tran.new_velems = NULL;
 
        /* Delete the now-unused VBO. */
        pipe_resource_reference(&rctx->vertex_buffer[rctx->tran.vb_slot].buffer,