gallium: change set_constant_buffer to be UBO-friendly
[mesa.git] / src / gallium / drivers / radeonsi / evergreen_state.c
index cc271df5a64119cdd8854199baa342ed11a66ca5..c70b0b24d79e34260b5436b4e58ed0db956bfce3 100644 (file)
@@ -1120,17 +1120,17 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
 
        util_pack_color(state->border_color.f, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
        switch (uc.ui) {
-       case 0x000000FF: /* opaque black */
-               border_color_type = 0;
+       case 0x000000FF:
+               border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
                break;
-       case 0x00000000: /* transparent black */
-               border_color_type = 1;
+       case 0x00000000:
+               border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
                break;
-       case 0xFFFFFFFF: /* white */
-               border_color_type = 2;
+       case 0xFFFFFFFF:
+               border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
                break;
        default: /* Use border color pointer */
-               border_color_type = 3;
+               border_color_type = V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
        }
 
        rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
@@ -1140,13 +1140,13 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
                          S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
                          S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
                          aniso_flag_offset << 16 | /* XXX */
-                         S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
-                         S_008F30_FILTER_MODE(si_tex_mipfilter(state->min_mip_filter)));
+                         S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
        rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
                          S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
        rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
                          S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter)) |
-                         S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter)));
+                         S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter)) |
+                         S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
        rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
 
 #if 0
@@ -1246,8 +1246,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
        height = texture->height0;
        depth = texture->depth0;
 
-       pitch = align(tmp->pitch_in_blocks[0] *
-                     util_format_get_blockwidth(state->format), 8);
+       pitch = tmp->pitch_in_blocks[0] * util_format_get_blockwidth(state->format);
        array_mode = tmp->array_mode[0];
        tile_type = tmp->tile_type;
 
@@ -1271,9 +1270,9 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
                          S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
                          S_008F1C_BASE_LEVEL(state->u.tex.first_level) |
                          S_008F1C_LAST_LEVEL(state->u.tex.last_level) |
+                         S_008F1C_TILING_INDEX(8) | /* XXX */
                          S_008F1C_TYPE(si_tex_dim(texture->target)));
-       view->state[4] = (S_008F20_DEPTH(depth - 1) |
-                         S_008F20_PITCH((pitch / 8) - 1));
+       view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
        view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
                          S_008F24_LAST_ARRAY(state->u.tex.last_layer));
        view->state[6] = 0;
@@ -1307,7 +1306,7 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
        bo = (struct r600_resource*)
                pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
                                   count * sizeof(resource[0]->state));
-       ptr = rctx->ws->buffer_map(bo->buf, rctx->cs, PIPE_TRANSFER_WRITE);
+       ptr = rctx->ws->buffer_map(bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
 
        for (i = 0; i < count; i++, ptr += sizeof(resource[0]->state)) {
                pipe_sampler_view_reference(
@@ -1323,7 +1322,7 @@ static void evergreen_set_ps_sampler_view(struct pipe_context *ctx, unsigned cou
                        memset(ptr, 0, sizeof(resource[0]->state));
        }
 
-       rctx->ws->buffer_unmap(bo->buf);
+       rctx->ws->buffer_unmap(bo->cs_buf);
 
        for (i = count; i < NUM_TEX_UNITS; i++) {
                if (rctx->ps_samplers.views[i])
@@ -1358,13 +1357,13 @@ static void evergreen_bind_ps_sampler(struct pipe_context *ctx, unsigned count,
        bo = (struct r600_resource*)
                pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE,
                                   count * sizeof(rstates[0]->val));
-       ptr = rctx->ws->buffer_map(bo->buf, rctx->cs, PIPE_TRANSFER_WRITE);
+       ptr = rctx->ws->buffer_map(bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
 
        for (i = 0; i < count; i++, ptr += sizeof(rstates[0]->val)) {
                memcpy(ptr, rstates[i]->val, sizeof(rstates[0]->val));
        }
 
-       rctx->ws->buffer_unmap(bo->buf);
+       rctx->ws->buffer_unmap(bo->cs_buf);
 
        va = r600_resource_va(ctx->screen, (void *)bo);
        r600_pipe_state_add_reg(rstate, R_00B038_SPI_SHADER_USER_DATA_PS_2, va, bo, RADEON_USAGE_READ);
@@ -1547,7 +1546,7 @@ static void evergreen_cb(struct r600_context *rctx, struct r600_pipe_state *rsta
 
        format = si_translate_colorformat(surf->base.format);
        swap = si_translate_colorswap(surf->base.format);
-       if (rtex->resource.b.b.b.usage == PIPE_USAGE_STAGING) {
+       if (rtex->resource.b.b.usage == PIPE_USAGE_STAGING) {
                endian = V_028C70_ENDIAN_NONE;
        } else {
                endian = si_colorformat_endian_swap(format);
@@ -1607,12 +1606,13 @@ static void evergreen_cb(struct r600_context *rctx, struct r600_pipe_state *rsta
                                &rtex->resource, RADEON_USAGE_READWRITE);
 }
 
-static void evergreen_db(struct r600_context *rctx, struct r600_pipe_state *rstate,
-                        const struct pipe_framebuffer_state *state)
+static void si_db(struct r600_context *rctx, struct r600_pipe_state *rstate,
+                 const struct pipe_framebuffer_state *state)
 {
        struct r600_resource_texture *rtex;
        struct r600_surface *surf;
-       unsigned level, first_layer, pitch, slice, format, array_mode;
+       unsigned level, first_layer, pitch, slice, format;
+       uint32_t db_z_info, stencil_info;
        uint64_t offset;
 
        if (state->zsbuf == NULL) {
@@ -1625,10 +1625,6 @@ static void evergreen_db(struct r600_context *rctx, struct r600_pipe_state *rsta
        level = surf->base.u.tex.level;
        rtex = (struct r600_resource_texture*)surf->base.texture;
 
-       /* XXX remove this once tiling is properly supported */
-       array_mode = 0;/*rtex->array_mode[level] ? rtex->array_mode[level] :
-                        V_028C70_ARRAY_1D_TILED_THIN1;*/
-
        first_layer = surf->base.u.tex.first_layer;
        offset = r600_texture_get_offset(rtex, level, first_layer);
        pitch = rtex->pitch_in_blocks[level] / 8 - 1;
@@ -1644,6 +1640,24 @@ static void evergreen_db(struct r600_context *rctx, struct r600_pipe_state *rsta
                                offset, &rtex->resource, RADEON_USAGE_READWRITE);
        r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, NULL, 0);
 
+       db_z_info = S_028040_FORMAT(format);
+       stencil_info = S_028044_FORMAT(rtex->stencil != 0);
+
+       switch (format) {
+       case V_028040_Z_16:
+               db_z_info |= S_028040_TILE_MODE_INDEX(5);
+               stencil_info |= S_028044_TILE_MODE_INDEX(5);
+               break;
+       case V_028040_Z_24:
+       case V_028040_Z_32_FLOAT:
+               db_z_info |= S_028040_TILE_MODE_INDEX(6);
+               stencil_info |= S_028044_TILE_MODE_INDEX(6);
+               break;
+       default:
+               db_z_info |= S_028040_TILE_MODE_INDEX(7);
+               stencil_info |= S_028044_TILE_MODE_INDEX(7);
+       }
+
        if (rtex->stencil) {
                uint64_t stencil_offset =
                        r600_texture_get_offset(rtex->stencil, level, first_layer);
@@ -1656,22 +1670,25 @@ static void evergreen_db(struct r600_context *rctx, struct r600_pipe_state *rsta
                r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
                                        stencil_offset, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
                r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
-                                       1, NULL, 0);
+                                       stencil_info, NULL, 0);
        } else {
                r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
                                        0, NULL, 0);
        }
 
-       r600_pipe_state_add_reg(rstate, R_02803C_DB_DEPTH_INFO, 0x1, NULL, 0);
-       r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO,
-                               /*S_028040_ARRAY_MODE(array_mode) |*/ S_028040_FORMAT(format),
-                               NULL, 0);
-       r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE,
-                               S_028058_PITCH_TILE_MAX(pitch),
-                               NULL, 0);
-       r600_pipe_state_add_reg(rstate, R_02805C_DB_DEPTH_SLICE,
-                               S_02805C_SLICE_TILE_MAX(slice),
-                               NULL, 0);
+       if (format != ~0U) {
+               r600_pipe_state_add_reg(rstate, R_02803C_DB_DEPTH_INFO, 0x1, NULL, 0);
+               r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO, db_z_info, NULL, 0);
+               r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE,
+                                       S_028058_PITCH_TILE_MAX(pitch),
+                                       NULL, 0);
+               r600_pipe_state_add_reg(rstate, R_02805C_DB_DEPTH_SLICE,
+                                       S_02805C_SLICE_TILE_MAX(slice),
+                                       NULL, 0);
+
+       } else {
+               r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO, 0, NULL, 0);
+       }
 }
 
 static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
@@ -1698,7 +1715,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
        for (int i = 0; i < state->nr_cbufs; i++) {
                evergreen_cb(rctx, rstate, state, i);
        }
-       evergreen_db(rctx, rstate, state);
+       si_db(rctx, rstate, state);
 
        shader_mask = 0;
        for (int i = 0; i < state->nr_cbufs; i++) {
@@ -1910,6 +1927,7 @@ void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *shader)
        struct r600_pipe_state *rstate = &shader->rstate;
        struct r600_shader *rshader = &shader->shader;
        unsigned i, exports_ps, num_cout, spi_ps_in_control, db_shader_control;
+       unsigned num_sgprs, num_user_sgprs;
        int pos_index = -1, face_index = -1;
        int ninterp = 0;
        boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = FALSE;
@@ -1923,21 +1941,13 @@ void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *shader)
 
        db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
        for (i = 0; i < rshader->ninput; i++) {
-               /* evergreen NUM_INTERP only contains values interpolated into the LDS,
-                  POSITION goes via GPRs from the SC so isn't counted */
-               if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
-                       pos_index = i;
-               else if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
-                       face_index = i;
-               else {
-                       ninterp++;
-                       if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
-                               have_linear = TRUE;
-                       if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
-                               have_perspective = TRUE;
-                       if (rshader->input[i].centroid)
-                               have_centroid = TRUE;
-               }
+               ninterp++;
+               if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
+                       have_linear = TRUE;
+               if (rshader->input[i].interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
+                       have_perspective = TRUE;
+               if (rshader->input[i].centroid)
+                       have_centroid = TRUE;
        }
 
        for (i = 0; i < rshader->noutput; i++) {
@@ -1967,14 +1977,6 @@ void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *shader)
                exports_ps = 2;
        }
 
-       if (ninterp == 0) {
-               ninterp = 1;
-               have_perspective = TRUE;
-       }
-
-       if (!have_perspective && !have_linear)
-               have_perspective = TRUE;
-
        spi_ps_in_control = S_0286D8_NUM_INTERP(ninterp);
 
        spi_baryc_cntl = 0;
@@ -2027,16 +2029,22 @@ void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *shader)
                                va >> 40,
                                shader->bo, RADEON_USAGE_READ);
 
+       num_user_sgprs = 6;
+       num_sgprs = shader->num_sgprs;
+       if (num_user_sgprs > num_sgprs)
+               num_sgprs = num_user_sgprs;
        /* Last 2 reserved SGPRs are used for VCC */
-       /* XXX: Hard-coding 2 SGPRs for constant buffer */
+       num_sgprs += 2;
+       assert(num_sgprs <= 104);
+
        r600_pipe_state_add_reg(rstate,
                                R_00B028_SPI_SHADER_PGM_RSRC1_PS,
-                               S_00B028_VGPRS(shader->num_vgprs / 4) |
-                               S_00B028_SGPRS((shader->num_sgprs + 2 + 2 + 1) / 8),
+                               S_00B028_VGPRS((shader->num_vgprs - 1) / 4) |
+                               S_00B028_SGPRS((num_sgprs - 1) / 8),
                                NULL, 0);
        r600_pipe_state_add_reg(rstate,
                                R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
-                               S_00B02C_USER_SGPR(6),
+                               S_00B02C_USER_SGPR(num_user_sgprs),
                                NULL, 0);
 
        r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL,
@@ -2051,6 +2059,7 @@ void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *shader)
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct r600_pipe_state *rstate = &shader->rstate;
        struct r600_shader *rshader = &shader->shader;
+       unsigned num_sgprs, num_user_sgprs;
        unsigned nparams, i;
        uint64_t va;
 
@@ -2094,16 +2103,22 @@ void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *shader)
                                va >> 40,
                                shader->bo, RADEON_USAGE_READ);
 
+       num_user_sgprs = 8;
+       num_sgprs = shader->num_sgprs;
+       if (num_user_sgprs > num_sgprs)
+               num_sgprs = num_user_sgprs;
        /* Last 2 reserved SGPRs are used for VCC */
-       /* XXX: Hard-coding 2 SGPRs for constant buffer */
+       num_sgprs += 2;
+       assert(num_sgprs <= 104);
+
        r600_pipe_state_add_reg(rstate,
                                R_00B128_SPI_SHADER_PGM_RSRC1_VS,
-                               S_00B128_VGPRS(shader->num_vgprs / 4) |
-                               S_00B128_SGPRS((shader->num_sgprs + 2 + 2 + 2) / 8),
+                               S_00B128_VGPRS((shader->num_vgprs - 1) / 4) |
+                               S_00B128_SGPRS((num_sgprs - 1) / 8),
                                NULL, 0);
        r600_pipe_state_add_reg(rstate,
                                R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
-                               S_00B12C_USER_SGPR(2 + 2),
+                               S_00B12C_USER_SGPR(num_user_sgprs),
                                NULL, 0);
 }