eg/compute: Drop reference on code_bo in destructor.
[mesa.git] / src / gallium / drivers / r600 / evergreen_state.c
index 96475e64b599a934505501ea3f31d883ffec51c0..48934158bdf926a3bbd3fa884c12ef2a50e7d23d 100644 (file)
@@ -22,6 +22,7 @@
  */
 #include "r600_formats.h"
 #include "r600_shader.h"
+#include "r600_query.h"
 #include "evergreend.h"
 
 #include "pipe/p_shader_tokens.h"
@@ -615,6 +616,7 @@ struct eg_buf_res_params {
        unsigned char swizzle[4];
        bool uncached;
        bool force_swizzle;
+       bool size_in_bytes;
 };
 
 static void evergreen_fill_buffer_resource_words(struct r600_context *rctx,
@@ -653,11 +655,12 @@ static void evergreen_fill_buffer_resource_words(struct r600_context *rctx,
                S_030008_ENDIAN_SWAP(endian);
        tex_resource_words[3] = swizzle_res | S_03000C_UNCACHED(params->uncached);
        /*
-        * in theory dword 4 is for number of elements, for use with resinfo,
-        * but it seems to utterly fail to work, the amd gpu shader analyser
+        * dword 4 is for number of elements, for use with resinfo,
+        * albeit the amd gpu shader analyser
         * uses a const buffer to store the element sizes for buffer txq
         */
-       tex_resource_words[4] = 0;
+       tex_resource_words[4] = params->size_in_bytes ? params->size : (params->size / stride);
+
        tex_resource_words[5] = tex_resource_words[6] = 0;
        tex_resource_words[7] = S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER);
 }
@@ -809,18 +812,21 @@ static int evergreen_fill_tex_resource_words(struct r600_context *rctx,
        }
        nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
 
-       if (params->target == PIPE_TEXTURE_1D_ARRAY) {
-               height = 1;
-               depth = texture->array_size;
-       } else if (params->target == PIPE_TEXTURE_2D_ARRAY) {
-               depth = texture->array_size;
-       } else if (params->target == PIPE_TEXTURE_CUBE_ARRAY)
-               depth = texture->array_size / 6;
 
        va = tmp->resource.gpu_address;
 
        /* array type views and views into array types need to use layer offset */
        dim = r600_tex_dim(tmp, params->target, texture->nr_samples);
+
+       if (dim == V_030000_SQ_TEX_DIM_1D_ARRAY) {
+               height = 1;
+               depth = texture->array_size;
+       } else if (dim == V_030000_SQ_TEX_DIM_2D_ARRAY ||
+                  dim == V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA) {
+               depth = texture->array_size;
+       } else if (dim == V_030000_SQ_TEX_DIM_CUBEMAP)
+               depth = texture->array_size / 6;
+
        tex_resource_words[0] = (S_030000_DIM(dim) |
                                 S_030000_PITCH((pitch / 8) - 1) |
                                 S_030000_TEX_WIDTH(width - 1));
@@ -1430,7 +1436,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
        struct r600_surface *surf;
        struct r600_texture *rtex;
        uint32_t i, log_samples;
-
+       uint32_t target_mask = 0;
        /* Flush TC when changing the framebuffer state, because the only
         * client not using TC that can change textures is the framebuffer.
         * Other places don't typically have to flush TC.
@@ -1457,6 +1463,8 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
                if (!surf)
                        continue;
 
+               target_mask |= (0xf << (i * 4));
+
                rtex = (struct r600_texture*)surf->base.texture;
 
                r600_context_add_resource_size(ctx, state->cbufs[i]->texture);
@@ -1522,7 +1530,9 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
                r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
        }
 
-       if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
+       if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs ||
+           rctx->cb_misc_state.bound_cbufs_target_mask != target_mask) {
+               rctx->cb_misc_state.bound_cbufs_target_mask = target_mask;
                rctx->cb_misc_state.nr_cbufs = state->nr_cbufs;
                r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
        }
@@ -1946,14 +1956,8 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
        if (rctx->b.chip_class == EVERGREEN) {
                evergreen_emit_msaa_state(rctx, rctx->framebuffer.nr_samples, rctx->ps_iter_samples);
        } else {
-               unsigned sc_mode_cntl_1 =
-                       EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
-                       EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1);
-
-               if (rctx->framebuffer.nr_samples > 1)
-                       cayman_emit_msaa_sample_locs(cs, rctx->framebuffer.nr_samples);
-               cayman_emit_msaa_config(cs, rctx->framebuffer.nr_samples,
-                                       rctx->ps_iter_samples, 0, sc_mode_cntl_1);
+               cayman_emit_msaa_state(cs, rctx->framebuffer.nr_samples,
+                                      rctx->ps_iter_samples, 0);
        }
 }
 
@@ -1997,13 +2001,31 @@ static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600
                               pa_su_poly_offset_db_fmt_cntl);
 }
 
+/*
+ * Build the RAT part of the value written to CB_TARGET_MASK.
+ *
+ * Every bit set in a->image_rat_enabled_mask contributes a 4-bit nibble at
+ * its own index. Every bit set in a->buffer_rat_enabled_mask contributes a
+ * nibble at (index + util_last_bit(image_rat_enabled_mask)), i.e. after the
+ * highest enabled image RAT. The result is shifted up by one nibble per
+ * colour buffer (nr_cbufs) and truncated to 32 bits.
+ *
+ * The mask is accumulated in 64 bits: shifting the int constant 0xf into
+ * bit 31, or shifting a 32-bit value by 32 (nr_cbufs == 8), is undefined.
+ * NOTE(review): this assumes (idx + offset) * 4 and nr_cbufs * 4 both stay
+ * below 64 -- confirm against the RAT and colour buffer limits.
+ *
+ * rctx is not used by this function.
+ */
+uint32_t evergreen_construct_rat_mask(struct r600_context *rctx, struct r600_cb_misc_state *a,
+                                     unsigned nr_cbufs)
+{
+       uint64_t base_mask = 0;
+       unsigned dirty_mask = a->image_rat_enabled_mask;
+       while (dirty_mask) {
+               unsigned idx = u_bit_scan(&dirty_mask);
+               base_mask |= (uint64_t)0xf << (idx * 4);
+       }
+       /* buffer RATs are numbered after the last enabled image RAT */
+       unsigned offset = util_last_bit(a->image_rat_enabled_mask);
+       dirty_mask = a->buffer_rat_enabled_mask;
+       while (dirty_mask) {
+               unsigned idx = u_bit_scan(&dirty_mask);
+               base_mask |= (uint64_t)0xf << ((idx + offset) * 4);
+       }
+       /* nibbles pushed past bit 31 are intentionally dropped */
+       return (uint32_t)(base_mask << (nr_cbufs * 4));
+}
+
 static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
        struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
        struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
-       unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
-       unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
-       unsigned rat_colormask = ((1ULL << ((unsigned)(a->nr_image_rats + a->nr_buffer_rats) * 4)) - 1) << (a->nr_cbufs * 4);
+       unsigned fb_colormask = a->bound_cbufs_target_mask;
+       unsigned ps_colormask = a->ps_color_export_mask;
+       unsigned rat_colormask = evergreen_construct_rat_mask(rctx, a, a->nr_cbufs);
        radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
        radeon_emit(cs, (a->blend_colormask & fb_colormask) | rat_colormask); /* R_028238_CB_TARGET_MASK */
        /* This must match the used export instructions exactly.
@@ -2167,17 +2189,16 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 
                va = rbuffer->gpu_address + cb->buffer_offset;
 
-               if (!gs_ring_buffer) {
+               if (buffer_index < R600_MAX_HW_CONST_BUFFERS) {
                        radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4,
                                                    DIV_ROUND_UP(cb->buffer_size, 256), pkt_flags);
                        radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8,
                                                    pkt_flags);
+                       radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
+                       radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
+                                                                 RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
                }
 
-               radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
-               radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
-                                                     RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
-
                radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
                radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
                radeon_emit(cs, va); /* RESOURCEi_WORD0 */
@@ -2276,6 +2297,30 @@ static void evergreen_emit_tcs_constant_buffers(struct r600_context *rctx, struc
                                        0);
 }
 
+/*
+ * Walk all hardware shader stages and, for each stage that has a shader
+ * whose scratch_space_needed is non-zero, pass that stage's scratch buffer
+ * and its three temp-ring registers to r600_setup_scratch_area_for_shader().
+ */
+void evergreen_setup_scratch_buffers(struct r600_context *rctx) {
+       /* temp-ring register triple for each hardware stage */
+       static const struct {
+               unsigned ring_base;
+               unsigned item_size;
+               unsigned ring_size;
+       } ring_regs[EG_NUM_HW_STAGES] = {
+               [R600_HW_STAGE_PS] = { R_008C68_SQ_PSTMP_RING_BASE, R_028914_SQ_PSTMP_RING_ITEMSIZE, R_008C6C_SQ_PSTMP_RING_SIZE },
+               [R600_HW_STAGE_VS] = { R_008C60_SQ_VSTMP_RING_BASE, R_028910_SQ_VSTMP_RING_ITEMSIZE, R_008C64_SQ_VSTMP_RING_SIZE },
+               [R600_HW_STAGE_GS] = { R_008C58_SQ_GSTMP_RING_BASE, R_02890C_SQ_GSTMP_RING_ITEMSIZE, R_008C5C_SQ_GSTMP_RING_SIZE },
+               [R600_HW_STAGE_ES] = { R_008C50_SQ_ESTMP_RING_BASE, R_028908_SQ_ESTMP_RING_ITEMSIZE, R_008C54_SQ_ESTMP_RING_SIZE },
+               [EG_HW_STAGE_LS] = { R_008E10_SQ_LSTMP_RING_BASE, R_028830_SQ_LSTMP_RING_ITEMSIZE, R_008E14_SQ_LSTMP_RING_SIZE },
+               [EG_HW_STAGE_HS] = { R_008E18_SQ_HSTMP_RING_BASE, R_028834_SQ_HSTMP_RING_ITEMSIZE, R_008E1C_SQ_HSTMP_RING_SIZE }
+       };
+       unsigned hw_stage;
+
+       for (hw_stage = 0; hw_stage < EG_NUM_HW_STAGES; hw_stage++) {
+               struct r600_pipe_shader *shader = rctx->hw_shader_stages[hw_stage].shader;
+
+               /* nothing to do for a stage without a shader */
+               if (!shader)
+                       continue;
+               /* needing scratch space is the uncommon case */
+               if (!unlikely(shader->scratch_space_needed))
+                       continue;
+
+               r600_setup_scratch_area_for_shader(rctx, shader,
+                                                  &rctx->scratch_buffers[hw_stage],
+                                                  ring_regs[hw_stage].ring_base,
+                                                  ring_regs[hw_stage].item_size,
+                                                  ring_regs[hw_stage].ring_size);
+       }
+}
+
 static void evergreen_emit_sampler_views(struct r600_context *rctx,
                                         struct r600_samplerview_state *state,
                                         unsigned resource_id_base, unsigned pkt_flags)
@@ -2334,6 +2379,8 @@ static void evergreen_emit_tcs_sampler_views(struct r600_context *rctx, struct r
 
+/*
+ * Atom emit callback for the PIPE_SHADER_TESS_EVAL sampler views.
+ * Returns without emitting anything when rctx->tes_shader is NULL;
+ * otherwise forwards to evergreen_emit_sampler_views(). atom is unused.
+ */
 static void evergreen_emit_tes_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
 {
+       if (!rctx->tes_shader)
+               return;
        evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL].views,
                                     EG_FETCH_CONSTANTS_OFFSET_VS + R600_MAX_CONST_BUFFERS, 0);
 }
@@ -2404,6 +2451,8 @@ static void evergreen_emit_tcs_sampler_states(struct r600_context *rctx, struct
 
+/*
+ * Atom emit callback for the PIPE_SHADER_TESS_EVAL sampler states.
+ * Returns without emitting anything when rctx->tes_shader is NULL;
+ * otherwise forwards to evergreen_emit_sampler_states(). atom is unused.
+ */
 static void evergreen_emit_tes_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
 {
+       if (!rctx->tes_shader)
+               return;
        evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_TESS_EVAL], 18,
                                      R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0);
 }
@@ -3338,7 +3387,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
                        exports_ps |= 1;
        }
 
-       num_cout = rshader->nr_ps_color_exports;
+       num_cout = rshader->ps_export_highest + 1;
 
        exports_ps |= S_02884C_EXPORT_COLORS(num_cout);
        if (!exports_ps) {
@@ -3346,6 +3395,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
                exports_ps = 2;
        }
        shader->nr_ps_color_outputs = num_cout;
+       shader->ps_color_export_mask = rshader->ps_color_export_mask;
        if (ninterp == 0) {
                ninterp = 1;
                have_perspective = TRUE;
@@ -3792,6 +3842,11 @@ static void evergreen_dma_copy(struct pipe_context *ctx,
                goto fallback;
        }
 
+       if (rctx->cmd_buf_is_compute) {
+               rctx->b.gfx.flush(rctx, PIPE_FLUSH_ASYNC, NULL);
+               rctx->cmd_buf_is_compute = false;
+       }
+
        if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
                evergreen_dma_copy_buffer(rctx, dst, src, dst_x, src_box->x, src_box->width);
                return;
@@ -3874,7 +3929,7 @@ static void evergreen_set_tess_state(struct pipe_context *ctx,
 
        memcpy(rctx->tess_state, default_outer_level, sizeof(float) * 4);
        memcpy(rctx->tess_state+4, default_inner_level, sizeof(float) * 2);
-       rctx->tess_state_dirty = true;
+       rctx->driver_consts[PIPE_SHADER_TESS_CTRL].tcs_default_levels_dirty = true;
 }
 
 static void evergreen_setup_immed_buffer(struct r600_context *rctx,
@@ -4010,6 +4065,7 @@ static void evergreen_set_shader_buffers(struct pipe_context *ctx,
                buf_params.swizzle[3] = PIPE_SWIZZLE_W;
                buf_params.force_swizzle = true;
                buf_params.uncached = 1;
+               buf_params.size_in_bytes = true;
                evergreen_fill_buffer_resource_words(rctx, &resource->b.b,
                                                     &buf_params,
                                                     &rview->skip_mip_address_reloc,
@@ -4023,12 +4079,14 @@ static void evergreen_set_shader_buffers(struct pipe_context *ctx,
        if (old_mask != istate->enabled_mask)
                r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
 
-       if (rctx->cb_misc_state.nr_buffer_rats != util_bitcount(istate->enabled_mask)) {
-               rctx->cb_misc_state.nr_buffer_rats = util_bitcount(istate->enabled_mask);
+       /* construct the target mask */
+       if (rctx->cb_misc_state.buffer_rat_enabled_mask != istate->enabled_mask) {
+               rctx->cb_misc_state.buffer_rat_enabled_mask = istate->enabled_mask;
                r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
        }
 
-       r600_mark_atom_dirty(rctx, &istate->atom);
+       if (shader == PIPE_SHADER_FRAGMENT)
+               r600_mark_atom_dirty(rctx, &istate->atom);
 }
 
 static void evergreen_set_shader_images(struct pipe_context *ctx,
@@ -4066,6 +4124,8 @@ static void evergreen_set_shader_images(struct pipe_context *ctx,
                if (!images || !images[idx].resource) {
                        pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL);
                        istate->enabled_mask &= ~(1 << i);
+                       istate->compressed_colortex_mask &= ~(1 << i);
+                       istate->compressed_depthtex_mask &= ~(1 << i);
                        continue;
                }
 
@@ -4197,14 +4257,73 @@ static void evergreen_set_shader_images(struct pipe_context *ctx,
        if (old_mask != istate->enabled_mask)
                r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
 
-       if (rctx->cb_misc_state.nr_image_rats != util_bitcount(istate->enabled_mask)) {
-               rctx->cb_misc_state.nr_image_rats = util_bitcount(istate->enabled_mask);
+       if (rctx->cb_misc_state.image_rat_enabled_mask != istate->enabled_mask) {
+               rctx->cb_misc_state.image_rat_enabled_mask = istate->enabled_mask;
                r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
        }
 
-       r600_mark_atom_dirty(rctx, &istate->atom);
+       if (shader == PIPE_SHADER_FRAGMENT)
+               r600_mark_atom_dirty(rctx, &istate->atom);
+}
+
+/*
+ * Copy the binding held in constant buffer slot `slot` of stage `shader`
+ * into *cbuf.
+ *
+ * user_buffer is always cleared. buffer_offset and buffer_size are copied
+ * from the slot, and cbuf->buffer is updated through
+ * pipe_resource_reference().
+ * NOTE(review): pipe_resource_reference() presumably releases whatever
+ * cbuf->buffer pointed to before, so the caller should pass a cbuf whose
+ * buffer field is NULL or a valid resource -- confirm.
+ *
+ * buffer_offset is copied so that re-binding *cbuf later addresses the same
+ * range as the slot it was read from.
+ */
+static void evergreen_get_pipe_constant_buffer(struct r600_context *rctx,
+                                              enum pipe_shader_type shader, uint slot,
+                                              struct pipe_constant_buffer *cbuf)
+{
+       struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
+       struct pipe_constant_buffer *cb;
+       cbuf->user_buffer = NULL;
+
+       cb = &state->cb[slot];
+
+       cbuf->buffer_offset = cb->buffer_offset;
+       cbuf->buffer_size = cb->buffer_size;
+       pipe_resource_reference(&cbuf->buffer, cb->buffer);
 }
 
+/*
+ * Read back `count` entries of rctx->compute_buffers, starting at
+ * start_slot, into sbuf[0 .. count - 1].
+ *
+ * Only PIPE_SHADER_COMPUTE is supported (asserted). For each slot,
+ * sbuf[].buffer is updated through pipe_resource_reference(). For a slot
+ * with a resource, the offset is recovered as the difference between the
+ * address stored in the view's resource words (word 0 plus the
+ * BASE_ADDRESS_HI field of word 2) and the resource's gpu_address, and the
+ * size as resource word 1 plus one. A slot without a resource yields
+ * offset 0 and size 0.
+ */
+static void evergreen_get_shader_buffers(struct r600_context *rctx,
+                                        enum pipe_shader_type shader,
+                                        uint start_slot, uint count,
+                                        struct pipe_shader_buffer *sbuf)
+{
+       assert(shader == PIPE_SHADER_COMPUTE);
+       struct r600_image_state *istate = &rctx->compute_buffers;
+
+       for (uint idx = 0; idx < count; idx++) {
+               struct r600_image_view *rview = &istate->views[start_slot + idx];
+
+               pipe_resource_reference(&sbuf[idx].buffer, rview->base.resource);
+               if (rview->base.resource) {
+                       uint64_t rview_va = ((struct r600_resource *)rview->base.resource)->gpu_address;
+
+                       /* rebuild the 64-bit address held in the resource words */
+                       uint64_t prog_va = rview->resource_words[0];
+
+                       prog_va += ((uint64_t)G_030008_BASE_ADDRESS_HI(rview->resource_words[2])) << 32;
+                       prog_va -= rview_va;
+
+                       sbuf[idx].buffer_offset = prog_va & 0xffffffff;
+                       sbuf[idx].buffer_size = rview->resource_words[1] + 1;
+               } else {
+                       sbuf[idx].buffer_offset = 0;
+                       sbuf[idx].buffer_size = 0;
+               }
+       }
+}
+
+/*
+ * Fill *st with the compute state read from the context: the shader in
+ * cs_shader_state, compute constant buffer slot 0 and compute shader
+ * buffer slots 0 through 2.
+ */
+static void evergreen_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st)
+{
+       struct r600_context *r600 = (struct r600_context *)ctx;
+
+       /* compute shader */
+       st->saved_compute = r600->cs_shader_state.shader;
+
+       /* compute constant buffer, slot 0 */
+       evergreen_get_pipe_constant_buffer(r600, PIPE_SHADER_COMPUTE, 0,
+                                          &st->saved_const0);
+
+       /* compute shader buffers, slots 0..2 */
+       evergreen_get_shader_buffers(r600, PIPE_SHADER_COMPUTE, 0, 3,
+                                    st->saved_ssbo);
+}
+
+
 void evergreen_init_state_functions(struct r600_context *rctx)
 {
        unsigned id = 1;
@@ -4302,6 +4421,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
         else
                 rctx->b.b.get_sample_position = cayman_get_sample_position;
        rctx->b.dma_copy = evergreen_dma_copy;
+       rctx->b.save_qbo_state = evergreen_save_qbo_state;
 
        evergreen_init_compute_state_functions(rctx);
 }
@@ -4336,7 +4456,7 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
        unsigned input_vertex_size, output_vertex_size;
        unsigned input_patch_size, pervertex_output_patch_size, output_patch_size;
        unsigned output_patch0_offset, perpatch_output_offset, lds_size;
-       uint32_t values[16];
+       uint32_t values[8];
        unsigned num_waves;
        unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
        unsigned wave_divisor = (16 * num_pipes);
@@ -4356,7 +4476,6 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
 
        if (rctx->lds_alloc != 0 &&
            rctx->last_ls == ls &&
-           !rctx->tess_state_dirty &&
            rctx->last_num_tcs_input_cp == num_tcs_input_cp &&
            rctx->last_tcs == tcs)
                return;
@@ -4403,17 +4522,12 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
 
        rctx->lds_alloc = (lds_size | (num_waves << 14));
 
-       memcpy(&values[8], rctx->tess_state, 6 * sizeof(float));
-       values[14] = 0;
-       values[15] = 0;
-
-       rctx->tess_state_dirty = false;
        rctx->last_ls = ls;
        rctx->last_tcs = tcs;
        rctx->last_num_tcs_input_cp = num_tcs_input_cp;
 
        constbuf.user_buffer = values;
-       constbuf.buffer_size = 16 * 4;
+       constbuf.buffer_size = 8 * 4;
 
        rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
                                      R600_LDS_INFO_CONST_BUFFER, &constbuf);