r600/sfn: Fix using the result of a fetch instruction in next fetch
[mesa.git] / src / gallium / drivers / r600 / evergreen_state.c
index 629385075b20e2248f352d92f8521073369a5707..9c103c59062630ceeb25042b7bab72d5d03aae9e 100644 (file)
@@ -239,23 +239,27 @@ static bool r600_is_zs_format_supported(enum pipe_format format)
        return r600_translate_dbformat(format) != ~0U;
 }
 
-boolean evergreen_is_format_supported(struct pipe_screen *screen,
-                                     enum pipe_format format,
-                                     enum pipe_texture_target target,
-                                     unsigned sample_count,
-                                     unsigned usage)
+bool evergreen_is_format_supported(struct pipe_screen *screen,
+                                  enum pipe_format format,
+                                  enum pipe_texture_target target,
+                                  unsigned sample_count,
+                                  unsigned storage_sample_count,
+                                  unsigned usage)
 {
        struct r600_screen *rscreen = (struct r600_screen*)screen;
        unsigned retval = 0;
 
        if (target >= PIPE_MAX_TEXTURE_TYPES) {
                R600_ERR("r600: unsupported texture type %d\n", target);
-               return FALSE;
+               return false;
        }
 
+       if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
+               return false;
+
        if (sample_count > 1) {
                if (!rscreen->has_msaa)
-                       return FALSE;
+                       return false;
 
                switch (sample_count) {
                case 2:
@@ -263,7 +267,7 @@ boolean evergreen_is_format_supported(struct pipe_screen *screen,
                case 8:
                        break;
                default:
-                       return FALSE;
+                       return false;
                }
        }
 
@@ -488,8 +492,8 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
                                S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
        rs->pa_cl_clip_cntl =
                S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) |
-               S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
-               S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
+               S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) |
+               S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) |
                S_028810_DX_LINEAR_ATTR_CLIP_ENA(1) |
                S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard);
        rs->multisample_enable = state->multisample;
@@ -572,11 +576,19 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
        unsigned max_aniso = rscreen->force_aniso >= 0 ? rscreen->force_aniso
                                                       : state->max_anisotropy;
        unsigned max_aniso_ratio = r600_tex_aniso_filter(max_aniso);
+       float max_lod = state->max_lod;
 
        if (!ss) {
                return NULL;
        }
 
+       /* If the min_mip_filter is NONE, then the texture has no mipmapping and
+        * MIP_FILTER will also be set to NONE. However, if more than one LOD is
+        * configured, then the texture lookup seems to fail for some specific texture
+        * formats. Forcing the number of LODs to one in this case fixes it. */
+       if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
+               max_lod = state->min_lod;
+
        ss->border_color_use = sampler_state_needs_border_color(state);
 
        /* R_03C000_SQ_TEX_SAMPLER_WORD0_0 */
@@ -593,7 +605,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx,
        /* R_03C004_SQ_TEX_SAMPLER_WORD1_0 */
        ss->tex_sampler_words[1] =
                S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
-               S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8));
+               S_03C004_MAX_LOD(S_FIXED(CLAMP(max_lod, 0, 15), 8));
        /* R_03C008_SQ_TEX_SAMPLER_WORD2_0 */
        ss->tex_sampler_words[2] =
                S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
@@ -686,7 +698,7 @@ texture_buffer_sampler_view(struct r600_context *rctx,
        view->tex_resource = &tmp->resource;
 
        if (tmp->resource.gpu_address)
-               LIST_ADDTAIL(&view->list, &rctx->texture_buffers);
+               list_addtail(&view->list, &rctx->texture_buffers);
        return &view->base;
 }
 
@@ -1296,7 +1308,7 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx,
        surf->cb_color_view = 0;
 
        /* Set the buffer range the GPU will have access to: */
-       util_range_add(&r600_resource(pipe_buffer)->valid_buffer_range,
+       util_range_add(pipe_buffer, &r600_resource(pipe_buffer)->valid_buffer_range,
                       0, pipe_buffer->width0);
 }
 
@@ -1591,7 +1603,7 @@ static void evergreen_set_min_samples(struct pipe_context *ctx, unsigned min_sam
 }
 
 /* 8xMSAA */
-static uint32_t sample_locs_8x[] = {
+static const uint32_t sample_locs_8x[] = {
        FILL_SREG(-1,  1,  1,  5,  3, -5,  5,  3),
        FILL_SREG(-7, -1, -3, -7,  7, -3, -5,  7),
        FILL_SREG(-1,  1,  1,  5,  3, -5,  5,  3),
@@ -1856,7 +1868,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
                if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
                        cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
                                tex->cmask_buffer, RADEON_USAGE_READWRITE,
-                               RADEON_PRIO_CMASK);
+                               RADEON_PRIO_SEPARATE_META);
                } else {
                        cmask_reloc = reloc;
                }
@@ -2045,7 +2057,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
                radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
                radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
                reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, &rtex->resource,
-                                                 RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
+                                                 RADEON_USAGE_READWRITE, RADEON_PRIO_SEPARATE_META);
                radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
                radeon_emit(cs, reloc_idx);
        } else {
@@ -2394,6 +2406,37 @@ static void evergreen_emit_cs_sampler_views(struct r600_context *rctx, struct r6
                                     EG_FETCH_CONSTANTS_OFFSET_CS + R600_MAX_CONST_BUFFERS, RADEON_CP_PACKET3_COMPUTE_MODE);
 }
 
+static void evergreen_convert_border_color(union pipe_color_union *in,
+                                           union pipe_color_union *out,
+                                           enum pipe_format format)
+{
+       if (util_format_is_pure_integer(format) &&
+                !util_format_is_depth_or_stencil(format)) {
+               const struct util_format_description *d = util_format_description(format);
+               /* Scale by the channel's max value; 1ull so the shift stays defined where unsigned long is 32-bit. */
+               for (int i = 0; i < d->nr_channels; ++i) {
+                       int cs = d->channel[i].size;
+                       if (d->channel[i].type == UTIL_FORMAT_TYPE_SIGNED)
+                               out->f[i] = (double)(in->i[i]) / ((1ull << (cs - 1)) - 1 );
+                       else if (d->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
+                               out->f[i] = (double)(in->ui[i]) / ((1ull << cs) - 1 );
+                       else
+                               out->f[i] = 0;
+               }
+               /* Channels at index >= nr_channels are left untouched in *out. */
+       } else {
+               switch (format) {
+               case PIPE_FORMAT_X24S8_UINT:
+               case PIPE_FORMAT_X32_S8X24_UINT:
+                       out->f[0] = (double)(in->ui[0]) / 255.0;
+                       out->f[1] = out->f[2] = out->f[3] = 0.0f;
+                       break;
+               default:
+                       memcpy(out->f, in->f, 4 * sizeof(float));
+               }
+       }
+}
+
 static void evergreen_emit_sampler_states(struct r600_context *rctx,
                                struct r600_textures_info *texinfo,
                                unsigned resource_id_base,
@@ -2402,6 +2445,8 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
 {
        struct radeon_cmdbuf *cs = rctx->b.gfx.cs;
        uint32_t dirty_mask = texinfo->states.dirty_mask;
+       union pipe_color_union border_color = {{0,0,0,1}};
+       union pipe_color_union *border_color_ptr = &border_color;
 
        while (dirty_mask) {
                struct r600_pipe_sampler_state *rstate;
@@ -2410,6 +2455,16 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
                rstate = texinfo->states.states[i];
                assert(rstate);
 
+               if (rstate->border_color_use) {
+                       struct r600_pipe_sampler_view   *rview = texinfo->views.views[i];
+                       if (rview) {
+                               evergreen_convert_border_color(&rstate->border_color,
+                                                              &border_color, rview->base.format);
+                       } else {
+                               border_color_ptr = &rstate->border_color;
+                       }
+               }
+
                radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0) | pkt_flags);
                radeon_emit(cs, (resource_id_base + i) * 3);
                radeon_emit_array(cs, rstate->tex_sampler_words, 3);
@@ -2417,7 +2472,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
                if (rstate->border_color_use) {
                        radeon_set_config_reg_seq(cs, border_index_reg, 5);
                        radeon_emit(cs, i);
-                       radeon_emit_array(cs, rstate->border_color.ui, 4);
+                       radeon_emit_array(cs, border_color_ptr->ui, 4);
                }
        }
        texinfo->states.dirty_mask = 0;
@@ -3309,6 +3364,12 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
                                spi_baryc_cntl |= spi_baryc_enable_bit[k];
                                have_perspective |= k < 3;
                                have_linear |= !(k < 3);
+                               if (rshader->input[i].uses_interpolate_at_centroid) {
+                                       k = eg_get_interpolator_index(
+                                               rshader->input[i].interpolate,
+                                               TGSI_INTERPOLATE_LOC_CENTROID);
+                                       spi_baryc_cntl |= spi_baryc_enable_bit[k];
+                               }
                        }
                }
 
@@ -3799,9 +3860,9 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
                size = (cheight * pitch) / 4;
                /* emit reloc before writing cs so that cs is always in consistent state */
                radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rsrc->resource,
-                                     RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
+                                     RADEON_USAGE_READ, 0);
                radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rdst->resource,
-                                     RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
+                                     RADEON_USAGE_WRITE, 0);
                radeon_emit(cs, DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size));
                radeon_emit(cs, base >> 8);
                radeon_emit(cs, (detile << 31) | (array_mode << 27) |
@@ -3962,7 +4023,7 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct r600_atomic_buffer_state *astate;
-       int i, idx;
+       unsigned i, idx;
 
        astate = &rctx->atomic_buffer_state;
 
@@ -3975,7 +4036,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
 
                if (!buffers || !buffers[idx].buffer) {
                        pipe_resource_reference(&abuf->buffer, NULL);
-                       astate->enabled_mask &= ~(1 << i);
                        continue;
                }
                buf = &buffers[idx];
@@ -3983,14 +4043,14 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
                pipe_resource_reference(&abuf->buffer, buf->buffer);
                abuf->buffer_offset = buf->buffer_offset;
                abuf->buffer_size = buf->buffer_size;
-               astate->enabled_mask |= (1 << i);
        }
 }
 
 static void evergreen_set_shader_buffers(struct pipe_context *ctx,
                                         enum pipe_shader_type shader, unsigned start_slot,
                                         unsigned count,
-                                        const struct pipe_shader_buffer *buffers)
+                                        const struct pipe_shader_buffer *buffers,
+                                        unsigned writable_bitmask)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct r600_image_state *istate = NULL;
@@ -3998,7 +4058,7 @@ static void evergreen_set_shader_buffers(struct pipe_context *ctx,
        struct r600_tex_color_info color;
        struct eg_buf_res_params buf_params;
        struct r600_resource *resource;
-       int i, idx;
+       unsigned i, idx;
        unsigned old_mask;
 
        if (shader != PIPE_SHADER_FRAGMENT &&
@@ -4092,7 +4152,7 @@ static void evergreen_set_shader_images(struct pipe_context *ctx,
                                        const struct pipe_image_view *images)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       int i;
+       unsigned i;
        struct r600_image_view *rview;
        struct pipe_resource *image;
        struct r600_resource *resource;
@@ -4813,20 +4873,15 @@ static void cayman_write_count_to_gds(struct r600_context *rctx,
        radeon_emit(cs, reloc);
 }
 
-bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
-                                       struct r600_pipe_shader *cs_shader,
-                                       struct r600_shader_atomic *combined_atomics,
-                                       uint8_t *atomic_used_mask_p)
+void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
+                                             struct r600_pipe_shader *cs_shader,
+                                             struct r600_shader_atomic *combined_atomics,
+                                             uint8_t *atomic_used_mask_p)
 {
-       struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
-       unsigned pkt_flags = 0;
        uint8_t atomic_used_mask = 0;
        int i, j, k;
        bool is_compute = cs_shader ? true : false;
 
-       if (is_compute)
-               pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
-
        for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) {
                uint8_t num_atomic_stage;
                struct r600_pipe_shader *pshader;
@@ -4859,8 +4914,25 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
                        }
                }
        }
+       *atomic_used_mask_p = atomic_used_mask;
+}
+
+void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
+                                       bool is_compute,
+                                       struct r600_shader_atomic *combined_atomics,
+                                       uint8_t atomic_used_mask)
+{
+       struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
+       unsigned pkt_flags = 0;
+       uint32_t mask;
+
+       if (is_compute)
+               pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
+
+       mask = atomic_used_mask;
+       if (!mask)
+               return;
 
-       uint32_t mask = atomic_used_mask;
        while (mask) {
                unsigned atomic_index = u_bit_scan(&mask);
                struct r600_shader_atomic *atomic = &combined_atomics[atomic_index];
@@ -4872,8 +4944,6 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
                else
                        evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags);
        }
-       *atomic_used_mask_p = atomic_used_mask;
-       return true;
 }
 
 void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
@@ -4885,7 +4955,7 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
        struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
        uint32_t pkt_flags = 0;
        uint32_t event = EVENT_TYPE_PS_DONE;
-       uint32_t mask = astate->enabled_mask;
+       uint32_t mask;
        uint64_t dst_offset;
        unsigned reloc;