C_SOURCES := \
- r600_blit.c \
- r600_buffer.c \
- r600_hw_context.c \
- radeonsi_pipe.c \
- r600_query.c \
- r600_resource.c \
- radeonsi_shader.c \
- r600_translate.c \
- radeonsi_pm4.c \
- radeonsi_compute.c \
+ si_blit.c \
+ si_buffer.c \
+ si_commands.c \
+ si_compute.c \
si_descriptors.c \
+ si_hw_context.c \
+ si_pipe.c \
+ si_pm4.c \
+ si_query.c \
+ si_resource.c \
+ si_shader.c \
si_state.c \
si_state_draw.c \
- si_commands.c \
- radeonsi_uvd.c
+ si_translate.c \
+ si_uvd.c
+++ /dev/null
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- */
-#ifndef R600_H
-#define R600_H
-
-#include "../../winsys/radeon/drm/radeon_winsys.h"
-#include "util/u_double_list.h"
-#include "util/u_transfer.h"
-
-#include "radeonsi_resource.h"
-
-struct winsys_handle;
-
-/* R600/R700 STATES */
-struct r600_query {
- union {
- uint64_t u64;
- boolean b;
- struct pipe_query_data_so_statistics so;
- } result;
- /* The kind of query */
- unsigned type;
- /* Offset of the first result for current query */
- unsigned results_start;
- /* Offset of the next free result after current query data */
- unsigned results_end;
- /* Size of the result in memory for both begin_query and end_query,
- * this can be one or two numbers, or it could even be a size of a structure. */
- unsigned result_size;
- /* The buffer where query results are stored. It's used as a ring,
- * data blocks for current query are stored sequentially from
- * results_start to results_end, with wrapping on the buffer end */
- struct r600_resource *buffer;
- /* The number of dwords for begin_query or end_query. */
- unsigned num_cs_dw;
- /* linked list of queries */
- struct list_head list;
-};
-
-struct r600_context;
-struct r600_screen;
-
-void si_get_backend_mask(struct r600_context *ctx);
-void si_context_flush(struct r600_context *ctx, unsigned flags);
-void si_begin_new_cs(struct r600_context *ctx);
-
-struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type);
-void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query);
-boolean r600_context_query_result(struct r600_context *ctx,
- struct r600_query *query,
- boolean wait, void *vresult);
-void r600_query_begin(struct r600_context *ctx, struct r600_query *query);
-void r600_query_end(struct r600_context *ctx, struct r600_query *query);
-void r600_context_queries_suspend(struct r600_context *ctx);
-void r600_context_queries_resume(struct r600_context *ctx);
-void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
- int flag_wait);
-
-bool si_is_timer_query(unsigned type);
-bool si_query_needs_begin(unsigned type);
-void si_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
-
-int si_context_init(struct r600_context *ctx);
-
-#endif
+++ /dev/null
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#include "util/u_surface.h"
-#include "util/u_blitter.h"
-#include "util/u_format.h"
-#include "radeonsi_pipe.h"
-#include "si_state.h"
-
-enum r600_blitter_op /* bitmask */
-{
- R600_SAVE_TEXTURES = 1,
- R600_SAVE_FRAMEBUFFER = 2,
- R600_DISABLE_RENDER_COND = 4,
-
- R600_CLEAR = 0,
-
- R600_CLEAR_SURFACE = R600_SAVE_FRAMEBUFFER,
-
- R600_COPY = R600_SAVE_FRAMEBUFFER | R600_SAVE_TEXTURES |
- R600_DISABLE_RENDER_COND,
-
- R600_BLIT = R600_SAVE_FRAMEBUFFER | R600_SAVE_TEXTURES |
- R600_DISABLE_RENDER_COND,
-
- R600_DECOMPRESS = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND,
-
- R600_COLOR_RESOLVE = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND
-};
-
-static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
-
- r600_context_queries_suspend(rctx);
-
- util_blitter_save_blend(rctx->blitter, rctx->queued.named.blend);
- util_blitter_save_depth_stencil_alpha(rctx->blitter, rctx->queued.named.dsa);
- util_blitter_save_stencil_ref(rctx->blitter, &rctx->stencil_ref);
- util_blitter_save_rasterizer(rctx->blitter, rctx->queued.named.rasterizer);
- util_blitter_save_fragment_shader(rctx->blitter, rctx->ps_shader);
- util_blitter_save_vertex_shader(rctx->blitter, rctx->vs_shader);
- util_blitter_save_vertex_elements(rctx->blitter, rctx->vertex_elements);
- if (rctx->queued.named.viewport) {
- util_blitter_save_viewport(rctx->blitter, &rctx->queued.named.viewport->viewport);
- }
- util_blitter_save_vertex_buffer_slot(rctx->blitter, rctx->vertex_buffer);
- util_blitter_save_so_targets(rctx->blitter, rctx->b.streamout.num_targets,
- (struct pipe_stream_output_target**)rctx->b.streamout.targets);
-
- if (op & R600_SAVE_FRAMEBUFFER)
- util_blitter_save_framebuffer(rctx->blitter, &rctx->framebuffer);
-
- if (op & R600_SAVE_TEXTURES) {
- util_blitter_save_fragment_sampler_states(
- rctx->blitter, rctx->samplers[PIPE_SHADER_FRAGMENT].n_samplers,
- (void**)rctx->samplers[PIPE_SHADER_FRAGMENT].samplers);
-
- util_blitter_save_fragment_sampler_views(rctx->blitter,
- util_last_bit(rctx->samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask &
- ((1 << NUM_TEX_UNITS) - 1)),
- rctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
- }
-
- if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) {
- rctx->saved_render_cond = rctx->current_render_cond;
- rctx->saved_render_cond_cond = rctx->current_render_cond_cond;
- rctx->saved_render_cond_mode = rctx->current_render_cond_mode;
- rctx->b.b.render_condition(&rctx->b.b, NULL, FALSE, 0);
- }
-
-}
-
-static void r600_blitter_end(struct pipe_context *ctx)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
- if (rctx->saved_render_cond) {
- rctx->b.b.render_condition(&rctx->b.b,
- rctx->saved_render_cond,
- rctx->saved_render_cond_cond,
- rctx->saved_render_cond_mode);
- rctx->saved_render_cond = NULL;
- }
- r600_context_queries_resume(rctx);
-}
-
-static unsigned u_max_sample(struct pipe_resource *r)
-{
- return r->nr_samples ? r->nr_samples - 1 : 0;
-}
-
-static void r600_blit_decompress_depth(struct pipe_context *ctx,
- struct r600_texture *texture,
- struct r600_texture *staging,
- unsigned first_level, unsigned last_level,
- unsigned first_layer, unsigned last_layer,
- unsigned first_sample, unsigned last_sample)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
- unsigned layer, level, sample, checked_last_layer, max_layer, max_sample;
- float depth = 1.0f;
- const struct util_format_description *desc;
- void **custom_dsa;
- struct r600_texture *flushed_depth_texture = staging ?
- staging : texture->flushed_depth_texture;
-
- if (!staging && !texture->dirty_level_mask)
- return;
-
- max_sample = u_max_sample(&texture->resource.b.b);
-
- desc = util_format_description(flushed_depth_texture->resource.b.b.format);
- switch (util_format_has_depth(desc) | util_format_has_stencil(desc) << 1) {
- default:
- assert(!"No depth or stencil to uncompress");
- return;
- case 3:
- custom_dsa = rctx->custom_dsa_flush_depth_stencil;
- break;
- case 2:
- custom_dsa = rctx->custom_dsa_flush_stencil;
- break;
- case 1:
- custom_dsa = rctx->custom_dsa_flush_depth;
- break;
- }
-
- for (level = first_level; level <= last_level; level++) {
- if (!staging && !(texture->dirty_level_mask & (1 << level)))
- continue;
-
- /* The smaller the mipmap level, the less layers there are
- * as far as 3D textures are concerned. */
- max_layer = util_max_layer(&texture->resource.b.b, level);
- checked_last_layer = last_layer < max_layer ? last_layer : max_layer;
-
- for (layer = first_layer; layer <= checked_last_layer; layer++) {
- for (sample = first_sample; sample <= last_sample; sample++) {
- struct pipe_surface *zsurf, *cbsurf, surf_tmpl;
-
- surf_tmpl.format = texture->resource.b.b.format;
- surf_tmpl.u.tex.level = level;
- surf_tmpl.u.tex.first_layer = layer;
- surf_tmpl.u.tex.last_layer = layer;
-
- zsurf = ctx->create_surface(ctx, &texture->resource.b.b, &surf_tmpl);
-
- surf_tmpl.format = flushed_depth_texture->resource.b.b.format;
- cbsurf = ctx->create_surface(ctx,
- (struct pipe_resource*)flushed_depth_texture, &surf_tmpl);
-
- r600_blitter_begin(ctx, R600_DECOMPRESS);
- util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, 1 << sample,
- custom_dsa[sample], depth);
- r600_blitter_end(ctx);
-
- pipe_surface_reference(&zsurf, NULL);
- pipe_surface_reference(&cbsurf, NULL);
- }
- }
-
- /* The texture will always be dirty if some layers aren't flushed.
- * I don't think this case can occur though. */
- if (!staging &&
- first_layer == 0 && last_layer == max_layer &&
- first_sample == 0 && last_sample == max_sample) {
- texture->dirty_level_mask &= ~(1 << level);
- }
- }
-}
-
-static void si_blit_decompress_depth_in_place(struct r600_context *rctx,
- struct r600_texture *texture,
- unsigned first_level, unsigned last_level,
- unsigned first_layer, unsigned last_layer)
-{
- struct pipe_surface *zsurf, surf_tmpl = {{0}};
- unsigned layer, max_layer, checked_last_layer, level;
-
- surf_tmpl.format = texture->resource.b.b.format;
-
- for (level = first_level; level <= last_level; level++) {
- if (!(texture->dirty_level_mask & (1 << level)))
- continue;
-
- surf_tmpl.u.tex.level = level;
-
- /* The smaller the mipmap level, the less layers there are
- * as far as 3D textures are concerned. */
- max_layer = util_max_layer(&texture->resource.b.b, level);
- checked_last_layer = last_layer < max_layer ? last_layer : max_layer;
-
- for (layer = first_layer; layer <= checked_last_layer; layer++) {
- surf_tmpl.u.tex.first_layer = layer;
- surf_tmpl.u.tex.last_layer = layer;
-
- zsurf = rctx->b.b.create_surface(&rctx->b.b, &texture->resource.b.b, &surf_tmpl);
-
- r600_blitter_begin(&rctx->b.b, R600_DECOMPRESS);
- util_blitter_custom_depth_stencil(rctx->blitter, zsurf, NULL, ~0,
- rctx->custom_dsa_flush_inplace,
- 1.0f);
- r600_blitter_end(&rctx->b.b);
-
- pipe_surface_reference(&zsurf, NULL);
- }
-
- /* The texture will always be dirty if some layers aren't flushed.
- * I don't think this case occurs often though. */
- if (first_layer == 0 && last_layer == max_layer) {
- texture->dirty_level_mask &= ~(1 << level);
- }
- }
-}
-
-void si_flush_depth_textures(struct r600_context *rctx,
- struct r600_textures_info *textures)
-{
- unsigned i;
-
- for (i = 0; i < textures->n_views; ++i) {
- struct pipe_sampler_view *view;
- struct r600_texture *tex;
-
- view = textures->views.views[i];
- if (!view) continue;
-
- tex = (struct r600_texture *)view->texture;
- if (!tex->is_depth || tex->is_flushing_texture)
- continue;
-
- si_blit_decompress_depth_in_place(rctx, tex,
- view->u.tex.first_level, view->u.tex.last_level,
- 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
- }
-}
-
-static void r600_blit_decompress_color(struct pipe_context *ctx,
- struct r600_texture *rtex,
- unsigned first_level, unsigned last_level,
- unsigned first_layer, unsigned last_layer)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
- unsigned layer, level, checked_last_layer, max_layer;
-
- if (!rtex->dirty_level_mask)
- return;
-
- for (level = first_level; level <= last_level; level++) {
- if (!(rtex->dirty_level_mask & (1 << level)))
- continue;
-
- /* The smaller the mipmap level, the less layers there are
- * as far as 3D textures are concerned. */
- max_layer = util_max_layer(&rtex->resource.b.b, level);
- checked_last_layer = last_layer < max_layer ? last_layer : max_layer;
-
- for (layer = first_layer; layer <= checked_last_layer; layer++) {
- struct pipe_surface *cbsurf, surf_tmpl;
-
- surf_tmpl.format = rtex->resource.b.b.format;
- surf_tmpl.u.tex.level = level;
- surf_tmpl.u.tex.first_layer = layer;
- surf_tmpl.u.tex.last_layer = layer;
- cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);
-
- r600_blitter_begin(ctx, R600_DECOMPRESS);
- util_blitter_custom_color(rctx->blitter, cbsurf,
- rctx->custom_blend_decompress);
- r600_blitter_end(ctx);
-
- pipe_surface_reference(&cbsurf, NULL);
- }
-
- /* The texture will always be dirty if some layers aren't flushed.
- * I don't think this case occurs often though. */
- if (first_layer == 0 && last_layer == max_layer) {
- rtex->dirty_level_mask &= ~(1 << level);
- }
- }
-}
-
-void r600_decompress_color_textures(struct r600_context *rctx,
- struct r600_textures_info *textures)
-{
- unsigned i;
- unsigned mask = textures->compressed_colortex_mask;
-
- while (mask) {
- struct pipe_sampler_view *view;
- struct r600_texture *tex;
-
- i = u_bit_scan(&mask);
-
- view = textures->views.views[i];
- assert(view);
-
- tex = (struct r600_texture *)view->texture;
- assert(tex->cmask.size || tex->fmask.size);
-
- r600_blit_decompress_color(&rctx->b.b, tex,
- view->u.tex.first_level, view->u.tex.last_level,
- 0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
- }
-}
-
-static void r600_clear(struct pipe_context *ctx, unsigned buffers,
- const union pipe_color_union *color,
- double depth, unsigned stencil)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
- struct pipe_framebuffer_state *fb = &rctx->framebuffer;
-
- r600_blitter_begin(ctx, R600_CLEAR);
- util_blitter_clear(rctx->blitter, fb->width, fb->height,
- util_framebuffer_get_num_layers(fb),
- buffers, color, depth, stencil);
- r600_blitter_end(ctx);
-}
-
-static void r600_clear_render_target(struct pipe_context *ctx,
- struct pipe_surface *dst,
- const union pipe_color_union *color,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
-
- r600_blitter_begin(ctx, R600_CLEAR_SURFACE);
- util_blitter_clear_render_target(rctx->blitter, dst, color,
- dstx, dsty, width, height);
- r600_blitter_end(ctx);
-}
-
-static void r600_clear_depth_stencil(struct pipe_context *ctx,
- struct pipe_surface *dst,
- unsigned clear_flags,
- double depth,
- unsigned stencil,
- unsigned dstx, unsigned dsty,
- unsigned width, unsigned height)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
-
- r600_blitter_begin(ctx, R600_CLEAR_SURFACE);
- util_blitter_clear_depth_stencil(rctx->blitter, dst, clear_flags, depth, stencil,
- dstx, dsty, width, height);
- r600_blitter_end(ctx);
-}
-
-/* Helper for decompressing a portion of a color or depth resource before
- * blitting if any decompression is needed.
- * The driver doesn't decompress resources automatically while u_blitter is
- * rendering. */
-static void r600_decompress_subresource(struct pipe_context *ctx,
- struct pipe_resource *tex,
- unsigned level,
- unsigned first_layer, unsigned last_layer)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
- struct r600_texture *rtex = (struct r600_texture*)tex;
-
- if (rtex->is_depth && !rtex->is_flushing_texture) {
- si_blit_decompress_depth_in_place(rctx, rtex,
- level, level,
- first_layer, last_layer);
- } else if (rtex->fmask.size || rtex->cmask.size) {
- r600_blit_decompress_color(ctx, rtex, level, level,
- first_layer, last_layer);
- }
-}
-
-struct texture_orig_info {
- unsigned format;
- unsigned width0;
- unsigned height0;
- unsigned npix_x;
- unsigned npix_y;
- unsigned npix0_x;
- unsigned npix0_y;
-};
-
-static void r600_compressed_to_blittable(struct pipe_resource *tex,
- unsigned level,
- struct texture_orig_info *orig)
-{
- struct r600_texture *rtex = (struct r600_texture*)tex;
- unsigned pixsize = util_format_get_blocksize(rtex->resource.b.b.format);
- int new_format;
- int new_height, new_width;
-
- orig->format = tex->format;
- orig->width0 = tex->width0;
- orig->height0 = tex->height0;
- orig->npix0_x = rtex->surface.level[0].npix_x;
- orig->npix0_y = rtex->surface.level[0].npix_y;
- orig->npix_x = rtex->surface.level[level].npix_x;
- orig->npix_y = rtex->surface.level[level].npix_y;
-
- if (pixsize == 8)
- new_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
- else
- new_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
-
- new_width = util_format_get_nblocksx(tex->format, orig->width0);
- new_height = util_format_get_nblocksy(tex->format, orig->height0);
-
- tex->width0 = new_width;
- tex->height0 = new_height;
- tex->format = new_format;
- rtex->surface.level[0].npix_x = util_format_get_nblocksx(orig->format, orig->npix0_x);
- rtex->surface.level[0].npix_y = util_format_get_nblocksy(orig->format, orig->npix0_y);
- rtex->surface.level[level].npix_x = util_format_get_nblocksx(orig->format, orig->npix_x);
- rtex->surface.level[level].npix_y = util_format_get_nblocksy(orig->format, orig->npix_y);
-
- /* By dividing the dimensions by 4, we effectively decrement
- * last_level by 2, therefore the last 2 mipmap levels disappear and
- * aren't blittable. Note that the last 3 mipmap levels (4x4, 2x2,
- * 1x1) have equal slice sizes, which is an important assumption
- * for this to work.
- *
- * In order to make the last 2 mipmap levels blittable, we have to
- * add the slice size of the last mipmap level to the texture
- * address, so that even though the hw thinks it reads last_level-2,
- * it will actually read last_level-1, and if we add the slice size*2,
- * it will read last_level. That's how this workaround works.
- */
- if (level > rtex->resource.b.b.last_level-2)
- rtex->mipmap_shift = level - (rtex->resource.b.b.last_level-2);
-}
-
-static void r600_change_format(struct pipe_resource *tex,
- unsigned level,
- struct texture_orig_info *orig,
- enum pipe_format format)
-{
- struct r600_texture *rtex = (struct r600_texture*)tex;
-
- orig->format = tex->format;
- orig->width0 = tex->width0;
- orig->height0 = tex->height0;
- orig->npix0_x = rtex->surface.level[0].npix_x;
- orig->npix0_y = rtex->surface.level[0].npix_y;
- orig->npix_x = rtex->surface.level[level].npix_x;
- orig->npix_y = rtex->surface.level[level].npix_y;
-
- tex->format = format;
-}
-
-static void r600_reset_blittable_to_orig(struct pipe_resource *tex,
- unsigned level,
- struct texture_orig_info *orig)
-{
- struct r600_texture *rtex = (struct r600_texture*)tex;
-
- tex->format = orig->format;
- tex->width0 = orig->width0;
- tex->height0 = orig->height0;
- rtex->surface.level[0].npix_x = orig->npix0_x;
- rtex->surface.level[0].npix_y = orig->npix0_y;
- rtex->surface.level[level].npix_x = orig->npix_x;
- rtex->surface.level[level].npix_y = orig->npix_y;
- rtex->mipmap_shift = 0;
-}
-
-static void r600_resource_copy_region(struct pipe_context *ctx,
- struct pipe_resource *dst,
- unsigned dst_level,
- unsigned dstx, unsigned dsty, unsigned dstz,
- struct pipe_resource *src,
- unsigned src_level,
- const struct pipe_box *src_box)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
- struct texture_orig_info orig_info[2];
- struct pipe_box sbox;
- const struct pipe_box *psbox = src_box;
- boolean restore_orig[2];
-
- /* Fallback for buffers. */
- if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
- si_copy_buffer(rctx, dst, src, dstx, src_box->x, src_box->width);
- return;
- }
-
- memset(orig_info, 0, sizeof(orig_info));
-
- /* The driver doesn't decompress resources automatically while
- * u_blitter is rendering. */
- r600_decompress_subresource(ctx, src, src_level,
- src_box->z, src_box->z + src_box->depth - 1);
-
- restore_orig[0] = restore_orig[1] = FALSE;
-
- if (util_format_is_compressed(src->format) &&
- util_format_is_compressed(dst->format)) {
- r600_compressed_to_blittable(src, src_level, &orig_info[0]);
- restore_orig[0] = TRUE;
- sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x);
- sbox.y = util_format_get_nblocksy(orig_info[0].format, src_box->y);
- sbox.z = src_box->z;
- sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width);
- sbox.height = util_format_get_nblocksy(orig_info[0].format, src_box->height);
- sbox.depth = src_box->depth;
- psbox=&sbox;
-
- r600_compressed_to_blittable(dst, dst_level, &orig_info[1]);
- restore_orig[1] = TRUE;
- /* translate the dst box as well */
- dstx = util_format_get_nblocksx(orig_info[1].format, dstx);
- dsty = util_format_get_nblocksy(orig_info[1].format, dsty);
- } else if (!util_blitter_is_copy_supported(rctx->blitter, dst, src)) {
- unsigned blocksize = util_format_get_blocksize(src->format);
-
- switch (blocksize) {
- case 1:
- r600_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R8_UNORM);
- r600_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R8_UNORM);
- break;
- case 2:
- r600_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R8G8_UNORM);
- r600_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R8G8_UNORM);
- break;
- case 4:
- r600_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R8G8B8A8_UNORM);
- r600_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R8G8B8A8_UNORM);
- break;
- case 8:
- r600_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R16G16B16A16_UINT);
- r600_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R16G16B16A16_UINT);
- break;
- case 16:
- r600_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R32G32B32A32_UINT);
- r600_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R32G32B32A32_UINT);
- break;
- default:
- fprintf(stderr, "Unhandled format %s with blocksize %u\n",
- util_format_short_name(src->format), blocksize);
- assert(0);
- }
- restore_orig[0] = TRUE;
- restore_orig[1] = TRUE;
- }
-
- r600_blitter_begin(ctx, R600_COPY);
- util_blitter_copy_texture(rctx->blitter, dst, dst_level, dstx, dsty, dstz,
- src, src_level, psbox);
- r600_blitter_end(ctx);
-
- if (restore_orig[0])
- r600_reset_blittable_to_orig(src, src_level, &orig_info[0]);
-
- if (restore_orig[1])
- r600_reset_blittable_to_orig(dst, dst_level, &orig_info[1]);
-}
-
-/* For MSAA integer resolving to work, we change the format to NORM using this function. */
-static enum pipe_format int_to_norm_format(enum pipe_format format)
-{
- switch (format) {
-#define REPLACE_FORMAT_SIGN(format,sign) \
- case PIPE_FORMAT_##format##_##sign##INT: \
- return PIPE_FORMAT_##format##_##sign##NORM
-#define REPLACE_FORMAT(format) \
- REPLACE_FORMAT_SIGN(format, U); \
- REPLACE_FORMAT_SIGN(format, S)
-
- REPLACE_FORMAT_SIGN(B10G10R10A2, U);
- REPLACE_FORMAT(R8);
- REPLACE_FORMAT(R8G8);
- REPLACE_FORMAT(R8G8B8X8);
- REPLACE_FORMAT(R8G8B8A8);
- REPLACE_FORMAT(A8);
- REPLACE_FORMAT(I8);
- REPLACE_FORMAT(L8);
- REPLACE_FORMAT(L8A8);
- REPLACE_FORMAT(R16);
- REPLACE_FORMAT(R16G16);
- REPLACE_FORMAT(R16G16B16X16);
- REPLACE_FORMAT(R16G16B16A16);
- REPLACE_FORMAT(A16);
- REPLACE_FORMAT(I16);
- REPLACE_FORMAT(L16);
- REPLACE_FORMAT(L16A16);
-
-#undef REPLACE_FORMAT
-#undef REPLACE_FORMAT_SIGN
- default:
- return format;
- }
-}
-
-static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
- const struct pipe_blit_info *info)
-{
- struct r600_context *rctx = (struct r600_context*)ctx;
- struct r600_texture *dst = (struct r600_texture*)info->dst.resource;
- unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level);
- unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level);
- enum pipe_format format = int_to_norm_format(info->dst.format);
- unsigned sample_mask = ~0;
-
- if (info->src.resource->nr_samples > 1 &&
- info->dst.resource->nr_samples <= 1 &&
- util_max_layer(info->src.resource, 0) == 0 &&
- util_max_layer(info->dst.resource, info->dst.level) == 0 &&
- info->dst.format == info->src.format &&
- !util_format_is_pure_integer(format) &&
- !util_format_is_depth_or_stencil(format) &&
- !info->scissor_enable &&
- (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA &&
- dst_width == info->src.resource->width0 &&
- dst_height == info->src.resource->height0 &&
- info->dst.box.x == 0 &&
- info->dst.box.y == 0 &&
- info->dst.box.width == dst_width &&
- info->dst.box.height == dst_height &&
- info->dst.box.depth == 1 &&
- info->src.box.x == 0 &&
- info->src.box.y == 0 &&
- info->src.box.width == dst_width &&
- info->src.box.height == dst_height &&
- info->src.box.depth == 1 &&
- dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
- !(dst->surface.flags & RADEON_SURF_SCANOUT)) {
- r600_blitter_begin(ctx, R600_COLOR_RESOLVE);
- util_blitter_custom_resolve_color(rctx->blitter,
- info->dst.resource, info->dst.level,
- info->dst.box.z,
- info->src.resource, info->src.box.z,
- sample_mask, rctx->custom_blend_resolve,
- format);
- r600_blitter_end(ctx);
- return true;
- }
- return false;
-}
-
-static void si_blit(struct pipe_context *ctx,
- const struct pipe_blit_info *info)
-{
- struct r600_context *rctx = (struct r600_context*)ctx;
-
- if (do_hardware_msaa_resolve(ctx, info)) {
- return;
- }
-
- assert(util_blitter_is_blit_supported(rctx->blitter, info));
-
- /* The driver doesn't decompress resources automatically while
- * u_blitter is rendering. */
- r600_decompress_subresource(ctx, info->src.resource, info->src.level,
- info->src.box.z,
- info->src.box.z + info->src.box.depth - 1);
-
- r600_blitter_begin(ctx, R600_BLIT);
- util_blitter_blit(rctx->blitter, info);
- r600_blitter_end(ctx);
-}
-
-static void si_flush_resource(struct pipe_context *ctx,
- struct pipe_resource *resource)
-{
-}
-
-void si_init_blit_functions(struct r600_context *rctx)
-{
- rctx->b.b.clear = r600_clear;
- rctx->b.b.clear_render_target = r600_clear_render_target;
- rctx->b.b.clear_depth_stencil = r600_clear_depth_stencil;
- rctx->b.b.resource_copy_region = r600_resource_copy_region;
- rctx->b.b.blit = si_blit;
- rctx->b.b.flush_resource = si_flush_resource;
- rctx->b.blit_decompress_depth = r600_blit_decompress_depth;
-}
+++ /dev/null
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- * Corbin Simpson <MostAwesomeDude@gmail.com>
- */
-
-#include "pipe/p_screen.h"
-#include "util/u_format.h"
-#include "util/u_math.h"
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
-#include "util/u_upload_mgr.h"
-
-#include "r600.h"
-#include "radeonsi_pipe.h"
-
-void r600_upload_index_buffer(struct r600_context *rctx,
- struct pipe_index_buffer *ib, unsigned count)
-{
- u_upload_data(rctx->b.uploader, 0, count * ib->index_size,
- ib->user_buffer, &ib->offset, &ib->buffer);
-}
-
-void r600_upload_const_buffer(struct r600_context *rctx, struct r600_resource **rbuffer,
- const uint8_t *ptr, unsigned size,
- uint32_t *const_offset)
-{
- if (R600_BIG_ENDIAN) {
- uint32_t *tmpPtr;
- unsigned i;
-
- if (!(tmpPtr = malloc(size))) {
- R600_ERR("Failed to allocate BE swap buffer.\n");
- return;
- }
-
- for (i = 0; i < size / 4; ++i) {
- tmpPtr[i] = util_bswap32(((uint32_t *)ptr)[i]);
- }
-
- u_upload_data(rctx->b.uploader, 0, size, tmpPtr, const_offset,
- (struct pipe_resource**)rbuffer);
-
- free(tmpPtr);
- } else {
- u_upload_data(rctx->b.uploader, 0, size, ptr, const_offset,
- (struct pipe_resource**)rbuffer);
- }
-}
+++ /dev/null
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- */
-#include "../radeon/r600_cs.h"
-#include "radeonsi_pm4.h"
-#include "radeonsi_pipe.h"
-#include "sid.h"
-#include "util/u_memory.h"
-#include <errno.h>
-
-#define GROUP_FORCE_NEW_BLOCK 0
-
-/**
- * Get backends mask.
- *
- * Works out which render backends are in use and stores the result in
- * ctx->backend_mask, one bit per backend. Three strategies are tried in
- * order:
- *  1. decode the backend map reported in the screen info, when it is
- *     flagged as valid;
- *  2. emit a ZPASS_DONE event into a zeroed scratch buffer and set a bit
- *     for every slot that reads back non-zero;
- *  3. assume the lowest num_backends backends are the enabled ones.
- */
-void si_get_backend_mask(struct r600_context *ctx)
-{
- struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
- struct r600_resource *buffer;
- uint32_t *results;
- unsigned num_backends = ctx->screen->b.info.r600_num_backends;
- unsigned i, mask = 0;
-
- /* if backend_map query is supported by the kernel */
- if (ctx->screen->b.info.r600_backend_map_valid) {
- unsigned num_tile_pipes = ctx->screen->b.info.r600_num_tile_pipes;
- unsigned backend_map = ctx->screen->b.info.r600_backend_map;
- /* one 4-bit field per tile pipe, of which the low 3 bits are used */
- unsigned item_width = 4, item_mask = 0x7;
-
- while(num_tile_pipes--) {
- i = backend_map & item_mask;
- mask |= (1<<i);
- backend_map >>= item_width;
- }
- /* an all-zero result is treated as unusable and falls through */
- if (mask != 0) {
- ctx->backend_mask = mask;
- return;
- }
- }
-
- /* otherwise backup path for older kernels */
-
- /* create buffer for event data */
- buffer = r600_resource_create_custom(&ctx->screen->b.b,
- PIPE_USAGE_STAGING,
- ctx->max_db*16);
- if (!buffer)
- goto err;
-
- /* initialize buffer with zeroes */
- results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
- if (results) {
- uint64_t va = 0;
-
- /* 4 dwords (16 bytes) per DB, matching the allocation size above */
- memset(results, 0, ctx->max_db * 4 * 4);
- ctx->b.ws->buffer_unmap(buffer->cs_buf);
-
- /* emit EVENT_WRITE for ZPASS_DONE */
- va = r600_resource_va(&ctx->screen->b.b, (void *)buffer);
- cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
- cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
- cs->buf[cs->cdw++] = va;
- cs->buf[cs->cdw++] = va >> 32;
-
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, buffer, RADEON_USAGE_WRITE);
-
- /* analyze results */
- /* NOTE(review): nothing here flushes the CS explicitly; presumably
-  * buffer_map() submits and waits because the CS is passed in -- confirm. */
- results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_READ);
- if (results) {
- for(i = 0; i < ctx->max_db; i++) {
- /* at least highest bit will be set if backend is used */
- if (results[i*4 + 1])
- mask |= (1<<i);
- }
- ctx->b.ws->buffer_unmap(buffer->cs_buf);
- }
- }
-
- r600_resource_reference(&buffer, NULL);
-
- if (mask != 0) {
- ctx->backend_mask = mask;
- return;
- }
-
-err:
- /* fallback to old method - set num_backends lower bits to 1 */
- /* NOTE(review): with num_backends == 0 this shifts a 32-bit value by 32,
-  * which is undefined in C -- confirm callers guarantee at least one. */
- ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends);
- return;
-}
-
-/**
- * Tell whether a query type is one of the timer queries.
- *
- * \param type  a PIPE_QUERY_* value
- * \return true for TIME_ELAPSED, TIMESTAMP and TIMESTAMP_DISJOINT,
- *         false for everything else
- */
-bool si_is_timer_query(unsigned type)
-{
-	switch (type) {
-	case PIPE_QUERY_TIME_ELAPSED:
-	case PIPE_QUERY_TIMESTAMP:
-	case PIPE_QUERY_TIMESTAMP_DISJOINT:
-		return true;
-	default:
-		return false;
-	}
-}
-
-/**
- * Tell whether a query type has to be started with a begin call.
- *
- * \param type  a PIPE_QUERY_* value
- * \return false only for PIPE_QUERY_TIMESTAMP
- */
-bool si_query_needs_begin(unsigned type)
-{
-	if (type == PIPE_QUERY_TIMESTAMP)
-		return false;
-
-	return true;
-}
-
-/**
- * Make sure the GFX command stream has room for the caller's packets,
- * flushing it when the worst-case total would exceed
- * RADEON_MAX_CMDBUF_DWORDS.
- *
- * \param ctx            context owning the command stream
- * \param num_dw         number of dwords the caller is about to emit
- * \param count_draw_in  when set, also reserve room for all dirty PM4
- *                       state and for one draw command
- */
-void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
-			boolean count_draw_in)
-{
-	unsigned needed = num_dw;
-	int atom;
-
-	/* Start from what has already been written into the CS. */
-	needed += ctx->b.rings.gfx.cs->cdw;
-
-	/* Account for every atom currently marked dirty. */
-	for (atom = 0; atom < SI_NUM_ATOMS(ctx); atom++) {
-		if (!ctx->atoms.array[atom]->dirty)
-			continue;
-		needed += ctx->atoms.array[atom]->num_dw;
-	}
-
-	if (count_draw_in) {
-		/* All dirty PM4 states plus the upper bound of one draw. */
-		needed += ctx->pm4_dirty_cdwords;
-		needed += SI_MAX_DRAW_CS_DWORDS;
-	}
-
-	/* Room to suspend the active non-timer queries. */
-	needed += ctx->num_cs_dw_nontimer_queries_suspend;
-
-	/* Room to end streamout at the end of the CS. */
-	if (ctx->b.streamout.begin_emitted)
-		needed += ctx->b.streamout.num_dw_for_end;
-
-	/* Room for render_condition(NULL) at the end of the CS. */
-	if (ctx->predicate_drawing)
-		needed += 3;
-
-	/* Room for the framebuffer cache flushes at the end of the CS. */
-	needed += ctx->atoms.cache_flush->num_dw;
-
-#if R600_TRACE_CS
-	if (ctx->screen->trace_bo)
-		needed += R600_TRACE_CS_DWORDS;
-#endif
-
-	/* Not enough space left: submit what we have. */
-	if (needed > RADEON_MAX_CMDBUF_DWORDS)
-		radeonsi_flush(&ctx->b.b, NULL, RADEON_FLUSH_ASYNC);
-}
-
-/**
- * Submit the GFX command stream to the winsys and start a new one.
- *
- * Does nothing when the CS is empty. Otherwise the active non-timer
- * queries and streamout are ended and flagged as suspended, the
- * framebuffer and texture caches are flushed, a PS_PARTIAL_FLUSH event is
- * appended, the CS is handed to cs_flush() and si_begin_new_cs() is called.
- *
- * \param ctx    context whose GFX ring is flushed
- * \param flags  RADEON_FLUSH_* flags; RADEON_FLUSH_KEEP_TILING_FLAGS is
- *               always added
- */
-void si_context_flush(struct r600_context *ctx, unsigned flags)
-{
- struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
-
- /* nothing has been recorded, so there is nothing to submit */
- if (!cs->cdw)
- return;
-
- /* suspend queries */
- ctx->nontimer_queries_suspended = false;
- if (ctx->num_cs_dw_nontimer_queries_suspend) {
- r600_context_queries_suspend(ctx);
- ctx->nontimer_queries_suspended = true;
- }
-
- ctx->b.streamout.suspended = false;
-
- /* end streamout and remember that it has to be restarted */
- if (ctx->b.streamout.begin_emitted) {
- r600_emit_streamout_end(&ctx->b);
- ctx->b.streamout.suspended = true;
- }
-
- /* flush CB/DB (data and metadata) and invalidate the texture cache */
- ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
- R600_CONTEXT_FLUSH_AND_INV_CB_META |
- R600_CONTEXT_FLUSH_AND_INV_DB |
- R600_CONTEXT_FLUSH_AND_INV_DB_META |
- R600_CONTEXT_INV_TEX_CACHE;
- si_emit_cache_flush(&ctx->b, NULL);
-
- /* this is probably not needed anymore */
- cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
- cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
-
- /* force to keep tiling flags */
- flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
-
-#if R600_TRACE_CS
- /* dump every dword of the CS about to be submitted */
- if (ctx->screen->trace_bo) {
- struct r600_screen *rscreen = ctx->screen;
- unsigned i;
-
- for (i = 0; i < cs->cdw; i++) {
- fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]);
- }
- rscreen->cs_count++;
- }
-#endif
-
- /* Flush the CS. */
- ctx->b.ws->cs_flush(ctx->b.rings.gfx.cs, flags, 0);
-
-#if R600_TRACE_CS
- /* poll the trace buffer up to 10 times to detect a hung CS */
- if (ctx->screen->trace_bo) {
- struct r600_screen *rscreen = ctx->screen;
- unsigned i;
-
- /* NOTE(review): usleep() takes microseconds, yet the message below
-  * reports i * 5 as milliseconds -- confirm the intended unit. */
- for (i = 0; i < 10; i++) {
- usleep(5);
- /* NOTE(review): this uses ctx->ws while the rest of the function
-  * uses ctx->b.ws -- confirm it still builds with tracing enabled. */
- if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
- break;
- }
- }
- if (i == 10) {
- fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n",
- rscreen->trace_ptr[1], rscreen->trace_ptr[0]);
- } else {
- fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5);
- }
- }
-#endif
-
- si_begin_new_cs(ctx);
-}
-
-/**
- * Prepare the context for recording into a fresh command stream.
- *
- * Resets the dirty-dword count, requests invalidation of the read caches,
- * marks all PM4 state as not emitted, emits the init state first, and then
- * restarts streamout and non-timer queries if they were flagged as
- * suspended.
- */
-void si_begin_new_cs(struct r600_context *ctx)
-{
- ctx->pm4_dirty_cdwords = 0;
-
- /* Flush read caches at the beginning of CS. */
- ctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
- R600_CONTEXT_INV_CONST_CACHE |
- R600_CONTEXT_INV_SHADER_CACHE;
-
- /* set all valid groups as dirty so they get re-emitted on
- * next draw command
- */
- si_pm4_reset_emitted(ctx);
-
- /* The CS initialization should be emitted before everything else. */
- si_pm4_emit(ctx, ctx->queued.named.init);
- ctx->emitted.named.init = ctx->queued.named.init;
-
- /* streamout was ended by the flush: append to all enabled buffers */
- if (ctx->b.streamout.suspended) {
- ctx->b.streamout.append_bitmask = ctx->b.streamout.enabled_mask;
- r600_streamout_buffers_dirty(&ctx->b);
- }
-
- /* resume queries */
- if (ctx->nontimer_queries_suspended) {
- r600_context_queries_resume(ctx);
- }
-
- si_all_descriptors_begin_new_cs(ctx);
-}
-
-/**
- * Compute the difference between two 64-bit counters stored in a mapped
- * query result block.
- *
- * \param map             start of the result block, read as an array of
- *                        32-bit dwords
- * \param start_index     dword index of the low half of the begin value
- * \param end_index       dword index of the low half of the end value
- * \param test_status_bit when true, bit 63 must be set in both values for
- *                        the pair to count; otherwise 0 is returned
- * \return end - start as a full 64-bit value. The return type used to be
- *         unsigned, which silently dropped the upper 32 bits of large
- *         differences such as long elapsed-time intervals.
- */
-static uint64_t r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
-				       bool test_status_bit)
-{
-	const uint64_t status_bit = (uint64_t)1 << 63;
-	uint32_t *current_result = (uint32_t*)map;
-	uint64_t start, end;
-
-	/* Each counter is stored as a low dword followed by a high dword. */
-	start = (uint64_t)current_result[start_index] |
-		(uint64_t)current_result[start_index+1] << 32;
-	end = (uint64_t)current_result[end_index] |
-	      (uint64_t)current_result[end_index+1] << 32;
-
-	if (!test_status_bit ||
-	    ((start & status_bit) && (end & status_bit))) {
-		return end - start;
-	}
-	return 0;
-}
-
-/**
- * Accumulate all pending result blocks of a query into query->result.
- *
- * Maps the query buffer for reading, walks the blocks between
- * results_start and results_end (wrapping at the buffer width), folds each
- * one into query->result according to the query type, and finally marks
- * them as consumed by setting results_start to results_end.
- *
- * \param ctx    context providing the winsys and the GFX CS
- * \param query  query whose buffer is read and whose result is updated
- * \param wait   when FALSE the map is requested with
- *               PIPE_TRANSFER_DONTBLOCK
- * \return FALSE if the buffer could not be mapped, TRUE otherwise
- */
-static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait)
-{
- unsigned results_base = query->results_start;
- char *map;
-
- map = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs,
- PIPE_TRANSFER_READ |
- (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
- if (!map)
- return FALSE;
-
- /* count all results across all data blocks */
- switch (query->type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- /* advances 16 bytes at a time rather than by result_size */
- while (results_base != query->results_end) {
- query->result.u64 +=
- r600_query_read_result(map + results_base, 0, 2, true);
- results_base = (results_base + 16) % query->buffer->b.b.width0;
- }
- break;
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- /* true as soon as any begin/end pair shows a non-zero difference */
- while (results_base != query->results_end) {
- query->result.b = query->result.b ||
- r600_query_read_result(map + results_base, 0, 2, true) != 0;
- results_base = (results_base + 16) % query->buffer->b.b.width0;
- }
- break;
- case PIPE_QUERY_TIMESTAMP:
- {
- /* reads the first two dwords of the mapping; results_base is not used */
- uint32_t *current_result = (uint32_t*)map;
- query->result.u64 = (uint64_t)current_result[0] | (uint64_t)current_result[1] << 32;
- break;
- }
- case PIPE_QUERY_TIME_ELAPSED:
- /* status bit is not tested for elapsed-time values */
- while (results_base != query->results_end) {
- query->result.u64 +=
- r600_query_read_result(map + results_base, 0, 2, false);
- results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
- }
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- /* SAMPLE_STREAMOUTSTATS stores this structure:
- * {
- * u64 NumPrimitivesWritten;
- * u64 PrimitiveStorageNeeded;
- * }
- * We only need NumPrimitivesWritten here. */
- while (results_base != query->results_end) {
- query->result.u64 +=
- r600_query_read_result(map + results_base, 2, 6, true);
- results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
- }
- break;
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- /* Here we read PrimitiveStorageNeeded. */
- while (results_base != query->results_end) {
- query->result.u64 +=
- r600_query_read_result(map + results_base, 0, 4, true);
- results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
- }
- break;
- case PIPE_QUERY_SO_STATISTICS:
- /* both counters of each block are accumulated separately */
- while (results_base != query->results_end) {
- query->result.so.num_primitives_written +=
- r600_query_read_result(map + results_base, 2, 6, true);
- query->result.so.primitives_storage_needed +=
- r600_query_read_result(map + results_base, 0, 4, true);
- results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
- }
- break;
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- /* true when the two counters of any block differ */
- while (results_base != query->results_end) {
- query->result.b = query->result.b ||
- r600_query_read_result(map + results_base, 2, 6, true) !=
- r600_query_read_result(map + results_base, 0, 4, true);
- results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
- }
- break;
- default:
- assert(0);
- }
-
- /* everything up to results_end has now been consumed */
- query->results_start = query->results_end;
- ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
- return TRUE;
-}
-
-/**
- * Emit the begin-of-query event for a query.
- *
- * Reserves CS space for both the begin and the matching end packet,
- * collects pending results first when the next block would overwrite
- * unread data, clears the block that is about to be written and then
- * emits the event with the block's GPU address.
- *
- * \param ctx    context whose GFX CS receives the packets
- * \param query  query to start; PIPE_QUERY_TIMESTAMP is not accepted here
- */
-void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
-{
-	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
-	unsigned new_results_end, i;
-	uint32_t *results;
-	uint64_t va;
-
-	/* Twice num_cs_dw: room for this packet and for the end packet. */
-	si_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
-
-	new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;
-
-	/* collect current results if query buffer is full */
-	if (new_results_end == query->results_start) {
-		r600_query_result(ctx, query, TRUE);
-	}
-
-	switch (query->type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
-		if (results) {
-			results = (uint32_t*)((char*)results + query->results_end);
-			memset(results, 0, query->result_size);
-
-			/* Set top bits for unused backends */
-			for (i = 0; i < ctx->max_db; i++) {
-				if (!(ctx->backend_mask & (1<<i))) {
-					results[(i * 4)+1] = 0x80000000;
-					results[(i * 4)+3] = 0x80000000;
-				}
-			}
-			ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
-		}
-		break;
-	case PIPE_QUERY_TIME_ELAPSED:
-		break;
-	case PIPE_QUERY_PRIMITIVES_EMITTED:
-	case PIPE_QUERY_PRIMITIVES_GENERATED:
-	case PIPE_QUERY_SO_STATISTICS:
-	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
-		/* A failed map must not be written through; skip the clear,
-		 * exactly as the occlusion path above does. */
-		if (results) {
-			results = (uint32_t*)((char*)results + query->results_end);
-			memset(results, 0, query->result_size);
-			ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
-		}
-		break;
-	default:
-		assert(0);
-	}
-
-	/* emit begin query */
-	va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
-	va += query->results_end;
-
-	switch (query->type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
-		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
-		cs->buf[cs->cdw++] = va;
-		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
-		break;
-	case PIPE_QUERY_PRIMITIVES_EMITTED:
-	case PIPE_QUERY_PRIMITIVES_GENERATED:
-	case PIPE_QUERY_SO_STATISTICS:
-	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
-		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
-		cs->buf[cs->cdw++] = va;
-		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
-		break;
-	case PIPE_QUERY_TIME_ELAPSED:
-		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
-		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-		cs->buf[cs->cdw++] = va;
-		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
-		cs->buf[cs->cdw++] = 0;
-		cs->buf[cs->cdw++] = 0;
-		break;
-	default:
-		assert(0);
-	}
-	/* Relocation for the query buffer referenced by the packet above. */
-	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-	cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE);
-
-	/* Remember how many dwords suspending this query will take. */
-	if (!si_is_timer_query(query->type)) {
-		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
-	}
-}
-
-/**
- * Emit the end-of-query event for a query and advance results_end to the
- * next result block.
- *
- * For query types that have no begin call, the CS space reservation and
- * the buffer-full check are done here instead.
- *
- * \param ctx    context whose GFX CS receives the packets
- * \param query  query to end
- */
-void r600_query_end(struct r600_context *ctx, struct r600_query *query)
-{
- struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
- uint64_t va;
- unsigned new_results_end;
-
- /* The queries which need begin already called this in begin_query. */
- if (!si_query_needs_begin(query->type)) {
- si_need_cs_space(ctx, query->num_cs_dw, TRUE);
-
- new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;
-
- /* collect current results if query buffer is full */
- if (new_results_end == query->results_start) {
- r600_query_result(ctx, query, TRUE);
- }
- }
-
- va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
- /* emit end query */
- switch (query->type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- /* the end value is written 8 bytes into the current block */
- va += query->results_end + 8;
- cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
- cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
- cs->buf[cs->cdw++] = va;
- cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- case PIPE_QUERY_SO_STATISTICS:
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- /* the end sample goes into the second half of the block */
- va += query->results_end + query->result_size/2;
- cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
- cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
- cs->buf[cs->cdw++] = va;
- cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
- break;
- case PIPE_QUERY_TIME_ELAPSED:
- va += query->results_end + query->result_size/2;
- /* fall through */
- case PIPE_QUERY_TIMESTAMP:
- /* for TIMESTAMP va is left at the start of the buffer */
- cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
- cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
- cs->buf[cs->cdw++] = va;
- cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
- cs->buf[cs->cdw++] = 0;
- cs->buf[cs->cdw++] = 0;
- break;
- default:
- assert(0);
- }
- /* relocation for the query buffer referenced by the packet above */
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE);
-
- /* move on to the next block, wrapping at the end of the buffer */
- query->results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;
-
- /* undo the accounting done when the query was begun */
- if (si_query_needs_begin(query->type) && !si_is_timer_query(query->type)) {
- ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
- }
-}
-
-/**
- * Emit SET_PREDICATION packets, either clearing predication or enabling
- * it on the pending result blocks of a query.
- *
- * \param ctx        context whose GFX CS receives the packets
- * \param query      query whose result blocks drive the predicate; not
- *                   dereferenced when operation is PREDICATION_OP_CLEAR
- * \param operation  PREDICATION_OP_* value
- * \param flag_wait  non-zero selects PREDICATION_HINT_WAIT, zero selects
- *                   PREDICATION_HINT_NOWAIT_DRAW
- */
-void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
- int flag_wait)
-{
- struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
- uint64_t va;
-
- if (operation == PREDICATION_OP_CLEAR) {
- /* the clear packet is exactly 3 dwords */
- si_need_cs_space(ctx, 3, FALSE);
-
- cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
- cs->buf[cs->cdw++] = 0;
- cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR);
- } else {
- unsigned results_base = query->results_start;
- unsigned count;
- uint32_t op;
-
- /* find count of the query data blocks */
- count = (query->buffer->b.b.width0 + query->results_end - query->results_start) % query->buffer->b.b.width0;
- count /= query->result_size;
-
- /* 5 dwords per block: 3 for SET_PREDICATION, 2 for the relocation */
- si_need_cs_space(ctx, 5 * count, TRUE);
-
- op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
- (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
- va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
-
- /* emit predicate packets for all data blocks */
- while (results_base != query->results_end) {
- cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
- cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
- cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
- cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx,
- query->buffer, RADEON_USAGE_READ);
- results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
-
- /* set CONTINUE bit for all packets except the first */
- op |= PREDICATION_CONTINUE;
- }
- }
-}
-
-/**
- * Allocate a query object of the given type together with its result
- * buffer.
- *
- * \param ctx         context the query belongs to
- * \param query_type  PIPE_QUERY_* value
- * \return the new query, or NULL when the type is not supported or an
- *         allocation fails
- */
-struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type)
-{
-	unsigned buffer_size = 4096;
-	struct r600_query *q = CALLOC_STRUCT(r600_query);
-
-	if (!q)
-		return NULL;
-
-	q->type = query_type;
-
-	/* Per-type size of one result block and of one CS packet. */
-	switch (query_type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		q->num_cs_dw = 6;
-		q->result_size = 16 * ctx->max_db;
-		break;
-	case PIPE_QUERY_TIMESTAMP:
-		q->num_cs_dw = 8;
-		q->result_size = 8;
-		break;
-	case PIPE_QUERY_TIME_ELAPSED:
-		q->num_cs_dw = 8;
-		q->result_size = 16;
-		break;
-	case PIPE_QUERY_PRIMITIVES_EMITTED:
-	case PIPE_QUERY_PRIMITIVES_GENERATED:
-	case PIPE_QUERY_SO_STATISTICS:
-	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
-		q->num_cs_dw = 6;
-		q->result_size = 32;
-		break;
-	default:
-		assert(0);
-		FREE(q);
-		return NULL;
-	}
-
-	/* Trim the buffer to a whole number of result blocks so that the
-	 * wrap-around arithmetic on offsets stays simple. */
-	buffer_size -= buffer_size % q->result_size;
-
-	/* The GPU writes the results and the CPU reads them back afterwards,
-	 * so a staging buffer is probably a good fit. */
-	q->buffer = r600_resource_create_custom(&ctx->screen->b.b,
-						PIPE_USAGE_STAGING,
-						buffer_size);
-	if (q->buffer)
-		return q;
-
-	FREE(q);
-	return NULL;
-}
-
-/**
- * Destroy a query: drop the reference on its result buffer and release
- * the query object itself.
- *
- * \param ctx    unused
- * \param query  query to destroy; must not be NULL
- */
-void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
-{
-	r600_resource_reference(&query->buffer, NULL);
-	/* The object comes from CALLOC_STRUCT, so release it with the
-	 * matching FREE wrapper rather than the C library's free(). */
-	FREE(query);
-}
-
-/**
- * Collect the pending results of a query and store the final value in the
- * caller's result storage, using the layout that matches the query type.
- *
- * \param ctx      context the query belongs to
- * \param query    query to read
- * \param wait     passed through to r600_query_result()
- * \param vresult  destination; written as boolean, uint64_t or
- *                 pipe_query_data_so_statistics depending on the type
- * \return FALSE when r600_query_result() fails, TRUE otherwise
- */
-boolean r600_context_query_result(struct r600_context *ctx,
-				  struct r600_query *query,
-				  boolean wait, void *vresult)
-{
-	if (!r600_query_result(ctx, query, wait))
-		return FALSE;
-
-	switch (query->type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_PRIMITIVES_EMITTED:
-	case PIPE_QUERY_PRIMITIVES_GENERATED:
-		*(uint64_t*)vresult = query->result.u64;
-		break;
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		*(boolean*)vresult = query->result.b;
-		break;
-	case PIPE_QUERY_TIMESTAMP:
-	case PIPE_QUERY_TIME_ELAPSED:
-		/* Scale the raw value by 1000000 / crystal clock frequency. */
-		*(uint64_t*)vresult = (1000000 * query->result.u64) /
-				      ctx->screen->b.info.r600_clock_crystal_freq;
-		break;
-	case PIPE_QUERY_SO_STATISTICS:
-		*(struct pipe_query_data_so_statistics*)vresult = query->result.so;
-		break;
-	default:
-		assert(0);
-	}
-
-	return TRUE;
-}
-
-/**
- * End every query on the active non-timer list. Afterwards no dwords may
- * remain reserved for suspending queries.
- */
-void r600_context_queries_suspend(struct r600_context *ctx)
-{
-	struct r600_query *active;
-
-	LIST_FOR_EACH_ENTRY(active, &ctx->active_nontimer_query_list, list) {
-		r600_query_end(ctx, active);
-	}
-
-	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
-}
-
-/**
- * Begin again every query on the active non-timer list. The suspend
- * reservation is expected to be zero on entry.
- */
-void r600_context_queries_resume(struct r600_context *ctx)
-{
-	struct r600_query *active;
-
-	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
-
-	LIST_FOR_EACH_ENTRY(active, &ctx->active_nontimer_query_list, list) {
-		r600_query_begin(ctx, active);
-	}
-}
-
-#if R600_TRACE_CS
-/**
- * Emit a WRITE_DATA packet that stores the current CS dword position and
- * the screen's CS counter into the trace buffer. Only compiled when
- * R600_TRACE_CS is enabled.
- *
- * NOTE(review): this accesses rctx->cs and calls r600_context_bo_reloc()
- * with three arguments, while the rest of the file uses ctx->b.rings.gfx.cs
- * and a four-argument call -- this looks stale; confirm it still builds.
- */
-void r600_trace_emit(struct r600_context *rctx)
-{
- struct r600_screen *rscreen = rctx->screen;
- struct radeon_winsys_cs *cs = rctx->cs;
- uint64_t va;
-
- va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
- r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE);
- cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
- cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
- PKT3_WRITE_DATA_WR_CONFIRM |
- PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
- /* destination address, low dword then high dword */
- cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
- cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;
- /* NOTE(review): cs->cdw is both incremented and read in this statement
-  * without sequencing, so the stored value is not well defined. */
- cs->buf[cs->cdw++] = cs->cdw;
- cs->buf[cs->cdw++] = rscreen->cs_count;
-}
-#endif
+++ /dev/null
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#include "radeonsi_pipe.h"
-#include "sid.h"
-
-/* pipe_context::create_query hook: forwards to r600_context_query_create(). */
-static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
-{
-	struct r600_query *created =
-		r600_context_query_create((struct r600_context *)ctx, query_type);
-
-	return (struct pipe_query*)created;
-}
-
-/* pipe_context::destroy_query hook: forwards to r600_context_query_destroy(). */
-static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
-{
-	r600_context_query_destroy((struct r600_context *)ctx,
-				   (struct r600_query *)query);
-}
-
-/**
- * pipe_context::begin_query hook.
- *
- * Resets the accumulated result, discards any unread result blocks, emits
- * the begin event and, for non-timer queries, puts the query on the
- * context's active list. Query types that need no begin call are rejected.
- */
-static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
-{
-	struct r600_query *rquery = (struct r600_query *)query;
-	struct r600_context *rctx = (struct r600_context *)ctx;
-
-	if (!si_query_needs_begin(rquery->type)) {
-		assert(0);
-		return;
-	}
-
-	/* Start from a clean slate: no pending blocks, zeroed result. */
-	rquery->results_start = rquery->results_end;
-	memset(&rquery->result, 0, sizeof(rquery->result));
-
-	r600_query_begin(rctx, rquery);
-
-	if (si_is_timer_query(rquery->type))
-		return;
-
-	LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_query_list);
-}
-
-/**
- * pipe_context::end_query hook.
- *
- * Queries without a begin call get their result reset here. The end event
- * is emitted, and a non-timer query that was begun is taken off the
- * context's active list again.
- */
-static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
-{
-	struct r600_query *rquery = (struct r600_query *)query;
-	struct r600_context *rctx = (struct r600_context *)ctx;
-	/* Depends on the query type only, so it can be evaluated once. */
-	const bool was_begun = si_query_needs_begin(rquery->type);
-
-	if (!was_begun)
-		memset(&rquery->result, 0, sizeof(rquery->result));
-
-	r600_query_end(rctx, rquery);
-
-	if (was_begun && !si_is_timer_query(rquery->type))
-		LIST_DELINIT(&rquery->list);
-}
-
-/* pipe_context::get_query_result hook: forwards to r600_context_query_result(). */
-static boolean r600_get_query_result(struct pipe_context *ctx,
-				     struct pipe_query *query,
-				     boolean wait, union pipe_query_result *vresult)
-{
-	return r600_context_query_result((struct r600_context *)ctx,
-					 (struct r600_query *)query,
-					 wait, vresult);
-}
-
-/**
- * pipe_context::render_condition hook.
- *
- * With a NULL query, predication is cleared if it was active. With a query
- * whose accumulated result is already non-zero, any current condition is
- * dropped and rendering stays unconditional. Otherwise predication packets
- * are emitted for the query's pending result blocks.
- *
- * \param ctx        pipe context
- * \param query      query to predicate on, or NULL to disable
- * \param condition  stored in current_render_cond_cond; not otherwise used here
- * \param mode       PIPE_RENDER_COND_* value; the two *_WAIT modes request
- *                   the wait hint
- */
-static void r600_render_condition(struct pipe_context *ctx,
- struct pipe_query *query,
- boolean condition,
- uint mode)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
- struct r600_query *rquery = (struct r600_query *)query;
- int wait_flag = 0;
-
- /* If we already have nonzero result, render unconditionally */
- if (query != NULL && rquery->result.u64 != 0) {
- if (rctx->current_render_cond) {
- /* recurse with a NULL query to clear the active condition */
- r600_render_condition(ctx, NULL, FALSE, 0);
- }
- return;
- }
-
- /* remember the request on the context */
- rctx->current_render_cond = query;
- rctx->current_render_cond_cond = condition;
- rctx->current_render_cond_mode = mode;
-
- if (query == NULL) {
- /* only emit the clear packet when predication is actually on */
- if (rctx->predicate_drawing) {
- rctx->predicate_drawing = false;
- r600_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, 1);
- }
- return;
- }
-
- if (mode == PIPE_RENDER_COND_WAIT ||
- mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
- wait_flag = 1;
- }
-
- rctx->predicate_drawing = true;
-
- /* choose the predication operation that matches the query type */
- switch (rquery->type) {
- case PIPE_QUERY_OCCLUSION_COUNTER:
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- r600_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
- break;
- case PIPE_QUERY_PRIMITIVES_EMITTED:
- case PIPE_QUERY_PRIMITIVES_GENERATED:
- case PIPE_QUERY_SO_STATISTICS:
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- r600_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
- break;
- default:
- assert(0);
- }
-}
-
-/**
- * Install the query entry points on the context. The render_condition
- * hook is only installed when the screen reports at least one backend.
- */
-void r600_init_query_functions(struct r600_context *rctx)
-{
-	rctx->b.b.create_query = r600_create_query;
-	rctx->b.b.destroy_query = r600_destroy_query;
-
-	rctx->b.b.begin_query = r600_begin_query;
-	rctx->b.b.end_query = r600_end_query;
-
-	rctx->b.b.get_query_result = r600_get_query_result;
-
-	if (rctx->screen->b.info.r600_num_backends <= 0)
-		return;
-
-	rctx->b.b.render_condition = r600_render_condition;
-}
+++ /dev/null
-/*
- * Copyright 2010 Marek Olšák <maraeo@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "radeonsi_pipe.h"
-
-/**
- * pipe_screen::resource_create hook: buffers are created through
- * r600_buffer_create() with an argument of 4096, everything else through
- * r600_texture_create().
- */
-static struct pipe_resource *r600_resource_create(struct pipe_screen *screen,
-						  const struct pipe_resource *templ)
-{
-	if (templ->target != PIPE_BUFFER)
-		return r600_texture_create(screen, templ);
-
-	return r600_buffer_create(screen, templ, 4096);
-}
-
-/**
- * pipe_screen::resource_from_handle hook: only non-buffer resources can
- * be imported from a winsys handle; buffers yield NULL.
- */
-static struct pipe_resource *r600_resource_from_handle(struct pipe_screen * screen,
-						       const struct pipe_resource *templ,
-						       struct winsys_handle *whandle)
-{
-	if (templ->target == PIPE_BUFFER)
-		return NULL;
-
-	return r600_texture_from_handle(screen, templ, whandle);
-}
-
-/**
- * Install the resource entry points on the screen: creation and import
- * use the driver's own functions, handle export and destruction go
- * through the u_resource vtable helpers.
- */
-void r600_init_screen_resource_functions(struct pipe_screen *screen)
-{
-	/* driver-specific hooks */
-	screen->resource_from_handle = r600_resource_from_handle;
-	screen->resource_create = r600_resource_create;
-
-	/* vtable-dispatching helpers */
-	screen->resource_destroy = u_resource_destroy_vtbl;
-	screen->resource_get_handle = u_resource_get_handle_vtbl;
-}
-
-/**
- * Install the transfer entry points on the context: map and unmap go
- * through the u_transfer vtable helpers, flush_region and inline_write
- * use the u_default implementations.
- */
-void r600_init_context_resource_functions(struct r600_context *r600)
-{
-	/* vtable-dispatching helpers */
-	r600->b.b.transfer_unmap = u_transfer_unmap_vtbl;
-	r600->b.b.transfer_map = u_transfer_map_vtbl;
-
-	/* default implementations */
-	r600->b.b.transfer_inline_write = u_default_transfer_inline_write;
-	r600->b.b.transfer_flush_region = u_default_transfer_flush_region;
-}
+++ /dev/null
-/*
- * Copyright 2010 Marek Olšák <maraeo@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#ifndef R600_RESOURCE_H
-#define R600_RESOURCE_H
-
-#include "../radeon/r600_pipe_common.h"
-
-/* Driver surface type; currently it only wraps the generic pipe_surface. */
-struct r600_surface {
- struct pipe_surface base; /* first member, so the two pointer types can be cast to each other */
-};
-
-/* Installs the resource hooks (create, from_handle, get_handle, destroy)
- * on a pipe_screen. */
-void r600_init_screen_resource_functions(struct pipe_screen *screen);
-
-/* forward declaration; the full definition is not needed by this header */
-struct r600_context;
-
-/* NOTE(review): the definition is not part of this header. Judging by the
- * name and parameters it presumably uploads `size` bytes from `ptr` and
- * reports the buffer and offset through rbuffer / const_offset -- confirm
- * against the implementation. */
-void r600_upload_const_buffer(struct r600_context *rctx, struct r600_resource **rbuffer,
- const uint8_t *ptr, unsigned size,
- uint32_t *const_offset);
-
-#endif
+++ /dev/null
-/*
- * Copyright 2010 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Dave Airlie <airlied@redhat.com>
- */
-
-#include "util/u_index_modify.h"
-#include "util/u_upload_mgr.h"
-#include "radeonsi_pipe.h"
-
-
-/**
- * Replace an index buffer that uses 1-byte indices by an uploaded copy
- * that uses 2-byte indices. Index buffers of any other index size are
- * left untouched.
- *
- * \param r600   context providing the uploader
- * \param ib     index buffer description, updated in place
- * \param count  number of indices to convert
- */
-void r600_translate_index_buffer(struct r600_context *r600,
-				 struct pipe_index_buffer *ib,
-				 unsigned count)
-{
-	struct pipe_resource *upload_buf = NULL;
-	unsigned upload_offset;
-	void *dst;
-
-	if (ib->index_size != 1)
-		return;
-
-	/* Two bytes of upload space per converted index. */
-	u_upload_alloc(r600->b.uploader, 0, count * 2,
-		       &upload_offset, &upload_buf, &dst);
-
-	util_shorten_ubyte_elts_to_userptr(
-		&r600->b.b, ib, 0, ib->offset, count, dst);
-
-	/* Drop the old buffer and point the description at the upload. */
-	pipe_resource_reference(&ib->buffer, NULL);
-	ib->index_size = 2;
-	ib->offset = upload_offset;
-	ib->buffer = upload_buf;
-}
+++ /dev/null
-#include "util/u_memory.h"
-
-#include "../radeon/r600_cs.h"
-#include "radeonsi_pipe.h"
-#include "radeonsi_shader.h"
-
-#include "radeon_llvm_util.h"
-
-#define MAX_GLOBAL_BUFFERS 20
-
-/* Compute state object created from a pipe_compute_state. */
-struct si_pipe_compute {
- struct r600_context *ctx; /* context the state was created on */
-
- unsigned local_size; /* copied from cso->req_local_mem */
- unsigned private_size; /* copied from cso->req_private_mem */
- unsigned input_size; /* copied from cso->req_input_mem */
- unsigned num_kernels; /* number of entries in kernels[] */
- struct si_pipe_shader *kernels; /* one compiled shader per kernel */
- unsigned num_user_sgprs;
-
- /* resources bound through set_global_binding, indexed by slot */
- struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
-
- LLVMContextRef llvm_ctx; /* LLVM context the kernel modules are loaded into */
-};
-
-/**
- * pipe_context::create_compute_state hook.
- *
- * Builds a si_pipe_compute from an LLVM program blob: the blob starts with
- * a pipe_llvm_program_header, followed by header->num_bytes of code. Every
- * kernel found in the code is compiled into its own si_pipe_shader.
- *
- * \param ctx  pipe context
- * \param cso  compute state description
- * \return the new state object, or NULL if an allocation fails
- */
-static void *radeonsi_create_compute_state(
-	struct pipe_context *ctx,
-	const struct pipe_compute_state *cso)
-{
-	struct r600_context *rctx = (struct r600_context *)ctx;
-	struct si_pipe_compute *program =
-		CALLOC_STRUCT(si_pipe_compute);
-	const struct pipe_llvm_program_header *header;
-	const unsigned char *code;
-	unsigned i;
-
-	/* The allocation used to be dereferenced without a check. */
-	if (!program)
-		return NULL;
-
-	program->llvm_ctx = LLVMContextCreate();
-
-	header = cso->prog;
-	/* The code follows the header directly; do the arithmetic on a byte
-	 * pointer. */
-	code = (const unsigned char *)cso->prog +
-	       sizeof(struct pipe_llvm_program_header);
-
-	program->ctx = rctx;
-	program->local_size = cso->req_local_mem;
-	program->private_size = cso->req_private_mem;
-	program->input_size = cso->req_input_mem;
-
-	program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, code,
-							header->num_bytes);
-	program->kernels = CALLOC(sizeof(struct si_pipe_shader),
-				  program->num_kernels);
-	/* A NULL result is only an error when kernels were requested. */
-	if (!program->kernels && program->num_kernels) {
-		LLVMContextDispose(program->llvm_ctx);
-		FREE(program);
-		return NULL;
-	}
-
-	for (i = 0; i < program->num_kernels; i++) {
-		LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
-							code, header->num_bytes);
-		si_compile_llvm(rctx, &program->kernels[i], mod);
-		LLVMDisposeModule(mod);
-	}
-
-	return program;
-}
-
-static void radeonsi_bind_compute_state(struct pipe_context *ctx, void *state)
-{
- struct r600_context *rctx = (struct r600_context*)ctx;
- rctx->cs_shader_state.program = (struct si_pipe_compute*)state;
-}
-
-static void radeonsi_set_global_binding(
- struct pipe_context *ctx, unsigned first, unsigned n,
- struct pipe_resource **resources,
- uint32_t **handles)
-{
- unsigned i;
- struct r600_context *rctx = (struct r600_context*)ctx;
- struct si_pipe_compute *program = rctx->cs_shader_state.program;
-
- if (!resources) {
- for (i = first; i < first + n; i++) {
- program->global_buffers[i] = NULL;
- }
- return;
- }
-
- for (i = first; i < first + n; i++) {
- uint64_t va;
- program->global_buffers[i] = resources[i];
- va = r600_resource_va(ctx->screen, resources[i]);
- memcpy(handles[i], &va, sizeof(va));
- }
-}
-
-static void radeonsi_launch_grid(
- struct pipe_context *ctx,
- const uint *block_layout, const uint *grid_layout,
- uint32_t pc, const void *input)
-{
- struct r600_context *rctx = (struct r600_context*)ctx;
- struct si_pipe_compute *program = rctx->cs_shader_state.program;
- struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
- struct r600_resource *kernel_args_buffer = NULL;
- unsigned kernel_args_size;
- unsigned num_work_size_bytes = 36;
- uint32_t kernel_args_offset = 0;
- uint32_t *kernel_args;
- uint64_t kernel_args_va;
- uint64_t shader_va;
- unsigned arg_user_sgpr_count = 2;
- unsigned i;
- struct si_pipe_shader *shader = &program->kernels[pc];
- unsigned lds_blocks;
-
- pm4->compute_pkt = true;
- si_cmd_context_control(pm4);
-
- si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE);
- si_pm4_cmd_add(pm4, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH) |
- EVENT_INDEX(0x7) |
- EVENT_WRITE_INV_L2);
- si_pm4_cmd_end(pm4, false);
-
- si_pm4_inval_texture_cache(pm4);
- si_pm4_inval_shader_cache(pm4);
- si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);
-
- /* Upload the kernel arguments */
-
- /* The extra num_work_size_bytes are for work group / work item size information */
- kernel_args_size = program->input_size + num_work_size_bytes;
- kernel_args = MALLOC(kernel_args_size);
- for (i = 0; i < 3; i++) {
- kernel_args[i] = grid_layout[i];
- kernel_args[i + 3] = grid_layout[i] * block_layout[i];
- kernel_args[i + 6] = block_layout[i];
- }
-
- memcpy(kernel_args + (num_work_size_bytes / 4), input, program->input_size);
-
- r600_upload_const_buffer(rctx, &kernel_args_buffer, (uint8_t*)kernel_args,
- kernel_args_size, &kernel_args_offset);
- kernel_args_va = r600_resource_va(ctx->screen,
- (struct pipe_resource*)kernel_args_buffer);
- kernel_args_va += kernel_args_offset;
-
- si_pm4_add_bo(pm4, kernel_args_buffer, RADEON_USAGE_READ);
-
- si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
- si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0));
-
- si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0);
- si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0);
- si_pm4_set_reg(pm4, R_00B818_COMPUTE_START_Z, 0);
-
- si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X,
- S_00B81C_NUM_THREAD_FULL(block_layout[0]));
- si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y,
- S_00B820_NUM_THREAD_FULL(block_layout[1]));
- si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z,
- S_00B824_NUM_THREAD_FULL(block_layout[2]));
-
- /* Global buffers */
- for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) {
- struct r600_resource *buffer =
- (struct r600_resource*)program->global_buffers[i];
- if (!buffer) {
- continue;
- }
- si_pm4_add_bo(pm4, buffer, RADEON_USAGE_READWRITE);
- }
-
- /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
- * and is now per pipe, so it should be handled in the
- * kernel if we want to use something other than the default value,
- * which is now 0x22f.
- */
- if (rctx->b.chip_class <= SI) {
- /* XXX: This should be:
- * (number of compute units) * 4 * (waves per simd) - 1 */
-
- si_pm4_set_reg(pm4, R_00B82C_COMPUTE_MAX_WAVE_ID,
- 0x190 /* Default value */);
- }
-
- shader_va = r600_resource_va(ctx->screen, (void *)shader->bo);
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
- si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
- si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
-
- si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1,
- /* We always use at least 3 VGPRS, these come from
- * TIDIG_COMP_CNT.
- * XXX: The compiler should account for this.
- */
- S_00B848_VGPRS((MAX2(3, shader->num_vgprs) - 1) / 4)
- /* We always use at least 4 + arg_user_sgpr_count. The 4 extra
- * sgprs are from TGID_X_EN, TGID_Y_EN, TGID_Z_EN, TG_SIZE_EN
- * XXX: The compiler should account for this.
- */
- | S_00B848_SGPRS(((MAX2(4 + arg_user_sgpr_count,
- shader->num_sgprs)) - 1) / 8))
- ;
-
- lds_blocks = shader->lds_size;
- /* XXX: We are over allocating LDS. For SI, the shader reports LDS in
- * blocks of 256 bytes, so if there are 4 bytes lds allocated in
- * the shader and 4 bytes allocated by the state tracker, then
- * we will set LDS_SIZE to 512 bytes rather than 256.
- */
- if (rctx->b.chip_class <= SI) {
- lds_blocks += align(program->local_size, 256) >> 8;
- } else {
- lds_blocks += align(program->local_size, 512) >> 9;
- }
-
- assert(lds_blocks <= 0xFF);
-
- si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2,
- S_00B84C_SCRATCH_EN(0)
- | S_00B84C_USER_SGPR(arg_user_sgpr_count)
- | S_00B84C_TGID_X_EN(1)
- | S_00B84C_TGID_Y_EN(1)
- | S_00B84C_TGID_Z_EN(1)
- | S_00B84C_TG_SIZE_EN(1)
- | S_00B84C_TIDIG_COMP_CNT(2)
- | S_00B84C_LDS_SIZE(lds_blocks)
- | S_00B84C_EXCP_EN(0))
- ;
- si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0);
-
- si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0,
- S_00B858_SH0_CU_EN(0xffff /* Default value */)
- | S_00B858_SH1_CU_EN(0xffff /* Default value */))
- ;
-
- si_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1,
- S_00B85C_SH0_CU_EN(0xffff /* Default value */)
- | S_00B85C_SH1_CU_EN(0xffff /* Default value */))
- ;
-
- si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT);
- si_pm4_cmd_add(pm4, grid_layout[0]); /* Thread groups DIM_X */
- si_pm4_cmd_add(pm4, grid_layout[1]); /* Thread groups DIM_Y */
- si_pm4_cmd_add(pm4, grid_layout[2]); /* Thread gropus DIM_Z */
- si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */
- si_pm4_cmd_end(pm4, false);
-
- si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE);
- si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(0x4)));
- si_pm4_cmd_end(pm4, false);
-
- si_pm4_inval_texture_cache(pm4);
- si_pm4_inval_shader_cache(pm4);
- si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);
-
- si_pm4_emit(rctx, pm4);
-
-#if 0
- fprintf(stderr, "cdw: %i\n", rctx->cs->cdw);
- for (i = 0; i < rctx->cs->cdw; i++) {
- fprintf(stderr, "%4i : 0x%08X\n", i, rctx->cs->buf[i]);
- }
-#endif
-
- FREE(pm4);
- FREE(kernel_args);
-}
-
-
-static void si_delete_compute_state(struct pipe_context *ctx, void* state){
- struct si_pipe_compute *program = (struct si_pipe_compute *)state;
-
- if (!state) {
- return;
- }
-
- if (program->kernels) {
- FREE(program->kernels);
- }
-
- if (program->llvm_ctx){
- LLVMContextDispose(program->llvm_ctx);
- }
-
- //And then free the program itself.
- FREE(program);
-}
-
-static void si_set_compute_resources(struct pipe_context * ctx_,
- unsigned start, unsigned count,
- struct pipe_surface ** surfaces) { }
-
-void si_init_compute_functions(struct r600_context *rctx)
-{
- rctx->b.b.create_compute_state = radeonsi_create_compute_state;
- rctx->b.b.delete_compute_state = si_delete_compute_state;
- rctx->b.b.bind_compute_state = radeonsi_bind_compute_state;
-/* ctx->context.create_sampler_view = evergreen_compute_create_sampler_view; */
- rctx->b.b.set_compute_resources = si_set_compute_resources;
- rctx->b.b.set_global_binding = radeonsi_set_global_binding;
- rctx->b.b.launch_grid = radeonsi_launch_grid;
-}
+++ /dev/null
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#include <stdio.h>
-#include <errno.h>
-#include "pipe/p_defines.h"
-#include "pipe/p_state.h"
-#include "pipe/p_context.h"
-#include "tgsi/tgsi_scan.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_util.h"
-#include "util/u_blitter.h"
-#include "util/u_double_list.h"
-#include "util/u_format.h"
-#include "util/u_transfer.h"
-#include "util/u_surface.h"
-#include "util/u_pack_color.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "util/u_simple_shaders.h"
-#include "util/u_upload_mgr.h"
-#include "vl/vl_decoder.h"
-#include "vl/vl_video_buffer.h"
-#include "os/os_time.h"
-#include "pipebuffer/pb_buffer.h"
-#include "radeonsi_pipe.h"
-#include "radeon/radeon_uvd.h"
-#include "r600.h"
-#include "sid.h"
-#include "r600_resource.h"
-#include "radeonsi_pipe.h"
-#include "si_state.h"
-#include "../radeon/r600_cs.h"
-
-/*
- * pipe_context
- */
-void radeonsi_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
- unsigned flags)
-{
- struct r600_context *rctx = (struct r600_context *)ctx;
- struct pipe_query *render_cond = NULL;
- boolean render_cond_cond = FALSE;
- unsigned render_cond_mode = 0;
-
- if (fence) {
- *fence = rctx->b.ws->cs_create_fence(rctx->b.rings.gfx.cs);
- }
-
- /* Disable render condition. */
- if (rctx->current_render_cond) {
- render_cond = rctx->current_render_cond;
- render_cond_cond = rctx->current_render_cond_cond;
- render_cond_mode = rctx->current_render_cond_mode;
- ctx->render_condition(ctx, NULL, FALSE, 0);
- }
-
- si_context_flush(rctx, flags);
-
- /* Re-enable render condition. */
- if (render_cond) {
- ctx->render_condition(ctx, render_cond, render_cond_cond, render_cond_mode);
- }
-}
-
-static void r600_flush_from_st(struct pipe_context *ctx,
- struct pipe_fence_handle **fence,
- unsigned flags)
-{
- radeonsi_flush(ctx, fence,
- flags & PIPE_FLUSH_END_OF_FRAME ? RADEON_FLUSH_END_OF_FRAME : 0);
-}
-
-static void r600_flush_from_winsys(void *ctx, unsigned flags)
-{
- radeonsi_flush((struct pipe_context*)ctx, NULL, flags);
-}
-
-static void r600_destroy_context(struct pipe_context *context)
-{
- struct r600_context *rctx = (struct r600_context *)context;
-
- si_release_all_descriptors(rctx);
-
- pipe_resource_reference(&rctx->null_const_buf.buffer, NULL);
- r600_resource_reference(&rctx->border_color_table, NULL);
-
- if (rctx->dummy_pixel_shader) {
- rctx->b.b.delete_fs_state(&rctx->b.b, rctx->dummy_pixel_shader);
- }
- for (int i = 0; i < 8; i++) {
- rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_depth_stencil[i]);
- rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_depth[i]);
- rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_stencil[i]);
- }
- rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_inplace);
- rctx->b.b.delete_blend_state(&rctx->b.b, rctx->custom_blend_resolve);
- rctx->b.b.delete_blend_state(&rctx->b.b, rctx->custom_blend_decompress);
- util_unreference_framebuffer_state(&rctx->framebuffer);
-
- util_blitter_destroy(rctx->blitter);
-
- r600_common_context_cleanup(&rctx->b);
- FREE(rctx);
-}
-
-static struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv)
-{
- struct r600_context *rctx = CALLOC_STRUCT(r600_context);
- struct r600_screen* rscreen = (struct r600_screen *)screen;
- int shader, i;
-
- if (rctx == NULL)
- return NULL;
-
- if (!r600_common_context_init(&rctx->b, &rscreen->b))
- goto fail;
-
- rctx->b.b.screen = screen;
- rctx->b.b.priv = priv;
- rctx->b.b.destroy = r600_destroy_context;
- rctx->b.b.flush = r600_flush_from_st;
-
- /* Easy accessing of screen/winsys. */
- rctx->screen = rscreen;
-
- si_init_blit_functions(rctx);
- r600_init_query_functions(rctx);
- r600_init_context_resource_functions(rctx);
- si_init_compute_functions(rctx);
-
- if (rscreen->b.info.has_uvd) {
- rctx->b.b.create_video_codec = radeonsi_uvd_create_decoder;
- rctx->b.b.create_video_buffer = radeonsi_video_buffer_create;
- } else {
- rctx->b.b.create_video_codec = vl_create_decoder;
- rctx->b.b.create_video_buffer = vl_video_buffer_create;
- }
-
- rctx->b.rings.gfx.cs = rctx->b.ws->cs_create(rctx->b.ws, RING_GFX, NULL);
- rctx->b.rings.gfx.flush = r600_flush_from_winsys;
-
- si_init_all_descriptors(rctx);
-
- /* Initialize cache_flush. */
- rctx->cache_flush = si_atom_cache_flush;
- rctx->atoms.cache_flush = &rctx->cache_flush;
-
- rctx->atoms.streamout_begin = &rctx->b.streamout.begin_atom;
-
- switch (rctx->b.chip_class) {
- case SI:
- case CIK:
- si_init_state_functions(rctx);
- LIST_INITHEAD(&rctx->active_nontimer_query_list);
- rctx->max_db = 8;
- si_init_config(rctx);
- break;
- default:
- R600_ERR("Unsupported chip class %d.\n", rctx->b.chip_class);
- goto fail;
- }
-
- rctx->b.ws->cs_set_flush_callback(rctx->b.rings.gfx.cs, r600_flush_from_winsys, rctx);
-
- rctx->blitter = util_blitter_create(&rctx->b.b);
- if (rctx->blitter == NULL)
- goto fail;
-
- rctx->dummy_pixel_shader =
- util_make_fragment_cloneinput_shader(&rctx->b.b, 0,
- TGSI_SEMANTIC_GENERIC,
- TGSI_INTERPOLATE_CONSTANT);
- rctx->b.b.bind_fs_state(&rctx->b.b, rctx->dummy_pixel_shader);
-
- /* these must be last */
- si_begin_new_cs(rctx);
- si_get_backend_mask(rctx);
-
- /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
- * with a NULL buffer). We need to use a dummy buffer instead. */
- if (rctx->b.chip_class == CIK) {
- rctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
- PIPE_USAGE_STATIC, 16);
- rctx->null_const_buf.buffer_size = rctx->null_const_buf.buffer->width0;
-
- for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
- for (i = 0; i < NUM_CONST_BUFFERS; i++) {
- rctx->b.b.set_constant_buffer(&rctx->b.b, shader, i,
- &rctx->null_const_buf);
- }
- }
-
- /* Clear the NULL constant buffer, because loads should return zeros. */
- rctx->b.clear_buffer(&rctx->b.b, rctx->null_const_buf.buffer, 0,
- rctx->null_const_buf.buffer->width0, 0);
- }
-
- return &rctx->b.b;
-fail:
- r600_destroy_context(&rctx->b.b);
- return NULL;
-}
-
-/*
- * pipe_screen
- */
-static const char* r600_get_vendor(struct pipe_screen* pscreen)
-{
- return "X.Org";
-}
-
-const char *r600_get_llvm_processor_name(enum radeon_family family)
-{
- switch (family) {
- case CHIP_TAHITI: return "tahiti";
- case CHIP_PITCAIRN: return "pitcairn";
- case CHIP_VERDE: return "verde";
- case CHIP_OLAND: return "oland";
-#if HAVE_LLVM <= 0x0303
- default: return "SI";
-#else
- case CHIP_HAINAN: return "hainan";
- case CHIP_BONAIRE: return "bonaire";
- case CHIP_KABINI: return "kabini";
- case CHIP_KAVERI: return "kaveri";
- case CHIP_HAWAII: return "hawaii";
- default: return "";
-#endif
- }
-}
-
-static const char *r600_get_family_name(enum radeon_family family)
-{
- switch(family) {
- case CHIP_TAHITI: return "AMD TAHITI";
- case CHIP_PITCAIRN: return "AMD PITCAIRN";
- case CHIP_VERDE: return "AMD CAPE VERDE";
- case CHIP_OLAND: return "AMD OLAND";
- case CHIP_HAINAN: return "AMD HAINAN";
- case CHIP_BONAIRE: return "AMD BONAIRE";
- case CHIP_KAVERI: return "AMD KAVERI";
- case CHIP_KABINI: return "AMD KABINI";
- case CHIP_HAWAII: return "AMD HAWAII";
- default: return "AMD unknown";
- }
-}
-
-static const char* r600_get_name(struct pipe_screen* pscreen)
-{
- struct r600_screen *rscreen = (struct r600_screen *)pscreen;
-
- return r600_get_family_name(rscreen->b.family);
-}
-
-static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
-{
- struct r600_screen *rscreen = (struct r600_screen *)pscreen;
-
- switch (param) {
- /* Supported features (boolean caps). */
- case PIPE_CAP_TWO_SIDED_STENCIL:
- case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
- case PIPE_CAP_ANISOTROPIC_FILTER:
- case PIPE_CAP_POINT_SPRITE:
- case PIPE_CAP_OCCLUSION_QUERY:
- case PIPE_CAP_TEXTURE_SHADOW_MAP:
- case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- case PIPE_CAP_BLEND_EQUATION_SEPARATE:
- case PIPE_CAP_TEXTURE_SWIZZLE:
- case PIPE_CAP_DEPTH_CLIP_DISABLE:
- case PIPE_CAP_SHADER_STENCIL_EXPORT:
- case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
- case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
- case PIPE_CAP_SM3:
- case PIPE_CAP_SEAMLESS_CUBE_MAP:
- case PIPE_CAP_PRIMITIVE_RESTART:
- case PIPE_CAP_CONDITIONAL_RENDER:
- case PIPE_CAP_TEXTURE_BARRIER:
- case PIPE_CAP_INDEP_BLEND_ENABLE:
- case PIPE_CAP_INDEP_BLEND_FUNC:
- case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
- case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
- case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
- case PIPE_CAP_USER_INDEX_BUFFERS:
- case PIPE_CAP_USER_CONSTANT_BUFFERS:
- case PIPE_CAP_START_INSTANCE:
- case PIPE_CAP_NPOT_TEXTURES:
- case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
- case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
- case PIPE_CAP_TGSI_INSTANCEID:
- case PIPE_CAP_COMPUTE:
- case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
- case PIPE_CAP_TGSI_VS_LAYER:
- return 1;
-
- case PIPE_CAP_TEXTURE_MULTISAMPLE:
- /* 2D tiling on CIK is supported since DRM 2.35.0 */
- return HAVE_LLVM >= 0x0304 && (rscreen->b.chip_class < CIK ||
- rscreen->b.info.drm_minor >= 35);
-
- case PIPE_CAP_TGSI_TEXCOORD:
- return 0;
-
- case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
- return 64;
-
- case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
- return 256;
-
- case PIPE_CAP_GLSL_FEATURE_LEVEL:
- return 140;
-
- case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
- return 1;
- case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
- return MIN2(rscreen->b.info.vram_size, 0xFFFFFFFF);
-
- /* Unsupported features. */
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
- case PIPE_CAP_SCALED_RESOLVE:
- case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
- case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
- case PIPE_CAP_VERTEX_COLOR_CLAMPED:
- case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
- case PIPE_CAP_USER_VERTEX_BUFFERS:
- case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
- case PIPE_CAP_CUBE_MAP_ARRAY:
- return 0;
-
- case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
- return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600;
-
- /* Stream output. */
- case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
- return rscreen->b.has_streamout ? 4 : 0;
- case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
- return rscreen->b.has_streamout ? 1 : 0;
- case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
- case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
- return rscreen->b.has_streamout ? 32*4 : 0;
-
- /* Texturing. */
- case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
- case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
- return 15;
- case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
- return 16384;
- case PIPE_CAP_MAX_COMBINED_SAMPLERS:
- return 32;
-
- /* Render targets. */
- case PIPE_CAP_MAX_RENDER_TARGETS:
- return 8;
-
- case PIPE_CAP_MAX_VIEWPORTS:
- return 1;
-
- /* Timer queries, present when the clock frequency is non zero. */
- case PIPE_CAP_QUERY_TIMESTAMP:
- case PIPE_CAP_QUERY_TIME_ELAPSED:
- return rscreen->b.info.r600_clock_crystal_freq != 0;
-
- case PIPE_CAP_MIN_TEXEL_OFFSET:
- return -8;
-
- case PIPE_CAP_MAX_TEXEL_OFFSET:
- return 7;
- case PIPE_CAP_ENDIANNESS:
- return PIPE_ENDIAN_LITTLE;
- }
- return 0;
-}
-
-static float r600_get_paramf(struct pipe_screen* pscreen,
- enum pipe_capf param)
-{
- switch (param) {
- case PIPE_CAPF_MAX_LINE_WIDTH:
- case PIPE_CAPF_MAX_LINE_WIDTH_AA:
- case PIPE_CAPF_MAX_POINT_WIDTH:
- case PIPE_CAPF_MAX_POINT_WIDTH_AA:
- return 16384.0f;
- case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
- return 16.0f;
- case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
- return 16.0f;
- case PIPE_CAPF_GUARD_BAND_LEFT:
- case PIPE_CAPF_GUARD_BAND_TOP:
- case PIPE_CAPF_GUARD_BAND_RIGHT:
- case PIPE_CAPF_GUARD_BAND_BOTTOM:
- return 0.0f;
- }
- return 0.0f;
-}
-
-static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param)
-{
- switch(shader)
- {
- case PIPE_SHADER_FRAGMENT:
- case PIPE_SHADER_VERTEX:
- break;
- case PIPE_SHADER_GEOMETRY:
- /* TODO: support and enable geometry programs */
- return 0;
- case PIPE_SHADER_COMPUTE:
- switch (param) {
- case PIPE_SHADER_CAP_PREFERRED_IR:
- return PIPE_SHADER_IR_LLVM;
- default:
- return 0;
- }
- default:
- /* TODO: support tessellation */
- return 0;
- }
-
- switch (param) {
- case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
- case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
- case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
- case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
- return 16384;
- case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
- return 32;
- case PIPE_SHADER_CAP_MAX_INPUTS:
- return 32;
- case PIPE_SHADER_CAP_MAX_TEMPS:
- return 256; /* Max native temporaries. */
- case PIPE_SHADER_CAP_MAX_ADDRS:
- /* FIXME Isn't this equal to TEMPS? */
- return 1; /* Max native address registers */
- case PIPE_SHADER_CAP_MAX_CONSTS:
- return 4096; /* actually only memory limits this */
- case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
- return NUM_PIPE_CONST_BUFFERS;
- case PIPE_SHADER_CAP_MAX_PREDS:
- return 0; /* FIXME */
- case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
- return 1;
- case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
- return 0;
- case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
- case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
- case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
- case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
- return 1;
- case PIPE_SHADER_CAP_INTEGERS:
- return 1;
- case PIPE_SHADER_CAP_SUBROUTINES:
- return 0;
- case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
- case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
- return 16;
- case PIPE_SHADER_CAP_PREFERRED_IR:
- return PIPE_SHADER_IR_TGSI;
- }
- return 0;
-}
-
-static int r600_get_video_param(struct pipe_screen *screen,
- enum pipe_video_profile profile,
- enum pipe_video_entrypoint entrypoint,
- enum pipe_video_cap param)
-{
- switch (param) {
- case PIPE_VIDEO_CAP_SUPPORTED:
- return vl_profile_supported(screen, profile, entrypoint);
- case PIPE_VIDEO_CAP_NPOT_TEXTURES:
- return 1;
- case PIPE_VIDEO_CAP_MAX_WIDTH:
- case PIPE_VIDEO_CAP_MAX_HEIGHT:
- return vl_video_buffer_max_size(screen);
- case PIPE_VIDEO_CAP_PREFERED_FORMAT:
- return PIPE_FORMAT_NV12;
- case PIPE_VIDEO_CAP_MAX_LEVEL:
- return vl_level_supported(screen, profile);
- default:
- return 0;
- }
-}
-
-static int r600_get_compute_param(struct pipe_screen *screen,
- enum pipe_compute_cap param,
- void *ret)
-{
- struct r600_screen *rscreen = (struct r600_screen *)screen;
- //TODO: select these params by asic
- switch (param) {
- case PIPE_COMPUTE_CAP_IR_TARGET: {
- const char *gpu = r600_get_llvm_processor_name(rscreen->b.family);
- if (ret) {
- sprintf(ret, "%s-r600--", gpu);
- }
- return (8 + strlen(gpu)) * sizeof(char);
- }
- case PIPE_COMPUTE_CAP_GRID_DIMENSION:
- if (ret) {
- uint64_t * grid_dimension = ret;
- grid_dimension[0] = 3;
- }
- return 1 * sizeof(uint64_t);
- case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
- if (ret) {
- uint64_t * grid_size = ret;
- grid_size[0] = 65535;
- grid_size[1] = 65535;
- grid_size[2] = 1;
- }
- return 3 * sizeof(uint64_t) ;
-
- case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
- if (ret) {
- uint64_t * block_size = ret;
- block_size[0] = 256;
- block_size[1] = 256;
- block_size[2] = 256;
- }
- return 3 * sizeof(uint64_t);
- case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
- if (ret) {
- uint64_t * max_threads_per_block = ret;
- *max_threads_per_block = 256;
- }
- return sizeof(uint64_t);
-
- case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
- if (ret) {
- uint64_t *max_global_size = ret;
- /* XXX: Not sure what to put here. */
- *max_global_size = 2000000000;
- }
- return sizeof(uint64_t);
- case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
- if (ret) {
- uint64_t *max_local_size = ret;
- /* Value reported by the closed source driver. */
- *max_local_size = 32768;
- }
- return sizeof(uint64_t);
- case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
- if (ret) {
- uint64_t *max_input_size = ret;
- /* Value reported by the closed source driver. */
- *max_input_size = 1024;
- }
- return sizeof(uint64_t);
- case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
- if (ret) {
- uint64_t max_global_size;
- uint64_t *max_mem_alloc_size = ret;
- r600_get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, &max_global_size);
- *max_mem_alloc_size = max_global_size / 4;
- }
- return sizeof(uint64_t);
- default:
- fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
- return 0;
- }
-}
-
-static void r600_destroy_screen(struct pipe_screen* pscreen)
-{
- struct r600_screen *rscreen = (struct r600_screen *)pscreen;
-
- if (rscreen == NULL)
- return;
-
- if (!radeon_winsys_unref(rscreen->b.ws))
- return;
-
- r600_common_screen_cleanup(&rscreen->b);
-
-#if R600_TRACE_CS
- if (rscreen->trace_bo) {
- rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
- pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
- }
-#endif
-
- rscreen->b.ws->destroy(rscreen->b.ws);
- FREE(rscreen);
-}
-
-static uint64_t r600_get_timestamp(struct pipe_screen *screen)
-{
- struct r600_screen *rscreen = (struct r600_screen*)screen;
-
- return 1000000 * rscreen->b.ws->query_value(rscreen->b.ws, RADEON_TIMESTAMP) /
- rscreen->b.info.r600_clock_crystal_freq;
-}
-
-struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
-{
- struct r600_screen *rscreen = CALLOC_STRUCT(r600_screen);
- if (rscreen == NULL) {
- return NULL;
- }
-
- ws->query_info(ws, &rscreen->b.info);
-
- /* Set functions first. */
- rscreen->b.b.context_create = r600_create_context;
- rscreen->b.b.destroy = r600_destroy_screen;
- rscreen->b.b.get_name = r600_get_name;
- rscreen->b.b.get_vendor = r600_get_vendor;
- rscreen->b.b.get_param = r600_get_param;
- rscreen->b.b.get_shader_param = r600_get_shader_param;
- rscreen->b.b.get_paramf = r600_get_paramf;
- rscreen->b.b.get_compute_param = r600_get_compute_param;
- rscreen->b.b.get_timestamp = r600_get_timestamp;
- rscreen->b.b.is_format_supported = si_is_format_supported;
- if (rscreen->b.info.has_uvd) {
- rscreen->b.b.get_video_param = ruvd_get_video_param;
- rscreen->b.b.is_video_format_supported = ruvd_is_format_supported;
- } else {
- rscreen->b.b.get_video_param = r600_get_video_param;
- rscreen->b.b.is_video_format_supported = vl_video_buffer_is_format_supported;
- }
- r600_init_screen_resource_functions(&rscreen->b.b);
-
- if (!r600_common_screen_init(&rscreen->b, ws)) {
- FREE(rscreen);
- return NULL;
- }
-
- rscreen->b.has_cp_dma = true;
- rscreen->b.has_streamout = HAVE_LLVM >= 0x0304;
-
- if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
- rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
-
-#if R600_TRACE_CS
- rscreen->cs_count = 0;
- if (rscreen->info.drm_minor >= 28) {
- rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->screen,
- PIPE_BIND_CUSTOM,
- PIPE_USAGE_STAGING,
- 4096);
- if (rscreen->trace_bo) {
- rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
- PIPE_TRANSFER_UNSYNCHRONIZED);
- }
- }
-#endif
-
- /* Create the auxiliary context. This must be done last. */
- rscreen->b.aux_context = rscreen->b.b.context_create(&rscreen->b.b, NULL);
-
- return &rscreen->b.b;
-}
+++ /dev/null
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Jerome Glisse
- */
-#ifndef RADEONSI_PIPE_H
-#define RADEONSI_PIPE_H
-
-#include "../radeon/r600_pipe_common.h"
-
-#include "pipe/p_screen.h"
-#include "pipe/p_context.h"
-#include "util/u_format.h"
-#include "util/u_math.h"
-#include "util/u_slab.h"
-#include "r600.h"
-#include "radeonsi_public.h"
-#include "radeonsi_pm4.h"
-#include "si_state.h"
-#include "r600_resource.h"
-#include "sid.h"
-
-#ifdef PIPE_ARCH_BIG_ENDIAN
-#define R600_BIG_ENDIAN 1
-#else
-#define R600_BIG_ENDIAN 0
-#endif
-
-#define R600_TRACE_CS 0
-#define R600_TRACE_CS_DWORDS 6
-
-#define SI_MAX_DRAW_CS_DWORDS 18
-
-struct si_pipe_compute;
-
-struct r600_screen {
- struct r600_common_screen b;
-#if R600_TRACE_CS
- struct r600_resource *trace_bo;
- uint32_t *trace_ptr;
- unsigned cs_count;
-#endif
-};
-
-struct si_pipe_sampler_view {
- struct pipe_sampler_view base;
- struct r600_resource *resource;
- uint32_t state[8];
- uint32_t fmask_state[8];
-};
-
-struct si_pipe_sampler_state {
- uint32_t val[4];
- uint32_t border_color[4];
-};
-
-struct si_cs_shader_state {
- struct si_pipe_compute *program;
-};
-
-struct r600_textures_info {
- struct si_sampler_views views;
- struct si_pipe_sampler_state *samplers[NUM_TEX_UNITS];
- unsigned n_views;
- uint32_t depth_texture_mask; /* which textures are depth */
- uint32_t compressed_colortex_mask;
- unsigned n_samplers;
-};
-
-#define SI_NUM_ATOMS(rctx) (sizeof((rctx)->atoms)/sizeof((rctx)->atoms.array[0]))
-#define SI_NUM_SHADERS (PIPE_SHADER_FRAGMENT+1)
-
-/* Driver context. Embeds the shared r600_common_context as its first member
- * and holds the bound pipeline state, blitter helpers, query bookkeeping,
- * vertex/index buffers and the queued/emitted PM4 state sets. */
-struct r600_context {
- struct r600_common_context b;
- struct blitter_context *blitter;
- void *custom_dsa_flush_depth_stencil[8];
- void *custom_dsa_flush_depth[8];
- void *custom_dsa_flush_stencil[8];
- void *custom_dsa_flush_inplace;
- void *custom_blend_resolve;
- void *custom_blend_decompress;
- struct r600_screen *screen;
-
- /* The same atom pointers are reachable by name (anonymous struct) or by
- * index (array); SI_NUM_ATOMS() gives the element count. */
- union {
- struct {
- /* The order matters. */
- struct r600_atom *const_buffers[SI_NUM_SHADERS];
- struct r600_atom *sampler_views[SI_NUM_SHADERS];
- struct r600_atom *streamout_buffers;
- /* Caches must be flushed after resource descriptors are
- * updated in memory. */
- struct r600_atom *cache_flush;
- struct r600_atom *streamout_begin;
- };
- struct r600_atom *array[0];
- } atoms;
-
- struct si_vertex_element *vertex_elements;
- struct pipe_framebuffer_state framebuffer;
- unsigned fb_log_samples;
- unsigned fb_cb0_is_integer;
- unsigned fb_compressed_cb_mask;
- unsigned pa_sc_line_stipple;
- unsigned pa_su_sc_mode_cntl;
- /* for saving when using blitter */
- struct pipe_stencil_ref stencil_ref;
- struct si_pipe_shader_selector *ps_shader;
- struct si_pipe_shader_selector *vs_shader;
- struct si_cs_shader_state cs_shader_state;
- struct pipe_query *current_render_cond;
- unsigned current_render_cond_mode;
- boolean current_render_cond_cond;
- struct pipe_query *saved_render_cond;
- unsigned saved_render_cond_mode;
- boolean saved_render_cond_cond;
- /* shader information */
- unsigned sprite_coord_enable;
- unsigned export_16bpc;
- struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
- struct si_buffer_resources streamout_buffers;
- struct r600_textures_info samplers[SI_NUM_SHADERS];
- struct r600_resource *border_color_table;
- unsigned border_color_offset;
-
- unsigned default_ps_gprs, default_vs_gprs;
-
- /* Below are variables from the old r600_context.
- */
- unsigned pm4_dirty_cdwords;
-
- /* The list of active queries. Only one query of each type can be active. */
- struct list_head active_nontimer_query_list;
- unsigned num_cs_dw_nontimer_queries_suspend;
- /* If queries have been suspended. */
- bool nontimer_queries_suspended;
-
- unsigned backend_mask;
- unsigned max_db; /* for OQ */
- boolean predicate_drawing;
-
- /* Vertex and index buffers. */
- bool vertex_buffers_dirty;
- struct pipe_index_buffer index_buffer;
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- unsigned nr_vertex_buffers;
-
- /* With rasterizer discard, there doesn't have to be a pixel shader.
- * In that case, we bind this one: */
- void *dummy_pixel_shader;
- struct r600_atom cache_flush;
- struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
-
- /* SI state handling: "queued" is what should be on the hardware,
- * "emitted" is what has already been written to the command stream. */
- union si_state queued;
- union si_state emitted;
-};
-
-/* Cross-file entry points of the driver, grouped by the source file that
- * is expected to define them. */
-
-/* r600_blit.c */
-void si_init_blit_functions(struct r600_context *rctx);
-void si_flush_depth_textures(struct r600_context *rctx,
- struct r600_textures_info *textures);
-void r600_decompress_color_textures(struct r600_context *rctx,
- struct r600_textures_info *textures);
-
-/* r600_buffer.c */
-void r600_upload_index_buffer(struct r600_context *rctx,
- struct pipe_index_buffer *ib, unsigned count);
-
-
-/* radeonsi_pipe.c */
-void radeonsi_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
- unsigned flags);
-const char *r600_get_llvm_processor_name(enum radeon_family family);
-
-/* r600_query.c */
-void r600_init_query_functions(struct r600_context *rctx);
-
-/* r600_resource.c */
-void r600_init_context_resource_functions(struct r600_context *r600);
-
-/* r600_translate.c */
-void r600_translate_index_buffer(struct r600_context *r600,
- struct pipe_index_buffer *ib,
- unsigned count);
-
-/* Only declared when command-stream tracing is compiled in. */
-#if R600_TRACE_CS
-void r600_trace_emit(struct r600_context *rctx);
-#endif
-
-/* radeonsi_compute.c */
-void si_init_compute_functions(struct r600_context *rctx);
-
-/* radeonsi_uvd.c */
-struct pipe_video_codec *radeonsi_uvd_create_decoder(struct pipe_context *context,
- const struct pipe_video_codec *templ);
-
-struct pipe_video_buffer *radeonsi_video_buffer_create(struct pipe_context *pipe,
- const struct pipe_video_buffer *tmpl);
-
-/*
- * common helpers
- */
-/* Scale a float by 2^frac_bits and convert the product to uint32_t,
- * i.e. produce a fixed-point number with frac_bits fractional bits. */
-static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
-{
-	int scale = 1 << frac_bits;
-
-	return value * scale;
-}
-/* Integer division of x by y, rounded up. Both arguments are evaluated
- * more than once, so do not pass expressions with side effects. */
-#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
-
-/* Translate a UTIL_FORMAT_SWIZZLE_* value into the matching
- * V_008F0C_SQ_SEL_* selector. Anything that is not Y, Z, W, 0 or 1
- * (including UTIL_FORMAT_SWIZZLE_X) selects X. */
-static INLINE unsigned si_map_swizzle(unsigned swizzle)
-{
-	unsigned sel;
-
-	switch (swizzle) {
-	case UTIL_FORMAT_SWIZZLE_Y:
-		sel = V_008F0C_SQ_SEL_Y;
-		break;
-	case UTIL_FORMAT_SWIZZLE_Z:
-		sel = V_008F0C_SQ_SEL_Z;
-		break;
-	case UTIL_FORMAT_SWIZZLE_W:
-		sel = V_008F0C_SQ_SEL_W;
-		break;
-	case UTIL_FORMAT_SWIZZLE_0:
-		sel = V_008F0C_SQ_SEL_0;
-		break;
-	case UTIL_FORMAT_SWIZZLE_1:
-		sel = V_008F0C_SQ_SEL_1;
-		break;
-	default: /* UTIL_FORMAT_SWIZZLE_X */
-		sel = V_008F0C_SQ_SEL_X;
-		break;
-	}
-
-	return sel;
-}
-
-/* Map a maximum-anisotropy value to a level in 0..4:
- * <=1 -> 0, <=2 -> 1, <=4 -> 2, <=8 -> 3, anything larger -> 4. */
-static inline unsigned r600_tex_aniso_filter(unsigned filter)
-{
-	unsigned level = 0;
-
-	/* Step up while the value exceeds the current power of two, capped at 4 */
-	while (level < 4 && filter > (1u << level))
-		level++;
-
-	return level;
-}
-
-/* Pack a float as 12.4 fixed point: values <= 0 give 0, values >= 4096
- * saturate to 0xffff, everything in between is x * 16 converted to unsigned. */
-static INLINE unsigned r600_pack_float_12p4(float x)
-{
-	if (x <= 0)
-		return 0;
-
-	if (x >= 4096)
-		return 0xffff;
-
-	return x * 16;
-}
-
-#endif
+++ /dev/null
-/*
- * Copyright 2012 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Christian König <christian.koenig@amd.com>
- */
-
-#include "../radeon/r600_cs.h"
-#include "util/u_memory.h"
-#include "radeonsi_pipe.h"
-#include "radeonsi_pm4.h"
-#include "sid.h"
-
-/* Number of si_pm4_state pointers that fit in union si_state; used as the
- * bound when walking the queued/emitted state arrays. */
-#define NUMBER_OF_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
-
-/* Start a new packet: remember the opcode and reserve one dword for the
- * header, which si_pm4_cmd_end() fills in later. */
-void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode)
-{
-	unsigned header_pos = state->ndw;
-
-	state->ndw = header_pos + 1;
-	state->last_pm4 = header_pos;
-	state->last_opcode = opcode;
-}
-
-/* Append one dword to the state's command buffer. No bounds check is done
- * here; si_pm4_cmd_end() asserts on the total size. */
-void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw)
-{
-	unsigned pos = state->ndw;
-
-	state->pm4[pos] = dw;
-	state->ndw = pos + 1;
-}
-
-/* Finish the packet opened by si_pm4_cmd_begin(): write its PKT3 header
- * into the reserved dword, using the number of dwords added since. */
-void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate)
-{
-	/* header dword and first payload dword are not part of the count */
-	unsigned count = state->ndw - state->last_pm4 - 2;
-	uint32_t header = PKT3(state->last_opcode, count, predicate);
-
-	header |= PKT3_SHADER_TYPE_S(state->compute_pkt);
-	state->pm4[state->last_pm4] = header;
-
-	assert(state->ndw <= SI_PM4_MAX_DW);
-}
-
-/* Queue a register write. The register's address range selects the
- * SET_*_REG packet type. A write to the register directly following the
- * previous one with the same packet type is appended to that packet
- * instead of starting a new one. Unknown offsets are reported and ignored. */
-void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val)
-{
-	unsigned opcode, base;
-	bool extends_last;
-
-	if (reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END) {
-		opcode = PKT3_SET_CONFIG_REG;
-		base = SI_CONFIG_REG_OFFSET;
-	} else if (reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END) {
-		opcode = PKT3_SET_SH_REG;
-		base = SI_SH_REG_OFFSET;
-	} else if (reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END) {
-		opcode = PKT3_SET_CONTEXT_REG;
-		base = SI_CONTEXT_REG_OFFSET;
-	} else if (reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END) {
-		opcode = PKT3_SET_UCONFIG_REG;
-		base = CIK_UCONFIG_REG_OFFSET;
-	} else {
-		R600_ERR("Invalid register offset %08x!\n", reg);
-		return;
-	}
-
-	/* byte offset inside the range -> dword index */
-	reg = (reg - base) >> 2;
-
-	extends_last = opcode == state->last_opcode &&
-		       reg == (state->last_reg + 1);
-	if (!extends_last) {
-		si_pm4_cmd_begin(state, opcode);
-		si_pm4_cmd_add(state, reg);
-	}
-
-	state->last_reg = reg;
-	si_pm4_cmd_add(state, val);
-	/* (re)write the header so the count covers the value just added */
-	si_pm4_cmd_end(state, false);
-}
-
-/**
- * Record a buffer that the commands of this state reference, together with
- * how it is used. A reference on the buffer is taken; it is dropped again
- * in si_pm4_free_state().
- *
- * At most SI_PM4_MAX_BO buffers fit. Exceeding that trips the assert in
- * debug builds; in release builds the buffer is reported and skipped rather
- * than written past the end of the bo[] / bo_usage[] arrays.
- */
-void si_pm4_add_bo(struct si_pm4_state *state,
-		   struct r600_resource *bo,
-		   enum radeon_bo_usage usage)
-{
-	unsigned idx = state->nbo;
-
-	assert(idx < SI_PM4_MAX_BO);
-	if (idx >= SI_PM4_MAX_BO) {
-		/* Check before bumping nbo so the counter never exceeds the
-		 * array size and later loops over nbo stay in bounds. */
-		R600_ERR("Too many buffers in PM4 state (max %d)!\n",
-			 SI_PM4_MAX_BO);
-		return;
-	}
-
-	state->nbo = idx + 1;
-	r600_resource_reference(&state->bo[idx], bo);
-	state->bo_usage[idx] = usage;
-}
-
-/* Open a PKT3_NOP packet that will carry inline shader data; finish it
- * with si_pm4_sh_data_end(). */
-void si_pm4_sh_data_begin(struct si_pm4_state *state)
-{
- si_pm4_cmd_begin(state, PKT3_NOP);
-}
-
-/* Append one dword of shader data to the packet opened by
- * si_pm4_sh_data_begin(). */
-void si_pm4_sh_data_add(struct si_pm4_state *state, uint32_t dw)
-{
- si_pm4_cmd_add(state, dw);
-}
-
-/**
- * Close the data packet opened by si_pm4_sh_data_begin() and emit a
- * PKT3_SET_SH_REG_OFFSET packet that points the SH register at
- * (base + idx * 4) to that data.
- *
- * The offset dword is recorded in relocs[] so si_pm4_emit() can add the
- * position the state ends up at in the command stream. If no data was
- * added, the reserved header dword is released and nothing is emitted.
- *
- * relocs[] holds SI_PM4_MAX_RELOCS entries. Running out trips the assert in
- * debug builds; in release builds the register write is reported and
- * skipped instead of writing past the end of the array.
- */
-void si_pm4_sh_data_end(struct si_pm4_state *state, unsigned base, unsigned idx)
-{
-	unsigned offs = state->last_pm4 + 1;
-	unsigned reg = base + idx * 4;
-
-	/* Bail if no data was added */
-	if (state->ndw == offs) {
-		state->ndw--;
-		return;
-	}
-
-	/* Close the NOP packet first so the buffer stays well-formed even
-	 * if we have to bail out below. */
-	si_pm4_cmd_end(state, false);
-
-	assert(state->nrelocs < SI_PM4_MAX_RELOCS);
-	if (state->nrelocs >= SI_PM4_MAX_RELOCS) {
-		R600_ERR("Too many shader data relocs in PM4 state (max %d)!\n",
-			 SI_PM4_MAX_RELOCS);
-		return;
-	}
-
-	si_pm4_cmd_begin(state, PKT3_SET_SH_REG_OFFSET);
-	si_pm4_cmd_add(state, (reg - SI_SH_REG_OFFSET) >> 2);
-	/* remember which dword holds the offset that needs patching */
-	state->relocs[state->nrelocs++] = state->ndw;
-	si_pm4_cmd_add(state, offs << 2);
-	si_pm4_cmd_add(state, 0);
-	si_pm4_cmd_end(state, false);
-}
-
-/* Request the SH_ICACHE and SH_KCACHE actions in the flush flags that
- * are accumulated for this state. */
-void si_pm4_inval_shader_cache(struct si_pm4_state *state)
-{
-	state->cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1) |
-				S_0085F0_SH_KCACHE_ACTION_ENA(1);
-}
-
-/* Request the TC and TCL1 actions in the flush flags that are accumulated
- * for this state. */
-void si_pm4_inval_texture_cache(struct si_pm4_state *state)
-{
-	state->cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1) |
-				S_0085F0_TCL1_ACTION_ENA(1);
-}
-
-/* Destroy a PM4 state: release the buffer references it holds and free it.
- * If idx is not ~0 and the state is the one recorded as emitted in that
- * slot, the slot is cleared first. A NULL state is ignored. */
-void si_pm4_free_state(struct r600_context *rctx,
-		       struct si_pm4_state *state,
-		       unsigned idx)
-{
-	unsigned i;
-
-	if (!state)
-		return;
-
-	if (idx != ~0 && rctx->emitted.array[idx] == state)
-		rctx->emitted.array[idx] = NULL;
-
-	for (i = 0; i < state->nbo; i++)
-		r600_resource_reference(&state->bo[i], NULL);
-
-	FREE(state);
-}
-
-/* Allocate a zero-initialized PM4 state and tag it with the context's chip
- * class. Returns NULL when the allocation fails. */
-struct si_pm4_state * si_pm4_alloc_state(struct r600_context *rctx)
-{
-	struct si_pm4_state *state = CALLOC_STRUCT(si_pm4_state);
-
-	if (state != NULL)
-		state->chip_class = rctx->b.chip_class;
-
-	return state;
-}
-
-/* OR together the flush flags of every queued state that has not been
- * emitted yet. */
-uint32_t si_pm4_sync_flags(struct r600_context *rctx)
-{
-	uint32_t flags = 0;
-	unsigned i;
-
-	for (i = 0; i < NUMBER_OF_STATES; i++) {
-		struct si_pm4_state *queued = rctx->queued.array[i];
-
-		if (queued && queued != rctx->emitted.array[i])
-			flags |= queued->cp_coher_cntl;
-	}
-
-	return flags;
-}
-
-/* Count the dwords needed to emit every queued state that has not been
- * emitted yet (plus the tracing overhead when tracing is compiled in and
- * a trace buffer exists). */
-unsigned si_pm4_dirty_dw(struct r600_context *rctx)
-{
-	unsigned total = 0;
-	unsigned i;
-
-	for (i = 0; i < NUMBER_OF_STATES; i++) {
-		struct si_pm4_state *queued = rctx->queued.array[i];
-
-		if (queued && queued != rctx->emitted.array[i]) {
-			total += queued->ndw;
-#if R600_TRACE_CS
-			/* room for tracing each state */
-			if (rctx->screen->trace_bo)
-				total += R600_TRACE_CS_DWORDS;
-#endif
-		}
-	}
-
-	return total;
-}
-
-/* Write one PM4 state into the gfx command stream: add relocations for the
- * buffers it references, copy its dwords, patch the shader-data offsets and
- * advance the write position. */
-void si_pm4_emit(struct r600_context *rctx, struct si_pm4_state *state)
-{
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
-	unsigned i;
-
-	for (i = 0; i < state->nbo; i++) {
-		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
-				      state->bo[i], state->bo_usage[i]);
-	}
-
-	memcpy(&cs->buf[cs->cdw], state->pm4, state->ndw * 4);
-
-	/* The recorded dwords hold byte offsets relative to the start of the
-	 * state; add the byte position the state was copied to. */
-	for (i = 0; i < state->nrelocs; i++) {
-		cs->buf[cs->cdw + state->relocs[i]] += cs->cdw << 2;
-	}
-
-	cs->cdw += state->ndw;
-
-#if R600_TRACE_CS
-	if (rctx->screen->trace_bo)
-		r600_trace_emit(rctx);
-#endif
-}
-
-/* Emit every queued state that differs from what was last emitted in the
- * same slot, and record it as emitted. The "init" state must never reach
- * this path (asserted). */
-void si_pm4_emit_dirty(struct r600_context *rctx)
-{
-	unsigned i;
-
-	for (i = 0; i < NUMBER_OF_STATES; i++) {
-		struct si_pm4_state *queued = rctx->queued.array[i];
-
-		if (queued && queued != rctx->emitted.array[i]) {
-			assert(queued != rctx->queued.named.init);
-			si_pm4_emit(rctx, queued);
-			rctx->emitted.array[i] = queued;
-		}
-	}
-}
-
-/* Forget which states have been emitted by clearing every slot of
- * rctx->emitted, so that all queued states count as dirty again. */
-void si_pm4_reset_emitted(struct r600_context *rctx)
-{
- memset(&rctx->emitted, 0, sizeof(rctx->emitted));
-}
+++ /dev/null
-/*
- * Copyright 2012 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Christian König <christian.koenig@amd.com>
- */
-
-#ifndef RADEONSI_PM4_H
-#define RADEONSI_PM4_H
-
-#include "../../winsys/radeon/drm/radeon_winsys.h"
-
-/* Capacity limits of one si_pm4_state: command dwords, referenced buffers
- * and shader-data relocations. */
-#define SI_PM4_MAX_DW 256
-#define SI_PM4_MAX_BO 32
-#define SI_PM4_MAX_RELOCS 4
-
-/* forward declarations */
-struct r600_context;
-enum chip_class;
-
-/* A pre-built chunk of PM4 commands together with the buffers it references
- * and the bookkeeping needed to build and later emit it. */
-struct si_pm4_state
-{
- /* family specific handling */
- enum chip_class chip_class;
- /* PKT3_SET_*_REG handling: opcode and register of the packet being
- * built, and the index of its header dword in pm4[] */
- unsigned last_opcode;
- unsigned last_reg;
- unsigned last_pm4;
-
- /* flush flags for SURFACE_SYNC */
- uint32_t cp_coher_cntl;
-
- /* commands for the DE */
- unsigned ndw;
- uint32_t pm4[SI_PM4_MAX_DW];
-
- /* BO's referenced by this state */
- unsigned nbo;
- struct r600_resource *bo[SI_PM4_MAX_BO];
- enum radeon_bo_usage bo_usage[SI_PM4_MAX_BO];
-
- /* relocs for shader data: indices into pm4[] of dwords patched at emit time */
- unsigned nrelocs;
- unsigned relocs[SI_PM4_MAX_RELOCS];
-
- /* passed to PKT3_SHADER_TYPE_S() when a packet header is written */
- bool compute_pkt;
-};
-
-/* Low-level packet construction: begin a packet, add dwords, write the header. */
-void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode);
-void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw);
-void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate);
-
-/* Register writes and buffer references. */
-void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val);
-void si_pm4_add_bo(struct si_pm4_state *state,
- struct r600_resource *bo,
- enum radeon_bo_usage usage);
-
-/* Inline shader data carried in the command buffer. */
-void si_pm4_sh_data_begin(struct si_pm4_state *state);
-void si_pm4_sh_data_add(struct si_pm4_state *state, uint32_t dw);
-void si_pm4_sh_data_end(struct si_pm4_state *state, unsigned base, unsigned idx);
-
-/* Accumulate cache-invalidation flags in the state's cp_coher_cntl. */
-void si_pm4_inval_shader_cache(struct si_pm4_state *state);
-void si_pm4_inval_texture_cache(struct si_pm4_state *state);
-
-/* State lifetime. */
-void si_pm4_free_state(struct r600_context *rctx,
- struct si_pm4_state *state,
- unsigned idx);
-struct si_pm4_state * si_pm4_alloc_state(struct r600_context *rctx);
-
-/* Queries over and emission of the context's queued states. */
-uint32_t si_pm4_sync_flags(struct r600_context *rctx);
-unsigned si_pm4_dirty_dw(struct r600_context *rctx);
-void si_pm4_emit(struct r600_context *rctx, struct si_pm4_state *state);
-void si_pm4_emit_dirty(struct r600_context *rctx);
-void si_pm4_reset_emitted(struct r600_context *rctx);
-
-#endif
+++ /dev/null
-/*
- * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-#ifndef RADEONSI_PUBLIC_H
-#define RADEONSI_PUBLIC_H
-
-struct radeon_winsys;
-
-/* Public entry point of the driver: create a pipe_screen on top of the
- * given radeon winsys. */
-struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws);
-
-#endif
+++ /dev/null
-/*
- * Copyright 2012 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Christian König <christian.koenig@amd.com>
- */
-
-#ifndef RADEONSI_RESOURCE_H
-#define RADEONSI_RESOURCE_H
-
-#include "../radeon/r600_pipe_common.h"
-#include "util/u_transfer.h"
-#include "util/u_inlines.h"
-
-/* Create a buffer of the given size and usage with PIPE_BIND_CUSTOM and
- * return it as an r600_resource. The size must be non-zero (asserted). */
-static INLINE struct r600_resource *
-r600_resource_create_custom(struct pipe_screen *screen,
-			    unsigned usage, unsigned size)
-{
-	struct pipe_resource *buf;
-
-	assert(size);
-	buf = pipe_buffer_create(screen, PIPE_BIND_CUSTOM, usage, size);
-
-	return r600_resource(buf);
-}
-
-#endif
+++ /dev/null
-
-/*
- * Copyright 2012 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Tom Stellard <thomas.stellard@amd.com>
- * Michel Dänzer <michel.daenzer@amd.com>
- * Christian König <christian.koenig@amd.com>
- */
-
-#include "gallivm/lp_bld_tgsi_action.h"
-#include "gallivm/lp_bld_const.h"
-#include "gallivm/lp_bld_gather.h"
-#include "gallivm/lp_bld_intr.h"
-#include "gallivm/lp_bld_logic.h"
-#include "gallivm/lp_bld_tgsi.h"
-#include "gallivm/lp_bld_arit.h"
-#include "gallivm/lp_bld_flow.h"
-#include "radeon_llvm.h"
-#include "radeon_llvm_emit.h"
-#include "util/u_memory.h"
-#include "tgsi/tgsi_info.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_scan.h"
-#include "tgsi/tgsi_util.h"
-#include "tgsi/tgsi_dump.h"
-
-#include "radeonsi_pipe.h"
-#include "radeonsi_shader.h"
-#include "si_state.h"
-#include "sid.h"
-
-#include <assert.h>
-#include <errno.h>
-#include <stdio.h>
-
-/* Per-compilation context of the TGSI -> LLVM translation. radeon_bld must
- * stay the first member: si_shader_context() casts an lp_build_tgsi_context
- * pointer straight to this struct. */
-struct si_shader_context
-{
- struct radeon_llvm_context radeon_bld;
- struct tgsi_parse_context parse;
- struct tgsi_token * tokens;
- struct si_pipe_shader *shader;
- unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
- /* Indices of main-function parameters, for use with LLVMGetParam() */
- int param_streamout_config;
- int param_streamout_write_index;
- int param_streamout_offset[4];
- int param_vertex_id;
- int param_instance_id;
- /* Metadata node attached to loads made by build_indexed_load() */
- LLVMValueRef const_md;
- LLVMValueRef const_resource[NUM_CONST_BUFFERS];
-#if HAVE_LLVM >= 0x0304
- LLVMValueRef ddxy_lds;
-#endif
- LLVMValueRef *constants[NUM_CONST_BUFFERS];
- LLVMValueRef *resources;
- LLVMValueRef *samplers;
- LLVMValueRef so_buffers[4];
-};
-
-/* Recover the si_shader_context from a TGSI build context pointer by a
- * plain cast.
- * NOTE(review): relies on the lp_build_tgsi_context sitting at offset 0 of
- * si_shader_context (via radeon_bld) - confirm the nested struct layout. */
-static struct si_shader_context * si_shader_context(
- struct lp_build_tgsi_context * bld_base)
-{
- return (struct si_shader_context *)bld_base;
-}
-
-
-/* NOTE(review): presumably base indices of the perspective and linear
- * interpolation parameter groups - confirm against the users. */
-#define PERSPECTIVE_BASE 0
-#define LINEAR_BASE 9
-
-/* NOTE(review): presumably offsets of the sample / center / centroid
- * variants inside a group. "OFSET" is a misspelling of "OFFSET" but is
- * part of the macro name, so it is left alone here. */
-#define SAMPLE_OFFSET 0
-#define CENTER_OFFSET 2
-#define CENTROID_OFSET 4
-
-/* NOTE(review): the *_ADDR_SPACE values look like LLVM address-space
- * numbers - confirm against the LLVM backend. */
-#define USE_SGPR_MAX_SUFFIX_LEN 5
-#define CONST_ADDR_SPACE 2
-#define LOCAL_ADDR_SPACE 3
-#define USER_SGPR_ADDR_SPACE 8
-
-/**
- * Emit an indexed load: an LLVMBuildGEP on base_ptr followed by an
- * LLVMBuildLoad of the resulting address.
- *
- * @param offset Number of elements to skip - not bytes or dwords. An
- * element is the type pointed to by base_ptr (e.g. int for an int*
- * pointer).
- *
- * When LLVM lowers the load instruction, it will convert the element offset
- * into a dword offset automatically.
- *
- * The load is tagged with the context's const_md metadata (kind 1).
- */
-static LLVMValueRef build_indexed_load(
-	struct si_shader_context * si_shader_ctx,
-	LLVMValueRef base_ptr,
-	LLVMValueRef offset)
-{
-	struct gallivm_state *gallivm =
-		si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
-	LLVMValueRef gep_indices[2];
-	LLVMValueRef elem_ptr, loaded;
-
-	/* A constant 64-bit zero first, then the caller's element offset */
-	gep_indices[0] = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context),
-				      0, false);
-	gep_indices[1] = offset;
-
-	elem_ptr = LLVMBuildGEP(gallivm->builder, base_ptr, gep_indices, 2, "");
-	loaded = LLVMBuildLoad(gallivm->builder, elem_ptr, "");
-	LLVMSetMetadata(loaded, 1, si_shader_ctx->const_md);
-
-	return loaded;
-}
-
-/* Build the buffer index for an instanced vertex fetch:
- * (instance id + start instance), divided by the divisor when it is
- * larger than 1. */
-static LLVMValueRef get_instance_index_for_fetch(
-	struct radeon_llvm_context * radeon_bld,
-	unsigned divisor)
-{
-	struct lp_build_tgsi_context *bld_base = &radeon_bld->soa.bld_base;
-	struct si_shader_context *ctx = si_shader_context(bld_base);
-	struct gallivm_state *gallivm = bld_base->base.gallivm;
-	LLVMValueRef instance_id, start_instance, index;
-
-	instance_id = LLVMGetParam(radeon_bld->main_fn, ctx->param_instance_id);
-	start_instance = LLVMGetParam(radeon_bld->main_fn,
-				      SI_PARAM_START_INSTANCE);
-	index = LLVMBuildAdd(gallivm->builder, instance_id, start_instance, "");
-
-	if (divisor <= 1)
-		return index;
-
-	return LLVMBuildUDiv(gallivm->builder, index,
-			     lp_build_const_int32(gallivm, divisor), "");
-}
-
-/* Declare one vertex shader input: load its descriptor from the vertex
- * buffer list, fetch a vec4 through the "llvm.SI.vs.load.input" intrinsic
- * and store the four channels in radeon_bld.inputs[].
- *
- * A non-zero instance divisor (taken from the shader key) makes the fetch
- * use an instance-based index and marks the shader as using the instance
- * id; otherwise the vertex id parameter is used. */
-static void declare_input_vs(
- struct si_shader_context * si_shader_ctx,
- unsigned input_index,
- const struct tgsi_full_declaration *decl)
-{
- struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
- unsigned divisor = si_shader_ctx->shader->key.vs.instance_divisors[input_index];
-
- unsigned chan;
-
- LLVMValueRef t_list_ptr;
- LLVMValueRef t_offset;
- LLVMValueRef t_list;
- LLVMValueRef attribute_offset;
- LLVMValueRef buffer_index;
- LLVMValueRef args[3];
- LLVMTypeRef vec4_type;
- LLVMValueRef input;
-
- /* Load the T list */
- t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFER);
-
- t_offset = lp_build_const_int32(base->gallivm, input_index);
-
- t_list = build_indexed_load(si_shader_ctx, t_list_ptr, t_offset);
-
- /* Build the attribute offset */
- attribute_offset = lp_build_const_int32(base->gallivm, 0);
-
- if (divisor) {
- /* Build index from instance ID, start instance and divisor */
- si_shader_ctx->shader->shader.uses_instanceid = true;
- buffer_index = get_instance_index_for_fetch(&si_shader_ctx->radeon_bld, divisor);
- } else {
- /* Load the buffer index, which is always stored in VGPR0
- * for Vertex Shaders */
- buffer_index = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
- si_shader_ctx->param_vertex_id);
- }
-
- vec4_type = LLVMVectorType(base->elem_type, 4);
- args[0] = t_list;
- args[1] = attribute_offset;
- args[2] = buffer_index;
- input = build_intrinsic(base->gallivm->builder,
- "llvm.SI.vs.load.input", vec4_type, args, 3,
- LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
-
- /* Break up the vec4 into individual components */
- for (chan = 0; chan < 4; chan++) {
- LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan);
- /* XXX: Use a helper function for this. There is one in
- * tgsi_llvm.c. */
- si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
- LLVMBuildExtractElement(base->gallivm->builder,
- input, llvm_chan, "");
- }
-}
-
-/* Declare one fragment shader input and fill radeon_bld.inputs[] for it.
- *
- * - POSITION: taken from the SI_PARAM_POS_*_FLOAT parameters; the w channel
- *   is replaced by 1.0 / w.
- * - FACE: x is 1.0 when the front-face parameter compares greater than 0
- *   and 0.0 otherwise; y and z are 0.0, w is 1.0.
- * - Everything else takes the next interpolation slot (shader->ninterp) and
- *   is read with "llvm.SI.fs.interp", or with "llvm.SI.fs.constant" when no
- *   interpolation parameter applies (constant, or color with flat shading).
- *   Two-sided colors read front and back (the following slot) and select by
- *   face; FOG only reads x and forces y = z = 0.0, w = 1.0.
- */
-static void declare_input_fs(
- struct si_shader_context * si_shader_ctx,
- unsigned input_index,
- const struct tgsi_full_declaration *decl)
-{
- struct si_shader *shader = &si_shader_ctx->shader->shader;
- struct lp_build_context * base =
- &si_shader_ctx->radeon_bld.soa.bld_base.base;
- struct lp_build_context *uint =
- &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
- struct gallivm_state * gallivm = base->gallivm;
- LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
- LLVMValueRef main_fn = si_shader_ctx->radeon_bld.main_fn;
-
- LLVMValueRef interp_param;
- const char * intr_name;
-
- /* This value is:
- * [15:0] NewPrimMask (Bit mask for each quad. It is set if the
- * quad begins a new primitive. Bit 0 always needs
- * to be unset)
- * [31:16] ParamOffset
- *
- */
- LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK);
- LLVMValueRef attr_number;
-
- unsigned chan;
-
- if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- unsigned soa_index =
- radeon_llvm_reg_index_soa(input_index, chan);
- si_shader_ctx->radeon_bld.inputs[soa_index] =
- LLVMGetParam(main_fn, SI_PARAM_POS_X_FLOAT + chan);
-
- if (chan == 3)
- /* RCP for fragcoord.w */
- si_shader_ctx->radeon_bld.inputs[soa_index] =
- LLVMBuildFDiv(gallivm->builder,
- lp_build_const_float(gallivm, 1.0f),
- si_shader_ctx->radeon_bld.inputs[soa_index],
- "");
- }
- return;
- }
-
- if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
- LLVMValueRef face, is_face_positive;
-
- face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);
-
- is_face_positive = LLVMBuildFCmp(gallivm->builder,
- LLVMRealUGT, face,
- lp_build_const_float(gallivm, 0.0f),
- "");
-
- si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
- LLVMBuildSelect(gallivm->builder,
- is_face_positive,
- lp_build_const_float(gallivm, 1.0f),
- lp_build_const_float(gallivm, 0.0f),
- "");
- si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
- si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
- lp_build_const_float(gallivm, 0.0f);
- si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
- lp_build_const_float(gallivm, 1.0f);
-
- return;
- }
-
- /* NOTE(review): the slot is taken before the interpolation mode is
- * validated, so the "Unhandled interpolation mode" early return below
- * still consumes one - confirm this is intended. */
- shader->input[input_index].param_offset = shader->ninterp++;
- attr_number = lp_build_const_int32(gallivm,
- shader->input[input_index].param_offset);
-
- /* A zero interp_param selects the constant (non-interpolated) intrinsic */
- switch (decl->Interp.Interpolate) {
- case TGSI_INTERPOLATE_COLOR:
- if (si_shader_ctx->shader->key.ps.flatshade) {
- interp_param = 0;
- } else {
- if (decl->Interp.Centroid)
- interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID);
- else
- interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER);
- }
- break;
- case TGSI_INTERPOLATE_CONSTANT:
- interp_param = 0;
- break;
- case TGSI_INTERPOLATE_LINEAR:
- if (decl->Interp.Centroid)
- interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTROID);
- else
- interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTER);
- break;
- case TGSI_INTERPOLATE_PERSPECTIVE:
- if (decl->Interp.Centroid)
- interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID);
- else
- interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER);
- break;
- default:
- fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
- return;
- }
-
- intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
-
- /* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */
- if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
- si_shader_ctx->shader->key.ps.color_two_side) {
- LLVMValueRef args[4];
- LLVMValueRef face, is_face_positive;
- LLVMValueRef back_attr_number =
- lp_build_const_int32(gallivm,
- shader->input[input_index].param_offset + 1);
-
- face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);
-
- is_face_positive = LLVMBuildFCmp(gallivm->builder,
- LLVMRealUGT, face,
- lp_build_const_float(gallivm, 0.0f),
- "");
-
- args[2] = params;
- args[3] = interp_param;
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
- unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
- LLVMValueRef front, back;
-
- args[0] = llvm_chan;
- args[1] = attr_number;
- front = build_intrinsic(base->gallivm->builder, intr_name,
- input_type, args, args[3] ? 4 : 3,
- LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
-
- args[1] = back_attr_number;
- back = build_intrinsic(base->gallivm->builder, intr_name,
- input_type, args, args[3] ? 4 : 3,
- LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
-
- si_shader_ctx->radeon_bld.inputs[soa_index] =
- LLVMBuildSelect(gallivm->builder,
- is_face_positive,
- front,
- back,
- "");
- }
-
- /* the back color occupies a second interpolation slot */
- shader->ninterp++;
- } else if (decl->Semantic.Name == TGSI_SEMANTIC_FOG) {
- LLVMValueRef args[4];
-
- args[0] = uint->zero;
- args[1] = attr_number;
- args[2] = params;
- args[3] = interp_param;
- si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
- build_intrinsic(base->gallivm->builder, intr_name,
- input_type, args, args[3] ? 4 : 3,
- LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
- si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
- si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
- lp_build_const_float(gallivm, 0.0f);
- si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
- lp_build_const_float(gallivm, 1.0f);
- } else {
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- LLVMValueRef args[4];
- LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
- unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
- args[0] = llvm_chan;
- args[1] = attr_number;
- args[2] = params;
- args[3] = interp_param;
- si_shader_ctx->radeon_bld.inputs[soa_index] =
- build_intrinsic(base->gallivm->builder, intr_name,
- input_type, args, args[3] ? 4 : 3,
- LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
- }
- }
-}
-
-/* Input declaration entry point: hand the declaration to the vertex or
- * fragment specific routine depending on the shader type. Other shader
- * types only produce a warning on stderr. */
-static void declare_input(
-	struct radeon_llvm_context * radeon_bld,
-	unsigned input_index,
-	const struct tgsi_full_declaration *decl)
-{
-	struct si_shader_context *ctx =
-		si_shader_context(&radeon_bld->soa.bld_base);
-
-	switch (ctx->type) {
-	case TGSI_PROCESSOR_VERTEX:
-		declare_input_vs(ctx, input_index, decl);
-		break;
-	case TGSI_PROCESSOR_FRAGMENT:
-		declare_input_fs(ctx, input_index, decl);
-		break;
-	default:
-		fprintf(stderr, "Warning: Unsupported shader type,\n");
-		break;
-	}
-}
-
-/* Bind a TGSI system value (instance id or vertex id) to the matching
- * main-function parameter. Any other semantic asserts and leaves the slot
- * untouched. */
-static void declare_system_value(
-	struct radeon_llvm_context * radeon_bld,
-	unsigned index,
-	const struct tgsi_full_declaration *decl)
-{
-	struct si_shader_context *ctx =
-		si_shader_context(&radeon_bld->soa.bld_base);
-	int param;
-
-	switch (decl->Semantic.Name) {
-	case TGSI_SEMANTIC_INSTANCEID:
-		param = ctx->param_instance_id;
-		break;
-
-	case TGSI_SEMANTIC_VERTEXID:
-		param = ctx->param_vertex_id;
-		break;
-
-	default:
-		assert(!"unknown system value");
-		return;
-	}
-
-	radeon_bld->system_values[index] =
-		LLVMGetParam(radeon_bld->main_fn, param);
-}
-
-/**
- * Fetch a TGSI constant register operand.
- *
- * @param bld_base TGSI build context (also the si_shader_context)
- * @param reg      source register; its Dimension index, when present,
- *                 selects the constant buffer (buffer 0 otherwise)
- * @param type     type the result is bitcast to
- * @param swizzle  channel to fetch, or LP_CHAN_ALL to fetch all four
- *                 channels and gather them into one value
- *
- * Direct accesses return the value cached in constants[buf][idx].
- * Indirect accesses load the address register, scale it by 16, add
- * the byte offset of the element (idx * 4) and read the buffer through the
- * "llvm.SI.load.const" intrinsic.
- */
-static LLVMValueRef fetch_constant(
-	struct lp_build_tgsi_context * bld_base,
-	const struct tgsi_full_src_register *reg,
-	enum tgsi_opcode_type type,
-	unsigned swizzle)
-{
-	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
-	struct lp_build_context * base = &bld_base->base;
-	/* was garbled to a registered-trademark sign by an HTML entity decode */
-	const struct tgsi_ind_register *ireg = &reg->Indirect;
-	unsigned buf, idx;
-
-	LLVMValueRef args[2];
-	LLVMValueRef addr;
-	LLVMValueRef result;
-
-	if (swizzle == LP_CHAN_ALL) {
-		unsigned chan;
-		LLVMValueRef values[4];
-		for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
-			values[chan] = fetch_constant(bld_base, reg, type, chan);
-
-		return lp_build_gather_values(bld_base->base.gallivm, values, 4);
-	}
-
-	buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
-	idx = reg->Register.Index * 4 + swizzle;
-
-	if (!reg->Register.Indirect)
-		return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
-
-	args[0] = si_shader_ctx->const_resource[buf];
-	/* byte offset of the statically known part of the address */
-	args[1] = lp_build_const_int32(base->gallivm, idx * 4);
-	addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
-	addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
-	/* address register counts vec4 registers: 16 bytes each */
-	addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16);
-	args[1] = lp_build_add(&bld_base->uint_bld, addr, args[1]);
-
-	result = build_intrinsic(base->gallivm->builder, "llvm.SI.load.const", base->elem_type,
-				 args, 2, LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
-
-	return bitcast(bld_base, type, result);
-}
-
-/* Initialize arguments for the shader export intrinsic */
-static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
- struct tgsi_full_declaration *d,
- unsigned index,
- unsigned target,
- LLVMValueRef *args)
-{
- struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
- struct lp_build_context *uint =
- &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
- struct lp_build_context *base = &bld_base->base;
- unsigned compressed = 0;
- unsigned chan;
-
- if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
- int cbuf = target - V_008DFC_SQ_EXP_MRT;
-
- if (cbuf >= 0 && cbuf < 8) {
- compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
-
- if (compressed)
- si_shader_ctx->shader->spi_shader_col_format |=
- V_028714_SPI_SHADER_FP16_ABGR << (4 * cbuf);
- else
- si_shader_ctx->shader->spi_shader_col_format |=
- V_028714_SPI_SHADER_32_ABGR << (4 * cbuf);
-
- si_shader_ctx->shader->cb_shader_mask |= 0xf << (4 * cbuf);
- }
- }
-
- if (compressed) {
- /* Pixel shader needs to pack output values before export */
- for (chan = 0; chan < 2; chan++ ) {
- LLVMValueRef *out_ptr =
- si_shader_ctx->radeon_bld.soa.outputs[index];
- args[0] = LLVMBuildLoad(base->gallivm->builder,
- out_ptr[2 * chan], "");
- args[1] = LLVMBuildLoad(base->gallivm->builder,
- out_ptr[2 * chan + 1], "");
- args[chan + 5] =
- build_intrinsic(base->gallivm->builder,
- "llvm.SI.packf16",
- LLVMInt32TypeInContext(base->gallivm->context),
- args, 2,
- LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
- args[chan + 7] = args[chan + 5] =
- LLVMBuildBitCast(base->gallivm->builder,
- args[chan + 5],
- LLVMFloatTypeInContext(base->gallivm->context),
- "");
- }
-
- /* Set COMPR flag */
- args[4] = uint->one;
- } else {
- for (chan = 0; chan < 4; chan++ ) {
- LLVMValueRef out_ptr =
- si_shader_ctx->radeon_bld.soa.outputs[index][chan];
- /* +5 because the first output value will be
- * the 6th argument to the intrinsic. */
- args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
- out_ptr, "");
- }
-
- /* Clear COMPR flag */
- args[4] = uint->zero;
- }
-
- /* XXX: This controls which components of the output
- * registers actually get exported. (e.g bit 0 means export
- * X component, bit 1 means export Y component, etc.) I'm
- * hard coding this to 0xf for now. In the future, we might
- * want to do something else. */
- args[0] = lp_build_const_int32(base->gallivm, 0xf);
-
- /* Specify whether the EXEC mask represents the valid mask */
- args[1] = uint->zero;
-
- /* Specify whether this is the last export */
- args[2] = uint->zero;
-
- /* Specify the target we are exporting */
- args[3] = lp_build_const_int32(base->gallivm, target);
-
- /* XXX: We probably need to keep track of the output
- * values, so we know what we are passing to the next
- * stage. */
-}
-
-static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
- unsigned index)
-{
- struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = bld_base->base.gallivm;
-
- if (si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) {
- LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][3];
- LLVMValueRef alpha_ref = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
- SI_PARAM_ALPHA_REF);
-
- LLVMValueRef alpha_pass =
- lp_build_cmp(&bld_base->base,
- si_shader_ctx->shader->key.ps.alpha_func,
- LLVMBuildLoad(gallivm->builder, out_ptr, ""),
- alpha_ref);
- LLVMValueRef arg =
- lp_build_select(&bld_base->base,
- alpha_pass,
- lp_build_const_float(gallivm, 1.0f),
- lp_build_const_float(gallivm, -1.0f));
-
- build_intrinsic(gallivm->builder,
- "llvm.AMDGPU.kill",
- LLVMVoidTypeInContext(gallivm->context),
- &arg, 1, 0);
- } else {
- build_intrinsic(gallivm->builder,
- "llvm.AMDGPU.kilp",
- LLVMVoidTypeInContext(gallivm->context),
- NULL, 0, 0);
- }
-}
-
-static void si_alpha_to_one(struct lp_build_tgsi_context *bld_base,
- unsigned index)
-{
- struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
-
- /* set alpha to one */
- LLVMBuildStore(bld_base->base.gallivm->builder,
- bld_base->base.one,
- si_shader_ctx->radeon_bld.soa.outputs[index][3]);
-}
-
-static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
- LLVMValueRef (*pos)[9], unsigned index)
-{
- struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
- struct si_pipe_shader *shader = si_shader_ctx->shader;
- struct lp_build_context *base = &bld_base->base;
- struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
- unsigned reg_index;
- unsigned chan;
- unsigned const_chan;
- LLVMValueRef out_elts[4];
- LLVMValueRef base_elt;
- LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
- LLVMValueRef constbuf_index = lp_build_const_int32(base->gallivm, NUM_PIPE_CONST_BUFFERS);
- LLVMValueRef const_resource = build_indexed_load(si_shader_ctx, ptr, constbuf_index);
-
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][chan];
- out_elts[chan] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
- }
-
- for (reg_index = 0; reg_index < 2; reg_index ++) {
- LLVMValueRef *args = pos[2 + reg_index];
-
- if (!(shader->key.vs.ucps_enabled & (1 << reg_index)))
- continue;
-
- shader->shader.clip_dist_write |= 0xf << (4 * reg_index);
-
- args[5] =
- args[6] =
- args[7] =
- args[8] = lp_build_const_float(base->gallivm, 0.0f);
-
- /* Compute dot products of position and user clip plane vectors */
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) {
- args[0] = const_resource;
- args[1] = lp_build_const_int32(base->gallivm,
- ((reg_index * 4 + chan) * 4 +
- const_chan) * 4);
- base_elt = build_intrinsic(base->gallivm->builder,
- "llvm.SI.load.const",
- base->elem_type,
- args, 2,
- LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
- args[5 + chan] =
- lp_build_add(base, args[5 + chan],
- lp_build_mul(base, base_elt,
- out_elts[const_chan]));
- }
- }
-
- args[0] = lp_build_const_int32(base->gallivm, 0xf);
- args[1] = uint->zero;
- args[2] = uint->zero;
- args[3] = lp_build_const_int32(base->gallivm,
- V_008DFC_SQ_EXP_POS + 2 + reg_index);
- args[4] = uint->zero;
- }
-}
-
-static void si_dump_streamout(struct pipe_stream_output_info *so)
-{
- unsigned i;
-
- if (so->num_outputs)
- fprintf(stderr, "STREAMOUT\n");
-
- for (i = 0; i < so->num_outputs; i++) {
- unsigned mask = ((1 << so->output[i].num_components) - 1) <<
- so->output[i].start_component;
- fprintf(stderr, " %i: BUF%i[%i..%i] <- OUT[%i].%s%s%s%s\n",
- i, so->output[i].output_buffer,
- so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
- so->output[i].register_index,
- mask & 1 ? "x" : "",
- mask & 2 ? "y" : "",
- mask & 4 ? "z" : "",
- mask & 8 ? "w" : "");
- }
-}
-
-/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
- * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
- * or v4i32 (num_channels=3,4). */
-static void build_tbuffer_store(struct si_shader_context *shader,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- unsigned num_channels,
- LLVMValueRef vaddr,
- LLVMValueRef soffset,
- unsigned inst_offset,
- unsigned dfmt,
- unsigned nfmt,
- unsigned offen,
- unsigned idxen,
- unsigned glc,
- unsigned slc,
- unsigned tfe)
-{
- struct gallivm_state *gallivm = &shader->radeon_bld.gallivm;
- LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
- LLVMValueRef args[] = {
- rsrc,
- vdata,
- LLVMConstInt(i32, num_channels, 0),
- vaddr,
- soffset,
- LLVMConstInt(i32, inst_offset, 0),
- LLVMConstInt(i32, dfmt, 0),
- LLVMConstInt(i32, nfmt, 0),
- LLVMConstInt(i32, offen, 0),
- LLVMConstInt(i32, idxen, 0),
- LLVMConstInt(i32, glc, 0),
- LLVMConstInt(i32, slc, 0),
- LLVMConstInt(i32, tfe, 0)
- };
-
- /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
- unsigned func = CLAMP(num_channels, 1, 3) - 1;
- const char *types[] = {"i32", "v2i32", "v4i32"};
- char name[256];
- snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
-
- lp_build_intrinsic(gallivm->builder, name,
- LLVMVoidTypeInContext(gallivm->context),
- args, Elements(args));
-}
-
-static void build_streamout_store(struct si_shader_context *shader,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- unsigned num_channels,
- LLVMValueRef vaddr,
- LLVMValueRef soffset,
- unsigned inst_offset)
-{
- static unsigned dfmt[] = {
- V_008F0C_BUF_DATA_FORMAT_32,
- V_008F0C_BUF_DATA_FORMAT_32_32,
- V_008F0C_BUF_DATA_FORMAT_32_32_32,
- V_008F0C_BUF_DATA_FORMAT_32_32_32_32
- };
- assert(num_channels >= 1 && num_channels <= 4);
-
- build_tbuffer_store(shader, rsrc, vdata, num_channels, vaddr, soffset,
- inst_offset, dfmt[num_channels-1],
- V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0);
-}
-
-/* On SI, the vertex shader is responsible for writing streamout data
- * to buffers. */
-static void si_llvm_emit_streamout(struct si_shader_context *shader)
-{
- struct pipe_stream_output_info *so = &shader->shader->selector->so;
- struct gallivm_state *gallivm = &shader->radeon_bld.gallivm;
- LLVMBuilderRef builder = gallivm->builder;
- int i, j;
- struct lp_build_if_state if_ctx;
-
- LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
-
- LLVMValueRef so_param =
- LLVMGetParam(shader->radeon_bld.main_fn,
- shader->param_streamout_config);
-
- /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
- LLVMValueRef so_vtx_count =
- LLVMBuildAnd(builder,
- LLVMBuildLShr(builder, so_param,
- LLVMConstInt(i32, 16, 0), ""),
- LLVMConstInt(i32, 127, 0), "");
-
- LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32,
- NULL, 0, LLVMReadNoneAttribute);
-
- /* can_emit = tid < so_vtx_count; */
- LLVMValueRef can_emit =
- LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
-
- /* Emit the streamout code conditionally. This actually avoids
- * out-of-bounds buffer access. The hw tells us via the SGPR
- * (so_vtx_count) which threads are allowed to emit streamout data. */
- lp_build_if(&if_ctx, gallivm, can_emit);
- {
- /* The buffer offset is computed as follows:
- * ByteOffset = streamout_offset[buffer_id]*4 +
- * (streamout_write_index + thread_id)*stride[buffer_id] +
- * attrib_offset
- */
-
- LLVMValueRef so_write_index =
- LLVMGetParam(shader->radeon_bld.main_fn,
- shader->param_streamout_write_index);
-
- /* Compute (streamout_write_index + thread_id). */
- so_write_index = LLVMBuildAdd(builder, so_write_index, tid, "");
-
- /* Compute the write offset for each enabled buffer. */
- LLVMValueRef so_write_offset[4] = {};
- for (i = 0; i < 4; i++) {
- if (!so->stride[i])
- continue;
-
- LLVMValueRef so_offset = LLVMGetParam(shader->radeon_bld.main_fn,
- shader->param_streamout_offset[i]);
- so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(i32, 4, 0), "");
-
- so_write_offset[i] = LLVMBuildMul(builder, so_write_index,
- LLVMConstInt(i32, so->stride[i]*4, 0), "");
- so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, "");
- }
-
- LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS] = shader->radeon_bld.soa.outputs;
-
- /* Write streamout data. */
- for (i = 0; i < so->num_outputs; i++) {
- unsigned buf_idx = so->output[i].output_buffer;
- unsigned reg = so->output[i].register_index;
- unsigned start = so->output[i].start_component;
- unsigned num_comps = so->output[i].num_components;
- LLVMValueRef out[4];
-
- assert(num_comps && num_comps <= 4);
- if (!num_comps || num_comps > 4)
- continue;
-
- /* Load the output as int. */
- for (j = 0; j < num_comps; j++) {
- out[j] = LLVMBuildLoad(builder, outputs[reg][start+j], "");
- out[j] = LLVMBuildBitCast(builder, out[j], i32, "");
- }
-
- /* Pack the output. */
- LLVMValueRef vdata = NULL;
-
- switch (num_comps) {
- case 1: /* as i32 */
- vdata = out[0];
- break;
- case 2: /* as v2i32 */
- case 3: /* as v4i32 (aligned to 4) */
- case 4: /* as v4i32 */
- vdata = LLVMGetUndef(LLVMVectorType(i32, util_next_power_of_two(num_comps)));
- for (j = 0; j < num_comps; j++) {
- vdata = LLVMBuildInsertElement(builder, vdata, out[j],
- LLVMConstInt(i32, j, 0), "");
- }
- break;
- }
-
- build_streamout_store(shader, shader->so_buffers[buf_idx],
- vdata, num_comps,
- so_write_offset[buf_idx],
- LLVMConstInt(i32, 0, 0),
- so->output[i].dst_offset*4);
- }
- }
- lp_build_endif(&if_ctx);
-}
-
-
-static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
-{
- struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
- struct si_shader * shader = &si_shader_ctx->shader->shader;
- struct lp_build_context * base = &bld_base->base;
- struct lp_build_context * uint =
- &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
- struct tgsi_parse_context *parse = &si_shader_ctx->parse;
- LLVMValueRef args[9];
- LLVMValueRef last_args[9] = { 0 };
- LLVMValueRef pos_args[4][9] = { { 0 } };
- unsigned semantic_name;
- unsigned param_count = 0;
- int depth_index = -1, stencil_index = -1, psize_index = -1, edgeflag_index = -1;
- int layer_index = -1;
- int i;
-
- if (si_shader_ctx->shader->selector->so.num_outputs) {
- si_llvm_emit_streamout(si_shader_ctx);
- }
-
- while (!tgsi_parse_end_of_tokens(parse)) {
- struct tgsi_full_declaration *d =
- &parse->FullToken.FullDeclaration;
- unsigned target;
- unsigned index;
-
- tgsi_parse_token(parse);
-
- if (parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_PROPERTY &&
- parse->FullToken.FullProperty.Property.PropertyName ==
- TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS)
- shader->fs_write_all = TRUE;
-
- if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
- continue;
-
- switch (d->Declaration.File) {
- case TGSI_FILE_INPUT:
- i = shader->ninput++;
- assert(i < Elements(shader->input));
- shader->input[i].name = d->Semantic.Name;
- shader->input[i].sid = d->Semantic.Index;
- shader->input[i].interpolate = d->Interp.Interpolate;
- shader->input[i].centroid = d->Interp.Centroid;
- continue;
-
- case TGSI_FILE_OUTPUT:
- i = shader->noutput++;
- assert(i < Elements(shader->output));
- shader->output[i].name = d->Semantic.Name;
- shader->output[i].sid = d->Semantic.Index;
- shader->output[i].interpolate = d->Interp.Interpolate;
- break;
-
- default:
- continue;
- }
-
- semantic_name = d->Semantic.Name;
-handle_semantic:
- for (index = d->Range.First; index <= d->Range.Last; index++) {
- /* Select the correct target */
- switch(semantic_name) {
- case TGSI_SEMANTIC_PSIZE:
- shader->vs_out_misc_write = true;
- shader->vs_out_point_size = true;
- psize_index = index;
- continue;
- case TGSI_SEMANTIC_EDGEFLAG:
- shader->vs_out_misc_write = true;
- shader->vs_out_edgeflag = true;
- edgeflag_index = index;
- continue;
- case TGSI_SEMANTIC_LAYER:
- shader->vs_out_misc_write = true;
- shader->vs_out_layer = true;
- layer_index = index;
- continue;
- case TGSI_SEMANTIC_POSITION:
- if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
- target = V_008DFC_SQ_EXP_POS;
- break;
- } else {
- depth_index = index;
- continue;
- }
- case TGSI_SEMANTIC_STENCIL:
- stencil_index = index;
- continue;
- case TGSI_SEMANTIC_COLOR:
- if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
- case TGSI_SEMANTIC_BCOLOR:
- target = V_008DFC_SQ_EXP_PARAM + param_count;
- shader->output[i].param_offset = param_count;
- param_count++;
- } else {
- target = V_008DFC_SQ_EXP_MRT + shader->output[i].sid;
- if (si_shader_ctx->shader->key.ps.alpha_to_one) {
- si_alpha_to_one(bld_base, index);
- }
- if (shader->output[i].sid == 0 &&
- si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
- si_alpha_test(bld_base, index);
- }
- break;
- case TGSI_SEMANTIC_CLIPDIST:
- if (!(si_shader_ctx->shader->key.vs.ucps_enabled &
- (1 << d->Semantic.Index)))
- continue;
- shader->clip_dist_write |=
- d->Declaration.UsageMask << (d->Semantic.Index << 2);
- target = V_008DFC_SQ_EXP_POS + 2 + d->Semantic.Index;
- break;
- case TGSI_SEMANTIC_CLIPVERTEX:
- si_llvm_emit_clipvertex(bld_base, pos_args, index);
- continue;
- case TGSI_SEMANTIC_FOG:
- case TGSI_SEMANTIC_GENERIC:
- target = V_008DFC_SQ_EXP_PARAM + param_count;
- shader->output[i].param_offset = param_count;
- param_count++;
- break;
- default:
- target = 0;
- fprintf(stderr,
- "Warning: SI unhandled output type:%d\n",
- semantic_name);
- }
-
- si_llvm_init_export_args(bld_base, d, index, target, args);
-
- if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
- target >= V_008DFC_SQ_EXP_POS &&
- target <= (V_008DFC_SQ_EXP_POS + 3)) {
- memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
- args, sizeof(args));
- } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT &&
- semantic_name == TGSI_SEMANTIC_COLOR) {
- /* If there is an export instruction waiting to be emitted, do so now. */
- if (last_args[0]) {
- lp_build_intrinsic(base->gallivm->builder,
- "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- last_args, 9);
- }
-
- /* This instruction will be emitted at the end of the shader. */
- memcpy(last_args, args, sizeof(args));
-
- /* Handle FS_COLOR0_WRITES_ALL_CBUFS. */
- if (shader->fs_write_all && shader->output[i].sid == 0 &&
- si_shader_ctx->shader->key.ps.nr_cbufs > 1) {
- for (int c = 1; c < si_shader_ctx->shader->key.ps.nr_cbufs; c++) {
- si_llvm_init_export_args(bld_base, d, index,
- V_008DFC_SQ_EXP_MRT + c, args);
- lp_build_intrinsic(base->gallivm->builder,
- "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 9);
- }
- }
- } else {
- lp_build_intrinsic(base->gallivm->builder,
- "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 9);
- }
- }
-
- if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
- semantic_name = TGSI_SEMANTIC_GENERIC;
- goto handle_semantic;
- }
- }
-
- if (depth_index >= 0 || stencil_index >= 0) {
- LLVMValueRef out_ptr;
- unsigned mask = 0;
-
- /* Specify the target we are exporting */
- args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);
-
- if (depth_index >= 0) {
- out_ptr = si_shader_ctx->radeon_bld.soa.outputs[depth_index][2];
- args[5] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
- mask |= 0x1;
-
- if (stencil_index < 0) {
- args[6] =
- args[7] =
- args[8] = args[5];
- }
- }
-
- if (stencil_index >= 0) {
- out_ptr = si_shader_ctx->radeon_bld.soa.outputs[stencil_index][1];
- args[7] =
- args[8] =
- args[6] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
- /* Only setting the stencil component bit (0x2) here
- * breaks some stencil piglit tests
- */
- mask |= 0x3;
-
- if (depth_index < 0)
- args[5] = args[6];
- }
-
- /* Specify which components to enable */
- args[0] = lp_build_const_int32(base->gallivm, mask);
-
- args[1] =
- args[2] =
- args[4] = uint->zero;
-
- if (last_args[0])
- lp_build_intrinsic(base->gallivm->builder,
- "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 9);
- else
- memcpy(last_args, args, sizeof(args));
- }
-
- if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
- unsigned pos_idx = 0;
-
- /* We need to add the position output manually if it's missing. */
- if (!pos_args[0][0]) {
- pos_args[0][0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */
- pos_args[0][1] = uint->zero; /* EXEC mask */
- pos_args[0][2] = uint->zero; /* last export? */
- pos_args[0][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS);
- pos_args[0][4] = uint->zero; /* COMPR flag */
- pos_args[0][5] = base->zero; /* X */
- pos_args[0][6] = base->zero; /* Y */
- pos_args[0][7] = base->zero; /* Z */
- pos_args[0][8] = base->one; /* W */
- }
-
- /* Write the misc vector (point size, edgeflag, layer, viewport). */
- if (shader->vs_out_misc_write) {
- pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */
- shader->vs_out_point_size |
- (shader->vs_out_edgeflag << 1) |
- (shader->vs_out_layer << 2));
- pos_args[1][1] = uint->zero; /* EXEC mask */
- pos_args[1][2] = uint->zero; /* last export? */
- pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1);
- pos_args[1][4] = uint->zero; /* COMPR flag */
- pos_args[1][5] = base->zero; /* X */
- pos_args[1][6] = base->zero; /* Y */
- pos_args[1][7] = base->zero; /* Z */
- pos_args[1][8] = base->zero; /* W */
-
- if (shader->vs_out_point_size) {
- pos_args[1][5] = LLVMBuildLoad(base->gallivm->builder,
- si_shader_ctx->radeon_bld.soa.outputs[psize_index][0], "");
- }
-
- if (shader->vs_out_edgeflag) {
- LLVMValueRef output = LLVMBuildLoad(base->gallivm->builder,
- si_shader_ctx->radeon_bld.soa.outputs[edgeflag_index][0], "");
-
- /* The output is a float, but the hw expects an integer
- * with the first bit containing the edge flag. */
- output = LLVMBuildFPToUI(base->gallivm->builder, output,
- bld_base->uint_bld.elem_type, "");
-
- output = lp_build_min(&bld_base->int_bld, output, bld_base->int_bld.one);
-
- /* The LLVM intrinsic expects a float. */
- pos_args[1][6] = LLVMBuildBitCast(base->gallivm->builder, output,
- base->elem_type, "");
- }
-
- if (shader->vs_out_layer) {
- pos_args[1][7] = LLVMBuildLoad(base->gallivm->builder,
- si_shader_ctx->radeon_bld.soa.outputs[layer_index][0], "");
- }
- }
-
- for (i = 0; i < 4; i++)
- if (pos_args[i][0])
- shader->nr_pos_exports++;
-
- for (i = 0; i < 4; i++) {
- if (!pos_args[i][0])
- continue;
-
- /* Specify the target we are exporting */
- pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);
-
- if (pos_idx == shader->nr_pos_exports)
- /* Specify that this is the last export */
- pos_args[i][2] = uint->one;
-
- lp_build_intrinsic(base->gallivm->builder,
- "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- pos_args[i], 9);
- }
- } else {
- if (!last_args[0]) {
- /* Specify which components to enable */
- last_args[0] = lp_build_const_int32(base->gallivm, 0x0);
-
- /* Specify the target we are exporting */
- last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);
-
- /* Set COMPR flag to zero to export data as 32-bit */
- last_args[4] = uint->zero;
-
- /* dummy bits */
- last_args[5]= uint->zero;
- last_args[6]= uint->zero;
- last_args[7]= uint->zero;
- last_args[8]= uint->zero;
-
- si_shader_ctx->shader->spi_shader_col_format |=
- V_028714_SPI_SHADER_32_ABGR;
- si_shader_ctx->shader->cb_shader_mask |= S_02823C_OUTPUT0_ENABLE(0xf);
- }
-
- /* Specify whether the EXEC mask represents the valid mask */
- last_args[1] = uint->one;
-
- /* Specify that this is the last export */
- last_args[2] = lp_build_const_int32(base->gallivm, 1);
-
- lp_build_intrinsic(base->gallivm->builder,
- "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- last_args, 9);
- }
-}
-
-static const struct lp_build_tgsi_action txf_action;
-
-static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data);
-
-static void tex_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = bld_base->base.gallivm;
- const struct tgsi_full_instruction * inst = emit_data->inst;
- unsigned opcode = inst->Instruction.Opcode;
- unsigned target = inst->Texture.Texture;
- LLVMValueRef coords[4];
- LLVMValueRef address[16];
- int ref_pos;
- unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
- unsigned count = 0;
- unsigned chan;
- unsigned sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
- unsigned sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
-
- if (target == TGSI_TEXTURE_BUFFER) {
- LLVMTypeRef i128 = LLVMIntTypeInContext(gallivm->context, 128);
- LLVMTypeRef v2i128 = LLVMVectorType(i128, 2);
- LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm->context);
- LLVMTypeRef v16i8 = LLVMVectorType(i8, 16);
-
- /* Truncate v32i8 to v16i8. */
- LLVMValueRef res = si_shader_ctx->resources[sampler_index];
- res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
- res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.zero, "");
- res = LLVMBuildBitCast(gallivm->builder, res, v16i8, "");
-
- emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
- emit_data->args[0] = res;
- emit_data->args[1] = bld_base->uint_bld.zero;
- emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
- emit_data->arg_count = 3;
- return;
- }
-
- /* Fetch and project texture coordinates */
- coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
- for (chan = 0; chan < 3; chan++ ) {
- coords[chan] = lp_build_emit_fetch(bld_base,
- emit_data->inst, 0,
- chan);
- if (opcode == TGSI_OPCODE_TXP)
- coords[chan] = lp_build_emit_llvm_binary(bld_base,
- TGSI_OPCODE_DIV,
- coords[chan],
- coords[3]);
- }
-
- if (opcode == TGSI_OPCODE_TXP)
- coords[3] = bld_base->base.one;
-
- /* Pack LOD bias value */
- if (opcode == TGSI_OPCODE_TXB)
- address[count++] = coords[3];
-
- if (target == TGSI_TEXTURE_CUBE || target == TGSI_TEXTURE_SHADOWCUBE)
- radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
-
- /* Pack depth comparison value */
- switch (target) {
- case TGSI_TEXTURE_SHADOW1D:
- case TGSI_TEXTURE_SHADOW1D_ARRAY:
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_SHADOWRECT:
- case TGSI_TEXTURE_SHADOWCUBE:
- case TGSI_TEXTURE_SHADOW2D_ARRAY:
- assert(ref_pos >= 0);
- address[count++] = coords[ref_pos];
- break;
- case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
- address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
- }
-
- /* Pack user derivatives */
- if (opcode == TGSI_OPCODE_TXD) {
- for (chan = 0; chan < 2; chan++) {
- address[count++] = lp_build_emit_fetch(bld_base, inst, 1, chan);
- if (num_coords > 1)
- address[count++] = lp_build_emit_fetch(bld_base, inst, 2, chan);
- }
- }
-
- /* Pack texture coordinates */
- address[count++] = coords[0];
- if (num_coords > 1)
- address[count++] = coords[1];
- if (num_coords > 2)
- address[count++] = coords[2];
-
- /* Pack LOD or sample index */
- if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF)
- address[count++] = coords[3];
-
- if (count > 16) {
- assert(!"Cannot handle more than 16 texture address parameters");
- count = 16;
- }
-
- for (chan = 0; chan < count; chan++ ) {
- address[chan] = LLVMBuildBitCast(gallivm->builder,
- address[chan],
- LLVMInt32TypeInContext(gallivm->context),
- "");
- }
-
- /* Adjust the sample index according to FMASK.
- *
- * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
- * which is the identity mapping. Each nibble says which physical sample
- * should be fetched to get that sample.
- *
- * For example, 0x11111100 means there are only 2 samples stored and
- * the second sample covers 3/4 of the pixel. When reading samples 0
- * and 1, return physical sample 0 (determined by the first two 0s
- * in FMASK), otherwise return physical sample 1.
- *
- * The sample index should be adjusted as follows:
- * sample_index = (fmask >> (sample_index * 4)) & 0xF;
- */
- if (target == TGSI_TEXTURE_2D_MSAA ||
- target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
- struct lp_build_context *uint_bld = &bld_base->uint_bld;
- struct lp_build_emit_data txf_emit_data = *emit_data;
- LLVMValueRef txf_address[4];
- unsigned txf_count = count;
-
- memcpy(txf_address, address, sizeof(txf_address));
-
- if (target == TGSI_TEXTURE_2D_MSAA) {
- txf_address[2] = bld_base->uint_bld.zero;
- }
- txf_address[3] = bld_base->uint_bld.zero;
-
- /* Pad to a power-of-two size. */
- while (txf_count < util_next_power_of_two(txf_count))
- txf_address[txf_count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-
- /* Read FMASK using TXF. */
- txf_emit_data.chan = 0;
- txf_emit_data.dst_type = LLVMVectorType(
- LLVMInt32TypeInContext(bld_base->base.gallivm->context), 4);
- txf_emit_data.args[0] = lp_build_gather_values(gallivm, txf_address, txf_count);
- txf_emit_data.args[1] = si_shader_ctx->resources[FMASK_TEX_OFFSET + sampler_index];
- txf_emit_data.args[2] = lp_build_const_int32(bld_base->base.gallivm,
- target == TGSI_TEXTURE_2D_MSAA ? TGSI_TEXTURE_2D : TGSI_TEXTURE_2D_ARRAY);
- txf_emit_data.arg_count = 3;
-
- build_tex_intrinsic(&txf_action, bld_base, &txf_emit_data);
-
- /* Initialize some constants. */
- LLVMValueRef four = LLVMConstInt(uint_bld->elem_type, 4, 0);
- LLVMValueRef F = LLVMConstInt(uint_bld->elem_type, 0xF, 0);
-
- /* Apply the formula. */
- LLVMValueRef fmask =
- LLVMBuildExtractElement(gallivm->builder,
- txf_emit_data.output[0],
- uint_bld->zero, "");
-
- unsigned sample_chan = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3;
-
- LLVMValueRef sample_index4 =
- LLVMBuildMul(gallivm->builder, address[sample_chan], four, "");
-
- LLVMValueRef shifted_fmask =
- LLVMBuildLShr(gallivm->builder, fmask, sample_index4, "");
-
- LLVMValueRef final_sample =
- LLVMBuildAnd(gallivm->builder, shifted_fmask, F, "");
-
- /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
- * resource descriptor is 0 (invalid),
- */
- LLVMValueRef fmask_desc =
- LLVMBuildBitCast(gallivm->builder,
- si_shader_ctx->resources[FMASK_TEX_OFFSET + sampler_index],
- LLVMVectorType(uint_bld->elem_type, 8), "");
-
- LLVMValueRef fmask_word1 =
- LLVMBuildExtractElement(gallivm->builder, fmask_desc,
- uint_bld->one, "");
-
- LLVMValueRef word1_is_nonzero =
- LLVMBuildICmp(gallivm->builder, LLVMIntNE,
- fmask_word1, uint_bld->zero, "");
-
- /* Replace the MSAA sample index. */
- address[sample_chan] =
- LLVMBuildSelect(gallivm->builder, word1_is_nonzero,
- final_sample, address[sample_chan], "");
- }
-
- /* Resource */
- emit_data->args[1] = si_shader_ctx->resources[sampler_index];
-
- if (opcode == TGSI_OPCODE_TXF) {
- /* add tex offsets */
- if (inst->Texture.NumOffsets) {
- struct lp_build_context *uint_bld = &bld_base->uint_bld;
- struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
- const struct tgsi_texture_offset * off = inst->TexOffsets;
-
- assert(inst->Texture.NumOffsets == 1);
-
- switch (target) {
- case TGSI_TEXTURE_3D:
- address[2] = lp_build_add(uint_bld, address[2],
- bld->immediates[off->Index][off->SwizzleZ]);
- /* fall through */
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_RECT:
- case TGSI_TEXTURE_SHADOWRECT:
- case TGSI_TEXTURE_2D_ARRAY:
- case TGSI_TEXTURE_SHADOW2D_ARRAY:
- address[1] =
- lp_build_add(uint_bld, address[1],
- bld->immediates[off->Index][off->SwizzleY]);
- /* fall through */
- case TGSI_TEXTURE_1D:
- case TGSI_TEXTURE_SHADOW1D:
- case TGSI_TEXTURE_1D_ARRAY:
- case TGSI_TEXTURE_SHADOW1D_ARRAY:
- address[0] =
- lp_build_add(uint_bld, address[0],
- bld->immediates[off->Index][off->SwizzleX]);
- break;
- /* texture offsets do not apply to other texture targets */
- }
- }
-
- emit_data->dst_type = LLVMVectorType(
- LLVMInt32TypeInContext(bld_base->base.gallivm->context),
- 4);
-
- emit_data->arg_count = 3;
- } else {
- /* Sampler */
- emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
-
- emit_data->dst_type = LLVMVectorType(
- LLVMFloatTypeInContext(bld_base->base.gallivm->context),
- 4);
-
- emit_data->arg_count = 4;
- }
-
- /* Dimensions */
- emit_data->args[emit_data->arg_count - 1] =
- lp_build_const_int32(bld_base->base.gallivm, target);
-
- /* Pad to power of two vector */
- while (count < util_next_power_of_two(count))
- address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
-
- emit_data->args[0] = lp_build_gather_values(gallivm, address, count);
-}
-/* Emit callback shared by the texture sampling and fetch opcodes.
- *
- * Calls an LLVM intrinsic with the arguments already stored in
- * emit_data->args and writes the result to
- * emit_data->output[emit_data->chan].
- *
- * Buffer textures are read through llvm.SI.vs.load.input. Every other
- * target uses action->intr_name followed by a v<N>i32 suffix, where N is
- * the vector size of args[0].
- */
-static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- struct lp_build_context * base = &bld_base->base;
- char intr_name[127];
-
- if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
- emit_data->output[emit_data->chan] = build_intrinsic(
- base->gallivm->builder,
- "llvm.SI.vs.load.input", emit_data->dst_type,
- emit_data->args, emit_data->arg_count,
- LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
- return;
- }
- /* NOTE(review): sprintf is unbounded while intr_name holds 127 bytes;
- * this relies on action->intr_name being short. */
- sprintf(intr_name, "%sv%ui32", action->intr_name,
- LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
-
- emit_data->output[emit_data->chan] = build_intrinsic(
- base->gallivm->builder, intr_name, emit_data->dst_type,
- emit_data->args, emit_data->arg_count,
- LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
-}
-/* fetch_args callback for TXQ.
- *
- * Buffer textures: args[0] receives element 2 of the resource descriptor
- * viewed as <8 x i32>; arg_count and dst_type are left untouched on this
- * path.
- *
- * All other targets: args = { mip level (src0.x), resource descriptor,
- * texture target as an i32 constant }, arg_count = 3 and
- * dst_type = <4 x i32>.
- */
-static void txq_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
- const struct tgsi_full_instruction *inst = emit_data->inst;
- struct gallivm_state *gallivm = bld_base->base.gallivm;
-
- if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
- LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
- LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
-
- /* Read the size from the buffer descriptor directly. */
- LLVMValueRef size = si_shader_ctx->resources[inst->Src[1].Register.Index];
- size = LLVMBuildBitCast(gallivm->builder, size, v8i32, "");
- size = LLVMBuildExtractElement(gallivm->builder, size,
- lp_build_const_int32(gallivm, 2), "");
- emit_data->args[0] = size;
- return;
- }
-
- /* Mip level */
- emit_data->args[0] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
-
- /* Resource */
- emit_data->args[1] = si_shader_ctx->resources[inst->Src[1].Register.Index];
-
- /* Dimensions */
- emit_data->args[2] = lp_build_const_int32(bld_base->base.gallivm,
- inst->Texture.Texture);
-
- emit_data->arg_count = 3;
-
- emit_data->dst_type = LLVMVectorType(
- LLVMInt32TypeInContext(bld_base->base.gallivm->context),
- 4);
-}
-/* Emit callback for TXQ.
- *
- * For buffer textures the value already placed in args[0] is returned
- * as the result; every other target is handed to
- * build_tgsi_intrinsic_nomem().
- */
-static void build_txq_intrinsic(const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
- /* Just return the buffer size. */
- emit_data->output[emit_data->chan] = emit_data->args[0];
- return;
- }
-
- build_tgsi_intrinsic_nomem(action, bld_base, emit_data);
-}
-
-#if HAVE_LLVM >= 0x0304
-/* Emit callback for DDX and DDY.
- *
- * Every thread stores its source value into ddxy_lds[tid], then reads
- * back two slots: the first slot of its aligned group of four threads
- * (tl) and that slot plus 1 for DDX or plus 2 for DDY (trbl). The result
- * for the channel is trbl - tl.
- *
- * Channels whose source swizzle equals that of an earlier channel reuse
- * the earlier result. The four results are gathered into
- * emit_data->output[0].
- *
- * NOTE(review): the group of four is presumably one 2x2 pixel quad with
- * tl as its top-left pixel -- confirm.
- */
-static void si_llvm_emit_ddxy(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
- struct gallivm_state *gallivm = bld_base->base.gallivm;
- struct lp_build_context * base = &bld_base->base;
- const struct tgsi_full_instruction *inst = emit_data->inst;
- unsigned opcode = inst->Instruction.Opcode;
- LLVMValueRef indices[2];
- LLVMValueRef store_ptr, load_ptr0, load_ptr1;
- LLVMValueRef tl, trbl, result[4];
- LLVMTypeRef i32;
- unsigned swizzle[4];
- unsigned c;
-
- i32 = LLVMInt32TypeInContext(gallivm->context);
- /* Each thread writes to its own slot: ddxy_lds[tid]. */
- indices[0] = bld_base->uint_bld.zero;
- indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
- NULL, 0, LLVMReadNoneAttribute);
- store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
- indices, 2, "");
- /* Clear the low two bits of tid: first slot of the group of four. */
- indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
- lp_build_const_int32(gallivm, 0xfffffffc), "");
- load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
- indices, 2, "");
- /* Second slot to read: +1 for DDX, +2 for DDY. */
- indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
- lp_build_const_int32(gallivm,
- opcode == TGSI_OPCODE_DDX ? 1 : 2),
- "");
- load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
- indices, 2, "");
-
- for (c = 0; c < 4; ++c) {
- unsigned i;
- /* Reuse the result of an earlier channel with the same swizzle. */
- swizzle[c] = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], c);
- for (i = 0; i < c; ++i) {
- if (swizzle[i] == swizzle[c]) {
- result[c] = result[i];
- break;
- }
- }
- if (i != c)
- continue;
-
- LLVMBuildStore(gallivm->builder,
- LLVMBuildBitCast(gallivm->builder,
- lp_build_emit_fetch(bld_base, inst, 0, c),
- i32, ""),
- store_ptr);
-
- tl = LLVMBuildLoad(gallivm->builder, load_ptr0, "");
- tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, "");
-
- trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, "");
- trbl = LLVMBuildBitCast(gallivm->builder, trbl, base->elem_type, "");
- /* Derivative: second slot minus first slot. */
- result[c] = LLVMBuildFSub(gallivm->builder, trbl, tl, "");
- }
-
- emit_data->output[0] = lp_build_gather_values(gallivm, result, 4);
-}
-
-#endif /* HAVE_LLVM >= 0x0304 */
-/* Opcode action tables. Each entry pairs the callback that gathers the
- * intrinsic arguments with the callback that emits the call. For the
- * entries that use build_tex_intrinsic, intr_name is only a prefix: that
- * callback appends v<N>i32 to it before emitting the call.
- */
-static const struct lp_build_tgsi_action tex_action = {
- .fetch_args = tex_fetch_args,
- .emit = build_tex_intrinsic,
- .intr_name = "llvm.SI.sample."
-};
-
-static const struct lp_build_tgsi_action txb_action = {
- .fetch_args = tex_fetch_args,
- .emit = build_tex_intrinsic,
- .intr_name = "llvm.SI.sampleb."
-};
-
-#if HAVE_LLVM >= 0x0304
-static const struct lp_build_tgsi_action txd_action = {
- .fetch_args = tex_fetch_args,
- .emit = build_tex_intrinsic,
- .intr_name = "llvm.SI.sampled."
-};
-#endif
-
-static const struct lp_build_tgsi_action txf_action = {
- .fetch_args = tex_fetch_args,
- .emit = build_tex_intrinsic,
- .intr_name = "llvm.SI.imageload."
-};
-
-static const struct lp_build_tgsi_action txl_action = {
- .fetch_args = tex_fetch_args,
- .emit = build_tex_intrinsic,
- .intr_name = "llvm.SI.samplel."
-};
-
-static const struct lp_build_tgsi_action txq_action = {
- .fetch_args = txq_fetch_args,
- .emit = build_txq_intrinsic,
- .intr_name = "llvm.SI.resinfo"
-};
-/* Build the metadata node kept in si_shader_ctx->const_md.
- *
- * The node has three operands: the metadata string const, a null
- * operand and the i32 constant 1.
- * NOTE(review): presumably an alias-analysis tag attached to constant
- * loads -- confirm against the users of const_md.
- */
-static void create_meta_data(struct si_shader_context *si_shader_ctx)
-{
- struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
- LLVMValueRef args[3];
-
- args[0] = LLVMMDStringInContext(gallivm->context, "const", 5);
- args[1] = 0;
- args[2] = lp_build_const_int32(gallivm, 1);
-
- si_shader_ctx->const_md = LLVMMDNodeInContext(gallivm->context, args, 3);
-}
-/* Declare the LLVM entry function (radeon_bld.main_fn) for the shader.
- *
- * Parameters 0-2 are the constant buffer, sampler and resource
- * descriptor array pointers. The remaining parameters depend on
- * si_shader_ctx->type:
- *  - vertex: vertex buffer and streamout buffer pointers, the start
- *    instance, optional streamout SGPRs, then four VGPR inputs;
- *  - fragment: alpha reference and primitive mask, then the
- *    interpolation and position inputs.
- * Any other type asserts and returns without creating a function.
- *
- * Parameters up to last_sgpr are tagged inreg, except that the three
- * descriptor pointers are tagged byval when HAVE_LLVM >= 0x0304.
- * With HAVE_LLVM >= 0x0304, a 64-entry i32 array named ddxy_lds is also
- * declared in LOCAL_ADDR_SPACE when the shader uses DDX or DDY.
- */
-static void create_function(struct si_shader_context *si_shader_ctx)
-{
- struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
- struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMTypeRef params[21], f32, i8, i32, v2i32, v3i32;
- unsigned i, last_sgpr, num_params;
-
- i8 = LLVMInt8TypeInContext(gallivm->context);
- i32 = LLVMInt32TypeInContext(gallivm->context);
- f32 = LLVMFloatTypeInContext(gallivm->context);
- v2i32 = LLVMVectorType(i32, 2);
- v3i32 = LLVMVectorType(i32, 3);
-
- params[SI_PARAM_CONST] = LLVMPointerType(
- LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), CONST_ADDR_SPACE);
- /* We assume at most 16 textures per program at the moment.
- * This will probably need to be changed to support bindless textures.
- * NOTE(review): the sampler array below is sized with
- * NUM_SAMPLER_VIEWS and the resource array with NUM_SAMPLER_STATES;
- * the two counts look swapped -- confirm. */
- params[SI_PARAM_SAMPLER] = LLVMPointerType(
- LLVMArrayType(LLVMVectorType(i8, 16), NUM_SAMPLER_VIEWS), CONST_ADDR_SPACE);
- params[SI_PARAM_RESOURCE] = LLVMPointerType(
- LLVMArrayType(LLVMVectorType(i8, 32), NUM_SAMPLER_STATES), CONST_ADDR_SPACE);
-
- switch (si_shader_ctx->type) {
- case TGSI_PROCESSOR_VERTEX:
- params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST];
- params[SI_PARAM_SO_BUFFER] = params[SI_PARAM_CONST];
- params[SI_PARAM_START_INSTANCE] = i32;
- num_params = SI_PARAM_START_INSTANCE+1;
-
- /* The locations of the other parameters are assigned dynamically. */
-
- /* Streamout SGPRs. */
- if (si_shader_ctx->shader->selector->so.num_outputs) {
- params[si_shader_ctx->param_streamout_config = num_params++] = i32;
- params[si_shader_ctx->param_streamout_write_index = num_params++] = i32;
- }
- /* A streamout buffer offset is loaded if the stride is non-zero. */
- for (i = 0; i < 4; i++) {
- if (!si_shader_ctx->shader->selector->so.stride[i])
- continue;
-
- params[si_shader_ctx->param_streamout_offset[i] = num_params++] = i32;
- }
-
- last_sgpr = num_params-1;
-
- /* VGPRs */
- params[si_shader_ctx->param_vertex_id = num_params++] = i32;
- params[num_params++] = i32; /* unused */
- params[num_params++] = i32; /* unused */
- params[si_shader_ctx->param_instance_id = num_params++] = i32;
- break;
-
- case TGSI_PROCESSOR_FRAGMENT:
- params[SI_PARAM_ALPHA_REF] = f32;
- params[SI_PARAM_PRIM_MASK] = i32;
- last_sgpr = SI_PARAM_PRIM_MASK;
- params[SI_PARAM_PERSP_SAMPLE] = v2i32;
- params[SI_PARAM_PERSP_CENTER] = v2i32;
- params[SI_PARAM_PERSP_CENTROID] = v2i32;
- params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
- params[SI_PARAM_LINEAR_SAMPLE] = v2i32;
- params[SI_PARAM_LINEAR_CENTER] = v2i32;
- params[SI_PARAM_LINEAR_CENTROID] = v2i32;
- params[SI_PARAM_LINE_STIPPLE_TEX] = f32;
- params[SI_PARAM_POS_X_FLOAT] = f32;
- params[SI_PARAM_POS_Y_FLOAT] = f32;
- params[SI_PARAM_POS_Z_FLOAT] = f32;
- params[SI_PARAM_POS_W_FLOAT] = f32;
- params[SI_PARAM_FRONT_FACE] = f32;
- params[SI_PARAM_ANCILLARY] = f32;
- params[SI_PARAM_SAMPLE_COVERAGE] = f32;
- params[SI_PARAM_POS_FIXED_PT] = f32;
- num_params = SI_PARAM_POS_FIXED_PT+1;
- break;
-
- default:
- assert(0 && "unimplemented shader");
- return;
- }
-
- assert(num_params <= Elements(params));
- radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params);
- radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type);
-
- for (i = 0; i <= last_sgpr; ++i) {
- LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i);
- switch (i) {
- default:
- LLVMAddAttribute(P, LLVMInRegAttribute);
- break;
-#if HAVE_LLVM >= 0x0304
- /* We tell llvm that array inputs are passed by value to allow Sinking pass
- * to move load. Inputs are constant so this is fine. */
- case SI_PARAM_CONST:
- case SI_PARAM_SAMPLER:
- case SI_PARAM_RESOURCE:
- LLVMAddAttribute(P, LLVMByValAttribute);
- break;
-#endif
- }
- }
-
-#if HAVE_LLVM >= 0x0304
- if (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
- bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0)
- si_shader_ctx->ddxy_lds =
- LLVMAddGlobalInAddressSpace(gallivm->module,
- LLVMArrayType(i32, 64),
- "ddxy_lds",
- LOCAL_ADDR_SPACE);
-#endif
-}
-/* Emit loads for every constant the shader declares.
- *
- * For each constant buffer slot with at least one declared constant,
- * this allocates si_shader_ctx->constants[buf] (four values per
- * constant), loads the buffer descriptor into const_resource[buf] and
- * emits one llvm.SI.load.const per 32-bit value at byte offset i * 4.
- *
- * NOTE(review): num_const is 0 only when const_file_max[buf] is -1,
- * presumably the marker for an unused buffer -- confirm.
- * NOTE(review): the CALLOC result is used without a NULL check.
- */
-static void preload_constants(struct si_shader_context *si_shader_ctx)
-{
- struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
- struct gallivm_state * gallivm = bld_base->base.gallivm;
- const struct tgsi_shader_info * info = bld_base->info;
- unsigned buf;
- LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
-
- for (buf = 0; buf < NUM_CONST_BUFFERS; buf++) {
- unsigned i, num_const = info->const_file_max[buf] + 1;
-
- if (num_const == 0)
- continue;
-
- /* Allocate space for the constant values */
- si_shader_ctx->constants[buf] = CALLOC(num_const * 4, sizeof(LLVMValueRef));
-
- /* Load the resource descriptor */
- si_shader_ctx->const_resource[buf] =
- build_indexed_load(si_shader_ctx, ptr, lp_build_const_int32(gallivm, buf));
-
- /* Load the constants, we rely on the code sinking to do the rest */
- for (i = 0; i < num_const * 4; ++i) {
- LLVMValueRef args[2] = {
- si_shader_ctx->const_resource[buf],
- lp_build_const_int32(gallivm, i * 4)
- };
- si_shader_ctx->constants[buf][i] =
- build_intrinsic(gallivm->builder, "llvm.SI.load.const",
- bld_base->base.elem_type, args, 2,
- LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
- }
- }
-}
-/* Emit loads for the resource and sampler descriptors.
- *
- * For every sampler index up to file_max[TGSI_FILE_SAMPLER] this loads
- * the resource descriptor and the sampler descriptor at that index. For
- * indices flagged in is_msaa_sampler it additionally loads the resource
- * descriptor at FMASK_TEX_OFFSET + i.
- *
- * The resources array is allocated with NUM_SAMPLER_VIEWS entries and
- * the samplers array with num_samplers entries.
- * NOTE(review): writing resources[FMASK_TEX_OFFSET + i] assumes that
- * index stays below NUM_SAMPLER_VIEWS -- confirm.
- * NOTE(review): the CALLOC results are used without a NULL check.
- */
-static void preload_samplers(struct si_shader_context *si_shader_ctx)
-{
- struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
- struct gallivm_state * gallivm = bld_base->base.gallivm;
- const struct tgsi_shader_info * info = bld_base->info;
-
- unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1;
-
- LLVMValueRef res_ptr, samp_ptr;
- LLVMValueRef offset;
-
- if (num_samplers == 0)
- return;
-
- /* Allocate space for the values */
- si_shader_ctx->resources = CALLOC(NUM_SAMPLER_VIEWS, sizeof(LLVMValueRef));
- si_shader_ctx->samplers = CALLOC(num_samplers, sizeof(LLVMValueRef));
-
- res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
- samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
-
- /* Load the resources and samplers, we rely on the code sinking to do the rest */
- for (i = 0; i < num_samplers; ++i) {
- /* Resource */
- offset = lp_build_const_int32(gallivm, i);
- si_shader_ctx->resources[i] = build_indexed_load(si_shader_ctx, res_ptr, offset);
-
- /* Sampler */
- offset = lp_build_const_int32(gallivm, i);
- si_shader_ctx->samplers[i] = build_indexed_load(si_shader_ctx, samp_ptr, offset);
-
- /* FMASK resource */
- if (info->is_msaa_sampler[i]) {
- offset = lp_build_const_int32(gallivm, FMASK_TEX_OFFSET + i);
- si_shader_ctx->resources[FMASK_TEX_OFFSET + i] =
- build_indexed_load(si_shader_ctx, res_ptr, offset);
- }
- }
-}
-/* Emit loads for the streamout buffer descriptors.
- *
- * Does nothing when the selector has no stream outputs. Otherwise, for
- * each of the four buffer slots with a non-zero stride, the descriptor
- * at that index of the SI_PARAM_SO_BUFFER array is loaded into
- * si_shader_ctx->so_buffers[i].
- */
-static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
-{
- struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
- struct gallivm_state * gallivm = bld_base->base.gallivm;
- unsigned i;
-
- if (!si_shader_ctx->shader->selector->so.num_outputs)
- return;
-
- LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
- SI_PARAM_SO_BUFFER);
-
- /* Load the resources, we rely on the code sinking to do the rest */
- for (i = 0; i < 4; ++i) {
- if (si_shader_ctx->shader->selector->so.stride[i]) {
- LLVMValueRef offset = lp_build_const_int32(gallivm, i);
-
- si_shader_ctx->so_buffers[i] = build_indexed_load(si_shader_ctx, buf_ptr, offset);
- }
- }
-}
-/* Compile an LLVM module and upload the machine code for a shader.
- *
- * Steps:
- *  1. radeon_llvm_compile() produces the code and config blobs; the code
- *     is hex-dumped to stderr when dumping is enabled and no disassembly
- *     is available.
- *  2. The config blob is read as pairs of little-endian 32-bit words
- *     (register, value); known registers fill num_sgprs, num_vgprs,
- *     lds_size and spi_ps_input_ena, unknown ones produce a warning.
- *  3. The previous shader->bo is released, a new buffer of code_size
- *     bytes is created and the code is copied into it.
- *
- * Returns 0 on success or -ENOMEM if the buffer cannot be created.
- * NOTE(review): the -ENOMEM return skips the free() calls for
- * binary.code and binary.config at the end of the function.
- */
-int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader,
- LLVMModuleRef mod)
-{
- unsigned i;
- uint32_t *ptr;
- struct radeon_llvm_binary binary;
- bool dump = r600_can_dump_shader(&rctx->screen->b,
- shader->selector ? shader->selector->tokens : NULL);
- memset(&binary, 0, sizeof(binary));
- radeon_llvm_compile(mod, &binary,
- r600_get_llvm_processor_name(rctx->screen->b.family), dump);
- if (dump && ! binary.disassembled) {
- fprintf(stderr, "SI CODE:\n");
- for (i = 0; i < binary.code_size; i+=4 ) {
- fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3],
- binary.code[i + 2], binary.code[i + 1],
- binary.code[i]);
- }
- }
-
- /* XXX: We may be able to emit some of these values directly rather than
- * extracting fields to be emitted later.
- */
- for (i = 0; i < binary.config_size; i+= 8) {
- unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + i));
- unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
- switch (reg) {
- case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
- case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
- case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
- case R_00B848_COMPUTE_PGM_RSRC1:
- shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8;
- shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4;
- break;
- case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
- shader->lds_size = G_00B02C_EXTRA_LDS_SIZE(value);
- break;
- case R_00B84C_COMPUTE_PGM_RSRC2:
- shader->lds_size = G_00B84C_LDS_SIZE(value);
- break;
- case R_0286CC_SPI_PS_INPUT_ENA:
- shader->spi_ps_input_ena = value;
- break;
- default:
- fprintf(stderr, "Warning: Compiler emitted unknown "
- "config register: 0x%x\n", reg);
- break;
- }
- }
-
- /* copy new shader */
- r600_resource_reference(&shader->bo, NULL);
- shader->bo = r600_resource_create_custom(rctx->b.b.screen, PIPE_USAGE_IMMUTABLE,
- binary.code_size);
- if (shader->bo == NULL) {
- return -ENOMEM;
- }
- /* NOTE(review): the mapping result is used without a NULL check. */
- ptr = (uint32_t*)rctx->b.ws->buffer_map(shader->bo->cs_buf, rctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
- if (0 /*R600_BIG_ENDIAN*/) {
- for (i = 0; i < binary.code_size / 4; ++i) {
- ptr[i] = util_bswap32(*(uint32_t*)(binary.code + i*4));
- }
- } else {
- memcpy(ptr, binary.code, binary.code_size);
- }
- rctx->b.ws->buffer_unmap(shader->bo->cs_buf);
-
- free(binary.code);
- free(binary.config);
-
- return 0;
-}
-/* Translate the TGSI tokens of a shader variant to LLVM IR and compile it.
- *
- * Sets up the shader context (fetch/emit callbacks, texture and
- * derivative opcode actions, input and system-value loaders), declares
- * the entry function, preloads constants, samplers and streamout
- * buffers, runs the TGSI to LLVM translation and finally calls
- * si_compile_llvm().
- *
- * Returns 0 on success, -EINVAL when the TGSI to LLVM translation fails,
- * or the value returned by si_compile_llvm().
- * NOTE(review): the -EINVAL path frees the preloaded arrays but, unlike
- * the normal path, does not call radeon_llvm_dispose() or
- * tgsi_parse_free().
- */
-int si_pipe_shader_create(
- struct pipe_context *ctx,
- struct si_pipe_shader *shader)
-{
- struct r600_context *rctx = (struct r600_context*)ctx;
- struct si_pipe_shader_selector *sel = shader->selector;
- struct si_shader_context si_shader_ctx;
- struct tgsi_shader_info shader_info;
- struct lp_build_tgsi_context * bld_base;
- LLVMModuleRef mod;
- int r = 0;
- bool dump = r600_can_dump_shader(&rctx->screen->b, shader->selector->tokens);
- /* The interface description is expected to be empty on entry. */
- assert(shader->shader.noutput == 0);
- assert(shader->shader.ninterp == 0);
- assert(shader->shader.ninput == 0);
-
- memset(&si_shader_ctx, 0, sizeof(si_shader_ctx));
- radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
- bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
-
- tgsi_scan_shader(sel->tokens, &shader_info);
-
- shader->shader.uses_kill = shader_info.uses_kill;
- shader->shader.uses_instanceid = shader_info.uses_instanceid;
- bld_base->info = &shader_info;
- bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
- bld_base->emit_epilogue = si_llvm_emit_epilogue;
- /* Texture opcodes; TXP shares the TEX action. */
- bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
- bld_base->op_actions[TGSI_OPCODE_TXB] = txb_action;
-#if HAVE_LLVM >= 0x0304
- bld_base->op_actions[TGSI_OPCODE_TXD] = txd_action;
-#endif
- bld_base->op_actions[TGSI_OPCODE_TXF] = txf_action;
- bld_base->op_actions[TGSI_OPCODE_TXL] = txl_action;
- bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
- bld_base->op_actions[TGSI_OPCODE_TXQ] = txq_action;
-
-#if HAVE_LLVM >= 0x0304
- bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
- bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
-#endif
-
- si_shader_ctx.radeon_bld.load_input = declare_input;
- si_shader_ctx.radeon_bld.load_system_value = declare_system_value;
- si_shader_ctx.tokens = sel->tokens;
- tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
- si_shader_ctx.shader = shader;
- si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
-
- create_meta_data(&si_shader_ctx);
- create_function(&si_shader_ctx);
- preload_constants(&si_shader_ctx);
- preload_samplers(&si_shader_ctx);
- preload_streamout_buffers(&si_shader_ctx);
-
- /* Dump TGSI code before doing TGSI->LLVM conversion in case the
- * conversion fails. */
- if (dump) {
- tgsi_dump(sel->tokens, 0);
- si_dump_streamout(&sel->so);
- }
-
- if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
- fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
- for (int i = 0; i < NUM_CONST_BUFFERS; i++)
- FREE(si_shader_ctx.constants[i]);
- FREE(si_shader_ctx.resources);
- FREE(si_shader_ctx.samplers);
- return -EINVAL;
- }
-
- radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
-
- mod = bld_base->base.gallivm->module;
- r = si_compile_llvm(rctx, shader, mod);
-
- radeon_llvm_dispose(&si_shader_ctx.radeon_bld);
- tgsi_parse_free(&si_shader_ctx.parse);
- /* Release the arrays allocated by the preload helpers. */
- for (int i = 0; i < NUM_CONST_BUFFERS; i++)
- FREE(si_shader_ctx.constants[i]);
- FREE(si_shader_ctx.resources);
- FREE(si_shader_ctx.samplers);
-
- return r;
-}
-/* Drop the reference to the buffer held in shader->bo; the ctx argument
- * is not used. */
-void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
-{
- r600_resource_reference(&shader->bo, NULL);
-}
+++ /dev/null
-/*
- * Copyright 2012 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Tom Stellard <thomas.stellard@amd.com>
- * Michel Dänzer <michel.daenzer@amd.com>
- * Christian König <christian.koenig@amd.com>
- */
-
-#ifndef RADEONSI_SHADER_H
-#define RADEONSI_SHADER_H
-
-#include <llvm-c/Core.h> /* LLVMModuleRef */
-/* User SGPR indices. Indices 0-5 are shared by both stages; from index 6
- * on the VS and PS layouts differ (SI_SGPR_VERTEX_BUFFER and
- * SI_SGPR_ALPHA_REF both use 6).
- * NOTE(review): the pointer-like entries are spaced two SGPRs apart,
- * presumably because each one is a 64-bit address -- confirm.
- */
-#define SI_SGPR_CONST 0
-#define SI_SGPR_SAMPLER 2
-#define SI_SGPR_RESOURCE 4
-#define SI_SGPR_VERTEX_BUFFER 6 /* VS only */
-#define SI_SGPR_SO_BUFFER 8 /* VS only, stream-out */
-#define SI_SGPR_START_INSTANCE 10 /* VS only */
-#define SI_SGPR_ALPHA_REF 6 /* PS only */
-/* User SGPR count per stage: one more than the highest index above. */
-#define SI_VS_NUM_USER_SGPR 11
-#define SI_PS_NUM_USER_SGPR 7
-
-/* LLVM function parameter indices
- *
- * Indices 0-2 are common to both stages; indices from 3 on are reused
- * with a different meaning by the vertex and the fragment shader.
- */
-#define SI_PARAM_CONST 0
-#define SI_PARAM_SAMPLER 1
-#define SI_PARAM_RESOURCE 2
-
-/* VS only parameters */
-#define SI_PARAM_VERTEX_BUFFER 3
-#define SI_PARAM_SO_BUFFER 4
-#define SI_PARAM_START_INSTANCE 5
-/* the other VS parameters are assigned dynamically */
-
-/* PS only parameters */
-#define SI_PARAM_ALPHA_REF 3
-#define SI_PARAM_PRIM_MASK 4
-#define SI_PARAM_PERSP_SAMPLE 5
-#define SI_PARAM_PERSP_CENTER 6
-#define SI_PARAM_PERSP_CENTROID 7
-#define SI_PARAM_PERSP_PULL_MODEL 8
-#define SI_PARAM_LINEAR_SAMPLE 9
-#define SI_PARAM_LINEAR_CENTER 10
-#define SI_PARAM_LINEAR_CENTROID 11
-#define SI_PARAM_LINE_STIPPLE_TEX 12
-#define SI_PARAM_POS_X_FLOAT 13
-#define SI_PARAM_POS_Y_FLOAT 14
-#define SI_PARAM_POS_Z_FLOAT 15
-#define SI_PARAM_POS_W_FLOAT 16
-#define SI_PARAM_FRONT_FACE 17
-#define SI_PARAM_ANCILLARY 18
-#define SI_PARAM_SAMPLE_COVERAGE 19
-#define SI_PARAM_POS_FIXED_PT 20
-/* One shader input or output slot; struct si_shader keeps an array of
- * these for its inputs and another for its outputs.
- * NOTE(review): name and sid are presumably the TGSI semantic name and
- * index -- confirm against the code that fills them in.
- */
-struct si_shader_io {
- unsigned name;
- int sid;
- unsigned param_offset;
- unsigned interpolate;
- bool centroid;
-};
-
-struct si_pipe_shader;
-/* Shader selector: holds the TGSI tokens and the stream-output info of a
- * shader together with a pointer to the current struct si_pipe_shader.
- * NOTE(review): num_shaders presumably counts the compiled variants
- * reachable from current -- confirm.
- */
-struct si_pipe_shader_selector {
- struct si_pipe_shader *current;
-
- struct tgsi_token *tokens;
- struct pipe_stream_output_info so;
-
- unsigned num_shaders;
-
- /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
- unsigned type;
-
- /* 1 when the shader contains
- * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, otherwise it's 0.
- * Used to determine whether we need to include nr_cbufs in the key */
- unsigned fs_write_all;
-};
-/* Interface description of a shader: tables of up to 40 input and 40
- * output slots with their counts, plus flags describing what the shader
- * uses or writes. uses_kill and uses_instanceid are copied from the TGSI
- * scan by si_pipe_shader_create().
- */
-struct si_shader {
- unsigned ninput;
- struct si_shader_io input[40];
-
- unsigned noutput;
- struct si_shader_io output[40];
-
- unsigned ninterp;
- bool uses_kill;
- bool uses_instanceid;
- bool fs_write_all;
- bool vs_out_misc_write;
- bool vs_out_point_size;
- bool vs_out_edgeflag;
- bool vs_out_layer;
- unsigned nr_pos_exports;
- unsigned clip_dist_write;
-};
-/* Per-stage key stored in struct si_pipe_shader: the ps member packs
- * fragment shader state into bit-fields, the vs member holds the
- * instance divisors and a 2-bit ucps_enabled field.
- * NOTE(review): presumably the state that distinguishes one compiled
- * variant from another -- confirm.
- */
-union si_shader_key {
- struct {
- unsigned export_16bpc:8;
- unsigned nr_cbufs:4;
- unsigned color_two_side:1;
- unsigned alpha_func:3;
- unsigned flatshade:1;
- unsigned alpha_to_one:1;
- } ps;
- struct {
- unsigned instance_divisors[PIPE_MAX_ATTRIBS];
- unsigned ucps_enabled:2;
- } vs;
-};
-/* One compiled shader: a pointer back to its selector, a link to the
- * next variant, the interface description, the buffer that receives the
- * machine code (bo) and derived register state. num_sgprs, num_vgprs,
- * lds_size and spi_ps_input_ena are filled in by si_compile_llvm() from
- * the config data returned by the compiler.
- */
-struct si_pipe_shader {
- struct si_pipe_shader_selector *selector;
- struct si_pipe_shader *next_variant;
- struct si_shader shader;
- struct si_pm4_state *pm4;
- struct r600_resource *bo;
- unsigned num_sgprs;
- unsigned num_vgprs;
- unsigned lds_size;
- unsigned spi_ps_input_ena;
- unsigned spi_shader_col_format;
- unsigned cb_shader_mask;
- bool cb0_is_integer;
- unsigned sprite_coord_enable;
- union si_shader_key key;
-};
-
-/* radeonsi_shader.c
- * NOTE(review): si_pipe_shader_create is declared twice below; the
- * second declaration is redundant.
- */
-int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader);
-int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader);
-int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader,
- LLVMModuleRef mod);
-void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader);
-
-#endif
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2011 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/*
- * Authors:
- * Christian König <christian.koenig@amd.com>
- *
- */
-
-#include <sys/types.h>
-#include <assert.h>
-#include <errno.h>
-#include <unistd.h>
-
-#include "pipe/p_video_codec.h"
-
-#include "util/u_memory.h"
-#include "util/u_video.h"
-
-#include "vl/vl_defines.h"
-#include "vl/vl_mpeg12_decoder.h"
-
-#include "radeonsi_pipe.h"
-#include "radeon/radeon_uvd.h"
-#include "sid.h"
-
-/**
- * Creates a video buffer with a UVD-compatible memory layout.
- *
- * One linear texture is created per plane format reported by
- * vl_video_buffer_formats() (up to three). The width is aligned to the
- * macroblock width; the height is divided by the array size (2 when
- * interlaced, otherwise 1) and aligned to the macroblock height.
- * The planes are then passed to ruvd_join_surfaces(), their CS handles
- * are fetched again, and the result is wrapped by
- * vl_video_buffer_create_ex2().
- *
- * Returns NULL when no plane formats are available or a texture cannot
- * be created; textures created up to that point are released.
- */
-struct pipe_video_buffer *radeonsi_video_buffer_create(struct pipe_context *pipe,
- const struct pipe_video_buffer *tmpl)
-{
- struct r600_context *ctx = (struct r600_context *)pipe;
- struct r600_texture *resources[VL_NUM_COMPONENTS] = {};
- struct radeon_surface *surfaces[VL_NUM_COMPONENTS] = {};
- struct pb_buffer **pbs[VL_NUM_COMPONENTS] = {};
- const enum pipe_format *resource_formats;
- struct pipe_video_buffer template;
- struct pipe_resource templ;
- unsigned i, array_size;
-
- assert(pipe);
-
- /* first create the needed resources as "normal" textures */
- resource_formats = vl_video_buffer_formats(pipe->screen, tmpl->buffer_format);
- if (!resource_formats)
- return NULL;
-
- array_size = tmpl->interlaced ? 2 : 1;
- template = *tmpl;
- template.width = align(tmpl->width, VL_MACROBLOCK_WIDTH);
- template.height = align(tmpl->height / array_size, VL_MACROBLOCK_HEIGHT);
-
- vl_video_buffer_template(&templ, &template, resource_formats[0], 1, array_size, PIPE_USAGE_STATIC, 0);
- /* TODO: get tiling working */
- templ.bind = PIPE_BIND_LINEAR;
- resources[0] = (struct r600_texture *)
- pipe->screen->resource_create(pipe->screen, &templ);
- if (!resources[0])
- goto error;
-
- if (resource_formats[1] != PIPE_FORMAT_NONE) {
- vl_video_buffer_template(&templ, &template, resource_formats[1], 1, array_size, PIPE_USAGE_STATIC, 1);
- templ.bind = PIPE_BIND_LINEAR;
- resources[1] = (struct r600_texture *)
- pipe->screen->resource_create(pipe->screen, &templ);
- if (!resources[1])
- goto error;
- }
-
- if (resource_formats[2] != PIPE_FORMAT_NONE) {
- vl_video_buffer_template(&templ, &template, resource_formats[2], 1, array_size, PIPE_USAGE_STATIC, 2);
- templ.bind = PIPE_BIND_LINEAR;
- resources[2] = (struct r600_texture *)
- pipe->screen->resource_create(pipe->screen, &templ);
- if (!resources[2])
- goto error;
- }
- /* Collect the surface and buffer of every plane that was created. */
- for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
- if (!resources[i])
- continue;
-
- surfaces[i] = & resources[i]->surface;
- pbs[i] = &resources[i]->resource.buf;
- }
-
- ruvd_join_surfaces(ctx->b.ws, templ.bind, pbs, surfaces);
-
- for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
- if (!resources[i])
- continue;
-
- /* recreate the CS handle */
- resources[i]->resource.cs_buf = ctx->b.ws->buffer_get_cs_handle(
- resources[i]->resource.buf);
- }
- /* Undo the per-layer division applied to the height above. */
- template.height *= array_size;
- return vl_video_buffer_create_ex2(pipe, &template, (struct pipe_resource **)resources);
-
-error:
- for (i = 0; i < VL_NUM_COMPONENTS; ++i)
- pipe_resource_reference((struct pipe_resource **)&resources[i], NULL);
-
- return NULL;
-}
-
-/* set the decoding target buffer offsets
- *
- * Records the interlaced flag of the buffer in the decode message, hands
- * the luma (resources[0]) and chroma (resources[1]) surfaces to
- * ruvd_set_dt_surfaces() and returns the CS handle of the luma plane.
- * NOTE(review): resources[1] is used without a NULL check, so this
- * assumes the buffer always has a second plane -- confirm.
- */
-static struct radeon_winsys_cs_handle* radeonsi_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_buffer *buf)
-{
- struct r600_texture *luma = (struct r600_texture *)buf->resources[0];
- struct r600_texture *chroma = (struct r600_texture *)buf->resources[1];
-
- msg->body.decode.dt_field_mode = buf->base.interlaced;
-
- ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface);
-
- return luma->resource.cs_buf;
-}
-
-/**
- * Creates a UVD-compatible decoder.
- *
- * Thin wrapper around ruvd_create_decoder() that supplies
- * radeonsi_uvd_set_dtb as the target-buffer callback.
- */
-struct pipe_video_codec *radeonsi_uvd_create_decoder(struct pipe_context *context,
- const struct pipe_video_codec *templ)
-{
- return ruvd_create_decoder(context, templ, radeonsi_uvd_set_dtb);
-}
--- /dev/null
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse
+ */
+#ifndef SI_H
+#define SI_H
+
+#include "../../winsys/radeon/drm/radeon_winsys.h"
+#include "util/u_double_list.h"
+#include "util/u_transfer.h"
+
+#include "si_resource.h"
+
+struct winsys_handle;
+
+/* R600/R700 STATES
+ *
+ * State kept for one query object: its result, its type, and the range
+ * of the results buffer (used as a ring) that holds the data written for
+ * begin_query and end_query. Queries are chained through the list
+ * member.
+ * NOTE(review): the heading above predates the si_ rename of this
+ * header and may be stale -- confirm.
+ */
+struct r600_query {
+ union {
+ uint64_t u64;
+ boolean b;
+ struct pipe_query_data_so_statistics so;
+ } result;
+ /* The kind of query */
+ unsigned type;
+ /* Offset of the first result for current query */
+ unsigned results_start;
+ /* Offset of the next free result after current query data */
+ unsigned results_end;
+ /* Size of the result in memory for both begin_query and end_query,
+ * this can be one or two numbers, or it could even be a size of a structure. */
+ unsigned result_size;
+ /* The buffer where query results are stored. It's used as a ring,
+ * data blocks for current query are stored sequentially from
+ * results_start to results_end, with wrapping on the buffer end */
+ struct r600_resource *buffer;
+ /* The number of dwords for begin_query or end_query. */
+ unsigned num_cs_dw;
+ /* linked list of queries */
+ struct list_head list;
+};
+/* Forward declarations; only pointers to these types are used below. */
+struct r600_context;
+struct r600_screen;
+/* Context-level entry points (declarations only). */
+void si_get_backend_mask(struct r600_context *ctx);
+void si_context_flush(struct r600_context *ctx, unsigned flags);
+void si_begin_new_cs(struct r600_context *ctx);
+/* Entry points operating on struct r600_query (declarations only). */
+struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type);
+void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query);
+boolean r600_context_query_result(struct r600_context *ctx,
+ struct r600_query *query,
+ boolean wait, void *vresult);
+void r600_query_begin(struct r600_context *ctx, struct r600_query *query);
+void r600_query_end(struct r600_context *ctx, struct r600_query *query);
+void r600_context_queries_suspend(struct r600_context *ctx);
+void r600_context_queries_resume(struct r600_context *ctx);
+void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
+ int flag_wait);
+
+bool si_is_timer_query(unsigned type);
+bool si_query_needs_begin(unsigned type);
+void si_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
+
+int si_context_init(struct r600_context *ctx);
+
+#endif
--- /dev/null
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "util/u_surface.h"
+#include "util/u_blitter.h"
+#include "util/u_format.h"
+#include "si_pipe.h"
+#include "si_state.h"
+
+/* Flags telling r600_blitter_begin() which optional state to save before a
+ * u_blitter operation, followed by the flag combination used for each kind
+ * of operation. */
+enum r600_blitter_op /* bitmask */
+{
+ R600_SAVE_TEXTURES = 1, /* save fragment sampler states and sampler views */
+ R600_SAVE_FRAMEBUFFER = 2, /* save the framebuffer state */
+ R600_DISABLE_RENDER_COND = 4, /* stash and disable the active render condition */
+
+ R600_CLEAR = 0, /* no optional state is saved */
+
+ R600_CLEAR_SURFACE = R600_SAVE_FRAMEBUFFER,
+
+ R600_COPY = R600_SAVE_FRAMEBUFFER | R600_SAVE_TEXTURES |
+ R600_DISABLE_RENDER_COND,
+
+ R600_BLIT = R600_SAVE_FRAMEBUFFER | R600_SAVE_TEXTURES |
+ R600_DISABLE_RENDER_COND,
+
+ R600_DECOMPRESS = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND,
+
+ R600_COLOR_RESOLVE = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND
+};
+
+/* Prepare the context for a u_blitter operation.
+ *
+ * Suspends queries, hands the currently bound state to u_blitter so it can
+ * be restored afterwards, and, depending on the bits set in "op", also
+ * saves the framebuffer, the fragment samplers/views, and stashes and
+ * disables the active render condition. Pair with r600_blitter_end(). */
+static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op)
+{
+	struct r600_context *sctx = (struct r600_context *)ctx;
+
+	r600_context_queries_suspend(sctx);
+
+	/* State saved for every operation. */
+	util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
+	util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
+	util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref);
+	util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
+	util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader);
+	util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader);
+	util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
+	/* The viewport is only saved when one is queued. */
+	if (sctx->queued.named.viewport != NULL) {
+		util_blitter_save_viewport(sctx->blitter,
+					   &sctx->queued.named.viewport->viewport);
+	}
+	util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
+	util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
+				     (struct pipe_stream_output_target**)sctx->b.streamout.targets);
+
+	/* State saved on request. */
+	if (op & R600_SAVE_FRAMEBUFFER)
+		util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer);
+
+	if (op & R600_SAVE_TEXTURES) {
+		/* Number of view slots to save: up to the highest enabled
+		 * slot among the first NUM_TEX_UNITS. */
+		unsigned num_views = util_last_bit(
+			sctx->samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask &
+			((1 << NUM_TEX_UNITS) - 1));
+
+		util_blitter_save_fragment_sampler_states(
+			sctx->blitter, sctx->samplers[PIPE_SHADER_FRAGMENT].n_samplers,
+			(void**)sctx->samplers[PIPE_SHADER_FRAGMENT].samplers);
+
+		util_blitter_save_fragment_sampler_views(sctx->blitter, num_views,
+			sctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
+	}
+
+	/* Nothing more to do unless a render condition is active and the
+	 * operation asked for it to be disabled. */
+	if (!(op & R600_DISABLE_RENDER_COND) || !sctx->current_render_cond)
+		return;
+
+	sctx->saved_render_cond = sctx->current_render_cond;
+	sctx->saved_render_cond_cond = sctx->current_render_cond_cond;
+	sctx->saved_render_cond_mode = sctx->current_render_cond_mode;
+	sctx->b.b.render_condition(&sctx->b.b, NULL, FALSE, 0);
+}
+
+/* Undo r600_blitter_begin(): re-apply the render condition it stashed (if
+ * any) and resume the suspended queries. */
+static void r600_blitter_end(struct pipe_context *ctx)
+{
+	struct r600_context *sctx = (struct r600_context *)ctx;
+
+	if (sctx->saved_render_cond != NULL) {
+		sctx->b.b.render_condition(&sctx->b.b,
+					   sctx->saved_render_cond,
+					   sctx->saved_render_cond_cond,
+					   sctx->saved_render_cond_mode);
+		/* Mark the stash as consumed. */
+		sctx->saved_render_cond = NULL;
+	}
+
+	r600_context_queries_resume(sctx);
+}
+
+/* Return the index of the last sample of "r": nr_samples - 1, or 0 when
+ * nr_samples is 0. */
+static unsigned u_max_sample(struct pipe_resource *r)
+{
+	if (r->nr_samples)
+		return r->nr_samples - 1;
+
+	return 0;
+}
+
+/* Flush depth/stencil data of "texture" into a separate texture.
+ *
+ * For every selected level, layer and sample, a surface of "texture" is
+ * rendered with util_blitter_custom_depth_stencil() into the matching
+ * surface of the destination, which is "staging" when non-NULL and
+ * texture->flushed_depth_texture otherwise. The DSA state used is chosen
+ * from whether the destination format has depth, stencil or both.
+ *
+ * Without a staging texture only levels flagged in dirty_level_mask are
+ * processed, and a level's dirty bit is cleared once all of its layers and
+ * samples were covered by the request.
+ *
+ * first_* and last_* bounds are inclusive; last_layer is clamped to the
+ * number of layers the level actually has. */
+static void r600_blit_decompress_depth(struct pipe_context *ctx,
+				       struct r600_texture *texture,
+				       struct r600_texture *staging,
+				       unsigned first_level, unsigned last_level,
+				       unsigned first_layer, unsigned last_layer,
+				       unsigned first_sample, unsigned last_sample)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	unsigned layer, level, sample, checked_last_layer, max_layer, max_sample;
+	float depth = 1.0f;
+	const struct util_format_description *desc;
+	void **custom_dsa;
+	struct r600_texture *flushed_depth_texture = staging ?
+			staging : texture->flushed_depth_texture;
+
+	if (!staging && !texture->dirty_level_mask)
+		return;
+
+	max_sample = u_max_sample(&texture->resource.b.b);
+
+	/* Bit 0: format has depth, bit 1: format has stencil. */
+	desc = util_format_description(flushed_depth_texture->resource.b.b.format);
+	switch (util_format_has_depth(desc) | util_format_has_stencil(desc) << 1) {
+	default:
+		assert(!"No depth or stencil to uncompress");
+		return;
+	case 3:
+		custom_dsa = rctx->custom_dsa_flush_depth_stencil;
+		break;
+	case 2:
+		custom_dsa = rctx->custom_dsa_flush_stencil;
+		break;
+	case 1:
+		custom_dsa = rctx->custom_dsa_flush_depth;
+		break;
+	}
+
+	for (level = first_level; level <= last_level; level++) {
+		if (!staging && !(texture->dirty_level_mask & (1 << level)))
+			continue;
+
+		/* The smaller the mipmap level, the fewer layers there are
+		 * as far as 3D textures are concerned. */
+		max_layer = util_max_layer(&texture->resource.b.b, level);
+		checked_last_layer = last_layer < max_layer ? last_layer : max_layer;
+
+		for (layer = first_layer; layer <= checked_last_layer; layer++) {
+			for (sample = first_sample; sample <= last_sample; sample++) {
+				/* Zero the template so that create_surface never
+				 * sees indeterminate fields, matching
+				 * si_blit_decompress_depth_in_place(). */
+				struct pipe_surface *zsurf, *cbsurf, surf_tmpl = {{0}};
+
+				surf_tmpl.format = texture->resource.b.b.format;
+				surf_tmpl.u.tex.level = level;
+				surf_tmpl.u.tex.first_layer = layer;
+				surf_tmpl.u.tex.last_layer = layer;
+
+				zsurf = ctx->create_surface(ctx, &texture->resource.b.b, &surf_tmpl);
+
+				/* Same subresource, but in the destination texture
+				 * and with the destination's format. */
+				surf_tmpl.format = flushed_depth_texture->resource.b.b.format;
+				cbsurf = ctx->create_surface(ctx,
+						(struct pipe_resource*)flushed_depth_texture, &surf_tmpl);
+
+				r600_blitter_begin(ctx, R600_DECOMPRESS);
+				util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, 1 << sample,
+								  custom_dsa[sample], depth);
+				r600_blitter_end(ctx);
+
+				pipe_surface_reference(&zsurf, NULL);
+				pipe_surface_reference(&cbsurf, NULL);
+			}
+		}
+
+		/* The texture will always be dirty if some layers aren't flushed.
+		 * I don't think this case can occur though. */
+		if (!staging &&
+		    first_layer == 0 && last_layer == max_layer &&
+		    first_sample == 0 && last_sample == max_sample) {
+			texture->dirty_level_mask &= ~(1 << level);
+		}
+	}
+}
+
+/* Decompress depth data of "texture" in place.
+ *
+ * Every dirty level in [first_level, last_level] has its layers in
+ * [first_layer, last_layer] (clamped to the layers the level has) run
+ * through util_blitter_custom_depth_stencil() with the in-place flush DSA
+ * state and no color destination. A level's dirty bit is cleared when the
+ * request covered all of its layers. */
+static void si_blit_decompress_depth_in_place(struct r600_context *rctx,
+					      struct r600_texture *texture,
+					      unsigned first_level, unsigned last_level,
+					      unsigned first_layer, unsigned last_layer)
+{
+	struct pipe_surface templ = {{0}};
+	unsigned lvl;
+
+	templ.format = texture->resource.b.b.format;
+
+	for (lvl = first_level; lvl <= last_level; lvl++) {
+		unsigned top_layer, end_layer, z;
+
+		/* Skip levels that have nothing to decompress. */
+		if ((texture->dirty_level_mask & (1 << lvl)) == 0)
+			continue;
+
+		templ.u.tex.level = lvl;
+
+		/* Smaller mipmap levels of 3D textures have fewer layers,
+		 * so clamp the requested range to what this level has. */
+		top_layer = util_max_layer(&texture->resource.b.b, lvl);
+		end_layer = top_layer;
+		if (last_layer < top_layer)
+			end_layer = last_layer;
+
+		for (z = first_layer; z <= end_layer; z++) {
+			struct pipe_surface *zsurf;
+
+			templ.u.tex.first_layer = z;
+			templ.u.tex.last_layer = z;
+
+			zsurf = rctx->b.b.create_surface(&rctx->b.b,
+							 &texture->resource.b.b, &templ);
+
+			r600_blitter_begin(&rctx->b.b, R600_DECOMPRESS);
+			util_blitter_custom_depth_stencil(rctx->blitter, zsurf, NULL, ~0,
+							  rctx->custom_dsa_flush_inplace,
+							  1.0f);
+			r600_blitter_end(&rctx->b.b);
+
+			pipe_surface_reference(&zsurf, NULL);
+		}
+
+		/* The level stays dirty unless every layer was requested. */
+		if (first_layer == 0 && last_layer == top_layer)
+			texture->dirty_level_mask &= ~(1 << lvl);
+	}
+}
+
+/* Decompress in place every bound depth texture in "textures".
+ *
+ * Empty view slots, non-depth textures and textures flagged as flushing
+ * textures are skipped. For the others, the view's level range and all
+ * layers of the view's first level are decompressed. */
+void si_flush_depth_textures(struct r600_context *rctx,
+			     struct r600_textures_info *textures)
+{
+	unsigned slot;
+
+	for (slot = 0; slot < textures->n_views; ++slot) {
+		struct pipe_sampler_view *view = textures->views.views[slot];
+		struct r600_texture *tex;
+
+		if (view == NULL)
+			continue;
+
+		tex = (struct r600_texture *)view->texture;
+		if (tex->is_depth && !tex->is_flushing_texture) {
+			si_blit_decompress_depth_in_place(rctx, tex,
+					view->u.tex.first_level, view->u.tex.last_level,
+					0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
+		}
+	}
+}
+
+/* Decompress color data of "rtex" in place.
+ *
+ * Every dirty level in [first_level, last_level] has its layers in
+ * [first_layer, last_layer] (clamped to the layers the level has) rendered
+ * with util_blitter_custom_color() using the decompress blend state.
+ * A level's dirty bit is cleared when the request covered all of its
+ * layers. Returns immediately when no level is dirty. */
+static void r600_blit_decompress_color(struct pipe_context *ctx,
+				       struct r600_texture *rtex,
+				       unsigned first_level, unsigned last_level,
+				       unsigned first_layer, unsigned last_layer)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	unsigned layer, level, checked_last_layer, max_layer;
+
+	if (!rtex->dirty_level_mask)
+		return;
+
+	for (level = first_level; level <= last_level; level++) {
+		if (!(rtex->dirty_level_mask & (1 << level)))
+			continue;
+
+		/* The smaller the mipmap level, the fewer layers there are
+		 * as far as 3D textures are concerned. */
+		max_layer = util_max_layer(&rtex->resource.b.b, level);
+		checked_last_layer = last_layer < max_layer ? last_layer : max_layer;
+
+		for (layer = first_layer; layer <= checked_last_layer; layer++) {
+			/* Zero the template so that create_surface never sees
+			 * indeterminate fields, matching
+			 * si_blit_decompress_depth_in_place(). */
+			struct pipe_surface *cbsurf, surf_tmpl = {{0}};
+
+			surf_tmpl.format = rtex->resource.b.b.format;
+			surf_tmpl.u.tex.level = level;
+			surf_tmpl.u.tex.first_layer = layer;
+			surf_tmpl.u.tex.last_layer = layer;
+			cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);
+
+			r600_blitter_begin(ctx, R600_DECOMPRESS);
+			util_blitter_custom_color(rctx->blitter, cbsurf,
+						  rctx->custom_blend_decompress);
+			r600_blitter_end(ctx);
+
+			pipe_surface_reference(&cbsurf, NULL);
+		}
+
+		/* The texture will always be dirty if some layers aren't flushed.
+		 * I don't think this case occurs often though. */
+		if (first_layer == 0 && last_layer == max_layer) {
+			rtex->dirty_level_mask &= ~(1 << level);
+		}
+	}
+}
+
+/* Decompress every texture whose slot is set in
+ * textures->compressed_colortex_mask.
+ *
+ * Each flagged slot is expected to hold a view of a texture with CMASK or
+ * FMASK data (asserted). The view's level range and all layers of the
+ * view's first level are decompressed. */
+void r600_decompress_color_textures(struct r600_context *rctx,
+				    struct r600_textures_info *textures)
+{
+	unsigned pending = textures->compressed_colortex_mask;
+
+	while (pending != 0) {
+		unsigned slot = u_bit_scan(&pending);
+		struct pipe_sampler_view *view = textures->views.views[slot];
+		struct r600_texture *tex;
+
+		assert(view);
+
+		tex = (struct r600_texture *)view->texture;
+		assert(tex->cmask.size || tex->fmask.size);
+
+		r600_blit_decompress_color(&rctx->b.b, tex,
+					   view->u.tex.first_level, view->u.tex.last_level,
+					   0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
+	}
+}
+
+/* pipe_context::clear hook: clear the selected buffers of the currently
+ * bound framebuffer through util_blitter_clear(). */
+static void r600_clear(struct pipe_context *ctx, unsigned buffers,
+		       const union pipe_color_union *color,
+		       double depth, unsigned stencil)
+{
+	struct r600_context *sctx = (struct r600_context *)ctx;
+	struct pipe_framebuffer_state *fb_state = &sctx->framebuffer;
+
+	r600_blitter_begin(ctx, R600_CLEAR);
+	util_blitter_clear(sctx->blitter,
+			   fb_state->width, fb_state->height,
+			   util_framebuffer_get_num_layers(fb_state),
+			   buffers, color, depth, stencil);
+	r600_blitter_end(ctx);
+}
+
+/* pipe_context::clear_render_target hook: clear a rectangle of the color
+ * surface "dst" through util_blitter_clear_render_target(). */
+static void r600_clear_render_target(struct pipe_context *ctx,
+				     struct pipe_surface *dst,
+				     const union pipe_color_union *color,
+				     unsigned dstx, unsigned dsty,
+				     unsigned width, unsigned height)
+{
+	struct r600_context *sctx = (struct r600_context *)ctx;
+
+	r600_blitter_begin(ctx, R600_CLEAR_SURFACE);
+	util_blitter_clear_render_target(sctx->blitter, dst, color,
+					 dstx, dsty,
+					 width, height);
+	r600_blitter_end(ctx);
+}
+
+/* pipe_context::clear_depth_stencil hook: clear a rectangle of the
+ * depth/stencil surface "dst" through util_blitter_clear_depth_stencil(). */
+static void r600_clear_depth_stencil(struct pipe_context *ctx,
+				     struct pipe_surface *dst,
+				     unsigned clear_flags,
+				     double depth,
+				     unsigned stencil,
+				     unsigned dstx, unsigned dsty,
+				     unsigned width, unsigned height)
+{
+	struct r600_context *sctx = (struct r600_context *)ctx;
+
+	r600_blitter_begin(ctx, R600_CLEAR_SURFACE);
+	util_blitter_clear_depth_stencil(sctx->blitter, dst,
+					 clear_flags, depth, stencil,
+					 dstx, dsty,
+					 width, height);
+	r600_blitter_end(ctx);
+}
+
+/* Decompress one level and a layer range of a color or depth resource
+ * before it is used as a blit source, if it needs decompression at all.
+ * This is required because the driver doesn't decompress resources
+ * automatically while u_blitter is rendering.
+ *
+ * Depth textures that are not flushing textures are decompressed in place;
+ * otherwise, textures with FMASK or CMASK data get a color decompress. */
+static void r600_decompress_subresource(struct pipe_context *ctx,
+					struct pipe_resource *tex,
+					unsigned level,
+					unsigned first_layer, unsigned last_layer)
+{
+	struct r600_texture *stex = (struct r600_texture*)tex;
+	struct r600_context *sctx = (struct r600_context *)ctx;
+
+	if (stex->is_depth && !stex->is_flushing_texture) {
+		si_blit_decompress_depth_in_place(sctx, stex,
+						  level, level,
+						  first_layer, last_layer);
+		return;
+	}
+
+	if (stex->fmask.size || stex->cmask.size)
+		r600_blit_decompress_color(ctx, stex, level, level,
+					   first_layer, last_layer);
+}
+
+/* Snapshot of the texture fields that r600_compressed_to_blittable() and
+ * r600_change_format() save before overriding them, so that
+ * r600_reset_blittable_to_orig() can put them back after the blit. */
+struct texture_orig_info {
+ unsigned format; /* pipe_resource::format */
+ unsigned width0; /* pipe_resource::width0 */
+ unsigned height0; /* pipe_resource::height0 */
+ unsigned npix_x; /* surface.level[level].npix_x of the level being copied */
+ unsigned npix_y; /* surface.level[level].npix_y of the level being copied */
+ unsigned npix0_x; /* surface.level[0].npix_x */
+ unsigned npix0_y; /* surface.level[0].npix_y */
+};
+
+/* Temporarily reinterpret a block-compressed texture as an uncompressed
+ * integer texture in which one texel stands for one compressed block, so
+ * that it can go through the blitter.
+ *
+ * The overridden fields are first saved in "orig"; the caller must undo the
+ * change with r600_reset_blittable_to_orig(). "level" is the mipmap level
+ * that is going to be copied. */
+static void r600_compressed_to_blittable(struct pipe_resource *tex,
+ unsigned level,
+ struct texture_orig_info *orig)
+{
+ struct r600_texture *rtex = (struct r600_texture*)tex;
+ unsigned pixsize = util_format_get_blocksize(rtex->resource.b.b.format);
+ int new_format;
+ int new_height, new_width;
+
+ /* Save everything that is overwritten below. */
+ orig->format = tex->format;
+ orig->width0 = tex->width0;
+ orig->height0 = tex->height0;
+ orig->npix0_x = rtex->surface.level[0].npix_x;
+ orig->npix0_y = rtex->surface.level[0].npix_y;
+ orig->npix_x = rtex->surface.level[level].npix_x;
+ orig->npix_y = rtex->surface.level[level].npix_y;
+
+ /* Pick an integer format whose texel size equals the block size. */
+ if (pixsize == 8)
+ new_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
+ else
+ new_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
+
+ /* Dimensions are now counted in blocks instead of pixels. */
+ new_width = util_format_get_nblocksx(tex->format, orig->width0);
+ new_height = util_format_get_nblocksy(tex->format, orig->height0);
+
+ tex->width0 = new_width;
+ tex->height0 = new_height;
+ tex->format = new_format;
+ rtex->surface.level[0].npix_x = util_format_get_nblocksx(orig->format, orig->npix0_x);
+ rtex->surface.level[0].npix_y = util_format_get_nblocksy(orig->format, orig->npix0_y);
+ rtex->surface.level[level].npix_x = util_format_get_nblocksx(orig->format, orig->npix_x);
+ rtex->surface.level[level].npix_y = util_format_get_nblocksy(orig->format, orig->npix_y);
+
+ /* By dividing the dimensions by 4, we effectively decrement
+ * last_level by 2, therefore the last 2 mipmap levels disappear and
+ * aren't blittable. Note that the last 3 mipmap levels (4x4, 2x2,
+ * 1x1) have equal slice sizes, which is an important assumption
+ * for this to work.
+ *
+ * In order to make the last 2 mipmap levels blittable, we have to
+ * add the slice size of the last mipmap level to the texture
+ * address, so that even though the hw thinks it reads last_level-2,
+ * it will actually read last_level-1, and if we add the slice size*2,
+ * it will read last_level. That's how this workaround works.
+ */
+ if (level > rtex->resource.b.b.last_level-2)
+ rtex->mipmap_shift = level - (rtex->resource.b.b.last_level-2);
+}
+
+/* Temporarily override the format of "tex" with "format".
+ *
+ * The original format, base dimensions and the npix values of level 0 and
+ * of "level" are stored in "orig" so that r600_reset_blittable_to_orig()
+ * can restore them; only the format is actually changed here. */
+static void r600_change_format(struct pipe_resource *tex,
+			       unsigned level,
+			       struct texture_orig_info *orig,
+			       enum pipe_format format)
+{
+	struct r600_texture *stex = (struct r600_texture*)tex;
+
+	/* Surface dimensions of the base level and of the copied level. */
+	orig->npix0_x = stex->surface.level[0].npix_x;
+	orig->npix0_y = stex->surface.level[0].npix_y;
+	orig->npix_x = stex->surface.level[level].npix_x;
+	orig->npix_y = stex->surface.level[level].npix_y;
+
+	/* Resource-level fields. */
+	orig->width0 = tex->width0;
+	orig->height0 = tex->height0;
+	orig->format = tex->format;
+
+	tex->format = format;
+}
+
+/* Restore the fields saved in "orig" by r600_compressed_to_blittable() or
+ * r600_change_format(), and clear the mipmap shift workaround. "level" must
+ * be the level that was passed when the fields were saved. */
+static void r600_reset_blittable_to_orig(struct pipe_resource *tex,
+					 unsigned level,
+					 struct texture_orig_info *orig)
+{
+	struct r600_texture *stex = (struct r600_texture*)tex;
+
+	/* Surface dimensions of the base level and of the copied level. */
+	stex->surface.level[0].npix_x = orig->npix0_x;
+	stex->surface.level[0].npix_y = orig->npix0_y;
+	stex->surface.level[level].npix_x = orig->npix_x;
+	stex->surface.level[level].npix_y = orig->npix_y;
+	stex->mipmap_shift = 0;
+
+	/* Resource-level fields. */
+	tex->width0 = orig->width0;
+	tex->height0 = orig->height0;
+	tex->format = orig->format;
+}
+
+/* pipe_context::resource_copy_region hook.
+ *
+ * Buffer-to-buffer copies are forwarded to si_copy_buffer(). Texture copies
+ * go through util_blitter_copy_texture() after the source subresource has
+ * been decompressed. Two cases need the resources to be temporarily
+ * reinterpreted first:
+ *  - both formats are block-compressed: the textures are viewed as integer
+ *    textures with one texel per block and the boxes are converted from
+ *    pixels to blocks;
+ *  - u_blitter cannot copy the formats directly: both resources are viewed
+ *    in a format of the same block size that it can copy.
+ * The original texture fields are restored once the copy has been issued.
+ *
+ * If the block size of an unsupported format is not handled, an error is
+ * printed and the copy is skipped. */
+static void r600_resource_copy_region(struct pipe_context *ctx,
+				      struct pipe_resource *dst,
+				      unsigned dst_level,
+				      unsigned dstx, unsigned dsty, unsigned dstz,
+				      struct pipe_resource *src,
+				      unsigned src_level,
+				      const struct pipe_box *src_box)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct texture_orig_info orig_info[2];
+	struct pipe_box sbox;
+	const struct pipe_box *psbox = src_box;
+	boolean restore_orig[2];
+
+	/* Fallback for buffers. */
+	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
+		si_copy_buffer(rctx, dst, src, dstx, src_box->x, src_box->width);
+		return;
+	}
+
+	memset(orig_info, 0, sizeof(orig_info));
+
+	/* The driver doesn't decompress resources automatically while
+	 * u_blitter is rendering. */
+	r600_decompress_subresource(ctx, src, src_level,
+				    src_box->z, src_box->z + src_box->depth - 1);
+
+	restore_orig[0] = restore_orig[1] = FALSE;
+
+	if (util_format_is_compressed(src->format) &&
+	    util_format_is_compressed(dst->format)) {
+		r600_compressed_to_blittable(src, src_level, &orig_info[0]);
+		restore_orig[0] = TRUE;
+		/* translate the src box from pixels to blocks */
+		sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x);
+		sbox.y = util_format_get_nblocksy(orig_info[0].format, src_box->y);
+		sbox.z = src_box->z;
+		sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width);
+		sbox.height = util_format_get_nblocksy(orig_info[0].format, src_box->height);
+		sbox.depth = src_box->depth;
+		psbox = &sbox;
+
+		r600_compressed_to_blittable(dst, dst_level, &orig_info[1]);
+		restore_orig[1] = TRUE;
+		/* translate the dst box as well */
+		dstx = util_format_get_nblocksx(orig_info[1].format, dstx);
+		dsty = util_format_get_nblocksy(orig_info[1].format, dsty);
+	} else if (!util_blitter_is_copy_supported(rctx->blitter, dst, src)) {
+		unsigned blocksize = util_format_get_blocksize(src->format);
+		enum pipe_format copy_format;
+
+		/* Pick a copyable format with the same block size. */
+		switch (blocksize) {
+		case 1:
+			copy_format = PIPE_FORMAT_R8_UNORM;
+			break;
+		case 2:
+			copy_format = PIPE_FORMAT_R8G8_UNORM;
+			break;
+		case 4:
+			copy_format = PIPE_FORMAT_R8G8B8A8_UNORM;
+			break;
+		case 8:
+			copy_format = PIPE_FORMAT_R16G16B16A16_UINT;
+			break;
+		case 16:
+			copy_format = PIPE_FORMAT_R32G32B32A32_UINT;
+			break;
+		default:
+			fprintf(stderr, "Unhandled format %s with blocksize %u\n",
+				util_format_short_name(src->format), blocksize);
+			assert(0);
+			/* Nothing was saved in orig_info, so neither attempt
+			 * the copy nor "restore" the zeroed snapshot over
+			 * the resources when asserts are compiled out. */
+			return;
+		}
+
+		r600_change_format(src, src_level, &orig_info[0], copy_format);
+		r600_change_format(dst, dst_level, &orig_info[1], copy_format);
+		restore_orig[0] = TRUE;
+		restore_orig[1] = TRUE;
+	}
+
+	r600_blitter_begin(ctx, R600_COPY);
+	util_blitter_copy_texture(rctx->blitter, dst, dst_level, dstx, dsty, dstz,
+				  src, src_level, psbox);
+	r600_blitter_end(ctx);
+
+	/* Restore in the reverse order of the overrides: when src and dst are
+	 * the same resource, the dst snapshot holds the already overridden
+	 * values and the src snapshot holds the real originals, so src must
+	 * be restored last. */
+	if (restore_orig[1])
+		r600_reset_blittable_to_orig(dst, dst_level, &orig_info[1]);
+
+	if (restore_orig[0])
+		r600_reset_blittable_to_orig(src, src_level, &orig_info[0]);
+}
+
+/* For MSAA integer resolving to work, we change the format to NORM using this function.
+ * Each listed *_UINT / *_SINT format is mapped to the *_UNORM / *_SNORM
+ * format of the same layout; any other format is returned unchanged. */
+static enum pipe_format int_to_norm_format(enum pipe_format format)
+{
+ switch (format) {
+ /* Emits one case: PIPE_FORMAT_<format>_<sign>INT -> PIPE_FORMAT_<format>_<sign>NORM. */
+#define REPLACE_FORMAT_SIGN(format,sign) \
+ case PIPE_FORMAT_##format##_##sign##INT: \
+ return PIPE_FORMAT_##format##_##sign##NORM
+ /* Emits both the unsigned (U) and the signed (S) case for <format>. */
+#define REPLACE_FORMAT(format) \
+ REPLACE_FORMAT_SIGN(format, U); \
+ REPLACE_FORMAT_SIGN(format, S)
+
+ /* Only the unsigned variant is mapped for this format. */
+ REPLACE_FORMAT_SIGN(B10G10R10A2, U);
+ REPLACE_FORMAT(R8);
+ REPLACE_FORMAT(R8G8);
+ REPLACE_FORMAT(R8G8B8X8);
+ REPLACE_FORMAT(R8G8B8A8);
+ REPLACE_FORMAT(A8);
+ REPLACE_FORMAT(I8);
+ REPLACE_FORMAT(L8);
+ REPLACE_FORMAT(L8A8);
+ REPLACE_FORMAT(R16);
+ REPLACE_FORMAT(R16G16);
+ REPLACE_FORMAT(R16G16B16X16);
+ REPLACE_FORMAT(R16G16B16A16);
+ REPLACE_FORMAT(A16);
+ REPLACE_FORMAT(I16);
+ REPLACE_FORMAT(L16);
+ REPLACE_FORMAT(L16A16);
+
+#undef REPLACE_FORMAT
+#undef REPLACE_FORMAT_SIGN
+ default:
+ return format;
+ }
+}
+
+/* Try to carry out a blit as an MSAA color resolve through
+ * util_blitter_custom_resolve_color().
+ *
+ * The resolve path is taken only when all the conditions below hold:
+ * a multisampled source and a single-sampled destination, both single
+ * layered and of the same format, a non-integer (after int_to_norm_format)
+ * and non-depth/stencil format, no scissor, a full RGBA mask, source and
+ * destination boxes that both cover the whole destination level at the
+ * origin with depth 1, and a destination surface whose mode is at least
+ * RADEON_SURF_MODE_1D and that is not a scanout surface.
+ *
+ * Returns true when the resolve was issued, false when the caller has to
+ * handle the blit another way. */
+static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
+ const struct pipe_blit_info *info)
+{
+ struct r600_context *rctx = (struct r600_context*)ctx;
+ struct r600_texture *dst = (struct r600_texture*)info->dst.resource;
+ unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level);
+ unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level);
+ enum pipe_format format = int_to_norm_format(info->dst.format);
+ unsigned sample_mask = ~0; /* all samples */
+
+ if (info->src.resource->nr_samples > 1 &&
+ info->dst.resource->nr_samples <= 1 &&
+ util_max_layer(info->src.resource, 0) == 0 &&
+ util_max_layer(info->dst.resource, info->dst.level) == 0 &&
+ info->dst.format == info->src.format &&
+ !util_format_is_pure_integer(format) &&
+ !util_format_is_depth_or_stencil(format) &&
+ !info->scissor_enable &&
+ (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA &&
+ dst_width == info->src.resource->width0 &&
+ dst_height == info->src.resource->height0 &&
+ info->dst.box.x == 0 &&
+ info->dst.box.y == 0 &&
+ info->dst.box.width == dst_width &&
+ info->dst.box.height == dst_height &&
+ info->dst.box.depth == 1 &&
+ info->src.box.x == 0 &&
+ info->src.box.y == 0 &&
+ info->src.box.width == dst_width &&
+ info->src.box.height == dst_height &&
+ info->src.box.depth == 1 &&
+ dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
+ !(dst->surface.flags & RADEON_SURF_SCANOUT)) {
+ r600_blitter_begin(ctx, R600_COLOR_RESOLVE);
+ util_blitter_custom_resolve_color(rctx->blitter,
+ info->dst.resource, info->dst.level,
+ info->dst.box.z,
+ info->src.resource, info->src.box.z,
+ sample_mask, rctx->custom_blend_resolve,
+ format);
+ r600_blitter_end(ctx);
+ return true;
+ }
+ return false;
+}
+
+/* pipe_context::blit hook.
+ *
+ * The blit is first offered to do_hardware_msaa_resolve(); if that path
+ * does not apply, the source subresource is decompressed and the blit is
+ * carried out by util_blitter_blit(). */
+static void si_blit(struct pipe_context *ctx,
+		    const struct pipe_blit_info *info)
+{
+	struct r600_context *sctx = (struct r600_context*)ctx;
+
+	if (do_hardware_msaa_resolve(ctx, info))
+		return;
+
+	assert(util_blitter_is_blit_supported(sctx->blitter, info));
+
+	/* u_blitter reads the source as-is, so any compressed data has to
+	 * be decompressed by the driver beforehand. */
+	r600_decompress_subresource(ctx, info->src.resource, info->src.level,
+				    info->src.box.z,
+				    info->src.box.z + info->src.box.depth - 1);
+
+	r600_blitter_begin(ctx, R600_BLIT);
+	util_blitter_blit(sctx->blitter, info);
+	r600_blitter_end(ctx);
+}
+
+/* pipe_context::flush_resource hook, installed by si_init_blit_functions().
+ * The body is empty: this implementation performs no work. */
+static void si_flush_resource(struct pipe_context *ctx,
+ struct pipe_resource *resource)
+{
+}
+
+/* Install the clear, copy, blit and decompress entry points implemented in
+ * this file into the context's function tables. */
+void si_init_blit_functions(struct r600_context *rctx)
+{
+	struct pipe_context *pipe = &rctx->b.b;
+
+	/* Gallium pipe_context hooks. */
+	pipe->clear = r600_clear;
+	pipe->clear_render_target = r600_clear_render_target;
+	pipe->clear_depth_stencil = r600_clear_depth_stencil;
+	pipe->resource_copy_region = r600_resource_copy_region;
+	pipe->blit = si_blit;
+	pipe->flush_resource = si_flush_resource;
+
+	/* Hook of the common context (rctx->b). */
+	rctx->b.blit_decompress_depth = r600_blit_decompress_depth;
+}
--- /dev/null
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse
+ * Corbin Simpson <MostAwesomeDude@gmail.com>
+ */
+
+#include "pipe/p_screen.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
+
+#include "si.h"
+#include "si_pipe.h"
+
+/* Upload "count" indices from ib->user_buffer through the context's
+ * uploader; ib->offset and ib->buffer are updated by u_upload_data() to
+ * refer to the uploaded copy. */
+void r600_upload_index_buffer(struct r600_context *rctx,
+			      struct pipe_index_buffer *ib, unsigned count)
+{
+	const unsigned num_bytes = count * ib->index_size;
+
+	u_upload_data(rctx->b.uploader, 0, num_bytes,
+		      ib->user_buffer, &ib->offset, &ib->buffer);
+}
+
+/* Upload "size" bytes of constant data at "ptr" through the context's
+ * uploader. u_upload_data() stores the resulting buffer in *rbuffer and the
+ * offset inside it in *const_offset.
+ *
+ * When R600_BIG_ENDIAN is set, the data is byte-swapped one 32-bit word at
+ * a time into a temporary copy before being uploaded; if that copy cannot
+ * be allocated, an error is reported and nothing is uploaded. */
+void r600_upload_const_buffer(struct r600_context *rctx, struct r600_resource **rbuffer,
+			      const uint8_t *ptr, unsigned size,
+			      uint32_t *const_offset)
+{
+	uint32_t *swapped;
+	unsigned dw;
+
+	if (!R600_BIG_ENDIAN) {
+		/* The data can be uploaded as it is. */
+		u_upload_data(rctx->b.uploader, 0, size, ptr, const_offset,
+			      (struct pipe_resource**)rbuffer);
+		return;
+	}
+
+	swapped = malloc(size);
+	if (!swapped) {
+		R600_ERR("Failed to allocate BE swap buffer.\n");
+		return;
+	}
+
+	for (dw = 0; dw < size / 4; ++dw)
+		swapped[dw] = util_bswap32(((uint32_t *)ptr)[dw]);
+
+	u_upload_data(rctx->b.uploader, 0, size, swapped, const_offset,
+		      (struct pipe_resource**)rbuffer);
+
+	free(swapped);
+}
* Christian König <christian.koenig@amd.com>
*/
-#include "radeonsi_pipe.h"
-#include "radeonsi_pm4.h"
#include "sid.h"
+#include "si_pipe.h"
+#include "si_pm4.h"
void si_cmd_context_control(struct si_pm4_state *pm4)
{
--- /dev/null
+#include "util/u_memory.h"
+
+#include "../radeon/r600_cs.h"
+#include "si_pipe.h"
+#include "si_shader.h"
+
+#include "radeon_llvm_util.h"
+
+/* Number of slots in si_pipe_compute::global_buffers. */
+#define MAX_GLOBAL_BUFFERS 20
+
+/* Compute state object created by radeonsi_create_compute_state(). */
+struct si_pipe_compute {
+ /* Context the program was created on. */
+ struct r600_context *ctx;
+
+ /* Copies of pipe_compute_state::req_local_mem, req_private_mem and
+ * req_input_mem. */
+ unsigned local_size;
+ unsigned private_size;
+ unsigned input_size;
+ /* Number of entries in "kernels". */
+ unsigned num_kernels;
+ /* One compiled shader per kernel found in the program. */
+ struct si_pipe_shader *kernels;
+ unsigned num_user_sgprs;
+
+ /* Resources bound with set_global_binding, indexed by slot. */
+ struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
+
+ /* LLVM context used to load the kernels' modules. */
+ LLVMContextRef llvm_ctx;
+};
+
+/* pipe_context::create_compute_state hook.
+ *
+ * cso->prog is expected to start with a pipe_llvm_program_header that is
+ * immediately followed by header->num_bytes bytes of LLVM code. Every
+ * kernel found in that code is compiled with si_compile_llvm().
+ *
+ * Returns the new si_pipe_compute object, or NULL if memory could not be
+ * allocated. */
+static void *radeonsi_create_compute_state(
+	struct pipe_context *ctx,
+	const struct pipe_compute_state *cso)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct si_pipe_compute *program =
+			CALLOC_STRUCT(si_pipe_compute);
+	const struct pipe_llvm_program_header *header;
+	const unsigned char *code;
+	unsigned i;
+
+	if (!program)
+		return NULL;
+
+	program->llvm_ctx = LLVMContextCreate();
+
+	header = cso->prog;
+	/* The code follows the header; do the pointer arithmetic on a byte
+	 * pointer rather than on cso->prog directly. */
+	code = (const unsigned char *)cso->prog +
+	       sizeof(struct pipe_llvm_program_header);
+
+	program->ctx = rctx;
+	program->local_size = cso->req_local_mem;
+	program->private_size = cso->req_private_mem;
+	program->input_size = cso->req_input_mem;
+
+	program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, code,
+							   header->num_bytes);
+	program->kernels = CALLOC(program->num_kernels,
+				  sizeof(struct si_pipe_shader));
+	/* A zero-sized allocation may legitimately yield NULL, so only a
+	 * failed non-empty allocation is treated as an error. */
+	if (program->num_kernels && !program->kernels) {
+		LLVMContextDispose(program->llvm_ctx);
+		FREE(program);
+		return NULL;
+	}
+
+	for (i = 0; i < program->num_kernels; i++) {
+		LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
+								  code, header->num_bytes);
+		si_compile_llvm(rctx, &program->kernels[i], mod);
+		LLVMDisposeModule(mod);
+	}
+
+	return program;
+}
+
+/* pipe_context::bind_compute_state hook: make "state" (an object returned
+ * by radeonsi_create_compute_state) the context's current compute program. */
+static void radeonsi_bind_compute_state(struct pipe_context *ctx, void *state)
+{
+	struct si_pipe_compute *program = (struct si_pipe_compute*)state;
+	struct r600_context *sctx = (struct r600_context*)ctx;
+
+	sctx->cs_shader_state.program = program;
+}
+
+/* pipe_context::set_global_binding hook.
+ *
+ * Binds "n" global buffers to the slots [first, first + n) of the currently
+ * bound compute program. "resources" and "handles" are arrays of n elements
+ * whose element 0 corresponds to slot "first". For every bound resource,
+ * its 64-bit GPU virtual address is written to the memory the matching
+ * handle points to. A NULL "resources" unbinds the slots instead.
+ *
+ * The slot range must fit within MAX_GLOBAL_BUFFERS. */
+static void radeonsi_set_global_binding(
+	struct pipe_context *ctx, unsigned first, unsigned n,
+	struct pipe_resource **resources,
+	uint32_t **handles)
+{
+	unsigned i;
+	struct r600_context *rctx = (struct r600_context*)ctx;
+	struct si_pipe_compute *program = rctx->cs_shader_state.program;
+
+	assert(first + n <= MAX_GLOBAL_BUFFERS);
+
+	if (!resources) {
+		for (i = 0; i < n; i++) {
+			program->global_buffers[first + i] = NULL;
+		}
+		return;
+	}
+
+	/* The slot index is offset by "first"; the caller's arrays are not. */
+	for (i = 0; i < n; i++) {
+		uint64_t va;
+		program->global_buffers[first + i] = resources[i];
+		va = r600_resource_va(ctx->screen, resources[i]);
+		/* The handle is declared as uint32_t *, but a full 64-bit
+		 * address is stored through it. */
+		memcpy(handles[i], &va, sizeof(va));
+	}
+}
+
+static void radeonsi_launch_grid(
+ struct pipe_context *ctx,
+ const uint *block_layout, const uint *grid_layout,
+ uint32_t pc, const void *input)
+{
+ struct r600_context *rctx = (struct r600_context*)ctx;
+ struct si_pipe_compute *program = rctx->cs_shader_state.program;
+ struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+ struct r600_resource *kernel_args_buffer = NULL;
+ unsigned kernel_args_size;
+ unsigned num_work_size_bytes = 36;
+ uint32_t kernel_args_offset = 0;
+ uint32_t *kernel_args;
+ uint64_t kernel_args_va;
+ uint64_t shader_va;
+ unsigned arg_user_sgpr_count = 2;
+ unsigned i;
+ struct si_pipe_shader *shader = &program->kernels[pc];
+ unsigned lds_blocks;
+
+ pm4->compute_pkt = true;
+ si_cmd_context_control(pm4);
+
+ si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE);
+ si_pm4_cmd_add(pm4, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH) |
+ EVENT_INDEX(0x7) |
+ EVENT_WRITE_INV_L2);
+ si_pm4_cmd_end(pm4, false);
+
+ si_pm4_inval_texture_cache(pm4);
+ si_pm4_inval_shader_cache(pm4);
+ si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);
+
+ /* Upload the kernel arguments */
+
+ /* The extra num_work_size_bytes are for work group / work item size information */
+ kernel_args_size = program->input_size + num_work_size_bytes;
+ kernel_args = MALLOC(kernel_args_size);
+ for (i = 0; i < 3; i++) {
+ kernel_args[i] = grid_layout[i];
+ kernel_args[i + 3] = grid_layout[i] * block_layout[i];
+ kernel_args[i + 6] = block_layout[i];
+ }
+
+ memcpy(kernel_args + (num_work_size_bytes / 4), input, program->input_size);
+
+ r600_upload_const_buffer(rctx, &kernel_args_buffer, (uint8_t*)kernel_args,
+ kernel_args_size, &kernel_args_offset);
+ kernel_args_va = r600_resource_va(ctx->screen,
+ (struct pipe_resource*)kernel_args_buffer);
+ kernel_args_va += kernel_args_offset;
+
+ si_pm4_add_bo(pm4, kernel_args_buffer, RADEON_USAGE_READ);
+
+ si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
+ si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0));
+
+ si_pm4_set_reg(pm4, R_00B810_COMPUTE_START_X, 0);
+ si_pm4_set_reg(pm4, R_00B814_COMPUTE_START_Y, 0);
+ si_pm4_set_reg(pm4, R_00B818_COMPUTE_START_Z, 0);
+
+ si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X,
+ S_00B81C_NUM_THREAD_FULL(block_layout[0]));
+ si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y,
+ S_00B820_NUM_THREAD_FULL(block_layout[1]));
+ si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z,
+ S_00B824_NUM_THREAD_FULL(block_layout[2]));
+
+ /* Global buffers */
+ for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) {
+ struct r600_resource *buffer =
+ (struct r600_resource*)program->global_buffers[i];
+ if (!buffer) {
+ continue;
+ }
+ si_pm4_add_bo(pm4, buffer, RADEON_USAGE_READWRITE);
+ }
+
+ /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
+ * and is now per pipe, so it should be handled in the
+ * kernel if we want to use something other than the default value,
+ * which is now 0x22f.
+ */
+ if (rctx->b.chip_class <= SI) {
+ /* XXX: This should be:
+ * (number of compute units) * 4 * (waves per simd) - 1 */
+
+ si_pm4_set_reg(pm4, R_00B82C_COMPUTE_MAX_WAVE_ID,
+ 0x190 /* Default value */);
+ }
+
+ shader_va = r600_resource_va(ctx->screen, (void *)shader->bo);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
+ si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
+ si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
+
+ si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1,
+ /* We always use at least 3 VGPRS, these come from
+ * TIDIG_COMP_CNT.
+ * XXX: The compiler should account for this.
+ */
+ S_00B848_VGPRS((MAX2(3, shader->num_vgprs) - 1) / 4)
+ /* We always use at least 4 + arg_user_sgpr_count. The 4 extra
+ * sgprs are from TGID_X_EN, TGID_Y_EN, TGID_Z_EN, TG_SIZE_EN
+ * XXX: The compiler should account for this.
+ */
+ | S_00B848_SGPRS(((MAX2(4 + arg_user_sgpr_count,
+ shader->num_sgprs)) - 1) / 8))
+ ;
+
+ lds_blocks = shader->lds_size;
+ /* XXX: We are over allocating LDS. For SI, the shader reports LDS in
+ * blocks of 256 bytes, so if there are 4 bytes lds allocated in
+ * the shader and 4 bytes allocated by the state tracker, then
+ * we will set LDS_SIZE to 512 bytes rather than 256.
+ */
+ if (rctx->b.chip_class <= SI) {
+ lds_blocks += align(program->local_size, 256) >> 8;
+ } else {
+ lds_blocks += align(program->local_size, 512) >> 9;
+ }
+
+ assert(lds_blocks <= 0xFF);
+
+ si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2,
+ S_00B84C_SCRATCH_EN(0)
+ | S_00B84C_USER_SGPR(arg_user_sgpr_count)
+ | S_00B84C_TGID_X_EN(1)
+ | S_00B84C_TGID_Y_EN(1)
+ | S_00B84C_TGID_Z_EN(1)
+ | S_00B84C_TG_SIZE_EN(1)
+ | S_00B84C_TIDIG_COMP_CNT(2)
+ | S_00B84C_LDS_SIZE(lds_blocks)
+ | S_00B84C_EXCP_EN(0))
+ ;
+ si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0);
+
+ si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0,
+ S_00B858_SH0_CU_EN(0xffff /* Default value */)
+ | S_00B858_SH1_CU_EN(0xffff /* Default value */))
+ ;
+
+ si_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1,
+ S_00B85C_SH0_CU_EN(0xffff /* Default value */)
+ | S_00B85C_SH1_CU_EN(0xffff /* Default value */))
+ ;
+
+ si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT);
+ si_pm4_cmd_add(pm4, grid_layout[0]); /* Thread groups DIM_X */
+ si_pm4_cmd_add(pm4, grid_layout[1]); /* Thread groups DIM_Y */
+ si_pm4_cmd_add(pm4, grid_layout[2]); /* Thread gropus DIM_Z */
+ si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */
+ si_pm4_cmd_end(pm4, false);
+
+ si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE);
+ si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(0x4)));
+ si_pm4_cmd_end(pm4, false);
+
+ si_pm4_inval_texture_cache(pm4);
+ si_pm4_inval_shader_cache(pm4);
+ si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);
+
+ si_pm4_emit(rctx, pm4);
+
+#if 0
+ fprintf(stderr, "cdw: %i\n", rctx->cs->cdw);
+ for (i = 0; i < rctx->cs->cdw; i++) {
+ fprintf(stderr, "%4i : 0x%08X\n", i, rctx->cs->buf[i]);
+ }
+#endif
+
+ FREE(pm4);
+ FREE(kernel_args);
+}
+
+
+/*
+ * Destroy a compute state object.
+ *
+ * A NULL state is ignored.  Otherwise the kernels array is freed (if set),
+ * the LLVM context is disposed (if set), and the object itself is freed.
+ */
+static void si_delete_compute_state(struct pipe_context *ctx, void *state)
+{
+	struct si_pipe_compute *prog = (struct si_pipe_compute *)state;
+
+	if (!state)
+		return;
+
+	if (prog->kernels)
+		FREE(prog->kernels);
+
+	if (prog->llvm_ctx)
+		LLVMContextDispose(prog->llvm_ctx);
+
+	/* The container goes last, after everything it pointed to. */
+	FREE(prog);
+}
+
+/*
+ * set_compute_resources hook.  The body is empty: every argument is
+ * ignored and nothing is recorded.
+ */
+static void si_set_compute_resources(struct pipe_context *ctx_,
+				     unsigned start, unsigned count,
+				     struct pipe_surface **surfaces)
+{
+}
+
+/*
+ * Install the compute entry points in the pipe_context embedded in rctx.
+ */
+void si_init_compute_functions(struct r600_context *rctx)
+{
+	struct pipe_context *pipe = &rctx->b.b;
+
+	pipe->create_compute_state = radeonsi_create_compute_state;
+	pipe->bind_compute_state = radeonsi_bind_compute_state;
+	pipe->delete_compute_state = si_delete_compute_state;
+/* ctx->context.create_sampler_view = evergreen_compute_create_sampler_view; */
+	pipe->set_compute_resources = si_set_compute_resources;
+	pipe->set_global_binding = radeonsi_set_global_binding;
+	pipe->launch_grid = radeonsi_launch_grid;
+}
* Marek Olšák <marek.olsak@amd.com>
*/
#include "../radeon/r600_cs.h"
-#include "radeonsi_pipe.h"
-#include "radeonsi_resource.h"
-#include "radeonsi_shader.h"
+#include "si_pipe.h"
+#include "si_resource.h"
+#include "si_shader.h"
#include "util/u_memory.h"
--- /dev/null
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse
+ */
+#include "../radeon/r600_cs.h"
+#include "sid.h"
+#include "si_pm4.h"
+#include "si_pipe.h"
+#include "util/u_memory.h"
+#include <errno.h>
+
+#define GROUP_FORCE_NEW_BLOCK 0
+
+/* Get backends mask.
+ *
+ * Fills ctx->backend_mask, trying in this order:
+ *  1. the backend map from the screen info, when it is flagged valid: one
+ *     4-bit item per tile pipe, of which the low 3 bits select a mask bit;
+ *  2. a ZPASS_DONE event written into a temporary staging buffer: backend i
+ *     is counted when dword 1 of its 16-byte slot reads back non-zero;
+ *  3. a mask with the num_backends lowest bits set.
+ *
+ * NOTE(review): step 3 shifts a 32-bit value by (32 - num_backends), which
+ * is undefined in C when num_backends is 0 -- confirm the winsys never
+ * reports 0 backends.
+ */
+void si_get_backend_mask(struct r600_context *ctx)
+{
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+ struct r600_resource *buffer;
+ uint32_t *results;
+ unsigned num_backends = ctx->screen->b.info.r600_num_backends;
+ unsigned i, mask = 0;
+
+ /* if backend_map query is supported by the kernel */
+ if (ctx->screen->b.info.r600_backend_map_valid) {
+ unsigned num_tile_pipes = ctx->screen->b.info.r600_num_tile_pipes;
+ unsigned backend_map = ctx->screen->b.info.r600_backend_map;
+ unsigned item_width = 4, item_mask = 0x7;
+
+ while(num_tile_pipes--) {
+ i = backend_map & item_mask;
+ mask |= (1<<i);
+ backend_map >>= item_width;
+ }
+ if (mask != 0) {
+ ctx->backend_mask = mask;
+ return;
+ }
+ }
+
+ /* otherwise backup path for older kernels */
+
+ /* create buffer for event data (16 bytes per DB) */
+ buffer = r600_resource_create_custom(&ctx->screen->b.b,
+ PIPE_USAGE_STAGING,
+ ctx->max_db*16);
+ if (!buffer)
+ goto err;
+
+ /* initialize buffer with zeroes */
+ results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
+ if (results) {
+ uint64_t va = 0;
+
+ memset(results, 0, ctx->max_db * 4 * 4); /* same size as the allocation above */
+ ctx->b.ws->buffer_unmap(buffer->cs_buf);
+
+ /* emit EVENT_WRITE for ZPASS_DONE */
+ va = r600_resource_va(&ctx->screen->b.b, (void *)buffer);
+ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
+ cs->buf[cs->cdw++] = va;
+ cs->buf[cs->cdw++] = va >> 32;
+
+ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+ cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, buffer, RADEON_USAGE_WRITE);
+
+ /* analyze results */
+ results = ctx->b.ws->buffer_map(buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_READ);
+ if (results) {
+ for(i = 0; i < ctx->max_db; i++) {
+ /* at least highest bit will be set if backend is used */
+ if (results[i*4 + 1])
+ mask |= (1<<i);
+ }
+ ctx->b.ws->buffer_unmap(buffer->cs_buf);
+ }
+ }
+
+ r600_resource_reference(&buffer, NULL); /* drop our reference to the temporary buffer */
+
+ if (mask != 0) {
+ ctx->backend_mask = mask;
+ return;
+ }
+
+err:
+ /* fallback to old method - set num_backends lower bits to 1 */
+ ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends);
+ return;
+}
+
+/*
+ * Tell whether a query type is one of the timer queries:
+ * TIME_ELAPSED, TIMESTAMP or TIMESTAMP_DISJOINT.
+ */
+bool si_is_timer_query(unsigned type)
+{
+	switch (type) {
+	case PIPE_QUERY_TIME_ELAPSED:
+	case PIPE_QUERY_TIMESTAMP:
+	case PIPE_QUERY_TIMESTAMP_DISJOINT:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Tell whether a query type has a begin step.  PIPE_QUERY_TIMESTAMP is the
+ * only type that does not.
+ */
+bool si_query_needs_begin(unsigned type)
+{
+	if (type == PIPE_QUERY_TIMESTAMP)
+		return false;
+
+	return true;
+}
+
+/*
+ * Make sure the gfx command stream can take num_dw more dwords.
+ *
+ * On top of the caller's request this counts the dwords already written,
+ * every dirty atom, optionally the pending dirty pm4 state plus one
+ * worst-case draw (count_draw_in), and everything that is emitted when the
+ * CS is closed.  If the total exceeds RADEON_MAX_CMDBUF_DWORDS the context
+ * is flushed with RADEON_FLUSH_ASYNC.
+ */
+void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
+		      boolean count_draw_in)
+{
+	int atom;
+
+	/* Start from what the CS already holds. */
+	num_dw += ctx->b.rings.gfx.cs->cdw;
+
+	/* Every dirty atom is going to be emitted. */
+	for (atom = 0; atom < SI_NUM_ATOMS(ctx); atom++) {
+		if (!ctx->atoms.array[atom]->dirty)
+			continue;
+
+		num_dw += ctx->atoms.array[atom]->num_dw;
+	}
+
+	if (count_draw_in) {
+		/* All the dirty states ... */
+		num_dw += ctx->pm4_dirty_cdwords;
+
+		/* ... and the upper bound of one draw command. */
+		num_dw += SI_MAX_DRAW_CS_DWORDS;
+	}
+
+	/* What follows is emitted at the end of the CS: queries_suspend, */
+	num_dw += ctx->num_cs_dw_nontimer_queries_suspend;
+
+	/* streamout_end, */
+	if (ctx->b.streamout.begin_emitted)
+		num_dw += ctx->b.streamout.num_dw_for_end;
+
+	/* render_condition(NULL), */
+	if (ctx->predicate_drawing)
+		num_dw += 3;
+
+	/* and the framebuffer cache flushes. */
+	num_dw += ctx->atoms.cache_flush->num_dw;
+
+#if R600_TRACE_CS
+	if (ctx->screen->trace_bo)
+		num_dw += R600_TRACE_CS_DWORDS;
+#endif
+
+	/* Flush if there's not enough space. */
+	if (num_dw > RADEON_MAX_CMDBUF_DWORDS)
+		radeonsi_flush(&ctx->b.b, NULL, RADEON_FLUSH_ASYNC);
+}
+/*
+ * Close and submit the current gfx command stream, then start a new one.
+ *
+ * Returns immediately when the CS is empty.  Otherwise active non-timer
+ * queries and streamout are ended and flagged as suspended (the flags are
+ * read back by si_begin_new_cs), CB/DB flush and texture-cache invalidate
+ * flags are set and emitted, a PS_PARTIAL_FLUSH event is appended, and the
+ * CS is handed to the winsys with RADEON_FLUSH_KEEP_TILING_FLAGS added to
+ * the caller's flags.
+ *
+ * NOTE(review): the second R600_TRACE_CS section uses ctx->ws while the
+ * rest of this function uses ctx->b.ws -- presumably stale; confirm it
+ * still builds with R600_TRACE_CS enabled.
+ */
+void si_context_flush(struct r600_context *ctx, unsigned flags)
+{
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+
+ if (!cs->cdw)
+ return;
+
+ /* suspend queries */
+ ctx->nontimer_queries_suspended = false;
+ if (ctx->num_cs_dw_nontimer_queries_suspend) {
+ r600_context_queries_suspend(ctx);
+ ctx->nontimer_queries_suspended = true;
+ }
+
+ ctx->b.streamout.suspended = false;
+
+ if (ctx->b.streamout.begin_emitted) {
+ r600_emit_streamout_end(&ctx->b);
+ ctx->b.streamout.suspended = true;
+ }
+
+ ctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
+ R600_CONTEXT_FLUSH_AND_INV_CB_META |
+ R600_CONTEXT_FLUSH_AND_INV_DB |
+ R600_CONTEXT_FLUSH_AND_INV_DB_META |
+ R600_CONTEXT_INV_TEX_CACHE;
+ si_emit_cache_flush(&ctx->b, NULL);
+
+ /* this is probably not needed anymore */
+ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
+
+ /* force to keep tiling flags */
+ flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
+
+#if R600_TRACE_CS
+ if (ctx->screen->trace_bo) {
+ struct r600_screen *rscreen = ctx->screen;
+ unsigned i;
+
+ for (i = 0; i < cs->cdw; i++) { /* dump every dword of the CS to stderr */
+ fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]);
+ }
+ rscreen->cs_count++;
+ }
+#endif
+
+ /* Flush the CS. */
+ ctx->b.ws->cs_flush(ctx->b.rings.gfx.cs, flags, 0);
+
+#if R600_TRACE_CS
+ if (ctx->screen->trace_bo) {
+ struct r600_screen *rscreen = ctx->screen;
+ unsigned i;
+
+ for (i = 0; i < 10; i++) { /* poll the trace buffer up to 10 times */
+ usleep(5);
+ if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
+ break;
+ }
+ }
+ if (i == 10) {
+ fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n",
+ rscreen->trace_ptr[1], rscreen->trace_ptr[0]);
+ } else {
+ fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5);
+ }
+ }
+#endif
+
+ si_begin_new_cs(ctx);
+}
+/*
+ * Prepare the context for a fresh command stream.
+ *
+ * Resets the dirty-dword counter, requests invalidation of the texture,
+ * constant and shader caches, marks all pm4 states as not emitted, emits
+ * the queued init state first, and then restores what was suspended for
+ * the previous CS: streamout buffers are marked dirty and non-timer
+ * queries are restarted.  Finally the descriptors are notified.
+ */
+void si_begin_new_cs(struct r600_context *ctx)
+{
+ ctx->pm4_dirty_cdwords = 0;
+
+ /* Flush read caches at the beginning of CS. */
+ ctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
+ R600_CONTEXT_INV_CONST_CACHE |
+ R600_CONTEXT_INV_SHADER_CACHE;
+
+ /* set all valid groups as dirty so they get re-emitted on
+ * next draw command
+ */
+ si_pm4_reset_emitted(ctx);
+
+ /* The CS initialization should be emitted before everything else. */
+ si_pm4_emit(ctx, ctx->queued.named.init);
+ ctx->emitted.named.init = ctx->queued.named.init;
+
+ if (ctx->b.streamout.suspended) {
+ ctx->b.streamout.append_bitmask = ctx->b.streamout.enabled_mask;
+ r600_streamout_buffers_dirty(&ctx->b);
+ }
+
+ /* resume queries */
+ if (ctx->nontimer_queries_suspended) {
+ r600_context_queries_resume(ctx);
+ }
+
+ si_all_descriptors_begin_new_cs(ctx);
+}
+
+/*
+ * Read one begin/end pair of 64-bit counters from a mapped query buffer
+ * and return end - begin.
+ *
+ * map              start of the result block being read
+ * start_index      dword index of the low half of the begin value
+ * end_index        dword index of the low half of the end value
+ * test_status_bit  when true, the difference is only returned if bit 63
+ *                  is set in both values; otherwise 0 is returned
+ *
+ * The difference is returned as a full 64-bit value.  Every caller either
+ * accumulates it into a 64-bit result or compares it, and the previous
+ * "unsigned" return type silently dropped the upper 32 bits.
+ */
+static uint64_t r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
+				       bool test_status_bit)
+{
+	const uint64_t status_bit = 0x8000000000000000ULL;
+	uint32_t *current_result = (uint32_t*)map;
+	uint64_t start, end;
+
+	/* Each counter is stored as two consecutive dwords, low half first. */
+	start = (uint64_t)current_result[start_index] |
+		(uint64_t)current_result[start_index+1] << 32;
+	end = (uint64_t)current_result[end_index] |
+		(uint64_t)current_result[end_index+1] << 32;
+
+	/* When both status bits are set they cancel out in the subtraction. */
+	if (!test_status_bit ||
+	    ((start & status_bit) && (end & status_bit))) {
+		return end - start;
+	}
+	return 0;
+}
+/*
+ * Accumulate into query->result every result block stored between
+ * query->results_start and query->results_end, then mark them consumed by
+ * setting results_start to results_end.
+ *
+ * The buffer is mapped for reading; when wait is false the map is requested
+ * with PIPE_TRANSFER_DONTBLOCK.  Returns FALSE if the map fails and TRUE
+ * otherwise.
+ *
+ * The two occlusion cases advance 16 bytes per iteration; the other looping
+ * cases advance by query->result_size.  PIPE_QUERY_TIMESTAMP does not loop:
+ * it overwrites result.u64 with the 64-bit value at the start of the map.
+ */
+static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait)
+{
+ unsigned results_base = query->results_start;
+ char *map;
+
+ map = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs,
+ PIPE_TRANSFER_READ |
+ (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
+ if (!map)
+ return FALSE;
+
+ /* count all results across all data blocks */
+ switch (query->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ while (results_base != query->results_end) {
+ query->result.u64 +=
+ r600_query_read_result(map + results_base, 0, 2, true);
+ results_base = (results_base + 16) % query->buffer->b.b.width0;
+ }
+ break;
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ while (results_base != query->results_end) {
+ query->result.b = query->result.b ||
+ r600_query_read_result(map + results_base, 0, 2, true) != 0;
+ results_base = (results_base + 16) % query->buffer->b.b.width0;
+ }
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ {
+ uint32_t *current_result = (uint32_t*)map;
+ query->result.u64 = (uint64_t)current_result[0] | (uint64_t)current_result[1] << 32;
+ break;
+ }
+ case PIPE_QUERY_TIME_ELAPSED:
+ while (results_base != query->results_end) {
+ query->result.u64 +=
+ r600_query_read_result(map + results_base, 0, 2, false);
+ results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
+ }
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ /* SAMPLE_STREAMOUTSTATS stores this structure:
+ * {
+ * u64 NumPrimitivesWritten;
+ * u64 PrimitiveStorageNeeded;
+ * }
+ * We only need NumPrimitivesWritten here. */
+ while (results_base != query->results_end) {
+ query->result.u64 +=
+ r600_query_read_result(map + results_base, 2, 6, true);
+ results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
+ }
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ /* Here we read PrimitiveStorageNeeded. */
+ while (results_base != query->results_end) {
+ query->result.u64 +=
+ r600_query_read_result(map + results_base, 0, 4, true);
+ results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
+ }
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ while (results_base != query->results_end) {
+ query->result.so.num_primitives_written +=
+ r600_query_read_result(map + results_base, 2, 6, true);
+ query->result.so.primitives_storage_needed +=
+ r600_query_read_result(map + results_base, 0, 4, true);
+ results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
+ }
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ while (results_base != query->results_end) {
+ query->result.b = query->result.b ||
+ r600_query_read_result(map + results_base, 2, 6, true) !=
+ r600_query_read_result(map + results_base, 0, 4, true);
+ results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ query->results_start = query->results_end; /* everything up to results_end is now consumed */
+ ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
+ return TRUE;
+}
+
+/*
+ * Emit the "begin" half of a query into the gfx command stream.
+ *
+ * Steps:
+ *  - reserve CS space for both the begin packet and the matching end packet;
+ *  - if writing one more result block would catch up with results_start,
+ *    collect the pending results first (blocking);
+ *  - clear the result block at results_end through a CPU mapping (occlusion
+ *    queries additionally pre-set the top bit for backends that are not in
+ *    ctx->backend_mask);
+ *  - emit the event packet for the query type followed by the relocation;
+ *  - for non-timer queries, account for the dwords needed to suspend them.
+ *
+ * A failed buffer map is tolerated for every query type: the block is then
+ * simply not cleared.  The streamout branch used to dereference the map
+ * result unconditionally.
+ */
+void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
+{
+	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+	unsigned new_results_end, i;
+	uint32_t *results;
+	uint64_t va;
+
+	/* r600_query_end relies on this reservation for queries that begin. */
+	si_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
+
+	new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;
+
+	/* collect current results if query buffer is full */
+	if (new_results_end == query->results_start) {
+		r600_query_result(ctx, query, TRUE);
+	}
+
+	switch (query->type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
+		if (results) {
+			results = (uint32_t*)((char*)results + query->results_end);
+			memset(results, 0, query->result_size);
+
+			/* Set top bits for unused backends */
+			for (i = 0; i < ctx->max_db; i++) {
+				if (!(ctx->backend_mask & (1<<i))) {
+					results[(i * 4)+1] = 0x80000000;
+					results[(i * 4)+3] = 0x80000000;
+				}
+			}
+			ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
+		}
+		break;
+	case PIPE_QUERY_TIME_ELAPSED:
+		break;
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+	case PIPE_QUERY_SO_STATISTICS:
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		results = ctx->b.ws->buffer_map(query->buffer->cs_buf, ctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
+		/* Same guard as the occlusion branch: skip the clear if the map failed. */
+		if (results) {
+			results = (uint32_t*)((char*)results + query->results_end);
+			memset(results, 0, query->result_size);
+			ctx->b.ws->buffer_unmap(query->buffer->cs_buf);
+		}
+		break;
+	default:
+		assert(0);
+	}
+
+	/* emit begin query */
+	va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
+	va += query->results_end;
+
+	switch (query->type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
+		cs->buf[cs->cdw++] = va;
+		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
+		break;
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+	case PIPE_QUERY_SO_STATISTICS:
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
+		cs->buf[cs->cdw++] = va;
+		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
+		break;
+	case PIPE_QUERY_TIME_ELAPSED:
+		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
+		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
+		cs->buf[cs->cdw++] = va;
+		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
+		cs->buf[cs->cdw++] = 0;
+		cs->buf[cs->cdw++] = 0;
+		break;
+	default:
+		assert(0);
+	}
+	/* The relocation for the query buffer follows the event packet. */
+	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+	cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE);
+
+	if (!si_is_timer_query(query->type)) {
+		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
+	}
+}
+/*
+ * Emit the "end" half of a query into the gfx command stream and advance
+ * query->results_end by one result block, wrapping at the buffer width.
+ *
+ * For query types without a begin step (see si_query_needs_begin) this
+ * also reserves CS space and collects pending results when the buffer is
+ * full.  The end value is written at results_end + 8 for occlusion queries
+ * and at results_end + result_size/2 for streamout and TIME_ELAPSED.
+ *
+ * NOTE(review): for PIPE_QUERY_TIMESTAMP va stays at the buffer base
+ * (results_end is not added); r600_query_result also reads the timestamp
+ * from offset 0 -- confirm this is intended.
+ */
+void r600_query_end(struct r600_context *ctx, struct r600_query *query)
+{
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+ uint64_t va;
+ unsigned new_results_end;
+
+ /* The queries which need begin already called this in begin_query. */
+ if (!si_query_needs_begin(query->type)) {
+ si_need_cs_space(ctx, query->num_cs_dw, TRUE);
+
+ new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;
+
+ /* collect current results if query buffer is full */
+ if (new_results_end == query->results_start) {
+ r600_query_result(ctx, query, TRUE);
+ }
+ }
+
+ va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
+ /* emit end query */
+ switch (query->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ va += query->results_end + 8;
+ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
+ cs->buf[cs->cdw++] = va;
+ cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ va += query->results_end + query->result_size/2;
+ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
+ cs->buf[cs->cdw++] = va;
+ cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ va += query->results_end + query->result_size/2;
+ /* fall through */
+ case PIPE_QUERY_TIMESTAMP:
+ cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
+ cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
+ cs->buf[cs->cdw++] = va;
+ cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
+ cs->buf[cs->cdw++] = 0;
+ cs->buf[cs->cdw++] = 0;
+ break;
+ default:
+ assert(0);
+ }
+ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+ cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx, query->buffer, RADEON_USAGE_WRITE);
+
+ query->results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;
+
+ if (si_query_needs_begin(query->type) && !si_is_timer_query(query->type)) {
+ ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw; /* undoes the increment made in r600_query_begin */
+ }
+}
+/*
+ * Emit SET_PREDICATION packets into the gfx command stream.
+ *
+ * PREDICATION_OP_CLEAR emits a single packet with a zero address.  Any
+ * other operation emits one packet plus a relocation NOP (5 dwords) per
+ * result block between results_start and results_end; every packet after
+ * the first carries PREDICATION_CONTINUE.  A non-zero flag_wait selects
+ * PREDICATION_HINT_WAIT, otherwise PREDICATION_HINT_NOWAIT_DRAW is used.
+ */
+void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
+ int flag_wait)
+{
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+ uint64_t va;
+
+ if (operation == PREDICATION_OP_CLEAR) {
+ si_need_cs_space(ctx, 3, FALSE);
+
+ cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
+ cs->buf[cs->cdw++] = 0;
+ cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR);
+ } else {
+ unsigned results_base = query->results_start;
+ unsigned count;
+ uint32_t op;
+
+ /* find count of the query data blocks */
+ count = (query->buffer->b.b.width0 + query->results_end - query->results_start) % query->buffer->b.b.width0;
+ count /= query->result_size;
+
+ si_need_cs_space(ctx, 5 * count, TRUE);
+
+ op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
+ (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
+ va = r600_resource_va(&ctx->screen->b.b, (void*)query->buffer);
+
+ /* emit predicate packets for all data blocks */
+ while (results_base != query->results_end) {
+ cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
+ cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
+ cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
+ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+ cs->buf[cs->cdw++] = r600_context_bo_reloc(&ctx->b, &ctx->b.rings.gfx,
+ query->buffer, RADEON_USAGE_READ);
+ results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
+
+ /* set CONTINUE bit for all packets except the first */
+ op |= PREDICATION_CONTINUE;
+ }
+ }
+}
+
+/*
+ * Allocate a query object of the given type together with its result
+ * buffer.
+ *
+ * The type selects the size of one result block and the number of CS
+ * dwords a begin or end takes.  The buffer starts from 4096 bytes and is
+ * trimmed to a whole number of result blocks.  Returns NULL when the type
+ * is unknown or an allocation fails.
+ */
+struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type)
+{
+	struct r600_query *q = CALLOC_STRUCT(r600_query);
+	unsigned size = 4096;
+
+	if (!q)
+		return NULL;
+
+	q->type = query_type;
+
+	switch (query_type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		/* One 16-byte slot per DB. */
+		q->result_size = 16 * ctx->max_db;
+		q->num_cs_dw = 6;
+		break;
+	case PIPE_QUERY_TIMESTAMP:
+		q->result_size = 8;
+		q->num_cs_dw = 8;
+		break;
+	case PIPE_QUERY_TIME_ELAPSED:
+		q->result_size = 16;
+		q->num_cs_dw = 8;
+		break;
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+	case PIPE_QUERY_SO_STATISTICS:
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
+		q->result_size = 32;
+		q->num_cs_dw = 6;
+		break;
+	default:
+		assert(0);
+		goto fail;
+	}
+
+	/* A whole number of blocks keeps the offset wrapping math simple. */
+	size -= size % q->result_size;
+
+	/* Results are written by the GPU and then read by the CPU, hence
+	 * a staging buffer.
+	 */
+	q->buffer = r600_resource_create_custom(&ctx->screen->b.b,
+						PIPE_USAGE_STAGING,
+						size);
+	if (!q->buffer)
+		goto fail;
+
+	return q;
+
+fail:
+	FREE(q);
+	return NULL;
+}
+
+/*
+ * Destroy a query created by r600_context_query_create: drop the reference
+ * to its result buffer and release the object.
+ *
+ * The object comes from CALLOC_STRUCT, so it is released with the matching
+ * FREE() (as the creation error paths already do) rather than with the C
+ * library free().
+ */
+void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
+{
+	r600_resource_reference(&query->buffer, NULL);
+	FREE(query);
+}
+
+/*
+ * Collect the pending results of a query and store the value in *vresult.
+ *
+ * vresult is written as a uint64_t, a boolean or a
+ * pipe_query_data_so_statistics depending on the query type.  Timer values
+ * are scaled by 1000000 / r600_clock_crystal_freq.  Returns FALSE, leaving
+ * *vresult untouched, when the results could not be read.
+ */
+boolean r600_context_query_result(struct r600_context *ctx,
+				  struct r600_query *query,
+				  boolean wait, void *vresult)
+{
+	if (!r600_query_result(ctx, query, wait))
+		return FALSE;
+
+	switch (query->type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+		*(uint64_t*)vresult = query->result.u64;
+		break;
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		*(boolean*)vresult = query->result.b;
+		break;
+	case PIPE_QUERY_TIMESTAMP:
+	case PIPE_QUERY_TIME_ELAPSED:
+		*(uint64_t*)vresult = (1000000 * query->result.u64) / ctx->screen->b.info.r600_clock_crystal_freq;
+		break;
+	case PIPE_QUERY_SO_STATISTICS:
+		*(struct pipe_query_data_so_statistics*)vresult = query->result.so;
+		break;
+	default:
+		assert(0);
+	}
+	return TRUE;
+}
+
+/*
+ * End every query on the active non-timer list.  Afterwards no suspend
+ * dwords may remain accounted for.
+ */
+void r600_context_queries_suspend(struct r600_context *ctx)
+{
+	struct r600_query *q;
+
+	LIST_FOR_EACH_ENTRY(q, &ctx->active_nontimer_query_list, list) {
+		r600_query_end(ctx, q);
+	}
+
+	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
+}
+
+/*
+ * Begin again every query on the active non-timer list.  No suspend
+ * dwords may be accounted for on entry.
+ */
+void r600_context_queries_resume(struct r600_context *ctx)
+{
+	struct r600_query *q;
+
+	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
+
+	LIST_FOR_EACH_ENTRY(q, &ctx->active_nontimer_query_list, list) {
+		r600_query_begin(ctx, q);
+	}
+}
+/*
+ * Only built when R600_TRACE_CS is set: emit a WRITE_DATA packet that
+ * stores the CS dword count and rscreen->cs_count at the address of
+ * rscreen->trace_bo.
+ *
+ * NOTE(review): "cs->buf[cs->cdw++] = cs->cdw;" modifies and reads cs->cdw
+ * without sequencing between the two, so C does not define which value is
+ * stored.
+ * NOTE(review): this uses rctx->cs, rscreen->screen and a three-argument
+ * r600_context_bo_reloc(), unlike the rest of this file -- presumably
+ * stale; confirm it still compiles when the macro is enabled.
+ */
+#if R600_TRACE_CS
+void r600_trace_emit(struct r600_context *rctx)
+{
+ struct r600_screen *rscreen = rctx->screen;
+ struct radeon_winsys_cs *cs = rctx->cs;
+ uint64_t va;
+
+ va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
+ r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE);
+ cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
+ cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
+ PKT3_WRITE_DATA_WR_CONFIRM |
+ PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
+ cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
+ cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;
+ cs->buf[cs->cdw++] = cs->cdw;
+ cs->buf[cs->cdw++] = rscreen->cs_count;
+}
+#endif
--- /dev/null
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <stdio.h>
+#include <errno.h>
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "util/u_blitter.h"
+#include "util/u_double_list.h"
+#include "util/u_format.h"
+#include "util/u_transfer.h"
+#include "util/u_surface.h"
+#include "util/u_pack_color.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_simple_shaders.h"
+#include "util/u_upload_mgr.h"
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
+#include "os/os_time.h"
+#include "pipebuffer/pb_buffer.h"
+#include "si_pipe.h"
+#include "radeon/radeon_uvd.h"
+#include "si.h"
+#include "sid.h"
+#include "si_resource.h"
+#include "si_pipe.h"
+#include "si_state.h"
+#include "../radeon/r600_cs.h"
+
+/*
+ * pipe_context
+ */
+/*
+ * Flush the gfx command stream of a context.
+ *
+ * When fence is non-NULL a fence for the current CS is created first and
+ * returned through it.  An active render condition is switched off for the
+ * duration of the flush and set again afterwards with the same query,
+ * condition and mode.
+ */
+void radeonsi_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
+		    unsigned flags)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct pipe_query *saved_query;
+	boolean saved_cond = FALSE;
+	unsigned saved_mode = 0;
+
+	if (fence)
+		*fence = rctx->b.ws->cs_create_fence(rctx->b.rings.gfx.cs);
+
+	/* Remember the render condition, then disable it. */
+	saved_query = rctx->current_render_cond;
+	if (saved_query) {
+		saved_cond = rctx->current_render_cond_cond;
+		saved_mode = rctx->current_render_cond_mode;
+		ctx->render_condition(ctx, NULL, FALSE, 0);
+	}
+
+	si_context_flush(rctx, flags);
+
+	/* Put the render condition back. */
+	if (saved_query)
+		ctx->render_condition(ctx, saved_query, saved_cond, saved_mode);
+}
+
+/*
+ * pipe_context::flush hook.  Of the caller's flags only
+ * PIPE_FLUSH_END_OF_FRAME is honoured; it is passed on as
+ * RADEON_FLUSH_END_OF_FRAME.
+ */
+static void r600_flush_from_st(struct pipe_context *ctx,
+			       struct pipe_fence_handle **fence,
+			       unsigned flags)
+{
+	unsigned winsys_flags = 0;
+
+	if (flags & PIPE_FLUSH_END_OF_FRAME)
+		winsys_flags = RADEON_FLUSH_END_OF_FRAME;
+
+	radeonsi_flush(ctx, fence, winsys_flags);
+}
+
+/*
+ * Flush callback handed to the winsys: ctx is the pipe_context, flags are
+ * passed through unchanged and no fence is requested.
+ */
+static void r600_flush_from_winsys(void *ctx, unsigned flags)
+{
+	struct pipe_context *pipe = (struct pipe_context*)ctx;
+
+	radeonsi_flush(pipe, NULL, flags);
+}
+/*
+ * pipe_context::destroy hook; r600_create_context also calls it to unwind
+ * a failed creation.
+ *
+ * Releases the descriptors, the null constant buffer, the border color
+ * table, the internal shader, DSA and blend states, the framebuffer state
+ * and the blitter, then cleans up the common context and frees rctx.
+ *
+ * NOTE(review): only dummy_pixel_shader is NULL-checked.  The delete_*
+ * hooks and util_blitter_destroy are called unconditionally, so the failure
+ * path of r600_create_context relies on them coping with a partially
+ * initialised context -- confirm.
+ */
+static void r600_destroy_context(struct pipe_context *context)
+{
+ struct r600_context *rctx = (struct r600_context *)context;
+
+ si_release_all_descriptors(rctx);
+
+ pipe_resource_reference(&rctx->null_const_buf.buffer, NULL);
+ r600_resource_reference(&rctx->border_color_table, NULL);
+
+ if (rctx->dummy_pixel_shader) {
+ rctx->b.b.delete_fs_state(&rctx->b.b, rctx->dummy_pixel_shader);
+ }
+ for (int i = 0; i < 8; i++) {
+ rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_depth_stencil[i]);
+ rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_depth[i]);
+ rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_stencil[i]);
+ }
+ rctx->b.b.delete_depth_stencil_alpha_state(&rctx->b.b, rctx->custom_dsa_flush_inplace);
+ rctx->b.b.delete_blend_state(&rctx->b.b, rctx->custom_blend_resolve);
+ rctx->b.b.delete_blend_state(&rctx->b.b, rctx->custom_blend_decompress);
+ util_unreference_framebuffer_state(&rctx->framebuffer);
+
+ util_blitter_destroy(rctx->blitter);
+
+ r600_common_context_cleanup(&rctx->b);
+ FREE(rctx);
+}
+/*
+ * Create a context for the given screen and return its embedded
+ * pipe_context, or NULL on failure.
+ *
+ * Order of work: common context init, hook installation (blit, query,
+ * resource, compute, video), gfx CS creation, descriptors, atoms,
+ * chip-class specific state setup (SI and CIK only), flush callback,
+ * blitter, dummy pixel shader, first CS and backend mask, and on CIK a
+ * zero-filled dummy constant buffer bound to every constant buffer slot.
+ *
+ * NOTE(review): every failure jumps to r600_destroy_context with a
+ * partially initialised context (for example before the state hooks or
+ * the blitter exist) -- confirm the destroy path tolerates that.
+ * NOTE(review): the results of cs_create and pipe_buffer_create are used
+ * without a NULL check.
+ */
+static struct pipe_context *r600_create_context(struct pipe_screen *screen, void *priv)
+{
+ struct r600_context *rctx = CALLOC_STRUCT(r600_context);
+ struct r600_screen* rscreen = (struct r600_screen *)screen;
+ int shader, i;
+
+ if (rctx == NULL)
+ return NULL;
+
+ if (!r600_common_context_init(&rctx->b, &rscreen->b))
+ goto fail;
+
+ rctx->b.b.screen = screen;
+ rctx->b.b.priv = priv;
+ rctx->b.b.destroy = r600_destroy_context;
+ rctx->b.b.flush = r600_flush_from_st;
+
+ /* Easy accessing of screen/winsys. */
+ rctx->screen = rscreen;
+
+ si_init_blit_functions(rctx);
+ r600_init_query_functions(rctx);
+ r600_init_context_resource_functions(rctx);
+ si_init_compute_functions(rctx);
+
+ if (rscreen->b.info.has_uvd) {
+ rctx->b.b.create_video_codec = radeonsi_uvd_create_decoder;
+ rctx->b.b.create_video_buffer = radeonsi_video_buffer_create;
+ } else {
+ rctx->b.b.create_video_codec = vl_create_decoder;
+ rctx->b.b.create_video_buffer = vl_video_buffer_create;
+ }
+
+ rctx->b.rings.gfx.cs = rctx->b.ws->cs_create(rctx->b.ws, RING_GFX, NULL);
+ rctx->b.rings.gfx.flush = r600_flush_from_winsys;
+
+ si_init_all_descriptors(rctx);
+
+ /* Initialize cache_flush. */
+ rctx->cache_flush = si_atom_cache_flush;
+ rctx->atoms.cache_flush = &rctx->cache_flush;
+
+ rctx->atoms.streamout_begin = &rctx->b.streamout.begin_atom;
+
+ switch (rctx->b.chip_class) {
+ case SI:
+ case CIK:
+ si_init_state_functions(rctx);
+ LIST_INITHEAD(&rctx->active_nontimer_query_list);
+ rctx->max_db = 8;
+ si_init_config(rctx);
+ break;
+ default:
+ R600_ERR("Unsupported chip class %d.\n", rctx->b.chip_class);
+ goto fail;
+ }
+
+ rctx->b.ws->cs_set_flush_callback(rctx->b.rings.gfx.cs, r600_flush_from_winsys, rctx);
+
+ rctx->blitter = util_blitter_create(&rctx->b.b);
+ if (rctx->blitter == NULL)
+ goto fail;
+
+ rctx->dummy_pixel_shader =
+ util_make_fragment_cloneinput_shader(&rctx->b.b, 0,
+ TGSI_SEMANTIC_GENERIC,
+ TGSI_INTERPOLATE_CONSTANT);
+ rctx->b.b.bind_fs_state(&rctx->b.b, rctx->dummy_pixel_shader);
+
+ /* these must be last */
+ si_begin_new_cs(rctx);
+ si_get_backend_mask(rctx);
+
+ /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
+ * with a NULL buffer). We need to use a dummy buffer instead. */
+ if (rctx->b.chip_class == CIK) {
+ rctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
+ PIPE_USAGE_STATIC, 16);
+ rctx->null_const_buf.buffer_size = rctx->null_const_buf.buffer->width0;
+
+ for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
+ for (i = 0; i < NUM_CONST_BUFFERS; i++) {
+ rctx->b.b.set_constant_buffer(&rctx->b.b, shader, i,
+ &rctx->null_const_buf);
+ }
+ }
+
+ /* Clear the NULL constant buffer, because loads should return zeros. */
+ rctx->b.clear_buffer(&rctx->b.b, rctx->null_const_buf.buffer, 0,
+ rctx->null_const_buf.buffer->width0, 0);
+ }
+
+ return &rctx->b.b;
+fail:
+ r600_destroy_context(&rctx->b.b);
+ return NULL;
+}
+
+/*
+ * pipe_screen
+ */
+/* Vendor string reported for the screen; it does not depend on the screen. */
+static const char* r600_get_vendor(struct pipe_screen* pscreen)
+{
+	static const char vendor[] = "X.Org";
+
+	return vendor;
+}
+
+/*
+ * Map a chip family to the processor name string handed to LLVM.
+ *
+ * With HAVE_LLVM <= 0x0303 only the first four families have a dedicated
+ * name and everything else maps to "SI"; with newer LLVM the remaining
+ * families are named too and unknown families map to the empty string.
+ */
+const char *r600_get_llvm_processor_name(enum radeon_family family)
+{
+	const char *name;
+
+	switch (family) {
+	case CHIP_TAHITI:
+		name = "tahiti";
+		break;
+	case CHIP_PITCAIRN:
+		name = "pitcairn";
+		break;
+	case CHIP_VERDE:
+		name = "verde";
+		break;
+	case CHIP_OLAND:
+		name = "oland";
+		break;
+#if HAVE_LLVM > 0x0303
+	case CHIP_HAINAN:
+		name = "hainan";
+		break;
+	case CHIP_BONAIRE:
+		name = "bonaire";
+		break;
+	case CHIP_KABINI:
+		name = "kabini";
+		break;
+	case CHIP_KAVERI:
+		name = "kaveri";
+		break;
+	case CHIP_HAWAII:
+		name = "hawaii";
+		break;
+#endif
+	default:
+#if HAVE_LLVM <= 0x0303
+		name = "SI";
+#else
+		name = "";
+#endif
+		break;
+	}
+
+	return name;
+}
+
+/*
+ * Human-readable name for a chip family; families without an entry are
+ * reported as "AMD unknown".
+ */
+static const char *r600_get_family_name(enum radeon_family family)
+{
+	const char *name = "AMD unknown";
+
+	switch (family) {
+	case CHIP_TAHITI:
+		name = "AMD TAHITI";
+		break;
+	case CHIP_PITCAIRN:
+		name = "AMD PITCAIRN";
+		break;
+	case CHIP_VERDE:
+		name = "AMD CAPE VERDE";
+		break;
+	case CHIP_OLAND:
+		name = "AMD OLAND";
+		break;
+	case CHIP_HAINAN:
+		name = "AMD HAINAN";
+		break;
+	case CHIP_BONAIRE:
+		name = "AMD BONAIRE";
+		break;
+	case CHIP_KAVERI:
+		name = "AMD KAVERI";
+		break;
+	case CHIP_KABINI:
+		name = "AMD KABINI";
+		break;
+	case CHIP_HAWAII:
+		name = "AMD HAWAII";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
+/* Name of the screen: the family name of the chip the screen was created for. */
+static const char* r600_get_name(struct pipe_screen* pscreen)
+{
+	return r600_get_family_name(((struct r600_screen *)pscreen)->b.family);
+}
+
+/*
+ * Report the value of an integer/boolean screen capability.
+ *
+ * pscreen: the screen being queried (cast to struct r600_screen).
+ * param:   the capability to look up.
+ *
+ * Returns 1 for the caps grouped as supported, a cap-specific value for the
+ * numeric limits, and 0 for the caps grouped as unsupported as well as for
+ * any cap that has no case in the switch.
+ */
+static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
+{
+ struct r600_screen *rscreen = (struct r600_screen *)pscreen;
+
+ switch (param) {
+ /* Supported features (boolean caps). */
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ case PIPE_CAP_POINT_SPRITE:
+ case PIPE_CAP_OCCLUSION_QUERY:
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ case PIPE_CAP_SM3:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ case PIPE_CAP_TEXTURE_BARRIER:
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+ case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_USER_INDEX_BUFFERS:
+ case PIPE_CAP_USER_CONSTANT_BUFFERS:
+ case PIPE_CAP_START_INSTANCE:
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_COMPUTE:
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ case PIPE_CAP_TGSI_VS_LAYER:
+ return 1;
+
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ /* 2D tiling on CIK is supported since DRM 2.35.0 */
+ return HAVE_LLVM >= 0x0304 && (rscreen->b.chip_class < CIK ||
+ rscreen->b.info.drm_minor >= 35);
+
+ case PIPE_CAP_TGSI_TEXCOORD:
+ return 0;
+
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return 64;
+
+ case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+ return 256;
+
+ case PIPE_CAP_GLSL_FEATURE_LEVEL:
+ return 140;
+
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ /* NOTE(review): the result is returned as int; confirm the type and
+ * range of vram_size so that large values stay representable. */
+ return MIN2(rscreen->b.info.vram_size, 0xFFFFFFFF);
+
+ /* Unsupported features. */
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ case PIPE_CAP_SCALED_RESOLVE:
+ case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ case PIPE_CAP_USER_VERTEX_BUFFERS:
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ return 0;
+
+ case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+ return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600;
+
+ /* Stream output. */
+ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ return rscreen->b.has_streamout ? 4 : 0;
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ return rscreen->b.has_streamout ? 1 : 0;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ return rscreen->b.has_streamout ? 32*4 : 0;
+
+ /* Texturing. */
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 15;
+ case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ return 16384;
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 32;
+
+ /* Render targets. */
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 8;
+
+ case PIPE_CAP_MAX_VIEWPORTS:
+ return 1;
+
+ /* Timer queries, present when the clock frequency is non zero. */
+ case PIPE_CAP_QUERY_TIMESTAMP:
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ return rscreen->b.info.r600_clock_crystal_freq != 0;
+
+ case PIPE_CAP_MIN_TEXEL_OFFSET:
+ return -8;
+
+ case PIPE_CAP_MAX_TEXEL_OFFSET:
+ return 7;
+ case PIPE_CAP_ENDIANNESS:
+ return PIPE_ENDIAN_LITTLE;
+ }
+ /* Caps without a case above are reported as unsupported. */
+ return 0;
+}
+
+/*
+ * Report the value of a floating-point screen capability.  Caps that have
+ * no case in the switch yield 0.0f.
+ */
+static float r600_get_paramf(struct pipe_screen* pscreen,
+			     enum pipe_capf param)
+{
+	float limit = 0.0f;
+
+	switch (param) {
+	case PIPE_CAPF_MAX_LINE_WIDTH:
+	case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+	case PIPE_CAPF_MAX_POINT_WIDTH:
+	case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+		limit = 16384.0f;
+		break;
+	case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+	case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+		limit = 16.0f;
+		break;
+	case PIPE_CAPF_GUARD_BAND_LEFT:
+	case PIPE_CAPF_GUARD_BAND_TOP:
+	case PIPE_CAPF_GUARD_BAND_RIGHT:
+	case PIPE_CAPF_GUARD_BAND_BOTTOM:
+		limit = 0.0f;
+		break;
+	}
+
+	return limit;
+}
+
+/*
+ * Report a per-shader-stage capability.
+ *
+ * Vertex and fragment shaders share one table of limits.  Compute only
+ * answers PIPE_SHADER_CAP_PREFERRED_IR (LLVM) and reports 0 for everything
+ * else; geometry and all remaining stages report 0 for every cap.
+ */
+static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param)
+{
+	int value = 0;
+
+	if (shader == PIPE_SHADER_COMPUTE)
+		return param == PIPE_SHADER_CAP_PREFERRED_IR ? PIPE_SHADER_IR_LLVM : 0;
+
+	/* TODO: support and enable geometry programs */
+	/* TODO: support tessellation */
+	if (shader != PIPE_SHADER_FRAGMENT && shader != PIPE_SHADER_VERTEX)
+		return 0;
+
+	switch (param) {
+	case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+	case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+	case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+	case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+		value = 16384;
+		break;
+	case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+		value = 32;
+		break;
+	case PIPE_SHADER_CAP_MAX_INPUTS:
+		value = 32;
+		break;
+	case PIPE_SHADER_CAP_MAX_TEMPS:
+		value = 256; /* Max native temporaries. */
+		break;
+	case PIPE_SHADER_CAP_MAX_ADDRS:
+		/* FIXME Isn't this equal to TEMPS? */
+		value = 1; /* Max native address registers */
+		break;
+	case PIPE_SHADER_CAP_MAX_CONSTS:
+		value = 4096; /* actually only memory limits this */
+		break;
+	case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+		value = NUM_PIPE_CONST_BUFFERS;
+		break;
+	case PIPE_SHADER_CAP_MAX_PREDS:
+		value = 0; /* FIXME */
+		break;
+	case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+		value = 1;
+		break;
+	case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+		value = 0;
+		break;
+	case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+	case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+	case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+	case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+		value = 1;
+		break;
+	case PIPE_SHADER_CAP_INTEGERS:
+		value = 1;
+		break;
+	case PIPE_SHADER_CAP_SUBROUTINES:
+		value = 0;
+		break;
+	case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+	case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+		value = 16;
+		break;
+	case PIPE_SHADER_CAP_PREFERRED_IR:
+		value = PIPE_SHADER_IR_TGSI;
+		break;
+	}
+
+	return value;
+}
+
+/*
+ * Report a video capability by deferring to the generic vl_* helpers;
+ * caps without a case yield 0.
+ */
+static int r600_get_video_param(struct pipe_screen *screen,
+				enum pipe_video_profile profile,
+				enum pipe_video_entrypoint entrypoint,
+				enum pipe_video_cap param)
+{
+	int value;
+
+	switch (param) {
+	case PIPE_VIDEO_CAP_SUPPORTED:
+		value = vl_profile_supported(screen, profile, entrypoint);
+		break;
+	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+		value = 1;
+		break;
+	case PIPE_VIDEO_CAP_MAX_WIDTH:
+	case PIPE_VIDEO_CAP_MAX_HEIGHT:
+		value = vl_video_buffer_max_size(screen);
+		break;
+	case PIPE_VIDEO_CAP_PREFERED_FORMAT:
+		value = PIPE_FORMAT_NV12;
+		break;
+	case PIPE_VIDEO_CAP_MAX_LEVEL:
+		value = vl_level_supported(screen, profile);
+		break;
+	default:
+		value = 0;
+		break;
+	}
+
+	return value;
+}
+
+/*
+ * Report a compute capability.
+ *
+ * When ret is non-NULL the value is written to it; the return value is the
+ * size in bytes of that value either way.  Unknown caps print a message to
+ * stderr and return 0.
+ */
+static int r600_get_compute_param(struct pipe_screen *screen,
+				  enum pipe_compute_cap param,
+				  void *ret)
+{
+	struct r600_screen *rscreen = (struct r600_screen *)screen;
+
+	//TODO: select these params by asic
+	switch (param) {
+	case PIPE_COMPUTE_CAP_IR_TARGET: {
+		const char *gpu = r600_get_llvm_processor_name(rscreen->b.family);
+
+		if (ret != NULL)
+			sprintf(ret, "%s-r600--", gpu);
+		/* "-r600--" plus the terminating NUL is 8 characters. */
+		return (8 + strlen(gpu)) * sizeof(char);
+	}
+	case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+		if (ret != NULL) {
+			uint64_t *dims = ret;
+			dims[0] = 3;
+		}
+		return 1 * sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+		if (ret != NULL) {
+			uint64_t *grid = ret;
+			grid[0] = 65535;
+			grid[1] = 65535;
+			grid[2] = 1;
+		}
+		return 3 * sizeof(uint64_t) ;
+
+	case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+		if (ret != NULL) {
+			uint64_t *block = ret;
+			block[0] = 256;
+			block[1] = 256;
+			block[2] = 256;
+		}
+		return 3 * sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+		if (ret != NULL) {
+			uint64_t *threads = ret;
+			*threads = 256;
+		}
+		return sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+		if (ret != NULL) {
+			uint64_t *global_size = ret;
+			/* XXX: Not sure what to put here. */
+			*global_size = 2000000000;
+		}
+		return sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+		if (ret != NULL) {
+			uint64_t *local_size = ret;
+			/* Value reported by the closed source driver. */
+			*local_size = 32768;
+		}
+		return sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
+		if (ret != NULL) {
+			uint64_t *input_size = ret;
+			/* Value reported by the closed source driver. */
+			*input_size = 1024;
+		}
+		return sizeof(uint64_t);
+
+	case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+		if (ret != NULL) {
+			uint64_t *alloc_size = ret;
+			uint64_t global_size;
+
+			/* A quarter of the reported global memory size. */
+			r600_get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE, &global_size);
+			*alloc_size = global_size / 4;
+		}
+		return sizeof(uint64_t);
+
+	default:
+		fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
+		return 0;
+	}
+}
+
+/*
+ * Destroy a screen created by radeonsi_screen_create().
+ *
+ * A NULL screen is ignored.  One winsys reference is dropped through
+ * radeon_winsys_unref(); when that call returns false the function stops
+ * there.  Otherwise the common screen state is cleaned up, the optional CS
+ * trace buffer is released, the winsys is destroyed and the screen is freed.
+ */
+static void r600_destroy_screen(struct pipe_screen* pscreen)
+{
+	struct r600_screen *rscreen = (struct r600_screen *)pscreen;
+
+	if (rscreen == NULL)
+		return;
+
+	if (!radeon_winsys_unref(rscreen->b.ws))
+		return;
+
+	r600_common_screen_cleanup(&rscreen->b);
+
+#if R600_TRACE_CS
+	if (rscreen->trace_bo) {
+		/* The winsys pointer lives in the common screen (b.ws);
+		 * struct r600_screen has no "ws" member of its own. */
+		rscreen->b.ws->buffer_unmap(rscreen->trace_bo->cs_buf);
+		pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
+	}
+#endif
+
+	rscreen->b.ws->destroy(rscreen->b.ws);
+	FREE(rscreen);
+}
+
+/*
+ * Return the current timestamp: the winsys RADEON_TIMESTAMP value scaled by
+ * 1000000 and divided by the crystal clock frequency from the screen info.
+ *
+ * NOTE(review): this divides by r600_clock_crystal_freq; r600_get_param()
+ * only advertises the timer query caps when that frequency is non-zero -
+ * confirm callers honour the cap before calling this hook.
+ */
+static uint64_t r600_get_timestamp(struct pipe_screen *screen)
+{
+ struct r600_screen *rscreen = (struct r600_screen*)screen;
+
+ return 1000000 * rscreen->b.ws->query_value(rscreen->b.ws, RADEON_TIMESTAMP) /
+ rscreen->b.info.r600_clock_crystal_freq;
+}
+
+/*
+ * Create the radeonsi screen on top of a radeon winsys.
+ *
+ * Queries the winsys info, installs the pipe_screen entry points (UVD video
+ * hooks when the winsys reports UVD, generic vl ones otherwise), runs the
+ * common screen init and finally creates the auxiliary context.
+ *
+ * Returns the new screen, or NULL when the allocation or the common screen
+ * init fails.
+ */
+struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
+{
+	struct r600_screen *rscreen = CALLOC_STRUCT(r600_screen);
+	if (rscreen == NULL) {
+		return NULL;
+	}
+
+	ws->query_info(ws, &rscreen->b.info);
+
+	/* Set functions first. */
+	rscreen->b.b.context_create = r600_create_context;
+	rscreen->b.b.destroy = r600_destroy_screen;
+	rscreen->b.b.get_name = r600_get_name;
+	rscreen->b.b.get_vendor = r600_get_vendor;
+	rscreen->b.b.get_param = r600_get_param;
+	rscreen->b.b.get_shader_param = r600_get_shader_param;
+	rscreen->b.b.get_paramf = r600_get_paramf;
+	rscreen->b.b.get_compute_param = r600_get_compute_param;
+	rscreen->b.b.get_timestamp = r600_get_timestamp;
+	rscreen->b.b.is_format_supported = si_is_format_supported;
+	if (rscreen->b.info.has_uvd) {
+		rscreen->b.b.get_video_param = ruvd_get_video_param;
+		rscreen->b.b.is_video_format_supported = ruvd_is_format_supported;
+	} else {
+		rscreen->b.b.get_video_param = r600_get_video_param;
+		rscreen->b.b.is_video_format_supported = vl_video_buffer_is_format_supported;
+	}
+	r600_init_screen_resource_functions(&rscreen->b.b);
+
+	if (!r600_common_screen_init(&rscreen->b, ws)) {
+		FREE(rscreen);
+		return NULL;
+	}
+
+	rscreen->b.has_cp_dma = true;
+	rscreen->b.has_streamout = HAVE_LLVM >= 0x0304;
+
+	if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
+		rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
+
+#if R600_TRACE_CS
+	/* The info, pipe_screen and winsys all live in the common screen
+	 * (rscreen->b); struct r600_screen has no such members of its own. */
+	rscreen->cs_count = 0;
+	if (rscreen->b.info.drm_minor >= 28) {
+		rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b.b,
+									      PIPE_BIND_CUSTOM,
+									      PIPE_USAGE_STAGING,
+									      4096);
+		if (rscreen->trace_bo) {
+			rscreen->trace_ptr = rscreen->b.ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
+								       PIPE_TRANSFER_UNSYNCHRONIZED);
+		}
+	}
+#endif
+
+	/* Create the auxiliary context. This must be done last. */
+	rscreen->b.aux_context = rscreen->b.b.context_create(&rscreen->b.b, NULL);
+
+	return &rscreen->b.b;
+}
--- /dev/null
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jerome Glisse
+ */
+#ifndef SI_PIPE_H
+#define SI_PIPE_H
+
+#include "../radeon/r600_pipe_common.h"
+
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_slab.h"
+#include "si.h"
+#include "sid.h"
+#include "si_public.h"
+#include "si_pm4.h"
+#include "si_resource.h"
+#include "si_state.h"
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+#define R600_BIG_ENDIAN 1
+#else
+#define R600_BIG_ENDIAN 0
+#endif
+
+#define R600_TRACE_CS 0
+#define R600_TRACE_CS_DWORDS 6
+
+#define SI_MAX_DRAW_CS_DWORDS 18
+
+struct si_pipe_compute;
+
+/*
+ * Driver screen: the common radeon screen plus, when R600_TRACE_CS is
+ * enabled, the buffer, mapping and counter used for command-stream tracing.
+ */
+struct r600_screen {
+ struct r600_common_screen b;
+#if R600_TRACE_CS
+ struct r600_resource *trace_bo;
+ uint32_t *trace_ptr;
+ unsigned cs_count;
+#endif
+};
+
+/*
+ * Driver sampler view: the generic pipe_sampler_view followed by the
+ * resource it refers to and two 8-dword state arrays.
+ */
+struct si_pipe_sampler_view {
+ struct pipe_sampler_view base;
+ struct r600_resource *resource;
+ uint32_t state[8];
+ uint32_t fmask_state[8]; /* presumably the FMASK counterpart of state[] - TODO confirm */
+};
+
+/* Driver sampler state: four state dwords and a four-component border color. */
+struct si_pipe_sampler_state {
+ uint32_t val[4];
+ uint32_t border_color[4];
+};
+
+/* Compute shader state of a context: holds a pointer to a compute program. */
+struct si_cs_shader_state {
+ struct si_pipe_compute *program;
+};
+
+/*
+ * Sampler views and sampler states of one shader stage (struct r600_context
+ * keeps one of these per stage in samplers[]).
+ */
+struct r600_textures_info {
+ struct si_sampler_views views;
+ struct si_pipe_sampler_state *samplers[NUM_TEX_UNITS];
+ unsigned n_views;
+ uint32_t depth_texture_mask; /* which textures are depth */
+ uint32_t compressed_colortex_mask;
+ unsigned n_samplers;
+};
+
+#define SI_NUM_ATOMS(rctx) (sizeof((rctx)->atoms)/sizeof((rctx)->atoms.array[0]))
+#define SI_NUM_SHADERS (PIPE_SHADER_FRAGMENT+1)
+
+/*
+ * Driver context.  The common radeon context is the first member (b); the
+ * rest is radeonsi-specific state: blitter and custom state objects, the
+ * atom pointers, bound framebuffer/shaders/resources, query bookkeeping,
+ * vertex/index buffers and the queued/emitted PM4 state sets.
+ */
+struct r600_context {
+ struct r600_common_context b;
+ struct blitter_context *blitter;
+ void *custom_dsa_flush_depth_stencil[8];
+ void *custom_dsa_flush_depth[8];
+ void *custom_dsa_flush_stencil[8];
+ void *custom_dsa_flush_inplace;
+ void *custom_blend_resolve;
+ void *custom_blend_decompress;
+ struct r600_screen *screen;
+
+ /* Atom pointers, addressable by name or as a flat array; SI_NUM_ATOMS()
+ * derives the element count from the size of this union. */
+ union {
+ struct {
+ /* The order matters. */
+ struct r600_atom *const_buffers[SI_NUM_SHADERS];
+ struct r600_atom *sampler_views[SI_NUM_SHADERS];
+ struct r600_atom *streamout_buffers;
+ /* Caches must be flushed after resource descriptors are
+ * updated in memory. */
+ struct r600_atom *cache_flush;
+ struct r600_atom *streamout_begin;
+ };
+ struct r600_atom *array[0];
+ } atoms;
+
+ struct si_vertex_element *vertex_elements;
+ struct pipe_framebuffer_state framebuffer;
+ unsigned fb_log_samples;
+ unsigned fb_cb0_is_integer;
+ unsigned fb_compressed_cb_mask;
+ unsigned pa_sc_line_stipple;
+ unsigned pa_su_sc_mode_cntl;
+ /* for saving when using blitter */
+ struct pipe_stencil_ref stencil_ref;
+ struct si_pipe_shader_selector *ps_shader;
+ struct si_pipe_shader_selector *vs_shader;
+ struct si_cs_shader_state cs_shader_state;
+ struct pipe_query *current_render_cond;
+ unsigned current_render_cond_mode;
+ boolean current_render_cond_cond;
+ struct pipe_query *saved_render_cond;
+ unsigned saved_render_cond_mode;
+ boolean saved_render_cond_cond;
+ /* shader information */
+ unsigned sprite_coord_enable;
+ unsigned export_16bpc;
+ struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
+ struct si_buffer_resources streamout_buffers;
+ struct r600_textures_info samplers[SI_NUM_SHADERS];
+ struct r600_resource *border_color_table;
+ unsigned border_color_offset;
+
+ unsigned default_ps_gprs, default_vs_gprs;
+
+ /* Below are variables from the old r600_context.
+ */
+ unsigned pm4_dirty_cdwords;
+
+ /* The list of active queries. Only one query of each type can be active. */
+ struct list_head active_nontimer_query_list;
+ unsigned num_cs_dw_nontimer_queries_suspend;
+ /* If queries have been suspended. */
+ bool nontimer_queries_suspended;
+
+ unsigned backend_mask;
+ unsigned max_db; /* for OQ */
+ boolean predicate_drawing;
+
+ /* Vertex and index buffers. */
+ bool vertex_buffers_dirty;
+ struct pipe_index_buffer index_buffer;
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ unsigned nr_vertex_buffers;
+
+ /* With rasterizer discard, there doesn't have to be a pixel shader.
+ * In that case, we bind this one: */
+ void *dummy_pixel_shader;
+ struct r600_atom cache_flush;
+ struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
+
+ /* SI state handling */
+ union si_state queued;
+ union si_state emitted;
+};
+
+/* si_blit.c */
+void si_init_blit_functions(struct r600_context *rctx);
+void si_flush_depth_textures(struct r600_context *rctx,
+ struct r600_textures_info *textures);
+void r600_decompress_color_textures(struct r600_context *rctx,
+ struct r600_textures_info *textures);
+
+/* si_buffer.c */
+void r600_upload_index_buffer(struct r600_context *rctx,
+ struct pipe_index_buffer *ib, unsigned count);
+
+
+/* si_pipe.c */
+void radeonsi_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence,
+ unsigned flags);
+const char *r600_get_llvm_processor_name(enum radeon_family family);
+
+/* si_query.c */
+void r600_init_query_functions(struct r600_context *rctx);
+
+/* si_resource.c */
+void r600_init_context_resource_functions(struct r600_context *r600);
+
+/* si_translate.c */
+void r600_translate_index_buffer(struct r600_context *r600,
+ struct pipe_index_buffer *ib,
+ unsigned count);
+
+#if R600_TRACE_CS
+void r600_trace_emit(struct r600_context *rctx);
+#endif
+
+/* si_compute.c */
+void si_init_compute_functions(struct r600_context *rctx);
+
+/* si_uvd.c */
+struct pipe_video_codec *radeonsi_uvd_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ);
+
+struct pipe_video_buffer *radeonsi_video_buffer_create(struct pipe_context *pipe,
+ const struct pipe_video_buffer *tmpl);
+
+/*
+ * common helpers
+ */
+/*
+ * Convert a float to fixed point with frac_bits fractional bits:
+ * value * 2^frac_bits, converted to uint32_t.
+ */
+static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
+{
+	const int one = 1 << frac_bits;
+
+	return value * one;
+}
+#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
+
+/*
+ * Translate a UTIL_FORMAT_SWIZZLE_* selector into the matching
+ * V_008F0C_SQ_SEL_* value.  X and any unrecognized selector map to SQ_SEL_X.
+ */
+static INLINE unsigned si_map_swizzle(unsigned swizzle)
+{
+	unsigned sel = V_008F0C_SQ_SEL_X; /* UTIL_FORMAT_SWIZZLE_X and fallback */
+
+	switch (swizzle) {
+	case UTIL_FORMAT_SWIZZLE_Y:
+		sel = V_008F0C_SQ_SEL_Y;
+		break;
+	case UTIL_FORMAT_SWIZZLE_Z:
+		sel = V_008F0C_SQ_SEL_Z;
+		break;
+	case UTIL_FORMAT_SWIZZLE_W:
+		sel = V_008F0C_SQ_SEL_W;
+		break;
+	case UTIL_FORMAT_SWIZZLE_0:
+		sel = V_008F0C_SQ_SEL_0;
+		break;
+	case UTIL_FORMAT_SWIZZLE_1:
+		sel = V_008F0C_SQ_SEL_1;
+		break;
+	default:
+		break;
+	}
+
+	return sel;
+}
+
+/*
+ * Map an anisotropy value to a filter level:
+ * <= 1 -> 0, <= 2 -> 1, <= 4 -> 2, <= 8 -> 3, anything larger -> 4.
+ *
+ * Declared with the INLINE macro like the other helpers in this header.
+ */
+static INLINE unsigned r600_tex_aniso_filter(unsigned filter)
+{
+	if (filter <= 1) return 0;
+	if (filter <= 2) return 1;
+	if (filter <= 4) return 2;
+	if (filter <= 8) return 3;
+	/* else */ return 4;
+}
+
+/* 12.4 fixed-point */
+/*
+ * Pack a float as 12.4 fixed point: values <= 0 give 0, values >= 4096
+ * saturate to 0xffff, everything in between is x * 16.
+ */
+static INLINE unsigned r600_pack_float_12p4(float x)
+{
+	if (x <= 0)
+		return 0;
+	if (x >= 4096)
+		return 0xffff;
+
+	return x * 16;
+}
+
+#endif
--- /dev/null
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ */
+
+#include "../radeon/r600_cs.h"
+#include "util/u_memory.h"
+#include "si_pipe.h"
+#include "si_pm4.h"
+#include "sid.h"
+
+#define NUMBER_OF_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
+
+/*
+ * Start a new packet: remember its opcode and reserve one dword for the
+ * header, which si_pm4_cmd_end() fills in.
+ */
+void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode)
+{
+	const unsigned header_slot = state->ndw;
+
+	state->ndw = header_slot + 1;
+	state->last_pm4 = header_slot;
+	state->last_opcode = opcode;
+}
+
+/*
+ * Append one dword to the packet being built.
+ *
+ * The pm4 array holds SI_PM4_MAX_DW dwords; assert before the store so an
+ * overflow is caught before memory past the array is written.
+ */
+void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw)
+{
+	assert(state->ndw < SI_PM4_MAX_DW);
+	state->pm4[state->ndw++] = dw;
+}
+
+/*
+ * Finish the current packet by writing its PKT3 header into the dword
+ * reserved by si_pm4_cmd_begin().  The count passed to PKT3 is the number
+ * of dwords since the header minus two.
+ */
+void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate)
+{
+	const unsigned header_slot = state->last_pm4;
+	const unsigned count = state->ndw - header_slot - 2;
+	uint32_t header;
+
+	header = PKT3(state->last_opcode, count, predicate);
+	header |= PKT3_SHADER_TYPE_S(state->compute_pkt);
+	state->pm4[header_slot] = header;
+
+	assert(state->ndw <= SI_PM4_MAX_DW);
+}
+
+/*
+ * Queue a register write.
+ *
+ * The register range selects the SET_*_REG opcode and the base that is
+ * subtracted from the offset.  When the opcode matches the previous write
+ * and the register directly follows the previous one, the value is appended
+ * to the packet already in progress; otherwise a new packet is started.
+ * Offsets outside all known ranges are reported and ignored.
+ */
+void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val)
+{
+	unsigned opcode;
+	unsigned base;
+
+	if (reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END) {
+		opcode = PKT3_SET_CONFIG_REG;
+		base = SI_CONFIG_REG_OFFSET;
+	} else if (reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END) {
+		opcode = PKT3_SET_SH_REG;
+		base = SI_SH_REG_OFFSET;
+	} else if (reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END) {
+		opcode = PKT3_SET_CONTEXT_REG;
+		base = SI_CONTEXT_REG_OFFSET;
+	} else if (reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END) {
+		opcode = PKT3_SET_UCONFIG_REG;
+		base = CIK_UCONFIG_REG_OFFSET;
+	} else {
+		R600_ERR("Invalid register offset %08x!\n", reg);
+		return;
+	}
+
+	/* Byte offset within the range -> dword index. */
+	reg = (reg - base) >> 2;
+
+	if (opcode != state->last_opcode || reg != (state->last_reg + 1)) {
+		si_pm4_cmd_begin(state, opcode);
+		si_pm4_cmd_add(state, reg);
+	}
+
+	state->last_reg = reg;
+	si_pm4_cmd_add(state, val);
+	si_pm4_cmd_end(state, false);
+}
+
+/*
+ * Record a buffer referenced by this state together with its usage.  The
+ * state takes its own reference on the buffer.
+ */
+void si_pm4_add_bo(struct si_pm4_state *state,
+		   struct r600_resource *bo,
+		   enum radeon_bo_usage usage)
+{
+	const unsigned slot = state->nbo;
+
+	state->nbo = slot + 1;
+	assert(slot < SI_PM4_MAX_BO);
+
+	state->bo_usage[slot] = usage;
+	r600_resource_reference(&state->bo[slot], bo);
+}
+
+/* Open a block of inline shader data by starting a PKT3_NOP packet. */
+void si_pm4_sh_data_begin(struct si_pm4_state *state)
+{
+ si_pm4_cmd_begin(state, PKT3_NOP);
+}
+
+/* Append one dword of shader data; a plain forward to si_pm4_cmd_add(). */
+void si_pm4_sh_data_add(struct si_pm4_state *state, uint32_t dw)
+{
+ si_pm4_cmd_add(state, dw);
+}
+
+/*
+ * Close the data packet opened by si_pm4_sh_data_begin() and point the SH
+ * register at (base + idx * 4) to that data with a PKT3_SET_SH_REG_OFFSET
+ * packet.
+ *
+ * If no data was added, the reserved header dword is dropped and nothing is
+ * emitted.  The dword index of the offset field is recorded in relocs[];
+ * si_pm4_emit() adds the position of the state inside the command stream
+ * to it.
+ */
+void si_pm4_sh_data_end(struct si_pm4_state *state, unsigned base, unsigned idx)
+{
+	unsigned offs = state->last_pm4 + 1;
+	unsigned reg = base + idx * 4;
+
+	/* Bail if no data was added */
+	if (state->ndw == offs) {
+		state->ndw--;
+		return;
+	}
+
+	si_pm4_cmd_end(state, false);
+
+	si_pm4_cmd_begin(state, PKT3_SET_SH_REG_OFFSET);
+	si_pm4_cmd_add(state, (reg - SI_SH_REG_OFFSET) >> 2);
+	/* relocs[] holds SI_PM4_MAX_RELOCS entries; catch an overflow before
+	 * the store, like si_pm4_add_bo() does for bo[]. */
+	assert(state->nrelocs < SI_PM4_MAX_RELOCS);
+	state->relocs[state->nrelocs++] = state->ndw;
+	si_pm4_cmd_add(state, offs << 2);
+	si_pm4_cmd_add(state, 0);
+	si_pm4_cmd_end(state, false);
+}
+
+/* Set the SH_ICACHE and SH_KCACHE action-enable bits in the state's sync flags. */
+void si_pm4_inval_shader_cache(struct si_pm4_state *state)
+{
+	state->cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1) |
+				S_0085F0_SH_KCACHE_ACTION_ENA(1);
+}
+
+/* Set the TC and TCL1 action-enable bits in the state's sync flags. */
+void si_pm4_inval_texture_cache(struct si_pm4_state *state)
+{
+	state->cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1) |
+				S_0085F0_TCL1_ACTION_ENA(1);
+}
+
+/*
+ * Free a PM4 state and drop the buffer references it holds.
+ *
+ * A NULL state is ignored.  Unless idx is ~0, the emitted slot idx is
+ * cleared when it still points at this state, so the freed state is not
+ * mistaken for an emitted one later.
+ */
+void si_pm4_free_state(struct r600_context *rctx,
+		       struct si_pm4_state *state,
+		       unsigned idx)
+{
+	unsigned i;
+
+	if (!state)
+		return;
+
+	if (idx != ~0 && rctx->emitted.array[idx] == state)
+		rctx->emitted.array[idx] = NULL;
+
+	for (i = 0; i < state->nbo; i++)
+		r600_resource_reference(&state->bo[i], NULL);
+
+	FREE(state);
+}
+
+/*
+ * Allocate a zeroed PM4 state and tag it with the context's chip class.
+ * Returns NULL when the allocation fails.
+ */
+struct si_pm4_state * si_pm4_alloc_state(struct r600_context *rctx)
+{
+	struct si_pm4_state *state = CALLOC_STRUCT(si_pm4_state);
+
+	if (state != NULL)
+		state->chip_class = rctx->b.chip_class;
+
+	return state;
+}
+
+/*
+ * OR together the cp_coher_cntl flags of every queued state that has not
+ * been emitted yet.
+ */
+uint32_t si_pm4_sync_flags(struct r600_context *rctx)
+{
+	uint32_t flags = 0;
+	unsigned slot;
+
+	for (slot = 0; slot < NUMBER_OF_STATES; slot++) {
+		const struct si_pm4_state *queued = rctx->queued.array[slot];
+
+		if (queued != NULL && queued != rctx->emitted.array[slot])
+			flags |= queued->cp_coher_cntl;
+	}
+
+	return flags;
+}
+
+/*
+ * Number of dwords needed to emit every queued state that has not been
+ * emitted yet.  With CS tracing enabled and a trace buffer present, each
+ * such state accounts for R600_TRACE_CS_DWORDS extra dwords.
+ */
+unsigned si_pm4_dirty_dw(struct r600_context *rctx)
+{
+	unsigned total = 0;
+	unsigned slot;
+
+	for (slot = 0; slot < NUMBER_OF_STATES; slot++) {
+		const struct si_pm4_state *queued = rctx->queued.array[slot];
+
+		if (queued != NULL && queued != rctx->emitted.array[slot]) {
+			total += queued->ndw;
+#if R600_TRACE_CS
+			/* for tracing each states */
+			if (rctx->screen->trace_bo)
+				total += R600_TRACE_CS_DWORDS;
+#endif
+		}
+	}
+
+	return total;
+}
+
+/*
+ * Copy one PM4 state into the GFX command stream.
+ *
+ * Every buffer referenced by the state is added as a relocation first.
+ * After the dwords are copied, each recorded reloc dword is advanced by the
+ * byte position of the state inside the command stream (cdw << 2).
+ */
+void si_pm4_emit(struct r600_context *rctx, struct si_pm4_state *state)
+{
+	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	unsigned i;
+
+	for (i = 0; i < state->nbo; i++)
+		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, state->bo[i],
+				      state->bo_usage[i]);
+
+	memcpy(cs->buf + cs->cdw, state->pm4, state->ndw * sizeof(state->pm4[0]));
+
+	for (i = 0; i < state->nrelocs; i++)
+		cs->buf[cs->cdw + state->relocs[i]] += cs->cdw << 2;
+
+	cs->cdw += state->ndw;
+
+#if R600_TRACE_CS
+	if (rctx->screen->trace_bo)
+		r600_trace_emit(rctx);
+#endif
+}
+
+/*
+ * Emit every queued state that differs from what was emitted last and
+ * record it as emitted.
+ */
+void si_pm4_emit_dirty(struct r600_context *rctx)
+{
+	unsigned slot;
+
+	for (slot = 0; slot < NUMBER_OF_STATES; slot++) {
+		struct si_pm4_state *queued = rctx->queued.array[slot];
+
+		if (queued != NULL && queued != rctx->emitted.array[slot]) {
+			assert(queued != rctx->queued.named.init);
+			si_pm4_emit(rctx, queued);
+			rctx->emitted.array[slot] = queued;
+		}
+	}
+}
+
+/* Forget which states were emitted, so every queued state counts as dirty again. */
+void si_pm4_reset_emitted(struct r600_context *rctx)
+{
+	memset(&rctx->emitted, 0, sizeof rctx->emitted);
+}
--- /dev/null
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ */
+
+#ifndef SI_PM4_H
+#define SI_PM4_H
+
+#include "../../winsys/radeon/drm/radeon_winsys.h"
+
+#define SI_PM4_MAX_DW 256
+#define SI_PM4_MAX_BO 32
+#define SI_PM4_MAX_RELOCS 4
+
+// forward declarations
+struct r600_context;
+enum chip_class;
+
+/*
+ * One pre-built chunk of PM4 commands together with the buffers it
+ * references and the relocations to apply when it is copied into a command
+ * stream.
+ */
+struct si_pm4_state
+{
+ /* family specific handling */
+ enum chip_class chip_class;
+ /* PKT3_SET_*_REG handling */
+ unsigned last_opcode;
+ unsigned last_reg;
+ unsigned last_pm4; /* dword index of the header of the packet in progress */
+
+ /* flush flags for SURFACE_SYNC */
+ uint32_t cp_coher_cntl;
+
+ /* commands for the DE */
+ unsigned ndw; /* number of dwords used in pm4[] */
+ uint32_t pm4[SI_PM4_MAX_DW];
+
+ /* BO's referenced by this state */
+ unsigned nbo;
+ struct r600_resource *bo[SI_PM4_MAX_BO];
+ enum radeon_bo_usage bo_usage[SI_PM4_MAX_BO];
+
+ /* relocs for shader data */
+ unsigned nrelocs;
+ unsigned relocs[SI_PM4_MAX_RELOCS]; /* dword indices into pm4[] */
+
+ /* passed to PKT3_SHADER_TYPE_S() when a packet header is written */
+ bool compute_pkt;
+};
+
+void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode);
+void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw);
+void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate);
+
+void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val);
+void si_pm4_add_bo(struct si_pm4_state *state,
+ struct r600_resource *bo,
+ enum radeon_bo_usage usage);
+
+void si_pm4_sh_data_begin(struct si_pm4_state *state);
+void si_pm4_sh_data_add(struct si_pm4_state *state, uint32_t dw);
+void si_pm4_sh_data_end(struct si_pm4_state *state, unsigned base, unsigned idx);
+
+void si_pm4_inval_shader_cache(struct si_pm4_state *state);
+void si_pm4_inval_texture_cache(struct si_pm4_state *state);
+
+void si_pm4_free_state(struct r600_context *rctx,
+ struct si_pm4_state *state,
+ unsigned idx);
+struct si_pm4_state * si_pm4_alloc_state(struct r600_context *rctx);
+
+uint32_t si_pm4_sync_flags(struct r600_context *rctx);
+unsigned si_pm4_dirty_dw(struct r600_context *rctx);
+void si_pm4_emit(struct r600_context *rctx, struct si_pm4_state *state);
+void si_pm4_emit_dirty(struct r600_context *rctx);
+void si_pm4_reset_emitted(struct r600_context *rctx);
+
+#endif
--- /dev/null
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef SI_PUBLIC_H
+#define SI_PUBLIC_H
+
+struct radeon_winsys;
+
+struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws);
+
+#endif
--- /dev/null
+/*
+ * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "si_pipe.h"
+#include "sid.h"
+
+/* create_query hook: forwards to r600_context_query_create() and hands the
+ * result back as an opaque pipe_query pointer. */
+static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type)
+{
+	return (struct pipe_query*)r600_context_query_create((struct r600_context *)ctx,
+							     query_type);
+}
+
+/* destroy_query hook: forwards to r600_context_query_destroy() with both
+ * pointers cast to the driver types. */
+static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+	r600_context_query_destroy((struct r600_context *)ctx,
+				   (struct r600_query *)query);
+}
+
+/*
+ * begin_query hook.
+ *
+ * Query types for which si_query_needs_begin() is false must not be begun:
+ * that trips an assert and otherwise returns without doing anything. For all
+ * other queries the accumulated result is cleared, the result window is
+ * reset to start at the current end, the query is started, and non-timer
+ * queries are appended to the context's active_nontimer_query_list.
+ */
+static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+	struct r600_context *context = (struct r600_context *)ctx;
+	struct r600_query *q = (struct r600_query *)query;
+
+	if (!si_query_needs_begin(q->type)) {
+		assert(0);
+		return;
+	}
+
+	memset(&q->result, 0, sizeof(q->result));
+	q->results_start = q->results_end;
+	r600_query_begin(context, q);
+
+	if (si_is_timer_query(q->type)) {
+		return;
+	}
+
+	LIST_ADDTAIL(&q->list, &context->active_nontimer_query_list);
+}
+
+/*
+ * end_query hook.
+ *
+ * Queries that never went through begin (si_query_needs_begin() is false)
+ * get their result cleared here instead. After ending the query, queries
+ * that needed a begin and are not timer queries are unlinked from the list
+ * they were put on in r600_begin_query().
+ */
+static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
+{
+	struct r600_context *context = (struct r600_context *)ctx;
+	struct r600_query *q = (struct r600_query *)query;
+
+	if (!si_query_needs_begin(q->type)) {
+		memset(&q->result, 0, sizeof(q->result));
+	}
+
+	r600_query_end(context, q);
+
+	if (!si_query_needs_begin(q->type) || si_is_timer_query(q->type)) {
+		return;
+	}
+
+	LIST_DELINIT(&q->list);
+}
+
+/* get_query_result hook: forwards to r600_context_query_result() and returns
+ * whatever it reports. */
+static boolean r600_get_query_result(struct pipe_context *ctx,
+				     struct pipe_query *query,
+				     boolean wait, union pipe_query_result *vresult)
+{
+	return r600_context_query_result((struct r600_context *)ctx,
+					 (struct r600_query *)query,
+					 wait, vresult);
+}
+
+/*
+ * render_condition hook.
+ *
+ * - A query whose accumulated result is already non-zero means "render
+ *   unconditionally": any render condition currently set is cleared (by
+ *   recursing with a NULL query) and nothing else happens.
+ * - Otherwise the query/condition/mode triple is remembered on the context.
+ * - A NULL query clears predication if it was active.
+ * - A non-NULL query enables predication with an operation chosen from the
+ *   query type; the two *_WAIT modes pass a wait flag of 1.
+ */
+static void r600_render_condition(struct pipe_context *ctx,
+				  struct pipe_query *query,
+				  boolean condition,
+				  uint mode)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct r600_query *rquery = (struct r600_query *)query;
+	int wait_flag;
+
+	/* If we already have nonzero result, render unconditionally */
+	if (query != NULL && rquery->result.u64 != 0) {
+		if (rctx->current_render_cond) {
+			r600_render_condition(ctx, NULL, FALSE, 0);
+		}
+		return;
+	}
+
+	rctx->current_render_cond = query;
+	rctx->current_render_cond_cond = condition;
+	rctx->current_render_cond_mode = mode;
+
+	if (query == NULL) {
+		if (!rctx->predicate_drawing) {
+			return;
+		}
+		rctx->predicate_drawing = false;
+		r600_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, 1);
+		return;
+	}
+
+	wait_flag = (mode == PIPE_RENDER_COND_WAIT ||
+		     mode == PIPE_RENDER_COND_BY_REGION_WAIT) ? 1 : 0;
+
+	rctx->predicate_drawing = true;
+
+	switch (rquery->type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		r600_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
+		break;
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+	case PIPE_QUERY_SO_STATISTICS:
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		r600_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
+		break;
+	default:
+		/* no predication operation is defined here for other query types */
+		assert(0);
+	}
+}
+
+/*
+ * Install the query entry points on the context. render_condition is only
+ * installed when the screen info reports at least one backend
+ * (r600_num_backends > 0); otherwise that hook is left untouched.
+ */
+void r600_init_query_functions(struct r600_context *rctx)
+{
+	if (rctx->screen->b.info.r600_num_backends > 0) {
+		rctx->b.b.render_condition = r600_render_condition;
+	}
+
+	rctx->b.b.get_query_result = r600_get_query_result;
+	rctx->b.b.end_query = r600_end_query;
+	rctx->b.b.begin_query = r600_begin_query;
+	rctx->b.b.destroy_query = r600_destroy_query;
+	rctx->b.b.create_query = r600_create_query;
+}
--- /dev/null
+/*
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "si_pipe.h"
+
+/*
+ * resource_create hook: PIPE_BUFFER templates go to r600_buffer_create()
+ * (with 4096 as its third argument, presumably an alignment -- confirm
+ * against r600_buffer_create), everything else to r600_texture_create().
+ */
+static struct pipe_resource *r600_resource_create(struct pipe_screen *screen,
+						  const struct pipe_resource *templ)
+{
+	if (templ->target != PIPE_BUFFER) {
+		return r600_texture_create(screen, templ);
+	}
+
+	return r600_buffer_create(screen, templ, 4096);
+}
+
+/*
+ * resource_from_handle hook: only non-buffer resources can be created from a
+ * winsys handle; a PIPE_BUFFER template yields NULL.
+ */
+static struct pipe_resource *r600_resource_from_handle(struct pipe_screen * screen,
+						       const struct pipe_resource *templ,
+						       struct winsys_handle *whandle)
+{
+	if (templ->target != PIPE_BUFFER) {
+		return r600_texture_from_handle(screen, templ, whandle);
+	}
+
+	return NULL;
+}
+
+/*
+ * Install the screen-level resource entry points: creation and import use
+ * the functions in this file, handle export and destruction use the u_*_vtbl
+ * helpers.
+ */
+void r600_init_screen_resource_functions(struct pipe_screen *screen)
+{
+	/* helpers taken from the shared u_* utilities */
+	screen->resource_destroy = u_resource_destroy_vtbl;
+	screen->resource_get_handle = u_resource_get_handle_vtbl;
+
+	/* driver-specific entry points */
+	screen->resource_from_handle = r600_resource_from_handle;
+	screen->resource_create = r600_resource_create;
+}
+
+/*
+ * Install the context-level transfer entry points; map/unmap use the
+ * u_transfer_*_vtbl helpers, flush_region and inline_write use the
+ * u_default_* helpers.
+ */
+void r600_init_context_resource_functions(struct r600_context *r600)
+{
+	/* map / unmap pair */
+	r600->b.b.transfer_map = u_transfer_map_vtbl;
+	r600->b.b.transfer_unmap = u_transfer_unmap_vtbl;
+
+	/* default helpers */
+	r600->b.b.transfer_flush_region = u_default_transfer_flush_region;
+	r600->b.b.transfer_inline_write = u_default_transfer_inline_write;
+}
--- /dev/null
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ */
+
+#ifndef SI_RESOURCE_H
+#define SI_RESOURCE_H
+
+#include "../radeon/r600_pipe_common.h"
+#include "util/u_transfer.h"
+#include "util/u_inlines.h"
+
+/*
+ * Create a buffer with the PIPE_BIND_CUSTOM bind flag and the given usage
+ * and size, returned as a struct r600_resource. A size of zero is rejected
+ * by assert.
+ */
+static INLINE struct r600_resource *
+r600_resource_create_custom(struct pipe_screen *screen,
+			    unsigned usage, unsigned size)
+{
+	struct pipe_resource *buf;
+
+	assert(size);
+	buf = pipe_buffer_create(screen, PIPE_BIND_CUSTOM, usage, size);
+
+	return r600_resource(buf);
+}
+
+/* Driver surface type; currently carries nothing beyond the generic
+ * pipe_surface it embeds as its only member. */
+struct r600_surface {
+ struct pipe_surface base;
+};
+
+void r600_init_screen_resource_functions(struct pipe_screen *screen);
+
+struct r600_context;
+
+void r600_upload_const_buffer(struct r600_context *rctx, struct r600_resource **rbuffer,
+ const uint8_t *ptr, unsigned size,
+ uint32_t *const_offset);
+
+#endif
--- /dev/null
+
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Tom Stellard <thomas.stellard@amd.com>
+ * Michel Dänzer <michel.daenzer@amd.com>
+ * Christian König <christian.koenig@amd.com>
+ */
+
+#include "gallivm/lp_bld_tgsi_action.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_gather.h"
+#include "gallivm/lp_bld_intr.h"
+#include "gallivm/lp_bld_logic.h"
+#include "gallivm/lp_bld_tgsi.h"
+#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_flow.h"
+#include "radeon_llvm.h"
+#include "radeon_llvm_emit.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "si_pipe.h"
+#include "si_shader.h"
+#include "si_state.h"
+#include "sid.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+
+/*
+ * Per-compilation state used while translating one TGSI shader to LLVM IR.
+ * radeon_bld is the first member; si_shader_context() below casts an
+ * lp_build_tgsi_context pointer straight to this type.
+ */
+struct si_shader_context
+{
+ struct radeon_llvm_context radeon_bld;
+ struct tgsi_parse_context parse;
+ struct tgsi_token * tokens;
+ struct si_pipe_shader *shader;
+ unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
+ /* param_* fields are used as LLVMGetParam() indices into main_fn */
+ int param_streamout_config;
+ int param_streamout_write_index;
+ int param_streamout_offset[4];
+ int param_vertex_id;
+ int param_instance_id;
+ /* metadata node attached to loads built by build_indexed_load() */
+ LLVMValueRef const_md;
+ /* per constant buffer resource, passed to llvm.SI.load.const */
+ LLVMValueRef const_resource[NUM_CONST_BUFFERS];
+#if HAVE_LLVM >= 0x0304
+ LLVMValueRef ddxy_lds;
+#endif
+ /* per constant buffer array of values, indexed by register * 4 + channel */
+ LLVMValueRef *constants[NUM_CONST_BUFFERS];
+ LLVMValueRef *resources;
+ LLVMValueRef *samplers;
+ LLVMValueRef so_buffers[4];
+};
+
+/*
+ * Recover the si_shader_context from the generic TGSI build context handed
+ * to callbacks. This is a plain pointer cast, so it is only meaningful for a
+ * bld_base that lives at the start of a struct si_shader_context.
+ */
+static struct si_shader_context * si_shader_context(
+	struct lp_build_tgsi_context * bld_base)
+{
+	struct si_shader_context *ctx = (struct si_shader_context *)bld_base;
+
+	return ctx;
+}
+
+
+#define PERSPECTIVE_BASE 0
+#define LINEAR_BASE 9
+
+#define SAMPLE_OFFSET 0
+#define CENTER_OFFSET 2
+#define CENTROID_OFSET 4
+
+#define USE_SGPR_MAX_SUFFIX_LEN 5
+#define CONST_ADDR_SPACE 2
+#define LOCAL_ADDR_SPACE 3
+#define USER_SGPR_ADDR_SPACE 8
+
+/**
+ * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad
+ *
+ * @param si_shader_ctx Shader context; supplies the builder and the
+ * const_md metadata node attached to the load.
+ * @param base_ptr Pointer value to index from.
+ * @param offset The offset parameter specifies the number of
+ * elements to offset, not the number of bytes or dwords. An element is
+ * the type pointed to by the base_ptr parameter (e.g. int is the element of
+ * an int* pointer)
+ *
+ * When LLVM lowers the load instruction, it will convert the element offset
+ * into a dword offset automatically.
+ *
+ * @return The value produced by the load instruction.
+ */
+static LLVMValueRef build_indexed_load(
+ struct si_shader_context * si_shader_ctx,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset)
+{
+ struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
+
+ /* GEP indices: a constant i64 0 first, then the caller's element offset */
+ LLVMValueRef indices[2] = {
+ LLVMConstInt(LLVMInt64TypeInContext(base->gallivm->context), 0, false),
+ offset
+ };
+ LLVMValueRef computed_ptr = LLVMBuildGEP(
+ base->gallivm->builder, base_ptr, indices, 2, "");
+
+ LLVMValueRef result = LLVMBuildLoad(base->gallivm->builder, computed_ptr, "");
+ /* Tag the load with const_md under metadata kind ID 1.
+ * NOTE(review): confirm which metadata kind ID 1 denotes. */
+ LLVMSetMetadata(result, 1, si_shader_ctx->const_md);
+ return result;
+}
+
+/*
+ * Build the buffer index used to fetch an instanced vertex attribute:
+ * (instance_id + start_instance), divided (unsigned) by the divisor when the
+ * divisor is greater than 1.
+ */
+static LLVMValueRef get_instance_index_for_fetch(
+ struct radeon_llvm_context * radeon_bld,
+ unsigned divisor)
+{
+ struct si_shader_context *si_shader_ctx =
+ si_shader_context(&radeon_bld->soa.bld_base);
+ struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm;
+
+ /* instance ID parameter of the main function ... */
+ LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn,
+ si_shader_ctx->param_instance_id);
+ /* ... plus the start instance parameter */
+ result = LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(
+ radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
+
+ /* dividing by 1 would be a no-op, so no instruction is emitted for it */
+ if (divisor > 1)
+ result = LLVMBuildUDiv(gallivm->builder, result,
+ lp_build_const_int32(gallivm, divisor), "");
+
+ return result;
+}
+
+/*
+ * Declare one vertex shader input: load the resource entry for input_index
+ * from the vertex buffer list, fetch a vec4 with llvm.SI.vs.load.input and
+ * store its four components in radeon_bld.inputs[]. The decl parameter is
+ * not read here.
+ */
+static void declare_input_vs(
+ struct si_shader_context * si_shader_ctx,
+ unsigned input_index,
+ const struct tgsi_full_declaration *decl)
+{
+ struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
+ unsigned divisor = si_shader_ctx->shader->key.vs.instance_divisors[input_index];
+
+ unsigned chan;
+
+ LLVMValueRef t_list_ptr;
+ LLVMValueRef t_offset;
+ LLVMValueRef t_list;
+ LLVMValueRef attribute_offset;
+ LLVMValueRef buffer_index;
+ LLVMValueRef args[3];
+ LLVMTypeRef vec4_type;
+ LLVMValueRef input;
+
+ /* Load the T list */
+ t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFER);
+
+ t_offset = lp_build_const_int32(base->gallivm, input_index);
+
+ t_list = build_indexed_load(si_shader_ctx, t_list_ptr, t_offset);
+
+ /* Build the attribute offset */
+ attribute_offset = lp_build_const_int32(base->gallivm, 0);
+
+ if (divisor) {
+ /* Build index from instance ID, start instance and divisor */
+ si_shader_ctx->shader->shader.uses_instanceid = true;
+ buffer_index = get_instance_index_for_fetch(&si_shader_ctx->radeon_bld, divisor);
+ } else {
+ /* Load the buffer index, which is always stored in VGPR0
+ * for Vertex Shaders */
+ buffer_index = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ si_shader_ctx->param_vertex_id);
+ }
+
+ /* intrinsic operands: resource, attribute offset, buffer index */
+ vec4_type = LLVMVectorType(base->elem_type, 4);
+ args[0] = t_list;
+ args[1] = attribute_offset;
+ args[2] = buffer_index;
+ input = build_intrinsic(base->gallivm->builder,
+ "llvm.SI.vs.load.input", vec4_type, args, 3,
+ LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+
+ /* Break up the vec4 into individual components */
+ for (chan = 0; chan < 4; chan++) {
+ LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan);
+ /* XXX: Use a helper function for this. There is one in
+ * tgsi_llvm.c. */
+ si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
+ LLVMBuildExtractElement(base->gallivm->builder,
+ input, llvm_chan, "");
+ }
+}
+
+/*
+ * Declare one fragment shader input and fill radeon_bld.inputs[] for it.
+ *
+ * POSITION and FACE are taken directly from function parameters and return
+ * early. Every other semantic is assigned the next interpolation slot
+ * (shader->ninterp) and read with llvm.SI.fs.interp, or with
+ * llvm.SI.fs.constant when no interpolation parameter applies (constant
+ * interpolation, or flat-shaded colors).
+ */
+static void declare_input_fs(
+ struct si_shader_context * si_shader_ctx,
+ unsigned input_index,
+ const struct tgsi_full_declaration *decl)
+{
+ struct si_shader *shader = &si_shader_ctx->shader->shader;
+ struct lp_build_context * base =
+ &si_shader_ctx->radeon_bld.soa.bld_base.base;
+ struct lp_build_context *uint =
+ &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
+ struct gallivm_state * gallivm = base->gallivm;
+ LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
+ LLVMValueRef main_fn = si_shader_ctx->radeon_bld.main_fn;
+
+ LLVMValueRef interp_param;
+ const char * intr_name;
+
+ /* This value is:
+ * [15:0] NewPrimMask (Bit mask for each quad. It is set if the
+ * quad begins a new primitive. Bit 0 always needs
+ * to be unset)
+ * [32:16] ParamOffset
+ *
+ * NOTE(review): the upper bound is presumably bit 31 -- confirm.
+ */
+ LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK);
+ LLVMValueRef attr_number;
+
+ unsigned chan;
+
+ if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
+ /* x, y, z, w come from consecutive parameters starting at
+ * SI_PARAM_POS_X_FLOAT */
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ unsigned soa_index =
+ radeon_llvm_reg_index_soa(input_index, chan);
+ si_shader_ctx->radeon_bld.inputs[soa_index] =
+ LLVMGetParam(main_fn, SI_PARAM_POS_X_FLOAT + chan);
+
+ if (chan == 3)
+ /* RCP for fragcoord.w */
+ si_shader_ctx->radeon_bld.inputs[soa_index] =
+ LLVMBuildFDiv(gallivm->builder,
+ lp_build_const_float(gallivm, 1.0f),
+ si_shader_ctx->radeon_bld.inputs[soa_index],
+ "");
+ }
+ return;
+ }
+
+ if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
+ LLVMValueRef face, is_face_positive;
+
+ face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);
+
+ is_face_positive = LLVMBuildFCmp(gallivm->builder,
+ LLVMRealUGT, face,
+ lp_build_const_float(gallivm, 0.0f),
+ "");
+
+ /* resulting input is (face > 0 ? 1.0 : 0.0, 0.0, 0.0, 1.0) */
+ si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
+ LLVMBuildSelect(gallivm->builder,
+ is_face_positive,
+ lp_build_const_float(gallivm, 1.0f),
+ lp_build_const_float(gallivm, 0.0f),
+ "");
+ si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
+ si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
+ lp_build_const_float(gallivm, 0.0f);
+ si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
+ lp_build_const_float(gallivm, 1.0f);
+
+ return;
+ }
+
+ /* Take the next interpolation slot. Note this happens before the
+ * interpolation mode is validated below, so the slot stays consumed
+ * even when the default case bails out. */
+ shader->input[input_index].param_offset = shader->ninterp++;
+ attr_number = lp_build_const_int32(gallivm,
+ shader->input[input_index].param_offset);
+
+ /* interp_param == 0 selects llvm.SI.fs.constant further down */
+ switch (decl->Interp.Interpolate) {
+ case TGSI_INTERPOLATE_COLOR:
+ if (si_shader_ctx->shader->key.ps.flatshade) {
+ interp_param = 0;
+ } else {
+ if (decl->Interp.Centroid)
+ interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID);
+ else
+ interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER);
+ }
+ break;
+ case TGSI_INTERPOLATE_CONSTANT:
+ interp_param = 0;
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ if (decl->Interp.Centroid)
+ interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTROID);
+ else
+ interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTER);
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ if (decl->Interp.Centroid)
+ interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID);
+ else
+ interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER);
+ break;
+ default:
+ fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
+ return;
+ }
+
+ intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
+
+ /* In all three branches below the intrinsic operands are
+ * (channel, attribute number, prim mask params[, interp_param]);
+ * the fourth operand is only passed when interp_param is set. */
+
+ /* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */
+ if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+ si_shader_ctx->shader->key.ps.color_two_side) {
+ LLVMValueRef args[4];
+ LLVMValueRef face, is_face_positive;
+ /* the back color is read from the slot right after the front color */
+ LLVMValueRef back_attr_number =
+ lp_build_const_int32(gallivm,
+ shader->input[input_index].param_offset + 1);
+
+ face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);
+
+ is_face_positive = LLVMBuildFCmp(gallivm->builder,
+ LLVMRealUGT, face,
+ lp_build_const_float(gallivm, 0.0f),
+ "");
+
+ args[2] = params;
+ args[3] = interp_param;
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
+ unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
+ LLVMValueRef front, back;
+
+ args[0] = llvm_chan;
+ args[1] = attr_number;
+ front = build_intrinsic(base->gallivm->builder, intr_name,
+ input_type, args, args[3] ? 4 : 3,
+ LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+
+ args[1] = back_attr_number;
+ back = build_intrinsic(base->gallivm->builder, intr_name,
+ input_type, args, args[3] ? 4 : 3,
+ LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+
+ si_shader_ctx->radeon_bld.inputs[soa_index] =
+ LLVMBuildSelect(gallivm->builder,
+ is_face_positive,
+ front,
+ back,
+ "");
+ }
+
+ /* account for the extra slot used by the back color */
+ shader->ninterp++;
+ } else if (decl->Semantic.Name == TGSI_SEMANTIC_FOG) {
+ LLVMValueRef args[4];
+
+ /* only channel 0 is read; the input becomes (fog, 0.0, 0.0, 1.0) */
+ args[0] = uint->zero;
+ args[1] = attr_number;
+ args[2] = params;
+ args[3] = interp_param;
+ si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
+ build_intrinsic(base->gallivm->builder, intr_name,
+ input_type, args, args[3] ? 4 : 3,
+ LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+ si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
+ si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
+ lp_build_const_float(gallivm, 0.0f);
+ si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
+ lp_build_const_float(gallivm, 1.0f);
+ } else {
+ /* generic input: one intrinsic call per channel */
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ LLVMValueRef args[4];
+ LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
+ unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
+ args[0] = llvm_chan;
+ args[1] = attr_number;
+ args[2] = params;
+ args[3] = interp_param;
+ si_shader_ctx->radeon_bld.inputs[soa_index] =
+ build_intrinsic(base->gallivm->builder, intr_name,
+ input_type, args, args[3] ? 4 : 3,
+ LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+ }
+ }
+}
+
+/*
+ * Input declaration callback: dispatch to the vertex or fragment shader
+ * variant based on the shader type stored in the context. Any other shader
+ * type only produces a warning on stderr.
+ */
+static void declare_input(
+	struct radeon_llvm_context * radeon_bld,
+	unsigned input_index,
+	const struct tgsi_full_declaration *decl)
+{
+	struct si_shader_context *ctx =
+		si_shader_context(&radeon_bld->soa.bld_base);
+
+	switch (ctx->type) {
+	case TGSI_PROCESSOR_VERTEX:
+		declare_input_vs(ctx, input_index, decl);
+		break;
+	case TGSI_PROCESSOR_FRAGMENT:
+		declare_input_fs(ctx, input_index, decl);
+		break;
+	default:
+		fprintf(stderr, "Warning: Unsupported shader type,\n");
+		break;
+	}
+}
+
+/*
+ * System value declaration callback. INSTANCEID and VERTEXID are mapped to
+ * the corresponding main function parameter and stored in
+ * system_values[index]; any other semantic asserts and leaves the slot
+ * untouched.
+ */
+static void declare_system_value(
+	struct radeon_llvm_context * radeon_bld,
+	unsigned index,
+	const struct tgsi_full_declaration *decl)
+{
+	struct si_shader_context *ctx =
+		si_shader_context(&radeon_bld->soa.bld_base);
+	int param_index;
+
+	if (decl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+		param_index = ctx->param_instance_id;
+	} else if (decl->Semantic.Name == TGSI_SEMANTIC_VERTEXID) {
+		param_index = ctx->param_vertex_id;
+	} else {
+		assert(!"unknown system value");
+		return;
+	}
+
+	radeon_bld->system_values[index] =
+		LLVMGetParam(radeon_bld->main_fn, param_index);
+}
+
+/**
+ * Fetch a TGSI constant register operand.
+ *
+ * @param bld_base TGSI build context (also the enclosing si_shader_context).
+ * @param reg      Source register; Dimension.Index selects the constant
+ *                 buffer when Register.Dimension is set, otherwise buffer 0.
+ * @param type     Type the fetched value is bitcast to.
+ * @param swizzle  Channel to fetch, or LP_CHAN_ALL to fetch all four
+ *                 channels and gather them into one vector.
+ * @return The fetched value.
+ *
+ * Directly addressed constants are taken from the constants[] table in the
+ * context. Indirectly addressed constants are read with llvm.SI.load.const
+ * at (address register * 16) + (idx * 4), where idx is the dword index
+ * Register.Index * 4 + swizzle.
+ */
+static LLVMValueRef fetch_constant(
+	struct lp_build_tgsi_context * bld_base,
+	const struct tgsi_full_src_register *reg,
+	enum tgsi_opcode_type type,
+	unsigned swizzle)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct lp_build_context * base = &bld_base->base;
+	/* was the mis-encoded "®->Indirect"; the intended expression is the
+	 * address of the register's Indirect member */
+	const struct tgsi_ind_register *ireg = &reg->Indirect;
+	unsigned buf, idx;
+
+	LLVMValueRef args[2];
+	LLVMValueRef addr;
+	LLVMValueRef result;
+
+	if (swizzle == LP_CHAN_ALL) {
+		unsigned chan;
+		LLVMValueRef values[4];
+		/* fetch each channel separately, then pack them */
+		for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
+			values[chan] = fetch_constant(bld_base, reg, type, chan);
+
+		return lp_build_gather_values(bld_base->base.gallivm, values, 4);
+	}
+
+	buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
+	idx = reg->Register.Index * 4 + swizzle;
+
+	if (!reg->Register.Indirect)
+		return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
+
+	/* indirect addressing: resource + dynamic offset */
+	args[0] = si_shader_ctx->const_resource[buf];
+	args[1] = lp_build_const_int32(base->gallivm, idx * 4);
+	addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
+	addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
+	/* scale the address register by 16 (4 channels * 4) to match idx * 4 */
+	addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16);
+	args[1] = lp_build_add(&bld_base->uint_bld, addr, args[1]);
+
+	result = build_intrinsic(base->gallivm->builder, "llvm.SI.load.const", base->elem_type,
+				 args, 2, LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+
+	return bitcast(bld_base, type, result);
+}
+
+/*
+ * Initialize arguments for the shader export intrinsic.
+ *
+ * bld_base: TGSI build context (also the enclosing si_shader_context).
+ * d:        not read by this function.
+ * index:    output register whose channels are exported.
+ * target:   export target written to args[3].
+ * args:     caller-provided array with room for at least 9 entries:
+ *             [0] enabled channel mask (always 0xf)
+ *             [1] "EXEC mask is the valid mask" flag (always 0)
+ *             [2] "last export" flag (always 0)
+ *             [3] export target
+ *             [4] COMPR flag
+ *             [5..8] the values to export
+ *
+ * For fragment shaders exporting to one of the 8 MRT targets this also
+ * accumulates the per-buffer fields of spi_shader_col_format and
+ * cb_shader_mask. Those 4-bit fields are shifted as unsigned values: for
+ * the highest colour buffers the shifted value no longer fits in a signed
+ * int, which would be undefined behaviour.
+ */
+static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
+				     struct tgsi_full_declaration *d,
+				     unsigned index,
+				     unsigned target,
+				     LLVMValueRef *args)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct lp_build_context *uint =
+				&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
+	struct lp_build_context *base = &bld_base->base;
+	unsigned compressed = 0;
+	unsigned chan;
+
+	if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+		int cbuf = target - V_008DFC_SQ_EXP_MRT;
+
+		if (cbuf >= 0 && cbuf < 8) {
+			compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
+
+			if (compressed)
+				si_shader_ctx->shader->spi_shader_col_format |=
+					(unsigned)V_028714_SPI_SHADER_FP16_ABGR << (4 * cbuf);
+			else
+				si_shader_ctx->shader->spi_shader_col_format |=
+					(unsigned)V_028714_SPI_SHADER_32_ABGR << (4 * cbuf);
+
+			si_shader_ctx->shader->cb_shader_mask |= 0xfu << (4 * cbuf);
+		}
+	}
+
+	if (compressed) {
+		/* Pixel shader needs to pack output values before export */
+		for (chan = 0; chan < 2; chan++ ) {
+			LLVMValueRef *out_ptr =
+				si_shader_ctx->radeon_bld.soa.outputs[index];
+			/* args[0] and args[1] are scratch operands for packf16
+			 * here; they get their final values further down */
+			args[0] = LLVMBuildLoad(base->gallivm->builder,
+						out_ptr[2 * chan], "");
+			args[1] = LLVMBuildLoad(base->gallivm->builder,
+						out_ptr[2 * chan + 1], "");
+			args[chan + 5] =
+				build_intrinsic(base->gallivm->builder,
+						"llvm.SI.packf16",
+						LLVMInt32TypeInContext(base->gallivm->context),
+						args, 2,
+						LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+			/* each packed pair is stored twice: slots 5/7 and 6/8 */
+			args[chan + 7] = args[chan + 5] =
+				LLVMBuildBitCast(base->gallivm->builder,
+						 args[chan + 5],
+						 LLVMFloatTypeInContext(base->gallivm->context),
+						 "");
+		}
+
+		/* Set COMPR flag */
+		args[4] = uint->one;
+	} else {
+		for (chan = 0; chan < 4; chan++ ) {
+			LLVMValueRef out_ptr =
+				si_shader_ctx->radeon_bld.soa.outputs[index][chan];
+			/* +5 because the first output value will be
+			 * the 6th argument to the intrinsic. */
+			args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
+						       out_ptr, "");
+		}
+
+		/* Clear COMPR flag */
+		args[4] = uint->zero;
+	}
+
+	/* XXX: This controls which components of the output
+	 * registers actually get exported. (e.g bit 0 means export
+	 * X component, bit 1 means export Y component, etc.)  I'm
+	 * hard coding this to 0xf for now.  In the future, we might
+	 * want to do something else. */
+	args[0] = lp_build_const_int32(base->gallivm, 0xf);
+
+	/* Specify whether the EXEC mask represents the valid mask */
+	args[1] = uint->zero;
+
+	/* Specify whether this is the last export */
+	args[2] = uint->zero;
+
+	/* Specify the target we are exporting */
+	args[3] = lp_build_const_int32(base->gallivm, target);
+
+	/* XXX: We probably need to keep track of the output
+	 * values, so we know what we are passing to the next
+	 * stage. */
+}
+
+/*
+ * Emit the alpha test for output register `index`.
+ *
+ * Unless the alpha function is PIPE_FUNC_NEVER, channel 3 of the output is
+ * compared against the SI_PARAM_ALPHA_REF parameter and llvm.AMDGPU.kill is
+ * called with 1.0 when the test passes and -1.0 when it fails. With
+ * PIPE_FUNC_NEVER, llvm.AMDGPU.kilp is emitted with no operands.
+ */
+static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
+ unsigned index)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+
+ if (si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) {
+ /* pointer to the alpha channel of the output */
+ LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][3];
+ LLVMValueRef alpha_ref = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_ALPHA_REF);
+
+ LLVMValueRef alpha_pass =
+ lp_build_cmp(&bld_base->base,
+ si_shader_ctx->shader->key.ps.alpha_func,
+ LLVMBuildLoad(gallivm->builder, out_ptr, ""),
+ alpha_ref);
+ /* NOTE(review): presumably a negative operand is what makes the
+ * kill intrinsic discard the fragment -- confirm in the backend */
+ LLVMValueRef arg =
+ lp_build_select(&bld_base->base,
+ alpha_pass,
+ lp_build_const_float(gallivm, 1.0f),
+ lp_build_const_float(gallivm, -1.0f));
+
+ build_intrinsic(gallivm->builder,
+ "llvm.AMDGPU.kill",
+ LLVMVoidTypeInContext(gallivm->context),
+ &arg, 1, 0);
+ } else {
+ /* the test can never pass, so no comparison is emitted */
+ build_intrinsic(gallivm->builder,
+ "llvm.AMDGPU.kilp",
+ LLVMVoidTypeInContext(gallivm->context),
+ NULL, 0, 0);
+ }
+}
+
+/* Overwrite channel 3 (alpha) of output register `index` with the build
+ * context's constant one. */
+static void si_alpha_to_one(struct lp_build_tgsi_context *bld_base,
+			    unsigned index)
+{
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	LLVMValueRef alpha_ptr = ctx->radeon_bld.soa.outputs[index][3];
+
+	/* set alpha to one */
+	LLVMBuildStore(bld_base->base.gallivm->builder,
+		       bld_base->base.one,
+		       alpha_ptr);
+}
+
+/*
+ * Build the clip distance exports derived from a clip vertex output.
+ *
+ * pos:   array of 9-entry export argument sets; entries pos[2] and pos[3]
+ *        are filled here (one per group of four user clip planes).
+ * index: output register holding the clip vertex.
+ *
+ * For each of the two groups that has bit reg_index set in
+ * key.vs.ucps_enabled, the four export values are dot products of the clip
+ * vertex with plane vectors read via llvm.SI.load.const from the constant
+ * buffer at slot NUM_PIPE_CONST_BUFFERS.
+ */
+static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
+ LLVMValueRef (*pos)[9], unsigned index)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct si_pipe_shader *shader = si_shader_ctx->shader;
+ struct lp_build_context *base = &bld_base->base;
+ struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
+ unsigned reg_index;
+ unsigned chan;
+ unsigned const_chan;
+ LLVMValueRef out_elts[4];
+ LLVMValueRef base_elt;
+ LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ LLVMValueRef constbuf_index = lp_build_const_int32(base->gallivm, NUM_PIPE_CONST_BUFFERS);
+ LLVMValueRef const_resource = build_indexed_load(si_shader_ctx, ptr, constbuf_index);
+
+ /* load the four components of the clip vertex once */
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][chan];
+ out_elts[chan] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
+ }
+
+ for (reg_index = 0; reg_index < 2; reg_index ++) {
+ LLVMValueRef *args = pos[2 + reg_index];
+
+ if (!(shader->key.vs.ucps_enabled & (1 << reg_index)))
+ continue;
+
+ shader->shader.clip_dist_write |= 0xf << (4 * reg_index);
+
+ /* start every accumulator at 0.0 */
+ args[5] =
+ args[6] =
+ args[7] =
+ args[8] = lp_build_const_float(base->gallivm, 0.0f);
+
+ /* Compute dot products of position and user clip plane vectors */
+ for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+ for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) {
+ /* args[0] and args[1] are scratch operands for the
+ * constant load; they are overwritten after the loops.
+ * The offset is the byte offset of component const_chan
+ * of plane (reg_index * 4 + chan). */
+ args[0] = const_resource;
+ args[1] = lp_build_const_int32(base->gallivm,
+ ((reg_index * 4 + chan) * 4 +
+ const_chan) * 4);
+ base_elt = build_intrinsic(base->gallivm->builder,
+ "llvm.SI.load.const",
+ base->elem_type,
+ args, 2,
+ LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+ args[5 + chan] =
+ lp_build_add(base, args[5 + chan],
+ lp_build_mul(base, base_elt,
+ out_elts[const_chan]));
+ }
+ }
+
+ /* export header: all four channels enabled, flags cleared, target
+ * POS + 2 + reg_index, not compressed */
+ args[0] = lp_build_const_int32(base->gallivm, 0xf);
+ args[1] = uint->zero;
+ args[2] = uint->zero;
+ args[3] = lp_build_const_int32(base->gallivm,
+ V_008DFC_SQ_EXP_POS + 2 + reg_index);
+ args[4] = uint->zero;
+ }
+}
+
+ /* Print a human-readable summary of the stream output declarations in
+  * `so` to stderr: one header line, then one line per output giving the
+  * destination buffer, destination range, source register and the
+  * component mask. Prints nothing when there are no outputs. */
+ static void si_dump_streamout(struct pipe_stream_output_info *so)
+ {
+     unsigned n;
+
+     if (!so->num_outputs)
+         return;
+
+     fprintf(stderr, "STREAMOUT\n");
+
+     for (n = 0; n < so->num_outputs; n++) {
+         /* One bit per written component, shifted to the first one. */
+         unsigned mask = (1 << so->output[n].num_components) - 1;
+
+         mask <<= so->output[n].start_component;
+
+         fprintf(stderr, " %i: BUF%i[%i..%i] <- OUT[%i].%s%s%s%s\n",
+                 n,
+                 so->output[n].output_buffer,
+                 so->output[n].dst_offset,
+                 so->output[n].dst_offset + so->output[n].num_components - 1,
+                 so->output[n].register_index,
+                 (mask & 1) ? "x" : "",
+                 (mask & 2) ? "y" : "",
+                 (mask & 4) ? "z" : "",
+                 (mask & 8) ? "w" : "");
+     }
+ }
+
+ /* Emit a TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} store through the overloaded
+  * llvm.SI.tbuffer.store intrinsic; num_channels (1..4) selects the suffix.
+  * vdata must be typed i32 for one channel, v2i32 for two channels and
+  * v4i32 for three or four channels. All remaining scalar parameters are
+  * passed to the intrinsic as i32 constants, in declaration order. */
+ static void build_tbuffer_store(struct si_shader_context *shader,
+                                 LLVMValueRef rsrc,
+                                 LLVMValueRef vdata,
+                                 unsigned num_channels,
+                                 LLVMValueRef vaddr,
+                                 LLVMValueRef soffset,
+                                 unsigned inst_offset,
+                                 unsigned dfmt,
+                                 unsigned nfmt,
+                                 unsigned offen,
+                                 unsigned idxen,
+                                 unsigned glc,
+                                 unsigned slc,
+                                 unsigned tfe)
+ {
+     struct gallivm_state *gallivm = &shader->radeon_bld.gallivm;
+     LLVMTypeRef int32 = LLVMInt32TypeInContext(gallivm->context);
+     const char *suffixes[] = {"i32", "v2i32", "v4i32"};
+     /* Three and four channels share the v4i32 overload. */
+     unsigned suffix_idx = CLAMP(num_channels, 1, 3) - 1;
+     char intr_name[256];
+     LLVMValueRef operands[13];
+
+     operands[0] = rsrc;
+     operands[1] = vdata;
+     operands[2] = LLVMConstInt(int32, num_channels, 0);
+     operands[3] = vaddr;
+     operands[4] = soffset;
+     operands[5] = LLVMConstInt(int32, inst_offset, 0);
+     operands[6] = LLVMConstInt(int32, dfmt, 0);
+     operands[7] = LLVMConstInt(int32, nfmt, 0);
+     operands[8] = LLVMConstInt(int32, offen, 0);
+     operands[9] = LLVMConstInt(int32, idxen, 0);
+     operands[10] = LLVMConstInt(int32, glc, 0);
+     operands[11] = LLVMConstInt(int32, slc, 0);
+     operands[12] = LLVMConstInt(int32, tfe, 0);
+
+     /* The intrinsic is overloaded on the data type, so the type suffix
+      * has to be part of the name. */
+     snprintf(intr_name, sizeof(intr_name), "llvm.SI.tbuffer.store.%s",
+              suffixes[suffix_idx]);
+
+     lp_build_intrinsic(gallivm->builder, intr_name,
+                        LLVMVoidTypeInContext(gallivm->context),
+                        operands, Elements(operands));
+ }
+
+ /* Store 1..4 channels of streamout data with build_tbuffer_store().
+  * The buffer data format is the 32-bit-per-channel format matching
+  * num_channels and the number format is UINT. */
+ static void build_streamout_store(struct si_shader_context *shader,
+                                   LLVMValueRef rsrc,
+                                   LLVMValueRef vdata,
+                                   unsigned num_channels,
+                                   LLVMValueRef vaddr,
+                                   LLVMValueRef soffset,
+                                   unsigned inst_offset)
+ {
+     /* Indexed by num_channels - 1. */
+     static const unsigned format_by_count[] = {
+         V_008F0C_BUF_DATA_FORMAT_32,
+         V_008F0C_BUF_DATA_FORMAT_32_32,
+         V_008F0C_BUF_DATA_FORMAT_32_32_32,
+         V_008F0C_BUF_DATA_FORMAT_32_32_32_32
+     };
+     unsigned data_format;
+
+     assert(num_channels >= 1 && num_channels <= 4);
+     data_format = format_by_count[num_channels - 1];
+
+     build_tbuffer_store(shader, rsrc, vdata, num_channels, vaddr, soffset,
+                         inst_offset, data_format,
+                         V_008F0C_BUF_NUM_FORMAT_UINT,
+                         1,  /* offen */
+                         0,  /* idxen */
+                         1,  /* glc */
+                         1,  /* slc */
+                         0); /* tfe */
+ }
+
+ /* On SI, the vertex shader is responsible for writing streamout data
+ * to buffers.
+ *
+ * Emits, guarded by a per-thread condition, one buffer store per entry of
+ * the shader selector's pipe_stream_output_info. The streamout config,
+ * write index and per-buffer offsets are read from the function parameters
+ * whose indices are stored in `shader` (param_streamout_*). */
+ static void si_llvm_emit_streamout(struct si_shader_context *shader)
+ {
+ struct pipe_stream_output_info *so = &shader->shader->selector->so;
+ struct gallivm_state *gallivm = &shader->radeon_bld.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ int i, j;
+ struct lp_build_if_state if_ctx;
+
+ LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+
+ LLVMValueRef so_param =
+ LLVMGetParam(shader->radeon_bld.main_fn,
+ shader->param_streamout_config);
+
+ /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
+ LLVMValueRef so_vtx_count =
+ LLVMBuildAnd(builder,
+ LLVMBuildLShr(builder, so_param,
+ LLVMConstInt(i32, 16, 0), ""),
+ LLVMConstInt(i32, 127, 0), "");
+
+ LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32,
+ NULL, 0, LLVMReadNoneAttribute);
+
+ /* can_emit = tid < so_vtx_count; */
+ LLVMValueRef can_emit =
+ LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
+
+ /* Emit the streamout code conditionally. This actually avoids
+ * out-of-bounds buffer access. The hw tells us via the SGPR
+ * (so_vtx_count) which threads are allowed to emit streamout data. */
+ lp_build_if(&if_ctx, gallivm, can_emit);
+ {
+ /* The buffer offset is computed as follows:
+ * ByteOffset = streamout_offset[buffer_id]*4 +
+ * (streamout_write_index + thread_id)*stride[buffer_id] +
+ * attrib_offset
+ */
+
+ LLVMValueRef so_write_index =
+ LLVMGetParam(shader->radeon_bld.main_fn,
+ shader->param_streamout_write_index);
+
+ /* Compute (streamout_write_index + thread_id). */
+ so_write_index = LLVMBuildAdd(builder, so_write_index, tid, "");
+
+ /* Compute the write offset for each enabled buffer.
+ * Buffers with a zero stride are skipped and keep a NULL offset. */
+ LLVMValueRef so_write_offset[4] = {};
+ for (i = 0; i < 4; i++) {
+ if (!so->stride[i])
+ continue;
+
+ LLVMValueRef so_offset = LLVMGetParam(shader->radeon_bld.main_fn,
+ shader->param_streamout_offset[i]);
+ so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(i32, 4, 0), "");
+
+ /* NOTE(review): stride and offset are multiplied by 4, presumably
+ * converting dwords to bytes - confirm against the state tracker. */
+ so_write_offset[i] = LLVMBuildMul(builder, so_write_index,
+ LLVMConstInt(i32, so->stride[i]*4, 0), "");
+ so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, "");
+ }
+
+ LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS] = shader->radeon_bld.soa.outputs;
+
+ /* Write streamout data. */
+ for (i = 0; i < so->num_outputs; i++) {
+ unsigned buf_idx = so->output[i].output_buffer;
+ unsigned reg = so->output[i].register_index;
+ unsigned start = so->output[i].start_component;
+ unsigned num_comps = so->output[i].num_components;
+ LLVMValueRef out[4];
+
+ /* Invalid component counts assert in debug builds and are
+ * skipped otherwise. */
+ assert(num_comps && num_comps <= 4);
+ if (!num_comps || num_comps > 4)
+ continue;
+
+ /* Load the output as int. */
+ for (j = 0; j < num_comps; j++) {
+ out[j] = LLVMBuildLoad(builder, outputs[reg][start+j], "");
+ out[j] = LLVMBuildBitCast(builder, out[j], i32, "");
+ }
+
+ /* Pack the output. */
+ LLVMValueRef vdata = NULL;
+
+ switch (num_comps) {
+ case 1: /* as i32 */
+ vdata = out[0];
+ break;
+ case 2: /* as v2i32 */
+ case 3: /* as v4i32 (aligned to 4) */
+ case 4: /* as v4i32 */
+ /* The vector length is num_comps rounded up to a power of
+ * two; lanes beyond num_comps stay undefined. */
+ vdata = LLVMGetUndef(LLVMVectorType(i32, util_next_power_of_two(num_comps)));
+ for (j = 0; j < num_comps; j++) {
+ vdata = LLVMBuildInsertElement(builder, vdata, out[j],
+ LLVMConstInt(i32, j, 0), "");
+ }
+ break;
+ }
+
+ /* so_write_offset[buf_idx] is only non-NULL for buffers with a
+ * non-zero stride (see the loop above). */
+ build_streamout_store(shader, shader->so_buffers[buf_idx],
+ vdata, num_comps,
+ so_write_offset[buf_idx],
+ LLVMConstInt(i32, 0, 0),
+ so->output[i].dst_offset*4);
+ }
+ }
+ lp_build_endif(&if_ctx);
+ }
+
+
+ /* Emit the shader epilogue: streamout stores (if any) and all exports.
+ *
+ * Walks the remaining TGSI tokens of si_shader_ctx->parse, records every
+ * input and output declaration in the si_shader info, and turns output
+ * declarations into llvm.SI.export calls. Vertex shader position exports
+ * are collected in pos_args and emitted at the end so the final one can be
+ * flagged as the last export. In the non-vertex path the most recent color
+ * (or depth/stencil) export is held back in last_args for the same reason,
+ * and a dummy export is created if nothing was exported at all. */
+ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
+ {
+ struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
+ struct si_shader * shader = &si_shader_ctx->shader->shader;
+ struct lp_build_context * base = &bld_base->base;
+ struct lp_build_context * uint =
+ &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
+ struct tgsi_parse_context *parse = &si_shader_ctx->parse;
+ LLVMValueRef args[9];
+ /* Deferred final export; last_args[0] == NULL means "nothing pending". */
+ LLVMValueRef last_args[9] = { 0 };
+ /* Position export slots; pos_args[n][0] == NULL means "slot unused". */
+ LLVMValueRef pos_args[4][9] = { { 0 } };
+ unsigned semantic_name;
+ unsigned param_count = 0;
+ int depth_index = -1, stencil_index = -1, psize_index = -1, edgeflag_index = -1;
+ int layer_index = -1;
+ int i;
+
+ if (si_shader_ctx->shader->selector->so.num_outputs) {
+ si_llvm_emit_streamout(si_shader_ctx);
+ }
+
+ while (!tgsi_parse_end_of_tokens(parse)) {
+ struct tgsi_full_declaration *d =
+ &parse->FullToken.FullDeclaration;
+ unsigned target;
+ unsigned index;
+
+ tgsi_parse_token(parse);
+
+ if (parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_PROPERTY &&
+ parse->FullToken.FullProperty.Property.PropertyName ==
+ TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS)
+ shader->fs_write_all = TRUE;
+
+ if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
+ continue;
+
+ /* Record the declaration. Only outputs continue to the export code.
+ * NOTE(review): the array bounds are only checked by assert(), after
+ * the counter has been incremented - confirm the declaration count is
+ * capped elsewhere for builds without assertions. */
+ switch (d->Declaration.File) {
+ case TGSI_FILE_INPUT:
+ i = shader->ninput++;
+ assert(i < Elements(shader->input));
+ shader->input[i].name = d->Semantic.Name;
+ shader->input[i].sid = d->Semantic.Index;
+ shader->input[i].interpolate = d->Interp.Interpolate;
+ shader->input[i].centroid = d->Interp.Centroid;
+ continue;
+
+ case TGSI_FILE_OUTPUT:
+ i = shader->noutput++;
+ assert(i < Elements(shader->output));
+ shader->output[i].name = d->Semantic.Name;
+ shader->output[i].sid = d->Semantic.Index;
+ shader->output[i].interpolate = d->Interp.Interpolate;
+ break;
+
+ default:
+ continue;
+ }
+
+ semantic_name = d->Semantic.Name;
+ /* Re-entered via goto with semantic_name overridden (see CLIPDIST below). */
+ handle_semantic:
+ for (index = d->Range.First; index <= d->Range.Last; index++) {
+ /* Select the correct target */
+ switch(semantic_name) {
+ /* PSIZE, EDGEFLAG and LAYER only remember their register; they are
+ * written later as part of the misc vector (position export 1). */
+ case TGSI_SEMANTIC_PSIZE:
+ shader->vs_out_misc_write = true;
+ shader->vs_out_point_size = true;
+ psize_index = index;
+ continue;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ shader->vs_out_misc_write = true;
+ shader->vs_out_edgeflag = true;
+ edgeflag_index = index;
+ continue;
+ case TGSI_SEMANTIC_LAYER:
+ shader->vs_out_misc_write = true;
+ shader->vs_out_layer = true;
+ layer_index = index;
+ continue;
+ case TGSI_SEMANTIC_POSITION:
+ if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
+ target = V_008DFC_SQ_EXP_POS;
+ break;
+ } else {
+ /* Non-vertex position output: exported with depth/stencil below. */
+ depth_index = index;
+ continue;
+ }
+ case TGSI_SEMANTIC_STENCIL:
+ stencil_index = index;
+ continue;
+ case TGSI_SEMANTIC_COLOR:
+ /* The BCOLOR case label sits inside the if-body on purpose: vertex
+ * shader COLOR and any BCOLOR share the parameter-export path. */
+ if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
+ case TGSI_SEMANTIC_BCOLOR:
+ target = V_008DFC_SQ_EXP_PARAM + param_count;
+ shader->output[i].param_offset = param_count;
+ param_count++;
+ } else {
+ target = V_008DFC_SQ_EXP_MRT + shader->output[i].sid;
+ if (si_shader_ctx->shader->key.ps.alpha_to_one) {
+ si_alpha_to_one(bld_base, index);
+ }
+ /* The alpha test is only applied to color output 0. */
+ if (shader->output[i].sid == 0 &&
+ si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
+ si_alpha_test(bld_base, index);
+ }
+ break;
+ case TGSI_SEMANTIC_CLIPDIST:
+ if (!(si_shader_ctx->shader->key.vs.ucps_enabled &
+ (1 << d->Semantic.Index)))
+ continue;
+ shader->clip_dist_write |=
+ d->Declaration.UsageMask << (d->Semantic.Index << 2);
+ target = V_008DFC_SQ_EXP_POS + 2 + d->Semantic.Index;
+ break;
+ case TGSI_SEMANTIC_CLIPVERTEX:
+ /* Fills pos_args[2..3] directly; nothing to export from here. */
+ si_llvm_emit_clipvertex(bld_base, pos_args, index);
+ continue;
+ case TGSI_SEMANTIC_FOG:
+ case TGSI_SEMANTIC_GENERIC:
+ target = V_008DFC_SQ_EXP_PARAM + param_count;
+ shader->output[i].param_offset = param_count;
+ param_count++;
+ break;
+ default:
+ /* Unknown semantics are still exported, with target 0. */
+ target = 0;
+ fprintf(stderr,
+ "Warning: SI unhandled output type:%d\n",
+ semantic_name);
+ }
+
+ si_llvm_init_export_args(bld_base, d, index, target, args);
+
+ if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
+ target >= V_008DFC_SQ_EXP_POS &&
+ target <= (V_008DFC_SQ_EXP_POS + 3)) {
+ /* Position exports are stored and emitted after the token loop. */
+ memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
+ args, sizeof(args));
+ } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT &&
+ semantic_name == TGSI_SEMANTIC_COLOR) {
+ /* If there is an export instruction waiting to be emitted, do so now. */
+ if (last_args[0]) {
+ lp_build_intrinsic(base->gallivm->builder,
+ "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ last_args, 9);
+ }
+
+ /* This instruction will be emitted at the end of the shader. */
+ memcpy(last_args, args, sizeof(args));
+
+ /* Handle FS_COLOR0_WRITES_ALL_CBUFS. */
+ if (shader->fs_write_all && shader->output[i].sid == 0 &&
+ si_shader_ctx->shader->key.ps.nr_cbufs > 1) {
+ /* Replicate color 0 to MRT 1 .. nr_cbufs-1 immediately. */
+ for (int c = 1; c < si_shader_ctx->shader->key.ps.nr_cbufs; c++) {
+ si_llvm_init_export_args(bld_base, d, index,
+ V_008DFC_SQ_EXP_MRT + c, args);
+ lp_build_intrinsic(base->gallivm->builder,
+ "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ args, 9);
+ }
+ }
+ } else {
+ lp_build_intrinsic(base->gallivm->builder,
+ "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ args, 9);
+ }
+ }
+
+ /* Clip distances are handled twice: first as position exports (above),
+ * then the same declaration is re-processed as GENERIC so that it also
+ * gets a parameter export. */
+ if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
+ semantic_name = TGSI_SEMANTIC_GENERIC;
+ goto handle_semantic;
+ }
+ }
+
+ /* Depth and/or stencil export (MRTZ target). Depth is read from channel 2
+ * of the position output, stencil from channel 1 of the stencil output. */
+ if (depth_index >= 0 || stencil_index >= 0) {
+ LLVMValueRef out_ptr;
+ unsigned mask = 0;
+
+ /* Specify the target we are exporting */
+ args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);
+
+ if (depth_index >= 0) {
+ out_ptr = si_shader_ctx->radeon_bld.soa.outputs[depth_index][2];
+ args[5] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
+ mask |= 0x1;
+
+ /* Without stencil, fill the remaining data slots with depth. */
+ if (stencil_index < 0) {
+ args[6] =
+ args[7] =
+ args[8] = args[5];
+ }
+ }
+
+ if (stencil_index >= 0) {
+ out_ptr = si_shader_ctx->radeon_bld.soa.outputs[stencil_index][1];
+ args[7] =
+ args[8] =
+ args[6] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
+ /* Only setting the stencil component bit (0x2) here
+ * breaks some stencil piglit tests
+ */
+ mask |= 0x3;
+
+ /* Without depth, slot 5 is filled with the stencil value. */
+ if (depth_index < 0)
+ args[5] = args[6];
+ }
+
+ /* Specify which components to enable */
+ args[0] = lp_build_const_int32(base->gallivm, mask);
+
+ args[1] =
+ args[2] =
+ args[4] = uint->zero;
+
+ /* If a color export is already pending, emit this one now;
+ * otherwise it becomes the deferred last export. */
+ if (last_args[0])
+ lp_build_intrinsic(base->gallivm->builder,
+ "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ args, 9);
+ else
+ memcpy(last_args, args, sizeof(args));
+ }
+
+ if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
+ unsigned pos_idx = 0;
+
+ /* We need to add the position output manually if it's missing. */
+ if (!pos_args[0][0]) {
+ pos_args[0][0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */
+ pos_args[0][1] = uint->zero; /* EXEC mask */
+ pos_args[0][2] = uint->zero; /* last export? */
+ pos_args[0][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS);
+ pos_args[0][4] = uint->zero; /* COMPR flag */
+ pos_args[0][5] = base->zero; /* X */
+ pos_args[0][6] = base->zero; /* Y */
+ pos_args[0][7] = base->zero; /* Z */
+ pos_args[0][8] = base->one; /* W */
+ }
+
+ /* Write the misc vector (point size, edgeflag, layer, viewport). */
+ if (shader->vs_out_misc_write) {
+ pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */
+ shader->vs_out_point_size |
+ (shader->vs_out_edgeflag << 1) |
+ (shader->vs_out_layer << 2));
+ pos_args[1][1] = uint->zero; /* EXEC mask */
+ pos_args[1][2] = uint->zero; /* last export? */
+ pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1);
+ pos_args[1][4] = uint->zero; /* COMPR flag */
+ pos_args[1][5] = base->zero; /* X */
+ pos_args[1][6] = base->zero; /* Y */
+ pos_args[1][7] = base->zero; /* Z */
+ pos_args[1][8] = base->zero; /* W */
+
+ if (shader->vs_out_point_size) {
+ pos_args[1][5] = LLVMBuildLoad(base->gallivm->builder,
+ si_shader_ctx->radeon_bld.soa.outputs[psize_index][0], "");
+ }
+
+ if (shader->vs_out_edgeflag) {
+ LLVMValueRef output = LLVMBuildLoad(base->gallivm->builder,
+ si_shader_ctx->radeon_bld.soa.outputs[edgeflag_index][0], "");
+
+ /* The output is a float, but the hw expects an integer
+ * with the first bit containing the edge flag. */
+ output = LLVMBuildFPToUI(base->gallivm->builder, output,
+ bld_base->uint_bld.elem_type, "");
+
+ /* Clamp the converted value to at most 1. */
+ output = lp_build_min(&bld_base->int_bld, output, bld_base->int_bld.one);
+
+ /* The LLVM intrinsic expects a float. */
+ pos_args[1][6] = LLVMBuildBitCast(base->gallivm->builder, output,
+ base->elem_type, "");
+ }
+
+ if (shader->vs_out_layer) {
+ pos_args[1][7] = LLVMBuildLoad(base->gallivm->builder,
+ si_shader_ctx->radeon_bld.soa.outputs[layer_index][0], "");
+ }
+ }
+
+ /* Count the used position slots so the last one can be identified. */
+ for (i = 0; i < 4; i++)
+ if (pos_args[i][0])
+ shader->nr_pos_exports++;
+
+ /* Emit the used slots, renumbering their targets consecutively
+ * starting at V_008DFC_SQ_EXP_POS. */
+ for (i = 0; i < 4; i++) {
+ if (!pos_args[i][0])
+ continue;
+
+ /* Specify the target we are exporting */
+ pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);
+
+ if (pos_idx == shader->nr_pos_exports)
+ /* Specify that this is the last export */
+ pos_args[i][2] = uint->one;
+
+ lp_build_intrinsic(base->gallivm->builder,
+ "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ pos_args[i], 9);
+ }
+ } else {
+ /* Nothing was exported: build a dummy MRT export with an empty
+ * writemask so the shader still ends with an export. */
+ if (!last_args[0]) {
+ /* Specify which components to enable */
+ last_args[0] = lp_build_const_int32(base->gallivm, 0x0);
+
+ /* Specify the target we are exporting */
+ last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);
+
+ /* Set COMPR flag to zero to export data as 32-bit */
+ last_args[4] = uint->zero;
+
+ /* dummy bits */
+ last_args[5]= uint->zero;
+ last_args[6]= uint->zero;
+ last_args[7]= uint->zero;
+ last_args[8]= uint->zero;
+
+ si_shader_ctx->shader->spi_shader_col_format |=
+ V_028714_SPI_SHADER_32_ABGR;
+ si_shader_ctx->shader->cb_shader_mask |= S_02823C_OUTPUT0_ENABLE(0xf);
+ }
+
+ /* Specify whether the EXEC mask represents the valid mask */
+ last_args[1] = uint->one;
+
+ /* Specify that this is the last export */
+ last_args[2] = lp_build_const_int32(base->gallivm, 1);
+
+ lp_build_intrinsic(base->gallivm->builder,
+ "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ last_args, 9);
+ }
+ }
+
+ /* Forward declarations: tex_fetch_args() uses txf_action and
+ * build_tex_intrinsic() to read FMASK before either is defined. */
+ static const struct lp_build_tgsi_action txf_action;
+
+ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data);
+
+ /* Build the intrinsic arguments for a texture instruction in emit_data.
+ *
+ * For TGSI_TEXTURE_BUFFER the arguments are the resource descriptor
+ * truncated to 16 bytes, a zero and the X coordinate, and the function
+ * returns early. For every other target:
+ * args[0] = address vector of i32, padded to a power-of-two length
+ * args[1] = resource descriptor of the sampler index
+ * args[2] = sampler state (all opcodes except TXF)
+ * last arg = texture target as an i32 constant
+ * The address vector is packed in this order: LOD bias (TXB), depth
+ * comparison value (shadow targets), user derivatives (TXD), coordinates,
+ * then LOD or sample index (TXL, TXF). For MSAA targets the sample index is
+ * additionally remapped through FMASK. */
+ static void tex_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+ {
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ unsigned opcode = inst->Instruction.Opcode;
+ unsigned target = inst->Texture.Texture;
+ LLVMValueRef coords[4];
+ LLVMValueRef address[16];
+ int ref_pos;
+ unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
+ unsigned count = 0;
+ unsigned chan;
+ /* The sampler is taken from the last source operand. */
+ unsigned sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
+ unsigned sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
+
+ if (target == TGSI_TEXTURE_BUFFER) {
+ LLVMTypeRef i128 = LLVMIntTypeInContext(gallivm->context, 128);
+ LLVMTypeRef v2i128 = LLVMVectorType(i128, 2);
+ LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm->context);
+ LLVMTypeRef v16i8 = LLVMVectorType(i8, 16);
+
+ /* Truncate v32i8 to v16i8. */
+ LLVMValueRef res = si_shader_ctx->resources[sampler_index];
+ res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
+ res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.zero, "");
+ res = LLVMBuildBitCast(gallivm->builder, res, v16i8, "");
+
+ emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+ emit_data->args[0] = res;
+ emit_data->args[1] = bld_base->uint_bld.zero;
+ emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
+ emit_data->arg_count = 3;
+ return;
+ }
+
+ /* Fetch and project texture coordinates */
+ coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
+ for (chan = 0; chan < 3; chan++ ) {
+ coords[chan] = lp_build_emit_fetch(bld_base,
+ emit_data->inst, 0,
+ chan);
+ if (opcode == TGSI_OPCODE_TXP)
+ coords[chan] = lp_build_emit_llvm_binary(bld_base,
+ TGSI_OPCODE_DIV,
+ coords[chan],
+ coords[3]);
+ }
+
+ /* After projection W has been consumed; replace it with 1. */
+ if (opcode == TGSI_OPCODE_TXP)
+ coords[3] = bld_base->base.one;
+
+ /* Pack LOD bias value */
+ if (opcode == TGSI_OPCODE_TXB)
+ address[count++] = coords[3];
+
+ if (target == TGSI_TEXTURE_CUBE || target == TGSI_TEXTURE_SHADOWCUBE)
+ radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
+
+ /* Pack depth comparison value */
+ switch (target) {
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_SHADOWCUBE:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ assert(ref_pos >= 0);
+ address[count++] = coords[ref_pos];
+ break;
+ case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+ /* Here the comparison value comes from the second source operand. */
+ address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
+ }
+
+ /* Pack user derivatives */
+ if (opcode == TGSI_OPCODE_TXD) {
+ for (chan = 0; chan < 2; chan++) {
+ address[count++] = lp_build_emit_fetch(bld_base, inst, 1, chan);
+ if (num_coords > 1)
+ address[count++] = lp_build_emit_fetch(bld_base, inst, 2, chan);
+ }
+ }
+
+ /* Pack texture coordinates */
+ address[count++] = coords[0];
+ if (num_coords > 1)
+ address[count++] = coords[1];
+ if (num_coords > 2)
+ address[count++] = coords[2];
+
+ /* Pack LOD or sample index */
+ if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF)
+ address[count++] = coords[3];
+
+ /* NOTE(review): this check runs after address[] has already been
+ * written, so it cannot prevent an overflow; it only clamps count. */
+ if (count > 16) {
+ assert(!"Cannot handle more than 16 texture address parameters");
+ count = 16;
+ }
+
+ /* The intrinsics take the address as a vector of i32. */
+ for (chan = 0; chan < count; chan++ ) {
+ address[chan] = LLVMBuildBitCast(gallivm->builder,
+ address[chan],
+ LLVMInt32TypeInContext(gallivm->context),
+ "");
+ }
+
+ /* Adjust the sample index according to FMASK.
+ *
+ * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
+ * which is the identity mapping. Each nibble says which physical sample
+ * should be fetched to get that sample.
+ *
+ * For example, 0x11111100 means there are only 2 samples stored and
+ * the second sample covers 3/4 of the pixel. When reading samples 0
+ * and 1, return physical sample 0 (determined by the first two 0s
+ * in FMASK), otherwise return physical sample 1.
+ *
+ * The sample index should be adjusted as follows:
+ * sample_index = (fmask >> (sample_index * 4)) & 0xF;
+ */
+ if (target == TGSI_TEXTURE_2D_MSAA ||
+ target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ struct lp_build_emit_data txf_emit_data = *emit_data;
+ LLVMValueRef txf_address[4];
+ unsigned txf_count = count;
+
+ /* Start from the first four address entries and zero the entries
+ * that must not carry a sample index for the FMASK fetch. */
+ memcpy(txf_address, address, sizeof(txf_address));
+
+ if (target == TGSI_TEXTURE_2D_MSAA) {
+ txf_address[2] = bld_base->uint_bld.zero;
+ }
+ txf_address[3] = bld_base->uint_bld.zero;
+
+ /* Pad to a power-of-two size. */
+ while (txf_count < util_next_power_of_two(txf_count))
+ txf_address[txf_count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
+
+ /* Read FMASK using TXF. */
+ txf_emit_data.chan = 0;
+ txf_emit_data.dst_type = LLVMVectorType(
+ LLVMInt32TypeInContext(bld_base->base.gallivm->context), 4);
+ txf_emit_data.args[0] = lp_build_gather_values(gallivm, txf_address, txf_count);
+ txf_emit_data.args[1] = si_shader_ctx->resources[FMASK_TEX_OFFSET + sampler_index];
+ txf_emit_data.args[2] = lp_build_const_int32(bld_base->base.gallivm,
+ target == TGSI_TEXTURE_2D_MSAA ? TGSI_TEXTURE_2D : TGSI_TEXTURE_2D_ARRAY);
+ txf_emit_data.arg_count = 3;
+
+ build_tex_intrinsic(&txf_action, bld_base, &txf_emit_data);
+
+ /* Initialize some constants. */
+ LLVMValueRef four = LLVMConstInt(uint_bld->elem_type, 4, 0);
+ LLVMValueRef F = LLVMConstInt(uint_bld->elem_type, 0xF, 0);
+
+ /* Apply the formula. */
+ LLVMValueRef fmask =
+ LLVMBuildExtractElement(gallivm->builder,
+ txf_emit_data.output[0],
+ uint_bld->zero, "");
+
+ /* The sample index follows the coordinates in the address vector:
+ * entry 2 for 2D MSAA, entry 3 for 2D array MSAA. */
+ unsigned sample_chan = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3;
+
+ LLVMValueRef sample_index4 =
+ LLVMBuildMul(gallivm->builder, address[sample_chan], four, "");
+
+ LLVMValueRef shifted_fmask =
+ LLVMBuildLShr(gallivm->builder, fmask, sample_index4, "");
+
+ LLVMValueRef final_sample =
+ LLVMBuildAnd(gallivm->builder, shifted_fmask, F, "");
+
+ /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
+ * resource descriptor is 0 (invalid). The code tests the whole of
+ * descriptor word 1 against zero.
+ */
+ LLVMValueRef fmask_desc =
+ LLVMBuildBitCast(gallivm->builder,
+ si_shader_ctx->resources[FMASK_TEX_OFFSET + sampler_index],
+ LLVMVectorType(uint_bld->elem_type, 8), "");
+
+ LLVMValueRef fmask_word1 =
+ LLVMBuildExtractElement(gallivm->builder, fmask_desc,
+ uint_bld->one, "");
+
+ LLVMValueRef word1_is_nonzero =
+ LLVMBuildICmp(gallivm->builder, LLVMIntNE,
+ fmask_word1, uint_bld->zero, "");
+
+ /* Replace the MSAA sample index. */
+ address[sample_chan] =
+ LLVMBuildSelect(gallivm->builder, word1_is_nonzero,
+ final_sample, address[sample_chan], "");
+ }
+
+ /* Resource */
+ emit_data->args[1] = si_shader_ctx->resources[sampler_index];
+
+ if (opcode == TGSI_OPCODE_TXF) {
+ /* add tex offsets */
+ if (inst->Texture.NumOffsets) {
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ const struct tgsi_texture_offset * off = inst->TexOffsets;
+
+ assert(inst->Texture.NumOffsets == 1);
+
+ /* Offsets are read from the immediates table and added to the
+ * integer coordinates; the cases fall through from the highest
+ * dimension down to X. */
+ switch (target) {
+ case TGSI_TEXTURE_3D:
+ address[2] = lp_build_add(uint_bld, address[2],
+ bld->immediates[off->Index][off->SwizzleZ]);
+ /* fall through */
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_2D_ARRAY:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ address[1] =
+ lp_build_add(uint_bld, address[1],
+ bld->immediates[off->Index][off->SwizzleY]);
+ /* fall through */
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_1D_ARRAY:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ address[0] =
+ lp_build_add(uint_bld, address[0],
+ bld->immediates[off->Index][off->SwizzleX]);
+ break;
+ /* texture offsets do not apply to other texture targets */
+ }
+ }
+
+ /* TXF returns four i32 values and takes no sampler argument. */
+ emit_data->dst_type = LLVMVectorType(
+ LLVMInt32TypeInContext(bld_base->base.gallivm->context),
+ 4);
+
+ emit_data->arg_count = 3;
+ } else {
+ /* Sampler */
+ emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
+
+ emit_data->dst_type = LLVMVectorType(
+ LLVMFloatTypeInContext(bld_base->base.gallivm->context),
+ 4);
+
+ emit_data->arg_count = 4;
+ }
+
+ /* Dimensions */
+ emit_data->args[emit_data->arg_count - 1] =
+ lp_build_const_int32(bld_base->base.gallivm, target);
+
+ /* Pad to power of two vector */
+ while (count < util_next_power_of_two(count))
+ address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
+
+ emit_data->args[0] = lp_build_gather_values(gallivm, address, count);
+ }
+
+ /* Emit the texture intrinsic call prepared by tex_fetch_args().
+  *
+  * Buffer textures are fetched with llvm.SI.vs.load.input. For every other
+  * target the intrinsic name is action->intr_name followed by "v<N>i32",
+  * where N is the vector length of the address argument (args[0]).
+  * The result is stored in emit_data->output[emit_data->chan]. */
+ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
+                                 struct lp_build_tgsi_context * bld_base,
+                                 struct lp_build_emit_data * emit_data)
+ {
+     struct lp_build_context * base = &bld_base->base;
+     char intr_name[127];
+
+     if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
+         emit_data->output[emit_data->chan] = build_intrinsic(
+             base->gallivm->builder,
+             "llvm.SI.vs.load.input", emit_data->dst_type,
+             emit_data->args, emit_data->arg_count,
+             LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+         return;
+     }
+
+     /* Bounded formatting: never write past intr_name, whatever prefix
+      * the action table supplies (same approach as build_tbuffer_store). */
+     snprintf(intr_name, sizeof(intr_name), "%sv%ui32", action->intr_name,
+              LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
+
+     emit_data->output[emit_data->chan] = build_intrinsic(
+         base->gallivm->builder, intr_name, emit_data->dst_type,
+         emit_data->args, emit_data->arg_count,
+         LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+ }
+
+ /* Prepare the arguments of a texture size query.
+  *
+  * Buffer textures: args[0] receives dword 2 of the resource descriptor
+  * (the size is read from the descriptor directly); nothing else is set.
+  * Other targets: args = { mip level, resource, texture target } and the
+  * result type is a vector of four i32. */
+ static void txq_fetch_args(
+     struct lp_build_tgsi_context * bld_base,
+     struct lp_build_emit_data * emit_data)
+ {
+     struct si_shader_context *ctx = si_shader_context(bld_base);
+     const struct tgsi_full_instruction *inst = emit_data->inst;
+     struct gallivm_state *gallivm = bld_base->base.gallivm;
+     LLVMTypeRef int32 = LLVMInt32TypeInContext(gallivm->context);
+     LLVMValueRef resource = ctx->resources[inst->Src[1].Register.Index];
+     LLVMValueRef desc_dwords;
+
+     if (inst->Texture.Texture != TGSI_TEXTURE_BUFFER) {
+         /* Mip level, resource, dimensions. */
+         emit_data->args[0] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
+         emit_data->args[1] = resource;
+         emit_data->args[2] = lp_build_const_int32(gallivm, inst->Texture.Texture);
+         emit_data->arg_count = 3;
+         emit_data->dst_type = LLVMVectorType(int32, 4);
+         return;
+     }
+
+     /* View the descriptor as eight dwords and pick element 2. */
+     desc_dwords = LLVMBuildBitCast(gallivm->builder, resource,
+                                    LLVMVectorType(int32, 8), "");
+     emit_data->args[0] = LLVMBuildExtractElement(gallivm->builder, desc_dwords,
+                                                  lp_build_const_int32(gallivm, 2), "");
+ }
+
+ /* Emit a texture size query. For buffer textures the size was already
+  * placed in args[0] by the fetch-args callback and is returned as is;
+  * every other target goes through build_tgsi_intrinsic_nomem(). */
+ static void build_txq_intrinsic(const struct lp_build_tgsi_action * action,
+                                 struct lp_build_tgsi_context * bld_base,
+                                 struct lp_build_emit_data * emit_data)
+ {
+     if (emit_data->inst->Texture.Texture != TGSI_TEXTURE_BUFFER) {
+         build_tgsi_intrinsic_nomem(action, bld_base, emit_data);
+         return;
+     }
+
+     emit_data->output[emit_data->chan] = emit_data->args[0];
+ }
+
+#if HAVE_LLVM >= 0x0304
+
+ /* Emit DDX/DDY as a difference of values exchanged through ddxy_lds.
+ *
+ * Each thread stores its source value at ddxy_lds[tid], then loads the
+ * values at index (tid & ~3) and at that index plus 1 (DDX) or plus 2
+ * (any other opcode handled here). The result for a channel is the second
+ * loaded value minus the first. Channels whose source swizzle repeats an
+ * earlier channel reuse that channel's result. The four results are
+ * gathered into emit_data->output[0]. */
+ static void si_llvm_emit_ddxy(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+ {
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct lp_build_context * base = &bld_base->base;
+ const struct tgsi_full_instruction *inst = emit_data->inst;
+ unsigned opcode = inst->Instruction.Opcode;
+ LLVMValueRef indices[2];
+ LLVMValueRef store_ptr, load_ptr0, load_ptr1;
+ LLVMValueRef tl, trbl, result[4];
+ LLVMTypeRef i32;
+ unsigned swizzle[4];
+ unsigned c;
+
+ i32 = LLVMInt32TypeInContext(gallivm->context);
+
+ /* store_ptr = &ddxy_lds[0][tid] */
+ indices[0] = bld_base->uint_bld.zero;
+ indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
+ NULL, 0, LLVMReadNoneAttribute);
+ store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ indices, 2, "");
+
+ /* load_ptr0 = &ddxy_lds[0][tid & ~3], the first slot of the group of four. */
+ indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
+ lp_build_const_int32(gallivm, 0xfffffffc), "");
+ load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ indices, 2, "");
+
+ /* load_ptr1 = the slot 1 (DDX) or 2 (otherwise) after load_ptr0. */
+ indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
+ lp_build_const_int32(gallivm,
+ opcode == TGSI_OPCODE_DDX ? 1 : 2),
+ "");
+ load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ indices, 2, "");
+
+ for (c = 0; c < 4; ++c) {
+ unsigned i;
+
+ /* Reuse the result of an earlier channel with the same swizzle. */
+ swizzle[c] = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], c);
+ for (i = 0; i < c; ++i) {
+ if (swizzle[i] == swizzle[c]) {
+ result[c] = result[i];
+ break;
+ }
+ }
+ if (i != c)
+ continue;
+
+ /* Publish this thread's value (bitcast to i32) ... */
+ LLVMBuildStore(gallivm->builder,
+ LLVMBuildBitCast(gallivm->builder,
+ lp_build_emit_fetch(bld_base, inst, 0, c),
+ i32, ""),
+ store_ptr);
+
+ /* ... then read back the two values to subtract. */
+ tl = LLVMBuildLoad(gallivm->builder, load_ptr0, "");
+ tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, "");
+
+ trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, "");
+ trbl = LLVMBuildBitCast(gallivm->builder, trbl, base->elem_type, "");
+
+ result[c] = LLVMBuildFSub(gallivm->builder, trbl, tl, "");
+ }
+
+ emit_data->output[0] = lp_build_gather_values(gallivm, result, 4);
+ }
+
+#endif /* HAVE_LLVM >= 0x0304 */
+
+ /* Texture action tables. All of them except txq_action share
+ * tex_fetch_args/build_tex_intrinsic and differ only in the intrinsic name
+ * prefix; build_tex_intrinsic appends "v<N>i32" to that prefix. */
+
+ /* Plain sample. */
+ static const struct lp_build_tgsi_action tex_action = {
+ .fetch_args = tex_fetch_args,
+ .emit = build_tex_intrinsic,
+ .intr_name = "llvm.SI.sample."
+ };
+
+ /* Sample with LOD bias (tex_fetch_args packs the bias for TXB). */
+ static const struct lp_build_tgsi_action txb_action = {
+ .fetch_args = tex_fetch_args,
+ .emit = build_tex_intrinsic,
+ .intr_name = "llvm.SI.sampleb."
+ };
+
+ /* Sample with user derivatives; only built when HAVE_LLVM >= 0x0304. */
+ #if HAVE_LLVM >= 0x0304
+ static const struct lp_build_tgsi_action txd_action = {
+ .fetch_args = tex_fetch_args,
+ .emit = build_tex_intrinsic,
+ .intr_name = "llvm.SI.sampled."
+ };
+ #endif
+
+ /* Texel fetch via image load; also used by tex_fetch_args to read FMASK. */
+ static const struct lp_build_tgsi_action txf_action = {
+ .fetch_args = tex_fetch_args,
+ .emit = build_tex_intrinsic,
+ .intr_name = "llvm.SI.imageload."
+ };
+
+ /* Sample with explicit LOD (tex_fetch_args packs the LOD for TXL). */
+ static const struct lp_build_tgsi_action txl_action = {
+ .fetch_args = tex_fetch_args,
+ .emit = build_tex_intrinsic,
+ .intr_name = "llvm.SI.samplel."
+ };
+
+ /* Texture size query; uses its own fetch/emit callbacks. */
+ static const struct lp_build_tgsi_action txq_action = {
+ .fetch_args = txq_fetch_args,
+ .emit = build_txq_intrinsic,
+ .intr_name = "llvm.SI.resinfo"
+ };
+
+ /* Create the metadata node stored in si_shader_ctx->const_md. The node
+  * has three operands: the metadata string "const", a null operand and
+  * the i32 constant 1. */
+ static void create_meta_data(struct si_shader_context *si_shader_ctx)
+ {
+     struct gallivm_state *gallivm =
+         si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+     LLVMValueRef md_operands[3];
+
+     md_operands[0] = LLVMMDStringInContext(gallivm->context, "const", 5);
+     md_operands[1] = NULL;
+     md_operands[2] = lp_build_const_int32(gallivm, 1);
+
+     si_shader_ctx->const_md =
+         LLVMMDNodeInContext(gallivm->context, md_operands, 3);
+ }
+
+/**
+ * Create the LLVM main function of the shader that is being compiled.
+ *
+ * The parameter type list is filled in according to the processor type,
+ * the function is created with radeon_llvm_create_func() and tagged with
+ * the shader type, and an attribute is attached to every parameter up to
+ * and including last_sgpr.  Vertex shaders get the indices of their
+ * streamout and VGPR parameters assigned on the fly and recorded in
+ * si_shader_ctx; fragment shaders use the fixed SI_PARAM_* layout.  Any
+ * other processor type trips an assertion and returns without creating a
+ * function.
+ */
+static void create_function(struct si_shader_context *si_shader_ctx)
+{
+	struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMTypeRef params[21];
+	LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm->context);
+	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+	LLVMTypeRef f32 = LLVMFloatTypeInContext(gallivm->context);
+	LLVMTypeRef v2i32 = LLVMVectorType(i32, 2);
+	LLVMTypeRef v3i32 = LLVMVectorType(i32, 3);
+	LLVMTypeRef v16i8 = LLVMVectorType(i8, 16);
+	LLVMTypeRef v32i8 = LLVMVectorType(i8, 32);
+	unsigned idx, last_sgpr, num_params;
+
+	/* Descriptor arrays shared by all shader types. */
+	params[SI_PARAM_CONST] = LLVMPointerType(
+		LLVMArrayType(v16i8, NUM_CONST_BUFFERS), CONST_ADDR_SPACE);
+	/* At most 16 textures per program are assumed at the moment; this
+	 * will probably have to change to support bindless textures.
+	 * NOTE(review): the sampler array is sized with NUM_SAMPLER_VIEWS and
+	 * the resource array with NUM_SAMPLER_STATES, which looks crossed --
+	 * confirm against the descriptor upload code. */
+	params[SI_PARAM_SAMPLER] = LLVMPointerType(
+		LLVMArrayType(v16i8, NUM_SAMPLER_VIEWS), CONST_ADDR_SPACE);
+	params[SI_PARAM_RESOURCE] = LLVMPointerType(
+		LLVMArrayType(v32i8, NUM_SAMPLER_STATES), CONST_ADDR_SPACE);
+
+	if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
+		const struct pipe_stream_output_info *so =
+			&si_shader_ctx->shader->selector->so;
+
+		params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST];
+		params[SI_PARAM_SO_BUFFER] = params[SI_PARAM_CONST];
+		params[SI_PARAM_START_INSTANCE] = i32;
+		num_params = SI_PARAM_START_INSTANCE + 1;
+
+		/* Everything below gets the next free index. */
+
+		/* Streamout SGPRs, only present when there are outputs. */
+		if (so->num_outputs) {
+			si_shader_ctx->param_streamout_config = num_params;
+			params[num_params++] = i32;
+			si_shader_ctx->param_streamout_write_index = num_params;
+			params[num_params++] = i32;
+		}
+		/* One buffer offset per streamout buffer with a non-zero stride. */
+		for (idx = 0; idx < 4; idx++) {
+			if (so->stride[idx]) {
+				si_shader_ctx->param_streamout_offset[idx] = num_params;
+				params[num_params++] = i32;
+			}
+		}
+
+		last_sgpr = num_params - 1;
+
+		/* VGPRs: vertex id, two unused slots, instance id. */
+		si_shader_ctx->param_vertex_id = num_params;
+		params[num_params++] = i32;
+		params[num_params++] = i32; /* unused */
+		params[num_params++] = i32; /* unused */
+		si_shader_ctx->param_instance_id = num_params;
+		params[num_params++] = i32;
+	} else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+		/* SGPRs */
+		params[SI_PARAM_ALPHA_REF] = f32;
+		params[SI_PARAM_PRIM_MASK] = i32;
+		last_sgpr = SI_PARAM_PRIM_MASK;
+
+		/* Two-component integer vectors */
+		params[SI_PARAM_PERSP_SAMPLE] = v2i32;
+		params[SI_PARAM_PERSP_CENTER] = v2i32;
+		params[SI_PARAM_PERSP_CENTROID] = v2i32;
+		params[SI_PARAM_LINEAR_SAMPLE] = v2i32;
+		params[SI_PARAM_LINEAR_CENTER] = v2i32;
+		params[SI_PARAM_LINEAR_CENTROID] = v2i32;
+
+		/* The only three-component vector */
+		params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
+
+		/* Scalar floats */
+		params[SI_PARAM_LINE_STIPPLE_TEX] = f32;
+		params[SI_PARAM_POS_X_FLOAT] = f32;
+		params[SI_PARAM_POS_Y_FLOAT] = f32;
+		params[SI_PARAM_POS_Z_FLOAT] = f32;
+		params[SI_PARAM_POS_W_FLOAT] = f32;
+		params[SI_PARAM_FRONT_FACE] = f32;
+		params[SI_PARAM_ANCILLARY] = f32;
+		params[SI_PARAM_SAMPLE_COVERAGE] = f32;
+		params[SI_PARAM_POS_FIXED_PT] = f32;
+		num_params = SI_PARAM_POS_FIXED_PT + 1;
+	} else {
+		assert(0 && "unimplemented shader");
+		return;
+	}
+
+	assert(num_params <= Elements(params));
+	radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params);
+	radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type);
+
+	for (idx = 0; idx <= last_sgpr; ++idx) {
+		LLVMValueRef arg = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, idx);
+		LLVMAttribute attr = LLVMInRegAttribute;
+
+#if HAVE_LLVM >= 0x0304
+		/* Array inputs are declared as passed by value so that the
+		 * sinking pass may move loads; the inputs are constant, so
+		 * this is fine. */
+		if (idx == SI_PARAM_CONST ||
+		    idx == SI_PARAM_SAMPLER ||
+		    idx == SI_PARAM_RESOURCE)
+			attr = LLVMByValAttribute;
+#endif
+		LLVMAddAttribute(arg, attr);
+	}
+
+#if HAVE_LLVM >= 0x0304
+	/* DDX/DDY need a 64 x i32 array in the local address space. */
+	if (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
+	    bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0) {
+		si_shader_ctx->ddxy_lds =
+			LLVMAddGlobalInAddressSpace(gallivm->module,
+						    LLVMArrayType(i32, 64),
+						    "ddxy_lds",
+						    LOCAL_ADDR_SPACE);
+	}
+#endif
+}
+
+/**
+ * Emit loads for every constant the shader may reference.
+ *
+ * For each constant buffer that the TGSI scan reports as used, the buffer's
+ * resource descriptor is loaded from the SI_PARAM_CONST array and one
+ * "llvm.SI.load.const" call is emitted per 32-bit channel (four per constant
+ * register, at a byte offset of channel * 4).  The resulting values are
+ * stored in si_shader_ctx->constants[buf], which is allocated here.
+ */
+static void preload_constants(struct si_shader_context *si_shader_ctx)
+{
+	struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	const struct tgsi_shader_info *info = bld_base->info;
+	LLVMValueRef desc_array = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+	unsigned buf;
+
+	for (buf = 0; buf < NUM_CONST_BUFFERS; buf++) {
+		unsigned num_const = info->const_file_max[buf] + 1;
+		unsigned num_channels = num_const * 4;
+		unsigned chan;
+
+		if (num_const != 0) {
+			/* Storage for the loaded channel values */
+			si_shader_ctx->constants[buf] = CALLOC(num_channels, sizeof(LLVMValueRef));
+
+			/* Resource descriptor of this constant buffer */
+			si_shader_ctx->const_resource[buf] =
+				build_indexed_load(si_shader_ctx, desc_array,
+						   lp_build_const_int32(gallivm, buf));
+
+			/* Load everything up front and rely on code sinking
+			 * to move the loads to where they are needed. */
+			for (chan = 0; chan < num_channels; ++chan) {
+				LLVMValueRef args[2];
+
+				args[0] = si_shader_ctx->const_resource[buf];
+				args[1] = lp_build_const_int32(gallivm, chan * 4);
+
+				si_shader_ctx->constants[buf][chan] =
+					build_intrinsic(gallivm->builder, "llvm.SI.load.const",
+							bld_base->base.elem_type, args, 2,
+							LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+			}
+		}
+	}
+}
+
+/**
+ * Emit loads for the resource and sampler descriptors of every sampler
+ * index up to the highest one the TGSI scan reports.
+ *
+ * Allocates si_shader_ctx->resources (NUM_SAMPLER_VIEWS entries) and
+ * si_shader_ctx->samplers (one entry per sampler).  Samplers flagged as
+ * MSAA additionally get the resource at FMASK_TEX_OFFSET + index loaded.
+ * Does nothing when the shader uses no samplers.
+ */
+static void preload_samplers(struct si_shader_context *si_shader_ctx)
+{
+	struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	const struct tgsi_shader_info *info = bld_base->info;
+	unsigned num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1;
+	LLVMValueRef view_array, state_array;
+	unsigned s;
+
+	if (!num_samplers)
+		return;
+
+	/* Storage for the loaded descriptors */
+	si_shader_ctx->resources = CALLOC(NUM_SAMPLER_VIEWS, sizeof(LLVMValueRef));
+	si_shader_ctx->samplers = CALLOC(num_samplers, sizeof(LLVMValueRef));
+
+	view_array = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+	state_array = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
+
+	/* Load everything up front and rely on code sinking to move the
+	 * loads to where they are needed. */
+	for (s = 0; s < num_samplers; ++s) {
+		LLVMValueRef index = lp_build_const_int32(gallivm, s);
+
+		/* Resource, then sampler state, both at the same index */
+		si_shader_ctx->resources[s] = build_indexed_load(si_shader_ctx, view_array, index);
+		si_shader_ctx->samplers[s] = build_indexed_load(si_shader_ctx, state_array, index);
+
+		/* FMASK resource */
+		if (info->is_msaa_sampler[s]) {
+			LLVMValueRef fmask_index =
+				lp_build_const_int32(gallivm, FMASK_TEX_OFFSET + s);
+
+			si_shader_ctx->resources[FMASK_TEX_OFFSET + s] =
+				build_indexed_load(si_shader_ctx, view_array, fmask_index);
+		}
+	}
+}
+
+/**
+ * Emit loads for the descriptors of the active streamout buffers.
+ *
+ * Nothing is emitted when the shader has no stream outputs.  Otherwise each
+ * of the four buffers with a non-zero stride has its descriptor loaded from
+ * the SI_PARAM_SO_BUFFER array into si_shader_ctx->so_buffers[].
+ */
+static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
+{
+	struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMValueRef desc_array;
+	unsigned slot;
+
+	if (si_shader_ctx->shader->selector->so.num_outputs == 0)
+		return;
+
+	desc_array = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+				  SI_PARAM_SO_BUFFER);
+
+	/* Load everything up front and rely on code sinking to move the
+	 * loads to where they are needed. */
+	for (slot = 0; slot < 4; ++slot) {
+		if (!si_shader_ctx->shader->selector->so.stride[slot])
+			continue;
+
+		si_shader_ctx->so_buffers[slot] =
+			build_indexed_load(si_shader_ctx, desc_array,
+					   lp_build_const_int32(gallivm, slot));
+	}
+}
+
+/**
+ * Compile an LLVM module to machine code and upload it into shader->bo.
+ *
+ * Runs radeon_llvm_compile() for the GPU family of the screen, dumps the
+ * raw code words to stderr when dumping is enabled and no disassembly was
+ * produced, decodes the (register, value) pairs of the returned config blob
+ * into the register-usage fields of the shader, and copies the code into a
+ * newly created buffer that replaces any previous shader->bo.
+ *
+ * rctx:   context providing the screen, the winsys and the gfx command stream
+ * shader: shader variant to fill in; shader->selector may be NULL
+ * mod:    LLVM module to compile
+ *
+ * Returns 0 on success and -ENOMEM when the code buffer cannot be created
+ * or mapped.  The code and config blobs handed back by the compiler are
+ * released on every path.
+ */
+int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader,
+		    LLVMModuleRef mod)
+{
+	unsigned i;
+	uint32_t *ptr;
+	int r = 0;
+	struct radeon_llvm_binary binary;
+	bool dump = r600_can_dump_shader(&rctx->screen->b,
+			shader->selector ? shader->selector->tokens : NULL);
+
+	memset(&binary, 0, sizeof(binary));
+	radeon_llvm_compile(mod, &binary,
+		r600_get_llvm_processor_name(rctx->screen->b.family), dump);
+	if (dump && ! binary.disassembled) {
+		fprintf(stderr, "SI CODE:\n");
+		/* One 32-bit word per line, most significant byte first. */
+		for (i = 0; i < binary.code_size; i+=4 ) {
+			fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3],
+				binary.code[i + 2], binary.code[i + 1],
+				binary.code[i]);
+		}
+	}
+
+	/* XXX: We may be able to emit some of these values directly rather than
+	 * extracting fields to be emitted later.
+	 */
+	/* The config blob is a list of little-endian (register, value) pairs. */
+	for (i = 0; i < binary.config_size; i+= 8) {
+		unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + i));
+		unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
+		switch (reg) {
+		case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
+		case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
+		case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
+		case R_00B848_COMPUTE_PGM_RSRC1:
+			shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8;
+			shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4;
+			break;
+		case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
+			shader->lds_size = G_00B02C_EXTRA_LDS_SIZE(value);
+			break;
+		case R_00B84C_COMPUTE_PGM_RSRC2:
+			shader->lds_size = G_00B84C_LDS_SIZE(value);
+			break;
+		case R_0286CC_SPI_PS_INPUT_ENA:
+			shader->spi_ps_input_ena = value;
+			break;
+		default:
+			fprintf(stderr, "Warning: Compiler emitted unknown "
+				"config register: 0x%x\n", reg);
+			break;
+		}
+	}
+
+	/* copy new shader */
+	r600_resource_reference(&shader->bo, NULL);
+	shader->bo = r600_resource_create_custom(rctx->b.b.screen, PIPE_USAGE_IMMUTABLE,
+						 binary.code_size);
+	if (shader->bo == NULL) {
+		r = -ENOMEM;
+		goto out;
+	}
+
+	ptr = (uint32_t*)rctx->b.ws->buffer_map(shader->bo->cs_buf, rctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
+	if (ptr == NULL) {
+		/* Do not keep a buffer around that never received the code. */
+		r600_resource_reference(&shader->bo, NULL);
+		r = -ENOMEM;
+		goto out;
+	}
+
+	if (0 /*R600_BIG_ENDIAN*/) {
+		for (i = 0; i < binary.code_size / 4; ++i) {
+			ptr[i] = util_bswap32(*(uint32_t*)(binary.code + i*4));
+		}
+	} else {
+		memcpy(ptr, binary.code, binary.code_size);
+	}
+	rctx->b.ws->buffer_unmap(shader->bo->cs_buf);
+
+out:
+	/* Release the compiler output on the error paths as well. */
+	free(binary.code);
+	free(binary.config);
+
+	return r;
+}
+
+/**
+ * Translate the TGSI program of a shader variant to LLVM IR, compile it
+ * and upload the result.
+ *
+ * The TGSI tokens of shader->selector are scanned, the texture and
+ * derivative opcode handlers are installed, the LLVM main function is
+ * created, constants, samplers and streamout buffers are preloaded and the
+ * program is translated with lp_build_tgsi_llvm().  On success the module
+ * is finalized and handed to si_compile_llvm().
+ *
+ * ctx:    pipe context, which must really be a struct r600_context
+ * shader: variant to compile; its ninput, noutput and ninterp counters are
+ *         asserted to be zero on entry
+ *
+ * Returns -EINVAL when the TGSI to LLVM translation fails, otherwise the
+ * result of si_compile_llvm().  The LLVM state, the TGSI parser and the
+ * preload arrays are released on every path.
+ */
+int si_pipe_shader_create(
+	struct pipe_context *ctx,
+	struct si_pipe_shader *shader)
+{
+	struct r600_context *rctx = (struct r600_context*)ctx;
+	struct si_pipe_shader_selector *sel = shader->selector;
+	struct si_shader_context si_shader_ctx;
+	struct tgsi_shader_info shader_info;
+	struct lp_build_tgsi_context * bld_base;
+	LLVMModuleRef mod;
+	int r = 0;
+	bool dump = r600_can_dump_shader(&rctx->screen->b, shader->selector->tokens);
+
+	assert(shader->shader.noutput == 0);
+	assert(shader->shader.ninterp == 0);
+	assert(shader->shader.ninput == 0);
+
+	memset(&si_shader_ctx, 0, sizeof(si_shader_ctx));
+	radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
+	bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
+
+	tgsi_scan_shader(sel->tokens, &shader_info);
+
+	shader->shader.uses_kill = shader_info.uses_kill;
+	shader->shader.uses_instanceid = shader_info.uses_instanceid;
+	bld_base->info = &shader_info;
+	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
+	bld_base->emit_epilogue = si_llvm_emit_epilogue;
+
+	/* Texture opcodes; TXP shares the plain TEX action. */
+	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
+	bld_base->op_actions[TGSI_OPCODE_TXB] = txb_action;
+#if HAVE_LLVM >= 0x0304
+	bld_base->op_actions[TGSI_OPCODE_TXD] = txd_action;
+#endif
+	bld_base->op_actions[TGSI_OPCODE_TXF] = txf_action;
+	bld_base->op_actions[TGSI_OPCODE_TXL] = txl_action;
+	bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
+	bld_base->op_actions[TGSI_OPCODE_TXQ] = txq_action;
+
+#if HAVE_LLVM >= 0x0304
+	bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
+	bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
+#endif
+
+	si_shader_ctx.radeon_bld.load_input = declare_input;
+	si_shader_ctx.radeon_bld.load_system_value = declare_system_value;
+	si_shader_ctx.tokens = sel->tokens;
+	tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
+	si_shader_ctx.shader = shader;
+	si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
+
+	create_meta_data(&si_shader_ctx);
+	create_function(&si_shader_ctx);
+	preload_constants(&si_shader_ctx);
+	preload_samplers(&si_shader_ctx);
+	preload_streamout_buffers(&si_shader_ctx);
+
+	/* Dump TGSI code before doing TGSI->LLVM conversion in case the
+	 * conversion fails. */
+	if (dump) {
+		tgsi_dump(sel->tokens, 0);
+		si_dump_streamout(&sel->so);
+	}
+
+	if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
+		fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
+		r = -EINVAL;
+		goto out;
+	}
+
+	radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
+
+	mod = bld_base->base.gallivm->module;
+	r = si_compile_llvm(rctx, shader, mod);
+
+out:
+	/* Shared by the success and failure paths so that the LLVM state
+	 * set up by radeon_llvm_context_init() is not leaked when the
+	 * translation fails. */
+	radeon_llvm_dispose(&si_shader_ctx.radeon_bld);
+	tgsi_parse_free(&si_shader_ctx.parse);
+
+	for (int i = 0; i < NUM_CONST_BUFFERS; i++)
+		FREE(si_shader_ctx.constants[i]);
+	FREE(si_shader_ctx.resources);
+	FREE(si_shader_ctx.samplers);
+
+	return r;
+}
+
+/**
+ * Drop the reference a shader variant holds on its code buffer.
+ * The context argument is not used.
+ */
+void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
+{
+	struct r600_resource **code_bo = &shader->bo;
+
+	(void)ctx;
+	r600_resource_reference(code_bo, NULL);
+}
--- /dev/null
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Tom Stellard <thomas.stellard@amd.com>
+ * Michel Dänzer <michel.daenzer@amd.com>
+ * Christian König <christian.koenig@amd.com>
+ */
+
+#ifndef SI_SHADER_H
+#define SI_SHADER_H
+
+#include <llvm-c/Core.h> /* LLVMModuleRef */
+
+#define SI_SGPR_CONST 0 /* user SGPR indices; NOTE(review): the step of 2 suggests each entry is a 64-bit pointer -- confirm */
+#define SI_SGPR_SAMPLER 2
+#define SI_SGPR_RESOURCE 4
+#define SI_SGPR_VERTEX_BUFFER 6 /* VS only */
+#define SI_SGPR_SO_BUFFER 8 /* VS only, stream-out */
+#define SI_SGPR_START_INSTANCE 10 /* VS only */
+#define SI_SGPR_ALPHA_REF 6 /* PS only; same index that SI_SGPR_VERTEX_BUFFER has for VS */
+
+#define SI_VS_NUM_USER_SGPR 11 /* SI_SGPR_START_INSTANCE + 1 */
+#define SI_PS_NUM_USER_SGPR 7 /* SI_SGPR_ALPHA_REF + 1 */
+
+/* LLVM function parameter indices; create_function() in si_shader.c fills its params[] array using these. */
+#define SI_PARAM_CONST 0
+#define SI_PARAM_SAMPLER 1
+#define SI_PARAM_RESOURCE 2
+
+/* VS only parameters */
+#define SI_PARAM_VERTEX_BUFFER 3
+#define SI_PARAM_SO_BUFFER 4
+#define SI_PARAM_START_INSTANCE 5
+/* the other VS parameters are assigned dynamically */
+
+/* PS only parameters (the indices from 3 up overlap the VS only ones) */
+#define SI_PARAM_ALPHA_REF 3
+#define SI_PARAM_PRIM_MASK 4 /* create_function() uses this as last_sgpr for fragment shaders */
+#define SI_PARAM_PERSP_SAMPLE 5
+#define SI_PARAM_PERSP_CENTER 6
+#define SI_PARAM_PERSP_CENTROID 7
+#define SI_PARAM_PERSP_PULL_MODEL 8 /* the only parameter declared as a 3 x i32 vector */
+#define SI_PARAM_LINEAR_SAMPLE 9
+#define SI_PARAM_LINEAR_CENTER 10
+#define SI_PARAM_LINEAR_CENTROID 11
+#define SI_PARAM_LINE_STIPPLE_TEX 12
+#define SI_PARAM_POS_X_FLOAT 13
+#define SI_PARAM_POS_Y_FLOAT 14
+#define SI_PARAM_POS_Z_FLOAT 15
+#define SI_PARAM_POS_W_FLOAT 16
+#define SI_PARAM_FRONT_FACE 17
+#define SI_PARAM_ANCILLARY 18
+#define SI_PARAM_SAMPLE_COVERAGE 19
+#define SI_PARAM_POS_FIXED_PT 20 /* highest index; create_function() declares params[] with 21 entries */
+
+struct si_shader_io { /* element type of the input[] and output[] arrays of struct si_shader */
+ unsigned name; /* presumably a TGSI_SEMANTIC_* value -- TODO confirm */
+ int sid; /* presumably the semantic index -- TODO confirm */
+ unsigned param_offset;
+ unsigned interpolate; /* presumably a TGSI_INTERPOLATE_* mode -- TODO confirm */
+ bool centroid;
+};
+
+struct si_pipe_shader;
+
+struct si_pipe_shader_selector { /* NOTE(review): looks like the state shared by all compiled variants of one shader -- confirm */
+ struct si_pipe_shader *current; /* presumably the variant currently in use -- TODO confirm */
+
+ struct tgsi_token *tokens; /* TGSI program; scanned and translated to LLVM by si_pipe_shader_create() */
+ struct pipe_stream_output_info so; /* stream-output layout; num_outputs and stride[] decide which streamout parameters a VS gets */
+
+ unsigned num_shaders;
+
+ /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
+ unsigned type;
+
+ /* 1 when the shader contains
+ * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, otherwise it's 0.
+ * Used to determine whether we need to include nr_cbufs in the key */
+ unsigned fs_write_all;
+};
+
+struct si_shader { /* shader description embedded in struct si_pipe_shader as its "shader" member */
+ unsigned ninput; /* si_pipe_shader_create() asserts this is 0 on entry */
+ struct si_shader_io input[40];
+
+ unsigned noutput; /* si_pipe_shader_create() asserts this is 0 on entry */
+ struct si_shader_io output[40];
+
+ unsigned ninterp; /* si_pipe_shader_create() asserts this is 0 on entry */
+ bool uses_kill; /* copied from the TGSI scan result in si_pipe_shader_create() */
+ bool uses_instanceid; /* copied from the TGSI scan result in si_pipe_shader_create() */
+ bool fs_write_all;
+ bool vs_out_misc_write;
+ bool vs_out_point_size;
+ bool vs_out_edgeflag;
+ bool vs_out_layer;
+ unsigned nr_pos_exports;
+ unsigned clip_dist_write;
+};
+
+union si_shader_key { /* type of the "key" member of struct si_pipe_shader; the two layouts share storage */
+ struct {
+ unsigned export_16bpc:8;
+ unsigned nr_cbufs:4;
+ unsigned color_two_side:1;
+ unsigned alpha_func:3;
+ unsigned flatshade:1;
+ unsigned alpha_to_one:1;
+ } ps; /* presumably the fragment (pixel) shader layout -- TODO confirm */
+ struct {
+ unsigned instance_divisors[PIPE_MAX_ATTRIBS]; /* one entry per vertex attribute slot */
+ unsigned ucps_enabled:2;
+ } vs; /* presumably the vertex shader layout -- TODO confirm */
+};
+
+struct si_pipe_shader { /* one shader variant together with its compiled code and the values derived from it */
+ struct si_pipe_shader_selector *selector; /* supplies the TGSI tokens and the stream-output info */
+ struct si_pipe_shader *next_variant;
+ struct si_shader shader;
+ struct si_pm4_state *pm4;
+ struct r600_resource *bo; /* buffer holding the machine code; (re)created by si_compile_llvm() */
+ unsigned num_sgprs; /* decoded from a *_PGM_RSRC1 config register in si_compile_llvm() */
+ unsigned num_vgprs; /* decoded from a *_PGM_RSRC1 config register in si_compile_llvm() */
+ unsigned lds_size; /* decoded from the PS or compute PGM_RSRC2 config register in si_compile_llvm() */
+ unsigned spi_ps_input_ena; /* raw SPI_PS_INPUT_ENA value reported by the compiler */
+ unsigned spi_shader_col_format;
+ unsigned cb_shader_mask;
+ bool cb0_is_integer;
+ unsigned sprite_coord_enable;
+ union si_shader_key key;
+};
+
+/* si_shader.c */
+
+/* Translate the TGSI program of the variant to LLVM, compile it and upload
+ * the code.  Returns 0 on success or a negative errno value. */
+int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader);
+
+/* Compile an LLVM module and store the code and the decoded config values
+ * in the shader.  Returns 0 on success or a negative errno value. */
+int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader,
+		    LLVMModuleRef mod);
+
+/* Release the code buffer of the variant. */
+void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader);
+
+#endif
#include "util/u_format_s3tc.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
-#include "radeonsi_pipe.h"
-#include "radeonsi_shader.h"
+#include "si_pipe.h"
+#include "si_shader.h"
#include "si_state.h"
#include "../radeon/r600_cs.h"
#include "sid.h"
#ifndef SI_STATE_H
#define SI_STATE_H
-#include "radeonsi_pm4.h"
+#include "si_pm4.h"
#include "../radeon/r600_pipe_common.h"
struct si_state_blend {
#include "util/u_framebuffer.h"
#include "util/u_blitter.h"
#include "tgsi/tgsi_parse.h"
-#include "radeonsi_pipe.h"
-#include "radeonsi_shader.h"
+#include "si_pipe.h"
+#include "si_shader.h"
#include "si_state.h"
#include "../radeon/r600_cs.h"
#include "sid.h"
--- /dev/null
+/*
+ * Copyright 2010 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie <airlied@redhat.com>
+ */
+
+#include "util/u_index_modify.h"
+#include "util/u_upload_mgr.h"
+#include "si_pipe.h"
+
+
+/**
+ * Convert an index buffer with 8-bit indices to 16-bit indices.
+ *
+ * Space for count 16-bit indices is taken from the context's upload
+ * manager, the indices are widened into it, and the index buffer is
+ * changed to reference the new storage (buffer, offset and index_size 2).
+ * Index buffers of any other index size are left untouched.
+ *
+ * r600:  context providing the upload manager
+ * ib:    index buffer to translate in place
+ * count: number of indices to translate
+ *
+ * If the upload allocation fails the index buffer is left unchanged.
+ */
+void r600_translate_index_buffer(struct r600_context *r600,
+				 struct pipe_index_buffer *ib,
+				 unsigned count)
+{
+	struct pipe_resource *out_buffer = NULL;
+	unsigned out_offset;
+	void *ptr = NULL;
+
+	switch (ib->index_size) {
+	case 1:
+		u_upload_alloc(r600->b.uploader, 0, count * 2,
+			       &out_offset, &out_buffer, &ptr);
+		if (!ptr) {
+			/* No mapping to write to: drop whatever reference was
+			 * handed back and keep the original index buffer. */
+			pipe_resource_reference(&out_buffer, NULL);
+			return;
+		}
+
+		util_shorten_ubyte_elts_to_userptr(
+				&r600->b.b, ib, 0, ib->offset, count, ptr);
+
+		/* The translated buffer takes over the reference obtained
+		 * from the upload manager. */
+		pipe_resource_reference(&ib->buffer, NULL);
+		ib->buffer = out_buffer;
+		ib->offset = out_offset;
+		ib->index_size = 2;
+		break;
+	default:
+		/* Other index sizes need no translation. */
+		break;
+	}
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ *
+ */
+
+#include <sys/types.h>
+#include <assert.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "pipe/p_video_codec.h"
+
+#include "util/u_memory.h"
+#include "util/u_video.h"
+
+#include "vl/vl_defines.h"
+#include "vl/vl_mpeg12_decoder.h"
+
+#include "si_pipe.h"
+#include "radeon/radeon_uvd.h"
+#include "sid.h"
+
+/**
+ * Create a video buffer with a UVD compatible memory layout.
+ *
+ * Up to three plane textures are created through the screen as linear
+ * resources with dimensions aligned to the macroblock size, their surfaces
+ * are joined with ruvd_join_surfaces(), and the result is wrapped with
+ * vl_video_buffer_create_ex2().  Returns NULL when
+ * vl_video_buffer_formats() yields no formats or a plane cannot be created.
+ */
+struct pipe_video_buffer *radeonsi_video_buffer_create(struct pipe_context *pipe,
+						       const struct pipe_video_buffer *tmpl)
+{
+	struct r600_context *ctx = (struct r600_context *)pipe;
+	struct r600_texture *planes[VL_NUM_COMPONENTS] = { NULL };
+	struct radeon_surface *plane_surfaces[VL_NUM_COMPONENTS] = { NULL };
+	struct pb_buffer **plane_bufs[VL_NUM_COMPONENTS] = { NULL };
+	const enum pipe_format *formats;
+	struct pipe_video_buffer aligned;
+	struct pipe_resource tex_templ;
+	unsigned p, layers;
+
+	assert(pipe);
+
+	/* The planes are first created as "normal" textures. */
+	formats = vl_video_buffer_formats(pipe->screen, tmpl->buffer_format);
+	if (formats == NULL)
+		return NULL;
+
+	/* Interlaced buffers use two array layers of half the height. */
+	layers = 1;
+	if (tmpl->interlaced)
+		layers = 2;
+
+	aligned = *tmpl;
+	aligned.width = align(tmpl->width, VL_MACROBLOCK_WIDTH);
+	aligned.height = align(tmpl->height / layers, VL_MACROBLOCK_HEIGHT);
+
+	/* Plane 0 is always created. */
+	vl_video_buffer_template(&tex_templ, &aligned, formats[0], 1, layers, PIPE_USAGE_STATIC, 0);
+	/* TODO: get tiling working */
+	tex_templ.bind = PIPE_BIND_LINEAR;
+	planes[0] = (struct r600_texture *)
+		pipe->screen->resource_create(pipe->screen, &tex_templ);
+	if (planes[0] == NULL)
+		goto error;
+
+	/* Planes 1 and 2 only exist when the format table names them. */
+	if (formats[1] != PIPE_FORMAT_NONE) {
+		vl_video_buffer_template(&tex_templ, &aligned, formats[1], 1, layers, PIPE_USAGE_STATIC, 1);
+		tex_templ.bind = PIPE_BIND_LINEAR;
+		planes[1] = (struct r600_texture *)
+			pipe->screen->resource_create(pipe->screen, &tex_templ);
+		if (planes[1] == NULL)
+			goto error;
+	}
+
+	if (formats[2] != PIPE_FORMAT_NONE) {
+		vl_video_buffer_template(&tex_templ, &aligned, formats[2], 1, layers, PIPE_USAGE_STATIC, 2);
+		tex_templ.bind = PIPE_BIND_LINEAR;
+		planes[2] = (struct r600_texture *)
+			pipe->screen->resource_create(pipe->screen, &tex_templ);
+		if (planes[2] == NULL)
+			goto error;
+	}
+
+	/* Collect the surface and buffer of every plane that exists. */
+	for (p = 0; p < VL_NUM_COMPONENTS; ++p) {
+		if (planes[p] != NULL) {
+			plane_surfaces[p] = &planes[p]->surface;
+			plane_bufs[p] = &planes[p]->resource.buf;
+		}
+	}
+
+	ruvd_join_surfaces(ctx->b.ws, tex_templ.bind, plane_bufs, plane_surfaces);
+
+	/* The join may have replaced the buffers, so recreate the CS handles. */
+	for (p = 0; p < VL_NUM_COMPONENTS; ++p) {
+		if (planes[p] != NULL) {
+			planes[p]->resource.cs_buf = ctx->b.ws->buffer_get_cs_handle(
+				planes[p]->resource.buf);
+		}
+	}
+
+	/* Undo the per-layer division before wrapping the planes. */
+	aligned.height *= layers;
+	return vl_video_buffer_create_ex2(pipe, &aligned, (struct pipe_resource **)planes);
+
+error:
+	for (p = 0; p < VL_NUM_COMPONENTS; ++p)
+		pipe_resource_reference((struct pipe_resource **)&planes[p], NULL);
+
+	return NULL;
+}
+
+/* Fill in the decode target part of a UVD message from a video buffer:
+ * the field mode comes from the buffer's interlaced flag and the surface
+ * setup from planes 0 and 1.  Returns the CS handle of plane 0. */
+static struct radeon_winsys_cs_handle* radeonsi_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_buffer *buf)
+{
+	struct r600_texture *plane0 = (struct r600_texture *)buf->resources[0];
+	struct r600_texture *plane1 = (struct r600_texture *)buf->resources[1];
+	struct radeon_surface *luma_surf = &plane0->surface;
+	struct radeon_surface *chroma_surf = &plane1->surface;
+
+	msg->body.decode.dt_field_mode = buf->base.interlaced;
+
+	ruvd_set_dt_surfaces(msg, luma_surf, chroma_surf);
+
+	return plane0->resource.cs_buf;
+}
+
+/**
+ * Create a UVD decoder by forwarding to ruvd_create_decoder() with
+ * radeonsi_uvd_set_dtb as the decode-target callback.
+ */
+struct pipe_video_codec *radeonsi_uvd_create_decoder(struct pipe_context *context,
+						     const struct pipe_video_codec *templ)
+{
+	struct pipe_video_codec *decoder;
+
+	decoder = ruvd_create_decoder(context, templ, radeonsi_uvd_set_dtb);
+	return decoder;
+}
#include "state_tracker/drm_driver.h"
#include "target-helpers/inline_debug_helper.h"
#include "radeon/drm/radeon_drm_public.h"
-#include "radeonsi/radeonsi_public.h"
+#include "radeonsi/si_public.h"
static struct pipe_screen *
create_screen(int fd)
#include "target-helpers/inline_debug_helper.h"
#include "radeon/drm/radeon_drm_public.h"
#include "radeon/drm/radeon_winsys.h"
-#include "radeonsi/radeonsi_public.h"
+#include "radeonsi/si_public.h"
static struct pipe_screen *create_screen(int fd)
{