+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
#include "util/u_memory.h"
#include "../radeon/r600_cs.h"
#include "si_pipe.h"
#include "si_shader.h"
+#include "sid.h"
#include "radeon_llvm_util.h"
struct pipe_context *ctx,
const struct pipe_compute_state *cso)
{
- struct si_context *rctx = (struct si_context *)ctx;
+ struct si_context *sctx = (struct si_context *)ctx;
struct si_pipe_compute *program =
CALLOC_STRUCT(si_pipe_compute);
const struct pipe_llvm_program_header *header;
header = cso->prog;
code = cso->prog + sizeof(struct pipe_llvm_program_header);
- program->ctx = rctx;
+ program->ctx = sctx;
program->local_size = cso->req_local_mem;
program->private_size = cso->req_private_mem;
program->input_size = cso->req_input_mem;
for (i = 0; i < program->num_kernels; i++) {
LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
code, header->num_bytes);
- si_compile_llvm(rctx, &program->kernels[i], mod);
+ si_compile_llvm(sctx, &program->kernels[i], mod);
LLVMDisposeModule(mod);
}
static void si_bind_compute_state(struct pipe_context *ctx, void *state)
{
- struct si_context *rctx = (struct si_context*)ctx;
- rctx->cs_shader_state.program = (struct si_pipe_compute*)state;
+ struct si_context *sctx = (struct si_context*)ctx;
+ sctx->cs_shader_state.program = (struct si_pipe_compute*)state;
}
static void si_set_global_binding(
uint32_t **handles)
{
unsigned i;
- struct si_context *rctx = (struct si_context*)ctx;
- struct si_pipe_compute *program = rctx->cs_shader_state.program;
+ struct si_context *sctx = (struct si_context*)ctx;
+ struct si_pipe_compute *program = sctx->cs_shader_state.program;
if (!resources) {
for (i = first; i < first + n; i++) {
for (i = first; i < first + n; i++) {
uint64_t va;
+ uint32_t offset;
program->global_buffers[i] = resources[i];
va = r600_resource_va(ctx->screen, resources[i]);
+ offset = util_le32_to_cpu(*handles[i]);
+ va += offset;
+ va = util_cpu_to_le64(va);
memcpy(handles[i], &va, sizeof(va));
}
}
const uint *block_layout, const uint *grid_layout,
uint32_t pc, const void *input)
{
- struct si_context *rctx = (struct si_context*)ctx;
- struct si_pipe_compute *program = rctx->cs_shader_state.program;
+ struct si_context *sctx = (struct si_context*)ctx;
+ struct si_pipe_compute *program = sctx->cs_shader_state.program;
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
struct r600_resource *kernel_args_buffer = NULL;
unsigned kernel_args_size;
memcpy(kernel_args + (num_work_size_bytes / 4), input, program->input_size);
- si_upload_const_buffer(rctx, &kernel_args_buffer, (uint8_t*)kernel_args,
+ si_upload_const_buffer(sctx, &kernel_args_buffer, (uint8_t*)kernel_args,
kernel_args_size, &kernel_args_offset);
kernel_args_va = r600_resource_va(ctx->screen,
(struct pipe_resource*)kernel_args_buffer);
kernel_args_va += kernel_args_offset;
- si_pm4_add_bo(pm4, kernel_args_buffer, RADEON_USAGE_READ);
+ si_pm4_add_bo(pm4, kernel_args_buffer, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0));
if (!buffer) {
continue;
}
- si_pm4_add_bo(pm4, buffer, RADEON_USAGE_READWRITE);
+ si_pm4_add_bo(pm4, buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
}
/* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
* kernel if we want to use something other than the default value,
* which is now 0x22f.
*/
- if (rctx->b.chip_class <= SI) {
+ if (sctx->b.chip_class <= SI) {
/* XXX: This should be:
* (number of compute units) * 4 * (waves per simd) - 1 */
}
shader_va = r600_resource_va(ctx->screen, (void *)shader->bo);
- si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
+ si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
* the shader and 4 bytes allocated by the state tracker, then
* we will set LDS_SIZE to 512 bytes rather than 256.
*/
- if (rctx->b.chip_class <= SI) {
+ if (sctx->b.chip_class <= SI) {
lds_blocks += align(program->local_size, 256) >> 8;
} else {
lds_blocks += align(program->local_size, 512) >> 9;
si_pm4_inval_shader_cache(pm4);
si_cmd_surface_sync(pm4, pm4->cp_coher_cntl);
- si_pm4_emit(rctx, pm4);
+ si_pm4_emit(sctx, pm4);
#if 0
- fprintf(stderr, "cdw: %i\n", rctx->cs->cdw);
- for (i = 0; i < rctx->cs->cdw; i++) {
- fprintf(stderr, "%4i : 0x%08X\n", i, rctx->cs->buf[i]);
+ fprintf(stderr, "cdw: %i\n", sctx->cs->cdw);
+ for (i = 0; i < sctx->cs->cdw; i++) {
+ fprintf(stderr, "%4i : 0x%08X\n", i, sctx->cs->buf[i]);
}
#endif
unsigned start, unsigned count,
struct pipe_surface ** surfaces) { }
-void si_init_compute_functions(struct si_context *rctx)
+void si_init_compute_functions(struct si_context *sctx)
{
- rctx->b.b.create_compute_state = si_create_compute_state;
- rctx->b.b.delete_compute_state = si_delete_compute_state;
- rctx->b.b.bind_compute_state = si_bind_compute_state;
+ sctx->b.b.create_compute_state = si_create_compute_state;
+ sctx->b.b.delete_compute_state = si_delete_compute_state;
+ sctx->b.b.bind_compute_state = si_bind_compute_state;
/* ctx->context.create_sampler_view = evergreen_compute_create_sampler_view; */
- rctx->b.b.set_compute_resources = si_set_compute_resources;
- rctx->b.b.set_global_binding = si_set_global_binding;
- rctx->b.b.launch_grid = si_launch_grid;
+ sctx->b.b.set_compute_resources = si_set_compute_resources;
+ sctx->b.b.set_global_binding = si_set_global_binding;
+ sctx->b.b.launch_grid = si_launch_grid;
}