#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_dump.h"
#include "util/u_memory.h"
+#include "util/u_math.h"
#include <stdio.h>
#include <errno.h>
-#include <byteswap.h>
/* CAYMAN notes
Why CAYMAN got loops for lots of instructions is explained here.
int r, i;
uint32_t *ptr;
bool dump = r600_can_dump_shader(rctx->screen, tgsi_get_processor_type(sel->tokens));
- unsigned use_sb = rctx->screen->debug_flags & DBG_SB;
+ unsigned use_sb = !(rctx->screen->debug_flags & DBG_NO_SB);
unsigned sb_disasm = use_sb || (rctx->screen->debug_flags & DBG_SB_DISASM);
shader->shader.bc.isa = rctx->isa;
ptr = r600_buffer_mmap_sync_with_rings(rctx, shader->bo, PIPE_TRANSFER_WRITE);
if (R600_BIG_ENDIAN) {
for (i = 0; i < shader->shader.bc.ndw; ++i) {
- ptr[i] = bswap_32(shader->shader.bc.bytecode[i]);
+ ptr[i] = util_bswap32(shader->shader.bc.bytecode[i]);
}
} else {
memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
}
- rctx->ws->buffer_unmap(shader->bo->cs_buf);
+ rctx->b.ws->buffer_unmap(shader->bo->cs_buf);
}
/* Build state. */
switch (shader->shader.processor_type) {
case TGSI_PROCESSOR_VERTEX:
- if (rctx->chip_class >= EVERGREEN) {
+ if (rctx->b.chip_class >= EVERGREEN) {
evergreen_update_vs_state(ctx, shader);
} else {
r600_update_vs_state(ctx, shader);
}
break;
case TGSI_PROCESSOR_FRAGMENT:
- if (rctx->chip_class >= EVERGREEN) {
+ if (rctx->b.chip_class >= EVERGREEN) {
evergreen_update_ps_state(ctx, shader);
} else {
r600_update_ps_state(ctx, shader);
static int tgsi_endloop(struct r600_shader_ctx *ctx);
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
-#ifdef HAVE_OPENCL
-int r600_compute_shader_create(struct pipe_context * ctx,
- LLVMModuleRef mod, struct r600_bytecode * bytecode)
-{
- struct r600_context *r600_ctx = (struct r600_context *)ctx;
- struct r600_shader_ctx shader_ctx;
- boolean use_kill = false;
- bool dump = (r600_ctx->screen->debug_flags & DBG_CS) != 0;
- unsigned use_sb = r600_ctx->screen->debug_flags & DBG_SB_CS;
- unsigned sb_disasm = use_sb ||
- (r600_ctx->screen->debug_flags & DBG_SB_DISASM);
-
- shader_ctx.bc = bytecode;
- r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family,
- r600_ctx->screen->has_compressed_msaa_texturing);
- shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
- shader_ctx.bc->isa = r600_ctx->isa;
- r600_llvm_compile(mod, r600_ctx->family,
- shader_ctx.bc, &use_kill, dump);
-
- if (dump && !sb_disasm) {
- r600_bytecode_disasm(shader_ctx.bc);
- } else if ((dump && sb_disasm) || use_sb) {
- if (r600_sb_bytecode_process(r600_ctx, shader_ctx.bc, NULL, dump, use_sb))
- R600_ERR("r600_sb_bytecode_process failed!\n");
- }
-
- return 1;
-}
-
-#endif /* HAVE_OPENCL */
-
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
unsigned output_done, noutput;
unsigned opcode;
int i, j, k, r = 0;
- int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0;
+ int next_pos_base = 60, next_param_base = 0;
+ int max_color_exports = MAX2(key.nr_cbufs, 1);
/* Declarations used by llvm code */
bool use_llvm = false;
bool indirect_gprs;
ctx.shader = shader;
ctx.native_integers = true;
- r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family,
+ r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
rscreen->has_compressed_msaa_texturing);
ctx.tokens = tokens;
tgsi_scan_shader(tokens, &ctx.info);
radeon_llvm_ctx.face_gpr = ctx.face_gpr;
radeon_llvm_ctx.r600_inputs = ctx.shader->input;
radeon_llvm_ctx.r600_outputs = ctx.shader->output;
- radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1);
+ radeon_llvm_ctx.color_buffer_count = max_color_exports;
radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
- radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN);
+ radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN);
radeon_llvm_ctx.stream_outputs = &so;
radeon_llvm_ctx.clip_vertex = ctx.cv_output;
radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp;
- if (r600_llvm_compile(mod, rscreen->family, ctx.bc, &use_kill, dump)) {
+ if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, dump)) {
radeon_llvm_dispose(&radeon_llvm_ctx);
use_llvm = 0;
fprintf(stderr, "R600 LLVM backend failed to compile "
#endif
/* End of LLVM backend setup */
- if (shader->fs_write_all && rscreen->chip_class >= EVERGREEN)
+ if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN)
shader->nr_ps_max_color_exports = 8;
if (!use_llvm) {
case TGSI_PROCESSOR_FRAGMENT:
if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
/* never export more colors than the number of CBs */
- if (next_pixel_base && next_pixel_base >= key.nr_cbufs) {
+ if (shader->output[i].sid >= max_color_exports) {
/* skip export */
j--;
continue;
}
output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
- output[j].array_base = next_pixel_base++;
+ output[j].array_base = shader->output[i].sid;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
shader->nr_ps_color_exports++;
- if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN)) {
- for (k = 1; k < key.nr_cbufs; k++) {
+ if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) {
+ for (k = 1; k < max_color_exports; k++) {
j++;
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = shader->output[i].gpr;
output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
output[j].burst_count = 1;
output[j].barrier = 1;
- output[j].array_base = next_pixel_base++;
+ output[j].array_base = k;
output[j].op = CF_OP_EXPORT;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
shader->nr_ps_color_exports++;
}
/* add fake pixel export */
- if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) {
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) {
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = 0;
output[j].elem_size = 3;
alu.src[0].sel = V_SQ_ALU_SRC_0;
- if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) {
+ if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILL) {
alu.src[1].sel = V_SQ_ALU_SRC_1;
alu.src[1].neg = 1;
} else {
{TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig},
{TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
{TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- {TGSI_OPCODE_KILP, 0, ALU_OP2_KILLGT, tgsi_kill}, /* predicated kill */
+ {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
{TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
{105, 0, ALU_OP0_NOP, tgsi_unsupported},
{106, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {108, 0, ALU_OP0_NOP, tgsi_unsupported},
- {109, 0, ALU_OP0_NOP, tgsi_unsupported},
- {110, 0, ALU_OP0_NOP, tgsi_unsupported},
- {111, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
+ {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
{TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{114, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_KIL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
+ {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
/* gap */
{118, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig},
{TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
{TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- {TGSI_OPCODE_KILP, 0, ALU_OP2_KILLGT, tgsi_kill}, /* predicated kill */
+ {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
{TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
{105, 0, ALU_OP0_NOP, tgsi_unsupported},
{106, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {108, 0, ALU_OP0_NOP, tgsi_unsupported},
- {109, 0, ALU_OP0_NOP, tgsi_unsupported},
- {110, 0, ALU_OP0_NOP, tgsi_unsupported},
- {111, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
+ {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
{TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{114, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_KIL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
+ {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
/* gap */
{118, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_COS, 0, ALU_OP1_COS, cayman_trig},
{TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
{TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
- {TGSI_OPCODE_KILP, 0, ALU_OP2_KILLGT, tgsi_kill}, /* predicated kill */
+ {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
{TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
{106, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
- {108, 0, ALU_OP0_NOP, tgsi_unsupported},
- {109, 0, ALU_OP0_NOP, tgsi_unsupported},
- {110, 0, ALU_OP0_NOP, tgsi_unsupported},
- {111, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
+ {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
+ {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
{TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{114, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_KIL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
+ {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
/* gap */
{118, 0, ALU_OP0_NOP, tgsi_unsupported},