X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fr600%2Fr600_shader.c;h=f0d3be405d23ae089f1317cb120261c807fa3eb6;hb=2cd769179345799d383f92dd615991755ec24be1;hp=8a80dba38a42c504f3fa60de5fe4ce62a6fe43cb;hpb=c108831d4451f624167d2c433282c6ac63541a79;p=mesa.git diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 8a80dba38a4..f0d3be405d2 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -24,6 +24,7 @@ #include "r600_llvm.h" #include "r600_formats.h" #include "r600_opcodes.h" +#include "r600_shader.h" #include "r600d.h" #include "pipe/p_shader_tokens.h" @@ -57,44 +58,126 @@ issued in the w slot as well. The compiler must issue the source argument to slots z, y, and x */ -static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) +static int r600_shader_from_tgsi(struct r600_screen *rscreen, + struct r600_pipe_shader *pipeshader, + struct r600_shader_key key); + +static unsigned tgsi_get_processor_type(const struct tgsi_token *tokens) +{ + struct tgsi_parse_context parse; + + if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in %s:%i!\n", __func__, __LINE__); + return ~0; + } + return parse.FullHeader.Processor.Processor; +} + +static bool r600_can_dump_shader(struct r600_screen *rscreen, unsigned processor_type) +{ + switch (processor_type) { + case TGSI_PROCESSOR_VERTEX: + return (rscreen->debug_flags & DBG_VS) != 0; + case TGSI_PROCESSOR_GEOMETRY: + return (rscreen->debug_flags & DBG_GS) != 0; + case TGSI_PROCESSOR_FRAGMENT: + return (rscreen->debug_flags & DBG_PS) != 0; + case TGSI_PROCESSOR_COMPUTE: + return (rscreen->debug_flags & DBG_CS) != 0; + default: + return false; + } +} + +static void r600_dump_streamout(struct pipe_stream_output_info *so) +{ + unsigned i; + + fprintf(stderr, "STREAMOUT\n"); + for (i = 0; i < so->num_outputs; i++) { + unsigned mask = ((1 << so->output[i].num_components) - 1) << + so->output[i].start_component; + fprintf(stderr, " %i: MEM_STREAM0_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n", + i, so->output[i].output_buffer, + so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, + so->output[i].register_index, + mask & 1 ? "x" : "", + mask & 2 ? "y" : "", + mask & 4 ? "z" : "", + mask & 8 ? "w" : "", + so->output[i].dst_offset < so->output[i].start_component ? " (will lower)" : ""); + } +} + +int r600_pipe_shader_create(struct pipe_context *ctx, + struct r600_pipe_shader *shader, + struct r600_shader_key key) { struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_shader *rshader = &shader->shader; + struct r600_pipe_shader_selector *sel = shader->selector; + int r, i; uint32_t *ptr; - int i; + bool dump = r600_can_dump_shader(rctx->screen, tgsi_get_processor_type(sel->tokens)); + + shader->shader.bc.isa = rctx->isa; + + if (dump) { + fprintf(stderr, "--------------------------------------------------------------\n"); + tgsi_dump(sel->tokens, 0); - /* copy new shader */ + if (sel->so.num_outputs) { + r600_dump_streamout(&sel->so); + } + } + r = r600_shader_from_tgsi(rctx->screen, shader, key); + if (r) { + R600_ERR("translation from TGSI failed !\n"); + return r; + } + r = r600_bytecode_build(&shader->shader.bc); + if (r) { + R600_ERR("building bytecode failed !\n"); + return r; + } + if (dump) { + fprintf(stderr, "--------------------------------------------------------------\n"); + r600_bytecode_disasm(&shader->shader.bc); + fprintf(stderr, "______________________________________________________________\n"); + } + + + /* Store the shader in a buffer. */ if (shader->bo == NULL) { shader->bo = (struct r600_resource*) - pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4); + pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4); if (shader->bo == NULL) { return -ENOMEM; } - ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE); + ptr = r600_buffer_mmap_sync_with_rings(rctx, shader->bo, PIPE_TRANSFER_WRITE); if (R600_BIG_ENDIAN) { - for (i = 0; i < rshader->bc.ndw; ++i) { - ptr[i] = bswap_32(rshader->bc.bytecode[i]); + for (i = 0; i < shader->shader.bc.ndw; ++i) { + ptr[i] = bswap_32(shader->shader.bc.bytecode[i]); } } else { - memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr)); + memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); } rctx->ws->buffer_unmap(shader->bo->cs_buf); } - /* build state */ - switch (rshader->processor_type) { + + /* Build state. */ + switch (shader->shader.processor_type) { case TGSI_PROCESSOR_VERTEX: if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_shader_vs(ctx, shader); + evergreen_update_vs_state(ctx, shader); } else { - r600_pipe_shader_vs(ctx, shader); + r600_update_vs_state(ctx, shader); } break; case TGSI_PROCESSOR_FRAGMENT: if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_shader_ps(ctx, shader); + evergreen_update_ps_state(ctx, shader); } else { - r600_pipe_shader_ps(ctx, shader); + r600_update_ps_state(ctx, shader); } break; default: @@ -103,61 +186,11 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s return 0; } -static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader); - -int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader) -{ - static int dump_shaders = -1; - struct r600_context *rctx = (struct r600_context *)ctx; - int r; - - /* Would like some magic "get_bool_option_once" routine. - */ - if (dump_shaders == -1) - dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); - - if (dump_shaders) { - fprintf(stderr, "--------------------------------------------------------------\n"); - tgsi_dump(shader->tokens, 0); - - if (shader->so.num_outputs) { - unsigned i; - fprintf(stderr, "STREAMOUT\n"); - for (i = 0; i < shader->so.num_outputs; i++) { - unsigned mask = ((1 << shader->so.output[i].num_components) - 1) << - shader->so.output[i].start_component; - fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i, - shader->so.output[i].output_buffer, shader->so.output[i].register_index, - mask & 1 ? "x" : "_", - (mask >> 1) & 1 ? "y" : "_", - (mask >> 2) & 1 ? "z" : "_", - (mask >> 3) & 1 ? "w" : "_"); - } - } - } - r = r600_shader_from_tgsi(rctx, shader); - if (r) { - R600_ERR("translation from TGSI failed !\n"); - return r; - } - r = r600_bytecode_build(&shader->shader.bc); - if (r) { - R600_ERR("building bytecode failed !\n"); - return r; - } - if (dump_shaders) { - r600_bytecode_dump(&shader->shader.bc); - fprintf(stderr, "______________________________________________________________\n"); - } - return r600_pipe_shader(ctx, shader); -} - void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) { pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); r600_bytecode_clear(&shader->shader.bc); - - memset(&shader->shader,0,sizeof(struct r600_shader)); + r600_release_command_buffer(&shader->command_buffer); } /* @@ -171,6 +204,7 @@ struct r600_shader_src { unsigned neg; unsigned abs; unsigned rel; + unsigned kc_bank; uint32_t value[4]; }; @@ -188,6 +222,7 @@ struct r600_shader_ctx { uint32_t *literals; uint32_t nliterals; uint32_t max_driver_temp_used; + boolean use_llvm; /* needed for evergreen interpolation */ boolean input_centroid; boolean input_linear; @@ -204,13 +239,13 @@ struct r600_shader_ctx { struct r600_shader_tgsi_instruction { unsigned tgsi_opcode; unsigned is_op3; - unsigned r600_opcode; + unsigned op; int (*process)(struct r600_shader_ctx *ctx); }; static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); -static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only); +static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason); static void fc_pushlevel(struct r600_shader_ctx *ctx, int type); static int tgsi_else(struct r600_shader_ctx *ctx); static int tgsi_endif(struct r600_shader_ctx *ctx); @@ -236,26 +271,41 @@ int r600_compute_shader_create(struct pipe_context * ctx, unsigned char * bytes; unsigned byte_count; struct r600_shader_ctx shader_ctx; - unsigned dump = 0; - - if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { - dump = 1; - } + boolean use_kill = false; + bool dump = (r600_ctx->screen->debug_flags & DBG_CS) != 0; - r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump); shader_ctx.bc = bytecode; - r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family); + r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family, + r600_ctx->screen->msaa_texture_support); shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE; + shader_ctx.bc->isa = r600_ctx->isa; + r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family, + shader_ctx.bc, &use_kill, dump); r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count); + if (shader_ctx.bc->chip_class == CAYMAN) { + cm_bytecode_add_cf_end(shader_ctx.bc); + } r600_bytecode_build(shader_ctx.bc); if (dump) { - r600_bytecode_dump(shader_ctx.bc); + r600_bytecode_disasm(shader_ctx.bc); } + free(bytes); return 1; } #endif /* HAVE_OPENCL */ +static uint32_t i32_from_byte_stream(unsigned char * bytes, + unsigned * bytes_read) +{ + unsigned i; + uint32_t out = 0; + for (i = 0; i < 4; i++) { + out |= bytes[(*bytes_read)++] << (8 * i); + } + return out; +} + static unsigned r600_src_from_byte_stream(unsigned char * bytes, unsigned bytes_read, struct r600_bytecode_alu * alu, unsigned src_idx) { @@ -278,34 +328,83 @@ static unsigned r600_src_from_byte_stream(unsigned char * bytes, static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx, unsigned char * bytes, unsigned bytes_read) { - unsigned src_idx; - unsigned inst0, inst1; + unsigned src_idx, src_num; struct r600_bytecode_alu alu; + unsigned src_use_sel[3]; + const struct alu_op_info *alu_op; + unsigned src_sel[3] = {}; + uint32_t word0, word1; + + src_num = bytes[bytes_read++]; + memset(&alu, 0, sizeof(alu)); - for(src_idx = 0; src_idx < 3; src_idx++) { - bytes_read = r600_src_from_byte_stream(bytes, bytes_read, - &alu, src_idx); - } - - alu.dst.sel = bytes[bytes_read++]; - alu.dst.chan = bytes[bytes_read++]; - alu.dst.clamp = bytes[bytes_read++]; - alu.dst.write = bytes[bytes_read++]; - alu.dst.rel = bytes[bytes_read++]; - inst0 = bytes[bytes_read++]; - inst1 = bytes[bytes_read++]; - alu.inst = inst0 | (inst1 << 8); - alu.last = bytes[bytes_read++]; - alu.is_op3 = bytes[bytes_read++]; - alu.predicate = bytes[bytes_read++]; - alu.bank_swizzle = bytes[bytes_read++]; - alu.bank_swizzle_force = bytes[bytes_read++]; - alu.omod = bytes[bytes_read++]; - alu.index_mode = bytes[bytes_read++]; - r600_bytecode_add_alu(ctx->bc, &alu); + for(src_idx = 0; src_idx < src_num; src_idx++) { + unsigned i; + src_use_sel[src_idx] = bytes[bytes_read++]; + for (i = 0; i < 4; i++) { + src_sel[src_idx] |= bytes[bytes_read++] << (i * 8); + } + for (i = 0; i < 4; i++) { + alu.src[src_idx].value |= bytes[bytes_read++] << (i * 8); + } + } + + word0 = i32_from_byte_stream(bytes, &bytes_read); + word1 = i32_from_byte_stream(bytes, &bytes_read); + + switch(ctx->bc->chip_class) { + default: + case R600: + r600_bytecode_alu_read(ctx->bc, &alu, word0, word1); + break; + case R700: + case EVERGREEN: + case CAYMAN: + r700_bytecode_alu_read(ctx->bc, &alu, word0, word1); + break; + } + + for(src_idx = 0; src_idx < src_num; src_idx++) { + if (src_use_sel[src_idx]) { + unsigned sel = src_sel[src_idx]; + + alu.src[src_idx].chan = sel & 3; + sel >>= 2; + + if (sel>=512) { /* constant */ + sel -= 512; + alu.src[src_idx].kc_bank = sel >> 12; + alu.src[src_idx].sel = (sel & 4095) + 512; + } + else { + alu.src[src_idx].sel = sel; + } + } + } + + alu_op = r600_isa_alu(alu.op); + +#if HAVE_LLVM < 0x0302 + if ((alu_op->flags & AF_PRED) && alu_op->src_count == 2) { + alu.update_pred = 1; + alu.dst.write = 0; + alu.src[1].sel = V_SQ_ALU_SRC_0; + alu.src[1].chan = 0; + alu.last = 1; + } +#endif + + if (alu_op->flags & AF_MOVA) { + ctx->bc->ar_reg = alu.src[0].sel; + ctx->bc->ar_chan = alu.src[0].chan; + ctx->bc->ar_loaded = 0; + return bytes_read; + } + + r600_bytecode_add_alu_type(ctx->bc, &alu, ctx->bc->cf_last->op); /* XXX: Handle other KILL instructions */ - if (alu.inst == CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT)) { + if (alu_op->flags & AF_KILL) { ctx->shader->uses_kill = 1; /* XXX: This should be enforced in the LLVM backend. */ ctx->bc->force_add_cf = 1; @@ -313,25 +412,14 @@ static unsigned r600_alu_from_byte_stream(struct r600_shader_ctx *ctx, return bytes_read; } -static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu, - unsigned pred_inst) +static void llvm_if(struct r600_shader_ctx *ctx) { - alu->inst = pred_inst; - alu->predicate = 1; - alu->dst.write = 0; - alu->src[1].sel = V_SQ_ALU_SRC_0; - alu->src[1].chan = 0; - alu->last = 1; - r600_bytecode_add_alu_type(ctx->bc, alu, - CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); - - r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); + r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); fc_pushlevel(ctx, FC_IF); - callstack_check_depth(ctx, FC_PUSH_VPM, 0); + callstack_push(ctx, FC_PUSH_VPM); } -static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx, - struct r600_bytecode_alu *alu, unsigned compare_opcode) +static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx) { unsigned opcode = TGSI_OPCODE_BRK; if (ctx->bc->chip_class == CAYMAN) @@ -340,7 +428,7 @@ static void r600_break_from_byte_stream(struct r600_shader_ctx *ctx, ctx->inst_info = &eg_shader_tgsi_instruction[opcode]; else ctx->inst_info = &r600_shader_tgsi_instruction[opcode]; - llvm_if(ctx, alu, compare_opcode); + llvm_if(ctx); tgsi_loop_brk_cont(ctx); tgsi_endif(ctx); } @@ -354,31 +442,25 @@ static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx, bytes_read = r600_src_from_byte_stream(bytes, bytes_read, &alu, 0); inst = bytes[bytes_read++]; switch (inst) { - case 0: - llvm_if(ctx, &alu, - CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); + case 0: /* IF_PREDICATED */ + llvm_if(ctx); break; - case 1: + case 1: /* ELSE */ tgsi_else(ctx); break; - case 2: + case 2: /* ENDIF */ tgsi_endif(ctx); break; - case 3: + case 3: /* BGNLOOP */ tgsi_bgnloop(ctx); break; - case 4: + case 4: /* ENDLOOP */ tgsi_endloop(ctx); break; - case 5: - r600_break_from_byte_stream(ctx, &alu, - CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE)); - break; - case 6: - r600_break_from_byte_stream(ctx, &alu, - CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)); + case 5: /* PREDICATED_BREAK */ + r600_break_from_byte_stream(ctx); break; - case 7: + case 6: /* CONTINUE */ { unsigned opcode = TGSI_OPCODE_CONT; if (ctx->bc->chip_class == CAYMAN) { @@ -394,10 +476,6 @@ static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx, tgsi_loop_brk_cont(ctx); } break; - case 8: - r600_break_from_byte_stream(ctx, &alu, - CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT)); - break; } return bytes_read; @@ -408,29 +486,38 @@ static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx, { struct r600_bytecode_tex tex; - tex.inst = bytes[bytes_read++]; - tex.resource_id = bytes[bytes_read++]; - tex.src_gpr = bytes[bytes_read++]; - tex.src_rel = bytes[bytes_read++]; - tex.dst_gpr = bytes[bytes_read++]; - tex.dst_rel = bytes[bytes_read++]; - tex.dst_sel_x = bytes[bytes_read++]; - tex.dst_sel_y = bytes[bytes_read++]; - tex.dst_sel_z = bytes[bytes_read++]; - tex.dst_sel_w = bytes[bytes_read++]; - tex.lod_bias = bytes[bytes_read++]; - tex.coord_type_x = bytes[bytes_read++]; - tex.coord_type_y = bytes[bytes_read++]; - tex.coord_type_z = bytes[bytes_read++]; - tex.coord_type_w = bytes[bytes_read++]; - tex.offset_x = bytes[bytes_read++]; - tex.offset_y = bytes[bytes_read++]; - tex.offset_z = bytes[bytes_read++]; - tex.sampler_id = bytes[bytes_read++]; - tex.src_sel_x = bytes[bytes_read++]; - tex.src_sel_y = bytes[bytes_read++]; - tex.src_sel_z = bytes[bytes_read++]; - tex.src_sel_w = bytes[bytes_read++]; + uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read); + uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read); + uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read); + + tex.op = r600_isa_fetch_by_opcode(ctx->bc->isa, G_SQ_TEX_WORD0_TEX_INST(word0)); + tex.resource_id = G_SQ_TEX_WORD0_RESOURCE_ID(word0); + tex.src_gpr = G_SQ_TEX_WORD0_SRC_GPR(word0); + tex.src_rel = G_SQ_TEX_WORD0_SRC_REL(word0); + tex.dst_gpr = G_SQ_TEX_WORD1_DST_GPR(word1); + tex.dst_rel = G_SQ_TEX_WORD1_DST_REL(word1); + tex.dst_sel_x = G_SQ_TEX_WORD1_DST_SEL_X(word1); + tex.dst_sel_y = G_SQ_TEX_WORD1_DST_SEL_Y(word1); + tex.dst_sel_z = G_SQ_TEX_WORD1_DST_SEL_Z(word1); + tex.dst_sel_w = G_SQ_TEX_WORD1_DST_SEL_W(word1); + tex.lod_bias = G_SQ_TEX_WORD1_LOD_BIAS(word1); + tex.coord_type_x = G_SQ_TEX_WORD1_COORD_TYPE_X(word1); + tex.coord_type_y = G_SQ_TEX_WORD1_COORD_TYPE_Y(word1); + tex.coord_type_z = G_SQ_TEX_WORD1_COORD_TYPE_Z(word1); + tex.coord_type_w = G_SQ_TEX_WORD1_COORD_TYPE_W(word1); + tex.offset_x = G_SQ_TEX_WORD2_OFFSET_X(word2); + tex.offset_y = G_SQ_TEX_WORD2_OFFSET_Y(word2); + tex.offset_z = G_SQ_TEX_WORD2_OFFSET_Z(word2); + tex.sampler_id = G_SQ_TEX_WORD2_SAMPLER_ID(word2); + tex.src_sel_x = G_SQ_TEX_WORD2_SRC_SEL_X(word2); + tex.src_sel_y = G_SQ_TEX_WORD2_SRC_SEL_Y(word2); + tex.src_sel_z = G_SQ_TEX_WORD2_SRC_SEL_Z(word2); + tex.src_sel_w = G_SQ_TEX_WORD2_SRC_SEL_W(word2); + tex.offset_x <<= 1; + tex.offset_y <<= 1; + tex.offset_z <<= 1; + + tex.inst_mod = 0; r600_bytecode_add_tex(ctx->bc, &tex); @@ -441,33 +528,63 @@ static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx, unsigned char * bytes, unsigned bytes_read) { struct r600_bytecode_vtx vtx; + + uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read); + uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read); + uint32_t word2 = i32_from_byte_stream(bytes, &bytes_read); + memset(&vtx, 0, sizeof(vtx)); - vtx.inst = bytes[bytes_read++]; - vtx.fetch_type = bytes[bytes_read++]; - vtx.buffer_id = bytes[bytes_read++]; - vtx.src_gpr = bytes[bytes_read++]; - vtx.src_sel_x = bytes[bytes_read++]; - vtx.mega_fetch_count = bytes[bytes_read++]; - vtx.dst_gpr = bytes[bytes_read++]; - vtx.dst_sel_x = bytes[bytes_read++]; - vtx.dst_sel_y = bytes[bytes_read++]; - vtx.dst_sel_z = bytes[bytes_read++]; - vtx.dst_sel_w = bytes[bytes_read++]; - vtx.use_const_fields = bytes[bytes_read++]; - vtx.data_format = bytes[bytes_read++]; - vtx.num_format_all = bytes[bytes_read++]; - vtx.format_comp_all = bytes[bytes_read++]; - vtx.srf_mode_all = bytes[bytes_read++]; - /* offset is 2 bytes wide */ - vtx.offset = bytes[bytes_read++]; - vtx.offset |= bytes[bytes_read++]; - vtx.endian = bytes[bytes_read++]; + + /* WORD0 */ + vtx.op = r600_isa_fetch_by_opcode(ctx->bc->isa, + G_SQ_VTX_WORD0_VTX_INST(word0)); + vtx.fetch_type = G_SQ_VTX_WORD0_FETCH_TYPE(word0); + vtx.buffer_id = G_SQ_VTX_WORD0_BUFFER_ID(word0); + vtx.src_gpr = G_SQ_VTX_WORD0_SRC_GPR(word0); + vtx.src_sel_x = G_SQ_VTX_WORD0_SRC_SEL_X(word0); + vtx.mega_fetch_count = G_SQ_VTX_WORD0_MEGA_FETCH_COUNT(word0); + + /* WORD1 */ + vtx.dst_gpr = G_SQ_VTX_WORD1_GPR_DST_GPR(word1); + vtx.dst_sel_x = G_SQ_VTX_WORD1_DST_SEL_X(word1); + vtx.dst_sel_y = G_SQ_VTX_WORD1_DST_SEL_Y(word1); + vtx.dst_sel_z = G_SQ_VTX_WORD1_DST_SEL_Z(word1); + vtx.dst_sel_w = G_SQ_VTX_WORD1_DST_SEL_W(word1); + vtx.use_const_fields = G_SQ_VTX_WORD1_USE_CONST_FIELDS(word1); + vtx.data_format = G_SQ_VTX_WORD1_DATA_FORMAT(word1); + vtx.num_format_all = G_SQ_VTX_WORD1_NUM_FORMAT_ALL(word1); + vtx.format_comp_all = G_SQ_VTX_WORD1_FORMAT_COMP_ALL(word1); + vtx.srf_mode_all = G_SQ_VTX_WORD1_SRF_MODE_ALL(word1); + + /* WORD 2*/ + vtx.offset = G_SQ_VTX_WORD2_OFFSET(word2); + vtx.endian = G_SQ_VTX_WORD2_ENDIAN_SWAP(word2); if (r600_bytecode_add_vtx(ctx->bc, &vtx)) { fprintf(stderr, "Error adding vtx\n"); } - /* Use the Texture Cache */ - ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX; + + /* Use the Texture Cache for compute shaders*/ + if (ctx->bc->chip_class >= EVERGREEN && + ctx->bc->type == TGSI_PROCESSOR_COMPUTE) { + ctx->bc->cf_last->op = CF_OP_TEX; + } + return bytes_read; +} + +static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx, + unsigned char * bytes, unsigned bytes_read) +{ + uint32_t word0 = 0, word1 = 0; + struct r600_bytecode_output output; + memset(&output, 0, sizeof(struct r600_bytecode_output)); + word0 = i32_from_byte_stream(bytes, &bytes_read); + word1 = i32_from_byte_stream(bytes, &bytes_read); + if (ctx->bc->chip_class >= EVERGREEN) + eg_bytecode_export_read(ctx->bc, &output, word0,word1); + else + r600_bytecode_export_read(ctx->bc, &output, word0,word1); + r600_bytecode_add_output(ctx->bc, &output); return bytes_read; } @@ -505,6 +622,24 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, bytes_read = r600_vtx_from_byte_stream(ctx, bytes, bytes_read); break; + case 5: + bytes_read = r600_export_from_byte_stream(ctx, bytes, + bytes_read); + break; + case 6: { + int32_t word0 = i32_from_byte_stream(bytes, &bytes_read); + int32_t word1 = i32_from_byte_stream(bytes, &bytes_read); + + r600_bytecode_add_cf(ctx->bc); + ctx->bc->cf_last->op = r600_isa_cf_by_opcode(ctx->bc->isa, G_SQ_CF_ALU_WORD1_CF_INST(word1), 1); + ctx->bc->cf_last->kcache[0].bank = G_SQ_CF_ALU_WORD0_KCACHE_BANK0(word0); + ctx->bc->cf_last->kcache[0].addr = G_SQ_CF_ALU_WORD1_KCACHE_ADDR0(word1); + ctx->bc->cf_last->kcache[0].mode = G_SQ_CF_ALU_WORD0_KCACHE_MODE0(word0); + ctx->bc->cf_last->kcache[1].bank = G_SQ_CF_ALU_WORD0_KCACHE_BANK1(word0); + ctx->bc->cf_last->kcache[1].addr = G_SQ_CF_ALU_WORD1_KCACHE_ADDR1(word1); + ctx->bc->cf_last->kcache[1].mode = G_SQ_CF_ALU_WORD1_KCACHE_MODE1(word1); + break; + } default: /* XXX: Error here */ break; @@ -535,9 +670,11 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) #endif for (j = 0; j < i->Instruction.NumSrcRegs; j++) { if (i->Src[j].Register.Dimension) { - R600_ERR("unsupported src %d (dimension %d)\n", j, - i->Src[j].Register.Dimension); - return -EINVAL; + if (i->Src[j].Register.File != TGSI_FILE_CONSTANT) { + R600_ERR("unsupported src %d (dimension %d)\n", j, + i->Src[j].Register.Dimension); + return -EINVAL; + } } } for (j = 0; j < i->Instruction.NumDstRegs; j++) { @@ -549,19 +686,15 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) return 0; } -static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) +static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx, + int input) { - int i, r; - struct r600_bytecode_alu alu; - int gpr = 0, base_chan = 0; int ij_index = 0; if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { - ij_index = 0; if (ctx->shader->input[input].centroid) ij_index++; } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { - ij_index = 0; /* if we have perspective add one */ if (ctx->input_perspective) { ij_index++; @@ -573,6 +706,16 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) ij_index++; } + ctx->shader->input[input].ij_index = ij_index; +} + +static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) +{ + int i, r; + struct r600_bytecode_alu alu; + int gpr = 0, base_chan = 0; + int ij_index = ctx->shader->input[input].ij_index; + /* work out gpr and base_chan from index */ gpr = ij_index / 2; base_chan = (2 * (ij_index % 2)) + 1; @@ -581,9 +724,9 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); if (i < 4) - alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_ZW; + alu.op = ALU_OP2_INTERP_ZW; else - alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_XY; + alu.op = ALU_OP2_INTERP_XY; if ((i > 1) && (i < 6)) { alu.dst.sel = ctx->shader->input[input].gpr; @@ -615,7 +758,7 @@ static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INTERP_LOAD_P0; + alu.op = ALU_OP1_INTERP_LOAD_P0; alu.dst.sel = ctx->shader->input[input].gpr; alu.dst.write = 1; @@ -706,9 +849,12 @@ static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) if (ctx->shader->input[index].spi_sid) { ctx->shader->input[index].lds_pos = ctx->shader->nlds++; if (ctx->shader->input[index].interpolate > 0) { - r = evergreen_interp_alu(ctx, index); + evergreen_interp_assign_ij_index(ctx, index); + if (!ctx->use_llvm) + r = evergreen_interp_alu(ctx, index); } else { - r = evergreen_interp_flat(ctx, index); + if (!ctx->use_llvm) + r = evergreen_interp_flat(ctx, index); } } return r; @@ -723,7 +869,7 @@ static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); + alu.op = ALU_OP3_CNDGT; alu.is_op3 = 1; alu.dst.write = 1; alu.dst.sel = gpr_front; @@ -746,19 +892,19 @@ static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back static int tgsi_declaration(struct r600_shader_ctx *ctx) { struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; - unsigned i; - int r; + int r, i, j, count = d->Range.Last - d->Range.First + 1; switch (d->Declaration.File) { case TGSI_FILE_INPUT: - i = ctx->shader->ninput++; + i = ctx->shader->ninput; + ctx->shader->ninput += count; ctx->shader->input[i].name = d->Semantic.Name; ctx->shader->input[i].sid = d->Semantic.Index; - ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); ctx->shader->input[i].interpolate = d->Interp.Interpolate; ctx->shader->input[i].centroid = d->Interp.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First; if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { + ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); switch (ctx->shader->input[i].name) { case TGSI_SEMANTIC_FACE: ctx->face_gpr = ctx->shader->input[i].gpr; @@ -775,16 +921,20 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) return r; } } + for (j = 1; j < count; ++j) { + ctx->shader->input[i + j] = ctx->shader->input[i]; + ctx->shader->input[i + j].gpr += j; + } break; case TGSI_FILE_OUTPUT: i = ctx->shader->noutput++; ctx->shader->output[i].name = d->Semantic.Name; ctx->shader->output[i].sid = d->Semantic.Index; - ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; ctx->shader->output[i].interpolate = d->Interp.Interpolate; ctx->shader->output[i].write_mask = d->Declaration.UsageMask; if (ctx->type == TGSI_PROCESSOR_VERTEX) { + ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); switch (d->Semantic.Name) { case TGSI_SEMANTIC_CLIPDIST: ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2); @@ -798,6 +948,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->cv_output = i; break; } + } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { + switch (d->Semantic.Name) { + case TGSI_SEMANTIC_COLOR: + ctx->shader->nr_ps_max_color_exports++; + break; + } } break; case TGSI_FILE_CONSTANT: @@ -812,7 +968,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); + alu.op = ALU_OP1_INT_TO_FLT; alu.src[0].sel = 0; alu.src[0].chan = 3; @@ -932,9 +1088,14 @@ static void tgsi_src(struct r600_shader_ctx *ctx, r600_src->sel = tgsi_src->Register.Index; r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; } + if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) { + if (tgsi_src->Register.Dimension) { + r600_src->kc_bank = tgsi_src->Dimension.Index; + } + } } -static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) +static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int cb_idx, unsigned int offset, unsigned int dst_reg) { struct r600_bytecode_vtx vtx; unsigned int ar_reg; @@ -945,7 +1106,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset memset(&alu, 0, sizeof(alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); + alu.op = ALU_OP2_ADD_INT; alu.src[0].sel = ctx->bc->ar_reg; alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; @@ -964,6 +1125,7 @@ static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset } memset(&vtx, 0, sizeof(vtx)); + vtx.buffer_id = cb_idx; vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ vtx.src_gpr = ar_reg; vtx.mega_fetch_count = 16; @@ -1003,9 +1165,10 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) if (ctx->src[i].rel) { int treg = r600_get_temp(ctx); - if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) + if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].sel - 512, treg))) return r; + ctx->src[i].kc_bank = 0; ctx->src[i].sel = treg; ctx->src[i].rel = 0; j--; @@ -1013,7 +1176,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx) int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; alu.src[0].sel = ctx->src[i].sel; alu.src[0].chan = k; alu.src[0].rel = ctx->src[i].rel; @@ -1051,7 +1214,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) int treg = r600_get_temp(ctx); for (k = 0; k < 4; k++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; alu.src[0].sel = ctx->src[i].sel; alu.src[0].chan = k; alu.src[0].value = ctx->src[i].value[k]; @@ -1075,36 +1238,9 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) { int i, r, count = ctx->shader->ninput; - /* additional inputs will be allocated right after the existing inputs, - * we won't need them after the color selection, so we don't need to - * reserve these gprs for the rest of the shader code and to adjust - * output offsets etc. */ - int gpr = ctx->file_offset[TGSI_FILE_INPUT] + - ctx->info.file_max[TGSI_FILE_INPUT] + 1; - - if (ctx->face_gpr == -1) { - i = ctx->shader->ninput++; - ctx->shader->input[i].name = TGSI_SEMANTIC_FACE; - ctx->shader->input[i].spi_sid = 0; - ctx->shader->input[i].gpr = gpr++; - ctx->face_gpr = ctx->shader->input[i].gpr; - } - for (i = 0; i < count; i++) { if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) { - int ni = ctx->shader->ninput++; - memcpy(&ctx->shader->input[ni],&ctx->shader->input[i], sizeof(struct r600_shader_io)); - ctx->shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; - ctx->shader->input[ni].spi_sid = r600_spi_sid(&ctx->shader->input[ni]); - ctx->shader->input[ni].gpr = gpr++; - - if (ctx->bc->chip_class >= EVERGREEN) { - r = evergreen_interp_input(ctx, ni); - if (r) - return r; - } - - r = select_twoside_color(ctx, i, ni); + r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input); if (r) return r; } @@ -1112,11 +1248,13 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) return 0; } -static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_shader *pipeshader) +static int r600_shader_from_tgsi(struct r600_screen *rscreen, + struct r600_pipe_shader *pipeshader, + struct r600_shader_key key) { struct r600_shader *shader = &pipeshader->shader; - struct tgsi_token *tokens = pipeshader->tokens; - struct pipe_stream_output_info so = pipeshader->so; + struct tgsi_token *tokens = pipeshader->selector->tokens; + struct pipe_stream_output_info so = pipeshader->selector->so; struct tgsi_full_immediate *immediate; struct tgsi_full_property *property; struct r600_shader_ctx ctx; @@ -1131,13 +1269,14 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh unsigned inst_byte_count = 0; #ifdef R600_USE_LLVM - use_llvm = debug_get_bool_option("R600_LLVM", TRUE); + use_llvm = !(rscreen->debug_flags & DBG_NO_LLVM); #endif ctx.bc = &shader->bc; ctx.shader = shader; - ctx.native_integers = (rctx->screen->glsl_feature_level >= 130); + ctx.native_integers = true; - r600_bytecode_init(ctx.bc, rctx->chip_class, rctx->family); + r600_bytecode_init(ctx.bc, rscreen->chip_class, rscreen->family, + rscreen->msaa_texture_support); ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); tgsi_parse_init(&ctx.parse, tokens); @@ -1150,8 +1289,10 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh ctx.colors_used = 0; ctx.clip_vertex_write = 0; - shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side; - shader->nr_cbufs = rctx->nr_cbufs; + shader->nr_ps_color_exports = 0; + shader->nr_ps_max_color_exports = 0; + + shader->two_side = key.color_two_side; /* register allocations */ /* Values [0,127] correspond to GPR[0..127]. @@ -1179,52 +1320,27 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh } if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; - if (ctx.bc->chip_class >= EVERGREEN) { - r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); - } else { - r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); - } + r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); } if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } - /* LLVM backend setup */ #ifdef R600_USE_LLVM - if (use_llvm && ctx.info.indirect_files) { + if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) { fprintf(stderr, "Warning: R600 LLVM backend does not support " "indirect adressing. Falling back to TGSI " "backend.\n"); use_llvm = 0; } - if (use_llvm) { - struct radeon_llvm_context radeon_llvm_ctx; - LLVMModuleRef mod; - unsigned dump = 0; - memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx)); - radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT]; - mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); - if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { - dump = 1; - } - if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count, - rctx->family, dump)) { - FREE(inst_bytes); - radeon_llvm_dispose(&radeon_llvm_ctx); - use_llvm = 0; - fprintf(stderr, "R600 LLVM backend failed to compile " - "shader. Falling back to TGSI\n"); - } else { - ctx.file_offset[TGSI_FILE_OUTPUT] = - ctx.file_offset[TGSI_FILE_INPUT]; - } - radeon_llvm_dispose(&radeon_llvm_ctx); - } #endif - /* End of LLVM backend setup */ + ctx.use_llvm = use_llvm; - if (!use_llvm) { + if (use_llvm) { ctx.file_offset[TGSI_FILE_OUTPUT] = + ctx.file_offset[TGSI_FILE_INPUT]; + } else { + ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + ctx.info.file_max[TGSI_FILE_INPUT] + 1; } @@ -1274,8 +1390,7 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh shader->fs_write_all = TRUE; break; case TGSI_PROPERTY_VS_PROHIBIT_UCPS: - if (property->u[0].Data == 1) - shader->vs_prohibit_ucps = TRUE; + /* we don't need this one */ break; } break; @@ -1285,79 +1400,163 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh goto out_err; } } + + /* Process two side if needed */ + if (shader->two_side && ctx.colors_used) { + int i, count = ctx.shader->ninput; + unsigned next_lds_loc = ctx.shader->nlds; + + /* additional inputs will be allocated right after the existing inputs, + * we won't need them after the color selection, so we don't need to + * reserve these gprs for the rest of the shader code and to adjust + * output offsets etc. */ + int gpr = ctx.file_offset[TGSI_FILE_INPUT] + + ctx.info.file_max[TGSI_FILE_INPUT] + 1; + + if (ctx.face_gpr == -1) { + i = ctx.shader->ninput++; + ctx.shader->input[i].name = TGSI_SEMANTIC_FACE; + ctx.shader->input[i].spi_sid = 0; + ctx.shader->input[i].gpr = gpr++; + ctx.face_gpr = ctx.shader->input[i].gpr; + } - if (ctx.fragcoord_input >= 0) { - if (ctx.bc->chip_class == CAYMAN) { - for (j = 0 ; j < 4; j++) { + for (i = 0; i < count; i++) { + if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) { + int ni = ctx.shader->ninput++; + memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io)); + ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; + ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]); + ctx.shader->input[ni].gpr = gpr++; + // TGSI to LLVM needs to know the lds position of inputs. + // Non LLVM path computes it later (in process_twoside_color) + ctx.shader->input[ni].lds_pos = next_lds_loc++; + ctx.shader->input[i].back_color_input = ni; + if (ctx.bc->chip_class >= EVERGREEN) { + if ((r = evergreen_interp_input(&ctx, ni))) + return r; + } + } + } + } + +/* LLVM backend setup */ +#ifdef R600_USE_LLVM + if (use_llvm) { + struct radeon_llvm_context radeon_llvm_ctx; + LLVMModuleRef mod; + bool dump = r600_can_dump_shader(rscreen, ctx.type); + boolean use_kill = false; + + memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx)); + radeon_llvm_ctx.type = ctx.type; + radeon_llvm_ctx.two_side = shader->two_side; + radeon_llvm_ctx.face_gpr = ctx.face_gpr; + radeon_llvm_ctx.r600_inputs = ctx.shader->input; + radeon_llvm_ctx.r600_outputs = ctx.shader->output; + radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1); + radeon_llvm_ctx.chip_class = ctx.bc->chip_class; + radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN); + radeon_llvm_ctx.stream_outputs = &so; + radeon_llvm_ctx.clip_vertex = ctx.cv_output; + radeon_llvm_ctx.alpha_to_one = key.alpha_to_one; + mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); + + if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count, + rscreen->family, ctx.bc, &use_kill, dump)) { + FREE(inst_bytes); + radeon_llvm_dispose(&radeon_llvm_ctx); + use_llvm = 0; + fprintf(stderr, "R600 LLVM backend failed to compile " + "shader. Falling back to TGSI\n"); + } else { + ctx.file_offset[TGSI_FILE_OUTPUT] = + ctx.file_offset[TGSI_FILE_INPUT]; + } + if (use_kill) + ctx.shader->uses_kill = use_kill; + radeon_llvm_dispose(&radeon_llvm_ctx); + } +#endif +/* End of LLVM backend setup */ + + if (shader->fs_write_all && rscreen->chip_class >= EVERGREEN) + shader->nr_ps_max_color_exports = 8; + + if (!use_llvm) { + if (ctx.fragcoord_input >= 0) { + if (ctx.bc->chip_class == CAYMAN) { + for (j = 0 ; j < 4; j++) { + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_RECIP_IEEE; + alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; + alu.src[0].chan = 3; + + alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; + alu.dst.chan = j; + alu.dst.write = (j == 3); + alu.last = 1; + if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) + return r; + } + } else { struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.op = ALU_OP1_RECIP_IEEE; alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; alu.src[0].chan = 3; alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; - alu.dst.chan = j; - alu.dst.write = (j == 3); + alu.dst.chan = 3; + alu.dst.write = 1; alu.last = 1; if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) return r; } - } else { - struct r600_bytecode_alu alu; - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; - alu.src[0].chan = 3; + } - alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; - alu.dst.chan = 3; - alu.dst.write = 1; - alu.last = 1; - if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) + if (shader->two_side && ctx.colors_used) { + if ((r = process_twoside_color_inputs(&ctx))) return r; } - } - - if (shader->two_side && ctx.colors_used) { - if ((r = process_twoside_color_inputs(&ctx))) - return r; - } - tgsi_parse_init(&ctx.parse, tokens); - while (!tgsi_parse_end_of_tokens(&ctx.parse)) { - tgsi_parse_token(&ctx.parse); - switch (ctx.parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (use_llvm) { - continue; + tgsi_parse_init(&ctx.parse, tokens); + while (!tgsi_parse_end_of_tokens(&ctx.parse)) { + tgsi_parse_token(&ctx.parse); + switch (ctx.parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + r = tgsi_is_supported(&ctx); + if (r) + goto out_err; + ctx.max_driver_temp_used = 0; + /* reserve first tmp for everyone */ + r600_get_temp(&ctx); + + opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; + if ((r = tgsi_split_constant(&ctx))) + goto out_err; + if ((r = tgsi_split_literal_constant(&ctx))) + goto out_err; + if (ctx.bc->chip_class == CAYMAN) + ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; + else if (ctx.bc->chip_class >= EVERGREEN) + ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; + else + ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; + r = ctx.inst_info->process(&ctx); + if (r) + goto out_err; + break; + default: + break; } - r = tgsi_is_supported(&ctx); - if (r) - goto out_err; - ctx.max_driver_temp_used = 0; - /* reserve first tmp for everyone */ - r600_get_temp(&ctx); - - opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; - if ((r = tgsi_split_constant(&ctx))) - goto out_err; - if ((r = tgsi_split_literal_constant(&ctx))) - goto out_err; - if (ctx.bc->chip_class == CAYMAN) - ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; - else if (ctx.bc->chip_class >= EVERGREEN) - ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; - else - ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; - r = ctx.inst_info->process(&ctx); - if (r) - goto out_err; - break; - default: - break; } } + /* Reset the temporary register counter. */ + ctx.max_driver_temp_used = 0; + /* Get instructions if we are using the LLVM backend. */ if (use_llvm) { r600_bytecode_from_byte_stream(&ctx, inst_bytes, inst_byte_count); @@ -1367,15 +1566,20 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh noutput = shader->noutput; if (ctx.clip_vertex_write) { + unsigned clipdist_temp[2]; + + clipdist_temp[0] = r600_get_temp(&ctx); + clipdist_temp[1] = r600_get_temp(&ctx); + /* need to convert a clipvertex write into clipdistance writes and not export the clip vertex anymore */ memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; - shader->output[noutput].gpr = ctx.temp_reg; + shader->output[noutput].gpr = clipdist_temp[0]; noutput++; shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; - shader->output[noutput].gpr = ctx.temp_reg+1; + shader->output[noutput].gpr = clipdist_temp[1]; noutput++; /* reset spi_sid for clipvertex output to avoid confusing spi */ @@ -1390,20 +1594,21 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh for (j = 0; j < 4; j++) { struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4); + alu.op = ALU_OP2_DOT4; alu.src[0].sel = shader->output[ctx.cv_output].gpr; alu.src[0].chan = j; alu.src[1].sel = 512 + i; - alu.src[1].kc_bank = 1; + alu.src[1].kc_bank = R600_UCP_CONST_BUFFER; alu.src[1].chan = j; - alu.dst.sel = ctx.temp_reg + oreg; + alu.dst.sel = clipdist_temp[oreg]; alu.dst.chan = j; alu.dst.write = (j == ochan); if (j == 3) alu.last = 1; - r = r600_bytecode_add_alu(ctx.bc, &alu); + if (!use_llvm) + r = r600_bytecode_add_alu(ctx.bc, &alu); if (r) return r; } @@ -1411,25 +1616,65 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh } /* Add stream outputs. */ - if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) { - for (i = 0; i < so.num_outputs; i++) { - struct r600_bytecode_output output; + if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs && !use_llvm) { + unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS]; + /* Sanity checking. */ + if (so.num_outputs > PIPE_MAX_SHADER_OUTPUTS) { + R600_ERR("Too many stream outputs: %d\n", so.num_outputs); + r = -EINVAL; + goto out_err; + } + for (i = 0; i < so.num_outputs; i++) { if (so.output[i].output_buffer >= 4) { - R600_ERR("exceeded the max number of stream output buffers, got: %d\n", + R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", so.output[i].output_buffer); r = -EINVAL; goto out_err; } + } + + /* Initialize locations where the outputs are stored. */ + for (i = 0; i < so.num_outputs; i++) { + so_gpr[i] = shader->output[so.output[i].register_index].gpr; + + /* Lower outputs with dst_offset < start_component. + * + * We can only output 4D vectors with a write mask, e.g. we can + * only output the W component at offset 3, etc. If we want + * to store Y, Z, or W at buffer offset 0, we need to use MOV + * to move it to X and output X. */ if (so.output[i].dst_offset < so.output[i].start_component) { - R600_ERR("stream_output - dst_offset cannot be less than start_component\n"); - r = -EINVAL; - goto out_err; + unsigned tmp = r600_get_temp(&ctx); + + for (j = 0; j < so.output[i].num_components; j++) { + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + alu.src[0].sel = so_gpr[i]; + alu.src[0].chan = so.output[i].start_component + j; + + alu.dst.sel = tmp; + alu.dst.chan = j; + alu.dst.write = 1; + if (j == so.output[i].num_components - 1) + alu.last = 1; + r = r600_bytecode_add_alu(ctx.bc, &alu); + if (r) + return r; + } + so.output[i].start_component = 0; + so_gpr[i] = tmp; } + } + + /* Write outputs to buffers. */ + for (i = 0; i < so.num_outputs; i++) { + struct r600_bytecode_output output; memset(&output, 0, sizeof(struct r600_bytecode_output)); - output.gpr = shader->output[so.output[i].register_index].gpr; - output.elem_size = 0; + output.gpr = so_gpr[i]; + output.elem_size = so.output[i].num_components; output.array_base = so.output[i].dst_offset - so.output[i].start_component; output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; output.burst_count = 1; @@ -1441,31 +1686,31 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh if (ctx.bc->chip_class >= EVERGREEN) { switch (so.output[i].output_buffer) { case 0: - output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0; + output.op = CF_OP_MEM_STREAM0_BUF0; break; case 1: - output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1; + output.op = CF_OP_MEM_STREAM0_BUF1; break; case 2: - output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2; + output.op = CF_OP_MEM_STREAM0_BUF2; break; case 3: - output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3; + output.op = CF_OP_MEM_STREAM0_BUF3; break; } } else { switch (so.output[i].output_buffer) { case 0: - output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0; + output.op = CF_OP_MEM_STREAM0; break; case 1: - output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1; + output.op = CF_OP_MEM_STREAM1; break; case 2: - output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2; + output.op = CF_OP_MEM_STREAM2; break; case 3: - output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3; + output.op = CF_OP_MEM_STREAM3; break; } } @@ -1487,7 +1732,7 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh output[j].burst_count = 1; output[j].barrier = 1; output[j].type = -1; - output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); + output[j].op = CF_OP_EXPORT; switch (ctx.type) { case TGSI_PROCESSOR_VERTEX: switch (shader->output[i].name) { @@ -1525,10 +1770,18 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh break; case TGSI_PROCESSOR_FRAGMENT: if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { + /* never export more colors than the number of CBs */ + if (next_pixel_base && next_pixel_base >= key.nr_cbufs) { + /* skip export */ + j--; + continue; + } + output[j].swizzle_w = key.alpha_to_one ? 5 : 3; output[j].array_base = next_pixel_base++; output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; - if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { - for (k = 1; k < shader->nr_cbufs; k++) { + shader->nr_ps_color_exports++; + if (shader->fs_write_all && (rscreen->chip_class >= EVERGREEN)) { + for (k = 1; k < key.nr_cbufs; k++) { j++; memset(&output[j], 0, sizeof(struct r600_bytecode_output)); output[j].gpr = shader->output[i].gpr; @@ -1536,12 +1789,13 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh output[j].swizzle_x = 0; output[j].swizzle_y = 1; output[j].swizzle_z = 2; - output[j].swizzle_w = 3; + output[j].swizzle_w = key.alpha_to_one ? 5 : 3; output[j].burst_count = 1; output[j].barrier = 1; output[j].array_base = next_pixel_base++; - output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); + output[j].op = CF_OP_EXPORT; output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; + shader->nr_ps_color_exports++; } } } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { @@ -1574,6 +1828,23 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh } } + /* add fake position export */ + if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) { + memset(&output[j], 0, sizeof(struct r600_bytecode_output)); + output[j].gpr = 0; + output[j].elem_size = 3; + output[j].swizzle_x = 7; + output[j].swizzle_y = 7; + output[j].swizzle_z = 7; + output[j].swizzle_w = 7; + output[j].burst_count = 1; + output[j].barrier = 1; + output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; + output[j].array_base = next_pos_base; + output[j].op = CF_OP_EXPORT; + j++; + } + /* add fake param output for vertex shader if no param is exported */ if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) { memset(&output[j], 0, sizeof(struct r600_bytecode_output)); @@ -1587,12 +1858,12 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh output[j].barrier = 1; output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; output[j].array_base = 0; - output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); + output[j].op = CF_OP_EXPORT; j++; } /* add fake pixel export */ - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && j == 0) { + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0) { memset(&output[j], 0, sizeof(struct r600_bytecode_output)); output[j].gpr = 0; output[j].elem_size = 3; @@ -1604,7 +1875,7 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh output[j].barrier = 1; output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; output[j].array_base = 0; - output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); + output[j].op = CF_OP_EXPORT; j++; } @@ -1619,17 +1890,19 @@ static int r600_shader_from_tgsi(struct r600_context * rctx, struct r600_pipe_sh } if (!(output_done & (1 << output[i].type))) { output_done |= (1 << output[i].type); - output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); + output[i].op = CF_OP_EXPORT_DONE; } } /* add output to bytecode */ - for (i = 0; i < noutput; i++) { - r = r600_bytecode_add_output(ctx.bc, &output[i]); - if (r) - goto out_err; + if (!use_llvm) { + for (i = 0; i < noutput; i++) { + r = r600_bytecode_add_output(ctx.bc, &output[i]); + if (r) + goto out_err; + } } /* add program end */ - if (ctx.bc->chip_class == CAYMAN) + if (!use_llvm && ctx.bc->chip_class == CAYMAN) cm_bytecode_add_cf_end(ctx.bc); /* check GPR limit - we have 124 = 128 - 4 @@ -1671,6 +1944,7 @@ static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, bc_src->abs = shader_src->abs; bc_src->rel = shader_src->rel; bc_src->value = shader_src->value[bc_src->chan]; + bc_src->kc_bank = shader_src->kc_bank; } static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) @@ -1728,7 +2002,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - alu.inst = ctx->inst_info->r600_opcode; + alu.op = ctx->inst_info->op; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { r600_bytecode_src(&alu.src[j], &ctx->src[j], i); @@ -1785,7 +2059,7 @@ static int tgsi_ineg(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = ctx->inst_info->r600_opcode; + alu.op = ctx->inst_info->op; alu.src[0].sel = V_SQ_ALU_SRC_0; @@ -1813,9 +2087,14 @@ static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) for (i = 0 ; i < last_slot; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = ctx->inst_info->r600_opcode; + alu.op = ctx->inst_info->op; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); + + /* RSQ should take the absolute value of src */ + if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_RSQ) { + r600_bytecode_src_set_abs(&alu.src[j]); + } } tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; @@ -1841,7 +2120,7 @@ static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) for (i = 0 ; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = ctx->inst_info->r600_opcode; + alu.op = ctx->inst_info->op; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { r600_bytecode_src(&alu.src[j], &ctx->src[j], k); } @@ -1872,7 +2151,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); + alu.op = ALU_OP3_MULADD; alu.is_op3 = 1; alu.dst.chan = 0; @@ -1892,7 +2171,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) return r; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); + alu.op = ALU_OP1_FRACT; alu.dst.chan = 0; alu.dst.sel = ctx->temp_reg; @@ -1906,7 +2185,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) return r; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); + alu.op = ALU_OP3_MULADD; alu.is_op3 = 1; alu.dst.chan = 0; @@ -1951,7 +2230,7 @@ static int cayman_trig(struct r600_shader_ctx *ctx) for (i = 0; i < last_slot; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = ctx->inst_info->r600_opcode; + alu.op = ctx->inst_info->op; alu.dst.chan = i; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -1980,7 +2259,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) return r; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = ctx->inst_info->r600_opcode; + alu.op = ctx->inst_info->op; alu.dst.chan = 0; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -1998,7 +2277,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; alu.src[0].sel = ctx->temp_reg; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -2031,7 +2310,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); + alu.op = ALU_OP1_COS; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == 0) @@ -2048,7 +2327,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) } } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); + alu.op = ALU_OP1_COS; tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); alu.src[0].sel = ctx->temp_reg; @@ -2065,7 +2344,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0 ; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); + alu.op = ALU_OP1_SIN; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == 1) alu.dst.write = 1; @@ -2081,7 +2360,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) } } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); + alu.op = ALU_OP1_SIN; tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); alu.src[0].sel = ctx->temp_reg; @@ -2097,7 +2376,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); @@ -2115,7 +2394,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); @@ -2139,7 +2418,7 @@ static int tgsi_kill(struct r600_shader_ctx *ctx) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = ctx->inst_info->r600_opcode; + alu.op = ctx->inst_info->op; alu.dst.chan = i; @@ -2173,7 +2452,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* tmp.x = max(src.y, 0.0) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); + alu.op = ALU_OP2_MAX; r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 1; @@ -2197,7 +2476,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) for (i = 0; i < 3; i++) { /* tmp.z = log(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); + alu.op = ALU_OP1_LOG_CLAMPED; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.dst.sel = ctx->temp_reg; @@ -2215,7 +2494,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) } else { /* tmp.z = log(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); + alu.op = ALU_OP1_LOG_CLAMPED; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.dst.sel = ctx->temp_reg; @@ -2232,7 +2511,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); + alu.op = ALU_OP3_MUL_LIT; alu.src[0].sel = sel; alu.src[0].chan = chan; r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); @@ -2250,7 +2529,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) for (i = 0; i < 3; i++) { /* dst.z = exp(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.op = ALU_OP1_EXP_IEEE; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -2266,7 +2545,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) } else { /* dst.z = exp(tmp.x) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.op = ALU_OP1_EXP_IEEE; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); @@ -2279,7 +2558,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.x, <- 1.0 */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ alu.src[0].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); @@ -2290,7 +2569,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.y = max(src.x, 0.0) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); + alu.op = ALU_OP2_MAX; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ alu.src[1].chan = 0; @@ -2302,7 +2581,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) /* dst.w, <- 1.0 */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); @@ -2327,7 +2606,7 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx) * For state trackers other than OpenGL, we'll want to use * _RECIPSQRT_IEEE instead. */ - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); + alu.op = ALU_OP1_RECIPSQRT_CLAMPED; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); @@ -2352,7 +2631,7 @@ static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.src[0].sel = ctx->temp_reg; - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; alu.dst.chan = i; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; @@ -2372,7 +2651,7 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) int i, r; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = ctx->inst_info->r600_opcode; + alu.op = ctx->inst_info->op; for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); } @@ -2395,7 +2674,7 @@ static int cayman_pow(struct r600_shader_ctx *ctx) for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + alu.op = ALU_OP1_LOG_IEEE; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -2409,7 +2688,7 @@ static int cayman_pow(struct r600_shader_ctx *ctx) /* b * LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + alu.op = ALU_OP2_MUL; r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; @@ -2422,7 +2701,7 @@ static int cayman_pow(struct r600_shader_ctx *ctx) for (i = 0; i < last_slot; i++) { /* POW(a,b) = EXP2(b * LOG2(a))*/ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.op = ALU_OP1_EXP_IEEE; alu.src[0].sel = ctx->temp_reg; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -2443,7 +2722,7 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) /* LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + alu.op = ALU_OP1_LOG_IEEE; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2453,7 +2732,7 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) return r; /* b * LOG2(a) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + alu.op = ALU_OP2_MUL; r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); alu.src[1].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; @@ -2464,7 +2743,7 @@ static int tgsi_pow(struct r600_shader_ctx *ctx) return r; /* POW(a,b) = EXP2(b * LOG2(a))*/ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.op = ALU_OP1_EXP_IEEE; alu.src[0].sel = ctx->temp_reg; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -2539,7 +2818,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* tmp2.x = -src0 */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); + alu.op = ALU_OP2_SUB_INT; alu.dst.sel = tmp2; alu.dst.chan = 0; @@ -2555,7 +2834,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* tmp2.y = -src1 */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); + alu.op = ALU_OP2_SUB_INT; alu.dst.sel = tmp2; alu.dst.chan = 1; @@ -2574,7 +2853,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) if (!mod) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT); + alu.op = ALU_OP2_XOR_INT; alu.dst.sel = tmp2; alu.dst.chan = 2; @@ -2590,7 +2869,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* tmp2.x = |src0| */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); + alu.op = ALU_OP3_CNDGE_INT; alu.is_op3 = 1; alu.dst.sel = tmp2; @@ -2608,7 +2887,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* tmp2.y = |src1| */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); + alu.op = ALU_OP3_CNDGE_INT; alu.is_op3 = 1; alu.dst.sel = tmp2; @@ -2630,7 +2909,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) if (ctx->bc->chip_class == CAYMAN) { /* tmp3.x = u2f(src2) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT); + alu.op = ALU_OP1_UINT_TO_FLT; alu.dst.sel = tmp3; alu.dst.chan = 0; @@ -2650,7 +2929,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* tmp0.x = recip(tmp3.x) */ for (j = 0 ; j < 3; j++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE; + alu.op = ALU_OP1_RECIP_IEEE; alu.dst.sel = tmp0; alu.dst.chan = j; @@ -2666,7 +2945,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) } memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + alu.op = ALU_OP2_MUL; alu.src[0].sel = tmp0; alu.src[0].chan = 0; @@ -2682,7 +2961,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) return r; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT); + alu.op = ALU_OP1_FLT_TO_UINT; alu.dst.sel = tmp0; alu.dst.chan = 0; @@ -2697,7 +2976,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT); + alu.op = ALU_OP1_RECIP_UINT; alu.dst.sel = tmp0; alu.dst.chan = 0; @@ -2719,7 +2998,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) if (ctx->bc->chip_class == CAYMAN) { for (j = 0 ; j < 4; j++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); + alu.op = ALU_OP2_MULLO_UINT; alu.dst.sel = tmp0; alu.dst.chan = j; @@ -2740,7 +3019,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) } } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); + alu.op = ALU_OP2_MULLO_UINT; alu.dst.sel = tmp0; alu.dst.chan = 2; @@ -2762,7 +3041,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 3. tmp0.w = -tmp0.z */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); + alu.op = ALU_OP2_SUB_INT; alu.dst.sel = tmp0; alu.dst.chan = 3; @@ -2780,7 +3059,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) if (ctx->bc->chip_class == CAYMAN) { for (j = 0 ; j < 4; j++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); + alu.op = ALU_OP2_MULHI_UINT; alu.dst.sel = tmp0; alu.dst.chan = j; @@ -2801,7 +3080,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) } } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); + alu.op = ALU_OP2_MULHI_UINT; alu.dst.sel = tmp0; alu.dst.chan = 1; @@ -2824,7 +3103,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); + alu.op = ALU_OP3_CNDE_INT; alu.is_op3 = 1; alu.dst.sel = tmp0; @@ -2846,7 +3125,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) if (ctx->bc->chip_class == CAYMAN) { for (j = 0 ; j < 4; j++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); + alu.op = ALU_OP2_MULHI_UINT; alu.dst.sel = tmp0; alu.dst.chan = j; @@ -2864,7 +3143,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) } } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); + alu.op = ALU_OP2_MULHI_UINT; alu.dst.sel = tmp0; alu.dst.chan = 3; @@ -2883,7 +3162,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 7. tmp1.x = tmp0.x - tmp0.w */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); + alu.op = ALU_OP2_SUB_INT; alu.dst.sel = tmp1; alu.dst.chan = 0; @@ -2900,7 +3179,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 8. tmp1.y = tmp0.x + tmp0.w */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); + alu.op = ALU_OP2_ADD_INT; alu.dst.sel = tmp1; alu.dst.chan = 1; @@ -2917,7 +3196,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); + alu.op = ALU_OP3_CNDE_INT; alu.is_op3 = 1; alu.dst.sel = tmp0; @@ -2939,7 +3218,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) if (ctx->bc->chip_class == CAYMAN) { for (j = 0 ; j < 4; j++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); + alu.op = ALU_OP2_MULHI_UINT; alu.dst.sel = tmp0; alu.dst.chan = j; @@ -2961,7 +3240,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) } } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); + alu.op = ALU_OP2_MULHI_UINT; alu.dst.sel = tmp0; alu.dst.chan = 2; @@ -2986,7 +3265,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) if (ctx->bc->chip_class == CAYMAN) { for (j = 0 ; j < 4; j++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); + alu.op = ALU_OP2_MULLO_UINT; alu.dst.sel = tmp0; alu.dst.chan = j; @@ -3008,7 +3287,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) } } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); + alu.op = ALU_OP2_MULLO_UINT; alu.dst.sel = tmp0; alu.dst.chan = 1; @@ -3031,7 +3310,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 12. tmp0.w = src1 - tmp0.y = r */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); + alu.op = ALU_OP2_SUB_INT; alu.dst.sel = tmp0; alu.dst.chan = 3; @@ -3053,7 +3332,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT); + alu.op = ALU_OP2_SETGE_UINT; alu.dst.sel = tmp1; alu.dst.chan = 0; @@ -3074,7 +3353,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT); + alu.op = ALU_OP2_SETGE_UINT; alu.dst.sel = tmp1; alu.dst.chan = 1; @@ -3098,7 +3377,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 15. tmp1.z = tmp0.w - src2 = r - src2 */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); + alu.op = ALU_OP2_SUB_INT; alu.dst.sel = tmp1; alu.dst.chan = 2; @@ -3120,7 +3399,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 16. tmp1.w = tmp0.w + src2 = r + src2 */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); + alu.op = ALU_OP2_ADD_INT; alu.dst.sel = tmp1; alu.dst.chan = 3; @@ -3143,7 +3422,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); + alu.op = ALU_OP2_ADD_INT; alu.dst.sel = tmp1; alu.dst.chan = 2; @@ -3159,7 +3438,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 16. tmp1.w = tmp0.z - 1 = q - 1 */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); + alu.op = ALU_OP2_ADD_INT; alu.dst.sel = tmp1; alu.dst.chan = 3; @@ -3177,7 +3456,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 17. tmp1.x = tmp1.x & tmp1.y */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT); + alu.op = ALU_OP2_AND_INT; alu.dst.sel = tmp1; alu.dst.chan = 0; @@ -3195,7 +3474,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */ /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); + alu.op = ALU_OP3_CNDE_INT; alu.is_op3 = 1; alu.dst.sel = tmp0; @@ -3215,7 +3494,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); + alu.op = ALU_OP3_CNDE_INT; alu.is_op3 = 1; if (signed_op) { @@ -3245,7 +3524,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* tmp0.x = -tmp0.z */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); + alu.op = ALU_OP2_SUB_INT; alu.dst.sel = tmp0; alu.dst.chan = 0; @@ -3262,7 +3541,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* sign of the remainder is the same as the sign of src0 */ /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); + alu.op = ALU_OP3_CNDGE_INT; alu.is_op3 = 1; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -3281,7 +3560,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* tmp0.x = -tmp0.z */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); + alu.op = ALU_OP2_SUB_INT; alu.dst.sel = tmp0; alu.dst.chan = 0; @@ -3298,7 +3577,7 @@ static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) /* fix the quotient sign (same as the sign of src0*src1) */ /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); + alu.op = ALU_OP3_CNDGE_INT; alu.is_op3 = 1; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -3353,7 +3632,7 @@ static int tgsi_f2i(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC); + alu.op = ALU_OP1_TRUNC; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -3372,14 +3651,14 @@ static int tgsi_f2i(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = ctx->inst_info->r600_opcode; + alu.op = ctx->inst_info->op; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; - if (i == last_inst) + if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT) alu.last = 1; r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) @@ -3403,7 +3682,7 @@ static int tgsi_iabs(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); + alu.op = ALU_OP2_SUB_INT; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -3425,7 +3704,7 @@ static int tgsi_iabs(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); + alu.op = ALU_OP3_CNDGE_INT; alu.is_op3 = 1; alu.dst.write = 1; @@ -3459,7 +3738,7 @@ static int tgsi_issg(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); + alu.op = ALU_OP3_CNDGE_INT; alu.is_op3 = 1; alu.dst.sel = ctx->temp_reg; @@ -3483,7 +3762,7 @@ static int tgsi_issg(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT); + alu.op = ALU_OP3_CNDGT_INT; alu.is_op3 = 1; alu.dst.write = 1; @@ -3517,7 +3796,7 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) /* tmp = (src > 0 ? 1 : src) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); + alu.op = ALU_OP3_CNDGT; alu.is_op3 = 1; alu.dst.sel = ctx->temp_reg; @@ -3537,7 +3816,7 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx) /* dst = (-tmp > 0 ? -1 : tmp) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); + alu.op = ALU_OP3_CNDGT; alu.is_op3 = 1; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); @@ -3568,10 +3847,10 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); + alu.op = ALU_OP0_NOP; alu.dst.chan = i; } else { - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; @@ -3598,7 +3877,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = ctx->inst_info->r600_opcode; + alu.op = ctx->inst_info->op; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } @@ -3625,7 +3904,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = ctx->inst_info->r600_opcode; + alu.op = ctx->inst_info->op; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } @@ -3684,6 +3963,128 @@ static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; } +static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading) +{ + struct r600_bytecode_vtx vtx; + struct r600_bytecode_alu alu; + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + int src_gpr, r, i; + int id = tgsi_tex_get_src_gpr(ctx, 1); + + src_gpr = tgsi_tex_get_src_gpr(ctx, 0); + if (src_requires_loading) { + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 3) + alu.last = 1; + alu.dst.write = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + src_gpr = ctx->temp_reg; + } + + memset(&vtx, 0, sizeof(vtx)); + vtx.op = FETCH_OP_VFETCH; + vtx.buffer_id = id + R600_MAX_CONST_BUFFERS; + vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ + vtx.src_gpr = src_gpr; + vtx.mega_fetch_count = 16; + vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; + vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ + vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */ + vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */ + vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */ + vtx.use_const_fields = 1; + vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ + + if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) + return r; + + if (ctx->bc->chip_class >= EVERGREEN) + return 0; + + for (i = 0; i < 4; i++) { + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_AND_INT; + + alu.dst.chan = i; + alu.dst.sel = vtx.dst_gpr; + alu.dst.write = 1; + + alu.src[0].sel = vtx.dst_gpr; + alu.src[0].chan = i; + + alu.src[1].sel = 512 + (id * 2); + alu.src[1].chan = i % 4; + alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; + + if (i == lasti) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + if (inst->Dst[0].Register.WriteMask & 3) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_OR_INT; + + alu.dst.chan = 3; + alu.dst.sel = vtx.dst_gpr; + alu.dst.write = 1; + + alu.src[0].sel = vtx.dst_gpr; + alu.src[0].chan = 3; + + alu.src[1].sel = 512 + (id * 2) + 1; + alu.src[1].chan = 0; + alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; + + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + +static int r600_do_buffer_txq(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bytecode_alu alu; + int r; + int id = tgsi_tex_get_src_gpr(ctx, 1); + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + + if (ctx->bc->chip_class >= EVERGREEN) { + alu.src[0].sel = 512 + (id / 4); + alu.src[0].chan = id % 4; + } else { + /* r600 we have them at channel 2 of the second dword */ + alu.src[0].sel = 512 + (id * 2) + 1; + alu.src[0].chan = 1; + } + alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + return 0; +} + static int tgsi_tex(struct r600_shader_ctx *ctx) { static float one_point_five = 1.5f; @@ -3693,15 +4094,47 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) unsigned src_gpr; int r, i, j; int opcode; + bool read_compressed_msaa = ctx->bc->msaa_texture_mode == MSAA_TEXTURE_COMPRESSED && + inst->Instruction.Opcode == TGSI_OPCODE_TXF && + (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || + inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA); /* Texture fetch instructions can only use gprs as source. * Also they cannot negate the source or take the absolute value */ - const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0); + const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && + tgsi_tex_src_requires_loading(ctx, 0)) || + read_compressed_msaa; boolean src_loaded = FALSE; - unsigned sampler_src_reg = 1; - uint8_t offset_x = 0, offset_y = 0, offset_z = 0; + unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; + int8_t offset_x = 0, offset_y = 0, offset_z = 0; + boolean has_txq_cube_array_z = false; + + if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && + ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) + if (inst->Dst[0].Register.WriteMask & 4) { + ctx->shader->has_txq_cube_array_z_comp = true; + has_txq_cube_array_z = true; + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || + inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || + inst->Instruction.Opcode == TGSI_OPCODE_TXL2) + sampler_src_reg = 2; src_gpr = tgsi_tex_get_src_gpr(ctx, 0); + if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { + if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { + ctx->shader->uses_tex_buffers = true; + return r600_do_buffer_txq(ctx); + } + else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { + if (ctx->bc->chip_class < EVERGREEN) + ctx->shader->uses_tex_buffers = true; + return do_vtx_fetch_inst(ctx, src_requires_loading); + } + } + if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { /* get offset values */ if (inst->Texture.NumOffsets) { @@ -3718,8 +4151,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) for (i = 1; i < 3; i++) { /* set gradients h/v */ memset(&tex, 0, sizeof(struct r600_bytecode_tex)); - tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H : - SQ_TEX_INST_SET_GRADIENTS_V; + tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H : + FETCH_OP_SET_GRADIENTS_V; tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; @@ -3732,7 +4165,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) for (j = 0; j < 4; j++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; r600_bytecode_src(&alu.src[0], &ctx->src[i], j); alu.dst.sel = tex.src_gpr; alu.dst.chan = j; @@ -3771,7 +4204,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) out_chan = 2; for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.op = ALU_OP1_RECIP_IEEE; r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); alu.dst.sel = ctx->temp_reg; @@ -3788,7 +4221,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } else { out_chan = 3; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.op = ALU_OP1_RECIP_IEEE; r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); alu.dst.sel = ctx->temp_reg; @@ -3802,7 +4235,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + alu.op = ALU_OP2_MUL; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = out_chan; r600_bytecode_src(&alu.src[1], &ctx->src[0], i); @@ -3814,7 +4247,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) return r; } memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; alu.dst.sel = ctx->temp_reg; @@ -3829,8 +4262,11 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || - inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) && - inst->Instruction.Opcode != TGSI_OPCODE_TXQ) { + inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && + inst->Instruction.Opcode != TGSI_OPCODE_TXQ && + inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { static const unsigned src0_swizzle[] = {2, 2, 0, 1}; static const unsigned src1_swizzle[] = {1, 0, 2, 2}; @@ -3838,7 +4274,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); + alu.op = ALU_OP2_CUBE; r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); alu.dst.sel = ctx->temp_reg; @@ -3855,7 +4291,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.op = ALU_OP1_RECIP_IEEE; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 2; alu.src[0].abs = 1; @@ -3871,7 +4307,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.op = ALU_OP1_RECIP_IEEE; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 2; alu.src[0].abs = 1; @@ -3889,7 +4325,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) * muladd has no writemask, have to use another temp */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); + alu.op = ALU_OP3_MULADD; alu.is_op3 = 1; alu.src[0].sel = ctx->temp_reg; @@ -3910,7 +4346,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) return r; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); + alu.op = ALU_OP3_MULADD; alu.is_op3 = 1; alu.src[0].sel = ctx->temp_reg; @@ -3930,11 +4366,17 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; - /* write initial W value into Z component */ - if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) { + /* write initial compare value into Z component + - W src 0 for shadow cube + - X src 1 for shadow cube array */ + if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); - r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); + alu.op = ALU_OP1_MOV; + if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) + r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); + else + r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 2; alu.dst.write = 1; @@ -3943,6 +4385,95 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (r) return r; } + + if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { + if (ctx->bc->chip_class >= EVERGREEN) { + int mytmp = r600_get_temp(ctx); + static const float eight = 8.0f; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + alu.dst.sel = mytmp; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP3_MULADD; + alu.is_op3 = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].chan = 0; + alu.src[1].value = *(uint32_t *)&eight; + alu.src[2].sel = mytmp; + alu.src[2].chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } else if (ctx->bc->chip_class < EVERGREEN) { + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); + tex.op = FETCH_OP_SET_CUBEMAP_INDEX; + tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; + tex.src_gpr = r600_get_temp(ctx); + tex.src_sel_x = 0; + tex.src_sel_y = 0; + tex.src_sel_z = 0; + tex.src_sel_w = 0; + tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; + tex.coord_type_x = 1; + tex.coord_type_y = 1; + tex.coord_type_z = 1; + tex.coord_type_w = 1; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); + alu.dst.sel = tex.src_gpr; + alu.dst.chan = 0; + alu.last = 1; + alu.dst.write = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + r = r600_bytecode_add_tex(ctx->bc, &tex); + if (r) + return r; + } + + } + + /* for cube forms of lod and bias we need to route things */ + if (inst->Instruction.Opcode == TGSI_OPCODE_TXB || + inst->Instruction.Opcode == TGSI_OPCODE_TXL || + inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || + inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || + inst->Instruction.Opcode == TGSI_OPCODE_TXL2) + r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); + else + r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 2; + alu.last = 1; + alu.dst.write = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + src_loaded = TRUE; src_gpr = ctx->temp_reg; } @@ -3950,7 +4481,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (src_requires_loading && !src_loaded) { for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; r600_bytecode_src(&alu.src[0], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -3965,31 +4496,172 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src_gpr = ctx->temp_reg; } - opcode = ctx->inst_info->r600_opcode; + /* Obtain the sample index for reading a compressed MSAA color texture. + * To read the FMASK, we use the ldfptr instruction, which tells us + * where the samples are stored. + * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, + * which is the identity mapping. Each nibble says which physical sample + * should be fetched to get that sample. + * + * Assume src.z contains the sample index. It should be modified like this: + * src.z = (ldfptr() >> (src.z * 4)) & 0xF; + * Then fetch the texel with src. + */ + if (read_compressed_msaa) { + unsigned sample_chan = 3; + unsigned temp = r600_get_temp(ctx); + assert(src_loaded); + + /* temp.w = ldfptr() */ + memset(&tex, 0, sizeof(struct r600_bytecode_tex)); + tex.op = FETCH_OP_LD; + tex.inst_mod = 1; /* to indicate this is ldfptr */ + tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); + tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; + tex.src_gpr = src_gpr; + tex.dst_gpr = temp; + tex.dst_sel_x = 7; /* mask out these components */ + tex.dst_sel_y = 7; + tex.dst_sel_z = 7; + tex.dst_sel_w = 0; /* store X */ + tex.src_sel_x = 0; + tex.src_sel_y = 1; + tex.src_sel_z = 2; + tex.src_sel_w = 3; + tex.offset_x = offset_x; + tex.offset_y = offset_y; + tex.offset_z = offset_z; + r = r600_bytecode_add_tex(ctx->bc, &tex); + if (r) + return r; + + /* temp.x = sample_index*4 */ + if (ctx->bc->chip_class == CAYMAN) { + for (i = 0 ; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MULLO_INT; + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 4; + alu.dst.sel = temp; + alu.dst.chan = i; + alu.dst.write = i == 0; + if (i == 3) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MULLO_INT; + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 4; + alu.dst.sel = temp; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + /* sample_index = temp.w >> temp.x */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_LSHR_INT; + alu.src[0].sel = temp; + alu.src[0].chan = 3; + alu.src[1].sel = temp; + alu.src[1].chan = 0; + alu.dst.sel = src_gpr; + alu.dst.chan = sample_chan; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* sample_index & 0xF */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_AND_INT; + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 0xF; + alu.dst.sel = src_gpr; + alu.dst.chan = sample_chan; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; +#if 0 + /* visualize the FMASK */ + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_INT_TO_FLT; + alu.src[0].sel = src_gpr; + alu.src[0].chan = sample_chan; + alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; + alu.dst.chan = i; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +#endif + } + + /* does this shader want a num layers from TXQ for a cube array? */ + if (has_txq_cube_array_z) { + int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + + alu.src[0].sel = 512 + (id / 4); + alu.src[0].kc_bank = R600_TXQ_CONST_BUFFER; + alu.src[0].chan = id % 4; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + /* disable writemask from texture instruction */ + inst->Dst[0].Register.WriteMask &= ~4; + } + + opcode = ctx->inst_info->op; if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || - inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) { + inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { switch (opcode) { - case SQ_TEX_INST_SAMPLE: - opcode = SQ_TEX_INST_SAMPLE_C; + case FETCH_OP_SAMPLE: + opcode = FETCH_OP_SAMPLE_C; break; - case SQ_TEX_INST_SAMPLE_L: - opcode = SQ_TEX_INST_SAMPLE_C_L; + case FETCH_OP_SAMPLE_L: + opcode = FETCH_OP_SAMPLE_C_L; break; - case SQ_TEX_INST_SAMPLE_LB: - opcode = SQ_TEX_INST_SAMPLE_C_LB; + case FETCH_OP_SAMPLE_LB: + opcode = FETCH_OP_SAMPLE_C_LB; break; - case SQ_TEX_INST_SAMPLE_G: - opcode = SQ_TEX_INST_SAMPLE_C_G; + case FETCH_OP_SAMPLE_G: + opcode = FETCH_OP_SAMPLE_C_G; break; } } memset(&tex, 0, sizeof(struct r600_bytecode_tex)); - tex.inst = opcode; + tex.op = opcode; tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; @@ -3999,7 +4671,13 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; - if (src_loaded) { + + if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) { + tex.src_sel_x = 4; + tex.src_sel_y = 4; + tex.src_sel_z = 4; + tex.src_sel_w = 4; + } else if (src_loaded) { tex.src_sel_x = 0; tex.src_sel_y = 1; tex.src_sel_z = 2; @@ -4012,17 +4690,14 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.src_rel = ctx->src[0].rel; } - if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { - tex.src_sel_x = 1; - tex.src_sel_y = 0; - tex.src_sel_z = 3; - tex.src_sel_w = 1; - } - if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) { + if (inst->Texture.Texture == TGSI_TEXTURE_CUBE || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || + inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { tex.src_sel_x = 1; tex.src_sel_y = 0; tex.src_sel_z = 3; - tex.src_sel_w = 2; /* route Z compare value into W */ + tex.src_sel_w = 2; /* route Z compare or Lod value into W */ } if (inst->Texture.Texture != TGSI_TEXTURE_RECT && @@ -4044,15 +4719,15 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && - opcode != SQ_TEX_INST_SAMPLE_C_L && - opcode != SQ_TEX_INST_SAMPLE_C_LB) { + opcode != FETCH_OP_SAMPLE_C_L && + opcode != FETCH_OP_SAMPLE_C_LB) { tex.src_sel_w = tex.src_sel_z; } if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { - if (opcode == SQ_TEX_INST_SAMPLE_C_L || - opcode == SQ_TEX_INST_SAMPLE_C_LB) { + if (opcode == FETCH_OP_SAMPLE_C_L || + opcode == FETCH_OP_SAMPLE_C_LB) { /* the array index is read from Y */ tex.coord_type_y = 0; } else { @@ -4061,10 +4736,33 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) tex.src_sel_z = tex.src_sel_y; } } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || - inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) + inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || + ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && + (ctx->bc->chip_class >= EVERGREEN))) /* the array index is read from Z */ tex.coord_type_z = 0; + /* mask unused source components */ + if (opcode == FETCH_OP_SAMPLE) { + switch (inst->Texture.Texture) { + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + tex.src_sel_z = 7; + tex.src_sel_w = 7; + break; + case TGSI_TEXTURE_1D_ARRAY: + tex.src_sel_y = 7; + tex.src_sel_w = 7; + break; + case TGSI_TEXTURE_1D: + tex.src_sel_y = 7; + tex.src_sel_z = 7; + tex.src_sel_w = 7; + break; + } + } + r = r600_bytecode_add_tex(ctx->bc, &tex); if (r) return r; @@ -4088,7 +4786,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); + alu.op = ALU_OP2_ADD; r600_bytecode_src(&alu.src[0], &ctx->src[1], i); r600_bytecode_src(&alu.src[1], &ctx->src[2], i); alu.omod = 3; @@ -4110,7 +4808,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); + alu.op = ALU_OP2_ADD; alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; r600_bytecode_src(&alu.src[1], &ctx->src[0], i); @@ -4132,7 +4830,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + alu.op = ALU_OP2_MUL; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; r600_bytecode_src(&alu.src[1], &ctx->src[2], i); @@ -4153,7 +4851,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); + alu.op = ALU_OP3_MULADD; alu.is_op3 = 1; r600_bytecode_src(&alu.src[0], &ctx->src[0], i); r600_bytecode_src(&alu.src[1], &ctx->src[1], i); @@ -4184,7 +4882,36 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); + alu.op = ALU_OP3_CNDGE; + r600_bytecode_src(&alu.src[0], &ctx->src[0], i); + r600_bytecode_src(&alu.src[1], &ctx->src[2], i); + r600_bytecode_src(&alu.src[2], &ctx->src[1], i); + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.dst.chan = i; + alu.dst.write = 1; + alu.is_op3 = 1; + if (i == lasti) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + +static int tgsi_ucmp(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bytecode_alu alu; + int i, r; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); + + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP3_CNDGE_INT; r600_bytecode_src(&alu.src[0], &ctx->src[0], i); r600_bytecode_src(&alu.src[1], &ctx->src[2], i); r600_bytecode_src(&alu.src[2], &ctx->src[1], i); @@ -4215,7 +4942,7 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + alu.op = ALU_OP2_MUL; if (i < 3) { r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); @@ -4239,7 +4966,7 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); + alu.op = ALU_OP3_MULADD; if (i < 3) { r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); @@ -4284,7 +5011,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (inst->Dst[0].Register.WriteMask & 1) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); + alu.op = ALU_OP1_FLOOR; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; @@ -4297,22 +5024,20 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.op = ALU_OP1_EXP_IEEE; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; - if (i == 0) - alu.dst.write = 1; - if (i == 2) - alu.last = 1; + alu.dst.write = i == 0; + alu.last = i == 2; r = r600_bytecode_add_alu(ctx->bc, &alu); if (r) return r; } } else { - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.op = ALU_OP1_EXP_IEEE; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -4330,7 +5055,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); + alu.op = ALU_OP1_FRACT; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; @@ -4354,7 +5079,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.op = ALU_OP1_EXP_IEEE; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; @@ -4370,7 +5095,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) } } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.op = ALU_OP1_EXP_IEEE; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->temp_reg; @@ -4389,7 +5114,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; @@ -4417,7 +5142,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + alu.op = ALU_OP1_LOG_IEEE; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); r600_bytecode_src_set_abs(&alu.src[0]); @@ -4435,7 +5160,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + alu.op = ALU_OP1_LOG_IEEE; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); r600_bytecode_src_set_abs(&alu.src[0]); @@ -4448,7 +5173,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) return r; } - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); + alu.op = ALU_OP1_FLOOR; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 0; @@ -4469,7 +5194,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + alu.op = ALU_OP1_LOG_IEEE; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); r600_bytecode_src_set_abs(&alu.src[0]); @@ -4487,7 +5212,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + alu.op = ALU_OP1_LOG_IEEE; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); r600_bytecode_src_set_abs(&alu.src[0]); @@ -4503,7 +5228,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); + alu.op = ALU_OP1_FLOOR; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -4519,7 +5244,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.op = ALU_OP1_EXP_IEEE; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -4536,7 +5261,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) } } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.op = ALU_OP1_EXP_IEEE; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -4553,7 +5278,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (ctx->bc->chip_class == CAYMAN) { for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.op = ALU_OP1_RECIP_IEEE; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -4570,7 +5295,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) } } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.op = ALU_OP1_RECIP_IEEE; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = 1; @@ -4586,7 +5311,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + alu.op = ALU_OP2_MUL; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); r600_bytecode_src_set_abs(&alu.src[0]); @@ -4610,7 +5335,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + alu.op = ALU_OP1_LOG_IEEE; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); r600_bytecode_src_set_abs(&alu.src[0]); @@ -4628,7 +5353,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) } else { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + alu.op = ALU_OP1_LOG_IEEE; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); r600_bytecode_src_set_abs(&alu.src[0]); @@ -4647,7 +5372,7 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); + alu.op = ALU_OP1_MOV; alu.src[0].sel = V_SQ_ALU_SRC_1; alu.src[0].chan = 0; @@ -4674,13 +5399,13 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx) switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: - alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; + alu.op = ALU_OP1_FLT_TO_INT_FLOOR; break; case TGSI_OPCODE_ARR: - alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + alu.op = ALU_OP1_FLT_TO_INT; break; case TGSI_OPCODE_UARL: - alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.op = ALU_OP1_MOV; break; default: assert(0); @@ -4707,7 +5432,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: memset(&alu, 0, sizeof(alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; + alu.op = ALU_OP1_FLOOR; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->bc->ar_reg; alu.dst.write = 1; @@ -4717,7 +5442,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) return r; memset(&alu, 0, sizeof(alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + alu.op = ALU_OP1_FLT_TO_INT; alu.src[0].sel = ctx->bc->ar_reg; alu.dst.sel = ctx->bc->ar_reg; alu.dst.write = 1; @@ -4728,7 +5453,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) break; case TGSI_OPCODE_ARR: memset(&alu, 0, sizeof(alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; + alu.op = ALU_OP1_FLT_TO_INT; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->bc->ar_reg; alu.dst.write = 1; @@ -4739,7 +5464,7 @@ static int tgsi_r600_arl(struct r600_shader_ctx *ctx) break; case TGSI_OPCODE_UARL: memset(&alu, 0, sizeof(alu)); - alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; + alu.op = ALU_OP1_MOV; r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); alu.dst.sel = ctx->bc->ar_reg; alu.dst.write = 1; @@ -4766,7 +5491,7 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + alu.op = ALU_OP2_MUL; tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); if (i == 0 || i == 3) { @@ -4789,14 +5514,15 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx) return 0; } -static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) +static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type) { struct r600_bytecode_alu alu; int r; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = opcode; - alu.predicate = 1; + alu.op = opcode; + alu.execute_mask = 1; + alu.update_pred = 1; alu.dst.sel = ctx->temp_reg; alu.dst.write = 1; @@ -4808,7 +5534,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) alu.last = 1; - r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); + r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type); if (r) return r; return 0; @@ -4821,17 +5547,17 @@ static int pops(struct r600_shader_ctx *ctx, int pops) if (!force_pop) { int alu_pop = 3; if (ctx->bc->cf_last) { - if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)) + if (ctx->bc->cf_last->op == CF_OP_ALU) alu_pop = 0; - else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER)) + else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER) alu_pop = 1; } alu_pop += pops; if (alu_pop == 1) { - ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER); + ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER; ctx->bc->force_add_cf = 1; } else if (alu_pop == 2) { - ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER); + ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER; ctx->bc->force_add_cf = 1; } else { force_pop = 1; @@ -4839,7 +5565,7 @@ static int pops(struct r600_shader_ctx *ctx, int pops) } if (force_pop) { - r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); + r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP); ctx->bc->cf_last->pop_count = pops; ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; } @@ -4847,70 +5573,114 @@ static int pops(struct r600_shader_ctx *ctx, int pops) return 0; } -static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) +static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, + unsigned reason) +{ + struct r600_stack_info *stack = &ctx->bc->stack; + unsigned elements, entries; + + unsigned entry_size = stack->entry_size; + + elements = (stack->loop + stack->push_wqm ) * entry_size; + elements += stack->push; + + switch (ctx->bc->chip_class) { + case R600: + case R700: + /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on + * the stack must be reserved to hold the current active/continue + * masks */ + if (reason == FC_PUSH_VPM) { + elements += 2; + } + break; + + case CAYMAN: + /* r9xx: any stack operation on empty stack consumes 2 additional + * elements */ + elements += 2; + + /* fallthrough */ + /* FIXME: do the two elements added above cover the cases for the + * r8xx+ below? */ + + case EVERGREEN: + /* r8xx+: 2 extra elements are not always required, but one extra + * element must be added for each of the following cases: + * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest + * stack usage. + * (Currently we don't use ALU_ELSE_AFTER.) + * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM + * PUSH instruction executed. + * + * NOTE: it seems we also need to reserve additional element in some + * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader, + * then STACK_SIZE should be 2 instead of 1 */ + if (reason == FC_PUSH_VPM) { + elements += 1; + } + break; + + default: + assert(0); + break; + } + + /* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4 + * for all chips, so we use 4 in the final formula, not the real entry_size + * for the chip */ + entry_size = 4; + + entries = (elements + (entry_size - 1)) / entry_size; + + if (entries > stack->max_entries) + stack->max_entries = entries; +} + +static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) { switch(reason) { case FC_PUSH_VPM: - ctx->bc->callstack[ctx->bc->call_sp].current--; + --ctx->bc->stack.push; + assert(ctx->bc->stack.push >= 0); break; case FC_PUSH_WQM: + --ctx->bc->stack.push_wqm; + assert(ctx->bc->stack.push_wqm >= 0); + break; case FC_LOOP: - ctx->bc->callstack[ctx->bc->call_sp].current -= 4; + --ctx->bc->stack.loop; + assert(ctx->bc->stack.loop >= 0); break; - case FC_REP: - /* TOODO : for 16 vp asic should -= 2; */ - ctx->bc->callstack[ctx->bc->call_sp].current --; + default: + assert(0); break; } } -static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) +static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) { - if (check_max_only) { - int diff; - switch (reason) { - case FC_PUSH_VPM: - diff = 1; - break; - case FC_PUSH_WQM: - diff = 4; - break; - default: - assert(0); - diff = 0; - } - if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > - ctx->bc->callstack[ctx->bc->call_sp].max) { - ctx->bc->callstack[ctx->bc->call_sp].max = - ctx->bc->callstack[ctx->bc->call_sp].current + diff; - } - return; - } switch (reason) { case FC_PUSH_VPM: - ctx->bc->callstack[ctx->bc->call_sp].current++; + ++ctx->bc->stack.push; break; case FC_PUSH_WQM: + ++ctx->bc->stack.push_wqm; case FC_LOOP: - ctx->bc->callstack[ctx->bc->call_sp].current += 4; - break; - case FC_REP: - ctx->bc->callstack[ctx->bc->call_sp].current++; + ++ctx->bc->stack.loop; break; + default: + assert(0); } - if ((ctx->bc->callstack[ctx->bc->call_sp].current) > - ctx->bc->callstack[ctx->bc->call_sp].max) { - ctx->bc->callstack[ctx->bc->call_sp].max = - ctx->bc->callstack[ctx->bc->call_sp].current; - } + callstack_update_max_depth(ctx, reason); } static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) { struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; - sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid, + sp->mid = realloc((void *)sp->mid, sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); sp->mid[sp->num_mid] = ctx->bc->cf_last; sp->num_mid++; @@ -4926,10 +5696,8 @@ static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) static void fc_poplevel(struct r600_shader_ctx *ctx) { struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; - if (sp->mid) { - free(sp->mid); - sp->mid = NULL; - } + free(sp->mid); + sp->mid = NULL; sp->num_mid = 0; sp->start = NULL; sp->type = 0; @@ -4939,14 +5707,14 @@ static void fc_poplevel(struct r600_shader_ctx *ctx) #if 0 static int emit_return(struct r600_shader_ctx *ctx) { - r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); + r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN)); return 0; } static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) { - r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); + r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP)); ctx->bc->cf_last->pop_count = pops; /* XXX work out offset */ return 0; @@ -4975,7 +5743,7 @@ static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) { emit_testflag(ctx); - r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); + r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); ctx->bc->cf_last->pop_count = 1; fc_set_mid(ctx, fc_sp); @@ -4984,21 +5752,43 @@ static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) } #endif -static int tgsi_if(struct r600_shader_ctx *ctx) +static int emit_if(struct r600_shader_ctx *ctx, int opcode) { - emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)); + int alu_type = CF_OP_ALU_PUSH_BEFORE; + + /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by + * LOOP_STARTxxx for nested loops may put the branch stack into a state + * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this + * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */ + if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) { + r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH); + ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; + alu_type = CF_OP_ALU; + } + + emit_logic_pred(ctx, opcode, alu_type); - r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); + r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); fc_pushlevel(ctx, FC_IF); - callstack_check_depth(ctx, FC_PUSH_VPM, 0); + callstack_push(ctx, FC_PUSH_VPM); return 0; } +static int tgsi_if(struct r600_shader_ctx *ctx) +{ + return emit_if(ctx, ALU_OP2_PRED_SETNE); +} + +static int tgsi_uif(struct r600_shader_ctx *ctx) +{ + return emit_if(ctx, ALU_OP2_PRED_SETNE_INT); +} + static int tgsi_else(struct r600_shader_ctx *ctx) { - r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); + r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE); ctx->bc->cf_last->pop_count = 1; fc_set_mid(ctx, ctx->bc->fc_sp); @@ -5022,18 +5812,20 @@ static int tgsi_endif(struct r600_shader_ctx *ctx) } fc_poplevel(ctx); - callstack_decrease_current(ctx, FC_PUSH_VPM); + callstack_pop(ctx, FC_PUSH_VPM); return 0; } static int tgsi_bgnloop(struct r600_shader_ctx *ctx) { - r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); + /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not + * limited to 4096 iterations, like the other LOOP_* instructions. */ + r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10); fc_pushlevel(ctx, FC_LOOP); /* check stack depth */ - callstack_check_depth(ctx, FC_LOOP, 0); + callstack_push(ctx, FC_LOOP); return 0; } @@ -5041,7 +5833,7 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx) { int i; - r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); + r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END); if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { R600_ERR("loop/endloop in shader code are not paired.\n"); @@ -5062,7 +5854,7 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx) } /* XXX add LOOPRET support */ fc_poplevel(ctx); - callstack_decrease_current(ctx, FC_LOOP); + callstack_pop(ctx, FC_LOOP); return 0; } @@ -5081,11 +5873,10 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) return -EINVAL; } - r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); + r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); fc_set_mid(ctx, fscp); - callstack_check_depth(ctx, FC_PUSH_VPM, 1); return 0; } @@ -5093,7 +5884,7 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bytecode_alu alu; - int i, j, r; + int i, j, k, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); /* src0 * src1 */ @@ -5101,21 +5892,40 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + if (ctx->bc->chip_class == CAYMAN) { + for (j = 0 ; j < 4; j++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.dst.chan = i; - alu.dst.sel = ctx->temp_reg; - alu.dst.write = 1; + alu.op = ALU_OP2_MULLO_UINT; + for (k = 0; k < inst->Instruction.NumSrcRegs; k++) { + r600_bytecode_src(&alu.src[k], &ctx->src[k], i); + } + tgsi_dst(ctx, &inst->Dst[0], j, &alu.dst); + alu.dst.sel = ctx->temp_reg; + alu.dst.write = (j == i); + if (j == 3) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); - for (j = 0; j < 2; j++) { - r600_bytecode_src(&alu.src[j], &ctx->src[j], i); - } + alu.dst.chan = i; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; - alu.last = 1; - r = r600_bytecode_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.op = ALU_OP2_MULLO_UINT; + for (j = 0; j < 2; j++) { + r600_bytecode_src(&alu.src[j], &ctx->src[j], i); + } + + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } } @@ -5126,7 +5936,7 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); + alu.op = ALU_OP2_ADD_INT; alu.src[0].sel = ctx->temp_reg; alu.src[0].chan = i; @@ -5143,166 +5953,166 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) } static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, - {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, - {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, + {TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_r600_arl}, + {TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2}, + {TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit}, /* XXX: * For state trackers other than OpenGL, we'll want to use * _RECIP_IEEE instead. */ - {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, - - {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, - {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, - {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, - {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, - {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, - {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, - {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, - {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, - {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, - {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, - {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, - {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, - {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, - {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, + + {TGSI_OPCODE_RSQ, 0, ALU_OP0_NOP, tgsi_rsq}, + {TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp}, + {TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log}, + {TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2}, + {TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2}, + {TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp}, + {TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp}, + {TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst}, + {TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2}, + {TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2}, + {TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap}, + {TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2}, + {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, + {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, + {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, + {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ - {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {20, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ - {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, - {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, - {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, - {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, + {22, 0, ALU_OP0_NOP, tgsi_unsupported}, + {23, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2}, + {TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2}, + {TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2}, + {TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow}, + {TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd}, /* gap */ - {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, - {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, - {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, - {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, - {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ - {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, - {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, - {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, - {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, - {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, - {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, - {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, - {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, - {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, - {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, - {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, - {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, - {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, - {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, + {32, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2}, + {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp}, + {TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig}, + {TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, + {TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, + {TGSI_OPCODE_KILP, 0, ALU_OP2_KILLGT, tgsi_kill}, /* predicated kill */ + {TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2}, + {TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2}, + {TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, tgsi_trig}, + {TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap}, + {TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2}, + {TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex}, + {TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex}, + {TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex}, + {TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_r600_arl}, + {TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg}, + {TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp}, + {TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs}, + {TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, + {TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp}, + {TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, + {TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, + {TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if}, + {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif}, + {76, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else}, + {TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif}, /* gap */ - {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, - {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, + {79, 0, ALU_OP0_NOP, tgsi_unsupported}, + {80, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2}, + {TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, + {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2}, + {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2}, + {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2_trans}, /* gap */ - {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, - {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, - {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, - {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, - {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans}, + {88, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2}, + {TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2}, + {TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod}, + {TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2}, + {TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex}, + {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, + {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, + {TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop}, + {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop}, + {TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, /* gap */ - {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, - {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, - {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod}, - {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, - {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, - {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, - {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, - {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, - {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, - {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {104, 0, ALU_OP0_NOP, tgsi_unsupported}, + {105, 0, ALU_OP0_NOP, tgsi_unsupported}, + {106, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ - {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {108, 0, ALU_OP0_NOP, tgsi_unsupported}, + {109, 0, ALU_OP0_NOP, tgsi_unsupported}, + {110, 0, ALU_OP0_NOP, tgsi_unsupported}, + {111, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ - {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ - {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ + {114, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_KIL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ + {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */ /* gap */ - {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans}, - {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, - {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, - {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, - {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, - {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, - {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans}, - {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, - {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2}, - {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans}, - {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, - {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv}, - {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, - {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, - {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, - {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod}, - {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans}, - {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, - {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, - {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans}, - {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap}, - {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap}, - {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {118, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, + {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv}, + {TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2}, + {TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2}, + {TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg}, + {TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2}, + {TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2_trans}, + {TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap}, + {TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_op2_trans}, + {TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, + {TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2}, + {TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv}, + {TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad}, + {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2}, + {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2}, + {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod}, + {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_UINT, tgsi_op2_trans}, + {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2}, + {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2}, + {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2_trans}, + {TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap}, + {TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2_swap}, + {TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, @@ -5315,168 +6125,187 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl}, - {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_r600_arl}, + {TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp}, {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, - {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex}, + {TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, + {TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, + {TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported}, }; static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, - {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, - {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, - {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq}, - {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, - {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, - {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, - {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, - {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, - {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, - {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, - {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, - {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, - {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, - {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, - {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, - {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_eg_arl}, + {TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2}, + {TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit}, + {TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_RSQ, 0, ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, + {TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp}, + {TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log}, + {TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2}, + {TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2}, + {TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp}, + {TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp}, + {TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst}, + {TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2}, + {TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2}, + {TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap}, + {TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2}, + {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, + {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, + {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, + {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ - {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {20, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ - {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, - {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, - {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, - {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, - {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, + {22, 0, ALU_OP0_NOP, tgsi_unsupported}, + {23, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2}, + {TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2}, + {TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2}, + {TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, + {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow}, + {TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd}, /* gap */ - {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, - {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, - {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, - {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, - {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ - {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, - {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, - {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, - {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, - {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, - {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, - {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, - {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, - {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, - {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, - {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, - {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, - {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, - {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, + {32, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2}, + {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp}, + {TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig}, + {TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, + {TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, + {TGSI_OPCODE_KILP, 0, ALU_OP2_KILLGT, tgsi_kill}, /* predicated kill */ + {TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2}, + {TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2}, + {TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, tgsi_trig}, + {TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap}, + {TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2}, + {TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex}, + {TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex}, + {TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex}, + {TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_eg_arl}, + {TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg}, + {TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp}, + {TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs}, + {TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, + {TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp}, + {TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, + {TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, + {TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if}, + {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif}, + {76, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else}, + {TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif}, /* gap */ - {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, - {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, + {79, 0, ALU_OP0_NOP, tgsi_unsupported}, + {80, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2}, + {TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, + {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2}, + {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2}, + {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2}, /* gap */ - {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, - {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, - {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, - {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, - {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2}, + {88, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2}, + {TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2}, + {TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod}, + {TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2}, + {TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex}, + {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, + {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, + {TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop}, + {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop}, + {TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, /* gap */ - {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, - {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, - {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod}, - {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, - {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, - {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, - {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, - {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, - {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, - {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {104, 0, ALU_OP0_NOP, tgsi_unsupported}, + {105, 0, ALU_OP0_NOP, tgsi_unsupported}, + {106, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ - {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {108, 0, ALU_OP0_NOP, tgsi_unsupported}, + {109, 0, ALU_OP0_NOP, tgsi_unsupported}, + {110, 0, ALU_OP0_NOP, tgsi_unsupported}, + {111, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ - {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ - {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ + {114, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_KIL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ + {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */ /* gap */ - {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i}, - {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, - {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, - {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, - {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, - {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, - {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2}, - {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, - {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i}, - {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans}, - {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, - {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv}, - {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, - {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, - {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, - {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod}, - {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans}, - {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, - {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, - {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2}, - {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap}, - {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2}, - {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {118, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_f2i}, + {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv}, + {TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2}, + {TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2}, + {TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg}, + {TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2}, + {TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2}, + {TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap}, + {TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_f2i}, + {TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, + {TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2}, + {TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv}, + {TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad}, + {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2}, + {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2}, + {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod}, + {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_UINT, tgsi_op2_trans}, + {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2}, + {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2}, + {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2}, + {TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap}, + {TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2}, + {TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, @@ -5489,168 +6318,187 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, - {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_eg_arl}, + {TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp}, {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, - {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex}, + {TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, + {TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, + {TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported}, }; static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { - {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, - {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, - {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, - {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, - {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, - {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, - {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, - {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, - {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, - {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, - {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, - {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, - {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, - {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, - {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, - {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, - {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, - {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_eg_arl}, + {TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2}, + {TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit}, + {TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_IEEE, cayman_emit_float_instr}, + {TGSI_OPCODE_RSQ, 0, ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr}, + {TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp}, + {TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log}, + {TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2}, + {TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2}, + {TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp}, + {TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp}, + {TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst}, + {TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2}, + {TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2}, + {TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap}, + {TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2}, + {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, + {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, + {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, + {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ - {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {20, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ - {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, - {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, - {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, - {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, - {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, - {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, - {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, + {22, 0, ALU_OP0_NOP, tgsi_unsupported}, + {23, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2}, + {TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2}, + {TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2}, + {TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, cayman_emit_float_instr}, + {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, cayman_emit_float_instr}, + {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, cayman_pow}, + {TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd}, /* gap */ - {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, - {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, - {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, - {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, - {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ - {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, - {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, - {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, - {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, - {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, - {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, - {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, - {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, - {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, - {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, - {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, - {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, - {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, - {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, - {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, - {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, + {32, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2}, + {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp}, + {TGSI_OPCODE_COS, 0, ALU_OP1_COS, cayman_trig}, + {TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, + {TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, + {TGSI_OPCODE_KILP, 0, ALU_OP2_KILLGT, tgsi_kill}, /* predicated kill */ + {TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2}, + {TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2}, + {TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, cayman_trig}, + {TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap}, + {TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2}, + {TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex}, + {TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex}, + {TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex}, + {TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_eg_arl}, + {TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg}, + {TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp}, + {TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs}, + {TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, + {TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp}, + {TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, + {TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, + {TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if}, + {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif}, + {76, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else}, + {TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif}, /* gap */ - {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, - {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, + {79, 0, ALU_OP0_NOP, tgsi_unsupported}, + {80, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2}, + {TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2}, + {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2}, + {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2}, + {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2}, /* gap */ - {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, - {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2}, - {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, - {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, - {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2}, + {88, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2}, + {TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2}, + {TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod}, + {TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2}, + {TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex}, + {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, + {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, + {TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop}, + {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop}, + {TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, /* gap */ - {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, - {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, - {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod}, - {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, - {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, - {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, - {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, - {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, - {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, - {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {104, 0, ALU_OP0_NOP, tgsi_unsupported}, + {105, 0, ALU_OP0_NOP, tgsi_unsupported}, + {106, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ - {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {108, 0, ALU_OP0_NOP, tgsi_unsupported}, + {109, 0, ALU_OP0_NOP, tgsi_unsupported}, + {110, 0, ALU_OP0_NOP, tgsi_unsupported}, + {111, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported}, /* gap */ - {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ - {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ + {114, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_KIL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ + {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */ /* gap */ - {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2}, - {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, - {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, - {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, - {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, - {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, - {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2}, - {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, - {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2}, - {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2}, - {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, - {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv}, - {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, - {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, - {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, - {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod}, - {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT, cayman_mul_int_instr}, - {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, - {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, - {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2}, - {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap}, - {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2}, - {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {118, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_op2}, + {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv}, + {TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2}, + {TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2}, + {TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg}, + {TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2}, + {TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2}, + {TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap}, + {TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_op2}, + {TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2}, + {TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2}, + {TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv}, + {TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad}, + {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2}, + {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2}, + {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod}, + {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_INT, cayman_mul_int_instr}, + {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2}, + {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2}, + {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2}, + {TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap}, + {TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2}, + {TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, @@ -5663,9 +6511,28 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, - {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_eg_arl}, + {TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp}, {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, - {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex}, + {TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, + {TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, + {TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported}, };