* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_dump.h"
#include "r600_pipe.h"
#include "r600_asm.h"
#include "r600_sq.h"
+#include "r600_formats.h"
#include "r600_opcodes.h"
#include "r600d.h"
#include <stdio.h>
#include <errno.h>
-
-static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader)
-{
- struct r600_pipe_state *rstate = &shader->rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned spi_vs_out_id[10];
- unsigned i, tmp;
-
- /* clear previous register */
- rstate->nregs = 0;
-
- /* so far never got proper semantic id from tgsi */
- for (i = 0; i < 10; i++) {
- spi_vs_out_id[i] = 0;
- }
- for (i = 0; i < 32; i++) {
- tmp = i << ((i & 3) * 8);
- spi_vs_out_id[i / 4] |= tmp;
- }
- for (i = 0; i < 10; i++) {
- r600_pipe_state_add_reg(rstate,
- R_028614_SPI_VS_OUT_ID_0 + i * 4,
- spi_vs_out_id[i], 0xFFFFFFFF, NULL);
- }
-
- r600_pipe_state_add_reg(rstate,
- R_0286C4_SPI_VS_OUT_CONFIG,
- S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028868_SQ_PGM_RESOURCES_VS,
- S_028868_NUM_GPRS(rshader->bc.ngpr) |
- S_028868_STACK_SIZE(rshader->bc.nstack),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_0288A4_SQ_PGM_RESOURCES_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_0288D0_SQ_PGM_CF_OFFSET_VS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_0288DC_SQ_PGM_CF_OFFSET_FS,
- 0x00000000, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028858_SQ_PGM_START_VS,
- r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
- r600_pipe_state_add_reg(rstate,
- R_028894_SQ_PGM_START_FS,
- r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch);
-
- r600_pipe_state_add_reg(rstate,
- R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
- 0xFFFFFFFF, NULL);
-
-}
+#include <byteswap.h>
int r600_find_vs_semantic_index(struct r600_shader *vs,
struct r600_shader *ps, int id)
return 0;
}
-static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_pipe_state *rstate = &shader->rstate;
- struct r600_shader *rshader = &shader->shader;
- unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1;
- int pos_index = -1, face_index = -1;
-
- /* clear previous register */
- rstate->nregs = 0;
-
- for (i = 0; i < rshader->ninput; i++) {
- tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i));
- if (rshader->input[i].centroid)
- tmp |= S_028644_SEL_CENTROID(1);
- if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR)
- tmp |= S_028644_SEL_LINEAR(1);
-
- if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
- pos_index = i;
- if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
- rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
- tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
- }
- if (rshader->input[i].name == TGSI_SEMANTIC_FACE)
- face_index = i;
- if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC &&
- rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) {
- tmp |= S_028644_PT_SPRITE_TEX(1);
- }
- r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL);
- }
- for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_Z_EXPORT_ENABLE(1),
- S_02880C_Z_EXPORT_ENABLE(1), NULL);
- if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_STENCIL_REF_EXPORT_ENABLE(1),
- S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL);
- }
-
- exports_ps = 0;
- num_cout = 0;
- for (i = 0; i < rshader->noutput; i++) {
- if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL)
- exports_ps |= 1;
- else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
- num_cout++;
- }
- }
- exports_ps |= S_028854_EXPORT_COLORS(num_cout);
- if (!exports_ps) {
- /* always at least export 1 component per pixel */
- exports_ps = 2;
- }
-
- spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) |
- S_0286CC_PERSP_GRADIENT_ENA(1);
- spi_input_z = 0;
- if (pos_index != -1) {
- spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) |
- S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) |
- S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) |
- S_0286CC_BARYC_SAMPLE_CNTL(1));
- spi_input_z |= 1;
- }
-
- spi_ps_in_control_1 = 0;
- if (face_index != -1) {
- spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) |
- S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr);
- }
-
- r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028840_SQ_PGM_START_PS,
- r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo);
- r600_pipe_state_add_reg(rstate,
- R_028850_SQ_PGM_RESOURCES_PS,
- S_028868_NUM_GPRS(rshader->bc.ngpr) |
- S_028868_STACK_SIZE(rshader->bc.nstack),
- 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_028854_SQ_PGM_EXPORTS_PS,
- exports_ps, 0xFFFFFFFF, NULL);
- r600_pipe_state_add_reg(rstate,
- R_0288CC_SQ_PGM_CF_OFFSET_PS,
- 0x00000000, 0xFFFFFFFF, NULL);
-
- if (rshader->uses_kill) {
- /* only set some bits here, the other bits are set in the dsa state */
- r600_pipe_state_add_reg(rstate,
- R_02880C_DB_SHADER_CONTROL,
- S_02880C_KILL_ENABLE(1),
- S_02880C_KILL_ENABLE(1), NULL);
- }
- r600_pipe_state_add_reg(rstate,
- R_03E200_SQ_LOOP_CONST_0, 0x01000FFF,
- 0xFFFFFFFF, NULL);
-}
-
static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_shader *rshader = &shader->shader;
- void *ptr;
+ uint32_t *ptr;
+ int i;
/* copy new shader */
- if (rshader->processor_type == TGSI_PROCESSOR_VERTEX && shader->bo_fetch == NULL) {
- shader->bo_fetch = r600_bo(rctx->radeon, rshader->bc_fetch.ndw * 4, 4096, 0, 0);
- if (shader->bo_fetch == NULL) {
- return -ENOMEM;
- }
- ptr = r600_bo_map(rctx->radeon, shader->bo_fetch, 0, NULL);
- memcpy(ptr, rshader->bc_fetch.bytecode, rshader->bc_fetch.ndw * 4);
- r600_bo_unmap(rctx->radeon, shader->bo_fetch);
- }
if (shader->bo == NULL) {
shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0);
if (shader->bo == NULL) {
return -ENOMEM;
}
- ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
- memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4);
+ ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL);
+ if (R600_BIG_ENDIAN) {
+ for (i = 0; i < rshader->bc.ndw; ++i) {
+ ptr[i] = bswap_32(rshader->bc.bytecode[i]);
+ }
+ } else {
+ memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr));
+ }
r600_bo_unmap(rctx->radeon, shader->bo);
}
/* build state */
- rshader->flat_shade = rctx->flatshade;
switch (rshader->processor_type) {
case TGSI_PROCESSOR_VERTEX:
if (rshader->family >= CHIP_CEDAR) {
default:
return -EINVAL;
}
- r600_context_pipe_state_set(&rctx->ctx, &shader->rstate);
return 0;
}
-static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- struct r600_shader *shader = &rshader->shader;
- const struct util_format_description *desc;
- enum pipe_format resource_format[160];
- unsigned i, nresources = 0;
- struct r600_bc *bc = &shader->bc_fetch;
- struct r600_bc_cf *cf;
- struct r600_bc_vtx *vtx;
-
- if (shader->processor_type != TGSI_PROCESSOR_VERTEX)
- return 0;
- /* doing a full memcmp fell over the refcount */
- if ((rshader->vertex_elements.count == rctx->vertex_elements->count) &&
- (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements,
- rctx->vertex_elements->count * sizeof(struct pipe_vertex_element)))) {
- return 0;
- }
- rshader->vertex_elements = *rctx->vertex_elements;
- for (i = 0; i < rctx->vertex_elements->count; i++) {
- resource_format[nresources++] = rctx->vertex_elements->hw_format[i];
- }
- r600_bo_reference(rctx->radeon, &rshader->bo_fetch, NULL);
- LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
- switch (cf->inst) {
- case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
- case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
- LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
- desc = util_format_description(resource_format[vtx->buffer_id]);
- if (desc == NULL) {
- R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]);
- return -EINVAL;
- }
- vtx->dst_sel_x = desc->swizzle[0];
- vtx->dst_sel_y = desc->swizzle[1];
- vtx->dst_sel_z = desc->swizzle[2];
- vtx->dst_sel_w = desc->swizzle[3];
- }
- break;
- default:
- break;
- }
- }
- return r600_bc_build(&shader->bc_fetch);
-}
-
-int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader)
-{
- struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- int r;
+static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
- if (shader == NULL)
- return -EINVAL;
- /* there should be enough input */
- if (rctx->vertex_elements->count < shader->shader.bc.nresource) {
- R600_ERR("%d resources provided, expecting %d\n",
- rctx->vertex_elements->count, shader->shader.bc.nresource);
- return -EINVAL;
- }
- r = r600_shader_update(ctx, shader);
- if (r)
- return r;
- return r600_pipe_shader(ctx, shader);
-}
-
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
{
+ static int dump_shaders = -1;
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
int r;
-//fprintf(stderr, "--------------------------------------------------------------\n");
-//tgsi_dump(tokens, 0);
+ /* Would like some magic "get_bool_option_once" routine.
+ */
+ if (dump_shaders == -1)
+ dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE);
+
+ if (dump_shaders) {
+ fprintf(stderr, "--------------------------------------------------------------\n");
+ tgsi_dump(tokens, 0);
+ }
shader->shader.family = r600_get_family(rctx->radeon);
r = r600_shader_from_tgsi(tokens, &shader->shader);
if (r) {
R600_ERR("building bytecode failed !\n");
return r;
}
- if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
- r = r600_bc_build(&shader->shader.bc_fetch);
- if (r) {
- R600_ERR("building bytecode failed !\n");
- return r;
- }
+ if (dump_shaders) {
+ r600_bc_dump(&shader->shader.bc);
+ fprintf(stderr, "______________________________________________________________\n");
}
-//r600_bc_dump(&shader->shader.bc);
-//fprintf(stderr, "______________________________________________________________\n");
- return 0;
+ return r600_pipe_shader(ctx, shader);
}
-void
-r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
+void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
- if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
- r600_bo_reference(rctx->radeon, &shader->bo_fetch, NULL);
- r600_bc_clear(&shader->shader.bc_fetch);
- }
-
r600_bo_reference(rctx->radeon, &shader->bo, NULL);
-
r600_bc_clear(&shader->shader.bc);
-
- /* FIXME: is there more stuff to free? */
}
/*
*/
struct r600_shader_tgsi_instruction;
+struct r600_shader_src {
+ unsigned sel;
+ unsigned swizzle[4];
+ unsigned neg;
+ unsigned abs;
+ unsigned rel;
+ uint32_t value[4];
+};
+
struct r600_shader_ctx {
struct tgsi_shader_info info;
struct tgsi_parse_context parse;
unsigned type;
unsigned file_offset[TGSI_FILE_COUNT];
unsigned temp_reg;
+ unsigned ar_reg;
struct r600_shader_tgsi_instruction *inst_info;
struct r600_bc *bc;
- struct r600_bc *bc_fetch;
struct r600_shader *shader;
- u32 value[4];
+ struct r600_shader_src src[3];
u32 *literals;
u32 nliterals;
u32 max_driver_temp_used;
static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
- struct r600_bc_vtx vtx;
unsigned i;
int r;
ctx->shader->input[i].interpolate = d->Declaration.Interpolate;
ctx->shader->input[i].centroid = d->Declaration.Centroid;
ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i;
- if (ctx->type == TGSI_PROCESSOR_VERTEX) {
- /* turn input into fetch */
- memset(&vtx, 0, sizeof(struct r600_bc_vtx));
- vtx.inst = 0;
- vtx.fetch_type = 0;
- vtx.buffer_id = i;
- /* register containing the index into the buffer */
- vtx.src_gpr = 0;
- vtx.src_sel_x = 0;
- vtx.mega_fetch_count = 0x1F;
- vtx.dst_gpr = ctx->shader->input[i].gpr;
- vtx.dst_sel_x = 0;
- vtx.dst_sel_y = 1;
- vtx.dst_sel_z = 2;
- vtx.dst_sel_w = 3;
- vtx.use_const_fields = 1;
- r = r600_bc_add_vtx(ctx->bc_fetch, &vtx);
- if (r)
- return r;
- }
if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) {
/* turn input into interpolate on EG */
if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) {
case TGSI_FILE_SAMPLER:
case TGSI_FILE_ADDRESS:
break;
+
+ case TGSI_FILE_SYSTEM_VALUE:
+ if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+ struct r600_bc_alu alu;
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT);
+ alu.src[0].sel = 0;
+ alu.src[0].chan = 3;
+
+ alu.dst.sel = 0;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+ break;
+ }
+
default:
R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
return -EINVAL;
return ctx->num_interp_gpr;
}
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
+static void tgsi_src(struct r600_shader_ctx *ctx,
+ const struct tgsi_full_src_register *tgsi_src,
+ struct r600_shader_src *r600_src)
+{
+ memset(r600_src, 0, sizeof(*r600_src));
+ r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
+ r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
+ r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
+ r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
+ r600_src->neg = tgsi_src->Register.Negate;
+ r600_src->abs = tgsi_src->Register.Absolute;
+
+ if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
+ int index;
+ if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
+ (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
+
+ index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
+ r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
+ if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
+ return;
+ }
+ index = tgsi_src->Register.Index;
+ r600_src->sel = V_SQ_ALU_SRC_LITERAL;
+ memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
+ } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
+ /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */
+ r600_src->swizzle[0] = 3;
+ r600_src->swizzle[1] = 3;
+ r600_src->swizzle[2] = 3;
+ r600_src->swizzle[3] = 3;
+ r600_src->sel = 0;
+ } else {
+ if (tgsi_src->Register.Indirect)
+ r600_src->rel = V_SQ_REL_RELATIVE;
+ r600_src->sel = tgsi_src->Register.Index;
+ r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
+ }
+}
+
+static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg)
+{
+ struct r600_bc_vtx vtx;
+ unsigned int ar_reg;
+ int r;
+
+ if (offset) {
+ struct r600_bc_alu alu;
+
+ memset(&alu, 0, sizeof(alu));
+
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
+ alu.src[0].sel = ctx->ar_reg;
+
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = offset;
+
+ alu.dst.sel = dst_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+
+ ar_reg = dst_reg;
+ } else {
+ ar_reg = ctx->ar_reg;
+ }
+
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
+ vtx.src_gpr = ar_reg;
+ vtx.mega_fetch_count = 16;
+ vtx.dst_gpr = dst_reg;
+ vtx.dst_sel_x = 0; /* SEL_X */
+ vtx.dst_sel_y = 1; /* SEL_Y */
+ vtx.dst_sel_z = 2; /* SEL_Z */
+ vtx.dst_sel_w = 3; /* SEL_W */
+ vtx.data_format = FMT_32_32_32_32_FLOAT;
+ vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
+ vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
+ vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
+ vtx.endian = r600_endian_swap(32);
+
+ if ((r = r600_bc_add_vtx(ctx->bc, &vtx)))
+ return r;
+
+ return 0;
+}
+
+static int tgsi_split_constant(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, j, k, nconst, r;
+
+ for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
+ nconst++;
+ }
+ tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
+ }
+ for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
+ if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
+ continue;
+ }
+
+ if (ctx->src[i].rel) {
+ int treg = r600_get_temp(ctx);
+ if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg)))
+ return r;
+
+ ctx->src[i].sel = treg;
+ ctx->src[i].rel = 0;
+ j--;
+ } else if (j > 0) {
+ int treg = r600_get_temp(ctx);
+ for (k = 0; k < 4; k++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ alu.src[0].sel = ctx->src[i].sel;
+ alu.src[0].chan = k;
+ alu.src[0].rel = ctx->src[i].rel;
+ alu.dst.sel = treg;
+ alu.dst.chan = k;
+ alu.dst.write = 1;
+ if (k == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ ctx->src[i].sel = treg;
+ ctx->src[i].rel =0;
+ j--;
+ }
+ }
+ return 0;
+}
+
+/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
+static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bc_alu alu;
+ int i, j, k, nliteral, r;
+
+ for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
+ if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ nliteral++;
+ }
+ }
+ for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
+ if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+ int treg = r600_get_temp(ctx);
+ for (k = 0; k < 4; k++) {
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+ alu.src[0].sel = ctx->src[i].sel;
+ alu.src[0].chan = k;
+ alu.src[0].value = ctx->src[i].value[k];
+ alu.dst.sel = treg;
+ alu.dst.chan = k;
+ alu.dst.write = 1;
+ if (k == 3)
+ alu.last = 1;
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ ctx->src[i].sel = treg;
+ j--;
+ }
+ }
+ return 0;
+}
+
+static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
{
struct tgsi_full_immediate *immediate;
+ struct tgsi_full_property *property;
struct r600_shader_ctx ctx;
struct r600_bc_output output[32];
unsigned output_done, noutput;
int i, r = 0, pos0;
ctx.bc = &shader->bc;
- ctx.bc_fetch = &shader->bc_fetch;
ctx.shader = shader;
r = r600_bc_init(ctx.bc, shader->family);
if (r)
tgsi_parse_init(&ctx.parse, tokens);
ctx.type = ctx.parse.FullHeader.Processor.Processor;
shader->processor_type = ctx.type;
- if (shader->processor_type == TGSI_PROCESSOR_VERTEX) {
- r = r600_bc_init(ctx.bc_fetch, shader->family);
- if (r)
- return r;
- ctx.bc_fetch->type = -1;
- }
ctx.bc->type = shader->processor_type;
/* register allocations */
/* Values [0,127] correspond to GPR[0..127].
* Values [128,159] correspond to constant buffer bank 0
* Values [160,191] correspond to constant buffer bank 1
- * Values [256,511] correspond to cfile constants c[0..255].
+ * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
+ * Values [256,287] correspond to constant buffer bank 2 (EG)
+ * Values [288,319] correspond to constant buffer bank 3 (EG)
* Other special values are shown in the list below.
* 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
* 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
ctx.info.file_count[TGSI_FILE_OUTPUT];
- ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
+ /* Outside the GPR range. This will be translated to one of the
+ * kcache banks later. */
+ ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
- ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
- ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
+ ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
+ ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_count[TGSI_FILE_TEMPORARY];
+ ctx.temp_reg = ctx.ar_reg + 1;
ctx.nliterals = 0;
ctx.literals = NULL;
-
+ shader->fs_write_all = FALSE;
while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
tgsi_parse_token(&ctx.parse);
switch (ctx.parse.FullToken.Token.Type) {
ctx.max_driver_temp_used = 0;
/* reserve first tmp for everyone */
r600_get_temp(&ctx);
+
opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
+ if ((r = tgsi_split_constant(&ctx)))
+ goto out_err;
+ if ((r = tgsi_split_literal_constant(&ctx)))
+ goto out_err;
if (ctx.bc->chiprev == CHIPREV_EVERGREEN)
ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
else
r = ctx.inst_info->process(&ctx);
if (r)
goto out_err;
- r = r600_bc_add_literal(ctx.bc, ctx.value);
- if (r)
- goto out_err;
+ break;
+ case TGSI_TOKEN_TYPE_PROPERTY:
+ property = &ctx.parse.FullToken.FullProperty;
+ if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
+ if (property->u[0].Data == 1)
+ shader->fs_write_all = TRUE;
+ }
break;
default:
R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
output[i].swizzle_y = 1;
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
+ output[i].burst_count = 1;
output[i].barrier = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = i - pos0;
output[i].swizzle_y = 1;
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
+ output[i].burst_count = 1;
output[i].barrier = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = 0;
output[0].swizzle_y = 7;
output[0].swizzle_z = 7;
output[0].swizzle_w = 7;
+ output[0].burst_count = 1;
output[0].barrier = 1;
output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[0].array_base = 0;
output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
}
}
- /* add return to fetch shader */
- if (ctx.type == TGSI_PROCESSOR_VERTEX) {
- if (ctx.bc->chiprev == CHIPREV_EVERGREEN) {
- r600_bc_add_cfinst(ctx.bc_fetch, EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
- } else {
- r600_bc_add_cfinst(ctx.bc_fetch, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
- }
- }
/* add output to bytecode */
for (i = 0; i < noutput; i++) {
r = r600_bc_add_output(ctx.bc, &output[i]);
static int tgsi_unsupported(struct r600_shader_ctx *ctx)
{
- R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode);
+ R600_ERR("%s tgsi opcode unsupported\n",
+ tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode));
return -EINVAL;
}
return 0;
}
-static int tgsi_src(struct r600_shader_ctx *ctx,
- const struct tgsi_full_src_register *tgsi_src,
- struct r600_bc_alu_src *r600_src)
+static void r600_bc_src(struct r600_bc_alu_src *bc_src,
+ const struct r600_shader_src *shader_src,
+ unsigned chan)
{
- int index;
- memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
- r600_src->sel = tgsi_src->Register.Index;
- if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
- r600_src->sel = 0;
- index = tgsi_src->Register.Index;
- ctx->value[0] = ctx->literals[index * 4 + 0];
- ctx->value[1] = ctx->literals[index * 4 + 1];
- ctx->value[2] = ctx->literals[index * 4 + 2];
- ctx->value[3] = ctx->literals[index * 4 + 3];
- }
- if (tgsi_src->Register.Indirect)
- r600_src->rel = V_SQ_REL_RELATIVE;
- r600_src->neg = tgsi_src->Register.Negate;
- r600_src->abs = tgsi_src->Register.Absolute;
- r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
- return 0;
+ bc_src->sel = shader_src->sel;
+ bc_src->chan = shader_src->swizzle[chan];
+ bc_src->neg = shader_src->neg;
+ bc_src->abs = shader_src->abs;
+ bc_src->rel = shader_src->rel;
+ bc_src->value = shader_src->value[bc_src->chan];
}
-static int tgsi_dst(struct r600_shader_ctx *ctx,
- const struct tgsi_full_dst_register *tgsi_dst,
- unsigned swizzle,
- struct r600_bc_alu_dst *r600_dst)
+static void tgsi_dst(struct r600_shader_ctx *ctx,
+ const struct tgsi_full_dst_register *tgsi_dst,
+ unsigned swizzle,
+ struct r600_bc_alu_dst *r600_dst)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
if (inst->Instruction.Saturate) {
r600_dst->clamp = 1;
}
- return 0;
}
-static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle)
+static int tgsi_last_instruction(unsigned writemask)
{
- switch (swizzle) {
- case 0:
- return tgsi_src->Register.SwizzleX;
- case 1:
- return tgsi_src->Register.SwizzleY;
- case 2:
- return tgsi_src->Register.SwizzleZ;
- case 3:
- return tgsi_src->Register.SwizzleW;
- default:
- return 0;
- }
-}
-
-static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu alu;
- int i, j, k, nconst, r;
+ int i, lasti = 0;
- for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
- nconst++;
- }
- r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]);
- if (r) {
- return r;
- }
- }
- for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
- if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
- int treg = r600_get_temp(ctx);
- for (k = 0; k < 4; k++) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = r600_src[i].sel;
- alu.src[0].chan = k;
- alu.src[0].rel = r600_src[i].rel;
- alu.dst.sel = treg;
- alu.dst.chan = k;
- alu.dst.write = 1;
- if (k == 3)
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- r600_src[i].sel = treg;
- r600_src[i].rel =0;
- j--;
- }
- }
- return 0;
-}
-
-/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
-static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
-{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu alu;
- int i, j, k, nliteral, r;
-
- for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
- nliteral++;
- }
- }
- for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
- if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
- int treg = r600_get_temp(ctx);
- for (k = 0; k < 4; k++) {
- memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = r600_src[i].sel;
- alu.src[0].chan = k;
- alu.dst.sel = treg;
- alu.dst.chan = k;
- alu.dst.write = 1;
- if (k == 3)
- alu.last = 1;
- r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
- r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
- if (r)
- return r;
- r600_src[i].sel = treg;
- j--;
+ for (i = 0; i < 4; i++) {
+ if (writemask & (1 << i)) {
+ lasti = i;
}
}
- return 0;
+ return lasti;
}
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
- int lasti = 0;
-
- for (i = 0; i < 4; i++) {
- if (inst->Dst[0].Register.WriteMask & (1 << i)) {
- lasti = i;
- }
- }
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bc_alu));
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.inst = ctx->inst_info->r600_opcode;
if (!swap) {
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
} else {
- alu.src[0] = r600_src[1];
- alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
-
- alu.src[1] = r600_src[0];
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
}
/* handle some special cases */
switch (ctx->inst_info->tgsi_opcode) {
break;
case TGSI_OPCODE_ABS:
alu.src[0].abs = 1;
+ if (alu.src[0].neg)
+ alu.src[0].neg = 0;
break;
default:
break;
* r700 - normalize by dividing by 2PI
* see fdo bug 27901
*/
-static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
- struct r600_bc_alu_src r600_src[3])
+static int tgsi_setup_trig(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ static float half_inv_pi = 1.0 /(3.1415926535 * 2);
+ static float double_pi = 3.1415926535 * 2;
+ static float neg_pi = -3.1415926535;
+
int r;
- uint32_t lit_vals[4];
struct r600_bc_alu alu;
- memset(lit_vals, 0, 4*4);
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
- lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
- lit_vals[1] = fui(0.5f);
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].chan = 0;
- alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
- alu.src[2].chan = 1;
+ alu.src[1].value = *(uint32_t *)&half_inv_pi;
+ alu.src[2].sel = V_SQ_ALU_SRC_0_5;
+ alu.src[2].chan = 0;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- r = r600_bc_add_literal(ctx->bc, lit_vals);
if (r)
return r;
if (r)
return r;
- if (ctx->bc->chiprev == CHIPREV_R600) {
- lit_vals[0] = fui(3.1415926535897f * 2.0f);
- lit_vals[1] = fui(-3.1415926535897f);
- } else {
- lit_vals[0] = fui(1.0f);
- lit_vals[1] = fui(-0.5f);
- }
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].chan = 0;
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
- alu.src[2].chan = 1;
+ alu.src[2].chan = 0;
+
+ if (ctx->bc->chiprev == CHIPREV_R600) {
+ alu.src[1].value = *(uint32_t *)&double_pi;
+ alu.src[2].value = *(uint32_t *)&neg_pi;
+ } else {
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+ alu.src[2].sel = V_SQ_ALU_SRC_0_5;
+ alu.src[2].neg = 1;
+ }
+
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- r = r600_bc_add_literal(ctx->bc, lit_vals);
if (r)
return r;
return 0;
static int tgsi_trig(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, r;
- int lasti = 0;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_setup_trig(ctx, r600_src);
+ r = tgsi_setup_trig(ctx);
if (r)
return r;
return r;
/* replicate result */
- for (i = 0; i < 4; i++) {
- if (inst->Dst[0].Register.WriteMask & (1 << i))
- lasti = i;
- }
for (i = 0; i < lasti + 1; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
continue;
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = ctx->temp_reg;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (i == lasti)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
static int tgsi_scs(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int r;
* X or Y components of the destination vector.
*/
if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) {
- r = tgsi_setup_trig(ctx, r600_src);
+ r = tgsi_setup_trig(ctx);
if (r)
return r;
}
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS);
- r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN);
- r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = 0;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* dst.w = 1.0; */
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return 0;
static int tgsi_kill(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int i, r;
alu.src[1].sel = V_SQ_ALU_SRC_1;
alu.src[1].neg = 1;
} else {
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
}
if (i == 3) {
alu.last = 1;
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* kill must be last in ALU */
ctx->bc->force_add_cf = 1;
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
- struct r600_bc_alu_src r600_src[3];
int r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
/* dst.x, <- 1.0 */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
/* dst.y = max(src.x, 0.0) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX);
- alu.src[0] = r600_src[0];
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/
alu.src[1].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
if (inst->Dst[0].Register.WriteMask & (1 << 2))
{
int chan;
/* dst.z = log(src.y) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 1);
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
chan = alu.dst.chan;
sel = alu.dst.sel;
/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 3);
alu.src[1].sel = sel;
alu.src[1].chan = chan;
- alu.src[2] = r600_src[0];
- alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[2], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
alu.dst.write = 1;
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* dst.z = exp(tmp.x) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
- r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED);
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
- if (r)
- return r;
- alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
+ r600_bc_src(&alu.src[i], &ctx->src[i], 0);
alu.src[i].abs = 1;
}
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
if (r)
return r;
/* replicate result */
alu.src[0].sel = ctx->temp_reg;
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
alu.dst.chan = i;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
if (i == 3)
alu.last = 1;
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]);
- if (r)
- return r;
- alu.src[i].chan = tgsi_chan(&inst->Src[i], 0);
+ r600_bc_src(&alu.src[i], &ctx->src[i], 0);
}
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
if (r)
return r;
/* replicate result */
static int tgsi_pow(struct r600_shader_ctx *ctx)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
/* LOG2(a) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
if (r)
return r;
/* b * LOG2(a) */
memset(&alu, 0, sizeof(struct r600_bc_alu));
- alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE);
- r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[1], 0);
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
+ r600_bc_src(&alu.src[0], &ctx->src[1], 0);
alu.src[1].sel = ctx->temp_reg;
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
if (r)
return r;
/* POW(a,b) = EXP2(b * LOG2(a))*/
alu.dst.write = 1;
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- r = r600_bc_add_literal(ctx->bc,ctx->value);
if (r)
return r;
return tgsi_helper_tempx_replicate(ctx);
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
- struct r600_bc_alu_src r600_src[3];
int i, r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
/* tmp = (src > 0 ? 1 : src) */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
-
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
alu.src[1].sel = V_SQ_ALU_SRC_1;
+ r600_bc_src(&alu.src[2], &ctx->src[0], i);
- alu.src[2] = r600_src[0];
- alu.src[2].chan = tgsi_chan(&inst->Src[0], i);
if (i == 3)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* dst = (-tmp > 0 ? -1 : tmp) */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT);
alu.is_op3 = 1;
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
{
struct r600_bc_alu alu;
int i, r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
+
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
alu.dst.chan = i;
} else {
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
}
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
- /* do it in 2 step as op3 doesn't support writemask */
- for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
- alu.dst.sel = ctx->temp_reg;
+
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
int i, j, r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = ctx->inst_info->r600_opcode;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
- alu.src[j] = r600_src[j];
- alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
+ r600_bc_src(&alu.src[j], &ctx->src[j], i);
}
- alu.dst.sel = ctx->temp_reg;
+
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
- alu.dst.write = 1;
+ alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
/* handle some special cases */
switch (ctx->inst_info->tgsi_opcode) {
case TGSI_OPCODE_DP2:
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
+ static float one_point_five = 1.5f;
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_tex tex;
struct r600_bc_alu alu;
unsigned src_gpr;
int r, i;
int opcode;
- boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
- uint32_t lit_vals[4];
+ /* Texture fetch instructions can only use gprs as source.
+ * Also they cannot negate the source or take the absolute value */
+ const boolean src_requires_loading =
+ (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
+ inst->Src[0].Register.File != TGSI_FILE_INPUT) ||
+ ctx->src[0].neg || ctx->src[0].abs;
+ boolean src_loaded = FALSE;
src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
/* Add perspective divide */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 3);
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 3;
alu.last = 1;
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 3;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
alu.dst.write = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- src_not_temp = FALSE;
+ src_loaded = TRUE;
src_gpr = ctx->temp_reg;
}
if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
- int src_chan, src2_chan;
+ static const unsigned src0_swizzle[] = {2, 2, 0, 1};
+ static const unsigned src1_swizzle[] = {1, 0, 2, 2};
/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE);
- switch (i) {
- case 0:
- src_chan = 2;
- src2_chan = 1;
- break;
- case 1:
- src_chan = 2;
- src2_chan = 0;
- break;
- case 2:
- src_chan = 0;
- src2_chan = 2;
- break;
- case 3:
- src_chan = 1;
- src2_chan = 2;
- break;
- default:
- assert(0);
- src_chan = 0;
- src2_chan = 0;
- break;
- }
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
+ r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
+ r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == 3)
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
+ alu.src[2].value = *(uint32_t *)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[2].chan = 0;
+ alu.src[2].value = *(uint32_t *)&one_point_five;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 1;
if (r)
return r;
- lit_vals[0] = fui(1.5f);
-
- r = r600_bc_add_literal(ctx->bc, lit_vals);
- if (r)
- return r;
- src_not_temp = FALSE;
+ src_loaded = TRUE;
src_gpr = ctx->temp_reg;
}
- if (src_not_temp) {
+ if (src_requires_loading && !src_loaded) {
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
- alu.src[0].sel = src_gpr;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
if (i == 3)
if (r)
return r;
}
+ src_loaded = TRUE;
src_gpr = ctx->temp_reg;
}
memset(&tex, 0, sizeof(struct r600_bc_tex));
tex.inst = opcode;
tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
- tex.resource_id = tex.sampler_id;
+ tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
- tex.src_sel_x = 0;
- tex.src_sel_y = 1;
- tex.src_sel_z = 2;
- tex.src_sel_w = 3;
+ if (src_loaded) {
+ tex.src_sel_x = 0;
+ tex.src_sel_y = 1;
+ tex.src_sel_z = 2;
+ tex.src_sel_w = 3;
+ } else {
+ tex.src_sel_x = ctx->src[0].swizzle[0];
+ tex.src_sel_y = ctx->src[0].swizzle[1];
+ tex.src_sel_z = ctx->src[0].swizzle[2];
+ tex.src_sel_w = ctx->src[0].swizzle[3];
+ tex.src_rel = ctx->src[0].rel;
+ }
if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
tex.src_sel_x = 1;
tex.coord_type_w = 1;
}
+ if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) {
+ tex.coord_type_z = 0;
+ tex.src_sel_z = tex.src_sel_y;
+ } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
+ tex.coord_type_z = 0;
+
if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
- tex.src_sel_w = 2;
+ tex.src_sel_w = tex.src_sel_z;
r = r600_bc_add_tex(ctx->bc, &tex);
if (r)
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
unsigned i;
int r;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
+ /* optimize if it's just an equal balance */
+ if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bc_alu));
+ alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
+ r600_bc_src(&alu.src[0], &ctx->src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
+ alu.omod = 3;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.chan = i;
+ if (i == lasti) {
+ alu.last = 1;
+ }
+ r = r600_bc_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+ }
+
/* 1 - src0 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
alu.src[0].sel = V_SQ_ALU_SRC_1;
alu.src[0].chan = 0;
- alu.src[1] = r600_src[0];
- alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[0], i);
alu.src[1].neg = 1;
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
alu.dst.write = 1;
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* (1 - src0) * src2 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = i;
- alu.src[1] = r600_src[2];
- alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
alu.dst.write = 1;
if (r)
return r;
}
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
/* src0 * src1 + (1 - src0) * src2 */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
alu.is_op3 = 1;
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
- alu.src[1] = r600_src[1];
- alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[1], i);
alu.src[2].sel = ctx->temp_reg;
alu.src[2].chan = i;
- alu.dst.sel = ctx->temp_reg;
+
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
- if (i == 3) {
+ if (i == lasti) {
alu.last = 1;
}
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- return tgsi_helper_copy(ctx, inst);
+ return 0;
}
static int tgsi_cmp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
struct r600_bc_alu alu;
- int use_temp = 0;
int i, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
- if (inst->Dst[0].Register.WriteMask != 0xf)
- use_temp = 1;
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
- for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
- alu.src[0] = r600_src[0];
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
-
- alu.src[1] = r600_src[2];
- alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
-
- alu.src[2] = r600_src[1];
- alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
-
- if (use_temp)
- alu.dst.sel = ctx->temp_reg;
- else {
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
- }
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
+ r600_bc_src(&alu.src[1], &ctx->src[2], i);
+ r600_bc_src(&alu.src[2], &ctx->src[1], i);
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
- if (i == 3)
+ if (i == lasti)
alu.last = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
}
- if (use_temp)
- return tgsi_helper_copy(ctx, inst);
return 0;
}
static int tgsi_xpd(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3];
+ static const unsigned int src0_swizzle[] = {2, 0, 1};
+ static const unsigned int src1_swizzle[] = {1, 2, 0};
struct r600_bc_alu alu;
uint32_t use_temp = 0;
int i, r;
if (inst->Dst[0].Register.WriteMask != 0xf)
use_temp = 1;
- r = tgsi_split_constant(ctx, r600_src);
- if (r)
- return r;
- r = tgsi_split_literal_constant(ctx, r600_src);
- if (r)
- return r;
-
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
-
- alu.src[0] = r600_src[0];
- switch (i) {
- case 0:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
- break;
- case 1:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
- break;
- case 2:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
- break;
- case 3:
+ if (i < 3) {
+ r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
+ r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
+ } else {
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = i;
- }
-
- alu.src[1] = r600_src[1];
- switch (i) {
- case 0:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
- break;
- case 1:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
- break;
- case 2:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
- break;
- case 3:
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[1].chan = i;
}
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
- alu.src[0] = r600_src[0];
- switch (i) {
- case 0:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
- break;
- case 1:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 2);
- break;
- case 2:
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
- break;
- case 3:
+ if (i < 3) {
+ r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
+ r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
+ } else {
alu.src[0].sel = V_SQ_ALU_SRC_0;
alu.src[0].chan = i;
- }
-
- alu.src[1] = r600_src[1];
- switch (i) {
- case 0:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 2);
- break;
- case 1:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 0);
- break;
- case 2:
- alu.src[1].chan = tgsi_chan(&inst->Src[1], 1);
- break;
- case 3:
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[1].chan = i;
}
if (use_temp)
alu.dst.sel = ctx->temp_reg;
- else {
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
- }
+ else
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.chan = i;
alu.dst.write = 1;
alu.is_op3 = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
if (use_temp)
return tgsi_helper_copy(ctx, inst);
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
- struct r600_bc_alu_src r600_src[3] = { { 0 } };
struct r600_bc_alu alu;
int r;
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.y = tmp - floor(tmp); */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT);
- alu.src[0] = r600_src[0];
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
-// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
-// if (r)
-// return r;
+#if 0
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+#endif
alu.dst.write = 1;
alu.dst.chan = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.z = RoughApprox2ToX(tmp);*/
if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.w = 1.0;*/
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return tgsi_helper_copy(ctx, inst);
}
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 0;
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
alu.src[0].sel = ctx->temp_reg;
alu.src[0].chan = 0;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.y = src.x / (2 ^ floor(log2(src.x))); */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 1;
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
if (r)
return r;
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
-
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = ctx->temp_reg;
alu.src[1].chan = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.z = log2(src);*/
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE);
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
-
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->temp_reg;
alu.dst.write = 1;
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
/* result.w = 1.0; */
r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
-
- r = r600_bc_add_literal(ctx->bc, ctx->value);
- if (r)
- return r;
}
return tgsi_helper_copy(ctx, inst);
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
+
memset(&alu, 0, sizeof(struct r600_bc_alu));
switch (inst->Instruction.Opcode) {
return -1;
}
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.last = 1;
- alu.dst.chan = 0;
- alu.dst.sel = ctx->temp_reg;
+ alu.dst.sel = ctx->ar_reg;
alu.dst.write = 1;
- r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+ r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
+
+ /* TODO: Note that the MOVA can be avoided if we never use AR for
+ * indexing non-CB registers in the current ALU clause. Similarly, we
+ * need to load AR from ar_reg again if we started a new clause
+ * between ARL and AR usage. The easy way to do that is to remove
+ * the MOVA here, and load it for the first AR access after ar_reg
+ * has been modified in each clause. */
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].sel = ctx->ar_reg;
alu.src[0].chan = 0;
alu.last = 1;
- r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+ r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
return 0;
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
- memset(&alu, 0, sizeof(struct r600_bc_alu));
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR;
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
+
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+ alu.src[0].sel = ctx->ar_reg;
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
break;
case TGSI_OPCODE_ARR:
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA;
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = ctx->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bc_add_alu(ctx->bc, &alu)))
+ return r;
break;
default:
assert(0);
return -1;
}
-
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
-
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
+ alu.src[0].sel = ctx->ar_reg;
alu.last = 1;
- r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
+ r = r600_bc_add_alu(ctx->bc, &alu);
if (r)
return r;
ctx->bc->cf_last->r6xx_uses_waterfall = 1;
memset(&alu, 0, sizeof(struct r600_bc_alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
- r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
- if (r)
- return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
if (i == 0 || i == 3) {
alu.src[0].sel = V_SQ_ALU_SRC_1;
} else {
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], i);
+ r600_bc_src(&alu.src[0], &ctx->src[0], i);
}
- if (i == 0 || i == 2) {
+ if (i == 0 || i == 2) {
alu.src[1].sel = V_SQ_ALU_SRC_1;
} else {
- r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]);
- if (r)
- return r;
- alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
+ r600_bc_src(&alu.src[1], &ctx->src[1], i);
}
if (i == 3)
alu.last = 1;
static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
{
- struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bc_alu alu;
int r;
alu.dst.write = 1;
alu.dst.chan = 0;
- r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
- if (r)
- return r;
- alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+ r600_bc_src(&alu.src[0], &ctx->src[0], 0);
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[1].chan = 0;
static int pops(struct r600_shader_ctx *ctx, int pops)
{
- r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
- ctx->bc->cf_last->pop_count = pops;
- ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
+ int alu_pop = 3;
+ if (ctx->bc->cf_last) {
+ if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
+ alu_pop = 0;
+ else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
+ alu_pop = 1;
+ }
+ alu_pop += pops;
+ if (alu_pop == 1) {
+ ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
+ ctx->bc->force_add_cf = 1;
+ } else if (alu_pop == 2) {
+ ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
+ ctx->bc->force_add_cf = 1;
+ } else {
+ r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
+ ctx->bc->cf_last->pop_count = pops;
+ ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
+ }
return 0;
}
{TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
+ {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
{TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* gap */
{88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
- {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
{TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
{TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
{TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
{TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
- {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate},
+ {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
{TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
/* gap */
{88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},