struct r600_pipe_shader *pipeshader,
struct r600_shader_key key);
+
static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
int size, unsigned comp_mask) {
ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
if (R600_BIG_ENDIAN) {
for (i = 0; i < shader->shader.bc.ndw; ++i) {
- ptr[i] = util_bswap32(shader->shader.bc.bytecode[i]);
+ ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);
}
} else {
memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
r = r600_shader_from_tgsi(rctx, shader, key);
if (r) {
R600_ERR("translation from TGSI failed !\n");
- return r;
+ goto error;
}
/* disable SB for geom shaders - it can't handle the CF_EMIT instructions */
use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
+ /* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array indexing) as it doesn't handle those currently */
+ use_sb &= !shader->shader.uses_index_registers;
/* Check if the bytecode has already been built. When using the llvm
* backend, r600_shader_from_tgsi() will take care of building the
r = r600_bytecode_build(&shader->shader.bc);
if (r) {
R600_ERR("building bytecode failed !\n");
- return r;
+ goto error;
}
}
dump, use_sb);
if (r) {
R600_ERR("r600_sb_bytecode_process failed !\n");
- return r;
+ goto error;
}
}
r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
&shader->gs_copy_shader->shader, dump, 0);
if (r)
- return r;
+ goto error;
}
if ((r = store_shader(ctx, shader->gs_copy_shader)))
- return r;
+ goto error;
}
/* Store the shader in a buffer. */
if ((r = store_shader(ctx, shader)))
- return r;
+ goto error;
/* Build state. */
switch (shader->shader.processor_type) {
evergreen_update_gs_state(ctx, shader);
evergreen_update_vs_state(ctx, shader->gs_copy_shader);
} else {
- assert(!"not suported yet");
+ r600_update_gs_state(ctx, shader);
+ r600_update_vs_state(ctx, shader->gs_copy_shader);
}
break;
case TGSI_PROCESSOR_VERTEX:
else
evergreen_update_vs_state(ctx, shader);
} else {
- r600_update_vs_state(ctx, shader);
+ if (export_shader)
+ r600_update_es_state(ctx, shader);
+ else
+ r600_update_vs_state(ctx, shader);
}
break;
case TGSI_PROCESSOR_FRAGMENT:
}
break;
default:
- return -EINVAL;
+ r = -EINVAL;
+ goto error;
}
return 0;
+
+error:
+ r600_pipe_shader_destroy(ctx, shader);
+ return r;
}
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
unsigned abs;
unsigned rel;
unsigned kc_bank;
+ boolean kc_rel; /* true if cache bank is indexed */
uint32_t value[4];
};
+struct eg_interp {
+ boolean enabled; /* set when an input or an INTERP_* opcode needs this interpolator */
+ unsigned ij_index; /* sequential number given to this interpolator among the enabled ones */
+};
+
struct r600_shader_ctx {
struct tgsi_shader_info info;
struct tgsi_parse_context parse;
uint32_t max_driver_temp_used;
boolean use_llvm;
/* needed for evergreen interpolation */
- boolean input_centroid;
- boolean input_linear;
- boolean input_perspective;
- int num_interp_gpr;
+ struct eg_interp eg_interpolators[6]; // slot = is_linear * 3 + loc, with loc 0 = sample, 1 = center, 2 = centroid
+ /* evergreen/cayman also store sample mask in face register */
int face_gpr;
+ /* sample id is .w component stored in fixed point position register */
+ int fixed_pt_position_gpr;
int colors_used;
boolean clip_vertex_write;
unsigned cv_output;
+ unsigned edgeflag_output; /* index of the TGSI_SEMANTIC_EDGEFLAG output, recorded when it is declared */
int fragcoord_input;
int native_integers;
int next_ring_offset;
int gs_out_ring_offset;
int gs_next_vertex;
struct r600_shader *gs_for_vs;
+ int gs_export_gpr_treg; /* GPR used as index_gpr for indirect GS ring writes; incremented by emit_gs_ring_writes() in indirect mode */
};
struct r600_shader_tgsi_instruction {
int (*process)(struct r600_shader_ctx *ctx);
};
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx);
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind);
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
static int tgsi_endloop(struct r600_shader_ctx *ctx);
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
+static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
+ unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
+ unsigned int dst_reg);
+static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
+ const struct r600_shader_src *shader_src,
+ unsigned chan);
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
{
return 0;
}
-static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
- int input)
+int eg_get_interpolator_index(unsigned interpolate, unsigned location)
{
- int ij_index = 0;
+ if (interpolate == TGSI_INTERPOLATE_COLOR ||
+ interpolate == TGSI_INTERPOLATE_LINEAR ||
+ interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
+ { /* only these three modes map to an interpolator slot */
+ int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR; /* 1 for LINEAR, 0 for COLOR and PERSPECTIVE */
+ int loc;
- if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
- if (ctx->shader->input[input].centroid)
- ij_index++;
- } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
- /* if we have perspective add one */
- if (ctx->input_perspective) {
- ij_index++;
- /* if we have perspective centroid */
- if (ctx->input_centroid)
- ij_index++;
+ switch(location) {
+ case TGSI_INTERPOLATE_LOC_CENTER:
+ loc = 1;
+ break;
+ case TGSI_INTERPOLATE_LOC_CENTROID:
+ loc = 2;
+ break;
+ case TGSI_INTERPOLATE_LOC_SAMPLE:
+ default:
+ loc = 0; break; /* unknown locations are treated like SAMPLE */
}
- if (ctx->shader->input[input].centroid)
- ij_index++;
+
+ return is_linear * 3 + loc; /* 0..2 = non-linear sample/center/centroid, 3..5 = linear sample/center/centroid */
}
- ctx->shader->input[input].ij_index = ij_index;
+ return -1; /* any other interpolation mode has no slot; callers must not index with this */
+}
+
+static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
+ int input)
+{
+ int i = eg_get_interpolator_index( /* interpolator slot for this input's mode and location */
+ ctx->shader->input[input].interpolate,
+ ctx->shader->input[input].interpolate_location);
+ assert(i >= 0); /* NOTE(review): only checked in assert-enabled builds; presumably callers pass interpolated inputs only - confirm */
+ ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index; /* copy the number stored for that slot */
}
static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
* semantic indices, so we'll use 0 for them.
*/
if (name == TGSI_SEMANTIC_POSITION ||
- name == TGSI_SEMANTIC_PSIZE ||
- name == TGSI_SEMANTIC_FACE)
+ name == TGSI_SEMANTIC_PSIZE ||
+ name == TGSI_SEMANTIC_EDGEFLAG ||
+ name == TGSI_SEMANTIC_FACE ||
+ name == TGSI_SEMANTIC_SAMPLEMASK)
index = 0;
else {
if (name == TGSI_SEMANTIC_GENERIC) {
ctx->shader->input[i].name = d->Semantic.Name;
ctx->shader->input[i].sid = d->Semantic.Index;
ctx->shader->input[i].interpolate = d->Interp.Interpolate;
- ctx->shader->input[i].centroid = d->Interp.Centroid;
+ ctx->shader->input[i].interpolate_location = d->Interp.Location;
ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
switch (ctx->shader->input[i].name) {
case TGSI_SEMANTIC_FACE:
- ctx->face_gpr = ctx->shader->input[i].gpr;
+ if (ctx->face_gpr != -1)
+ ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
+ else
+ ctx->face_gpr = ctx->shader->input[i].gpr;
break;
case TGSI_SEMANTIC_COLOR:
ctx->colors_used++;
/* FIXME probably skip inputs if they aren't passed in the ring */
ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
ctx->next_ring_offset += 16;
+ if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
+ ctx->shader->gs_prim_id_input = true;
}
for (j = 1; j < count; ++j) {
ctx->shader->input[i + j] = ctx->shader->input[i];
ctx->shader->vs_out_misc_write = 1;
ctx->shader->vs_out_point_size = 1;
break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_edgeflag = 1;
+ ctx->edgeflag_output = i;
+ break;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_viewport = 1;
+ break;
+ case TGSI_SEMANTIC_LAYER:
+ ctx->shader->vs_out_misc_write = 1;
+ ctx->shader->vs_out_layer = 1;
+ break;
case TGSI_SEMANTIC_CLIPVERTEX:
ctx->clip_vertex_write = TRUE;
ctx->cv_output = i;
break;
case TGSI_FILE_SYSTEM_VALUE:
- if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+ if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
+ d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
+ d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
+ break; /* Already handled from allocate_system_value_inputs */
+ } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
if (!ctx->native_integers) {
struct r600_bytecode_alu alu;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
break;
} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
break;
+ else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID)
+ break;
default:
R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
return -EINVAL;
return ctx->temp_reg + ctx->max_driver_temp_used++;
}
+/**
+ * Scan the shader tokens for system values and interpolateAt* opcodes and
+ * reserve one input GPR for each system-value register that is needed.
+ *
+ * Two registers can be requested:
+ *  - the face register (ctx->face_gpr), when SAMPLEMASK is declared;
+ *  - the fixed point position register (ctx->fixed_pt_position_gpr), when
+ *    SAMPLEID or SAMPLEPOS is declared or INTERP_SAMPLE is used.
+ *
+ * INTERP_SAMPLE / INTERP_OFFSET / INTERP_CENTROID additionally mark the
+ * interpolator they need in ctx->eg_interpolators[] as enabled.
+ *
+ * Each requested register is appended to ctx->shader->input[] as a
+ * constant-interpolated input and its GPR number is stored through the
+ * matching ctx field.
+ *
+ * \param ctx         shader translation context
+ * \param gpr_offset  first GPR that may be used for system values
+ * \return first GPR after the registers allocated here, or 0 if the token
+ *         stream cannot be parsed
+ *
+ * NOTE(review): the parse-failure path returns 0 rather than gpr_offset -
+ * confirm that callers tolerate this.
+ */
+static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset)
+{
+	struct tgsi_parse_context parse;
+	struct {
+		boolean enabled;
+		int *reg;
+		unsigned name, alternate_name;
+	} inputs[2] = {
+		{ false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */
+
+		{ false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */
+	};
+	int i, k, num_regs = 0;
+
+	if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
+		return 0;
+	}
+
+	/* need to scan shader for system values and interpolateAtSample/Offset/Centroid */
+	while (!tgsi_parse_end_of_tokens(&parse)) {
+		tgsi_parse_token(&parse);
+
+		if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+			const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
+			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE ||
+			    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+			    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID)
+			{
+				int interpolate, location, slot;
+
+				if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+					location = TGSI_INTERPOLATE_LOC_CENTER;
+					inputs[1].enabled = true; /* needs SAMPLEID */
+				} else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
+					location = TGSI_INTERPOLATE_LOC_CENTER;
+					/* Needs sample positions, currently those are always available */
+				} else {
+					location = TGSI_INTERPOLATE_LOC_CENTROID;
+				}
+
+				interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
+				slot = eg_get_interpolator_index(interpolate, location);
+				/* eg_get_interpolator_index() returns -1 for modes that have no
+				 * interpolator slot; indexing with it would write before the
+				 * start of eg_interpolators[]. */
+				if (slot >= 0)
+					ctx->eg_interpolators[slot].enabled = true;
+			}
+		} else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
+			struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
+			if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
+				for (k = 0; k < Elements(inputs); k++) {
+					if (d->Semantic.Name == inputs[k].name ||
+					    d->Semantic.Name == inputs[k].alternate_name) {
+						inputs[k].enabled = true;
+					}
+				}
+			}
+		}
+	}
+
+	tgsi_parse_free(&parse);
+
+	for (i = 0; i < Elements(inputs); i++) {
+		boolean enabled = inputs[i].enabled;
+		int *reg = inputs[i].reg;
+		unsigned name = inputs[i].name;
+
+		if (enabled) {
+			int gpr = gpr_offset + num_regs++;
+
+			// add to inputs, allocate a gpr
+			k = ctx->shader->ninput ++;
+			ctx->shader->input[k].name = name;
+			ctx->shader->input[k].sid = 0;
+			ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT;
+			ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER;
+			*reg = ctx->shader->input[k].gpr = gpr;
+		}
+	}
+
+	return gpr_offset + num_regs;
+}
+
/*
* for evergreen we need to scan the shader to find the number of GPRs we need to
- * reserve for interpolation.
+ * reserve for interpolation and system values
*
* we need to know if we are going to emit
- * any centroid inputs
+ * any sample or centroid inputs
* if perspective and linear are required
+ *
+ * Every interpolator used by an input declaration or by an INTERP_SAMPLE /
+ * INTERP_OFFSET / INTERP_CENTROID opcode is enabled and numbered in table
+ * order. Returns the result of allocate_system_value_inputs() called with
+ * (number of enabled interpolators + 1) / 2 as its GPR offset, or 0 if the
+ * token stream cannot be parsed.
*/
static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
{
int i;
int num_baryc;
+ struct tgsi_parse_context parse;
- ctx->input_linear = FALSE;
- ctx->input_perspective = FALSE;
- ctx->input_centroid = FALSE;
- ctx->num_interp_gpr = 1;
+ memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators));
- /* any centroid inputs */
for (i = 0; i < ctx->info.num_inputs; i++) {
- /* skip position/face */
+ int k;
+ /* skip position/face/mask/sampleid */
if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
- ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE)
+ ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE ||
+ ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK ||
+ ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID)
continue;
- if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR)
- ctx->input_linear = TRUE;
- if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE)
- ctx->input_perspective = TRUE;
- if (ctx->info.input_centroid[i])
- ctx->input_centroid = TRUE;
+
+ k = eg_get_interpolator_index(
+ ctx->info.input_interpolate[i],
+ ctx->info.input_interpolate_loc[i]);
+ if (k >= 0)
+ ctx->eg_interpolators[k].enabled = TRUE;
}
+ if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
+ return 0;
+ }
+
+ /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */
+ while (!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
+ const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE ||
+ inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+ inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID)
+ {
+ int interpolate, location, k;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ location = TGSI_INTERPOLATE_LOC_CENTER;
+ } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
+ location = TGSI_INTERPOLATE_LOC_CENTER;
+ } else {
+ location = TGSI_INTERPOLATE_LOC_CENTROID;
+ }
+
+ interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
+ k = eg_get_interpolator_index(interpolate, location);
+ /* -1 means the mode has no interpolator slot; indexing with it would write out of bounds */
+ if (k >= 0)
+ ctx->eg_interpolators[k].enabled = true;
+ }
+ }
+ }
+
+ tgsi_parse_free(&parse);
+
+ /* assign gpr to each interpolator according to priority */
num_baryc = 0;
- /* ignoring sample for now */
- if (ctx->input_perspective)
- num_baryc++;
- if (ctx->input_linear)
- num_baryc++;
- if (ctx->input_centroid)
- num_baryc *= 2;
-
- ctx->num_interp_gpr += (num_baryc + 1) >> 1;
-
- /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */
- return ctx->num_interp_gpr;
+ for (i = 0; i < Elements(ctx->eg_interpolators); i++) {
+ if (ctx->eg_interpolators[i].enabled) {
+ ctx->eg_interpolators[i].ij_index = num_baryc;
+ num_baryc ++;
+ }
+ }
+
+ /* XXX PULL MODEL and LINE STIPPLE */
+
+ num_baryc = (num_baryc + 1) >> 1;
+ return allocate_system_value_inputs(ctx, num_baryc);
+}
+
+/* Emit a vertex fetch that reads a sample position from
+ * R600_SAMPLE_POSITIONS_CONST_BUFFER into a newly allocated temp register.
+ *
+ * sample_id == NULL means fetch for current sample: the fetch index is then
+ * taken from the .w channel of ctx->fixed_pt_position_gpr. Otherwise the
+ * given source (channel chan_sel) is first MOVed into the temp register and
+ * the fetch index is read from that register's x channel.
+ *
+ * Returns the temp register number holding the fetched value, or the
+ * non-zero result of r600_bytecode_add_alu() / r600_bytecode_add_vtx() if
+ * emitting an instruction failed.
+ * NOTE(review): register numbers and error codes share the return value;
+ * presumably errors are negative - confirm against callers.
+ */
+static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel)
+{
+ struct r600_bytecode_vtx vtx;
+ int r, t1;
+
+ assert(ctx->fixed_pt_position_gpr != -1);
+
+ t1 = r600_get_temp(ctx); /* destination of the fetch, also used to stage an explicit sample id */
+
+ memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
+ vtx.op = FETCH_OP_VFETCH;
+ vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER;
+ vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
+ if (sample_id == NULL) {
+ vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
+ vtx.src_sel_x = 3;
+ }
+ else {
+ struct r600_bytecode_alu alu;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ r600_bytecode_src(&alu.src[0], sample_id, chan_sel);
+ alu.dst.sel = t1; /* dst.chan stays 0 from the memset, i.e. the x channel */
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ vtx.src_gpr = t1;
+ vtx.src_sel_x = 0;
+ }
+ vtx.mega_fetch_count = 16;
+ vtx.dst_gpr = t1;
+ vtx.dst_sel_x = 0;
+ vtx.dst_sel_y = 1;
+ vtx.dst_sel_z = 2;
+ vtx.dst_sel_w = 3;
+ vtx.data_format = FMT_32_32_32_32_FLOAT;
+ vtx.num_format_all = 2;
+ vtx.format_comp_all = 1;
+ vtx.use_const_fields = 0;
+ vtx.offset = 1; // first element is size of buffer
+ vtx.endian = r600_endian_swap(32);
+ vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
+
+ r = r600_bytecode_add_vtx(ctx->bc, &vtx);
+ if (r)
+ return r;
+
+ return t1;
}
static void tgsi_src(struct r600_shader_ctx *ctx,
r600_src->sel = V_SQ_ALU_SRC_LITERAL;
memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
- if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
+ if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) {
+ r600_src->swizzle[0] = 2; // Z value
+ r600_src->swizzle[1] = 2;
+ r600_src->swizzle[2] = 2;
+ r600_src->swizzle[3] = 2;
+ r600_src->sel = ctx->face_gpr;
+ } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) {
+ r600_src->swizzle[0] = 3; // W value
+ r600_src->swizzle[1] = 3;
+ r600_src->swizzle[2] = 3;
+ r600_src->swizzle[3] = 3;
+ r600_src->sel = ctx->fixed_pt_position_gpr;
+ } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) {
+ r600_src->swizzle[0] = 0;
+ r600_src->swizzle[1] = 1;
+ r600_src->swizzle[2] = 4;
+ r600_src->swizzle[3] = 4;
+ r600_src->sel = load_sample_position(ctx, NULL, -1);
+ } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
r600_src->swizzle[0] = 3;
r600_src->swizzle[1] = 3;
r600_src->swizzle[2] = 3;
r600_src->swizzle[2] = 0;
r600_src->swizzle[3] = 0;
r600_src->sel = 0;
+ } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
+ r600_src->swizzle[0] = 3;
+ r600_src->swizzle[1] = 3;
+ r600_src->swizzle[2] = 3;
+ r600_src->swizzle[3] = 3;
+ r600_src->sel = 1;
}
} else {
if (tgsi_src->Register.Indirect)
if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) {
if (tgsi_src->Register.Dimension) {
r600_src->kc_bank = tgsi_src->Dimension.Index;
+ if (tgsi_src->Dimension.Indirect) {
+ r600_src->kc_rel = 1;
+ }
}
}
}
-static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int cb_idx, unsigned int offset, unsigned int dst_reg)
+static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
+ unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
+ unsigned int dst_reg)
{
struct r600_bytecode_vtx vtx;
unsigned int ar_reg;
alu.op = ALU_OP2_ADD_INT;
alu.src[0].sel = ctx->bc->ar_reg;
+ alu.src[0].chan = ar_chan;
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].value = offset;
alu.dst.sel = dst_reg;
+ alu.dst.chan = ar_chan;
alu.dst.write = 1;
alu.last = 1;
vtx.buffer_id = cb_idx;
vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
vtx.src_gpr = ar_reg;
+ vtx.src_sel_x = ar_chan;
vtx.mega_fetch_count = 16;
vtx.dst_gpr = dst_reg;
vtx.dst_sel_x = 0; /* SEL_X */
vtx.data_format = FMT_32_32_32_32_FLOAT;
vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */
vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */
- vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
vtx.endian = r600_endian_swap(32);
+ vtx.buffer_index_mode = cb_rel; // cb_rel ? V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE;
if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
return r;
return 0;
}
-static int fetch_gs_input(struct r600_shader_ctx *ctx, unsigned index, unsigned vtx_id, unsigned int dst_reg)
+static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
{
struct r600_bytecode_vtx vtx;
int r;
+ unsigned index = src->Register.Index;
+ unsigned vtx_id = src->Dimension.Index;
int offset_reg = vtx_id / 3;
int offset_chan = vtx_id % 3;
if (offset_reg == 0 && offset_chan == 2)
offset_chan = 3;
+ if (src->Dimension.Indirect) {
+ int treg[3];
+ int t2;
+ struct r600_bytecode_alu alu;
+ int r, i;
+
+ /* you have got to be shitting me -
+ we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt.
+ at least this is what fglrx seems to do. */
+ for (i = 0; i < 3; i++) {
+ treg[i] = r600_get_temp(ctx);
+ }
+ t2 = r600_get_temp(ctx);
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = 0;
+ alu.src[0].chan = i == 2 ? 3 : i;
+ alu.dst.sel = treg[i];
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = treg[0];
+ alu.src[0].rel = 1;
+ alu.dst.sel = t2;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ offset_reg = t2;
+ }
+
+
memset(&vtx, 0, sizeof(vtx));
vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */
vtx.dst_sel_y = 1; /* SEL_Y */
vtx.dst_sel_z = 2; /* SEL_Z */
vtx.dst_sel_w = 3; /* SEL_W */
- vtx.use_const_fields = 1;
+ if (ctx->bc->chip_class >= EVERGREEN) {
+ vtx.use_const_fields = 1;
+ } else {
+ vtx.data_format = FMT_32_32_32_32_FLOAT;
+ }
if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
return r;
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
struct tgsi_full_src_register *src = &inst->Src[i];
+ if (src->Register.File == TGSI_FILE_INPUT) {
+ if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) {
+ /* primitive id is in R0.z */
+ ctx->src[i].sel = 0;
+ ctx->src[i].swizzle[0] = 2;
+ }
+ }
if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) {
int treg = r600_get_temp(ctx);
- int index = src->Register.Index;
- int vtx_id = src->Dimension.Index;
- fetch_gs_input(ctx, index, vtx_id, treg);
+ fetch_gs_input(ctx, src, treg);
ctx->src[i].sel = treg;
}
}
continue;
}
+ if (ctx->src[i].kc_rel)
+ ctx->shader->uses_index_registers = true;
+
if (ctx->src[i].rel) {
+ int chan = inst->Src[i].Indirect.Swizzle;
int treg = r600_get_temp(ctx);
- if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].sel - 512, treg)))
+ if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg)))
return r;
ctx->src[i].kc_bank = 0;
+ ctx->src[i].kc_rel = 0;
ctx->src[i].sel = treg;
ctx->src[i].rel = 0;
j--;
alu.src[0].sel = ctx->src[i].sel;
alu.src[0].chan = k;
alu.src[0].rel = ctx->src[i].rel;
+ alu.src[0].kc_bank = ctx->src[i].kc_bank;
+ alu.src[0].kc_rel = ctx->src[i].kc_rel;
alu.dst.sel = treg;
alu.dst.chan = k;
alu.dst.write = 1;
return r;
}
+/*
+ * Rewrite the edge flag output register in place as an integer.
+ *
+ * Does nothing unless the shader writes an edge flag. Otherwise two ALU
+ * instructions are appended that operate on channel 0 of the edge flag
+ * output GPR: a clamping MOV followed by FLT_TO_INT. The results of
+ * r600_bytecode_add_alu() are not checked.
+ */
+static void convert_edgeflag_to_int(struct r600_shader_ctx *ctx)
+{
+	struct r600_bytecode_alu insn;
+	unsigned edgeflag_gpr;
+
+	if (ctx->shader->vs_out_edgeflag) {
+		edgeflag_gpr = ctx->shader->output[ctx->edgeflag_output].gpr;
+
+		/* clamp(x, 0, 1) */
+		memset(&insn, 0, sizeof(struct r600_bytecode_alu));
+		insn.op = ALU_OP1_MOV;
+		insn.last = 1;
+		insn.dst.clamp = 1;
+		insn.dst.write = 1;
+		insn.dst.sel = edgeflag_gpr;
+		insn.src[0].sel = edgeflag_gpr;
+		r600_bytecode_add_alu(ctx->bc, &insn);
+
+		/* float -> int on the clamped value */
+		memset(&insn, 0, sizeof(struct r600_bytecode_alu));
+		insn.op = ALU_OP1_FLT_TO_INT;
+		insn.last = 1;
+		insn.dst.write = 1;
+		insn.dst.sel = edgeflag_gpr;
+		insn.src[0].sel = edgeflag_gpr;
+		r600_bytecode_add_alu(ctx->bc, &insn);
+	}
+}
+
static int generate_gs_copy_shader(struct r600_context *rctx,
- struct r600_pipe_shader *gs)
+ struct r600_pipe_shader *gs,
+ struct pipe_stream_output_info *so)
{
struct r600_shader_ctx ctx = {};
struct r600_shader *gs_shader = &gs->shader;
struct r600_bytecode_output output;
struct r600_bytecode_cf *cf_jump, *cf_pop,
*last_exp_pos = NULL, *last_exp_param = NULL;
- int i, next_pos = 60, next_param = 0;
+ int i, next_clip_pos = 61, next_param = 0;
cshader = calloc(1, sizeof(struct r600_pipe_shader));
if (!cshader)
vtx.dst_sel_y = 1;
vtx.dst_sel_z = 2;
vtx.dst_sel_w = 3;
- vtx.use_const_fields = 1;
+ if (rctx->b.chip_class >= EVERGREEN) {
+ vtx.use_const_fields = 1;
+ } else {
+ vtx.data_format = FMT_32_32_32_32_FLOAT;
+ }
r600_bytecode_add_vtx(ctx.bc, &vtx);
}
/* XXX handle clipvertex, streamout? */
+ emit_streamout(&ctx, so);
/* export vertex data */
/* XXX factor out common code with r600_shader_from_tgsi ? */
output.op = CF_OP_EXPORT;
switch (out->name) {
case TGSI_SEMANTIC_POSITION:
- output.array_base = next_pos++;
+ output.array_base = 60;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
break;
case TGSI_SEMANTIC_PSIZE:
- output.array_base = next_pos++;
+ output.array_base = 61;
+ if (next_clip_pos == 61)
+ next_clip_pos = 62;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ output.swizzle_y = 7;
+ output.swizzle_z = 7;
+ output.swizzle_w = 7;
+ ctx.shader->vs_out_misc_write = 1;
+ ctx.shader->vs_out_point_size = 1;
+ break;
+ case TGSI_SEMANTIC_LAYER:
+ if (out->spi_sid) {
+ /* duplicate it as PARAM to pass to the pixel shader */
+ output.array_base = next_param++;
+ r600_bytecode_add_output(ctx.bc, &output);
+ last_exp_param = ctx.bc->cf_last;
+ }
+ output.array_base = 61;
+ if (next_clip_pos == 61)
+ next_clip_pos = 62;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ output.swizzle_x = 7;
+ output.swizzle_y = 7;
+ output.swizzle_z = 0;
+ output.swizzle_w = 7;
+ ctx.shader->vs_out_misc_write = 1;
+ ctx.shader->vs_out_layer = 1;
+ break;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ if (out->spi_sid) {
+ /* duplicate it as PARAM to pass to the pixel shader */
+ output.array_base = next_param++;
+ r600_bytecode_add_output(ctx.bc, &output);
+ last_exp_param = ctx.bc->cf_last;
+ }
+ output.array_base = 61;
+ if (next_clip_pos == 61)
+ next_clip_pos = 62;
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ ctx.shader->vs_out_misc_write = 1;
+ ctx.shader->vs_out_viewport = 1;
+ output.swizzle_x = 7;
+ output.swizzle_y = 7;
+ output.swizzle_z = 7;
+ output.swizzle_w = 0;
break;
case TGSI_SEMANTIC_CLIPDIST:
/* spi_sid is 0 for clipdistance outputs that were generated
* for clipvertex - we don't need to pass them to PS */
+ ctx.shader->clip_dist_write = gs->shader.clip_dist_write;
if (out->spi_sid) {
/* duplicate it as PARAM to pass to the pixel shader */
output.array_base = next_param++;
r600_bytecode_add_output(ctx.bc, &output);
last_exp_param = ctx.bc->cf_last;
}
- output.array_base = next_pos++;
+ output.array_base = next_clip_pos++;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
break;
case TGSI_SEMANTIC_FOG:
output.burst_count = 1;
output.type = 2;
output.op = CF_OP_EXPORT;
- output.array_base = next_pos++;
+ output.array_base = 60;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
r600_bytecode_add_output(ctx.bc, &output);
last_exp_pos = ctx.bc->cf_last;
cf_pop->cf_addr = cf_pop->id + 2;
cf_pop->pop_count = 1;
- r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
- ctx.bc->cf_last->end_of_program = 1;
+ if (ctx.bc->chip_class == CAYMAN)
+ cm_bytecode_add_cf_end(ctx.bc);
+ else {
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
+ ctx.bc->cf_last->end_of_program = 1;
+ }
gs->gs_copy_shader = cshader;
return r600_bytecode_build(ctx.bc);
}
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx)
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
{
struct r600_bytecode_output output;
int i, k, ring_offset;
/* next_ring_offset after parsing input decls contains total size of
* single vertex data, gs_next_vertex - current vertex index */
- ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;
+ if (!ind)
+ ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;
+ /* get a temp and add the ring offset to the next vertex base in the shader */
memset(&output, 0, sizeof(struct r600_bytecode_output));
output.gpr = ctx->shader->output[i].gpr;
output.elem_size = 3;
output.comp_mask = 0xF;
output.burst_count = 1;
+
+ if (ind)
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
+ else
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
output.op = CF_OP_MEM_RING;
- output.array_base = ring_offset >> 2; /* in dwords */
+
+
+ if (ind) {
+ output.array_base = ring_offset >> 2; /* in dwords */
+ output.array_size = 0xfff;
+ output.index_gpr = ctx->gs_export_gpr_treg;
+ } else
+ output.array_base = ring_offset >> 2; /* in dwords */
r600_bytecode_add_output(ctx->bc, &output);
}
+
+ if (ind) {
+ struct r600_bytecode_alu alu;
+ int r;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_ADD_INT;
+ alu.src[0].sel = ctx->gs_export_gpr_treg;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = ctx->gs_out_ring_offset >> 4;
+ alu.dst.sel = ctx->gs_export_gpr_treg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
++ctx->gs_next_vertex;
return 0;
}
unsigned output_done, noutput;
unsigned opcode;
int i, j, k, r = 0;
- int next_pos_base = 60, next_param_base = 0;
+ int next_param_base = 0, next_clip_base;
int max_color_exports = MAX2(key.nr_cbufs, 1);
/* Declarations used by llvm code */
bool use_llvm = false;
bool indirect_gprs;
bool ring_outputs = false;
+ bool pos_emitted = false;
#ifdef R600_USE_LLVM
- use_llvm = !(rscreen->b.debug_flags & DBG_NO_LLVM);
+ use_llvm = rscreen->b.debug_flags & DBG_LLVM;
#endif
ctx.bc = &shader->bc;
ctx.shader = shader;
ctx.gs_out_ring_offset = 0;
ctx.gs_next_vertex = 0;
+ shader->uses_index_registers = false;
ctx.face_gpr = -1;
+ ctx.fixed_pt_position_gpr = -1;
ctx.fragcoord_input = -1;
ctx.colors_used = 0;
ctx.clip_vertex_write = 0;
r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
}
}
- if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) {
- ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
+ if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
+ if (ctx.bc->chip_class >= EVERGREEN)
+ ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
+ else
+ ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]);
}
- if (ctx.type == TGSI_PROCESSOR_GEOMETRY && ctx.bc->chip_class >= EVERGREEN) {
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
/* FIXME 1 would be enough in some cases (3 or less input vertices) */
ctx.file_offset[TGSI_FILE_INPUT] = 2;
}
ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
- ctx.temp_reg = ctx.bc->ar_reg + 1;
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 1;
+ ctx.temp_reg = ctx.bc->ar_reg + 2;
+ ctx.bc->index_reg[0] = ctx.bc->ar_reg + 3;
+ ctx.bc->index_reg[1] = ctx.bc->ar_reg + 4;
+ } else {
+ ctx.temp_reg = ctx.bc->ar_reg + 1;
+ ctx.bc->index_reg[0] = ctx.bc->ar_reg + 2;
+ ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3;
+ }
if (indirect_gprs) {
shader->max_arrays = 0;
if (property->u[0].Data == 1)
shader->fs_write_all = TRUE;
break;
+ case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION:
+ if (property->u[0].Data == 1)
+ shader->vs_position_window_space = TRUE;
+ break;
case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
/* we don't need this one */
break;
case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
shader->gs_max_out_vertices = property->u[0].Data;
break;
+ case TGSI_PROPERTY_GS_INVOCATIONS:
+ shader->gs_num_invocations = property->u[0].Data;
+ break;
}
break;
default:
int gpr = ctx.file_offset[TGSI_FILE_INPUT] +
ctx.info.file_max[TGSI_FILE_INPUT] + 1;
+ /* if two sided and neither face nor sample mask is used by shader, ensure face_gpr is emitted */
if (ctx.face_gpr == -1) {
i = ctx.shader->ninput++;
ctx.shader->input[i].name = TGSI_SEMANTIC_FACE;
}
}
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ struct r600_bytecode_alu alu;
+ int r;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[0].value = 0;
+ alu.dst.sel = ctx.gs_export_gpr_treg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx.bc, &alu);
+ if (r)
+ return r;
+ }
if (shader->two_side && ctx.colors_used) {
if ((r = process_twoside_color_inputs(&ctx)))
return r;
so.num_outputs && !use_llvm)
emit_streamout(&ctx, &so);
+ convert_edgeflag_to_int(&ctx);
+
if (ring_outputs) {
if (key.vs_as_es)
- emit_gs_ring_writes(&ctx);
+ emit_gs_ring_writes(&ctx, FALSE);
} else {
- /* export output */
+ /* Export output */
+ next_clip_base = shader->vs_out_misc_write ? 62 : 61;
+
for (i = 0, j = 0; i < noutput; i++, j++) {
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = shader->output[i].gpr;
case TGSI_PROCESSOR_VERTEX:
switch (shader->output[i].name) {
case TGSI_SEMANTIC_POSITION:
- output[j].array_base = next_pos_base++;
+ output[j].array_base = 60;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ pos_emitted = true;
break;
case TGSI_SEMANTIC_PSIZE:
- output[j].array_base = next_pos_base++;
+ output[j].array_base = 61;
+ output[j].swizzle_y = 7;
+ output[j].swizzle_z = 7;
+ output[j].swizzle_w = 7;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ pos_emitted = true;
+ break;
+ case TGSI_SEMANTIC_EDGEFLAG:
+ output[j].array_base = 61;
+ output[j].swizzle_x = 7;
+ output[j].swizzle_y = 0;
+ output[j].swizzle_z = 7;
+ output[j].swizzle_w = 7;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ pos_emitted = true;
+ break;
+ case TGSI_SEMANTIC_LAYER:
+ /* spi_sid is 0 for outputs that are
+ * not consumed by PS */
+ if (shader->output[i].spi_sid) {
+ output[j].array_base = next_param_base++;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ j++;
+ memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
+ }
+ output[j].array_base = 61;
+ output[j].swizzle_x = 7;
+ output[j].swizzle_y = 7;
+ output[j].swizzle_z = 0;
+ output[j].swizzle_w = 7;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ pos_emitted = true;
+ break;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ /* spi_sid is 0 for outputs that are
+ * not consumed by PS */
+ if (shader->output[i].spi_sid) {
+ output[j].array_base = next_param_base++;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+ j++;
+ memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
+ }
+ output[j].array_base = 61;
+ output[j].swizzle_x = 7;
+ output[j].swizzle_y = 7;
+ output[j].swizzle_z = 7;
+ output[j].swizzle_w = 0;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ pos_emitted = true;
break;
case TGSI_SEMANTIC_CLIPVERTEX:
j--;
break;
case TGSI_SEMANTIC_CLIPDIST:
- output[j].array_base = next_pos_base++;
+ output[j].array_base = next_clip_base++;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+ pos_emitted = true;
/* spi_sid is 0 for clipdistance outputs that were generated
* for clipvertex - we don't need to pass them to PS */
if (shader->output[i].spi_sid) {
output[j].swizzle_y = 1;
output[j].swizzle_z = output[j].swizzle_w = 7;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+ } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) {
+ output[j].array_base = 61;
+ output[j].swizzle_x = 7;
+ output[j].swizzle_y = 7;
+ output[j].swizzle_z = 0;
+ output[j].swizzle_w = 7;
+ output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
} else {
R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
r = -EINVAL;
}
/* add fake position export */
- if (ctx.type == TGSI_PROCESSOR_VERTEX && next_pos_base == 60) {
+ if (ctx.type == TGSI_PROCESSOR_VERTEX && pos_emitted == false) {
memset(&output[j], 0, sizeof(struct r600_bytecode_output));
output[j].gpr = 0;
output[j].elem_size = 3;
output[j].swizzle_w = 7;
output[j].burst_count = 1;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
- output[j].array_base = next_pos_base;
+ output[j].array_base = 60;
output[j].op = CF_OP_EXPORT;
j++;
}
}
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
- if ((r = generate_gs_copy_shader(rctx, pipeshader)))
+ if ((r = generate_gs_copy_shader(rctx, pipeshader, &so)))
return r;
}
bc_src->rel = shader_src->rel;
bc_src->value = shader_src->value[bc_src->chan];
bc_src->kc_bank = shader_src->kc_bank;
+ bc_src->kc_rel = shader_src->kc_rel;
}
static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
int i, j, k, r;
struct r600_bytecode_alu alu;
- int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
- for (k = 0; k < last_slot; k++) {
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+ int t1 = ctx->temp_reg;
+
+ for (k = 0; k <= lasti; k++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
continue;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
}
- tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.sel = t1;
+ alu.dst.chan = i;
alu.dst.write = (i == k);
if (i == 3)
alu.last = 1;
return r;
}
}
+
+ for (i = 0 ; i <= lasti; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = t1;
+ alu.src[0].chan = i;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.write = 1;
+ if (i == lasti)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
return 0;
}
alu.src[2].sel = ctx->temp_reg;
alu.src[2].chan = i;
- if (i == 3)
- alu.last = 1;
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+/*
+ * Emit the ALU sequence for the bitfield-insert instruction currently being
+ * parsed (ctx->parse.FullToken.FullInstruction).
+ *
+ * Only channels enabled in the destination write mask are processed. Three
+ * passes are emitted, one ALU op per enabled channel in each pass:
+ *   1. BFM_INT   t1  <- (TGSI src 3, TGSI src 2)
+ *   2. LSHL_INT  t2  <- (TGSI src 1, TGSI src 2)
+ *   3. BFI_INT   dst <- (t1, t2, TGSI src 0)
+ * NOTE(review): presumably TGSI src 0..3 are base / insert / offset / bits;
+ * confirm against the TGSI BFI definition.
+ *
+ * Returns 0 on success, otherwise the non-zero value returned by
+ * r600_bytecode_add_alu().
+ */
+static int tgsi_bfi(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int i, r, t1, t2;
+
+ unsigned write_mask = inst->Dst[0].Register.WriteMask;
+ int last_inst = tgsi_last_instruction(write_mask); /* channel on which alu.last is set in every pass below */
+
+ t1 = ctx->temp_reg; /* scratch register receiving the BFM_INT result */
+
+ for (i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+
+ /* create mask tmp */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_BFM_INT;
+ alu.dst.sel = t1;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ alu.last = i == last_inst;
+
+ r600_bytecode_src(&alu.src[0], &ctx->src[3], i);
+ r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ t2 = r600_get_temp(ctx); /* second scratch register; presumably distinct from t1 (r600_get_temp is not visible here) */
+
+ for (i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+
+ /* shift insert left */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_LSHL_INT;
+ alu.dst.sel = t2;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ alu.last = i == last_inst;
+
+ r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
+ r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+
+ /* actual bitfield insert */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP3_BFI_INT;
+ alu.is_op3 = 1;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.chan = i; /* assigned after tgsi_dst(), so this is the value left in alu.dst.chan */
+ alu.dst.write = 1;
+ alu.last = i == last_inst;
+
+ alu.src[0].sel = t1; /* result of pass 1 */
+ alu.src[0].chan = i;
+ alu.src[1].sel = t2; /* result of pass 2 */
+ alu.src[1].chan = i;
+ r600_bytecode_src(&alu.src[2], &ctx->src[0], i);
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+/*
+ * Emit the ALU sequence for a most-significant-bit query. The hardware op is
+ * taken from ctx->inst_info->op and is asserted to be FFBH_INT or FFBH_UINT.
+ *
+ * Only channels enabled in the destination write mask are processed. Three
+ * passes are emitted, one ALU op per enabled channel in each pass:
+ *   1. t1  = FFBH_INT / FFBH_UINT of TGSI src 0
+ *   2. t2  = 31 - t1
+ *   3. dst = CNDGE_INT(t1, t2, t1), i.e. t2 when t1 >= 0, otherwise t1
+ *
+ * Returns 0 on success, otherwise the non-zero value returned by
+ * r600_bytecode_add_alu().
+ */
+static int tgsi_msb(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int i, r, t1, t2;
+
+ unsigned write_mask = inst->Dst[0].Register.WriteMask;
+ int last_inst = tgsi_last_instruction(write_mask); /* channel on which alu.last is set in every pass below */
+
+ assert(ctx->inst_info->op == ALU_OP1_FFBH_INT ||
+ ctx->inst_info->op == ALU_OP1_FFBH_UINT);
+
+ t1 = ctx->temp_reg; /* scratch register receiving the raw FFBH result */
+
+ /* bit position is indexed from lsb by TGSI, and from msb by the hardware */
+ for (i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+
+ /* t1 = FFBH_INT / FFBH_UINT */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ctx->inst_info->op;
+ alu.dst.sel = t1;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ alu.last = i == last_inst;
+
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ t2 = r600_get_temp(ctx); /* second scratch register; presumably distinct from t1 (r600_get_temp is not visible here) */
+
+ for (i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+
+ /* t2 = 31 - t1 */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_SUB_INT;
+ alu.dst.sel = t2;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ alu.last = i == last_inst;
+
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[0].value = 31;
+ alu.src[1].sel = t1;
+ alu.src[1].chan = i;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (!(write_mask & (1<<i)))
+ continue;
+
+ /* result = t1 >= 0 ? t2 : t1 */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP3_CNDGE_INT;
+ alu.is_op3 = 1;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.chan = i; /* assigned after tgsi_dst(), so this is the value left in alu.dst.chan */
+ alu.dst.write = 1;
+ alu.last = i == last_inst;
+
+ alu.src[0].sel = t1; /* condition operand */
+ alu.src[0].chan = i;
+ alu.src[1].sel = t2; /* selected when t1 >= 0 */
+ alu.src[1].chan = i;
+ alu.src[2].sel = t1; /* selected otherwise: the raw FFBH result is passed through */
+ alu.src[2].chan = i;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+/*
+ * Emit code for an explicit interpolation instruction on a shader input.
+ * TGSI src 0 is asserted to be in TGSI_FILE_INPUT.
+ *
+ * Steps, in emission order:
+ *   - Pick the interpolator: location CENTER for INTERP_OFFSET and
+ *     INTERP_SAMPLE, CENTROID for any other opcode routed here. A negative
+ *     index from eg_get_interpolator_index() falls back to interpolator 0.
+ *   - INTERP_OFFSET / INTERP_SAMPLE only: fetch the horizontal and vertical
+ *     gradients of the i/j pair with two texture-unit instructions, then run
+ *     two MULADD passes that leave the adjusted i/j in channels 0 and 1 of
+ *     ctx->temp_reg. The multiplier comes from channels 2 and 3 of the
+ *     register returned by load_sample_position() for INTERP_SAMPLE, and
+ *     from channels 0 and 1 of TGSI src 1 for INTERP_OFFSET.
+ *   - Eight INTERP_ZW / INTERP_XY ops targeting a fresh temp; only
+ *     iterations 2..5 have dst.write set.
+ *   - One MOV per enabled destination channel, reading the temp through the
+ *     swizzle of TGSI src 0.
+ *
+ * Returns 0 on success, otherwise the non-zero value returned by
+ * r600_bytecode_add_tex() or r600_bytecode_add_alu().
+ */
+static int tgsi_interp_egcm(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti;
+ unsigned location;
+ int input;
+
+ assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
+
+ input = inst->Src[0].Register.Index;
+
+ /* Interpolators have been marked for use already by allocate_system_value_inputs */
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+ inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */
+ }
+ else {
+ location = TGSI_INTERPOLATE_LOC_CENTROID;
+ }
+
+ k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location);
+ if (k < 0)
+ k = 0; /* no matching interpolator reported: fall back to entry 0 */
+ interp_gpr = ctx->eg_interpolators[k].ij_index / 2; /* two i/j pairs per register ... */
+ interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2); /* ... so the pair starts at channel 0 or 2 */
+
+ /* NOTE: currently offset is not perspective correct */
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+ inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ int sample_gpr = -1;
+ int gradientsH, gradientsV;
+ struct r600_bytecode_tex tex;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]);
+ }
+
+ gradientsH = r600_get_temp(ctx);
+ gradientsV = r600_get_temp(ctx);
+ for (i = 0; i < 2; i++) { /* i == 0: horizontal gradients, i == 1: vertical gradients */
+ memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+ tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V;
+ tex.src_gpr = interp_gpr;
+ tex.src_sel_x = interp_base_chan + 0;
+ tex.src_sel_y = interp_base_chan + 1;
+ tex.src_sel_z = 0;
+ tex.src_sel_w = 0;
+ tex.dst_gpr = i == 0 ? gradientsH : gradientsV;
+ tex.dst_sel_x = 0;
+ tex.dst_sel_y = 1;
+ tex.dst_sel_z = 7;
+ tex.dst_sel_w = 7;
+ tex.inst_mod = 1; // Use per pixel gradient calculation
+ tex.sampler_id = 0;
+ tex.resource_id = tex.sampler_id;
+ r = r600_bytecode_add_tex(ctx->bc, &tex);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < 2; i++) { /* first MULADD pass: operands are gradientsH, first offset component, original i/j */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP3_MULADD;
+ alu.is_op3 = 1;
+ alu.src[0].sel = gradientsH;
+ alu.src[0].chan = i;
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ alu.src[1].sel = sample_gpr;
+ alu.src[1].chan = 2;
+ }
+ else {
+ r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
+ }
+ alu.src[2].sel = interp_gpr;
+ alu.src[2].chan = interp_base_chan + i;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.last = i == 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ for (i = 0; i < 2; i++) { /* second MULADD pass: operands are gradientsV, second offset component, result of first pass */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP3_MULADD;
+ alu.is_op3 = 1;
+ alu.src[0].sel = gradientsV;
+ alu.src[0].chan = i;
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ alu.src[1].sel = sample_gpr;
+ alu.src[1].chan = 3;
+ }
+ else {
+ r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
+ }
+ alu.src[2].sel = ctx->temp_reg;
+ alu.src[2].chan = i;
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.last = i == 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ }
+
+ tmp = r600_get_temp(ctx);
+ for (i = 0; i < 8; i++) { /* iterations 0..3 emit INTERP_ZW, 4..7 emit INTERP_XY */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY;
+
+ alu.dst.sel = tmp;
+ if ((i > 1 && i < 6)) { /* only iterations 2..5 write: channels 2,3 of the ZW group and 0,1 of the XY group */
+ alu.dst.write = 1;
+ }
+ else {
+ alu.dst.write = 0;
+ }
+ alu.dst.chan = i % 4;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+ inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ alu.src[0].sel = ctx->temp_reg; /* adjusted i/j produced by the MULADD passes above */
+ alu.src[0].chan = 1 - (i % 2);
+ } else {
+ alu.src[0].sel = interp_gpr; /* unmodified i/j of the selected interpolator */
+ alu.src[0].chan = interp_base_chan + 1 - (i % 2);
+ }
+ alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
+ alu.src[1].chan = 0;
+
+ alu.last = i % 4 == 3; /* close a group after every fourth op */
+ alu.bank_swizzle_force = SQ_ALU_VEC_210;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ // INTERP can't swizzle dst
+ lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+ for (i = 0; i <= lasti; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = tmp;
+ alu.src[0].chan = ctx->src[0].swizzle[i]; /* apply the swizzle of TGSI src 0 while copying out of tmp */
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.write = 1;
+ alu.last = i == lasti;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
}
+
return 0;
}
+
static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
{
struct r600_bytecode_alu alu;
return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
inst->Src[index].Register.File != TGSI_FILE_INPUT &&
inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
- ctx->src[index].neg || ctx->src[index].abs;
+ ctx->src[index].neg || ctx->src[index].abs ||
+ (inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == TGSI_PROCESSOR_GEOMETRY);
}
static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */
vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */
vtx.use_const_fields = 1;
- vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
return r;
alu.op = ALU_OP1_MOV;
if (ctx->bc->chip_class >= EVERGREEN) {
- alu.src[0].sel = 512 + (id / 4);
- alu.src[0].chan = id % 4;
+ /* channel 0 or 2 of each word */
+ alu.src[0].sel = 512 + (id / 2);
+ alu.src[0].chan = (id % 2) * 2;
} else {
/* r600 we have them at channel 2 of the second dword */
alu.src[0].sel = 512 + (id * 2) + 1;
(inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);
+ bool txf_add_offsets = inst->Texture.NumOffsets &&
+ inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
+ inst->Texture.Texture != TGSI_TEXTURE_BUFFER;
+
/* Texture fetch instructions can only use gprs as source.
* Also they cannot negate the source or take the absolute value */
const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
tgsi_tex_src_requires_loading(ctx, 0)) ||
- read_compressed_msaa;
+ read_compressed_msaa || txf_add_offsets;
+
boolean src_loaded = FALSE;
unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
int8_t offset_x = 0, offset_y = 0, offset_z = 0;
boolean has_txq_cube_array_z = false;
+ unsigned sampler_index_mode;
if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
- inst->Instruction.Opcode == TGSI_OPCODE_TXL2)
+ inst->Instruction.Opcode == TGSI_OPCODE_TXL2 ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TG4)
sampler_src_reg = 2;
+ /* TGSI moves the sampler to src reg 3 for TXD */
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXD)
+ sampler_src_reg = 3;
+
+ sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
+ if (sampler_index_mode)
+ ctx->shader->uses_index_registers = true;
+
src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
}
}
- /* get offset values */
- if (inst->Texture.NumOffsets) {
- assert(inst->Texture.NumOffsets == 1);
-
- offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
- offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
- offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
- }
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
- /* TGSI moves the sampler to src reg 3 for TXD */
- sampler_src_reg = 3;
-
- for (i = 1; i < 3; i++) {
- /* set gradients h/v */
- memset(&tex, 0, sizeof(struct r600_bytecode_tex));
- tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H :
- FETCH_OP_SET_GRADIENTS_V;
- tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
- tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
-
- if (tgsi_tex_src_requires_loading(ctx, i)) {
- tex.src_gpr = r600_get_temp(ctx);
- tex.src_sel_x = 0;
- tex.src_sel_y = 1;
- tex.src_sel_z = 2;
- tex.src_sel_w = 3;
-
- for (j = 0; j < 4; j++) {
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP1_MOV;
- r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
- alu.dst.sel = tex.src_gpr;
- alu.dst.chan = j;
- if (j == 3)
- alu.last = 1;
- alu.dst.write = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
-
- } else {
- tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i);
- tex.src_sel_x = ctx->src[i].swizzle[0];
- tex.src_sel_y = ctx->src[i].swizzle[1];
- tex.src_sel_z = ctx->src[i].swizzle[2];
- tex.src_sel_w = ctx->src[i].swizzle[3];
- tex.src_rel = ctx->src[i].rel;
- }
- tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */
- tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
- if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
- tex.coord_type_x = 1;
- tex.coord_type_y = 1;
- tex.coord_type_z = 1;
- tex.coord_type_w = 1;
- }
- r = r600_bytecode_add_tex(ctx->bc, &tex);
- if (r)
- return r;
- }
- } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
int out_chan;
/* Add perspective divide */
if (ctx->bc->chip_class == CAYMAN) {
src_gpr = ctx->temp_reg;
}
+
if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
src_gpr = ctx->temp_reg;
}
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
+ int temp_h = 0, temp_v = 0;
+ int start_val = 0;
+
+ /* if we've already loaded the src (i.e. for CUBE), don't reload it. */
+ if (src_loaded == TRUE)
+ start_val = 1;
+ else
+ src_loaded = TRUE;
+ for (i = start_val; i < 3; i++) {
+ int treg = r600_get_temp(ctx);
+
+ if (i == 0)
+ src_gpr = treg;
+ else if (i == 1)
+ temp_h = treg;
+ else
+ temp_v = treg;
+
+ for (j = 0; j < 4; j++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ r600_bytecode_src(&alu.src[0], &ctx->src[i], j);
+ alu.dst.sel = treg;
+ alu.dst.chan = j;
+ if (j == 3)
+ alu.last = 1;
+ alu.dst.write = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ }
+ for (i = 1; i < 3; i++) {
+ /* set gradients h/v */
+ memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+ tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H :
+ FETCH_OP_SET_GRADIENTS_V;
+ tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
+ tex.sampler_index_mode = sampler_index_mode;
+ tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
+ tex.resource_index_mode = sampler_index_mode;
+
+ tex.src_gpr = (i == 1) ? temp_h : temp_v;
+ tex.src_sel_x = 0;
+ tex.src_sel_y = 1;
+ tex.src_sel_z = 2;
+ tex.src_sel_w = 3;
+
+ tex.dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */
+ tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7;
+ if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
+ tex.coord_type_x = 1;
+ tex.coord_type_y = 1;
+ tex.coord_type_z = 1;
+ tex.coord_type_w = 1;
+ }
+ r = r600_bytecode_add_tex(ctx->bc, &tex);
+ if (r)
+ return r;
+ }
+ }
+
if (src_requires_loading && !src_loaded) {
for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
src_gpr = ctx->temp_reg;
}
+ /* get offset values */
+ if (inst->Texture.NumOffsets) {
+ assert(inst->Texture.NumOffsets == 1);
+
+ /* The texture offset feature doesn't work with the TXF instruction
+ * and must be emulated by adding the offset to the texture coordinates. */
+ if (txf_add_offsets) {
+ const struct tgsi_texture_offset *off = inst->TexOffsets;
+
+ switch (inst->Texture.Texture) {
+ case TGSI_TEXTURE_3D:
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_ADD_INT;
+ alu.src[0].sel = src_gpr;
+ alu.src[0].chan = 2;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ];
+ alu.dst.sel = src_gpr;
+ alu.dst.chan = 2;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ /* fall through */
+
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_2D_ARRAY:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_ADD_INT;
+ alu.src[0].sel = src_gpr;
+ alu.src[0].chan = 1;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY];
+ alu.dst.sel = src_gpr;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ /* fall through */
+
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_1D_ARRAY:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_ADD_INT;
+ alu.src[0].sel = src_gpr;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX];
+ alu.dst.sel = src_gpr;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ break;
+ /* texture offsets do not apply to other texture targets */
+ }
+ } else {
+ switch (inst->Texture.Texture) {
+ case TGSI_TEXTURE_3D:
+ offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
+ /* fallthrough */
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_2D_ARRAY:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
+ /* fallthrough */
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_1D_ARRAY:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
+ }
+ }
+ }
+
/* Obtain the sample index for reading a compressed MSAA color texture.
* To read the FMASK, we use the ldfptr instruction, which tells us
* where the samples are stored.
tex.op = FETCH_OP_LD;
tex.inst_mod = 1; /* to indicate this is ldfptr */
tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
+ tex.sampler_index_mode = sampler_index_mode;
tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
+ tex.resource_index_mode = sampler_index_mode;
tex.src_gpr = src_gpr;
tex.dst_gpr = temp;
tex.dst_sel_x = 7; /* mask out these components */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
- alu.src[0].sel = 512 + (id / 4);
- alu.src[0].kc_bank = R600_TXQ_CONST_BUFFER;
- alu.src[0].chan = id % 4;
+ if (ctx->bc->chip_class >= EVERGREEN) {
+ /* channel 1 or 3 of each word */
+ alu.src[0].sel = 512 + (id / 2);
+ alu.src[0].chan = ((id % 2) * 2) + 1;
+ } else {
+ /* r600 we have them at channel 2 of the second dword */
+ alu.src[0].sel = 512 + (id * 2) + 1;
+ alu.src[0].chan = 2;
+ }
+ alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
}
opcode = ctx->inst_info->op;
+ if (opcode == FETCH_OP_GATHER4 &&
+ inst->TexOffsets[0].File != TGSI_FILE_NULL &&
+ inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) {
+ opcode = FETCH_OP_GATHER4_O;
+
+ /* GATHER4_O/GATHER4_C_O use offset values loaded by
+ SET_TEXTURE_OFFSETS instruction. The immediate offset values
+ encoded in the instruction are ignored. */
+ memset(&tex, 0, sizeof(struct r600_bytecode_tex));
+ tex.op = FETCH_OP_SET_TEXTURE_OFFSETS;
+ tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
+ tex.sampler_index_mode = sampler_index_mode;
+ tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
+ tex.resource_index_mode = sampler_index_mode;
+
+ tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index;
+ tex.src_sel_x = inst->TexOffsets[0].SwizzleX;
+ tex.src_sel_y = inst->TexOffsets[0].SwizzleY;
+ tex.src_sel_z = inst->TexOffsets[0].SwizzleZ;
+ tex.src_sel_w = 4;
+
+ tex.dst_sel_x = 7;
+ tex.dst_sel_y = 7;
+ tex.dst_sel_z = 7;
+ tex.dst_sel_w = 7;
+
+ r = r600_bytecode_add_tex(ctx->bc, &tex);
+ if (r)
+ return r;
+ }
+
if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
case FETCH_OP_SAMPLE_G:
opcode = FETCH_OP_SAMPLE_C_G;
break;
+ /* Texture gather variants */
+ case FETCH_OP_GATHER4:
+ opcode = FETCH_OP_GATHER4_C;
+ break;
+ case FETCH_OP_GATHER4_O:
+ opcode = FETCH_OP_GATHER4_C_O;
+ break;
}
}
tex.op = opcode;
tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg);
+ tex.sampler_index_mode = sampler_index_mode;
tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
+ tex.resource_index_mode = sampler_index_mode;
tex.src_gpr = src_gpr;
tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
- tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
- tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
- tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
- tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE ||
+ inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) {
+ tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */
+ }
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
+ int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX];
+ tex.inst_mod = texture_component_select;
+
+ if (ctx->bc->chip_class == CAYMAN) {
+ /* GATHER4 result order is different from TGSI TG4 */
+ tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 0 : 7;
+ tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 1 : 7;
+ tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 2 : 7;
+ tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
+ } else {
+ tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
+ tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
+ tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
+ tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
+ }
+ }
+ else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) {
+ tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
+ tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
+ tex.dst_sel_z = 7;
+ tex.dst_sel_w = 7;
+ }
+ else {
+ tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;
+ tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;
+ tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;
+ tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;
+ }
+
if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) {
tex.src_sel_x = 4;
tex.offset_x = offset_x;
tex.offset_y = offset_y;
- tex.offset_z = offset_z;
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 &&
+ (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) {
+ tex.offset_z = 0;
+ }
+ else {
+ tex.offset_z = offset_z;
+ }
/* Put the depth for comparison in W.
* TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W.
tex.coord_type_z = 0;
/* mask unused source components */
- if (opcode == FETCH_OP_SAMPLE) {
+ if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) {
switch (inst->Texture.Texture) {
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
continue;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.op = ALU_OP3_CNDGE_INT;
+ alu.op = ALU_OP3_CNDE_INT;
r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int r;
+ int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+ unsigned reg = inst->Dst[0].Register.Index > 0 ? ctx->bc->index_reg[inst->Dst[0].Register.Index - 1] : ctx->bc->ar_reg;
+ assert(inst->Dst[0].Register.Index < 3);
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
switch (inst->Instruction.Opcode) {
return -1;
}
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
- alu.last = 1;
- alu.dst.sel = ctx->bc->ar_reg;
- alu.dst.write = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
+ for (i = 0; i <= lasti; ++i) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ alu.last = i == lasti;
+ alu.dst.sel = reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ if (inst->Dst[0].Register.Index > 0)
+ ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0;
+ else
+ ctx->bc->ar_loaded = 0;
- ctx->bc->ar_loaded = 0;
return 0;
}
static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int r;
+ int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP1_FLOOR;
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
- alu.last = 1;
-
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
+ for (i = 0; i <= lasti; ++i) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ alu.dst.chan = i;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ alu.last = i == lasti;
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
+ }
+ }
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP1_FLT_TO_INT;
alu.src[0].sel = ctx->bc->ar_reg;
alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
- alu.last = 1;
-
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
+ /* FLT_TO_INT is trans-only on r600/r700 */
+ alu.last = TRUE;
+ for (i = 0; i <= lasti; ++i) {
+ alu.dst.chan = i;
+ alu.src[0].chan = i;
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
+ }
break;
case TGSI_OPCODE_ARR:
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP1_FLT_TO_INT;
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
- alu.last = 1;
-
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
+ /* FLT_TO_INT is trans-only on r600/r700 */
+ alu.last = TRUE;
+ for (i = 0; i <= lasti; ++i) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ alu.dst.chan = i;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
+ }
+ }
break;
case TGSI_OPCODE_UARL:
memset(&alu, 0, sizeof(alu));
alu.op = ALU_OP1_MOV;
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
- alu.last = 1;
-
- if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
- return r;
+ for (i = 0; i <= lasti; ++i) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ alu.dst.chan = i;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ alu.last = i == lasti;
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
+ }
+ }
break;
default:
assert(0);
return 0;
}
+static int tgsi_loop_breakc(struct r600_shader_ctx *ctx)
+{
+ int r;
+ unsigned int fscp;
+ /* Handler for TGSI BREAKC (conditional loop break).
+  * Returns 0 on success, -EINVAL when no enclosing loop is found, or the
+  * non-zero result of whichever emit helper failed.
+  *
+  * First scan the flow-control stack from the top (fc_sp) downwards for
+  * the innermost FC_LOOP entry.  Slot 0 is never examined, so fscp == 0
+  * after the scan means "no loop found".
+  */
+ for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
+ {
+ if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+ break;
+ }
+ if (fscp == 0) {
+ R600_ERR("BREAKC not inside loop/endloop pair\n");
+ return -EINVAL;
+ }
+ /* Evergreen-class chips other than Cypress and Juniper get an explicit
+  * tgsi_uif() / LOOP_BREAK / tgsi_endif() sequence; every other chip
+  * takes the single predicated ALU_BREAK in the else branch.
+  */
+ if (ctx->bc->chip_class == EVERGREEN &&
+ ctx->bc->family != CHIP_CYPRESS &&
+ ctx->bc->family != CHIP_JUNIPER) {
+ /* HW bug: ALU_BREAK does not save the active mask correctly */
+ r = tgsi_uif(ctx);
+ if (r)
+ return r;
+ /* Emit the CF-level LOOP_BREAK between the tgsi_uif()/tgsi_endif()
+  * pair and pass the loop's stack index to fc_set_mid()
+  * (presumably so ENDLOOP can patch the break target - TODO confirm).
+  */
+ r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK);
+ if (r)
+ return r;
+ fc_set_mid(ctx, fscp);
+
+ return tgsi_endif(ctx);
+ } else {
+ r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK);
+ if (r)
+ return r;
+ fc_set_mid(ctx, fscp);
+ }
+
+ return 0;
+}
+
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
{
unsigned int fscp;
static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
{
if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
- emit_gs_ring_writes(ctx);
+ emit_gs_ring_writes(ctx, TRUE); /* ring writes precede the CF op only for EMIT_VERTEX;
+  * any other op routed to this handler (the opcode tables also send
+  * ENDPRIM / CF_OP_CUT_VERTEX here) goes straight to the CF instruction
+  * below, whose status is returned to the caller. */
return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
}
for (k = 0; k < inst->Instruction.NumSrcRegs; k++) {
r600_bytecode_src(&alu.src[k], &ctx->src[k], i);
}
- tgsi_dst(ctx, &inst->Dst[0], j, &alu.dst);
+ alu.dst.chan = j;
alu.dst.sel = ctx->temp_reg;
alu.dst.write = (j == i);
if (j == 3)
{TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3},
{TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
- {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {20, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {19, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
{22, 0, ALU_OP0_NOP, tgsi_unsupported},
{23, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2},
{TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow},
{TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd},
- /* gap */
{32, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2},
- {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {34, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp},
{TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig},
{TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
{TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {44, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2},
- {TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {46, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2},
{TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, tgsi_trig},
{TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap},
{TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2},
- {TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {51, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex},
{TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {59, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {60, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_r600_arl},
- {TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {62, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg},
{TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp},
{TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs},
{TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
- {TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {69, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp},
{TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
{76, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else},
{TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
- /* gap */
- {79, 0, ALU_OP0_NOP, tgsi_unsupported},
- {80, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DDX_FINE, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DDY_FINE, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2},
{TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2},
{TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2},
{TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2_trans},
- /* gap */
{88, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2},
{TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2},
{TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex},
{TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
{TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
- {TGSI_OPCODE_EMIT, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ENDPRIM, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit},
+ {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit},
{TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop},
{TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
{TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
- /* gap */
{104, 0, ALU_OP0_NOP, tgsi_unsupported},
{105, 0, ALU_OP0_NOP, tgsi_unsupported},
{106, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
{TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
{TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
- {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {112, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
{114, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_loop_breakc},
{TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
- /* gap */
{118, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_op2_trans},
{TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv},
{TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
{TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
+ {TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, tgsi_op2_trans},
+ {TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, tgsi_op2_trans},
+ {TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_unsupported},
+ {TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_unsupported},
+ {TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_unsupported},
+ {TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_unsupported},
+ {TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_unsupported},
+ {TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_unsupported},
+ {TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_unsupported},
+ {TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_unsupported},
+ {TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_unsupported},
+ {TGSI_OPCODE_INTERP_CENTROID, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported},
};
{TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3},
{TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
- {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {20, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {19, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
{22, 0, ALU_OP0_NOP, tgsi_unsupported},
{23, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2},
{TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow},
{TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd},
- /* gap */
{32, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2},
- {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {34, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp},
{TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig},
{TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
{TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {44, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2},
- {TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {46, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2},
{TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, tgsi_trig},
{TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap},
{TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2},
- {TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {51, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex},
{TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {59, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {60, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_eg_arl},
- {TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {62, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg},
{TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp},
{TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs},
{TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
- {TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {69, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp},
{TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
{76, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else},
{TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
- /* gap */
- {79, 0, ALU_OP0_NOP, tgsi_unsupported},
- {80, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DDX_FINE, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
+ {TGSI_OPCODE_DDY_FINE, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
{TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2},
{TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2},
{TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2},
{TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2},
- /* gap */
{88, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2},
{TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2},
{TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
{TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
- /* gap */
{104, 0, ALU_OP0_NOP, tgsi_unsupported},
{105, 0, ALU_OP0_NOP, tgsi_unsupported},
{106, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
{TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
{TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
- {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {112, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
{114, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
- /* gap */
{118, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_f2i},
{TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv},
{TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
{TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
+ {TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, tgsi_op2_trans},
+ {TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, tgsi_op2_trans},
+ {TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_tex},
+ {TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_tex},
+ {TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_op3},
+ {TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_op3},
+ {TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_bfi},
+ {TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_op2},
+ {TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_op2},
+ {TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_op2},
+ {TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_msb},
+ {TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_msb},
+ {TGSI_OPCODE_INTERP_CENTROID, 0, ALU_OP0_NOP, tgsi_interp_egcm},
+ {TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, tgsi_interp_egcm},
+ {TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, tgsi_interp_egcm},
{TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported},
};
{TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3},
{TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp},
- {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
- {20, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {19, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
{TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
{22, 0, ALU_OP0_NOP, tgsi_unsupported},
{23, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2},
{TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
{TGSI_OPCODE_POW, 0, ALU_OP0_NOP, cayman_pow},
{TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd},
- /* gap */
{32, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2},
- {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {34, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp},
{TGSI_OPCODE_COS, 0, ALU_OP1_COS, cayman_trig},
{TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
{TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {44, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2},
- {TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {46, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2},
{TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, cayman_trig},
{TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap},
{TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2},
- {TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {51, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex},
{TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported},
- {TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {59, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {60, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_eg_arl},
- {TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {62, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg},
{TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp},
{TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs},
{TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
- {TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {69, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp},
{TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
{76, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else},
{TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif},
- /* gap */
- {79, 0, ALU_OP0_NOP, tgsi_unsupported},
- {80, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {TGSI_OPCODE_DDX_FINE, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
+ {TGSI_OPCODE_DDY_FINE, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
{TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2},
{TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2},
{TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2},
{TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2},
- /* gap */
{88, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2},
{TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2},
{TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop},
{TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
- /* gap */
{104, 0, ALU_OP0_NOP, tgsi_unsupported},
{105, 0, ALU_OP0_NOP, tgsi_unsupported},
{106, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
{TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2},
{TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2},
{TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap},
{TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
- {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported},
+ {112, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported},
- /* gap */
{114, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
{TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */
- /* gap */
{118, 0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_op2},
{TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv},
{TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex},
{TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex},
{TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex},
+ {TGSI_OPCODE_IMUL_HI, 0, ALU_OP2_MULHI_INT, cayman_mul_int_instr},
+ {TGSI_OPCODE_UMUL_HI, 0, ALU_OP2_MULHI_UINT, cayman_mul_int_instr},
+ {TGSI_OPCODE_TG4, 0, FETCH_OP_GATHER4, tgsi_tex},
+ {TGSI_OPCODE_LODQ, 0, FETCH_OP_GET_LOD, tgsi_tex},
+ {TGSI_OPCODE_IBFE, 1, ALU_OP3_BFE_INT, tgsi_op3},
+ {TGSI_OPCODE_UBFE, 1, ALU_OP3_BFE_UINT, tgsi_op3},
+ {TGSI_OPCODE_BFI, 0, ALU_OP0_NOP, tgsi_bfi},
+ {TGSI_OPCODE_BREV, 0, ALU_OP1_BFREV_INT, tgsi_op2},
+ {TGSI_OPCODE_POPC, 0, ALU_OP1_BCNT_INT, tgsi_op2},
+ {TGSI_OPCODE_LSB, 0, ALU_OP1_FFBL_INT, tgsi_op2},
+ {TGSI_OPCODE_IMSB, 0, ALU_OP1_FFBH_INT, tgsi_msb},
+ {TGSI_OPCODE_UMSB, 0, ALU_OP1_FFBH_UINT, tgsi_msb},
+ {TGSI_OPCODE_INTERP_CENTROID, 0, ALU_OP0_NOP, tgsi_interp_egcm},
+ {TGSI_OPCODE_INTERP_SAMPLE, 0, ALU_OP0_NOP, tgsi_interp_egcm},
+ {TGSI_OPCODE_INTERP_OFFSET, 0, ALU_OP0_NOP, tgsi_interp_egcm},
{TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported},
};