* - Output an x and y component, which need to be multiplied to
* get the result
*/
- /* TGSI lowering should deal with SCS */
- assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
-
struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
emit_inst(c, &(struct etna_inst) {
.opcode = INST_OPCODE_MUL,
});
} else if (c->specs->has_sin_cos_sqrt) {
- /* TGSI lowering should deal with SCS */
- assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);
-
struct etna_native_reg temp = etna_compile_get_inner_temp(c);
/* add divide by PI/2, using a temp register. GC2000
* fails with src==dst for the trig instruction. */
* DP3 t.x___, t.xyww, C, void (for scs)
* MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
* MAD dst, t.ywyw, .2225, t.xzxz
- *
- * TODO: we don't set dst.zw correctly for SCS.
*/
struct etna_inst *p, ins[9] = { };
struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
ins[4].src[0] = swizzle(t0s, dp3_swiz);
ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));
- if (inst->Instruction.Opcode == TGSI_OPCODE_SCS) {
- ins[5] = ins[3];
- ins[6] = ins[4];
- ins[4].dst.comps = INST_COMPS_X;
- ins[6].dst.comps = INST_COMPS_Z;
- ins[5].src[0] = swizzle(t0s, SWIZZLE(W, Z, W, W));
- ins[6].src[0] = swizzle(t0s, SWIZZLE(Z, Y, W, W));
- ins[5].src[1] = absolute(ins[5].src[0]);
- p = &ins[7];
- } else {
- p = &ins[5];
- }
-
+ p = &ins[5];
p->opcode = INST_OPCODE_MAD;
p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
}
}
-static void
-trans_dph(const struct instr_translater *t, struct etna_compile *c,
- const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
-{
- /*
- DP3 tmp.xyzw, src0.xyzw, src1,xyzw, void
- ADD dst.xyzw, tmp.xyzw, void, src1.wwww
- */
- struct etna_native_reg temp = etna_compile_get_inner_temp(c);
- struct etna_inst ins[2] = { };
-
- ins[0].opcode = INST_OPCODE_DP3;
- ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
- INST_COMPS_Z | INST_COMPS_W);
- ins[0].src[0] = src[0];
- ins[0].src[1] = src[1];
-
- ins[1].opcode = INST_OPCODE_ADD;
- ins[1].sat = inst->Instruction.Saturate;
- ins[1].dst = convert_dst(c, &inst->Dst[0]);
- ins[1].src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
- ins[1].src[2] = swizzle(src[1], SWIZZLE(W, W, W, W));
-
- emit_inst(c, &ins[0]);
- emit_inst(c, &ins[1]);
-}
-
static void
trans_sampler(const struct instr_translater *t, struct etna_compile *c,
const struct tgsi_full_instruction *inst,
INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
+ INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
INSTR(LRP, trans_lrp),
INSTR(LIT, trans_lit),
INSTR(SSG, trans_ssg),
- INSTR(DPH, trans_dph),
INSTR(SIN, trans_trig),
INSTR(COS, trans_trig),
- INSTR(SCS, trans_trig),
INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
assert(sf->num_reg < ETNA_NUM_INPUTS);
+
+ if (!reg->native.valid)
+ continue;
+
/* XXX exclude inputs with special semantics such as gl_frontFacing */
sf->reg[sf->num_reg].reg = reg->native.id;
sf->reg[sf->num_reg].semantic = reg->semantic;
/* round up number of uniforms, including immediates, in units of four */
int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
- if (c->inst_ptr > c->specs->max_instructions) {
+ if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
c->specs->max_instructions);
return false;
const struct etna_specs *specs = v->shader->specs;
struct tgsi_lowering_config lconfig = {
- .lower_SCS = specs->has_sin_cos_sqrt,
.lower_FLR = !specs->has_sign_floor_ceil,
.lower_CEIL = !specs->has_sign_floor_ceil,
.lower_POW = true,
.lower_EXP = true,
.lower_LOG = true,
- .lower_DP2 = true,
- .lower_DP2A = true,
+ .lower_DP2 = !specs->has_halti2_instructions,
.lower_TRUNC = true,
- .lower_XPD = true
};
c = CALLOC_STRUCT(etna_compile);
if (!c)
return false;
- memset(&c->lbl_usage, -1, ARRAY_SIZE(c->lbl_usage));
+ memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));
const struct tgsi_token *tokens = v->shader->tokens;
v->vs_pointsize_out_reg = -1;
v->ps_color_out_reg = -1;
v->ps_depth_out_reg = -1;
+ v->needs_icache = c->inst_ptr > c->specs->max_instructions;
copy_uniform_state_to_shader(c, v);
if (c->info.processor == PIPE_SHADER_VERTEX) {
etna_link_shader(struct etna_shader_link_info *info,
const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
{
+ int comp_ofs = 0;
/* For each fragment input we need to find the associated vertex shader
* output, which can be found by matching on semantic name and index. A
* binary search could be used because the vs outputs are sorted by their
* semantic index and grouped by semantic type by fill_in_vs_outputs.
*/
assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
+ info->pcoord_varying_comp_ofs = -1;
for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
struct etna_varying *varying;
+ bool interpolate_always = fsio->semantic.Name != TGSI_SEMANTIC_COLOR;
assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));
varying = &info->varyings[fsio->reg - 1];
varying->num_components = fsio->num_components;
- if (fsio->semantic.Name == TGSI_SEMANTIC_COLOR) /* colors affected by flat shading */
+ if (!interpolate_always) /* colors affected by flat shading */
varying->pa_attributes = 0x200;
else /* texture coord or other bypasses flat shading */
varying->pa_attributes = 0x2f1;
+ varying->use[0] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_X : VARYING_COMPONENT_USE_USED;
+ varying->use[1] = interpolate_always ? VARYING_COMPONENT_USE_POINTCOORD_Y : VARYING_COMPONENT_USE_USED;
+ varying->use[2] = VARYING_COMPONENT_USE_USED;
+ varying->use[3] = VARYING_COMPONENT_USE_USED;
+
+
+ /* point coord is an input to the PS without matching VS output,
+ * so it gets a varying slot without being assigned a VS register.
+ */
if (fsio->semantic.Name == TGSI_SEMANTIC_PCOORD) {
- varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
- varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;
- varying->use[2] = VARYING_COMPONENT_USE_USED;
- varying->use[3] = VARYING_COMPONENT_USE_USED;
- varying->reg = 0; /* replaced by point coord -- doesn't matter */
- continue;
- }
+ info->pcoord_varying_comp_ofs = comp_ofs;
+ } else {
+ if (vsio == NULL) { /* not found -- link error */
+ BUG("Semantic %d value %d not found in vertex shader outputs\n", fsio->semantic.Name, fsio->semantic.Index);
+ return true;
+ }
- if (vsio == NULL)
- return true; /* not found -- link error */
+ varying->reg = vsio->reg;
+ }
- varying->use[0] = VARYING_COMPONENT_USE_USED;
- varying->use[1] = VARYING_COMPONENT_USE_USED;
- varying->use[2] = VARYING_COMPONENT_USE_USED;
- varying->use[3] = VARYING_COMPONENT_USE_USED;
- varying->reg = vsio->reg;
+ comp_ofs += varying->num_components;
}
assert(info->num_varyings == fs->infile.num_reg);