#include "tgsi/tgsi_parse.h"
#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_pstipple.h"
#include "svga_tgsi_emit.h"
#include "svga_context.h"
translate_opcode(uint opcode)
{
switch (opcode) {
- case TGSI_OPCODE_ABS: return SVGA3DOP_ABS;
case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD;
case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
- case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM;
default:
assert(!"svga: unexpected opcode in translate_opcode()");
return SVGA3DOP_LAST_INST;
* Need to lookup a table built at decl time:
*/
dest = emit->output_map[reg->Register.Index];
+ emit->num_output_writes++;
break;
default:
static boolean
svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
{
- int i;
+ unsigned i;
for (i = 0; i < emit->num_arl_consts; ++i) {
if (emit->arl_consts[i].arl_num == emit->current_arl)
static int
svga_arl_adjustment( const struct svga_shader_emitter *emit )
{
- int i;
+ unsigned i;
for (i = 0; i < emit->num_arl_consts; ++i) {
if (emit->arl_consts[i].arl_num == emit->current_arl)
* in one slot at least:
*/
assert(type1 == SVGA3DREG_SAMPLER);
+ (void) type1;
if (type0 == SVGA3DREG_CONST &&
((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
}
if (!emit_instruction(emit, opcode) ||
- !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
+ !svga_shader_emit_dwords( emit, def.values, ARRAY_SIZE(def.values)))
return FALSE;
return TRUE;
if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
idx, 0.0f, 0.5f, -1.0f, 1.0f ))
return FALSE;
+ emit->common_immediate_idx[0] = idx;
+ idx++;
+
+ /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */
+ if (emit->key.vs.adjust_attrib_range) {
+ if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
+ idx, 2.0f, 0.0f, 0.0f, 0.0f ))
+ return FALSE;
+ emit->common_immediate_idx[1] = idx;
+ }
+ else {
+ emit->common_immediate_idx[1] = -1;
+ }
- emit->common_immediate_idx = idx;
emit->created_common_immediate = TRUE;
return TRUE;
/**
- * Returns an immediate reg where all the terms are either 0, 1, -1 or 0.5
+ * Returns an immediate reg where all the terms are either 0, 1, 2 or 0.5
*/
static struct src_register
get_immediate(struct svga_shader_emitter *emit,
unsigned sz = common_immediate_swizzle(z);
unsigned sw = common_immediate_swizzle(w);
assert(emit->created_common_immediate);
- assert(emit->common_immediate_idx >= 0);
- return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx),
+ assert(emit->common_immediate_idx[0] >= 0);
+ return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
sx, sy, sz, sw);
}
get_zero_immediate( struct svga_shader_emitter *emit )
{
assert(emit->created_common_immediate);
- assert(emit->common_immediate_idx >= 0);
+ assert(emit->common_immediate_idx[0] >= 0);
return swizzle(src_register( SVGA3DREG_CONST,
- emit->common_immediate_idx),
+ emit->common_immediate_idx[0]),
0, 0, 0, 0);
}
get_one_immediate( struct svga_shader_emitter *emit )
{
assert(emit->created_common_immediate);
- assert(emit->common_immediate_idx >= 0);
+ assert(emit->common_immediate_idx[0] >= 0);
return swizzle(src_register( SVGA3DREG_CONST,
- emit->common_immediate_idx),
+ emit->common_immediate_idx[0]),
3, 3, 3, 3);
}
get_half_immediate( struct svga_shader_emitter *emit )
{
assert(emit->created_common_immediate);
- assert(emit->common_immediate_idx >= 0);
- return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx),
+ assert(emit->common_immediate_idx[0] >= 0);
+ return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
1, 1, 1, 1);
}
+/**
+ * returns {2, 2, 2, 2} immediate
+ */
+static struct src_register
+get_two_immediate( struct svga_shader_emitter *emit )
+{
+ /* Note we use the second common immediate here */
+ assert(emit->created_common_immediate);
+ assert(emit->common_immediate_idx[1] >= 0);
+ return swizzle(src_register( SVGA3DREG_CONST,
+ emit->common_immediate_idx[1]),
+ 0, 0, 0, 0);
+}
+
+
/**
* returns the loop const
*/
struct src_register reg;
/* the width/height indexes start right after constants */
- idx = emit->key.fkey.tex[sampler_num].width_height_idx +
+ idx = emit->key.tex[sampler_num].width_height_idx +
emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
reg = src_register( SVGA3DREG_CONST, idx );
const struct src_register src1 =
translate_src_register(emit, &insn->Src[1] );
SVGA3dShaderDestToken temp = get_temp( emit );
- int i;
+ unsigned i;
/* For each enabled element, perform a RCP instruction. Note that
* RCP is scalar in SVGA3D:
}
-/**
- * Translate the following TGSI DST instruction.
- * NRM DST, SRC
- * To the following SVGA3D instruction sequence.
- * DP3 TMP, SRC, SRC
- * RSQ TMP, TMP
- * MUL DST, SRC, TMP
- */
-static boolean
-emit_nrm(struct svga_shader_emitter *emit,
- const struct tgsi_full_instruction *insn)
-{
- SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
- const struct src_register src0 =
- translate_src_register(emit, &insn->Src[0]);
- SVGA3dShaderDestToken temp = get_temp( emit );
-
- /* DP3 TMP, SRC, SRC */
- if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 ))
- return FALSE;
-
- /* RSQ TMP, TMP */
- if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp )))
- return FALSE;
-
- /* MUL DST, SRC, TMP */
- if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst,
- src0, src( temp )))
- return FALSE;
-
- return TRUE;
-}
-
-
/**
* Sine / Cosine helper function.
*/
}
-/**
- * Translate/emit TGSI SUB instruction as:
- * ADD DST, SRC0, negate(SRC1)
- */
-static boolean
-emit_sub(struct svga_shader_emitter *emit,
- const struct tgsi_full_instruction *insn)
-{
- SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
- struct src_register src0 = translate_src_register(
- emit, &insn->Src[0] );
- struct src_register src1 = translate_src_register(
- emit, &insn->Src[1] );
-
- src1 = negate(src1);
-
- if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
- src0, src1 ))
- return FALSE;
-
- return TRUE;
-}
-
-
/**
* Translate/emit KILL_IF instruction (kill if any of X,Y,Z,W are negative).
*/
texcoord = translate_src_register( emit, &insn->Src[0] );
sampler = translate_src_register( emit, &insn->Src[1] );
- if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
+ if (emit->key.tex[sampler.base.num].unnormalized ||
emit->dynamic_branching_level > 0)
tmp = get_temp( emit );
/* Explicit normalization of texcoords:
*/
- if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
+ if (emit->key.tex[sampler.base.num].unnormalized) {
struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
/* MUL tmp, SRC0, WH */
const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
unsigned srcSwizzle[4];
unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
- int i;
+ unsigned i;
/* build writemasks and srcSwizzle terms */
for (i = 0; i < 4; i++) {
- if (swizzleIn[i] == PIPE_SWIZZLE_ZERO) {
+ if (swizzleIn[i] == PIPE_SWIZZLE_0) {
srcSwizzle[i] = TGSI_SWIZZLE_X + i;
zeroWritemask |= (1 << i);
}
- else if (swizzleIn[i] == PIPE_SWIZZLE_ONE) {
+ else if (swizzleIn[i] == PIPE_SWIZZLE_1) {
srcSwizzle[i] = TGSI_SWIZZLE_X + i;
oneWritemask |= (1 << i);
}
const unsigned unit = src1.base.num;
/* check for shadow samplers */
- boolean compare = (emit->key.fkey.tex[unit].compare_mode ==
+ boolean compare = (emit->key.tex[unit].compare_mode ==
PIPE_TEX_COMPARE_R_TO_TEXTURE);
/* texture swizzle */
- boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
- emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
- emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
- emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);
+ boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_X ||
+ emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_Y ||
+ emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_Z ||
+ emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_W);
- boolean saturate = insn->Instruction.Saturate != TGSI_SAT_NONE;
+ boolean saturate = insn->Instruction.Saturate;
/* If doing compare processing or tex swizzle or saturation, we need to put
* the fetched color into a temporary so it can be used as a source later on.
/* Compare texture sample value against R component of texcoord */
if (!emit_select(emit,
- emit->key.fkey.tex[unit].compare_func,
+ emit->key.tex[unit].compare_func,
writemask( dst2, TGSI_WRITEMASK_XYZ ),
r_coord,
tex_src_x))
/* swizzle from tex_result to dst (handles saturation too, if any) */
emit_tex_swizzle(emit,
dst, src(tex_result),
- emit->key.fkey.tex[unit].swizzle_r,
- emit->key.fkey.tex[unit].swizzle_g,
- emit->key.fkey.tex[unit].swizzle_b,
- emit->key.fkey.tex[unit].swizzle_a);
+ emit->key.tex[unit].swizzle_r,
+ emit->key.tex[unit].swizzle_g,
+ emit->key.tex[unit].swizzle_b,
+ emit->key.tex[unit].swizzle_a);
}
return TRUE;
}
+/**
+ * TGSI_OPCODE_MOVE is only special-cased here to detect the
+ * svga_fragment_shader::constant_color_output case.
+ */
+static boolean
+emit_mov(struct svga_shader_emitter *emit,
+ const struct tgsi_full_instruction *insn)
+{
+ const struct tgsi_full_src_register *src = &insn->Src[0];
+ const struct tgsi_full_dst_register *dst = &insn->Dst[0];
+
+ if (emit->unit == PIPE_SHADER_FRAGMENT &&
+ dst->Register.File == TGSI_FILE_OUTPUT &&
+ dst->Register.Index == 0 &&
+ src->Register.File == TGSI_FILE_CONSTANT &&
+ !src->Register.Indirect) {
+ emit->constant_color_output = TRUE;
+ }
+
+ return emit_simple_instruction(emit, SVGA3DOP_MOV, insn);
+}
+
+
/**
* Translate/emit TGSI DDX, DDY instructions.
*/
break;
}
- if (emit->nr_labels == Elements(emit->label))
+ if (emit->nr_labels == ARRAY_SIZE(emit->label))
return FALSE;
if (i == emit->nr_labels) {
case TGSI_OPCODE_DPH:
return emit_dph( emit, insn );
- case TGSI_OPCODE_NRM:
- return emit_nrm( emit, insn );
-
case TGSI_OPCODE_COS:
return emit_cos( emit, insn );
case TGSI_OPCODE_SLE:
return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
- case TGSI_OPCODE_SUB:
- return emit_sub( emit, insn );
-
case TGSI_OPCODE_POW:
return emit_pow( emit, insn );
/* These aren't actually used by any of the frontends we care
* about:
*/
- case TGSI_OPCODE_CLAMP:
case TGSI_OPCODE_AND:
case TGSI_OPCODE_OR:
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_SSG:
return emit_ssg( emit, insn );
+ case TGSI_OPCODE_MOV:
+ return emit_mov( emit, insn );
+
default:
{
unsigned opcode = translate_opcode(insn->Instruction.Opcode);
static boolean
emit_vs_preamble(struct svga_shader_emitter *emit)
{
- if (!emit->key.vkey.need_prescale) {
+ if (!emit->key.vs.need_prescale) {
if (!make_immediate( emit, 0, 0, .5, .5,
&emit->imm_0055))
return FALSE;
* logicop workaround.
*/
if (emit->unit == PIPE_SHADER_FRAGMENT &&
- emit->key.fkey.white_fragments) {
+ emit->key.fs.white_fragments) {
struct src_register one = get_one_immediate(emit);
if (!submit_op1( emit,
return FALSE;
}
else if (emit->unit == PIPE_SHADER_FRAGMENT &&
- i < emit->key.fkey.write_color0_to_n_cbufs) {
+ i < emit->key.fs.write_color0_to_n_cbufs) {
/* Write temp color output [0] to true output [i] */
if (!submit_op1(emit, inst_token(SVGA3DOP_MOV),
emit->true_color_output[i],
/* Need to perform various manipulations on vertex position to cope
* with the different GL and D3D clip spaces.
*/
- if (emit->key.vkey.need_prescale) {
+ if (emit->key.vs.need_prescale) {
SVGA3dShaderDestToken temp_pos = emit->temp_pos;
SVGA3dShaderDestToken depth = emit->depth_pos;
SVGA3dShaderDestToken pos = emit->true_pos;
struct src_register back[2];
SVGA3dShaderDestToken color[2];
int count = emit->internal_color_count;
- int i;
+ unsigned i;
SVGA3dShaderInstToken if_token;
if (count == 0)
if_token = inst_token( SVGA3DOP_IFC );
- if (emit->key.fkey.front_ccw)
+ if (emit->key.fs.front_ccw)
if_token.control = SVGA3DOPCOMP_LT;
else
if_token.control = SVGA3DOPCOMP_GT;
temp = dst_register( SVGA3DREG_TEMP,
emit->nr_hw_temp++ );
- if (emit->key.fkey.front_ccw) {
+ if (emit->key.fs.front_ccw) {
pass = get_zero_immediate(emit);
fail = get_one_immediate(emit);
} else {
assert(emit->inverted_texcoords & (1 << unit));
- assert(unit < Elements(emit->ps_true_texcoord));
+ assert(unit < ARRAY_SIZE(emit->ps_true_texcoord));
- assert(unit < Elements(emit->ps_inverted_texcoord_input));
+ assert(unit < ARRAY_SIZE(emit->ps_inverted_texcoord_input));
assert(emit->ps_inverted_texcoord_input[unit]
- < Elements(emit->input_map));
+ < ARRAY_SIZE(emit->input_map));
/* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
if (!submit_op3(emit,
}
+/**
+ * Emit code to adjust vertex shader inputs/attributes:
+ * - Change range from [0,1] to [-1,1] (for normalized byte/short attribs).
+ * - Set attrib W component = 1.
+ */
+static boolean
+emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
+{
+ unsigned adjust_mask = (emit->key.vs.adjust_attrib_range |
+ emit->key.vs.adjust_attrib_w_1);
+
+ while (adjust_mask) {
+ /* Adjust vertex attrib range and/or set W component = 1 */
+ const unsigned index = u_bit_scan(&adjust_mask);
+ struct src_register tmp;
+
+ /* allocate a temp reg */
+ tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
+ emit->nr_hw_temp++;
+
+ if (emit->key.vs.adjust_attrib_range & (1 << index)) {
+ /* The vertex input/attribute is supposed to be a signed value in
+ * the range [-1,1] but we actually fetched/converted it to the
+ * range [0,1]. This most likely happens when the app specifies a
+ * signed byte attribute but we interpreted it as unsigned bytes.
+ * See also svga_translate_vertex_format().
+ *
+ * Here, we emit some extra instructions to adjust
+ * the attribute values from [0,1] to [-1,1].
+ *
+ * The adjustment we implement is:
+ * new_attrib = attrib * 2.0;
+ * if (attrib >= 0.5)
+ * new_attrib = new_attrib - 2.0;
+ * This isn't exactly right (it's off by a bit or so) but close enough.
+ */
+ SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0);
+
+ /* tmp = attrib * 2.0 */
+ if (!submit_op2(emit,
+ inst_token(SVGA3DOP_MUL),
+ dst(tmp),
+ emit->input_map[index],
+ get_two_immediate(emit)))
+ return FALSE;
+
+ /* pred = (attrib >= 0.5) */
+ if (!submit_op2(emit,
+ inst_token_setp(SVGA3DOPCOMP_GE),
+ pred_reg,
+ emit->input_map[index], /* vert attrib */
+ get_half_immediate(emit))) /* 0.5 */
+ return FALSE;
+
+ /* sub(pred) tmp, tmp, 2.0 */
+ if (!submit_op3(emit,
+ inst_token_predicated(SVGA3DOP_SUB),
+ dst(tmp),
+ src(pred_reg),
+ tmp,
+ get_two_immediate(emit)))
+ return FALSE;
+ }
+ else {
+ /* just copy the vertex input attrib to the temp register */
+ if (!submit_op1(emit,
+ inst_token(SVGA3DOP_MOV),
+ dst(tmp),
+ emit->input_map[index]))
+ return FALSE;
+ }
+
+ if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) {
+ /* move 1 into W position of tmp */
+ if (!submit_op1(emit,
+ inst_token(SVGA3DOP_MOV),
+ writemask(dst(tmp), TGSI_WRITEMASK_W),
+ get_one_immediate(emit)))
+ return FALSE;
+ }
+
+ /* Reassign the input_map entry to the new tmp register */
+ emit->input_map[index] = tmp;
+ }
+
+ return TRUE;
+}
+
+
/**
* Determine if we need to create the "common" immediate value which is
* used for generating useful vector constants such as {0,0,0,0} and
unsigned i;
if (emit->unit == PIPE_SHADER_FRAGMENT) {
- if (emit->key.fkey.light_twoside)
+ if (emit->key.fs.light_twoside)
return TRUE;
- if (emit->key.fkey.white_fragments)
+ if (emit->key.fs.white_fragments)
return TRUE;
if (emit->emit_frontface)
if (emit->inverted_texcoords)
return TRUE;
- /* look for any PIPE_SWIZZLE_ZERO/ONE terms */
- for (i = 0; i < emit->key.fkey.num_textures; i++) {
- if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA ||
- emit->key.fkey.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA ||
- emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA ||
- emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
+ /* look for any PIPE_SWIZZLE_0/ONE terms */
+ for (i = 0; i < emit->key.num_textures; i++) {
+ if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_W ||
+ emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_W ||
+ emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_W ||
+ emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_W)
return TRUE;
}
- for (i = 0; i < emit->key.fkey.num_textures; i++) {
- if (emit->key.fkey.tex[i].compare_mode
+ for (i = 0; i < emit->key.num_textures; i++) {
+ if (emit->key.tex[i].compare_mode
== PIPE_TEX_COMPARE_R_TO_TEXTURE)
return TRUE;
}
}
-
- if (emit->unit == PIPE_SHADER_VERTEX) {
+ else if (emit->unit == PIPE_SHADER_VERTEX) {
if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
return TRUE;
+ if (emit->key.vs.adjust_attrib_range ||
+ emit->key.vs.adjust_attrib_w_1)
+ return TRUE;
}
if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
pre_parse_add_indirect( struct svga_shader_emitter *emit,
int num, int current_arl)
{
- int i;
+ unsigned i;
assert(num < 0);
for (i = 0; i < emit->num_arl_consts; ++i) {
}
if (emit->unit == PIPE_SHADER_FRAGMENT) {
+ if (!svga_shader_emit_samplers_decl( emit ))
+ return FALSE;
+
if (!emit_ps_preamble( emit ))
return FALSE;
- if (emit->key.fkey.light_twoside) {
+ if (emit->key.fs.light_twoside) {
if (!emit_light_twoside( emit ))
return FALSE;
}
return FALSE;
}
}
+ else {
+ assert(emit->unit == PIPE_SHADER_VERTEX);
+ if (emit->key.vs.adjust_attrib_range) {
+ if (!emit_adjusted_vertex_attribs(emit) ||
+ emit->key.vs.adjust_attrib_w_1) {
+ return FALSE;
+ }
+ }
+ }
return TRUE;
}
const struct tgsi_token *tokens)
{
struct tgsi_parse_context parse;
+ const struct tgsi_token *new_tokens = NULL;
boolean ret = TRUE;
boolean helpers_emitted = FALSE;
unsigned line_nr = 0;
+ if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) {
+ unsigned unit;
+
+ new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
+ TGSI_FILE_INPUT);
+
+ if (new_tokens) {
+ /* Setup texture state for stipple */
+ emit->sampler_target[unit] = TGSI_TEXTURE_2D;
+ emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
+ emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
+ emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
+ emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
+
+ emit->pstipple_sampler_unit = unit;
+
+ tokens = new_tokens;
+ }
+ }
+
tgsi_parse_init( &parse, tokens );
emit->internal_imm_count = 0;
done:
tgsi_parse_free( &parse );
+ if (new_tokens) {
+ tgsi_free_tokens(new_tokens);
+ }
+
return ret;
}