/* Samplers */
unsigned num_samplers;
+ boolean sampler_view[PIPE_MAX_SAMPLERS]; /**< True if sampler view exists*/
ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */
ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */
/* For fragment shaders only */
struct {
- /* apha test */
unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */
+ unsigned num_color_outputs;
unsigned color_tmp_index; /**< fake/temp color output reg */
unsigned alpha_ref_index; /**< immediate constant for alpha ref */
unsigned fragcoord_input_index; /**< real fragment position input reg */
unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */
+
+ /** Which texture units are doing shadow comparison in the FS code */
+ unsigned shadow_compare_units;
} fs;
/* For geometry shaders only */
* as seen below. All other opcodes are handled/translated specially.
*/
static VGPU10_OPCODE_TYPE
-translate_opcode(unsigned opcode)
+translate_opcode(enum tgsi_opcode opcode)
{
switch (opcode) {
case TGSI_OPCODE_MOV:
return VGPU10_OPCODE_RET;
case TGSI_OPCODE_NOP:
return VGPU10_OPCODE_NOP;
- case TGSI_OPCODE_BREAKC:
- return VGPU10_OPCODE_BREAKC;
case TGSI_OPCODE_END:
return VGPU10_OPCODE_RET;
case TGSI_OPCODE_F2I:
*/
static unsigned
get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
- unsigned file, unsigned index)
+ enum tgsi_file_type file, unsigned index)
{
if (file == TGSI_FILE_TEMPORARY) {
return emit->temp_map[index].arrayId;
*/
static unsigned
remap_temp_index(const struct svga_shader_emitter_v10 *emit,
- unsigned file, unsigned index)
+ enum tgsi_file_type file, unsigned index)
{
if (file == TGSI_FILE_TEMPORARY) {
return emit->temp_map[index].index;
static VGPU10OperandToken0
setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
VGPU10OperandToken0 operand0,
- unsigned file,
+ enum tgsi_file_type file,
boolean indirect, boolean index2D,
unsigned tempArrayID)
{
emit_dst_register(struct svga_shader_emitter_v10 *emit,
const struct tgsi_full_dst_register *reg)
{
- unsigned file = reg->Register.File;
+ enum tgsi_file_type file = reg->Register.File;
unsigned index = reg->Register.Index;
- const unsigned sem_name = emit->info.output_semantic_name[index];
+ const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
const unsigned sem_index = emit->info.output_semantic_index[index];
unsigned writemask = reg->Register.WriteMask;
- const unsigned indirect = reg->Register.Indirect;
+ const boolean indirect = reg->Register.Indirect;
const unsigned tempArrayId = get_temp_array_id(emit, file, index);
- const unsigned index2d = reg->Register.Dimension;
+ const boolean index2d = reg->Register.Dimension;
VGPU10OperandToken0 operand0;
if (file == TGSI_FILE_OUTPUT) {
emit_src_register(struct svga_shader_emitter_v10 *emit,
const struct tgsi_full_src_register *reg)
{
- unsigned file = reg->Register.File;
+ enum tgsi_file_type file = reg->Register.File;
unsigned index = reg->Register.Index;
- const unsigned indirect = reg->Register.Indirect;
+ const boolean indirect = reg->Register.Indirect;
const unsigned tempArrayId = get_temp_array_id(emit, file, index);
- const unsigned index2d = reg->Register.Dimension;
+ const boolean index2d = reg->Register.Dimension;
const unsigned swizzleX = reg->Register.SwizzleX;
const unsigned swizzleY = reg->Register.SwizzleY;
const unsigned swizzleZ = reg->Register.SwizzleZ;
const unsigned swizzleW = reg->Register.SwizzleW;
- const unsigned absolute = reg->Register.Absolute;
- const unsigned negate = reg->Register.Negate;
+ const boolean absolute = reg->Register.Absolute;
+ const boolean negate = reg->Register.Negate;
bool is_prim_id = FALSE;
VGPU10OperandToken0 operand0;
operand0.value = operand1.value = 0;
if (is_prim_id) {
+ /* NOTE: we should be using VGPU10_OPERAND_1_COMPONENT here, but
+ * our virtual GPU accepts this as-is.
+ */
operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
}
*/
static void
emit_opcode(struct svga_shader_emitter_v10 *emit,
- unsigned vgpu10_opcode, boolean saturate)
+ VGPU10_OPCODE_TYPE vgpu10_opcode, boolean saturate)
{
VGPU10OpcodeToken0 token0;
* Create a tgsi_full_src_register.
*/
static struct tgsi_full_src_register
-make_src_reg(unsigned file, unsigned index)
+make_src_reg(enum tgsi_file_type file, unsigned index)
{
struct tgsi_full_src_register reg;
* Create a tgsi_full_dst_register.
*/
static struct tgsi_full_dst_register
-make_dst_reg(unsigned file, unsigned index)
+make_dst_reg(enum tgsi_file_type file, unsigned index)
{
struct tgsi_full_dst_register reg;
/** Return the named swizzle term from the src register */
static inline unsigned
-get_swizzle(const struct tgsi_full_src_register *reg, unsigned term)
+get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
{
switch (term) {
case TGSI_SWIZZLE_X:
*/
static struct tgsi_full_src_register
swizzle_src(const struct tgsi_full_src_register *reg,
- unsigned swizzleX, unsigned swizzleY,
- unsigned swizzleZ, unsigned swizzleW)
+ enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
+ enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
{
struct tgsi_full_src_register swizzled = *reg;
/* Note: we swizzle the current swizzle */
* terms are the same.
*/
static struct tgsi_full_src_register
-scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle)
+scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
{
struct tgsi_full_src_register swizzled = *reg;
/* Note: we swizzle the current swizzle */
*/
static unsigned
translate_interpolation(const struct svga_shader_emitter_v10 *emit,
- unsigned interp, unsigned interpolate_loc)
+ enum tgsi_interpolate_mode interp,
+ enum tgsi_interpolate_loc interpolate_loc)
{
if (interp == TGSI_INTERPOLATE_COLOR) {
interp = emit->key.fs.flatshade ?
/**
* Translate a TGSI property to VGPU10.
- * Don't emit any instructions yet, only need to gather the primitive property information.
- * The output primitive topology might be changed later. The final property instructions
- * will be emitted as part of the pre-helper code.
+ * Don't emit any instructions yet, only need to gather the primitive property
+ * information. The output primitive topology might be changed later. The
+ * final property instructions will be emitted as part of the pre-helper code.
*/
static boolean
emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
*/
static void
emit_input_declaration(struct svga_shader_emitter_v10 *emit,
- unsigned opcodeType, unsigned operandType,
- unsigned dim, unsigned index, unsigned size,
- unsigned name, unsigned numComp,
- unsigned selMode, unsigned usageMask,
- unsigned interpMode)
+ VGPU10_OPCODE_TYPE opcodeType,
+ VGPU10_OPERAND_TYPE operandType,
+ VGPU10_OPERAND_INDEX_DIMENSION dim,
+ unsigned index, unsigned size,
+ VGPU10_SYSTEM_NAME name,
+ VGPU10_OPERAND_NUM_COMPONENTS numComp,
+ VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
+ unsigned usageMask,
+ VGPU10_INTERPOLATION_MODE interpMode)
{
VGPU10OpcodeToken0 opcode0;
VGPU10OperandToken0 operand0;
*/
static void
emit_output_declaration(struct svga_shader_emitter_v10 *emit,
- unsigned type, unsigned index,
- unsigned name, unsigned usageMask)
+ VGPU10_OPCODE_TYPE type, unsigned index,
+ VGPU10_SYSTEM_NAME name,
+ unsigned usageMask)
{
VGPU10OpcodeToken0 opcode0;
VGPU10OperandToken0 operand0;
*/
static void
emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
- unsigned semantic_name, unsigned index)
+ enum tgsi_semantic semantic_name, unsigned index)
{
switch (semantic_name) {
case TGSI_SEMANTIC_INSTANCEID:
emit->sampler_target[unit] = decl->SamplerView.Resource;
/* Note: we can ignore YZW return types for now */
emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
+ emit->sampler_view[unit] = TRUE;
}
return TRUE;
if (emit->unit == PIPE_SHADER_FRAGMENT) {
for (i = 0; i < emit->linkage.num_inputs; i++) {
- unsigned semantic_name = emit->info.input_semantic_name[i];
+ enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
unsigned usage_mask = emit->info.input_usage_mask[i];
unsigned index = emit->linkage.input_map[i];
- unsigned type, interpolationMode, name;
+ VGPU10_OPCODE_TYPE type;
+ VGPU10_INTERPOLATION_MODE interpolationMode;
+ VGPU10_SYSTEM_NAME name;
if (usage_mask == 0)
continue; /* register is not actually used */
else if (emit->unit == PIPE_SHADER_GEOMETRY) {
for (i = 0; i < emit->info.num_inputs; i++) {
- unsigned semantic_name = emit->info.input_semantic_name[i];
+ enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
unsigned usage_mask = emit->info.input_usage_mask[i];
unsigned index = emit->linkage.input_map[i];
- unsigned opcodeType, operandType;
- unsigned numComp, selMode;
- unsigned name;
- unsigned dim;
+ VGPU10_OPCODE_TYPE opcodeType, operandType;
+ VGPU10_OPERAND_NUM_COMPONENTS numComp;
+ VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode;
+ VGPU10_SYSTEM_NAME name;
+ VGPU10_OPERAND_INDEX_DIMENSION dim;
if (usage_mask == 0)
continue; /* register is not actually used */
selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
name = VGPU10_NAME_UNDEFINED;
- /* all geometry shader inputs are two dimensional except gl_PrimitiveID */
+ /* all geometry shader inputs are two dimensional except
+ * gl_PrimitiveID
+ */
dim = VGPU10_OPERAND_INDEX_2D;
if (semantic_name == TGSI_SEMANTIC_PRIMID) {
for (i = 0; i < emit->info.num_outputs; i++) {
/*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
- const unsigned semantic_name = emit->info.output_semantic_name[i];
+ const enum tgsi_semantic semantic_name =
+ emit->info.output_semantic_name[i];
const unsigned semantic_index = emit->info.output_semantic_index[i];
unsigned index = i;
emit->fs.color_out_index[semantic_index] = index;
+ emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
+ index + 1);
+
/* The semantic index is the shader's color output/buffer index */
emit_output_declaration(emit,
VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
emit->info.output_semantic_index[idx] = j;
}
+
+ emit->fs.num_color_outputs =
+ emit->key.fs.write_color0_to_n_cbufs;
}
}
else {
}
else {
/* VS or GS */
- unsigned name, type;
+ VGPU10_COMPONENT_NAME name;
+ VGPU10_OPCODE_TYPE type;
unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
switch (semantic_name) {
}
else if (emit->unit == PIPE_SHADER_FRAGMENT) {
if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
- emit->key.fs.white_fragments ||
emit->key.fs.write_color0_to_n_cbufs > 1) {
/* Allocate a temp to hold the output color */
emit->fs.color_tmp_index = total_temps;
*/
total_consts = emit->num_shader_consts[0];
- /* Now, allocate constant slots for the "extra" constants */
+ /* Now, allocate constant slots for the "extra" constants.
+ * Note: it's critical that these extra constant locations
+ * exactly match what's emitted by the "extra" constants code
+ * in svga_state_constants.c
+ */
/* Vertex position scale/translation */
if (emit->vposition.need_prescale) {
}
}
- /* Texcoord scale factors for RECT textures */
- {
- for (i = 0; i < emit->num_samplers; i++) {
+ for (i = 0; i < emit->num_samplers; i++) {
+
+ if (emit->sampler_view[i]) {
+
+ /* Texcoord scale factors for RECT textures */
if (emit->key.tex[i].unnormalized) {
emit->texcoord_scale_index[i] = total_consts++;
}
- }
- }
- /* Texture buffer sizes */
- for (i = 0; i < emit->num_samplers; i++) {
- if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
- emit->texture_buffer_size_index[i] = total_consts++;
+ /* Texture buffer sizes */
+ if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
+ emit->texture_buffer_size_index[i] = total_consts++;
+ }
}
}
* Translate TGSI_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x.
*/
static unsigned
-tgsi_texture_to_resource_dimension(unsigned target, boolean is_array)
+tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
+ boolean is_array)
{
switch (target) {
case TGSI_TEXTURE_BUFFER:
static void
emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
- unsigned opcode,
+ VGPU10_OPCODE_TYPE opcode,
const struct tgsi_full_dst_register *dst,
const struct tgsi_full_src_register *src,
boolean saturate)
static void
emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
- unsigned opcode,
+ VGPU10_OPCODE_TYPE opcode,
const struct tgsi_full_dst_register *dst,
const struct tgsi_full_src_register *src1,
const struct tgsi_full_src_register *src2,
static void
emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
- unsigned opcode,
+ VGPU10_OPCODE_TYPE opcode,
const struct tgsi_full_dst_register *dst,
const struct tgsi_full_src_register *src1,
const struct tgsi_full_src_register *src2,
}
-/**
- * Emit code for TGSI_OPCODE_ABS instruction.
- */
-static boolean
-emit_abs(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
-{
- /* dst = ABS(s0):
- * dst = abs(s0)
- * Translates into:
- * MOV dst, abs(s0)
- */
- struct tgsi_full_src_register abs_src0 = absolute_src(&inst->Src[0]);
-
- /* MOV dst, abs(s0) */
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
- &abs_src0, inst->Instruction.Saturate);
-
- return TRUE;
-}
-
-
/**
* Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
*/
{
unsigned index = inst->Dst[0].Register.Index;
struct tgsi_full_dst_register dst;
- unsigned opcode;
+ VGPU10_OPCODE_TYPE opcode;
assert(index < MAX_VGPU10_ADDR_REGS);
dst = make_dst_temp_reg(emit->address_reg_index[index]);
}
-/**
- * Emit code for TGSI_OPCODE_DP2A instruction.
- */
-static boolean
-emit_dp2a(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
-{
- /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
- * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
- * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
- * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
- * Translate into
- * MAD tmp.x, s0.y, s1.y, s2.x
- * MAD tmp.x, s0.x, s1.x, tmp.x
- * MOV dst.xyzw, tmp.xxxx
- */
- unsigned tmp = get_temp_index(emit);
- struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
- struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
-
- struct tgsi_full_src_register tmp_src_xxxx =
- scalar_src(&tmp_src, TGSI_SWIZZLE_X);
- struct tgsi_full_dst_register tmp_dst_x =
- writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
-
- struct tgsi_full_src_register src0_xxxx =
- scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
- struct tgsi_full_src_register src0_yyyy =
- scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
- struct tgsi_full_src_register src1_xxxx =
- scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
- struct tgsi_full_src_register src1_yyyy =
- scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
- struct tgsi_full_src_register src2_xxxx =
- scalar_src(&inst->Src[2], TGSI_SWIZZLE_X);
-
- emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy,
- &src1_yyyy, &src2_xxxx, FALSE);
- emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx,
- &src1_xxxx, &tmp_src_xxxx, FALSE);
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
- &tmp_src_xxxx, inst->Instruction.Saturate);
-
- free_temp_indexes(emit);
-
- return TRUE;
-}
-
-
-/**
- * Emit code for TGSI_OPCODE_DPH instruction.
- */
-static boolean
-emit_dph(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
-{
- /*
- * DP3 tmp, s0, s1
- * ADD dst, tmp, s1.wwww
- */
-
- struct tgsi_full_src_register s1_wwww =
- swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
- TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
-
- unsigned tmp = get_temp_index(emit);
- struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
- struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
-
- /* DP3 tmp, s0, s1 */
- emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0],
- &inst->Src[1], FALSE);
-
- /* ADD dst, tmp, s1.wwww */
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src,
- &s1_wwww, inst->Instruction.Saturate);
-
- free_temp_indexes(emit);
-
- return TRUE;
-}
-
-
/**
* Emit code for TGSI_OPCODE_DST instruction.
*/
}
-/**
- * Emit code for TGSI_OPCODE_SCS instruction.
- */
-static boolean
-emit_scs(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
-{
- /* dst.x = cos(src.x)
- * dst.y = sin(src.x)
- * dst.z = 0.0
- * dst.w = 1.0
- */
- struct tgsi_full_dst_register dst_x =
- writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
- struct tgsi_full_dst_register dst_y =
- writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
- struct tgsi_full_dst_register dst_zw =
- writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW);
-
- struct tgsi_full_src_register zero_one =
- make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f);
-
- begin_emit_instruction(emit);
- emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate);
- emit_dst_register(emit, &dst_y);
- emit_dst_register(emit, &dst_x);
- emit_src_register(emit, &inst->Src[0]);
- end_emit_instruction(emit);
-
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
- &dst_zw, &zero_one, inst->Instruction.Saturate);
-
- return TRUE;
-}
-
-
/**
* Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
*/
}
-/**
- * Emit code for TGSI_OPCODE_SUB instruction.
- */
-static boolean
-emit_sub(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
-{
- /* dst = SUB(s0, s1):
- * dst = s0 - s1
- * Translates into:
- * ADD dst, s0, neg(s1)
- */
- struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]);
-
- /* ADD dst, s0, neg(s1) */
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0],
- &inst->Src[0], &neg_src1,
- inst->Instruction.Saturate);
-
- return TRUE;
-}
-
-
/**
* Emit a comparison instruction. The dest register will get
* 0 or ~0 values depending on the outcome of comparing src0 to src1.
struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
- /* MUL tmp, coord, const[] */
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
- coord, &scale_src, FALSE);
+ if (emit->key.tex[unit].texel_bias) {
+ /* to fix texture coordinate rounding issue, 0.0001 offset is
+ * been added. This fixes piglit test fbo-blit-scaled-linear. */
+ struct tgsi_full_src_register offset =
+ make_immediate_reg_float(emit, 0.0001f);
+
+ /* ADD tmp, coord, offset */
+ emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
+ coord, &offset, FALSE);
+ /* MUL tmp, tmp, scale */
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
+ &tmp_src, &scale_src, FALSE);
+ }
+ else {
+ /* MUL tmp, coord, const[] */
+ emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
+ coord, &scale_src, FALSE);
+ }
return tmp_src;
}
else {
*/
static void
emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
- unsigned target,
+ enum tgsi_texture_type target,
const struct tgsi_full_src_register *coord)
{
struct tgsi_full_src_register coord_src_ref;
- unsigned component;
+ int component;
assert(tgsi_is_shadow_target(target));
- assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */
- if (target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
- target == TGSI_TEXTURE_SHADOWCUBE)
- component = TGSI_SWIZZLE_W;
- else
- component = TGSI_SWIZZLE_Z;
+ component = tgsi_util_get_shadow_ref_src_index(target) % 4;
+ assert(component >= 0);
coord_src_ref = scalar_src(coord, component);
boolean swizzled;
boolean shadow_compare;
unsigned unit;
- unsigned texture_target; /**< TGSI_TEXTURE_x */
+ enum tgsi_texture_type texture_target; /**< TGSI_TEXTURE_x */
struct tgsi_full_src_register tmp_src;
struct tgsi_full_dst_register tmp_dst;
const struct tgsi_full_dst_register *inst_dst;
}
swz->inst_dst = &inst->Dst[0];
swz->coord_src = &inst->Src[0];
+
+ emit->fs.shadow_compare_units |= shadow_compare << unit;
}
assert(emit->unit == PIPE_SHADER_FRAGMENT);
- switch (swz->texture_target) {
- case TGSI_TEXTURE_SHADOW2D:
- case TGSI_TEXTURE_SHADOWRECT:
- case TGSI_TEXTURE_SHADOW1D_ARRAY:
- coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
- break;
- case TGSI_TEXTURE_SHADOW1D:
- coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y);
- break;
- case TGSI_TEXTURE_SHADOWCUBE:
- case TGSI_TEXTURE_SHADOW2D_ARRAY:
- coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W);
- break;
- default:
- assert(!"Unexpected texture target in end_tex_swizzle()");
- coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
- }
+ int component =
+ tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
+ assert(component >= 0);
+ coord_src = scalar_src(swz->coord_src, component);
/* COMPARE tmp, coord, texel */
- /* XXX it would seem that the texel and coord arguments should
- * be transposed here, but piglit tests indicate otherwise.
- */
emit_comparison(emit, compare_func,
- &swz->tmp_dst, &texel_src, &coord_src);
+ &swz->tmp_dst, &coord_src, &texel_src);
/* AND dest, tmp, {1.0} */
begin_emit_instruction(emit);
((swz_g == PIPE_SWIZZLE_0) << 1) |
((swz_b == PIPE_SWIZZLE_0) << 2) |
((swz_a == PIPE_SWIZZLE_0) << 3));
+ writemask_0 &= swz->inst_dst->Register.WriteMask;
if (writemask_0) {
struct tgsi_full_src_register zero = int_tex ?
((swz_g == PIPE_SWIZZLE_1) << 1) |
((swz_b == PIPE_SWIZZLE_1) << 2) |
((swz_a == PIPE_SWIZZLE_1) << 3));
+ writemask_1 &= swz->inst_dst->Register.WriteMask;
if (writemask_1) {
struct tgsi_full_src_register one = int_tex ?
/* SAMPLE dst, coord(s0), resource, sampler */
begin_emit_instruction(emit);
+ /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
+ * with LOD=0. But our virtual GPU accepts this as-is.
+ */
emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
inst->Instruction.Saturate, offsets);
emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
const struct tgsi_full_instruction *inst)
{
const unsigned unit = inst->Src[1].Register.Index;
- const unsigned target = inst->Texture.Texture;
+ const enum tgsi_texture_type target = inst->Texture.Texture;
boolean valid = TRUE;
if (tgsi_is_shadow_target(target) &&
const struct tgsi_full_instruction *inst)
{
const uint unit = inst->Src[1].Register.Index;
- unsigned target = inst->Texture.Texture;
- unsigned opcode;
+ const enum tgsi_texture_type target = inst->Texture.Texture;
+ VGPU10_OPCODE_TYPE opcode;
struct tgsi_full_src_register coord;
int offsets[3];
struct tex_swizzle_info swz_info;
const struct tgsi_full_instruction *inst)
{
const uint unit = inst->Src[1].Register.Index;
- unsigned target = inst->Texture.Texture;
- unsigned opcode;
+ const enum tgsi_texture_type target = inst->Texture.Texture;
+ VGPU10_OPCODE_TYPE opcode;
int offsets[3];
unsigned tmp = get_temp_index(emit);
struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
begin_emit_instruction(emit);
if (tgsi_is_shadow_target(target))
+ /* NOTE: for non-fragment shaders, we should use
+ * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
+ */
opcode = VGPU10_OPCODE_SAMPLE_C;
else
opcode = VGPU10_OPCODE_SAMPLE;
}
-/*
- * Emit code for TGSI_OPCODE_XPD instruction.
- */
-static boolean
-emit_xpd(struct svga_shader_emitter_v10 *emit,
- const struct tgsi_full_instruction *inst)
-{
- /* dst.x = src0.y * src1.z - src1.y * src0.z
- * dst.y = src0.z * src1.x - src1.z * src0.x
- * dst.z = src0.x * src1.y - src1.x * src0.y
- * dst.w = 1
- */
- struct tgsi_full_src_register s0_xxxx =
- scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
- struct tgsi_full_src_register s0_yyyy =
- scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
- struct tgsi_full_src_register s0_zzzz =
- scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
-
- struct tgsi_full_src_register s1_xxxx =
- scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
- struct tgsi_full_src_register s1_yyyy =
- scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
- struct tgsi_full_src_register s1_zzzz =
- scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
-
- unsigned tmp1 = get_temp_index(emit);
- struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
- struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
-
- unsigned tmp2 = get_temp_index(emit);
- struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
- struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
- struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
-
- unsigned tmp3 = get_temp_index(emit);
- struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
- struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
- struct tgsi_full_dst_register tmp3_dst_x =
- writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
- struct tgsi_full_dst_register tmp3_dst_y =
- writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
- struct tgsi_full_dst_register tmp3_dst_z =
- writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
- struct tgsi_full_dst_register tmp3_dst_w =
- writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
-
- /* Note: we put all the intermediate computations into tmp3 in case
- * the XPD dest register is that same as one of the src regs (in which
- * case we could clobber a src reg before we're done with it) .
- *
- * Note: we could get by with just one temp register instead of three
- * since we're doing scalar operations and there's enough room in one
- * temp for everything.
- */
-
- /* MUL tmp1, src0.y, src1.z */
- /* MUL tmp2, src1.y, src0.z */
- /* ADD tmp3.x, tmp1, -tmp2 */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
- &s0_yyyy, &s1_zzzz, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
- &s1_yyyy, &s0_zzzz, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
- &tmp1_src, &neg_tmp2_src, FALSE);
- }
-
- /* MUL tmp1, src0.z, src1.x */
- /* MUL tmp2, src1.z, src0.x */
- /* ADD tmp3.y, tmp1, -tmp2 */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
- &s1_xxxx, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
- &s0_xxxx, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
- &tmp1_src, &neg_tmp2_src, FALSE);
- }
-
- /* MUL tmp1, src0.x, src1.y */
- /* MUL tmp2, src1.x, src0.y */
- /* ADD tmp3.z, tmp1, -tmp2 */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
- &s1_yyyy, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
- &s0_yyyy, FALSE);
- emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
- &tmp1_src, &neg_tmp2_src, FALSE);
- }
-
- /* MOV tmp3.w, 1.0 */
- if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
- struct tgsi_full_src_register one =
- make_immediate_reg_float(emit, 1.0f);
-
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE);
- }
-
- /* MOV dst, tmp3 */
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
- inst->Instruction.Saturate);
-
-
- free_temp_indexes(emit);
-
- return TRUE;
-}
-
-
/**
* Emit code for TGSI_OPCODE_TXD (explicit derivatives)
*/
const struct tgsi_full_instruction *inst)
{
const uint unit = inst->Src[3].Register.Index;
- unsigned target = inst->Texture.Texture;
+ const enum tgsi_texture_type target = inst->Texture.Texture;
int offsets[3];
struct tgsi_full_src_register coord;
struct tex_swizzle_info swz_info;
emit_txl_txb(struct svga_shader_emitter_v10 *emit,
const struct tgsi_full_instruction *inst)
{
- unsigned target = inst->Texture.Texture;
- unsigned opcode, unit;
+ const enum tgsi_texture_type target = inst->Texture.Texture;
+ VGPU10_OPCODE_TYPE opcode;
+ unsigned unit;
int offsets[3];
struct tgsi_full_src_register coord, lod_bias;
struct tex_swizzle_info swz_info;
emit_simple(struct svga_shader_emitter_v10 *emit,
const struct tgsi_full_instruction *inst)
{
- const unsigned opcode = inst->Instruction.Opcode;
+ const enum tgsi_opcode opcode = inst->Instruction.Opcode;
const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
unsigned i;
begin_emit_instruction(emit);
- emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
- inst->Instruction.Saturate);
+ emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
for (i = 0; i < op->num_dst; i++) {
emit_dst_register(emit, &inst->Dst[i]);
}
unsigned dst_count,
unsigned dst_index)
{
- const unsigned opcode = inst->Instruction.Opcode;
+ const enum tgsi_opcode opcode = inst->Instruction.Opcode;
const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
unsigned i;
begin_emit_instruction(emit);
- emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
- inst->Instruction.Saturate);
+ emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
for (i = 0; i < dst_count; i++) {
if (i == dst_index) {
unsigned inst_number,
const struct tgsi_full_instruction *inst)
{
- const unsigned opcode = inst->Instruction.Opcode;
+ const enum tgsi_opcode opcode = inst->Instruction.Opcode;
switch (opcode) {
case TGSI_OPCODE_ADD:
return emit_vertex(emit, inst);
case TGSI_OPCODE_ENDPRIM:
return emit_endprim(emit, inst);
- case TGSI_OPCODE_ABS:
- return emit_abs(emit, inst);
case TGSI_OPCODE_IABS:
return emit_iabs(emit, inst);
case TGSI_OPCODE_ARL:
return emit_cmp(emit, inst);
case TGSI_OPCODE_COS:
return emit_sincos(emit, inst);
- case TGSI_OPCODE_DP2A:
- return emit_dp2a(emit, inst);
- case TGSI_OPCODE_DPH:
- return emit_dph(emit, inst);
case TGSI_OPCODE_DST:
return emit_dst(emit, inst);
case TGSI_OPCODE_EX2:
return emit_rsq(emit, inst);
case TGSI_OPCODE_SAMPLE:
return emit_sample(emit, inst);
- case TGSI_OPCODE_SCS:
- return emit_scs(emit, inst);
case TGSI_OPCODE_SEQ:
return emit_seq(emit, inst);
case TGSI_OPCODE_SGE:
return emit_ssg(emit, inst);
case TGSI_OPCODE_ISSG:
return emit_issg(emit, inst);
- case TGSI_OPCODE_SUB:
- return emit_sub(emit, inst);
case TGSI_OPCODE_TEX:
return emit_tex(emit, inst);
case TGSI_OPCODE_TXP:
return emit_txq(emit, inst);
case TGSI_OPCODE_UIF:
return emit_if(emit, inst);
- case TGSI_OPCODE_XPD:
- return emit_xpd(emit, inst);
case TGSI_OPCODE_UMUL_HI:
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_UDIV:
emit->common_immediate_pos[n++] =
alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
- emit->common_immediate_pos[n++] =
- alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f);
+ if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
+ }
emit->common_immediate_pos[n++] =
alloc_immediate_int4(emit, 0, 1, 0, -1);
if (emit->key.vs.attrib_puint_to_snorm) {
emit->common_immediate_pos[n++] =
- alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
+ alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
}
if (emit->key.vs.attrib_puint_to_uscaled) {
alloc_immediate_int4(emit, 22, 30, 0, 0);
}
+ unsigned i;
+
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ if (emit->key.tex[i].texel_bias) {
+ /* Replace 0.0f if more immediate float value is needed */
+ emit->common_immediate_pos[n++] =
+ alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
+ break;
+ }
+ }
+
assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
emit->num_common_immediates = n;
}
}
+/**
+ * The device has no direct support for the pipe_blend_state::alpha_to_one
+ * option so we implement it here with shader code.
+ *
+ * Note that this is kind of pointless, actually. Here we're clobbering
+ * the alpha value with 1.0. So if alpha-to-coverage is enabled, we'll wind
+ * up with 100% coverage. That's almost certainly not what the user wants.
+ * The work-around is to add extra shader code to compute coverage from alpha
+ * and write it to the coverage output register (if the user's shader doesn't
+ * do so already). We'll probably do that in the future.
+ */
+static void
+emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
+ unsigned fs_color_tmp_index)
+{
+ struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+ unsigned i;
+
+ /* Note: it's not 100% clear from the spec if we're supposed to clobber
+ * the alpha for all render targets. But that's what NVIDIA does and
+ * that's what Piglit tests.
+ */
+ for (i = 0; i < emit->fs.num_color_outputs; i++) {
+ struct tgsi_full_dst_register color_dst;
+
+ if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
+ /* write to the temp color register */
+ color_dst = make_dst_temp_reg(fs_color_tmp_index);
+ }
+ else {
+ /* write directly to the color[i] output */
+ color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
+ }
+
+ color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
+
+ emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one, FALSE);
+ }
+}
+
+
/**
* Emit alpha test code. This compares TEMP[fs_color_tmp_index].w
* against the alpha reference value and discards the fragment if the
emit_src_register(emit, &tmp_src_x);
end_emit_instruction(emit);
- /* If we don't need to broadcast the color below or set fragments to
- * white, emit final color here.
+ /* If we don't need to broadcast the color below, emit the final color here.
*/
- if (emit->key.fs.write_color0_to_n_cbufs <= 1 &&
- !emit->key.fs.white_fragments) {
+ if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
/* MOV output.color, tempcolor */
emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
&color_src, FALSE); /* XXX saturate? */
}
-/**
- * When we need to emit white for all fragments (for emulating XOR logicop
- * mode), this function copies white into the temporary color output register.
- */
-static void
-emit_set_color_white(struct svga_shader_emitter_v10 *emit,
- unsigned fs_color_tmp_index)
-{
- struct tgsi_full_dst_register color_dst =
- make_dst_temp_reg(fs_color_tmp_index);
- struct tgsi_full_src_register white =
- make_immediate_reg_float(emit, 1.0f);
-
- emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE);
-}
-
-
/**
* Emit instructions for writing a single color output to multiple
* color buffers.
{
const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
unsigned i;
- struct tgsi_full_src_register color_src =
- make_src_temp_reg(fs_color_tmp_index);
+ struct tgsi_full_src_register color_src;
+
+ if (emit->key.fs.white_fragments) {
+ /* set all color outputs to white */
+ color_src = make_immediate_reg_float(emit, 1.0f);
+ }
+ else {
+ /* set all color outputs to TEMP[fs_color_tmp_index] */
+ assert(fs_color_tmp_index != INVALID_INDEX);
+ color_src = make_src_temp_reg(fs_color_tmp_index);
+ }
assert(emit->unit == PIPE_SHADER_FRAGMENT);
else if (emit->unit == PIPE_SHADER_FRAGMENT) {
const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
+ assert(!(emit->key.fs.white_fragments &&
+ emit->key.fs.write_color0_to_n_cbufs == 0));
+
/* We no longer want emit_dst_register() to substitute the
* temporary fragment color register for the real color output.
*/
emit->fs.color_tmp_index = INVALID_INDEX;
+ if (emit->key.fs.alpha_to_one) {
+ emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
+ }
if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
emit_alpha_test_instructions(emit, fs_color_tmp_index);
}
- if (emit->key.fs.white_fragments) {
- emit_set_color_white(emit, fs_color_tmp_index);
- }
if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
emit->key.fs.white_fragments) {
emit_broadcast_color_instructions(emit, fs_color_tmp_index);
svga_tgsi_vgpu10_translate(struct svga_context *svga,
const struct svga_shader *shader,
const struct svga_compile_key *key,
- unsigned unit)
+ enum pipe_shader_type unit)
{
struct svga_shader_variant *variant = NULL;
struct svga_shader_emitter_v10 *emit;
*/
variant->uses_flat_interp = emit->uses_flat_interp;
+ variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
+
if (tokens != shader->tokens) {
tgsi_free_tokens(tokens);
}