/**************************************************************************
*
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "i915_reg.h"
#include "i915_context.h"
#include "i915_fpc.h"
+#include "i915_debug_private.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
* Simple pass-through fragment shader to use when we don't have
* a real shader (or it fails to compile for some reason).
*/
-static unsigned passthrough[] =
+static unsigned passthrough_decl[] =
{
_3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1),
D0_CHANNEL_ALL),
0,
0,
+};
+static unsigned passthrough_program[] =
+{
/* move to output color:
*/
(A0_MOV |
0
};
-
-/* 1, -1/3!, 1/5!, -1/7! */
-static const float scs_sin_constants[4] = { 1.0,
- -1.0f / (3 * 2 * 1),
- 1.0f / (5 * 4 * 3 * 2 * 1),
- -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1)
-};
-
-/* 1, -1/2!, 1/4!, -1/6! */
-static const float scs_cos_constants[4] = { 1.0,
- -1.0f / (2 * 1),
- 1.0f / (4 * 3 * 2 * 1),
- -1.0f / (6 * 5 * 4 * 3 * 2 * 1)
-};
-
/* 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */
static const float sin_constants[4] = { 2.0 * M_PI,
-8.0f * M_PI * M_PI * M_PI / (3 * 2 * 1),
/**
* component-wise negation of ureg
*/
-static INLINE int
+static inline int
negate(int reg, int x, int y, int z, int w)
{
/* Another neat thing about the UREG representation */
+/**
+ * Install the built-in pass-through shader into fs.
+ *
+ * Allocates fs->decl and fs->program and fills them from passthrough_decl
+ * and passthrough_program.  The two allocations can fail independently, so
+ * each buffer is copied (and its length set) only when its own allocation
+ * succeeded; a failed allocation leaves that pointer NULL and its length
+ * field untouched.  fs->num_constants is always reset to 0.
+ */
static void
i915_use_passthrough_shader(struct i915_fragment_shader *fs)
{
- fs->program = (uint *) MALLOC(sizeof(passthrough));
+ fs->program = (uint *) MALLOC(sizeof(passthrough_program));
+ fs->decl = (uint *) MALLOC(sizeof(passthrough_decl));
if (fs->program) {
- memcpy(fs->program, passthrough, sizeof(passthrough));
- fs->program_len = Elements(passthrough);
+ memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
+ fs->program_len = ARRAY_SIZE(passthrough_program);
+ }
+ /* Guard decl on its own: copying into it under the fs->program check
+ * would write through NULL if only the decl allocation failed.
+ */
+ if (fs->decl) {
+ memcpy(fs->decl, passthrough_decl, sizeof(passthrough_decl));
+ fs->decl_len = ARRAY_SIZE(passthrough_decl);
}
fs->num_constants = 0;
}
static uint
src_vector(struct i915_fp_compile *p,
const struct i915_full_src_register *source,
- struct i915_fragment_shader* fs)
+ struct i915_fragment_shader *fs)
{
uint index = source->Register.Index;
uint src = 0, sem_name, sem_ind;
= inst->Dst[0].Register.WriteMask;
uint flags = 0x0;
- if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
+ if (inst->Instruction.Saturate)
flags |= A0_DEST_SATURATE;
if (writeMask & TGSI_WRITEMASK_X)
/**
* Return the number of coords needed to access a given TGSI_TEXTURE_*
*/
-static uint
-texture_num_coords(struct i915_fp_compile *p, uint tex)
+uint
+i915_num_coords(uint tex)
{
switch (tex) {
case TGSI_TEXTURE_SHADOW1D:
return 3;
default:
- i915_program_error(p, "Num coords");
+ debug_printf("Unknown texture target for num coords");
return 2;
}
}
sampler,
coord,
opcode,
- texture_num_coords(p, texture) );
+ i915_num_coords(texture) );
}
emit_simple_arith(struct i915_fp_compile *p,
const struct i915_full_instruction *inst,
uint opcode, uint numArgs,
- struct i915_fragment_shader* fs)
+ struct i915_fragment_shader *fs)
{
uint arg1, arg2, arg3;
emit_simple_arith_swap2(struct i915_fp_compile *p,
const struct i915_full_instruction *inst,
uint opcode, uint numArgs,
- struct i915_fragment_shader* fs)
+ struct i915_fragment_shader *fs)
{
struct i915_full_instruction inst2;
const struct i915_full_instruction *inst,
struct i915_fragment_shader *fs)
{
- uint writemask;
uint src0, src1, src2, flags;
uint tmp = 0;
switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_ABS:
+ case TGSI_OPCODE_ADD:
+ emit_simple_arith(p, inst, A0_ADD, 2, fs);
+ break;
+
+ case TGSI_OPCODE_CEIL:
src0 = src_vector(p, &inst->Src[0], fs);
+ tmp = i915_get_utemp(p);
+ flags = get_result_flags(inst);
+ i915_emit_arith(p,
+ A0_FLR,
+ tmp,
+ flags & A0_DEST_CHANNEL_ALL, 0,
+ negate(src0, 1, 1, 1, 1), 0, 0);
i915_emit_arith(p,
- A0_MAX,
+ A0_MOV,
get_result_vector(p, &inst->Dst[0]),
- get_result_flags(inst), 0,
- src0, negate(src0, 1, 1, 1, 1), 0);
- break;
-
- case TGSI_OPCODE_ADD:
- emit_simple_arith(p, inst, A0_ADD, 2, fs);
+ flags, 0,
+ negate(tmp, 1, 1, 1, 1), 0, 0);
break;
case TGSI_OPCODE_CMP:
i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
/*
- * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
+ * t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1
* t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
* t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
* result = DP4 t0, cos_constants
case TGSI_OPCODE_DDX:
case TGSI_OPCODE_DDY:
/* XXX We just output 0 here */
- debug_printf("Punting DDX/DDX\n");
+ debug_printf("Punting DDX/DDY\n");
src0 = get_result_vector(p, &inst->Dst[0]);
i915_emit_arith(p,
A0_MOV,
emit_simple_arith(p, inst, A0_DP4, 2, fs);
break;
- case TGSI_OPCODE_DPH:
- src0 = src_vector(p, &inst->Src[0], fs);
- src1 = src_vector(p, &inst->Src[1], fs);
-
- i915_emit_arith(p,
- A0_DP4,
- get_result_vector(p, &inst->Dst[0]),
- get_result_flags(inst), 0,
- swizzle(src0, X, Y, Z, ONE), src1, 0);
- break;
-
case TGSI_OPCODE_DST:
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
emit_simple_arith(p, inst, A0_FRC, 1, fs);
break;
- case TGSI_OPCODE_KIL:
+ case TGSI_OPCODE_KILL_IF:
/* kill if src[0].x < 0 || src[0].y < 0 ... */
src0 = src_vector(p, &inst->Src[0], fs);
tmp = i915_get_utemp(p);
1); /* num_coord */
break;
- case TGSI_OPCODE_KILP:
- /* We emit an unconditional kill; we may want to revisit
- * if we ever implement conditionals.
- */
+ case TGSI_OPCODE_KILL:
+ /* unconditional kill */
tmp = i915_get_utemp(p);
i915_emit_texld(p,
break;
case TGSI_OPCODE_MIN:
- src0 = src_vector(p, &inst->Src[0], fs);
- src1 = src_vector(p, &inst->Src[1], fs);
- tmp = i915_get_utemp(p);
- flags = get_result_flags(inst);
-
- i915_emit_arith(p,
- A0_MAX,
- tmp, flags & A0_DEST_CHANNEL_ALL, 0,
- negate(src0, 1, 1, 1, 1),
- negate(src1, 1, 1, 1, 1), 0);
-
- i915_emit_arith(p,
- A0_MOV,
- get_result_vector(p, &inst->Dst[0]),
- flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
+ emit_simple_arith(p, inst, A0_MIN, 2, fs);
break;
case TGSI_OPCODE_MOV:
swizzle(src0, X, X, X, X), 0, 0);
break;
- case TGSI_OPCODE_SCS:
- src0 = src_vector(p, &inst->Src[0], fs);
- tmp = i915_get_utemp(p);
-
- /*
- * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
- * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
- * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
- * scs.x = DP4 t1, scs_sin_constants
- * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
- * scs.y = DP4 t1, scs_cos_constants
- */
- i915_emit_arith(p,
- A0_MUL,
- tmp, A0_DEST_CHANNEL_XY, 0,
- swizzle(src0, X, X, ONE, ONE),
- swizzle(src0, X, ONE, ONE, ONE), 0);
-
- i915_emit_arith(p,
- A0_MUL,
- tmp, A0_DEST_CHANNEL_ALL, 0,
- swizzle(tmp, X, Y, X, Y),
- swizzle(tmp, X, X, ONE, ONE), 0);
-
- writemask = inst->Dst[0].Register.WriteMask;
-
- if (writemask & TGSI_WRITEMASK_Y) {
- uint tmp1;
-
- if (writemask & TGSI_WRITEMASK_X)
- tmp1 = i915_get_utemp(p);
- else
- tmp1 = tmp;
-
- i915_emit_arith(p,
- A0_MUL,
- tmp1, A0_DEST_CHANNEL_ALL, 0,
- swizzle(tmp, X, Y, Y, W),
- swizzle(tmp, X, Z, ONE, ONE), 0);
-
- i915_emit_arith(p,
- A0_DP4,
- get_result_vector(p, &inst->Dst[0]),
- A0_DEST_CHANNEL_Y, 0,
- swizzle(tmp1, W, Z, Y, X),
- i915_emit_const4fv(p, scs_sin_constants), 0);
- }
-
- if (writemask & TGSI_WRITEMASK_X) {
- i915_emit_arith(p,
- A0_MUL,
- tmp, A0_DEST_CHANNEL_XYZ, 0,
- swizzle(tmp, X, X, Z, ONE),
- swizzle(tmp, Z, ONE, ONE, ONE), 0);
-
- i915_emit_arith(p,
- A0_DP4,
- get_result_vector(p, &inst->Dst[0]),
- A0_DEST_CHANNEL_X, 0,
- swizzle(tmp, ONE, Z, Y, X),
- i915_emit_const4fv(p, scs_cos_constants), 0);
- }
- break;
-
case TGSI_OPCODE_SEQ:
/* if we're both >= and <= then we're == */
src0 = src_vector(p, &inst->Src[0], fs);
negate(tmp, 1, 1, 1, 1), 0);
break;
- case TGSI_OPCODE_SUB:
- src0 = src_vector(p, &inst->Src[0], fs);
- src1 = src_vector(p, &inst->Src[1], fs);
-
- i915_emit_arith(p,
- A0_ADD,
- get_result_vector(p, &inst->Dst[0]),
- get_result_flags(inst), 0,
- src0, negate(src1, 1, 1, 1, 1), 0);
- break;
-
case TGSI_OPCODE_TEX:
emit_tex(p, inst, T0_TEXLD, fs);
break;
emit_tex(p, inst, T0_TEXLDP, fs);
break;
- case TGSI_OPCODE_XPD:
- /* Cross product:
- * result.x = src0.y * src1.z - src0.z * src1.y;
- * result.y = src0.z * src1.x - src0.x * src1.z;
- * result.z = src0.x * src1.y - src0.y * src1.x;
- * result.w = undef;
- */
- src0 = src_vector(p, &inst->Src[0], fs);
- src1 = src_vector(p, &inst->Src[1], fs);
- tmp = i915_get_utemp(p);
-
- i915_emit_arith(p,
- A0_MUL,
- tmp, A0_DEST_CHANNEL_ALL, 0,
- swizzle(src0, Z, X, Y, ONE),
- swizzle(src1, Y, Z, X, ONE), 0);
-
- i915_emit_arith(p,
- A0_MAD,
- get_result_vector(p, &inst->Dst[0]),
- get_result_flags(inst), 0,
- swizzle(src0, Y, Z, X, ONE),
- swizzle(src1, Z, X, Y, ONE),
- negate(tmp, 1, 1, 1, 0));
- break;
-
default:
i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
p->error = 1;
static void i915_translate_token(struct i915_fp_compile *p,
- const union i915_full_token* token,
+ const union i915_full_token *token,
struct i915_fragment_shader *fs)
{
struct i915_fragment_shader *ifs = p->shader;
== TGSI_FILE_CONSTANT) {
uint i;
for (i = token->FullDeclaration.Range.First;
- i <= token->FullDeclaration.Range.Last;
+ i <= MIN2(token->FullDeclaration.Range.Last, I915_MAX_CONSTANT - 1);
i++) {
assert(ifs->constant_flags[i] == 0x0);
ifs->constant_flags[i] = I915_CONSTFLAG_USER;
/* Copy compilation results to fragment program struct:
*/
+ assert(!ifs->decl);
assert(!ifs->program);
+
+ ifs->decl
+ = (uint *) MALLOC(decl_size * sizeof(uint));
ifs->program
- = (uint *) MALLOC((program_size + decl_size) * sizeof(uint));
- if (ifs->program) {
- ifs->program_len = program_size + decl_size;
+ = (uint *) MALLOC(program_size * sizeof(uint));
- memcpy(ifs->program,
+ if (ifs->decl) {
+ ifs->decl_len = decl_size;
+
+ memcpy(ifs->decl,
p->declarations,
decl_size * sizeof(uint));
+ }
- memcpy(ifs->program + decl_size,
+ if (ifs->program) {
+ ifs->program_len = program_size;
+
+ memcpy(ifs->program,
p->program,
program_size * sizeof(uint));
}