inst = &p->program->Base.Instructions[nr];
inst->Opcode = (enum prog_opcode) op;
- inst->Data = 0;
emit_arg( &inst->SrcReg[0], src0 );
emit_arg( &inst->SrcReg[1], src1 );
struct ureg dest,
struct ureg src )
{
-#if 0
- /* XXX use this when drivers are ready for NRM3 */
- emit_op1(p, OPCODE_NRM3, dest, WRITEMASK_XYZ, src);
-#else
struct ureg tmp = get_temp(p);
emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
release_temp(p, tmp);
-#endif
}
}
-/**
- * Fetch a 4-element uint vector from the given source register.
- * Apply swizzling but not negation/abs.
- */
-static void
-fetch_vector4ui(const struct prog_src_register *source,
- const struct gl_program_machine *machine, GLuint result[4])
-{
- const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
- ASSERT(src);
-
- if (source->Swizzle == SWIZZLE_NOOP) {
- /* no swizzling */
- COPY_4V(result, src);
- }
- else {
- ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
- ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
- ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
- ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
- result[0] = src[GET_SWZ(source->Swizzle, 0)];
- result[1] = src[GET_SWZ(source->Swizzle, 1)];
- result[2] = src[GET_SWZ(source->Swizzle, 2)];
- result[3] = src[GET_SWZ(source->Swizzle, 3)];
- }
-
- /* Note: no Negate or Abs here */
-}
-
-
-
/**
* Fetch the derivative with respect to X or Y for the given register.
* XXX this currently only works for fragment program input attribs.
}
}
break;
- case OPCODE_AND: /* bitwise AND */
- {
- GLuint a[4], b[4], result[4];
- fetch_vector4ui(&inst->SrcReg[0], machine, a);
- fetch_vector4ui(&inst->SrcReg[1], machine, b);
- result[0] = a[0] & b[0];
- result[1] = a[1] & b[1];
- result[2] = a[2] & b[2];
- result[3] = a[3] & b[3];
- store_vector4ui(inst, machine, result);
- }
- break;
case OPCODE_ARL:
{
GLfloat t[4];
}
}
break;
- case OPCODE_DP2A:
- {
- GLfloat a[4], b[4], c, result[4];
- fetch_vector4(&inst->SrcReg[0], machine, a);
- fetch_vector4(&inst->SrcReg[1], machine, b);
- fetch_vector1(&inst->SrcReg[1], machine, &c);
- result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
- store_vector4(inst, machine, result);
- if (DEBUG_PROG) {
- printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
- result[0], a[0], a[1], b[0], b[1], c);
- }
- }
- break;
case OPCODE_DP3:
{
GLfloat a[4], b[4], result[4];
break;
case OPCODE_NOP:
break;
- case OPCODE_NOT: /* bitwise NOT */
- {
- GLuint a[4], result[4];
- fetch_vector4ui(&inst->SrcReg[0], machine, a);
- result[0] = ~a[0];
- result[1] = ~a[1];
- result[2] = ~a[2];
- result[3] = ~a[3];
- store_vector4ui(inst, machine, result);
- }
- break;
- case OPCODE_NRM3: /* 3-component normalization */
- {
- GLfloat a[4], result[4];
- GLfloat tmp;
- fetch_vector4(&inst->SrcReg[0], machine, a);
- tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
- if (tmp != 0.0F)
- tmp = INV_SQRTF(tmp);
- result[0] = tmp * a[0];
- result[1] = tmp * a[1];
- result[2] = tmp * a[2];
- result[3] = 0.0; /* undefined, but prevent valgrind warnings */
- store_vector4(inst, machine, result);
- }
- break;
- case OPCODE_NRM4: /* 4-component normalization */
- {
- GLfloat a[4], result[4];
- GLfloat tmp;
- fetch_vector4(&inst->SrcReg[0], machine, a);
- tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
- if (tmp != 0.0F)
- tmp = INV_SQRTF(tmp);
- result[0] = tmp * a[0];
- result[1] = tmp * a[1];
- result[2] = tmp * a[2];
- result[3] = tmp * a[3];
- store_vector4(inst, machine, result);
- }
- break;
- case OPCODE_OR: /* bitwise OR */
- {
- GLuint a[4], b[4], result[4];
- fetch_vector4ui(&inst->SrcReg[0], machine, a);
- fetch_vector4ui(&inst->SrcReg[1], machine, b);
- result[0] = a[0] | b[0];
- result[1] = a[1] | b[1];
- result[2] = a[2] | b[2];
- result[3] = a[3] | b[3];
- store_vector4ui(inst, machine, result);
- }
- break;
case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
{
GLfloat a[4];
store_vector4(inst, machine, result);
}
break;
- case OPCODE_XOR: /* bitwise XOR */
- {
- GLuint a[4], b[4], result[4];
- fetch_vector4ui(&inst->SrcReg[0], machine, a);
- fetch_vector4ui(&inst->SrcReg[1], machine, b);
- result[0] = a[0] ^ b[0];
- result[1] = a[1] ^ b[1];
- result[2] = a[2] ^ b[2];
- result[3] = a[3] ^ b[3];
- store_vector4ui(inst, machine, result);
- }
- break;
case OPCODE_XPD: /* cross product */
{
GLfloat a[4], b[4], result[4];
store_vector4(inst, machine, result);
}
break;
- case OPCODE_PRINT:
- {
- if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
- GLfloat a[4];
- fetch_vector4(&inst->SrcReg[0], machine, a);
- printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
- a[0], a[1], a[2], a[3]);
- }
- else {
- printf("%s\n", (const char *) inst->Data);
- }
- }
- break;
case OPCODE_END:
return GL_TRUE;
default:
{
GLuint i;
for (i = 0; i < count; i++) {
- free(inst[i].Data);
free((char *)inst[i].Comment);
}
free(inst);
{ OPCODE_NOP, "NOP", 0, 0 },
{ OPCODE_ABS, "ABS", 1, 1 },
{ OPCODE_ADD, "ADD", 2, 1 },
- { OPCODE_AND, "AND", 2, 1 },
{ OPCODE_ARL, "ARL", 1, 1 },
{ OPCODE_BGNLOOP,"BGNLOOP", 0, 0 },
{ OPCODE_BGNSUB, "BGNSUB", 0, 0 },
{ OPCODE_DDX, "DDX", 1, 1 },
{ OPCODE_DDY, "DDY", 1, 1 },
{ OPCODE_DP2, "DP2", 2, 1 },
- { OPCODE_DP2A, "DP2A", 3, 1 },
{ OPCODE_DP3, "DP3", 2, 1 },
{ OPCODE_DP4, "DP4", 2, 1 },
{ OPCODE_DPH, "DPH", 2, 1 },
{ OPCODE_NOISE2, "NOISE2", 1, 1 },
{ OPCODE_NOISE3, "NOISE3", 1, 1 },
{ OPCODE_NOISE4, "NOISE4", 1, 1 },
- { OPCODE_NOT, "NOT", 1, 1 },
- { OPCODE_NRM3, "NRM3", 1, 1 },
- { OPCODE_NRM4, "NRM4", 1, 1 },
- { OPCODE_OR, "OR", 2, 1 },
{ OPCODE_PK2H, "PK2H", 1, 1 },
{ OPCODE_PK2US, "PK2US", 1, 1 },
{ OPCODE_PK4B, "PK4B", 1, 1 },
{ OPCODE_PK4UB, "PK4UB", 1, 1 },
{ OPCODE_POW, "POW", 2, 1 },
- { OPCODE_PRINT, "PRINT", 1, 0 },
{ OPCODE_RCP, "RCP", 1, 1 },
{ OPCODE_RET, "RET", 0, 0 },
{ OPCODE_RFL, "RFL", 1, 1 },
{ OPCODE_UP4B, "UP4B", 1, 1 },
{ OPCODE_UP4UB, "UP4UB", 1, 1 },
{ OPCODE_X2D, "X2D", 3, 1 },
- { OPCODE_XOR, "XOR", 2, 1 },
{ OPCODE_XPD, "XPD", 2, 1 }
};
OPCODE_NOP = 0, /* X */
OPCODE_ABS, /* X X 1.1 X */
OPCODE_ADD, /* X X X X X */
- OPCODE_AND, /* */
OPCODE_ARL, /* X X X */
OPCODE_BGNLOOP, /* opt */
OPCODE_BGNSUB, /* opt */
OPCODE_DDX, /* X X */
OPCODE_DDY, /* X X */
OPCODE_DP2, /* 2 X */
- OPCODE_DP2A, /* 2 */
OPCODE_DP3, /* X X X X X */
OPCODE_DP4, /* X X X X X */
OPCODE_DPH, /* X X 1.1 */
OPCODE_NOISE2, /* X */
OPCODE_NOISE3, /* X */
OPCODE_NOISE4, /* X */
- OPCODE_NOT, /* */
- OPCODE_NRM3, /* */
- OPCODE_NRM4, /* */
- OPCODE_OR, /* */
OPCODE_PK2H, /* X */
OPCODE_PK2US, /* X */
OPCODE_PK4B, /* X */
OPCODE_PK4UB, /* X */
OPCODE_POW, /* X X X X */
- OPCODE_PRINT, /* X X */
OPCODE_RCP, /* X X X X X */
OPCODE_RET, /* 2 2 opt */
OPCODE_RFL, /* X */
OPCODE_UP4B, /* X */
OPCODE_UP4UB, /* X */
OPCODE_X2D, /* X */
- OPCODE_XOR, /* */
OPCODE_XPD, /* X X */
MAX_OPCODE
} gl_inst_opcode;
/** for debugging purposes */
const char *Comment;
- /** Arbitrary data. Used for OPCODE_PRINT and some drivers */
- void *Data;
-
/** for driver use (try to remove someday) */
GLint Aux;
};
get_value(prog, &inst->SrcReg[0], a);
get_value(prog, &inst->SrcReg[1], b);
- /* It seems like a loop could be used here, but we cleverly put
- * DP2A between DP2 and DP3. Subtracting DP2 (or similar) from
- * the opcode results in various failures of the loop control.
- */
result = (a[0] * b[0]) + (a[1] * b[1]);
if (inst->Opcode >= OPCODE_DP3)
}
switch (inst->Opcode) {
- case OPCODE_PRINT:
- fprintf(f, "PRINT '%s'", (char *) inst->Data);
- if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
- fprintf(f, ", ");
- fprintf(f, "%s[%d]%s",
- _mesa_register_file_name((gl_register_file) inst->SrcReg[0].File),
- inst->SrcReg[0].Index,
- _mesa_swizzle_string(inst->SrcReg[0].Swizzle,
- inst->SrcReg[0].Negate, GL_FALSE));
- }
- if (inst->Comment)
- fprintf(f, " # %s", inst->Comment);
- fprint_comment(f, inst);
- break;
case OPCODE_SWZ:
fprintf(f, "SWZ");
if (inst->SaturateMode == SATURATE_ZERO_ONE)
return TGSI_OPCODE_DDY;
case OPCODE_DP2:
return TGSI_OPCODE_DP2;
- case OPCODE_DP2A:
- return TGSI_OPCODE_DP2A;
case OPCODE_DP3:
return TGSI_OPCODE_DP3;
case OPCODE_DP4:
return TGSI_OPCODE_MUL;
case OPCODE_NOP:
return TGSI_OPCODE_NOP;
- case OPCODE_NRM3:
- return TGSI_OPCODE_NRM;
- case OPCODE_NRM4:
- return TGSI_OPCODE_NRM4;
case OPCODE_POW:
return TGSI_OPCODE_POW;
case OPCODE_RCP: