mesa: added OPCODE_NRM3/NRM4 instructions for vector normalization.
author Brian Paul <brian.paul@tungstengraphics.com>
Fri, 7 Nov 2008 15:51:31 +0000 (08:51 -0700)
committer Brian Paul <brian.paul@tungstengraphics.com>
Fri, 7 Nov 2008 16:51:25 +0000 (09:51 -0700)
We may emit these instructions from GLSL instead of DP3/RCP/MUL.

Also, implement SSG (set sign) instruction in the interpreter.

src/mesa/shader/prog_execute.c
src/mesa/shader/prog_instruction.c
src/mesa/shader/prog_instruction.h

index d843761723bd608e4d15aa4c6efaf89d86d45d9f..c0173d369e52a7d377818f9450eaf9bbdde6fd42 100644 (file)
@@ -1019,6 +1019,36 @@ _mesa_execute_program(GLcontext * ctx,
          break;
       case OPCODE_NOP:
          break;
+      case OPCODE_NRM3:        /* 3-component normalization: dst.xyz = src.xyz / |src.xyz| */
+         {
+            GLfloat a[4], result[4];
+            GLfloat tmp;      /* squared length first, then the 1/length scale factor */
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
+            if (tmp != 0.0F)  /* a zero-length vector is left as zero (no division by 0) */
+               tmp = 1.0F / (GLfloat) sqrt(tmp);  /* 1/length, not 1/length^2 */
+            result[0] = tmp * a[0];
+            result[1] = tmp * a[1];
+            result[2] = tmp * a[2];
+            result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_NRM4:        /* 4-component normalization: dst = src / |src| */
+         {
+            GLfloat a[4], result[4];
+            GLfloat tmp;      /* squared length first, then the 1/length scale factor */
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
+            if (tmp != 0.0F)  /* a zero-length vector is left as zero (no division by 0) */
+               tmp = 1.0F / (GLfloat) sqrt(tmp);  /* 1/length, not 1/length^2 */
+            result[0] = tmp * a[0];
+            result[1] = tmp * a[1];
+            result[2] = tmp * a[2];
+            result[3] = tmp * a[3];
+            store_vector4(inst, machine, result);
+         }
+         break;
       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
          {
             GLfloat a[4], result[4];
@@ -1277,6 +1307,17 @@ _mesa_execute_program(GLcontext * ctx,
             }
          }
          break;
+      case OPCODE_SSG:         /* per-component sign: -1, 0 or +1 */
+         {
+            GLfloat src[4], sign[4];
+            int i;
+            fetch_vector4(&inst->SrcReg[0], machine, src);
+            for (i = 0; i < 4; i++) {
+               sign[i] = (GLfloat) ((src[i] > 0.0F) - (src[i] < 0.0F));
+            }
+            store_vector4(inst, machine, sign);
+         }
+         break;
       case OPCODE_STR:         /* set true, operands ignored */
          {
             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
index 7e340ce45459d72b69ab3b448c8a10a8f4fa9f08..d4f3bcb0e535430a4a304e4dce2e61825679afd9 100644 (file)
@@ -197,6 +197,8 @@ static const struct instruction_info InstInfo[MAX_OPCODE] = {
    { OPCODE_NOISE2, "NOISE2",  1, 1 },
    { OPCODE_NOISE3, "NOISE3",  1, 1 },
    { OPCODE_NOISE4, "NOISE4",  1, 1 },
+   { OPCODE_NRM3,   "NRM3",    1, 1 },
+   { OPCODE_NRM4,   "NRM4",    1, 1 },
    { OPCODE_PK2H,   "PK2H",    1, 1 },
    { OPCODE_PK2US,  "PK2US",   1, 1 },
    { OPCODE_PK4B,   "PK4B",    1, 1 },
index 16701e4ec95b51555a41bceb500078131be95070..3bcd0829a2386c934266f258f7401a13f44ea513 100644 (file)
@@ -188,6 +188,8 @@ typedef enum prog_opcode {
    OPCODE_NOISE2,    /*                                      X   */
    OPCODE_NOISE3,    /*                                      X   */
    OPCODE_NOISE4,    /*                                      X   */
+   OPCODE_NRM3,      /*                                          */
+   OPCODE_NRM4,      /*                                          */
    OPCODE_PK2H,      /*                            X             */
    OPCODE_PK2US,     /*                            X             */
    OPCODE_PK4B,      /*                            X             */