mesa: fix/improve the atan(y,x) function

[mesa.git] / src / mesa / shader / slang / library / slang_common_builtin.gc
diff --git a/src/mesa/shader/slang/library/slang_common_builtin.gc b/src/mesa/shader/slang/library/slang_common_builtin.gc

index 44e059f5a0d1a427867f22e2667794579d903ddd..3726335471fb93badc63ac363adbb1367fe82d97 100644 (file)
--- a/src/mesa/shader/slang/library/slang_common_builtin.gc
+++ b/src/mesa/shader/slang/library/slang_common_builtin.gc
@@ -47,6 +47,7 @@ uniform mat4 gl_ModelViewProjectionMatrix;
  uniform mat4 gl_TextureMatrix[gl_MaxTextureCoords];
  
  uniform mat3 gl_NormalMatrix;
+uniform mat3 __NormalMatrixTranspose;  // Mesa only
  
  uniform mat4 gl_ModelViewMatrixInverse;
  uniform mat4 gl_ProjectionMatrixInverse;
@@ -202,19 +203,19 @@ float degrees(const float rad)
  
  vec2 degrees(const vec2 rad)
  {
-   const float c = 3.1415926 / 180.0;
+   const float c = 180.0 / 3.1415926;
     __asm vec4_multiply __retVal.xy, rad.xy, c.xx;
  }
  
  vec3 degrees(const vec3 rad)
  {
-   const float c = 3.1415926 / 180.0;
+   const float c = 180.0 / 3.1415926;
     __asm vec4_multiply __retVal.xyz, rad.xyz, c.xxx;
  }
  
  vec4 degrees(const vec4 rad)
  {
-   const float c = 3.1415926 / 180.0;
+   const float c = 180.0 / 3.1415926;
     __asm vec4_multiply __retVal, rad, c.xxxx;
  }
  
@@ -310,129 +311,131 @@ vec4 tan(const vec4 angle)
  
  
  
-float asin (float x) {
-    float y;
-    __asm float_arcsine y, x;
-    return y;
+float asin(const float x)
+{
+   const float a0 = 1.5707288;  // polynomial coefficient; near, but not equal to, PI/2
+   const float a1 = -0.2121144;
+   const float a2 = 0.0742610;
+   //const float a3 = -0.0187293;  // fourth coefficient, only used by the disabled variant below
+   const float halfPi = 3.1415926 * 0.5;
+   const float y = abs(x);  // evaluate on |x|; sign(x) restores the sign below
+   // three terms seem to be enough:
+   __retVal = (halfPi - sqrt(1.0 - y) * (a0 + y * (a1 + a2 * y))) * sign(x);
+   // otherwise, try four:
+   //__retVal = (halfPi - sqrt(1.0 - y) * (a0 + y * (a1 + y * (a2 + y * a3)))) * sign(x);
  }
  
-vec2 asin (vec2 v) {
-    return vec2 (
-        asin (v.x),
-        asin (v.y)
-    );
+vec2 asin(const vec2 v)
+{
+   __retVal.x = asin(v.x);
+   __retVal.y = asin(v.y);
  }
  
-vec3 asin (vec3 v) {
-    return vec3 (
-        asin (v.x),
-        asin (v.y),
-        asin (v.z)
-    );
+vec3 asin(const vec3 v)
+{
+   __retVal.x = asin(v.x);
+   __retVal.y = asin(v.y);
+   __retVal.z = asin(v.z);
  }
  
-vec4 asin (vec4 v) {
-    return vec4 (
-        asin (v.x),
-        asin (v.y),
-        asin (v.z),
-        asin (v.w)
-    );
+vec4 asin(const vec4 v)
+{
+   __retVal.x = asin(v.x);
+   __retVal.y = asin(v.y);
+   __retVal.z = asin(v.z);
+   __retVal.w = asin(v.w);
  }
  
-float acos (float x) {
-    return 1.5708 - asin (x);
+float acos(const float x)
+{
+   const float halfPi = 3.1415926 * 0.5;
+   __retVal = halfPi - asin(x);
  }
  
-vec2 acos (vec2 v) {
-    return vec2 (
-        acos (v.x),
-        acos (v.y)
-    );
+vec2 acos(const vec2 v)
+{
+   __retVal.x = acos(v.x);
+   __retVal.y = acos(v.y);
  }
  
-vec3 acos (vec3 v) {
-    return vec3 (
-        acos (v.x),
-        acos (v.y),
-        acos (v.z)
-    );
+vec3 acos(const vec3 v)
+{
+   __retVal.x = acos(v.x);
+   __retVal.y = acos(v.y);
+   __retVal.z = acos(v.z);
  }
  
-vec4 acos (vec4 v) {
-    return vec4 (
-        acos (v.x),
-        acos (v.y),
-        acos (v.z),
-        acos (v.w)
-    );
+vec4 acos(const vec4 v)
+{
+   __retVal.x = acos(v.x);
+   __retVal.y = acos(v.y);
+   __retVal.z = acos(v.z);
+   __retVal.w = acos(v.w);
  }
  
-float atan (float y_over_x) {
-    float z;
-    __asm float_arctan z, y_over_x;
-    return z;
+float atan(const float x)
+{
+   __retVal = asin(x * inversesqrt(x * x + 1.0));
  }
  
-vec2 atan (vec2 y_over_x) {
-    return vec2 (
-        atan (y_over_x.x),
-        atan (y_over_x.y)
-    );
+vec2 atan(const vec2 y_over_x)
+{
+   __retVal.x = atan(y_over_x.x);
+   __retVal.y = atan(y_over_x.y);
  }
  
-vec3 atan (vec3 y_over_x) {
-    return vec3 (
-        atan (y_over_x.x),
-        atan (y_over_x.y),
-        atan (y_over_x.z)
-    );
+vec3 atan(const vec3 y_over_x)
+{
+   __retVal.x = atan(y_over_x.x);
+   __retVal.y = atan(y_over_x.y);
+   __retVal.z = atan(y_over_x.z);
  }
  
-vec4 atan (vec4 y_over_x) {
-    return vec4 (
-        atan (y_over_x.x),
-        atan (y_over_x.y),
-        atan (y_over_x.z),
-        atan (y_over_x.w)
-    );
+vec4 atan(const vec4 y_over_x)
+{
+   __retVal.x = atan(y_over_x.x);
+   __retVal.y = atan(y_over_x.y);
+   __retVal.z = atan(y_over_x.z);
+   __retVal.w = atan(y_over_x.w);
  }
  
-float atan (float y, float x) {
-    float z = atan (y / x);
-    if (x < 0.0)
-    {
-        if (y < 0.0)
-            return z - 3.141593;
-        return z + 3.141593;
-    }
-    return z;
+float atan(const float y, const float x)
+{
+   // Quadrant-aware arctangent of y/x; |x| <= 1e-4 is treated as x == 0.
+   float r = sign(y) * 1.5707963;  // x ~ 0: +/- pi/2 (0 when y is 0)
+   if (abs(x) > 1.0e-4) {
+      r = atan(y / x);
+      if (x < 0.0) {
+         // compare rather than use sign(y): sign(0.0) is 0.0 and would drop pi
+         if (y < 0.0) { r = r - 3.141593; }
+         else { r = r + 3.141593; }
+      }
+   }
+   return r;
  }
  
-vec2 atan (vec2 u, vec2 v) {
-    return vec2 (
-        atan (u.x, v.x),
-        atan (u.y, v.y)
-    );
+vec2 atan(const vec2 u, const vec2 v)
+{
+   __retVal.x = atan(u.x, v.x);
+   __retVal.y = atan(u.y, v.y);
  }
  
-vec3 atan (vec3 u, vec3 v) {
-    return vec3 (
-        atan (u.x, v.x),
-        atan (u.y, v.y),
-        atan (u.z, v.z)
-    );
+vec3 atan(const vec3 u, const vec3 v)
+{
+   __retVal.x = atan(u.x, v.x);
+   __retVal.y = atan(u.y, v.y);
+   __retVal.z = atan(u.z, v.z);
  }
  
-vec4 atan (vec4 u, vec4 v) {
-    return vec4 (
-        atan (u.x, v.x),
-        atan (u.y, v.y),
-        atan (u.z, v.z),
-        atan (u.w, v.w)
-    );
+vec4 atan(const vec4 u, const vec4 v)
+{
+   __retVal.x = atan(u.x, v.x);
+   __retVal.y = atan(u.y, v.y);
+   __retVal.z = atan(u.z, v.z);
+   __retVal.w = atan(u.w, v.w);
  }
  
+
  //
  // 8.2 Exponential Functions
  //
@@ -470,28 +473,32 @@ vec4 pow(const vec4 a, const vec4 b)
  
  float exp(const float a)
  {
-   __asm float_exp __retVal.x, a;
+   const float e = 2.7182818;  // Euler's number at single-float precision
+   __asm float_power __retVal, e, a;  // e raised to the power a
  }
  
  vec2 exp(const vec2 a)
  {
-   __asm float_exp __retVal.x, a.x;
-   __asm float_exp __retVal.y, a.y;
+   const float e = 2.7182818;  // Euler's number at single-float precision
+   __asm float_power __retVal.x, e, a.x;  // per-component e^a
+   __asm float_power __retVal.y, e, a.y;
  }
  
  vec3 exp(const vec3 a)
  {
-   __asm float_exp __retVal.x, a.x;
-   __asm float_exp __retVal.y, a.y;
-   __asm float_exp __retVal.z, a.z;
+   const float e = 2.7182818;  // Euler's number at single-float precision
+   __asm float_power __retVal.x, e, a.x;  // per-component e^a
+   __asm float_power __retVal.y, e, a.y;
+   __asm float_power __retVal.z, e, a.z;
  }
  
  vec4 exp(const vec4 a)
  {
-   __asm float_exp __retVal.x, a.x;
-   __asm float_exp __retVal.y, a.y;
-   __asm float_exp __retVal.z, a.z;
-   __asm float_exp __retVal.w, a.w;
+   const float e = 2.7182818;  // Euler's number at single-float precision
+   __asm float_power __retVal.x, e, a.x;  // per-component e^a
+   __asm float_power __retVal.y, e, a.y;
+   __asm float_power __retVal.z, e, a.z;
+   __asm float_power __retVal.w, e, a.w;
  }
  
  
@@ -671,14 +678,23 @@ vec2 normalize(const vec2 v)
  
  vec3 normalize(const vec3 v)
  {
-   const float s = inversesqrt(dot(v, v));
-   __asm vec4_multiply __retVal.xyz, v, s.xxx;
+//   const float s = inversesqrt(dot(v, v));
+//   __retVal = v * s;
+// XXX note, we _could_ use __retVal.w instead of tmp and save a
+// register, but that's actually a compilation error because v is a vec3
+// and the .w suffix is illegal.  Oh well.
+   float tmp;
+   __asm vec3_dot tmp, v, v;
+   __asm float_rsq tmp, tmp;
+   __asm vec4_multiply __retVal.xyz, v, tmp.xxx;
  }
  
  vec4 normalize(const vec4 v)
  {
-   const float s = inversesqrt(dot(v, v));
-   __asm vec4_multiply __retVal, v, s.xxxx;
+   float tmp;
+   __asm vec4_dot tmp, v, v;    // tmp = dot(v, v)
+   __asm float_rsq tmp, tmp;    // tmp = inversesqrt(tmp)
+   __asm vec4_multiply __retVal, v, tmp.xxxx;  // scale all four components; the result is a vec4, so .w must be written too
  }
  
  
@@ -884,7 +900,7 @@ vec4 mod(const vec4 a, const vec4 b)
      __retVal.x = a.x - b.x * floor(a.x * oneOverBx);
      __retVal.y = a.y - b.y * floor(a.y * oneOverBy);
      __retVal.z = a.z - b.z * floor(a.z * oneOverBz);
-    __retVal.w = a.w - b.w * floor(a.w * oneOverBz);
+    __retVal.w = a.w - b.w * floor(a.w * oneOverBw);
  }
  
  
@@ -968,51 +984,37 @@ vec4 max(const vec4 a, const float b)
  
  float clamp(const float val, const float minVal, const float maxVal)
  {
-   float t;
-   __asm vec4_max t, val, minVal;
-   __asm vec4_min __retVal.x, t, maxVal;
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
  }
  
  vec2 clamp(const vec2 val, const float minVal, const float maxVal)
  {
-   vec2 t;
-   __asm vec4_max t.xy, val.xy, minVal.xx;
-   __asm vec4_min __retVal.xy, t.xy, maxVal.xx;
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
  }
  
  vec3 clamp(const vec3 val, const float minVal, const float maxVal)
  {
-   vec3 t;
-   __asm vec4_max t.xyz, val.xyz, minVal.xxx;
-   __asm vec4_min __retVal.xyz, t.xyz, maxVal.xxx;
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
  }
  
  vec4 clamp(const vec4 val, const float minVal, const float maxVal)
  {
-   vec4 t;
-   __asm vec4_max t, val, minVal.xxxx;
-   __asm vec4_min __retVal, t, maxVal.xxxx;
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
  }
  
  vec2 clamp(const vec2 val, const vec2 minVal, const vec2 maxVal)
  {
-   vec2 t;
-   __asm vec4_max t.xy, val.xy, minVal.xy;
-   __asm vec4_min __retVal.xy, t.xy, maxVal.xxxx;
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
  }
  
  vec3 clamp(const vec3 val, const vec3 minVal, const vec3 maxVal)
  {
-   vec3 t;
-   __asm vec4_max t.xyz, val.xyz, minVal.xyz;
-   __asm vec4_min __retVal.xyz, t.xyz, maxVal.xxxx;
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
  }
  
  vec4 clamp(const vec4 val, const vec4 minVal, const vec4 maxVal)
  {
-   vec4 t;
-   __asm vec4_max t, val, minVal;
-   __asm vec4_min __retVal, t, maxVal;
+   __asm vec4_clamp __retVal, val, minVal, maxVal;
  }
  
  
@@ -1020,44 +1022,37 @@ vec4 clamp(const vec4 val, const vec4 minVal, const vec4 maxVal)
  
  float mix(const float x, const float y, const float a)
  {
-   const float d = y - x;
-   return x + d * a;  // MAD
+   __asm vec4_lrp __retVal, a, y, x;
  }
  
  vec2 mix(const vec2 x, const vec2 y, const float a)
  {
-   const vec2 d = y - x;
-   return x + d * a;  // MAD
+   __asm vec4_lrp __retVal, a, y, x;
  }
  
  vec3 mix(const vec3 x, const vec3 y, const float a)
  {
-   const vec3 d = y - x;
-   return x + d * a;  // MAD
+   __asm vec4_lrp __retVal, a, y, x;
  }
  
  vec4 mix(const vec4 x, const vec4 y, const float a)
  {
-   const vec4 d = y - x;
-   return x + d * a;  // MAD
+   __asm vec4_lrp __retVal, a, y, x;
  }
  
  vec2 mix(const vec2 x, const vec2 y, const vec2 a)
  {
-   const vec2 d = y - x;
-   return x + d * a;  // MAD
+   __asm vec4_lrp __retVal, a, y, x;
  }
  
  vec3 mix(const vec3 x, const vec3 y, const vec3 a)
  {
-   const vec3 d = y - x;
-   return x + d * a;  // MAD
+   __asm vec4_lrp __retVal, a, y, x;
  }
  
  vec4 mix(const vec4 x, const vec4 y, const vec4 a)
  {
-   const vec4 d = y - x;
-   return x + d * a;  // MAD
+   __asm vec4_lrp __retVal, a, y, x;
  }
  
  
@@ -1187,25 +1182,25 @@ float length(const vec4 v)
  float distance(const float x, const float y)
  {
     const float d = x - y;
-   return length(d);
+   __retVal = length(d);
  }
  
  float distance(const vec2 v, const vec2 u)
  {
-   const vec2 d = v - u;
-   return length(d);
+   const vec2 d2 = v - u;
+   __retVal = length(d2);
  }
  
  float distance(const vec3 v, const vec3 u)
  {
-   const vec3 d = v - u;
-   return length(d);
+   const vec3 d3 = v - u;
+   __retVal = length(d3);
  }
  
  float distance(const vec4 v, const vec4 u)
  {
-   const vec4 d = v - u;
-   return length(d);
+   const vec4 d4 = v - u;
+   __retVal = length(d4);
  }
  
  
@@ -1340,201 +1335,234 @@ mat4 matrixCompMult (mat4 m, mat4 n) {
  
  //// lessThan
  
-bvec2 lessThan(const vec2 v, const vec2 u)
+bvec2 lessThan(const vec2 u, const vec2 v)
  {
-   __asm vec4_sgt __retVal.xy, u, v;
+   __asm vec4_slt __retVal.xy, u, v;
  }
  
-bvec3 lessThan(const vec3 v, const vec3 u)
+bvec3 lessThan(const vec3 u, const vec3 v)
  {
-   __asm vec4_sgt __retVal.xyz, u, v;
+   __asm vec4_slt __retVal.xyz, u, v;
  }
  
-bvec4 lessThan(const vec4 v, const vec4 u)
+bvec4 lessThan(const vec4 u, const vec4 v)
  {
-   __asm vec4_sgt __retVal, u, v;
+   __asm vec4_slt __retVal, u, v;
  }
  
-bvec2 lessThan(const ivec2 v, const ivec2 u)
+bvec2 lessThan(const ivec2 u, const ivec2 v)
  {
-   __asm vec4_sgt __retVal.xy, u, v;
+   __asm vec4_slt __retVal.xy, u, v;
  }
  
-bvec3 lessThan(const ivec3 v, const ivec3 u)
+bvec3 lessThan(const ivec3 u, const ivec3 v)
  {
-   __asm vec4_sgt __retVal.xyz, u, v;
+   __asm vec4_slt __retVal.xyz, u, v;
  }
  
-bvec4 lessThan(const ivec4 v, const ivec4 u)
+bvec4 lessThan(const ivec4 u, const ivec4 v)
  {
-   __asm vec4_sgt __retVal, u, v;
+   __asm vec4_slt __retVal, u, v;
  }
  
  
  //// lessThanEqual
  
-bvec2 lessThanEqual(const vec2 v, const vec2 u)
+bvec2 lessThanEqual(const vec2 u, const vec2 v)
  {
-   __asm vec4_sge __retVal.xy, u, v;
+   __asm vec4_sle __retVal.xy, u, v;
  }
  
-bvec3 lessThanEqual(const vec3 v, const vec3 u)
+bvec3 lessThanEqual(const vec3 u, const vec3 v)
  {
-   __asm vec4_sge __retVal.xyz, u, v;
+   __asm vec4_sle __retVal.xyz, u, v;
  }
  
-bvec4 lessThanEqual(const vec4 v, const vec4 u)
+bvec4 lessThanEqual(const vec4 u, const vec4 v)
  {
-   __asm vec4_sge __retVal, u, v;
+   __asm vec4_sle __retVal, u, v;
  }
  
-bvec2 lessThanEqual(const ivec2 v, const ivec2 u)
+bvec2 lessThanEqual(const ivec2 u, const ivec2 v)
  {
-   __asm vec4_sge __retVal.xy, u, v;
+   __asm vec4_sle __retVal.xy, u, v;
  }
  
-bvec3 lessThanEqual(const ivec3 v, const ivec3 u)
+bvec3 lessThanEqual(const ivec3 u, const ivec3 v)
  {
-   __asm vec4_sge __retVal.xyz, u, v;
+   __asm vec4_sle __retVal.xyz, u, v;
  }
  
-bvec4 lessThanEqual(const ivec4 v, const ivec4 u)
+bvec4 lessThanEqual(const ivec4 u, const ivec4 v)
  {
-   __asm vec4_sge __retVal, u, v;
+   __asm vec4_sle __retVal, u, v;
  }
  
  
  //// greaterThan
  
-bvec2 greaterThan(const vec2 v, const vec2 u)
+bvec2 greaterThan(const vec2 u, const vec2 v)
  {
-   __asm vec4_sgt __retVal.xy, v, u;
+   __asm vec4_sgt __retVal.xy, u, v;
  }
  
-bvec3 greaterThan(const vec3 v, const vec3 u)
+bvec3 greaterThan(const vec3 u, const vec3 v)
  {
-   __asm vec4_sgt __retVal.xyz, v, u;
+   __asm vec4_sgt __retVal.xyz, u, v;
  }
  
-bvec4 greaterThan(const vec4 v, const vec4 u)
+bvec4 greaterThan(const vec4 u, const vec4 v)
  {
-   __asm vec4_sgt __retVal, v, u;
+   __asm vec4_sgt __retVal, u, v;
  }
  
-bvec2 greaterThan(const ivec2 v, const ivec2 u)
+bvec2 greaterThan(const ivec2 u, const ivec2 v)
  {
-   __asm vec4_sgt __retVal.xy, v, u;
+   __asm vec4_sgt __retVal.xy, u, v;
  }
  
-bvec3 greaterThan(const ivec3 v, const ivec3 u)
+bvec3 greaterThan(const ivec3 u, const ivec3 v)
  {
-   __asm vec4_sgt __retVal.xyz, v, u;
+   __asm vec4_sgt __retVal.xyz, u, v;
  }
  
-bvec4 greaterThan(const ivec4 v, const ivec4 u)
+bvec4 greaterThan(const ivec4 u, const ivec4 v)
  {
-   __asm vec4_sgt __retVal, v, u;
+   __asm vec4_sgt __retVal, u, v;
  }
  
  
  //// greaterThanEqual
  
-bvec2 greaterThanEqual(const vec2 v, const vec2 u)
+bvec2 greaterThanEqual(const vec2 u, const vec2 v)
  {
-   __asm vec4_sge __retVal.xy, v, u;
+   __asm vec4_sge __retVal.xy, u, v;
  }
  
-bvec3 greaterThanEqual(const vec3 v, const vec3 u)
+bvec3 greaterThanEqual(const vec3 u, const vec3 v)
  {
-   __asm vec4_sge __retVal.xyz, v, u;
+   __asm vec4_sge __retVal.xyz, u, v;
  }
  
-bvec4 greaterThanEqual(const vec4 v, const vec4 u)
+bvec4 greaterThanEqual(const vec4 u, const vec4 v)
  {
-   __asm vec4_sge __retVal, v, u;
+   __asm vec4_sge __retVal, u, v;
  }
  
-bvec2 greaterThanEqual(const ivec2 v, const ivec2 u)
+bvec2 greaterThanEqual(const ivec2 u, const ivec2 v)
  {
-   __asm vec4_sge __retVal.xy, v, u;
+   __asm vec4_sge __retVal.xy, u, v;
  }
  
-bvec3 greaterThanEqual(const ivec3 v, const ivec3 u)
+bvec3 greaterThanEqual(const ivec3 u, const ivec3 v)
  {
-   __asm vec4_sge __retVal.xyz, v, u;
+   __asm vec4_sge __retVal.xyz, u, v;
  }
  
-bvec4 greaterThanEqual(const ivec4 v, const ivec4 u)
+bvec4 greaterThanEqual(const ivec4 u, const ivec4 v)
  {
-   __asm vec4_sge __retVal, v, u;
+   __asm vec4_sge __retVal, u, v;
  }
  
  
  //// equal
  
-bvec2 equal(const vec2 v, const vec2 u)
+bvec2 equal(const vec2 u, const vec2 v)
+{
+   __asm vec4_seq __retVal.xy, u, v;
+}
+
+bvec3 equal(const vec3 u, const vec3 v)
  {
-   __asm vec4_seq __retVal.xy, v, u;
+   __asm vec4_seq __retVal.xyz, u, v;
  }
  
-bvec3 equal(const vec3 v, const vec3 u)
+bvec4 equal(const vec4 u, const vec4 v)
  {
-   __asm vec4_seq __retVal.xyz, v, u;
+   __asm vec4_seq __retVal, u, v;
  }
  
-bvec4 equal(const vec4 v, const vec4 u)
+bvec2 equal(const ivec2 u, const ivec2 v)
  {
-   __asm vec4_seq __retVal, v, u;
+   __asm vec4_seq __retVal.xy, u, v;
  }
  
-bvec2 equal(const ivec2 v, const ivec2 u)
+bvec3 equal(const ivec3 u, const ivec3 v)
  {
-   __asm vec4_seq __retVal.xy, v, u;
+   __asm vec4_seq __retVal.xyz, u, v;
  }
  
-bvec3 equal(const ivec3 v, const ivec3 u)
+bvec4 equal(const ivec4 u, const ivec4 v)
  {
-   __asm vec4_seq __retVal.xyz, v, u;
+   __asm vec4_seq __retVal, u, v;
  }
  
-bvec4 equal(const ivec4 v, const ivec4 u)
+bvec2 equal(const bvec2 u, const bvec2 v)
  {
-   __asm vec4_seq __retVal, v, u;
+   __asm vec4_seq __retVal.xy, u, v;
  }
  
+bvec3 equal(const bvec3 u, const bvec3 v)
+{
+   __asm vec4_seq __retVal.xyz, u, v;
+}
+
+bvec4 equal(const bvec4 u, const bvec4 v)
+{
+   __asm vec4_seq __retVal, u, v;
+}
+
+
+
  
  //// notEqual
  
-bvec2 notEqual(const vec2 v, const vec2 u)
+bvec2 notEqual(const vec2 u, const vec2 v)
+{
+   __asm vec4_sne __retVal.xy, u, v;
+}
+
+bvec3 notEqual(const vec3 u, const vec3 v)
+{
+   __asm vec4_sne __retVal.xyz, u, v;
+}
+
+bvec4 notEqual(const vec4 u, const vec4 v)
  {
-   __asm vec4_sne __retVal.xy, v, u;
+   __asm vec4_sne __retVal, u, v;
  }
  
-bvec3 notEqual(const vec3 v, const vec3 u)
+bvec2 notEqual(const ivec2 u, const ivec2 v)
  {
-   __asm vec4_sne __retVal.xyz, v, u;
+   __asm vec4_sne __retVal.xy, u, v;
  }
  
-bvec4 notEqual(const vec4 v, const vec4 u)
+bvec3 notEqual(const ivec3 u, const ivec3 v)
  {
-   __asm vec4_sne __retVal, v, u;
+   __asm vec4_sne __retVal.xyz, u, v;
  }
  
-bvec2 notEqual(const ivec2 v, const ivec2 u)
+bvec4 notEqual(const ivec4 u, const ivec4 v)
  {
-   __asm vec4_sne __retVal.xy, v, u;
+   __asm vec4_sne __retVal, u, v;
  }
  
-bvec3 notEqual(const ivec3 v, const ivec3 u)
+bvec2 notEqual(const bvec2 u, const bvec2 v)
  {
-   __asm vec4_sne __retVal.xyz, v, u;
+   __asm vec4_sne __retVal.xy, u, v;
  }
  
-bvec4 notEqual(const ivec4 v, const ivec4 u)
+bvec3 notEqual(const bvec3 u, const bvec3 v)
  {
-   __asm vec4_sne __retVal, v, u;
+   __asm vec4_sne __retVal.xyz, u, v;
  }
  
+bvec4 notEqual(const bvec4 u, const bvec4 v)
+{
+   __asm vec4_sne __retVal, u, v;
+}
+
+
  
  //// any
  
@@ -1565,7 +1593,7 @@ bool any(const bvec4 v)
  
  //// all
  
-bool all (const vec2 v)
+bool all (const bvec2 v)
  {
     float prod;
     __asm vec4_multiply prod.x, v.x, v.y;
@@ -1611,196 +1639,219 @@ bvec4 not (const bvec4 v)
  
  
  
-//
-// 8.7 Texture Lookup Functions
-//
+//// Texture Lookup Functions  (for both fragment and vertex shaders)
  
-vec4 texture1D (sampler1D sampler, float coord) {
-    vec4 texel;
-    __asm vec4_tex1d texel, sampler, coord, 0.0;
-    return texel;
+vec4 texture1D(const sampler1D sampler, const float coord)
+{
+   __asm vec4_tex1d __retVal, sampler, coord;
  }
  
-vec4 texture1DProj (sampler1D sampler, vec2 coord) {
-    return texture1D (sampler, coord.s / coord.t);
+vec4 texture1DProj(const sampler1D sampler, const vec2 coord)
+{
+   // need to swizzle .y into .w
+   __asm vec4_texp1d __retVal, sampler, coord.xyyy;
  }
  
-vec4 texture1DProj (sampler1D sampler, vec4 coord) {
-    return texture1D (sampler, coord.s / coord.q);
+vec4 texture1DProj(const sampler1D sampler, const vec4 coord)
+{
+   __asm vec4_texp1d __retVal, sampler, coord;
  }
  
+
  vec4 texture2D(const sampler2D sampler, const vec2 coord)
  {
-    __asm vec4_tex2d __retVal, coord; // XXX sampler
+   __asm vec4_tex2d __retVal, sampler, coord;
+}
+
+vec4 texture2DProj(const sampler2D sampler, const vec3 coord)
+{
+   // need to swizzle 'z' into 'w'.
+   __asm vec4_texp2d __retVal, sampler, coord.xyzz;
  }
  
-vec4 texture2DProj (sampler2D sampler, vec3 coord) {
-    return texture2D (sampler, vec2 (coord.s / coord.p, coord.t / coord.p));
+vec4 texture2DProj(const sampler2D sampler, const vec4 coord)
+{
+   __asm vec4_texp2d __retVal, sampler, coord;
  }
  
-vec4 texture2DProj (sampler2D sampler, vec4 coord) {
-    return texture2D (sampler, vec2 (coord.s / coord.q, coord.t / coord.q));
+
+vec4 texture3D(const sampler3D sampler, const vec3 coord)
+{
+   __asm vec4_tex3d __retVal, sampler, coord;
  }
  
-vec4 texture3D (sampler3D sampler, vec3 coord) {
-    vec4 texel;
-    __asm vec4_tex3d texel, sampler, coord, 0.0;
-    return texel;
+vec4 texture3DProj(const sampler3D sampler, const vec4 coord)
+{
+   __asm vec4_texp3d __retVal, sampler, coord;
  }
  
-vec4 texture3DProj (sampler3D sampler, vec4 coord) {
-    return texture3D (sampler, vec3 (coord.s / coord.q, coord.t / coord.q, coord.p / coord.q));
+
+vec4 textureCube(const samplerCube sampler, const vec3 coord)
+{
+   __asm vec4_texcube __retVal, sampler, coord;
  }
  
-vec4 textureCube (samplerCube sampler, vec3 coord) {
-    vec4 texel;
-    __asm vec4_texcube texel, sampler, coord, 0.0;
-    return texel;
+
+
+vec4 shadow1D(const sampler1DShadow sampler, const vec3 coord)
+{
+   __asm vec4_tex1d __retVal, sampler, coord;
  }
  
-vec4 shadow1D (sampler1DShadow sampler, vec3 coord) {
-    vec4 texel;
-    __asm vec4_shad1d texel, sampler, coord, 0.0;
-    return texel;
+vec4 shadow1DProj(const sampler1DShadow sampler, const vec4 coord)
+{
+   // .s and .p will be divided by .q
+   __asm vec4_texp1d __retVal, sampler, coord;
  }
  
-vec4 shadow1DProj (sampler1DShadow sampler, vec4 coord) {
-    return shadow1D (sampler, vec3 (coord.s / coord.q, 0.0, coord.p / coord.q));
+vec4 shadow2D(const sampler2DShadow sampler, const vec3 coord)
+{
+   __asm vec4_tex2d __retVal, sampler, coord;
  }
  
-vec4 shadow2D (sampler2DShadow sampler, vec3 coord) {
-    vec4 texel;
-    __asm vec4_shad2d texel, sampler, coord, 0.0;
-    return texel;
+vec4 shadow2DProj(const sampler2DShadow sampler, const vec4 coord)
+{
+   // .s, .t and .p will be divided by .q
+   __asm vec4_texp2d __retVal, sampler, coord;
  }
  
-vec4 shadow2DProj (sampler2DShadow sampler, vec4 coord) {
-    return shadow2D (sampler, vec3 (coord.s / coord.q, coord.t / coord.q, coord.p / coord.q));
+
+//// GL_ARB_texture_rectangle:
+vec4 texture2DRect(const sampler2DRect sampler, const vec2 coord)
+{
+   __asm vec4_tex_rect __retVal, sampler, coord;
  }
  
+vec4 texture2DRectProj(const sampler2DRect sampler, const vec3 coord)
+{
+   // need to swizzle .z into .w
+   __asm vec4_texp_rect __retVal, sampler, coord.xyzz;
+}
+
+vec4 texture2DRectProj(const sampler2DRect sampler, const vec4 coord)
+{
+   __asm vec4_texp_rect __retVal, sampler, coord;  // pass the vec4 parameter as-is, like the other *Proj(vec4) lookups
+}
+
+vec4 shadow2DRect(const sampler2DRectShadow sampler, const vec3 coord)
+{
+   __asm vec4_tex_rect __retVal, sampler, coord;
+}
+
+vec4 shadow2DRectProj(const sampler2DRectShadow sampler, const vec4 coord)
+{
+   __asm vec4_texp_rect __retVal, sampler, coord;
+}
+
+
+
  //
  // 8.9 Noise Functions
  //
  // AUTHOR: Stefan Gustavson (stegu@itn.liu.se), Nov 26, 2005
  //
  
-float noise1 (float x) {
-    float a;
-    __asm float_noise1 a, x;
-    return a;
+float noise1(const float x)
+{
+   __asm float_noise1 __retVal, x;
  }
  
-float noise1 (vec2 x) {
-    float a;
-    __asm float_noise2 a, x;
-    return a;
-}
  
-float noise1 (vec3 x) {
-    float a;
-    __asm float_noise3 a, x;
-    return a;
+float noise1(const vec2 x)
+{
+    __asm float_noise2 __retVal, x;
  }
  
-float noise1 (vec4 x) {
-    float a;
-    __asm float_noise4 a, x;
-    return a;
+float noise1(const vec3 x)
+{
+    __asm float_noise3 __retVal, x;
  }
  
-vec2 noise2 (float x) {
-    return vec2 (
-        noise1 (x),
-        noise1 (x + 19.34)
-    );
+float noise1(const vec4 x)
+{
+    __asm float_noise4 __retVal, x;
  }
  
-vec2 noise2 (vec2 x) {
-    return vec2 (
-        noise1 (x),
-        noise1 (x + vec2 (19.34, 7.66))
-    );
+vec2 noise2(const float x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + 19.34);
  }
  
-vec2 noise2 (vec3 x) {
-    return vec2 (
-        noise1 (x),
-        noise1 (x + vec3 (19.34, 7.66, 3.23))
-    );
+vec2 noise2(const vec2 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec2(19.34, 7.66));
  }
  
-vec2 noise2 (vec4 x) {
-    return vec2 (
-        noise1 (x),
-        noise1 (x + vec4 (19.34, 7.66, 3.23, 2.77))
-    );
+vec2 noise2(const vec3 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec3(19.34, 7.66, 3.23));
  }
  
-vec3 noise3 (float x) {
-    return vec3 (
-        noise1 (x),
-        noise1 (x + 19.34),
-        noise1 (x + 5.47)
-    );
+vec2 noise2(const vec4 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec4(19.34, 7.66, 3.23, 2.77));
  }
  
-vec3 noise3 (vec2 x) {
-    return vec3 (
-        noise1 (x),
-        noise1 (x + vec2 (19.34, 7.66)),
-        noise1 (x + vec2 (5.47, 17.85))
-    );
+vec3 noise3(const float x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + 19.34);
+   __retVal.z = noise1(x + 5.47);
  }
  
-vec3 noise3 (vec3 x) {
-    return vec3 (
-        noise1 (x),
-        noise1 (x + vec3 (19.34, 7.66, 3.23)),
-        noise1 (x + vec3 (5.47, 17.85, 11.04))
-    );
+vec3 noise3(const vec2 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec2(19.34, 7.66));
+   __retVal.z = noise1(x + vec2(5.47, 17.85));
  }
  
-vec3 noise3 (vec4 x) {
-    return vec3 (
-        noise1 (x),
-        noise1 (x + vec4 (19.34, 7.66, 3.23, 2.77)),
-        noise1 (x + vec4 (5.47, 17.85, 11.04, 13.19))
-    );
+vec3 noise3(const vec3 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec3(19.34, 7.66, 3.23));
+   __retVal.z = noise1(x + vec3(5.47, 17.85, 11.04));
  }
  
-vec4 noise4 (float x) {
-    return vec4 (
-        noise1 (x),
-        noise1 (x + 19.34),
-        noise1 (x + 5.47),
-        noise1 (x + 23.54)
-    );
+vec3 noise3(const vec4 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec4(19.34, 7.66, 3.23, 2.77));
+   __retVal.z = noise1(x + vec4(5.47, 17.85, 11.04, 13.19));
  }
  
-vec4 noise4 (vec2 x) {
-    return vec4 (
-        noise1 (x),
-        noise1 (x + vec2 (19.34, 7.66)),
-        noise1 (x + vec2 (5.47, 17.85)),
-        noise1 (x + vec2 (23.54, 29.11))
-    );
+vec4 noise4(const float x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + 19.34);
+   __retVal.z = noise1(x + 5.47);
+   __retVal.w = noise1(x + 23.54);
  }
  
-vec4 noise4 (vec3 x) {
-    return vec4 (
-        noise1 (x),
-        noise1 (x + vec3 (19.34, 7.66, 3.23)),
-        noise1 (x + vec3 (5.47, 17.85, 11.04)),
-        noise1 (x + vec3 (23.54, 29.11, 31.91))
-    );
+vec4 noise4(const vec2 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec2 (19.34, 7.66));
+   __retVal.z = noise1(x + vec2 (5.47, 17.85));
+   __retVal.w = noise1(x + vec2 (23.54, 29.11));
  }
  
-vec4 noise4 (vec4 x) {
-    return vec4 (
-        noise1 (x),
-        noise1 (x + vec4 (19.34, 7.66, 3.23, 2.77)),
-        noise1 (x + vec4 (5.47, 17.85, 11.04, 13.19)),
-        noise1 (x + vec4 (23.54, 29.11, 31.91, 37.48))
-    );
+vec4 noise4(const vec3 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec3(19.34, 7.66, 3.23));
+   __retVal.z = noise1(x + vec3(5.47, 17.85, 11.04));
+   __retVal.w = noise1(x + vec3(23.54, 29.11, 31.91));
  }
  
+vec4 noise4(const vec4 x)
+{
+   __retVal.x = noise1(x);
+   __retVal.y = noise1(x + vec4(19.34, 7.66, 3.23, 2.77));
+   __retVal.z = noise1(x + vec4(5.47, 17.85, 11.04, 13.19));
+   __retVal.w = noise1(x + vec4(23.54, 29.11, 31.91, 37.48));
+}