mesa: allow variable indexing into the predefined uniform variable arrays

[mesa.git] / src / mesa / shader / slang / library / slang_common_builtin.gc
diff --git a/src/mesa/shader/slang/library/slang_common_builtin.gc b/src/mesa/shader/slang/library/slang_common_builtin.gc

index c8931d259eaff036b6feff4b33baa8e16350f5fd..3e03b101b87384ef72b699ddeda91efd8907239b 100644 (file)
--- a/src/mesa/shader/slang/library/slang_common_builtin.gc
+++ b/src/mesa/shader/slang/library/slang_common_builtin.gc
@@ -1,8 +1,9 @@
  /*
   * Mesa 3-D graphics library
- * Version:  6.5
+ * Version:  7.3
   *
   * Copyright (C) 2006  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2008  VMware, Inc.  All Rights Reserved.
   *
   * Permission is hereby granted, free of charge, to any person obtaining a
   * copy of this software and associated documentation files (the "Software"),
@@ -98,6 +99,9 @@ struct gl_MaterialParameters {
  uniform gl_MaterialParameters gl_FrontMaterial;
  uniform gl_MaterialParameters gl_BackMaterial;
  
+/* NOTE: the order of these fields is significant!
+ * See the definition of the lighting state vars such as STATE_SPOT_DIRECTION.
+ */
  struct gl_LightSourceParameters {
      vec4 ambient;
      vec4 diffuse;
@@ -105,12 +109,14 @@ struct gl_LightSourceParameters {
      vec4 position;
      vec4 halfVector;
      vec3 spotDirection;
-    float spotExponent;
-    float spotCutoff;
      float spotCosCutoff;
+
      float constantAttenuation;
      float linearAttenuation;
      float quadraticAttenuation;
+    float spotExponent;
+
+    float spotCutoff;
  };
  
  uniform gl_LightSourceParameters gl_LightSource[gl_MaxLights];
@@ -170,7 +176,7 @@ uniform gl_FogParameters gl_Fog;
  float radians(const float deg)
  {
     const float c = 3.1415926 / 180.0;
-   __asm vec4_multiply __retVal.x, deg, c;
+   __asm vec4_multiply __retVal, deg, c;
  }
  
  vec2 radians(const vec2 deg)
@@ -197,24 +203,24 @@ vec4 radians(const vec4 deg)
  float degrees(const float rad)
  {
     const float c = 180.0 / 3.1415926;
-   __asm vec4_multiply __retVal.x, rad, c;
+   __asm vec4_multiply __retVal, rad, c;
  }
  
  vec2 degrees(const vec2 rad)
  {
-   const float c = 3.1415926 / 180.0;
+   const float c = 180.0 / 3.1415926;
     __asm vec4_multiply __retVal.xy, rad.xy, c.xx;
  }
  
  vec3 degrees(const vec3 rad)
  {
-   const float c = 3.1415926 / 180.0;
+   const float c = 180.0 / 3.1415926;
     __asm vec4_multiply __retVal.xyz, rad.xyz, c.xxx;
  }
  
  vec4 degrees(const vec4 rad)
  {
-   const float c = 3.1415926 / 180.0;
+   const float c = 180.0 / 3.1415926;
     __asm vec4_multiply __retVal, rad, c.xxxx;
  }
  
@@ -223,7 +229,7 @@ vec4 degrees(const vec4 rad)
  
  float sin(const float radians)
  {
-   __asm float_sine __retVal.x, radians;
+   __asm float_sine __retVal, radians;
  }
  
  vec2 sin(const vec2 radians)
@@ -252,7 +258,7 @@ vec4 sin(const vec4 radians)
  
  float cos(const float radians)
  {
-   __asm float_cosine __retVal.x, radians;
+   __asm float_cosine __retVal, radians;
  }
  
  vec2 cos(const vec2 radians)
@@ -310,129 +316,131 @@ vec4 tan(const vec4 angle)
  
  
  
-float asin (float x) {
-    float y;
-    __asm float_arcsine y, x;
-    return y;
+float asin(const float x)
+{
+   const float a0 = 1.5707288;  // fit coefficient: close to, but not equal to, halfPi (1.5707963) below
+   const float a1 = -0.2121144;  // NOTE(review): a0..a3 look like the Abramowitz & Stegun 4.4.45 arcsin fit -- confirm
+   const float a2 = 0.0742610;
+   //const float a3 = -0.0187293;
+   const float halfPi = 3.1415926 * 0.5;
+   const float y = abs(x);  // evaluate on |x|; the final sign(x) factor restores the sign
+   // three polynomial terms (a0..a2) seem to be enough:
+   __retVal = (halfPi - sqrt(1.0 - y) * (a0 + y * (a1 + a2 * y))) * sign(x);
+   // otherwise, try four terms (re-enable a3 above):
+   //__retVal = (halfPi - sqrt(1.0 - y) * (a0 + y * (a1 + y * (a2 + y * a3)))) * sign(x);
+}
  
-vec2 asin (vec2 v) {
-    return vec2 (
-        asin (v.x),
-        asin (v.y)
-    );
+vec2 asin(const vec2 v)
+{
+   __retVal.x = asin(v.x);
+   __retVal.y = asin(v.y);
  }
  
-vec3 asin (vec3 v) {
-    return vec3 (
-        asin (v.x),
-        asin (v.y),
-        asin (v.z)
-    );
+vec3 asin(const vec3 v)
+{
+   __retVal.x = asin(v.x);
+   __retVal.y = asin(v.y);
+   __retVal.z = asin(v.z);
  }
  
-vec4 asin (vec4 v) {
-    return vec4 (
-        asin (v.x),
-        asin (v.y),
-        asin (v.z),
-        asin (v.w)
-    );
+vec4 asin(const vec4 v)
+{
+   __retVal.x = asin(v.x);
+   __retVal.y = asin(v.y);
+   __retVal.z = asin(v.z);
+   __retVal.w = asin(v.w);
  }
  
-float acos (float x) {
-    return 1.5708 - asin (x);
+float acos(const float x)
+{
+   const float halfPi = 3.1415926 * 0.5;
+   __retVal = halfPi - asin(x);
  }
  
-vec2 acos (vec2 v) {
-    return vec2 (
-        acos (v.x),
-        acos (v.y)
-    );
+vec2 acos(const vec2 v)
+{
+   __retVal.x = acos(v.x);
+   __retVal.y = acos(v.y);
  }
  
-vec3 acos (vec3 v) {
-    return vec3 (
-        acos (v.x),
-        acos (v.y),
-        acos (v.z)
-    );
+vec3 acos(const vec3 v)
+{
+   __retVal.x = acos(v.x);
+   __retVal.y = acos(v.y);
+   __retVal.z = acos(v.z);
  }
  
-vec4 acos (vec4 v) {
-    return vec4 (
-        acos (v.x),
-        acos (v.y),
-        acos (v.z),
-        acos (v.w)
-    );
+vec4 acos(const vec4 v)
+{
+   __retVal.x = acos(v.x);
+   __retVal.y = acos(v.y);
+   __retVal.z = acos(v.z);
+   __retVal.w = acos(v.w);
  }
  
-float atan (float y_over_x) {
-    float z;
-    __asm float_arctan z, y_over_x;
-    return z;
+float atan(const float x)
+{
+   __retVal = asin(x * inversesqrt(x * x + 1.0));
  }
  
-vec2 atan (vec2 y_over_x) {
-    return vec2 (
-        atan (y_over_x.x),
-        atan (y_over_x.y)
-    );
+vec2 atan(const vec2 y_over_x)
+{
+   __retVal.x = atan(y_over_x.x);
+   __retVal.y = atan(y_over_x.y);
  }
  
-vec3 atan (vec3 y_over_x) {
-    return vec3 (
-        atan (y_over_x.x),
-        atan (y_over_x.y),
-        atan (y_over_x.z)
-    );
+vec3 atan(const vec3 y_over_x)
+{
+   __retVal.x = atan(y_over_x.x);
+   __retVal.y = atan(y_over_x.y);
+   __retVal.z = atan(y_over_x.z);
  }
  
-vec4 atan (vec4 y_over_x) {
-    return vec4 (
-        atan (y_over_x.x),
-        atan (y_over_x.y),
-        atan (y_over_x.z),
-        atan (y_over_x.w)
-    );
+vec4 atan(const vec4 y_over_x)
+{
+   __retVal.x = atan(y_over_x.x);
+   __retVal.y = atan(y_over_x.y);
+   __retVal.z = atan(y_over_x.z);
+   __retVal.w = atan(y_over_x.w);
  }
  
-float atan (float y, float x) {
-    float z = atan (y / x);
-    if (x < 0.0)
-    {
-        if (y < 0.0)
-            return z - 3.141593;
-        return z + 3.141593;
-    }
-    return z;
+float atan(const float y, const float x)
+{
+   float r;  // two-argument arctangent: angle of y/x, with the quadrant chosen from the signs of x and y
+   if (abs(x) > 1.0e-4) {
+      r = atan(y / x);  // single-argument result; already correct when x > 0
+      if (x < 0.0) {
+         r = r + (2.0 * step(0.0, y) - 1.0) * 3.141593;  // +pi when y >= 0 (incl. y == 0, where sign(y) would add nothing), -pi when y < 0
+      }
+   }
+   else {
+      r = sign(y) * 1.5707963;  // x ~ 0: +/- pi/2, avoiding the divide by a near-zero x
+   }
+   return r;
+}
  
-vec2 atan (vec2 u, vec2 v) {
-    return vec2 (
-        atan (u.x, v.x),
-        atan (u.y, v.y)
-    );
+vec2 atan(const vec2 u, const vec2 v)
+{
+   __retVal.x = atan(u.x, v.x);
+   __retVal.y = atan(u.y, v.y);
  }
  
-vec3 atan (vec3 u, vec3 v) {
-    return vec3 (
-        atan (u.x, v.x),
-        atan (u.y, v.y),
-        atan (u.z, v.z)
-    );
+vec3 atan(const vec3 u, const vec3 v)
+{
+   __retVal.x = atan(u.x, v.x);
+   __retVal.y = atan(u.y, v.y);
+   __retVal.z = atan(u.z, v.z);
  }
  
-vec4 atan (vec4 u, vec4 v) {
-    return vec4 (
-        atan (u.x, v.x),
-        atan (u.y, v.y),
-        atan (u.z, v.z),
-        atan (u.w, v.w)
-    );
+vec4 atan(const vec4 u, const vec4 v)
+{
+   __retVal.x = atan(u.x, v.x);
+   __retVal.y = atan(u.y, v.y);
+   __retVal.z = atan(u.z, v.z);
+   __retVal.w = atan(u.w, v.w);
  }
  
+
  //
  // 8.2 Exponential Functions
  //
@@ -441,7 +449,7 @@ vec4 atan (vec4 u, vec4 v) {
  
  float pow(const float a, const float b)
  {
-   __asm float_power __retVal.x, a, b;
+   __asm float_power __retVal, a, b;
  }
  
  vec2 pow(const vec2 a, const vec2 b)
@@ -470,28 +478,33 @@ vec4 pow(const vec4 a, const vec4 b)
  
  float exp(const float a)
  {
-   __asm float_exp __retVal.x, a;
+   // NOTE: log2(e) = 1.44269502
+   float t = a * 1.44269502;
+   __asm float_exp2 __retVal, t;
  }
  
  vec2 exp(const vec2 a)
  {
-   __asm float_exp __retVal.x, a.x;
-   __asm float_exp __retVal.y, a.y;
+   vec2 t = a * 1.44269502;
+   __asm float_exp2 __retVal.x, t.x;
+   __asm float_exp2 __retVal.y, t.y;
  }
  
  vec3 exp(const vec3 a)
  {
-   __asm float_exp __retVal.x, a.x;
-   __asm float_exp __retVal.y, a.y;
-   __asm float_exp __retVal.z, a.z;
+   vec3 t = a * 1.44269502;
+   __asm float_exp2 __retVal.x, t.x;
+   __asm float_exp2 __retVal.y, t.y;
+   __asm float_exp2 __retVal.z, t.z;
  }
  
  vec4 exp(const vec4 a)
  {
-   __asm float_exp __retVal.x, a.x;
-   __asm float_exp __retVal.y, a.y;
-   __asm float_exp __retVal.z, a.z;
-   __asm float_exp __retVal.w, a.w;
+   vec4 t = a * 1.44269502;
+   __asm float_exp2 __retVal.x, t.x;
+   __asm float_exp2 __retVal.y, t.y;
+   __asm float_exp2 __retVal.z, t.z;
+   __asm float_exp2 __retVal.w, t.w;
  }
  
  
@@ -500,7 +513,7 @@ vec4 exp(const vec4 a)
  
  float log2(const float x)
  {
-   __asm float_log2 __retVal.x, x;
+   __asm float_log2 __retVal, x;
  }
  
  vec2 log2(const vec2 v)
@@ -559,7 +572,7 @@ vec4 log(const vec4 v)
  
  float exp2(const float a)
  {
-   __asm float_exp2 __retVal.x, a;
+   __asm float_exp2 __retVal, a;
  }
  
  vec2 exp2(const vec2 a)
@@ -590,7 +603,7 @@ float sqrt(const float x)
  {
     float r;
     __asm float_rsq r, x;
-   __asm float_rcp __retVal.x, r;
+   __asm float_rcp __retVal, r;
  }
  
  vec2 sqrt(const vec2 v)
@@ -660,13 +673,13 @@ vec4 inversesqrt(const vec4 v)
  
  float normalize(const float x)
  {
-   __retVal.x = 1.0;
+   __retVal = 1.0;
  }
  
  vec2 normalize(const vec2 v)
  {
     const float s = inversesqrt(dot(v, v));
-   __asm vec4_multiply __retVal.xy, v, s.xx;
+   __asm vec4_multiply __retVal.xy, v, s;
  }
  
  vec3 normalize(const vec3 v)
@@ -679,7 +692,7 @@ vec3 normalize(const vec3 v)
     float tmp;
     __asm vec3_dot tmp, v, v;
     __asm float_rsq tmp, tmp;
-   __asm vec4_multiply __retVal.xyz, v, tmp.xxx;
+   __asm vec4_multiply __retVal.xyz, v, tmp;
  }
  
  vec4 normalize(const vec4 v)
@@ -687,7 +700,7 @@ vec4 normalize(const vec4 v)
     float tmp;
     __asm vec4_dot tmp, v, v;
     __asm float_rsq tmp, tmp;
-   __asm vec4_multiply __retVal.xyz, v, tmp.xxx;
+   __asm vec4_multiply __retVal, v, tmp;  // scale all four components; a .xyz write mask would leave .w of the vec4 result unwritten
  }
  
  
@@ -701,7 +714,7 @@ vec4 normalize(const vec4 v)
  
  float abs(const float a)
  {
-   __asm vec4_abs __retVal.x, a;
+   __asm vec4_abs __retVal, a;
  }
  
  vec2 abs(const vec2 a)
@@ -725,9 +738,9 @@ vec4 abs(const vec4 a)
  float sign(const float x)
  {
     float p, n;
-   __asm vec4_sgt p.x, x, 0.0;            // p = (x > 0)
-   __asm vec4_sgt n.x, 0.0, x;            // n = (x < 0)
-   __asm vec4_subtract __retVal.x, p, n;  // sign = p - n
+   __asm vec4_sgt p, x, 0.0;            // p = (x > 0)
+   __asm vec4_sgt n, 0.0, x;            // n = (x < 0)
+   __asm vec4_subtract __retVal, p, n;  // sign = p - n
  }
  
  vec2 sign(const vec2 v)
@@ -759,7 +772,7 @@ vec4 sign(const vec4 v)
  
  float floor(const float a)
  {
-   __asm vec4_floor __retVal.x, a;
+   __asm vec4_floor __retVal, a;
  }
  
  vec2 floor(const vec2 a)
@@ -785,7 +798,7 @@ float ceil(const float a)
     // XXX this could be improved
     float b = -a;
     __asm vec4_floor b, b;
-   __retVal.x = -b;
+   __retVal = -b;
  }
  
  vec2 ceil(const vec2 a)
@@ -814,7 +827,7 @@ vec4 ceil(const vec4 a)
  
  float fract(const float a)
  {
-   __asm vec4_frac __retVal.x, a;
+   __asm vec4_frac __retVal, a;
  }
  
  vec2 fract(const vec2 a)
@@ -839,7 +852,7 @@ float mod(const float a, const float b)
  {
      float oneOverB;
      __asm float_rcp oneOverB, b;
-    __retVal.x = a - b * floor(a * oneOverB);
+    __retVal = a - b * floor(a * oneOverB);
  }
  
  vec2 mod(const vec2 a, const float b)
@@ -865,35 +878,29 @@ vec4 mod(const vec4 a, const float b)
  
  vec2 mod(const vec2 a, const vec2 b)
  {
-    float oneOverBx, oneOverBy;
-    __asm float_rcp oneOverBx, b.x;
-    __asm float_rcp oneOverBy, b.y;
-    __retVal.x = a.x - b.x * floor(a.x * oneOverBx);
-    __retVal.y = a.y - b.y * floor(a.y * oneOverBy);
+    vec2 oneOverB;
+    __asm float_rcp oneOverB.x, b.x;
+    __asm float_rcp oneOverB.y, b.y;
+    __retVal = a - b * floor(a * oneOverB);
  }
  
  vec3 mod(const vec3 a, const vec3 b)
  {
-    float oneOverBx, oneOverBy, oneOverBz;
-    __asm float_rcp oneOverBx, b.x;
-    __asm float_rcp oneOverBy, b.y;
-    __asm float_rcp oneOverBz, b.z;
-    __retVal.x = a.x - b.x * floor(a.x * oneOverBx);
-    __retVal.y = a.y - b.y * floor(a.y * oneOverBy);
-    __retVal.z = a.z - b.z * floor(a.z * oneOverBz);
+    vec3 oneOverB;
+    __asm float_rcp oneOverB.x, b.x;
+    __asm float_rcp oneOverB.y, b.y;
+    __asm float_rcp oneOverB.z, b.z;
+    __retVal = a - b * floor(a * oneOverB);
  }
  
  vec4 mod(const vec4 a, const vec4 b)
  {
-    float oneOverBx, oneOverBy, oneOverBz, oneOverBw;
-    __asm float_rcp oneOverBx, b.x;
-    __asm float_rcp oneOverBy, b.y;
-    __asm float_rcp oneOverBz, b.z;
-    __asm float_rcp oneOverBw, b.w;
-    __retVal.x = a.x - b.x * floor(a.x * oneOverBx);
-    __retVal.y = a.y - b.y * floor(a.y * oneOverBy);
-    __retVal.z = a.z - b.z * floor(a.z * oneOverBz);
-    __retVal.w = a.w - b.w * floor(a.w * oneOverBz);
+    vec4 oneOverB;
+    __asm float_rcp oneOverB.x, b.x;
+    __asm float_rcp oneOverB.y, b.y;
+    __asm float_rcp oneOverB.z, b.z;
+    __asm float_rcp oneOverB.w, b.w;
+    __retVal = a - b * floor(a * oneOverB);
  }
  
  
@@ -901,7 +908,7 @@ vec4 mod(const vec4 a, const vec4 b)
  
  float min(const float a, const float b)
  {
-   __asm vec4_min __retVal.x, a.x, b.x;
+   __asm vec4_min __retVal, a, b;
  }
  
  vec2 min(const vec2 a, const vec2 b)
@@ -921,17 +928,17 @@ vec4 min(const vec4 a, const vec4 b)
  
  vec2 min(const vec2 a, const float b)
  {
-   __asm vec4_min __retVal, a.xy, b.xx;
+   __asm vec4_min __retVal, a.xy, b;
  }
  
  vec3 min(const vec3 a, const float b)
  {
-   __asm vec4_min __retVal, a.xyz, b.xxx;
+   __asm vec4_min __retVal, a.xyz, b;
  }
  
  vec4 min(const vec4 a, const float b)
  {
-   __asm vec4_min __retVal, a, b.xxxx;
+   __asm vec4_min __retVal, a, b;
  }
  
  
@@ -939,7 +946,7 @@ vec4 min(const vec4 a, const float b)
  
  float max(const float a, const float b)
  {
-   __asm vec4_max __retVal.x, a.x, b.x;
+   __asm vec4_max __retVal, a, b;
  }
  
  vec2 max(const vec2 a, const vec2 b)
@@ -959,17 +966,17 @@ vec4 max(const vec4 a, const vec4 b)
  
  vec2 max(const vec2 a, const float b)
  {
-   __asm vec4_max __retVal, a.xy, b.xx;
+   __asm vec4_max __retVal, a.xy, b;
  }
  
  vec3 max(const vec3 a, const float b)
  {
-   __asm vec4_max __retVal, a.xyz, b.xxx;
+   __asm vec4_max __retVal, a.xyz, b;
  }
  
  vec4 max(const vec4 a, const float b)
  {
-   __asm vec4_max __retVal, a, b.xxxx;
+   __asm vec4_max __retVal, a, b;
  }
  
  
@@ -1049,45 +1056,45 @@ vec4 mix(const vec4 x, const vec4 y, const vec4 a)
  }
  
  
-//// step (untested)
+//// step
  
  float step(const float edge, const float x)
  {
-   __asm vec4_sgt __retVal.x, x, edge;
+   __asm vec4_sge __retVal, x, edge;
  }
  
  vec2 step(const vec2 edge, const vec2 x)
  {
-   __asm vec4_sgt __retVal.xy, x, edge;
+   __asm vec4_sge __retVal.xy, x, edge;
  }
  
  vec3 step(const vec3 edge, const vec3 x)
  {
-   __asm vec4_sgt __retVal.xyz, x, edge;
+   __asm vec4_sge __retVal.xyz, x, edge;
  }
  
  vec4 step(const vec4 edge, const vec4 x)
  {
-   __asm vec4_sgt __retVal, x, edge;
+   __asm vec4_sge __retVal, x, edge;
  }
  
  vec2 step(const float edge, const vec2 v)
  {
-   __asm vec4_sgt __retVal.xy, v, edge.xx;
+   __asm vec4_sge __retVal.xy, v, edge;
  }
  
  vec3 step(const float edge, const vec3 v)
  {
-   __asm vec4_sgt __retVal.xyz, v, edge.xxx;
+   __asm vec4_sge __retVal.xyz, v, edge;
  }
  
  vec4 step(const float edge, const vec4 v)
  {
-   __asm vec4_sgt __retVal, v, edge.xxxx;
+   __asm vec4_sge __retVal, v, edge;
  }
  
  
-//// smoothstep (untested)
+//// smoothstep
  
  float smoothstep(const float edge0, const float edge1, const float x)
  {
@@ -1158,7 +1165,7 @@ float length(const vec3 v)
     float r;
     const float p = dot(v, v);      // p = v.x * v.x + v.y * v.y + v.z * v.z
     __asm float_rsq r, p;           // r = 1 / sqrt(p)
-   __asm float_rcp __retVal.x, r;  // retVal = 1 / r
+   __asm float_rcp __retVal, r;    // retVal = 1 / r
  }
  
  float length(const vec4 v)
@@ -1166,7 +1173,7 @@ float length(const vec4 v)
     float r;
     const float p = dot(v, v);      // p = v.x * v.x + v.y * v.y + ...
     __asm float_rsq r, p;           // r = 1 / sqrt(p)
-   __asm float_rcp __retVal.x, r;  // retVal = 1 / r
+   __asm float_rcp __retVal, r;    // retVal = 1 / r
  }
  
  
@@ -1212,7 +1219,7 @@ float faceforward(const float N, const float I, const float Nref)
      // this could probably be done better
      const float d = dot(Nref, I);
      float s;
-    __asm vec4_sgt s.x, 0.0, d;  // s = (0.0 > d) ? 1 : 0
+    __asm vec4_sgt s, 0.0, d;  // s = (0.0 > d) ? 1 : 0
      return mix(-N, N, s);
  }
  
@@ -1221,7 +1228,7 @@ vec2 faceforward(const vec2 N, const vec2 I, const vec2 Nref)
      // this could probably be done better
      const float d = dot(Nref, I);
      float s;
-    __asm vec4_sgt s.x, 0.0, d;  // s = (0.0 > d) ? 1 : 0
+    __asm vec4_sgt s, 0.0, d;  // s = (0.0 > d) ? 1 : 0
      return mix(-N, N, s);
  }
  
@@ -1230,7 +1237,7 @@ vec3 faceforward(const vec3 N, const vec3 I, const vec3 Nref)
      // this could probably be done better
      const float d = dot(Nref, I);
      float s;
-    __asm vec4_sgt s.x, 0.0, d;  // s = (0.0 > d) ? 1 : 0
+    __asm vec4_sgt s, 0.0, d;  // s = (0.0 > d) ? 1 : 0
      return mix(-N, N, s);
  }
  
@@ -1239,7 +1246,7 @@ vec4 faceforward(const vec4 N, const vec4 I, const vec4 Nref)
      // this could probably be done better
      const float d = dot(Nref, I);
      float s;
-    __asm vec4_sgt s.x, 0.0, d;  // s = (0.0 > d) ? 1 : 0
+    __asm vec4_sgt s, 0.0, d;  // s = (0.0 > d) ? 1 : 0
      return mix(-N, N, s);
  }
  
@@ -1270,34 +1277,50 @@ vec4 reflect(const vec4 I, const vec4 N)
  
  float refract(const float I, const float N, const float eta)
  {
-   float k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I));
+   float n_dot_i = dot(N, I);
+   float k = 1.0 - eta * eta * (1.0 - n_dot_i * n_dot_i);
+   float retval;
     if (k < 0.0)
-       return 0.0;
-   return eta * I - (eta * dot(N, I) + sqrt(k)) * N;
+      retval = 0.0;
+   else
+      retval = eta * I - (eta * n_dot_i + sqrt(k)) * N;
+   return retval;
  }
  
  vec2 refract(const vec2 I, const vec2 N, const float eta)
  {
-   float k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I));
+   float n_dot_i = dot(N, I);
+   float k = 1.0 - eta * eta * (1.0 - n_dot_i * n_dot_i);
+   vec2 retval;
     if (k < 0.0)
-      return 0.0;
-   return eta * I - (eta * dot(N, I) + sqrt(k)) * N;
+      retval = vec2(0.0);
+   else
+      retval = eta * I - (eta * n_dot_i + sqrt(k)) * N;
+   return retval;
  }
  
  vec3 refract(const vec3 I, const vec3 N, const float eta)
  {
-   float k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I));
+   float n_dot_i = dot(N, I);
+   float k = 1.0 - eta * eta * (1.0 - n_dot_i * n_dot_i);
+   vec3 retval;
     if (k < 0.0)
-      return 0.0;
-   return eta * I - (eta * dot(N, I) + sqrt(k)) * N;
+      retval = vec3(0.0);
+   else
+      retval = eta * I - (eta * n_dot_i + sqrt(k)) * N;
+   return retval;
  }
  
  vec4 refract(const vec4 I, const vec4 N, const float eta)
  {
-   float k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I));
+   float n_dot_i = dot(N, I);
+   float k = 1.0 - eta * eta * (1.0 - n_dot_i * n_dot_i);
+   vec4 retval;
     if (k < 0.0)
-      return 0.0;
-   return eta * I - (eta * dot(N, I) + sqrt(k)) * N;
+      retval = vec4(0.0);
+   else
+      retval = eta * I - (eta * n_dot_i + sqrt(k)) * N;
+   return retval;
  }
  
  
@@ -1328,202 +1351,235 @@ mat4 matrixCompMult (mat4 m, mat4 n) {
  
  //// lessThan
  
-bvec2 lessThan(const vec2 v, const vec2 u)
+bvec2 lessThan(const vec2 u, const vec2 v)
  {
-   __asm vec4_sgt __retVal.xy, u, v;
+   __asm vec4_slt __retVal.xy, u, v;
  }
  
-bvec3 lessThan(const vec3 v, const vec3 u)
+bvec3 lessThan(const vec3 u, const vec3 v)
  {
-   __asm vec4_sgt __retVal.xyz, u, v;
+   __asm vec4_slt __retVal.xyz, u, v;
  }
  
-bvec4 lessThan(const vec4 v, const vec4 u)
+bvec4 lessThan(const vec4 u, const vec4 v)
  {
-   __asm vec4_sgt __retVal, u, v;
+   __asm vec4_slt __retVal, u, v;
  }
  
-bvec2 lessThan(const ivec2 v, const ivec2 u)
+bvec2 lessThan(const ivec2 u, const ivec2 v)
  {
-   __asm vec4_sgt __retVal.xy, u, v;
+   __asm vec4_slt __retVal.xy, u, v;
  }
  
-bvec3 lessThan(const ivec3 v, const ivec3 u)
+bvec3 lessThan(const ivec3 u, const ivec3 v)
  {
-   __asm vec4_sgt __retVal.xyz, u, v;
+   __asm vec4_slt __retVal.xyz, u, v;
  }
  
-bvec4 lessThan(const ivec4 v, const ivec4 u)
+bvec4 lessThan(const ivec4 u, const ivec4 v)
  {
-   __asm vec4_sgt __retVal, u, v;
+   __asm vec4_slt __retVal, u, v;
  }
  
  
  //// lessThanEqual
  
-bvec2 lessThanEqual(const vec2 v, const vec2 u)
+bvec2 lessThanEqual(const vec2 u, const vec2 v)
  {
-   __asm vec4_sge __retVal.xy, u, v;
+   __asm vec4_sle __retVal.xy, u, v;
  }
  
-bvec3 lessThanEqual(const vec3 v, const vec3 u)
+bvec3 lessThanEqual(const vec3 u, const vec3 v)
  {
-   __asm vec4_sge __retVal.xyz, u, v;
+   __asm vec4_sle __retVal.xyz, u, v;
  }
  
-bvec4 lessThanEqual(const vec4 v, const vec4 u)
+bvec4 lessThanEqual(const vec4 u, const vec4 v)
  {
-   __asm vec4_sge __retVal, u, v;
+   __asm vec4_sle __retVal, u, v;
  }
  
-bvec2 lessThanEqual(const ivec2 v, const ivec2 u)
+bvec2 lessThanEqual(const ivec2 u, const ivec2 v)
  {
-   __asm vec4_sge __retVal.xy, u, v;
+   __asm vec4_sle __retVal.xy, u, v;
  }
  
-bvec3 lessThanEqual(const ivec3 v, const ivec3 u)
+bvec3 lessThanEqual(const ivec3 u, const ivec3 v)
  {
-   __asm vec4_sge __retVal.xyz, u, v;
+   __asm vec4_sle __retVal.xyz, u, v;
  }
  
-bvec4 lessThanEqual(const ivec4 v, const ivec4 u)
+bvec4 lessThanEqual(const ivec4 u, const ivec4 v)
  {
-   __asm vec4_sge __retVal, u, v;
+   __asm vec4_sle __retVal, u, v;
  }
  
  
  //// greaterThan
  
-bvec2 greaterThan(const vec2 v, const vec2 u)
+bvec2 greaterThan(const vec2 u, const vec2 v)
  {
-   __asm vec4_sgt __retVal.xy, v, u;
+   __asm vec4_sgt __retVal.xy, u, v;
  }
  
-bvec3 greaterThan(const vec3 v, const vec3 u)
+bvec3 greaterThan(const vec3 u, const vec3 v)
  {
-   __asm vec4_sgt __retVal.xyz, v, u;
+   __asm vec4_sgt __retVal.xyz, u, v;
  }
  
-bvec4 greaterThan(const vec4 v, const vec4 u)
+bvec4 greaterThan(const vec4 u, const vec4 v)
  {
-   __asm vec4_sgt __retVal, v, u;
+   __asm vec4_sgt __retVal, u, v;
  }
  
-bvec2 greaterThan(const ivec2 v, const ivec2 u)
+bvec2 greaterThan(const ivec2 u, const ivec2 v)
  {
-   __asm vec4_sgt __retVal.xy, v, u;
+   __asm vec4_sgt __retVal.xy, u.xy, v.xy;
  }
  
-bvec3 greaterThan(const ivec3 v, const ivec3 u)
+bvec3 greaterThan(const ivec3 u, const ivec3 v)
  {
-   __asm vec4_sgt __retVal.xyz, v, u;
+   __asm vec4_sgt __retVal.xyz, u, v;
  }
  
-bvec4 greaterThan(const ivec4 v, const ivec4 u)
+bvec4 greaterThan(const ivec4 u, const ivec4 v)
  {
-   __asm vec4_sgt __retVal, v, u;
+   __asm vec4_sgt __retVal, u, v;
  }
  
  
  //// greaterThanEqual
  
-bvec2 greaterThanEqual(const vec2 v, const vec2 u)
+bvec2 greaterThanEqual(const vec2 u, const vec2 v)
  {
-   __asm vec4_sge __retVal.xy, v, u;
+   __asm vec4_sge __retVal.xy, u, v;
  }
  
-bvec3 greaterThanEqual(const vec3 v, const vec3 u)
+bvec3 greaterThanEqual(const vec3 u, const vec3 v)
  {
-   __asm vec4_sge __retVal.xyz, v, u;
+   __asm vec4_sge __retVal.xyz, u, v;
  }
  
-bvec4 greaterThanEqual(const vec4 v, const vec4 u)
+bvec4 greaterThanEqual(const vec4 u, const vec4 v)
  {
-   __asm vec4_sge __retVal, v, u;
+   __asm vec4_sge __retVal, u, v;
  }
  
-bvec2 greaterThanEqual(const ivec2 v, const ivec2 u)
+bvec2 greaterThanEqual(const ivec2 u, const ivec2 v)
  {
-   __asm vec4_sge __retVal.xy, v, u;
+   __asm vec4_sge __retVal.xy, u, v;
  }
  
-bvec3 greaterThanEqual(const ivec3 v, const ivec3 u)
+bvec3 greaterThanEqual(const ivec3 u, const ivec3 v)
  {
-   __asm vec4_sge __retVal.xyz, v, u;
+   __asm vec4_sge __retVal.xyz, u, v;
  }
  
-bvec4 greaterThanEqual(const ivec4 v, const ivec4 u)
+bvec4 greaterThanEqual(const ivec4 u, const ivec4 v)
  {
-   __asm vec4_sge __retVal, v, u;
+   __asm vec4_sge __retVal, u, v;
  }
  
  
  //// equal
  
-bvec2 equal(const vec2 v, const vec2 u)
+bvec2 equal(const vec2 u, const vec2 v)
  {
-   __asm vec4_seq __retVal.xy, v, u;
+   __asm vec4_seq __retVal.xy, u, v;
  }
  
-bvec3 equal(const vec3 v, const vec3 u)
+bvec3 equal(const vec3 u, const vec3 v)
  {
-   __asm vec4_seq __retVal.xyz, v, u;
+   __asm vec4_seq __retVal.xyz, u, v;
  }
  
-bvec4 equal(const vec4 v, const vec4 u)
+bvec4 equal(const vec4 u, const vec4 v)
  {
-   __asm vec4_seq __retVal, v, u;
+   __asm vec4_seq __retVal, u, v;
  }
  
-bvec2 equal(const ivec2 v, const ivec2 u)
+bvec2 equal(const ivec2 u, const ivec2 v)
  {
-   __asm vec4_seq __retVal.xy, v, u;
+   __asm vec4_seq __retVal.xy, u, v;
  }
  
-bvec3 equal(const ivec3 v, const ivec3 u)
+bvec3 equal(const ivec3 u, const ivec3 v)
  {
-   __asm vec4_seq __retVal.xyz, v, u;
+   __asm vec4_seq __retVal.xyz, u, v;
  }
  
-bvec4 equal(const ivec4 v, const ivec4 u)
+bvec4 equal(const ivec4 u, const ivec4 v)
  {
-   __asm vec4_seq __retVal, v, u;
+   __asm vec4_seq __retVal, u, v;
  }
  
+bvec2 equal(const bvec2 u, const bvec2 v)
+{
+   __asm vec4_seq __retVal.xy, u, v;
+}
+
+bvec3 equal(const bvec3 u, const bvec3 v)
+{
+   __asm vec4_seq __retVal.xyz, u, v;
+}
+
+bvec4 equal(const bvec4 u, const bvec4 v)
+{
+   __asm vec4_seq __retVal, u, v;
+}
+
+
+
  
  //// notEqual
  
-bvec2 notEqual(const vec2 v, const vec2 u)
+bvec2 notEqual(const vec2 u, const vec2 v)
+{
+   __asm vec4_sne __retVal.xy, u, v;
+}
+
+bvec3 notEqual(const vec3 u, const vec3 v)
+{
+   __asm vec4_sne __retVal.xyz, u, v;
+}
+
+bvec4 notEqual(const vec4 u, const vec4 v)
+{
+   __asm vec4_sne __retVal, u, v;
+}
+
+bvec2 notEqual(const ivec2 u, const ivec2 v)
  {
-   __asm vec4_sne __retVal.xy, v, u;
+   __asm vec4_sne __retVal.xy, u, v;
  }
  
-bvec3 notEqual(const vec3 v, const vec3 u)
+bvec3 notEqual(const ivec3 u, const ivec3 v)
  {
-   __asm vec4_sne __retVal.xyz, v, u;
+   __asm vec4_sne __retVal.xyz, u, v;
  }
  
-bvec4 notEqual(const vec4 v, const vec4 u)
+bvec4 notEqual(const ivec4 u, const ivec4 v)
  {
-   __asm vec4_sne __retVal, v, u;
+   __asm vec4_sne __retVal, u, v;
  }
  
-bvec2 notEqual(const ivec2 v, const ivec2 u)
+bvec2 notEqual(const bvec2 u, const bvec2 v)
  {
-   __asm vec4_sne __retVal.xy, v, u;
+   __asm vec4_sne __retVal.xy, u, v;
  }
  
-bvec3 notEqual(const ivec3 v, const ivec3 u)
+bvec3 notEqual(const bvec3 u, const bvec3 v)
  {
-   __asm vec4_sne __retVal.xyz, v, u;
+   __asm vec4_sne __retVal.xyz, u, v;
  }
  
-bvec4 notEqual(const ivec4 v, const ivec4 u)
+bvec4 notEqual(const bvec4 u, const bvec4 v)
  {
-   __asm vec4_sne __retVal, v, u;
+   __asm vec4_sne __retVal, u, v;
  }
  
  
+
  //// any
  
  bool any(const bvec2 v)
@@ -1553,29 +1609,28 @@ bool any(const bvec4 v)
  
  //// all
  
-bool all (const vec2 v)
+bool all (const bvec2 v)
  {
     float prod;
-   __asm vec4_multiply prod.x, v.x, v.y;
-   __asm vec4_sne __retVal.x, prod.x, 0.0;
-    return v.x && v.y;
+   __asm vec4_multiply prod, v.x, v.y;
+   __asm vec4_sne __retVal, prod, 0.0;
  }
  
  bool all (const bvec3 v)
  {
     float prod;
-   __asm vec4_multiply prod.x, v.x, v.y;
-   __asm vec4_multiply prod.x, prod.x, v.z;
-   __asm vec4_sne __retVal.x, prod.x, 0.0;
+   __asm vec4_multiply prod, v.x, v.y;
+   __asm vec4_multiply prod, prod, v.z;
+   __asm vec4_sne __retVal, prod, 0.0;
  }
  
  bool all (const bvec4 v)
  {
     float prod;
-   __asm vec4_multiply prod.x, v.x, v.y;
-   __asm vec4_multiply prod.x, prod.x, v.z;
-   __asm vec4_multiply prod.x, prod.x, v.w;
-   __asm vec4_sne __retVal.x, prod.x, 0.0;
+   __asm vec4_multiply prod, v.x, v.y;
+   __asm vec4_multiply prod, prod, v.z;
+   __asm vec4_multiply prod, prod, v.w;
+   __asm vec4_sne __retVal, prod, 0.0;
  }
  
  
@@ -1608,11 +1663,8 @@ vec4 texture1D(const sampler1D sampler, const float coord)
  
  vec4 texture1DProj(const sampler1D sampler, const vec2 coord)
  {
-   // new coord with .z moved to .w
-   vec4 coord4;
-   coord4.x = coord.x;
-   coord4.w = coord.y;
-   __asm vec4_texp1d __retVal, sampler, coord4;
+   // need to swizzle .y into .w
+   __asm vec4_texp1d __retVal, sampler, coord.xyyy;
  }
  
  vec4 texture1DProj(const sampler1D sampler, const vec4 coord)
@@ -1628,11 +1680,8 @@ vec4 texture2D(const sampler2D sampler, const vec2 coord)
  
  vec4 texture2DProj(const sampler2D sampler, const vec3 coord)
  {
-   // new coord with .z moved to .w
-   vec4 coord4;
-   coord4.xy = coord.xy;
-   coord4.w = coord.z;
-    __asm vec4_texp2d __retVal, sampler, coord4;
+   // need to swizzle 'z' into 'w'.
+   __asm vec4_texp2d __retVal, sampler, coord.xyzz;
  }
  
  vec4 texture2DProj(const sampler2D sampler, const vec4 coord)
@@ -1659,26 +1708,58 @@ vec4 textureCube(const samplerCube sampler, const vec3 coord)
  
  
  
-vec4 shadow1D (sampler1DShadow sampler, vec3 coord) {
-    vec4 texel;
-    __asm vec4_shad1d texel, sampler, coord, 0.0;
-    return texel;
+vec4 shadow1D(const sampler1DShadow sampler, const vec3 coord)
+{
+   __asm vec4_tex1d __retVal, sampler, coord;
  }
  
-vec4 shadow1DProj (sampler1DShadow sampler, vec4 coord) {
-    return shadow1D (sampler, vec3 (coord.s / coord.q, 0.0, coord.p / coord.q));
+vec4 shadow1DProj(const sampler1DShadow sampler, const vec4 coord)
+{
+   // .s and .p will be divided by .q
+   __asm vec4_texp1d __retVal, sampler, coord;
  }
  
-vec4 shadow2D (sampler2DShadow sampler, vec3 coord) {
-    vec4 texel;
-    __asm vec4_shad2d texel, sampler, coord, 0.0;
-    return texel;
+vec4 shadow2D(const sampler2DShadow sampler, const vec3 coord)
+{
+   __asm vec4_tex2d __retVal, sampler, coord;
  }
  
-vec4 shadow2DProj (sampler2DShadow sampler, vec4 coord) {
-    return shadow2D (sampler, vec3 (coord.s / coord.q, coord.t / coord.q, coord.p / coord.q));
+vec4 shadow2DProj(const sampler2DShadow sampler, const vec4 coord)
+{
+   // .s, .t and .p will be divided by .q
+   __asm vec4_texp2d __retVal, sampler, coord;
+}
+
+
+//// GL_ARB_texture_rectangle:
+vec4 texture2DRect(const sampler2DRect sampler, const vec2 coord)
+{
+   __asm vec4_tex_rect __retVal, sampler, coord;
  }
  
+vec4 texture2DRectProj(const sampler2DRect sampler, const vec3 coord)
+{
+   // need to swizzle .z into .w (the .xyzz swizzle copies the third component into the fourth)
+   __asm vec4_texp_rect __retVal, sampler, coord.xyzz;
+}
+
+vec4 texture2DRectProj(const sampler2DRect sampler, const vec4 coord)
+{
+   __asm vec4_texp_rect __retVal, sampler, coord;  // vec4 coord is passed through unswizzled, like the other vec4 *Proj variants
+}
+
+vec4 shadow2DRect(const sampler2DRectShadow sampler, const vec3 coord)
+{
+   __asm vec4_tex_rect __retVal, sampler, coord;
+}
+
+vec4 shadow2DRectProj(const sampler2DRectShadow sampler, const vec4 coord)
+{
+   __asm vec4_texp_rect __retVal, sampler, coord;
+}
+
+
+
  //
  // 8.9 Noise Functions
  //