From: Brian Paul <brian.paul@tungstengraphics.com>
Date: Tue, 5 Aug 2008 22:18:39 +0000 (-0600)
Subject: mesa: glsl: re-org of intermediate/temp storage
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1308ca6d2168c5c2f81a8e675687e9d9a4db1a28;p=mesa.git

mesa: glsl: re-org of intermediate/temp storage

Simplify the code for allocating storage for intermediate results.  Use fewer
temps in some cases.

Also, use new asm vec4_move intrinsic instead of regular assignments in various
constructors.  For example:
  float f;
  vec3 v;
  v.xyz = f;
is not legal GLSL, so do this instead:
  __asm vec4_move v.xyz, f;  // note: f will auto-expand into f.xxxx

Plus, fix assorted bugs in structure comparison.
---

diff --git a/src/mesa/shader/slang/library/slang_common_builtin.gc b/src/mesa/shader/slang/library/slang_common_builtin.gc
index 3726335471f..f41d1fb6037 100644
--- a/src/mesa/shader/slang/library/slang_common_builtin.gc
+++ b/src/mesa/shader/slang/library/slang_common_builtin.gc
@@ -1418,7 +1418,7 @@ bvec4 greaterThan(const vec4 u, const vec4 v)
 
 bvec2 greaterThan(const ivec2 u, const ivec2 v)
 {
-   __asm vec4_sgt __retVal.xy, u, v;
+   __asm vec4_sgt __retVal.xy, u.xy, v.xy;
 }
 
 bvec3 greaterThan(const ivec3 u, const ivec3 v)
diff --git a/src/mesa/shader/slang/library/slang_core.gc b/src/mesa/shader/slang/library/slang_core.gc
index 218383e001d..840a0814c5d 100644
--- a/src/mesa/shader/slang/library/slang_core.gc
+++ b/src/mesa/shader/slang/library/slang_core.gc
@@ -107,7 +107,7 @@
 
 int __constructor(const float f)
 {
-   __asm float_to_int __retVal, f;
+   __asm vec4_to_ivec4 __retVal, f;
 }
 
 int __constructor(const bool b)
@@ -122,14 +122,12 @@ int __constructor(const int i)
 
 bool __constructor(const int i)
 {
-   const float zero = 0.0;
-   __asm vec4_sne __retVal, i, zero;
+   __asm vec4_sne __retVal, i, 0.0;
 }
 
 bool __constructor(const float f)
 {
-   const float zero = 0.0;
-   __asm vec4_sne __retVal, f, zero;
+   __asm vec4_sne __retVal, f, 0.0;
 }
 
 bool __constructor(const bool b)
@@ -139,12 +137,12 @@ bool __constructor(const bool b)
 
 float __constructor(const int i)
 {
-    __asm int_to_float __retVal, i;
+    __asm ivec4_to_vec4 __retVal, i;
 }
 
 float __constructor(const bool b)
 {
-   __retVal = b;
+    __asm ivec4_to_vec4 __retVal, b;
 }
 
 float __constructor(const float f)
@@ -163,32 +161,33 @@ vec2 __constructor(const float x, const float y)
 
 vec2 __constructor(const float f)
 {
-   __retVal.xy = f.xx;
+   __asm vec4_move __retVal.xy, f;
 }
 
 vec2 __constructor(const int i)
 {
-   __retVal.xy = i.xx;
+   __asm ivec4_to_vec4 __retVal.xy, i;
 }
 
 vec2 __constructor(const bool b)
 {
-   __retVal.xy = b.xx;
+   __asm ivec4_to_vec4 __retVal.xy, b;
 }
 
 vec2 __constructor(const bvec2 b)
 {
-   __retVal = b;
+//   __retVal = b;
+   __asm ivec4_to_vec4 __retVal.xy, b;
 }
 
 vec2 __constructor(const vec3 v)
 {
-   __retVal.xy = v.xy;
+   __asm vec4_move __retVal.xy, v.xy;
 }
 
 vec2 __constructor(const vec4 v)
 {
-   __retVal.st = v.xy;
+   __asm vec4_move __retVal.xy, v.xy;
 }
 
 
@@ -203,27 +202,28 @@ vec3 __constructor(const float x, const float y, const float z)
 
 vec3 __constructor(const float f)
 {
-   __retVal.xyz = f.xxx;
+   // Note: this could be "__retVal.xyz = f" but that's an illegal assignment
+   __asm vec4_move __retVal.xyz, f;
 }
 
 vec3 __constructor(const int i)
 {
-   __asm int_to_float __retVal.xyz, i.xxx;
+   __asm ivec4_to_vec4 __retVal.xyz, i;
 }
 
 vec3 __constructor(const bool b)
 {
-   __retVal.xyz = b.xxx;
+   __asm ivec4_to_vec4 __retVal.xyz, b;
 }
 
 vec3 __constructor(const bvec3 b)
 {
-   __retVal = b;
+   __asm ivec4_to_vec4 __retVal.xyz, b;
 }
 
 vec3 __constructor(const vec4 v)
 {
-   __retVal.xyz = v.xyz;
+   __asm vec4_move __retVal.xyz, v;
 }
 
 
@@ -239,27 +239,28 @@ vec4 __constructor(const float x, const float y, const float z, const float w)
 
 vec4 __constructor(const float f)
 {
-   __retVal = f.xxxx;
+   // Note: this could be "__retVal = f" but that's an illegal assignment
+   __asm vec4_move __retVal, f;
 }
 
 vec4 __constructor(const int i)
 {
-   __retVal = i.xxxx;
+   __asm ivec4_to_vec4 __retVal, i;
 }
 
 vec4 __constructor(const bool b)
 {
-   __retVal = b.xxxx;
+   __asm ivec4_to_vec4 __retVal, b;
 }
 
 vec4 __constructor(const bvec4 b)
 {
-   __retVal = b;
+   __asm ivec4_to_vec4 __retVal, b;
 }
 
 vec4 __constructor(const ivec4 i)
 {
-   __retVal = i;
+   __asm ivec4_to_vec4 __retVal, i;
 }
 
 vec4 __constructor(const vec3 v3, const float f)
@@ -288,17 +289,17 @@ ivec2 __constructor(const int i, const int j)
 
 ivec2 __constructor(const int i)
 {
-   __retVal.xy = i.xx;
+   __asm vec4_move __retVal.xy, i;
 }
 
 ivec2 __constructor(const float f)
 {
-   __asm float_to_int __retVal.xy, f.xx;
+   __asm vec4_to_ivec4 __retVal.xy, f;
 }
 
 ivec2 __constructor(const bool b)
 {
-   __asm float_to_int __retVal.xy, b.xx;
+   __asm vec4_to_ivec4 __retVal.xy, b;
 }
 
 
@@ -313,17 +314,17 @@ ivec3 __constructor(const int i, const int j, const int k)
 
 ivec3 __constructor(const int i)
 {
-   __retVal.xyz = i.xxx;
+   __asm vec4_move __retVal.xyz, i;
 }
 
 ivec3 __constructor(const float f)
 {
-   __retVal.xyz = f.xxx;
+   __asm vec4_to_ivec4 __retVal.xyz, f;
 }
 
 ivec3 __constructor(const bool b)
 {
-   __retVal.xyz = b.xxx;
+   __asm vec4_move __retVal.xyz, b;
 }
 
 
@@ -339,17 +340,17 @@ ivec4 __constructor(const int x, const int y, const int z, const int w)
 
 ivec4 __constructor(const int i)
 {
-   __retVal = i.xxxx;
+   __asm vec4_move __retVal, i;
 }
 
 ivec4 __constructor(const float f)
 {
-   __asm float_to_int __retVal, f.xxxx;
+   __asm vec4_to_ivec4 __retVal, f;
 }
 
 ivec4 __constructor(const bool b)
 {
-   __retVal = b.xxxx;
+   __asm vec4_to_ivec4 __retVal, b;
 }
 
 
@@ -363,19 +364,17 @@ bvec2 __constructor(const bool b1, const bool b2)
 
 bvec2 __constructor(const bool b)
 {
-   __retVal.xy = b.xx;
+   __asm vec4_move __retVal.xy, b;
 }
 
 bvec2 __constructor(const float f)
 {
-   const vec2 zero = vec2(0.0, 0.0);
-   __asm vec4_sne __retVal.xy, f.xx, zero;
+   __asm vec4_sne __retVal.xy, f, 0.0;
 }
 
 bvec2 __constructor(const int i)
 {
-   const ivec2 zero = ivec2(0, 0);
-   __asm vec4_sne __retVal.xy, i.xx, zero;
+   __asm vec4_sne __retVal.xy, i, 0.0;
 }
 
 bvec2 __constructor(const vec2 v)
@@ -385,8 +384,7 @@ bvec2 __constructor(const vec2 v)
 
 bvec2 __constructor(const ivec2 v)
 {
-   const ivec2 zero = ivec2(0, 0);
-   __asm vec4_sne __retVal.xy, v, zero;
+   __asm vec4_sne __retVal.xy, v, 0.0;
 }
 
 
@@ -402,31 +400,27 @@ bvec3 __constructor(const bool b1, const bool b2, const bool b3)
 
 bvec3 __constructor(const bool b)
 {
-   __retVal.xyz = b.xxx;
+   __asm vec4_move __retVal.xyz, b;
 }
 
 bvec3 __constructor(const float f)
 {
-   const vec3 zero = vec3(0.0, 0.0, 0.0);
-   __asm vec4_sne __retVal.xyz, f.xxx, zero;
+   __asm vec4_sne __retVal.xyz, f, 0.0;
 }
 
 bvec3 __constructor(const int i)
 {
-   const ivec3 zero = ivec3(0, 0, 0);
-   __asm vec4_sne __retVal.xyz, i.xxx, zero;
+   __asm vec4_sne __retVal.xyz, i, 0.0;
 }
 
 bvec3 __constructor(const vec3 v)
 {
-   const vec3 zero = vec3(0.0, 0.0, 0.0);
-   __asm vec4_sne __retVal.xyz, v, zero;
+   __asm vec4_sne __retVal.xyz, v, 0.0;
 }
 
 bvec3 __constructor(const ivec3 v)
 {
-   const ivec3 zero = ivec3(0, 0, 0);
-   __asm vec4_sne __retVal.xyz, v, zero;
+   __asm vec4_sne __retVal.xyz, v, 0.0;
 }
 
 
@@ -452,31 +446,27 @@ bvec4 __constructor(const float f1, const float f2, const float f3, const float
 
 bvec4 __constructor(const bool b)
 {
-   __retVal.xyzw = b.xxxx;
+   __asm vec4_move __retVal.xyzw, b;
 }
 
 bvec4 __constructor(const float f)
 {
-   const vec4 zero = vec4(0.0, 0.0, 0.0, 0.0);
-   __asm vec4_sne __retVal, f.xxxx, zero;
+   __asm vec4_sne __retVal.xyzw, f, 0.0;
 }
 
 bvec4 __constructor(const int i)
 {
-   const ivec4 zero = ivec4(0, 0, 0, 0);
-   __asm vec4_sne __retVal, i.xxxx, zero;
+   __asm vec4_sne __retVal.xyzw, i, 0.0;
 }
 
 bvec4 __constructor(const vec4 v)
 {
-   const vec4 zero = vec4(0.0, 0.0, 0.0, 0.0);
-   __asm vec4_sne __retVal, v, zero;
+   __asm vec4_sne __retVal.xyzw, v, 0.0;
 }
 
 bvec4 __constructor(const ivec4 v)
 {
-   const ivec4 zero = ivec4(0, 0, 0, 0);
-   __asm vec4_sne __retVal, v, zero;
+   __asm vec4_sne __retVal.xyzw, v, 0.0;
 }
 
 
@@ -619,30 +609,17 @@ mat4 __constructor(const vec4 c0, const vec4 c1, const vec4 c2, const vec4 c3)
 
 int __operator + (const int a, const int b)
 {
-// XXX If we ever have int registers, we'll do something like this:
-// XXX For now, mostly treat ints as floats.
-//    float x, y;
-//    __asm int_to_float x, a;
-//    __asm int_to_float y, b;
-//    __asm vec4_add x.x, x.x, y.x;
-//    __asm float_to_int __retVal, x;
-   float x;
-   __asm vec4_add x, a, b;
-   __asm float_to_int __retVal, x;
+   __asm vec4_add __retVal, a, b;
 }
 
 int __operator - (const int a, const int b)
 {
-   float x;
-   __asm vec4_subtract x, a, b;
-   __asm float_to_int __retVal, x;
+   __asm vec4_subtract __retVal, a, b;
 }
 
 int __operator * (const int a, const int b)
 {
-   float x;
-   __asm vec4_multiply x, a, b;
-   __asm float_to_int __retVal, x;
+   __asm vec4_multiply __retVal, a, b;
 }
 
 int __operator / (const int a, const int b)
@@ -650,7 +627,7 @@ int __operator / (const int a, const int b)
    float bInv, x;
    __asm float_rcp bInv, b;
    __asm vec4_multiply x, a, bInv;
-   __asm float_to_int __retVal, x;
+   __asm vec4_to_ivec4 __retVal, x;
 }
 
 
@@ -658,23 +635,17 @@ int __operator / (const int a, const int b)
 
 ivec2 __operator + (const ivec2 a, const ivec2 b)
 {
-   vec2 x;
-   __asm vec4_add x, a, b;
-   __asm float_to_int __retVal, x;
+   __asm vec4_add __retVal, a, b;
 }
 
 ivec2 __operator - (const ivec2 a, const ivec2 b)
 {
-   vec2 x;
-   __asm vec4_subtract x, a, b;
-   __asm float_to_int __retVal, x;
+   __asm vec4_subtract __retVal, a, b;
 }
 
 ivec2 __operator * (const ivec2 a, const ivec2 b)
 {
-   vec2 x;
-   __asm vec4_multiply x, a, b;
-   __asm float_to_int __retVal, x;
+   __asm vec4_multiply __retVal, a, b;
 }
 
 ivec2 __operator / (const ivec2 a, const ivec2 b)
@@ -683,7 +654,7 @@ ivec2 __operator / (const ivec2 a, const ivec2 b)
    __asm float_rcp bInv.x, b.x;
    __asm float_rcp bInv.y, b.y;
    __asm vec4_multiply x, a, bInv;
-   __asm float_to_int __retVal, x;
+   __asm vec4_to_ivec4 __retVal, x;
 }
 
 
@@ -691,23 +662,17 @@ ivec2 __operator / (const ivec2 a, const ivec2 b)
 
 ivec3 __operator + (const ivec3 a, const ivec3 b)
 {
-   vec3 x;
-   __asm vec4_add x, a, b;
-   __asm float_to_int __retVal, x;
+   __asm vec4_add __retVal, a, b;
 }
 
 ivec3 __operator - (const ivec3 a, const ivec3 b)
 {
-   vec3 x;
-   __asm vec4_subtract x, a, b;
-   __asm float_to_int __retVal, x;
+   __asm vec4_subtract __retVal, a, b;
 }
 
 ivec3 __operator * (const ivec3 a, const ivec3 b)
 {
-   vec3 x;
-   __asm vec4_multiply x, a, b;
-   __asm float_to_int __retVal, x;
+   __asm vec4_multiply __retVal, a, b;
 }
 
 ivec3 __operator / (const ivec3 a, const ivec3 b)
@@ -717,7 +682,7 @@ ivec3 __operator / (const ivec3 a, const ivec3 b)
    __asm float_rcp bInv.y, b.y;
    __asm float_rcp bInv.z, b.z;
    __asm vec4_multiply x, a, bInv;
-   __asm float_to_int __retVal, x;
+   __asm vec4_to_ivec4 __retVal, x;
 }
 
 
@@ -725,23 +690,17 @@ ivec3 __operator / (const ivec3 a, const ivec3 b)
 
 ivec4 __operator + (const ivec4 a, const ivec4 b)
 {
-   vec3 x;
-   __asm vec4_add x, a, b;
-   __asm float_to_int __retVal, x;
+   __asm vec4_add __retVal, a, b;
 }
 
 ivec4 __operator - (const ivec4 a, const ivec4 b)
 {
-   vec4 x;
-   __asm vec4_subtract x, a, b;
-   __asm float_to_int __retVal, x;
+   __asm vec4_subtract __retVal, a, b;
 }
 
 ivec4 __operator * (const ivec4 a, const ivec4 b)
 {
-   vec4 x;
-   __asm vec4_multiply x, a, b;
-   __asm float_to_int __retVal, x;
+   __asm vec4_multiply __retVal, a, b;
 }
 
 ivec4 __operator / (const ivec4 a, const ivec4 b)
@@ -752,7 +711,7 @@ ivec4 __operator / (const ivec4 a, const ivec4 b)
    __asm float_rcp bInv.z, b.z;
    __asm float_rcp bInv.w, b.w;
    __asm vec4_multiply x, a, bInv;
-   __asm float_to_int __retVal, x;
+   __asm vec4_to_ivec4 __retVal, x;
 }
 
 
@@ -760,24 +719,24 @@ ivec4 __operator / (const ivec4 a, const ivec4 b)
 
 float __operator + (const float a, const float b)
 {
-   __asm vec4_add __retVal.x, a, b;
+   __asm vec4_add __retVal, a, b;
 }
 
 float __operator - (const float a, const float b)
 {
-   __asm vec4_subtract __retVal.x, a, b;
+   __asm vec4_subtract __retVal, a, b;
 }
 
 float __operator * (const float a, const float b)
 {
-    __asm vec4_multiply __retVal.x, a, b;
+    __asm vec4_multiply __retVal, a, b;
 }
 
 float __operator / (const float a, const float b)
 {
    float bInv;
-   __asm float_rcp bInv.x, b.x;
-   __asm vec4_multiply __retVal.x, a, bInv;
+   __asm float_rcp bInv.x, b;
+   __asm vec4_multiply __retVal, a, bInv;
 }
 
 
@@ -868,32 +827,32 @@ vec4 __operator / (const vec4 v, const vec4 u)
 
 vec2 __operator + (const float a, const vec2 u)
 {
-   __asm vec4_add __retVal.xy, a.xx, u.xy;
+   __asm vec4_add __retVal.xy, a, u.xy;
 }
 
 vec2 __operator + (const vec2 v, const float b)
 {
-   __asm vec4_add __retVal.xy, v.xy, b.xx;
+   __asm vec4_add __retVal.xy, v.xy, b;
 }
 
 vec2 __operator - (const float a, const vec2 u)
 {
-   __asm vec4_subtract __retVal.xy, a.xx, u.xy;
+   __asm vec4_subtract __retVal.xy, a, u.xy;
 }
 
 vec2 __operator - (const vec2 v, const float b)
 {
-   __asm vec4_subtract __retVal.xy, v.xy, b.xx;
+   __asm vec4_subtract __retVal.xy, v.xy, b;
 }
 
 vec2 __operator * (const float a, const vec2 u)
 {
-   __asm vec4_multiply __retVal.xy, a.xx, u.xy;
+   __asm vec4_multiply __retVal.xy, a, u.xy;
 }
 
 vec2 __operator * (const vec2 v, const float b)
 {
-   __asm vec4_multiply __retVal.xy, v.xy, b.xx;
+   __asm vec4_multiply __retVal.xy, v.xy, b;
 }
 
 vec2 __operator / (const float a, const vec2 u)
@@ -901,14 +860,14 @@ vec2 __operator / (const float a, const vec2 u)
    vec2 invU;
    __asm float_rcp invU.x, u.x;
    __asm float_rcp invU.y, u.y;
-   __asm vec4_multiply __retVal.xy, a.xx, invU.xy;
+   __asm vec4_multiply __retVal.xy, a, invU.xy;
 }
 
 vec2 __operator / (const vec2 v, const float b)
 {
    float invB;
    __asm float_rcp invB, b;
-   __asm vec4_multiply __retVal.xy, v.xy, invB.xx;
+   __asm vec4_multiply __retVal.xy, v.xy, invB;
 }
 
 
@@ -916,32 +875,32 @@ vec2 __operator / (const vec2 v, const float b)
 
 vec3 __operator + (const float a, const vec3 u)
 {
-   __asm vec4_add __retVal.xyz, a.xxx, u.xyz;
+   __asm vec4_add __retVal.xyz, a, u.xyz;
 }
 
 vec3 __operator + (const vec3 v, const float b)
 {
-   __asm vec4_add __retVal.xyz, v.xyz, b.xxx;
+   __asm vec4_add __retVal.xyz, v.xyz, b;
 }
 
 vec3 __operator - (const float a, const vec3 u)
 {
-   __asm vec4_subtract __retVal.xyz, a.xxx, u.xyz;
+   __asm vec4_subtract __retVal.xyz, a, u.xyz;
 }
 
 vec3 __operator - (const vec3 v, const float b)
 {
-   __asm vec4_subtract __retVal.xyz, v.xyz, b.xxx;
+   __asm vec4_subtract __retVal.xyz, v.xyz, b;
 }
 
 vec3 __operator * (const float a, const vec3 u)
 {
-   __asm vec4_multiply __retVal.xyz, a.xxx, u.xyz;
+   __asm vec4_multiply __retVal.xyz, a, u.xyz;
 }
 
 vec3 __operator * (const vec3 v, const float b)
 {
-   __asm vec4_multiply __retVal.xyz, v.xyz, b.xxx;
+   __asm vec4_multiply __retVal.xyz, v.xyz, b;
 }
 
 vec3 __operator / (const float a, const vec3 u)
@@ -950,14 +909,14 @@ vec3 __operator / (const float a, const vec3 u)
    __asm float_rcp invU.x, u.x;
    __asm float_rcp invU.y, u.y;
    __asm float_rcp invU.z, u.z;
-   __asm vec4_multiply __retVal.xyz, a.xxx, invU.xyz;
+   __asm vec4_multiply __retVal.xyz, a, invU.xyz;
 }
 
 vec3 __operator / (const vec3 v, const float b)
 {
    float invB;
    __asm float_rcp invB, b;
-   __asm vec4_multiply __retVal.xyz, v.xyz, invB.xxx;
+   __asm vec4_multiply __retVal.xyz, v.xyz, invB;
 }
 
 
@@ -965,32 +924,32 @@ vec3 __operator / (const vec3 v, const float b)
 
 vec4 __operator + (const float a, const vec4 u)
 {
-   __asm vec4_add __retVal, a.xxxx, u;
+   __asm vec4_add __retVal, a, u;
 }
 
 vec4 __operator + (const vec4 v, const float b)
 {
-   __asm vec4_add __retVal, v, b.xxxx;
+   __asm vec4_add __retVal, v, b;
 }
 
 vec4 __operator - (const float a, const vec4 u)
 {
-   __asm vec4_subtract __retVal, a.xxxx, u;
+   __asm vec4_subtract __retVal, a, u;
 }
 
 vec4 __operator - (const vec4 v, const float b)
 {
-   __asm vec4_subtract __retVal, v, b.xxxx;
+   __asm vec4_subtract __retVal, v, b;
 }
 
 vec4 __operator * (const float a, const vec4 u)
 {
-   __asm vec4_multiply __retVal, a.xxxx, u;
+   __asm vec4_multiply __retVal, a, u;
 }
 
 vec4 __operator * (const vec4 v, const float b)
 {
-   __asm vec4_multiply __retVal, v, b.xxxx;
+   __asm vec4_multiply __retVal, v, b;
 }
 
 vec4 __operator / (const float a, const vec4 u)
@@ -1000,14 +959,14 @@ vec4 __operator / (const float a, const vec4 u)
    __asm float_rcp invU.y, u.y;
    __asm float_rcp invU.z, u.z;
    __asm float_rcp invU.w, u.w;
-   __asm vec4_multiply __retVal, a.xxxx, invU;
+   __asm vec4_multiply __retVal, a, invU;
 }
 
 vec4 __operator / (const vec4 v, const float b)
 {
    float invB;
    __asm float_rcp invB, b;
-   __asm vec4_multiply __retVal, v, invB.xxxx;
+   __asm vec4_multiply __retVal, v, invB;
 }
 
 
@@ -1254,7 +1213,7 @@ void __operator /= (inout int a, const int b)
    float invB;
    __asm float_rcp invB, b;
    __asm vec4_multiply a, a, invB;
-   __asm float_to_int a, a;
+   __asm vec4_to_ivec4 a, a;
 }
 
 
@@ -1281,7 +1240,7 @@ void __operator /= (inout ivec2 v, const ivec2 u)
    __asm float_rcp inv.x, u.x;
    __asm float_rcp inv.y, u.y;
    __asm vec4_multiply z, v, inv;
-   __asm float_to_int v, z;
+   __asm vec4_to_ivec4 v, z;
 }
 
 
@@ -1308,7 +1267,7 @@ void __operator /= (inout ivec3 v, const ivec3 u)
    __asm float_rcp inv.x, u.x;
    __asm float_rcp inv.y, u.y;
    __asm vec4_multiply z, v, inv;
-   __asm float_to_int v, z;
+   __asm vec4_to_ivec4 v, z;
 }
 
 
@@ -1335,7 +1294,7 @@ void __operator /= (inout ivec4 v, const ivec4 u)
    __asm float_rcp inv.x, u.x;
    __asm float_rcp inv.y, u.y;
    __asm vec4_multiply z, v, inv;
-   __asm float_to_int v, z;
+   __asm vec4_to_ivec4 v, z;
 }
 
 
@@ -1450,17 +1409,17 @@ void __operator /= (inout vec4 v, const vec4 u)
 
 void __operator += (inout ivec2 v, const int a)
 {
-   __asm vec4_add v.xy, v.xy, a.xx;
+   __asm vec4_add v.xy, v.xy, a;
 }
 
 void __operator -= (inout ivec2 v, const int a)
 {
-   __asm vec4_subtract v.xy, v.xy, a.xx;
+   __asm vec4_subtract v.xy, v.xy, a;
 }
 
 void __operator *= (inout ivec2 v, const int a)
 {
-   __asm vec4_multiply v.xy, v.xy, a.xx;
+   __asm vec4_multiply v.xy, v.xy, a;
    v.x *= a;
    v.y *= a;
 }
@@ -1477,17 +1436,17 @@ void __operator /= (inout ivec2 v, const int a)
 
 void __operator += (inout ivec3 v, const int a)
 {
-   __asm vec4_add v.xyz, v.xyz, a.xxx;
+   __asm vec4_add v.xyz, v.xyz, a;
 }
 
 void __operator -= (inout ivec3 v, const int a)
 {
-   __asm vec4_subtract v.xyz, v.xyz, a.xxx;
+   __asm vec4_subtract v.xyz, v.xyz, a;
 }
 
 void __operator *= (inout ivec3 v, const int a)
 {
-   __asm vec4_multiply v.xyz, v.xyz, a.xxx;
+   __asm vec4_multiply v.xyz, v.xyz, a;
 }
 
 void __operator /= (inout ivec3 v, const int a)
@@ -1503,17 +1462,17 @@ void __operator /= (inout ivec3 v, const int a)
 
 void __operator += (inout ivec4 v, const int a)
 {
-   __asm vec4_add v, v, a.xxxx;
+   __asm vec4_add v, v, a;
 }
 
 void __operator -= (inout ivec4 v, const int a)
 {
-   __asm vec4_subtract v, v, a.xxxx;
+   __asm vec4_subtract v, v, a;
 }
 
 void __operator *= (inout ivec4 v, const int a)
 {
-   __asm vec4_multiply v, v, a.xxxx;
+   __asm vec4_multiply v, v, a;
 }
 
 void __operator /= (inout ivec4 v, const int a)
@@ -1530,24 +1489,24 @@ void __operator /= (inout ivec4 v, const int a)
 
 void __operator += (inout vec2 v, const float a)
 {
-   __asm vec4_add v.xy, v, a.xx;
+   __asm vec4_add v.xy, v, a;
 }
 
 void __operator -= (inout vec2 v, const float a)
 {
-   __asm vec4_subtract v.xy, v, a.xx;
+   __asm vec4_subtract v.xy, v, a;
 }
 
 void __operator *= (inout vec2 v, const float a)
 {
-   __asm vec4_multiply v.xy, v, a.xx;
+   __asm vec4_multiply v.xy, v, a;
 }
 
 void __operator /= (inout vec2 v, const float a)
 {
    float invA;
    __asm float_rcp invA, a;
-   __asm vec4_multiply v.xy, v.xy, invA.xx;
+   __asm vec4_multiply v.xy, v.xy, invA;
 }
 
 
@@ -1555,24 +1514,24 @@ void __operator /= (inout vec2 v, const float a)
 
 void __operator += (inout vec3 v, const float a)
 {
-   __asm vec4_add v.xyz, v, a.xxx;
+   __asm vec4_add v.xyz, v, a;
 }
 
 void __operator -= (inout vec3 v, const float a)
 {
-   __asm vec4_subtract v.xyz, v, a.xxx;
+   __asm vec4_subtract v.xyz, v, a;
 }
 
 void __operator *= (inout vec3 v, const float a)
 {
-   __asm vec4_multiply v.xyz, v, a.xxx;
+   __asm vec4_multiply v.xyz, v, a;
 }
 
 void __operator /= (inout vec3 v, const float a)
 {
    float invA;
    __asm float_rcp invA, a;
-   __asm vec4_multiply v.xyz, v.xyz, invA.xxx;
+   __asm vec4_multiply v.xyz, v.xyz, invA;
 }
 
 
@@ -1580,24 +1539,24 @@ void __operator /= (inout vec3 v, const float a)
 
 void __operator += (inout vec4 v, const float a)
 {
-   __asm vec4_add v, v, a.xxxx;
+   __asm vec4_add v, v, a;
 }
 
 void __operator -= (inout vec4 v, const float a)
 {
-   __asm vec4_subtract v, v, a.xxxx;
+   __asm vec4_subtract v, v, a;
 }
 
 void __operator *= (inout vec4 v, const float a)
 {
-   __asm vec4_multiply v, v, a.xxxx;
+   __asm vec4_multiply v, v, a;
 }
 
 void __operator /= (inout vec4 v, const float a)
 {
    float invA;
    __asm float_rcp invA, a;
-   __asm vec4_multiply v, v, invA.xxxx;
+   __asm vec4_multiply v, v, invA;
 }
 
 
diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c
index 363c0a10c2b..582b47c2392 100644
--- a/src/mesa/shader/slang/slang_codegen.c
+++ b/src/mesa/shader/slang/slang_codegen.c
@@ -425,6 +425,7 @@ static slang_asm_info AsmInfo[] = {
    { "vec4_sle", IR_SLE, 1, 2 },
    { "vec4_slt", IR_SLT, 1, 2 },
    /* vec4 unary */
+   { "vec4_move", IR_MOVE, 1, 1 },
    { "vec4_floor", IR_FLOOR, 1, 1 },
    { "vec4_frac", IR_FRAC, 1, 1 },
    { "vec4_abs", IR_ABS, 1, 1 },
@@ -448,8 +449,8 @@ static slang_asm_info AsmInfo[] = {
    { "vec4_texp_rect", IR_TEX, 1, 2 },/* rectangle w/ projection */
 
    /* unary op */
-   { "int_to_float", IR_I_TO_F, 1, 1 },
-   { "float_to_int", IR_F_TO_I, 1, 1 },
+   { "ivec4_to_vec4", IR_I_TO_F, 1, 1 }, /* int[4] to float[4] */
+   { "vec4_to_ivec4", IR_F_TO_I, 1, 1 },  /* float[4] to int[4] */
    { "float_exp", IR_EXP, 1, 1 },
    { "float_exp2", IR_EXP2, 1, 1 },
    { "float_log2", IR_LOG2, 1, 1 },
@@ -1674,11 +1675,9 @@ _slang_gen_asm(slang_assemble_ctx *A, slang_operation *oper,
          return NULL;
 
       assert(!n->Store);
-      n->Store = get_store(n0);
-      n->Writemask = writemask;
+      n->Store = n0->Store;
 
-      assert(n->Store->File != PROGRAM_UNDEFINED ||
-             n->Store->Parent);
+      assert(n->Store->File != PROGRAM_UNDEFINED || n->Store->Parent);
 
       _slang_free(n0);
    }
@@ -1957,6 +1956,7 @@ _slang_gen_function_call_name(slang_assemble_ctx *A, const char *name,
    slang_atom atom;
    slang_function *fun;
    GLboolean error;
+   slang_ir_node *n;
 
    atom = slang_atom_pool_atom(A->atoms, name);
    if (atom == SLANG_ATOM_NULL)
@@ -2008,7 +2008,17 @@ _slang_gen_function_call_name(slang_assemble_ctx *A, const char *name,
       assert(fun);
    }
 
-   return _slang_gen_function_call(A, fun, oper, dest);
+   n = _slang_gen_function_call(A, fun, oper, dest);
+
+   if (n && !n->Store && !dest
+       && fun->header.type.specifier.type != SLANG_SPEC_VOID) {
+      /* setup n->Store for the result of the function call */
+      GLint size = _slang_sizeof_type_specifier(&fun->header.type.specifier);
+      n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, size);
+      /*printf("Alloc storage for function result, size %d \n", size);*/
+   }
+
+   return n;
 }
 
 
@@ -2524,13 +2534,13 @@ _slang_gen_select(slang_assemble_ctx *A, slang_operation *oper)
    tmpVar = new_node0(IR_VAR);
    tmpVar->Store = tmpDecl->Store;
    trueExpr = _slang_gen_operation(A, &oper->children[1]);
-   trueNode = new_node2(IR_MOVE, tmpVar, trueExpr);
+   trueNode = new_node2(IR_COPY, tmpVar, trueExpr);
 
    /* if-false body (child 2) */
    tmpVar = new_node0(IR_VAR);
    tmpVar->Store = tmpDecl->Store;
    falseExpr = _slang_gen_operation(A, &oper->children[2]);
-   falseNode = new_node2(IR_MOVE, tmpVar, falseExpr);
+   falseNode = new_node2(IR_COPY, tmpVar, falseExpr);
 
    ifNode = new_if(cond, trueNode, falseNode);
 
@@ -2711,7 +2721,7 @@ _slang_gen_declaration(slang_assemble_ctx *A, slang_operation *oper)
    assert(oper->num_children == 0 || oper->num_children == 1);
 
    v = _slang_locate_variable(oper->locals, oper->a_id, GL_TRUE);
-   /*printf("Declare %s at %p\n", varName, v);*/
+   /*printf("Declare %s at %p\n", varName, (void *) v);*/
    assert(v);
 
 #if 0
@@ -2736,7 +2746,8 @@ _slang_gen_declaration(slang_assemble_ctx *A, slang_operation *oper)
       rhs = _slang_gen_operation(A, &oper->children[0]);
       if (!rhs)
          return NULL;  /* must have found an error */
-      init = new_node2(IR_MOVE, var, rhs);
+      init = new_node2(IR_COPY, var, rhs);
+
       /*assert(rhs->Opcode != IR_SEQ);*/
       n = new_seq(varDecl, init);
    }
@@ -2775,12 +2786,14 @@ _slang_gen_declaration(slang_assemble_ctx *A, slang_operation *oper)
       if (!rhs)
          return NULL;
 
+      /*assert(rhs->Store);*/
+
       if (rhs->Store && var->Store->Size != rhs->Store->Size) {
          slang_info_log_error(A->log, "invalid assignment (wrong types)");
          return NULL;
       }
 
-      init = new_node2(IR_MOVE, var, rhs);
+      init = new_node2(IR_COPY, var, rhs);
       n = new_seq(varDecl, init);
    }
    else {
@@ -2851,12 +2864,14 @@ _slang_assignment_compatible(const slang_typeinfo *t0,
                              const slang_typeinfo *t1)
 
 {
-#if 0
+#if 1
    GLuint sz0 = _slang_sizeof_type_specifier(&t0->spec);
    GLuint sz1 = _slang_sizeof_type_specifier(&t1->spec);
 
-   if (sz0 != sz1)
+   if (sz0 != sz1) {
+      printf("size mismatch %u vs %u\n", sz0, sz1);
       return GL_FALSE;
+   }
 #endif
 
    if (t0->spec.type == SLANG_SPEC_STRUCT &&
@@ -2974,7 +2989,7 @@ _slang_gen_assignment(slang_assemble_ctx * A, slang_operation *oper)
              */
             rhs = _slang_gen_swizzle(rhs, newSwizzle);
          }
-         n = new_node2(IR_MOVE, lhs, rhs);
+         n = new_node2(IR_COPY, lhs, rhs);
          n->Writemask = writemask;
          return n;
       }
@@ -3208,6 +3223,7 @@ _slang_gen_compare(slang_assemble_ctx *A, slang_operation *oper,
                    slang_ir_opcode opcode)
 {
    slang_typeinfo t0, t1;
+   slang_ir_node *n;
    
    slang_typeinfo_construct(&t0);
    _slang_typeof_operation(A, &oper->children[0], &t0);
@@ -3221,9 +3237,14 @@ _slang_gen_compare(slang_assemble_ctx *A, slang_operation *oper,
       return NULL;
    }
 
-   return new_node2(opcode,
-                    _slang_gen_operation(A, &oper->children[0]),
-                    _slang_gen_operation(A, &oper->children[1]));
+   n =  new_node2(opcode,
+                  _slang_gen_operation(A, &oper->children[0]),
+                  _slang_gen_operation(A, &oper->children[1]));
+
+   /* result is a bool (size 1) */
+   n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
+
+   return n;
 }
 
 
@@ -3726,7 +3747,7 @@ _slang_codegen_global_variable(slang_assemble_ctx *A, slang_variable *var,
       if (var->initializer) {
          slang_ir_node *lhs, *rhs, *init;
 
-         /* Generate IR_MOVE instruction to initialize the variable */
+         /* Generate IR_COPY instruction to initialize the variable */
          lhs = new_node0(IR_VAR);
          lhs->Var = var;
          lhs->Store = n->Store;
@@ -3736,7 +3757,7 @@ _slang_codegen_global_variable(slang_assemble_ctx *A, slang_variable *var,
 
          rhs = _slang_gen_operation(A, var->initializer);
          assert(rhs);
-         init = new_node2(IR_MOVE, lhs, rhs);
+         init = new_node2(IR_COPY, lhs, rhs);
          n = new_seq(n, init);
       }
 
diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c
index b1ebad7f177..b902da84570 100644
--- a/src/mesa/shader/slang/slang_emit.c
+++ b/src/mesa/shader/slang/slang_emit.c
@@ -127,22 +127,33 @@ _slang_swizzle_swizzle(GLuint swz1, GLuint swz2)
 
 
 /**
- * Allocate temporary storage for an intermediate result (such as for
- * a multiply or add, etc.
+ * Allocate storage for the given node (if it hasn't already been allocated).
+ *
+ * Typically this is temporary storage for an intermediate result (such as
+ * for a multiply or add, etc).
+ *
+ * If n->Store does not exist it will be created and will be of the size
+ * specified by defaultSize.
  */
 static GLboolean
-alloc_temp_storage(slang_emit_info *emitInfo, slang_ir_node *n, GLint size)
+alloc_node_storage(slang_emit_info *emitInfo, slang_ir_node *n,
+                   GLint defaultSize)
 {
    assert(!n->Var);
-   assert(!n->Store);
-   assert(size > 0);
-   n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, size);
-   if (!_slang_alloc_temp(emitInfo->vt, n->Store)) {
-      slang_info_log_error(emitInfo->log,
-                           "Ran out of registers, too many temporaries");
-      _slang_free(n->Store);
-      n->Store = NULL;
-      return GL_FALSE;
+   if (!n->Store) {
+      assert(defaultSize > 0);
+      n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, defaultSize);
+   }
+
+   /* now allocate actual register(s).  I.e. set n->Store->Index >= 0 */
+   if (n->Store->Index < 0) {
+      if (!_slang_alloc_temp(emitInfo->vt, n->Store)) {
+         slang_info_log_error(emitInfo->log,
+                              "Ran out of registers, too many temporaries");
+         _slang_free(n->Store);
+         n->Store = NULL;
+         return GL_FALSE;
+      }
    }
    return GL_TRUE;
 }
@@ -153,7 +164,7 @@ alloc_temp_storage(slang_emit_info *emitInfo, slang_ir_node *n, GLint size)
  * Otherwise, no-op.
  */
 static void
-free_temp_storage(slang_var_table *vt, slang_ir_node *n)
+free_node_storage(slang_var_table *vt, slang_ir_node *n)
 {
    if (n->Store->File == PROGRAM_TEMPORARY &&
        n->Store->Index >= 0 &&
@@ -167,6 +178,22 @@ free_temp_storage(slang_var_table *vt, slang_ir_node *n)
 }
 
 
+/**
+ * Helper function to allocate a short-term temporary.
+ * Free it with _slang_free_temp().
+ */
+static GLboolean
+alloc_local_temp(slang_emit_info *emitInfo, slang_ir_storage *temp, GLint size)
+{
+   assert(size >= 1);
+   assert(size <= 4);
+   _mesa_bzero(temp, sizeof(*temp));
+   temp->Size = size;
+   temp->File = PROGRAM_TEMPORARY;
+   temp->Index = -1;
+   return _slang_alloc_temp(emitInfo->vt, temp);
+}
+
 
 /**
  * Remove any SWIZZLE_NIL terms from given swizzle mask.
@@ -584,19 +611,15 @@ emit_arith(slang_emit_info *emitInfo, slang_ir_node *n)
    }
 
    /* result storage */
-   if (!n->Store) {
-      GLint size = info->ResultSize;
-      if (!alloc_temp_storage(emitInfo, n, size))
-         return NULL;
-#if 0000 /* this should work, but doesn't yet */
-      if (size == 2)
-         n->Writemask = WRITEMASK_XY;
-      else if (size == 3)
-         n->Writemask = WRITEMASK_XYZ;
-      else if (size == 1)
-         n->Writemask = WRITEMASK_X << GET_SWZ(n->Store->Swizzle,0);
-#endif
-   }
+   alloc_node_storage(emitInfo, n, -1);
+   assert(n->Store->Index >= 0);
+   if (n->Store->Size == 2)
+      n->Writemask = WRITEMASK_XY;
+   else if (n->Store->Size == 3)
+      n->Writemask = WRITEMASK_XYZ;
+   else if (n->Store->Size == 1)
+      n->Writemask = WRITEMASK_X << GET_SWZ(n->Store->Swizzle, 0);
+
 
    storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
 
@@ -608,7 +631,7 @@ emit_arith(slang_emit_info *emitInfo, slang_ir_node *n)
    /* really free temps now */
    for (i = 0; i < 3; i++)
       if (temps[i])
-         free_temp_storage(emitInfo->vt, temps[i]);
+         free_node_storage(emitInfo->vt, temps[i]);
 
    /*_mesa_print_instruction(inst);*/
    return inst;
@@ -636,16 +659,15 @@ emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
       return NULL;
    }
 
+   /* final result is 1 bool */
+   if (!alloc_node_storage(emitInfo, n, 1))
+      return NULL;
+
    size = n->Children[0]->Store->Size;
 
    if (size == 1) {
       gl_inst_opcode opcode;
 
-      if (!n->Store) {
-         if (!alloc_temp_storage(emitInfo, n, 1))  /* 1 bool */
-            return NULL;
-      }
-
       opcode = n->Opcode == IR_EQUAL ? OPCODE_SEQ : OPCODE_SNE;
       inst = new_instruction(emitInfo, opcode);
       storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store);
@@ -655,11 +677,11 @@ emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
    else if (size <= 4) {
       GLuint swizzle;
       gl_inst_opcode dotOp;
-      
-      assert(!n->Store);
-      if (!n->Store) {
-         if (!alloc_temp_storage(emitInfo, n, size))  /* 'size' bools */
-            return NULL;
+      slang_ir_storage tempStore;
+
+      if (!alloc_local_temp(emitInfo, &tempStore, 4)) {
+         return NULL;
+         /* out of temps */
       }
 
       if (size == 4) {
@@ -676,26 +698,25 @@ emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
          swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
       }
 
-      /* Compute equality, inequality (tmp1 = (A ?= B)) */
+      /* Compute inequality (temp = (A != B)) */
       inst = new_instruction(emitInfo, OPCODE_SNE);
+      storage_to_dst_reg(&inst->DstReg, &tempStore, n->Writemask);
       storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store);
       storage_to_src_reg(&inst->SrcReg[1], n->Children[1]->Store);
-      storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
       inst->Comment = _mesa_strdup("Compare values");
 
-      /* Compute tmp2 = DOT(tmp1, tmp1)  (reduction) */
+      /* Compute val = DOT(temp, temp)  (reduction) */
       inst = new_instruction(emitInfo, dotOp);
-      storage_to_src_reg(&inst->SrcReg[0], n->Store);
-      storage_to_src_reg(&inst->SrcReg[1], n->Store);
-      inst->SrcReg[0].Swizzle = inst->SrcReg[1].Swizzle = swizzle; /*override*/
-      free_temp_storage(emitInfo->vt, n); /* free tmp1 */
-      if (!alloc_temp_storage(emitInfo, n, 1))  /* alloc tmp2 */
-         return NULL;
       storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
+      storage_to_src_reg(&inst->SrcReg[0], &tempStore);
+      storage_to_src_reg(&inst->SrcReg[1], &tempStore);
+      inst->SrcReg[0].Swizzle = inst->SrcReg[1].Swizzle = swizzle; /*override*/
       inst->Comment = _mesa_strdup("Reduce vec to bool");
 
+      _slang_free_temp(emitInfo->vt, &tempStore); /* free temp */
+
       if (n->Opcode == IR_EQUAL) {
-         /* compute tmp2.x = !tmp2.x  via tmp2.x = (tmp2.x == 0) */
+         /* compute val = !val.x  with SEQ val, val, 0; */
          inst = new_instruction(emitInfo, OPCODE_SEQ);
          storage_to_src_reg(&inst->SrcReg[0], n->Store);
          constant_to_src_reg(&inst->SrcReg[1], 0.0, emitInfo);
@@ -708,73 +729,59 @@ emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
        * XXX this won't work reliably for structs with padding!!
        */
       GLint i, num = (n->Children[0]->Store->Size + 3) / 4;
-      slang_ir_storage accTemp;
+      slang_ir_storage accTemp, sneTemp;
 
-      if (!n->Store) {
-         if (!alloc_temp_storage(emitInfo, n, 4))
-            return NULL;
-      }
+      if (!alloc_local_temp(emitInfo, &accTemp, 4))
+         return NULL;
 
-      accTemp.Size = 4;
-      accTemp.File = PROGRAM_TEMPORARY;
-      if (!_slang_alloc_temp(emitInfo->vt, &accTemp)) {
+      if (!alloc_local_temp(emitInfo, &sneTemp, 4))
          return NULL;
-         /* out of temps */
-      }
 
       for (i = 0; i < num; i++) {
-         /* SNE t0, left[i], right[i] */
+         /* SNE sneTemp, left[i], right[i] */
          inst = new_instruction(emitInfo, OPCODE_SNE);
          storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store);
          storage_to_src_reg(&inst->SrcReg[1], n->Children[1]->Store);
          inst->SrcReg[0].Index += i;
          inst->SrcReg[1].Index += i;
          if (i == 0) {
-            inst->DstReg.File = accTemp.File;
-            inst->DstReg.Index = accTemp.Index;
+            storage_to_dst_reg(&inst->DstReg, &accTemp, WRITEMASK_XYZW);
             inst->Comment = _mesa_strdup("Begin struct/array comparison");
          }
          else {
-            inst->DstReg.File = n->Store->File;
-            inst->DstReg.Index = n->Store->Index;
-         }
-         if (i > 0) {
-            /* ADD accTemp, accTemp, temp; # like logical-OR */
+            storage_to_dst_reg(&inst->DstReg, &sneTemp, WRITEMASK_XYZW);
+
+            /* ADD accTemp, accTemp, sneTemp; # like logical-OR */
             inst = new_instruction(emitInfo, OPCODE_ADD);
-            inst->SrcReg[0].File = accTemp.File;
-            inst->SrcReg[0].Index = accTemp.Index;
-            inst->SrcReg[1].File = n->Store->File;
-            inst->SrcReg[1].Index = n->Store->Index;
-            inst->DstReg.File = accTemp.File;
-            inst->DstReg.Index = accTemp.Index;
+            storage_to_dst_reg(&inst->DstReg, &accTemp, WRITEMASK_XYZW);
+            storage_to_src_reg(&inst->SrcReg[0], &accTemp);
+            storage_to_src_reg(&inst->SrcReg[1], &sneTemp);
          }
       }
 
       /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */
       inst = new_instruction(emitInfo, OPCODE_DP4);
-      inst->SrcReg[0].File = accTemp.File;
-      inst->SrcReg[0].Index = accTemp.Index;
-      inst->SrcReg[1].File = accTemp.File;
-      inst->SrcReg[1].Index = accTemp.Index;
-      inst->DstReg.File = n->Store->File;
-      inst->DstReg.Index = n->Store->Index;
+      storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
+      storage_to_src_reg(&inst->SrcReg[0], &accTemp);
+      storage_to_src_reg(&inst->SrcReg[1], &accTemp);
       inst->Comment = _mesa_strdup("End struct/array comparison");
 
       if (n->Opcode == IR_EQUAL) {
          /* compute tmp.x = !tmp.x  via tmp.x = (tmp.x == 0) */
          inst = new_instruction(emitInfo, OPCODE_SEQ);
+         storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
          storage_to_src_reg(&inst->SrcReg[0], n->Store);
          constant_to_src_reg(&inst->SrcReg[1], 0.0, emitInfo);
-         storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
          inst->Comment = _mesa_strdup("Invert true/false");
       }
 
       _slang_free_temp(emitInfo->vt, &accTemp);
+      _slang_free_temp(emitInfo->vt, &sneTemp);
    }
 
    /* free temps */
-   free_temp_storage(emitInfo->vt, n->Children[0]);
-   free_temp_storage(emitInfo->vt, n->Children[1]);
+   free_node_storage(emitInfo->vt, n->Children[0]);
+   free_node_storage(emitInfo->vt, n->Children[1]);
 
    return inst;
 }
@@ -827,9 +834,8 @@ emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
    }
 #endif
 
-   if (!n->Store)
-      if (!alloc_temp_storage(emitInfo, n, n->Children[0]->Store->Size))
-         return NULL;
+   if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
+      return NULL;
 
    emit(emitInfo, n->Children[1]);
    emit(emitInfo, n->Children[2]);
@@ -839,7 +845,7 @@ emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
     * the intermediate result.  Use a temp register instead.
     */
    _mesa_bzero(&tmpNode, sizeof(tmpNode));
-   alloc_temp_storage(emitInfo, &tmpNode, n->Store->Size);
+   alloc_node_storage(emitInfo, &tmpNode, n->Store->Size);
 
    /* tmp = max(ch[0], ch[1]) */
    inst = new_instruction(emitInfo, OPCODE_MAX);
@@ -853,7 +859,7 @@ emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
    storage_to_src_reg(&inst->SrcReg[0], tmpNode.Store);
    storage_to_src_reg(&inst->SrcReg[1], n->Children[2]->Store);
 
-   free_temp_storage(emitInfo->vt, &tmpNode);
+   free_node_storage(emitInfo->vt, &tmpNode);
 
    return inst;
 }
@@ -870,9 +876,8 @@ emit_negation(slang_emit_info *emitInfo, slang_ir_node *n)
 
    emit(emitInfo, n->Children[0]);
 
-   if (!n->Store)
-      if (!alloc_temp_storage(emitInfo, n, n->Children[0]->Store->Size))
-         return NULL;
+   if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
+      return NULL;
 
    inst = new_instruction(emitInfo, OPCODE_MOV);
    storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
@@ -1014,9 +1019,8 @@ emit_tex(slang_emit_info *emitInfo, slang_ir_node *n)
       inst = new_instruction(emitInfo, OPCODE_TXP);
    }
 
-   if (!n->Store)
-      if (!alloc_temp_storage(emitInfo, n, 4))
-         return NULL;
+   if (!alloc_node_storage(emitInfo, n, 4))
+      return NULL;
 
    storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
 
@@ -1045,12 +1049,15 @@ emit_tex(slang_emit_info *emitInfo, slang_ir_node *n)
 }
 
 
+/**
+ * Assignment/copy
+ */
 static struct prog_instruction *
-emit_move(slang_emit_info *emitInfo, slang_ir_node *n)
+emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
 {
    struct prog_instruction *inst;
 
-   assert(n->Opcode == IR_MOVE);
+   assert(n->Opcode == IR_COPY);
 
    /* lhs */
    emit(emitInfo, n->Children[0]);
@@ -1115,7 +1122,7 @@ emit_move(slang_emit_info *emitInfo, slang_ir_node *n)
          srcStore.Size = 4;
          while (size >= 4) {
             inst = new_instruction(emitInfo, OPCODE_MOV);
-            inst->Comment = _mesa_strdup("IR_MOVE block");
+            inst->Comment = _mesa_strdup("IR_COPY block");
             storage_to_dst_reg(&inst->DstReg, &dstStore, n->Writemask);
             storage_to_src_reg(&inst->SrcReg[0], &srcStore);
             srcStore.Index++;
@@ -1135,7 +1142,7 @@ emit_move(slang_emit_info *emitInfo, slang_ir_node *n)
          inst->Comment = instruction_annotation(inst->Opcode, dstAnnot,
                                                 srcAnnot, NULL, NULL);
       }
-      free_temp_storage(emitInfo->vt, n->Children[1]);
+      free_node_storage(emitInfo->vt, n->Children[1]);
       return inst;
    }
 }
@@ -1184,7 +1191,7 @@ emit_cond(slang_emit_info *emitInfo, slang_ir_node *n)
           * is normally generated for the expression "i".
           * Generate a move instruction just to set condition codes.
           */
-         if (!alloc_temp_storage(emitInfo, n, 1))
+         if (!alloc_node_storage(emitInfo, n, 1))
             return NULL;
          inst = new_instruction(emitInfo, OPCODE_MOV);
          inst->CondUpdate = GL_TRUE;
@@ -1240,15 +1247,14 @@ emit_not(slang_emit_info *emitInfo, slang_ir_node *n)
 #endif
 
    /* else, invert using SEQ (v = v == 0) */
-   if (!n->Store)
-      if (!alloc_temp_storage(emitInfo, n, n->Children[0]->Store->Size))
-         return NULL;
+   if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
+      return NULL;
 
    inst = new_instruction(emitInfo, OPCODE_SEQ);
    storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
    storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store);
    constant_to_src_reg(&inst->SrcReg[1], 0.0, emitInfo);
-   free_temp_storage(emitInfo->vt, n->Children[0]);
+   free_node_storage(emitInfo->vt, n->Children[0]);
 
    inst->Comment = _mesa_strdup("NOT");
    return inst;
@@ -1731,27 +1737,15 @@ emit(slang_emit_info *emitInfo, slang_ir_node *n)
    case IR_SWIZZLE:
       return emit_swizzle(emitInfo, n);
 
-   case IR_I_TO_F:
-      /* just move */
-      emit(emitInfo, n->Children[0]);
-      inst = new_instruction(emitInfo, OPCODE_MOV);
-      if (!n->Store) {
-         if (!alloc_temp_storage(emitInfo, n, 1))
-            return NULL;
-      }
-      storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
-      storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store);
-      if (emitInfo->EmitComments)
-         inst->Comment = _mesa_strdup("int to float");
-      return NULL;
-
    /* Simple arithmetic */
    /* unary */
+   case IR_MOVE:
    case IR_RSQ:
    case IR_RCP:
    case IR_FLOOR:
    case IR_FRAC:
    case IR_F_TO_I:
+   case IR_I_TO_F:
    case IR_ABS:
    case IR_SIN:
    case IR_COS:
@@ -1808,8 +1802,8 @@ emit(slang_emit_info *emitInfo, slang_ir_node *n)
       }
       return NULL;
 
-   case IR_MOVE:
-      return emit_move(emitInfo, n);
+   case IR_COPY:
+      return emit_copy(emitInfo, n);
 
    case IR_COND:
       return emit_cond(emitInfo, n);
diff --git a/src/mesa/shader/slang/slang_ir.c b/src/mesa/shader/slang/slang_ir.c
index 23d554234ee..3a0b8bf3a0d 100644
--- a/src/mesa/shader/slang/slang_ir.c
+++ b/src/mesa/shader/slang/slang_ir.c
@@ -54,7 +54,8 @@ static const slang_ir_info IrInfo[] = {
    { IR_NOTEQUAL, "IR_NOTEQUAL", OPCODE_NOP, 1, 2 },
 
    /* unary ops */
-   { IR_I_TO_F, "IR_I_TO_F", OPCODE_NOP, 1, 1 },
+   { IR_MOVE, "IR_MOVE", OPCODE_MOV, 4, 1 },
+   { IR_I_TO_F, "IR_I_TO_F", OPCODE_MOV, 4, 1 },  /* int[4] to float[4] */
    { IR_F_TO_I, "IR_F_TO_I", OPCODE_INT, 4, 1 }, /* 4 floats to 4 ints */
    { IR_EXP, "IR_EXP", OPCODE_EXP, 1, 1 },
    { IR_EXP2, "IR_EXP2", OPCODE_EX2, 1, 1 },
@@ -82,7 +83,7 @@ static const slang_ir_info IrInfo[] = {
    { IR_KILL, "IR_KILL", OPCODE_NOP, 0, 0 },
    { IR_COND, "IR_COND", OPCODE_NOP, 0, 0 },
    { IR_CALL, "IR_CALL", OPCODE_NOP, 0, 0 },
-   { IR_MOVE, "IR_MOVE", OPCODE_NOP, 0, 1 },
+   { IR_COPY, "IR_COPY", OPCODE_NOP, 0, 1 },
    { IR_NOT, "IR_NOT", OPCODE_NOP, 1, 1 },
    { IR_VAR, "IR_VAR", OPCODE_NOP, 0, 0 },
    { IR_VAR_DECL, "IR_VAR_DECL", OPCODE_NOP, 0, 0 },
@@ -326,8 +327,8 @@ _slang_print_ir_tree(const slang_ir_node *n, int indent)
       assert(!n->Children[1]);
       _slang_print_ir_tree(n->Children[0], indent + 3);
       break;
-   case IR_MOVE:
-      printf("MOVE (writemask = %s)\n", writemask_string(n->Writemask));
+   case IR_COPY:
+      printf("COPY (writemask = %s)\n", writemask_string(n->Writemask));
       _slang_print_ir_tree(n->Children[0], indent+3);
       _slang_print_ir_tree(n->Children[1], indent+3);
       break;
diff --git a/src/mesa/shader/slang/slang_ir.h b/src/mesa/shader/slang/slang_ir.h
index e4697ba3b47..f64f9a93b78 100644
--- a/src/mesa/shader/slang/slang_ir.h
+++ b/src/mesa/shader/slang/slang_ir.h
@@ -70,14 +70,14 @@ typedef enum
                  /* n->Parent = ptr to parent IR_LOOP Node */
    IR_BREAK,     /* break loop */
 
-   IR_BREAK_IF_TRUE,
+   IR_BREAK_IF_TRUE, /**< Children[0] = the condition expression */
    IR_CONT_IF_TRUE,
-                 /* Children[0] = the condition expression */
 
-   IR_MOVE,
+   IR_COPY,       /**< assignment/copy */
+   IR_MOVE,       /**< assembly MOV instruction */
 
    /* vector ops: */
-   IR_ADD,
+   IR_ADD,        /**< assembly ADD instruction */
    IR_SUB,
    IR_MUL,
    IR_DIV,
diff --git a/src/mesa/shader/slang/slang_vartable.c b/src/mesa/shader/slang/slang_vartable.c
index 68b4e00be03..9b607e64037 100644
--- a/src/mesa/shader/slang/slang_vartable.c
+++ b/src/mesa/shader/slang/slang_vartable.c
@@ -110,9 +110,10 @@ _slang_pop_var_table(slang_var_table *vt)
       slang_ir_storage *store = (slang_ir_storage *) t->Vars[i]->aux;
       GLint j;
       GLuint comp;
-      if (dbg) printf("  Free var %s, size %d at %d\n",
+      if (dbg) printf("  Free var %s, size %d at %d.%s\n",
                       (char*) t->Vars[i]->a_name, store->Size,
-                      store->Index);
+                      store->Index,
+                      _mesa_swizzle_string(store->Swizzle, 0, 0));
 
       if (store->Size == 1)
          comp = GET_SWZ(store->Swizzle, 0);
@@ -159,7 +160,7 @@ _slang_add_variable(slang_var_table *vt, slang_variable *v)
    assert(vt);
    t = vt->Top;
    assert(t);
-   if (dbg) printf("Adding var %s\n", (char *) v->a_name);
+   if (dbg) printf("Adding var %s, store %p\n", (char *) v->a_name, v->aux);
    t->Vars = (slang_variable **)
       _slang_realloc(t->Vars,
                      t->NumVars * sizeof(slang_variable *),
@@ -262,10 +263,11 @@ _slang_alloc_var(slang_var_table *vt, slang_ir_storage *store)
    }
 
    if (dbg)
-      printf("Alloc var sz %d at %d.%s (level %d)\n",
+      printf("Alloc var storage sz %d at %d.%s (level %d) store %p\n",
              store->Size, store->Index,
              _mesa_swizzle_string(store->Swizzle, 0, 0),
-             t->Level);
+             t->Level,
+             (void*) store);
 
    return GL_TRUE;
 }
@@ -283,19 +285,29 @@ _slang_alloc_temp(slang_var_table *vt, slang_ir_storage *store)
    if (i < 0)
       return GL_FALSE;
 
+   assert(store->Index < 0);
+
    store->Index = i / 4;
    if (store->Size == 1) {
       const GLuint comp = i % 4;
       store->Swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
-      if (dbg) printf("Alloc temp sz %d at %d.%c (level %d)\n",
-                      store->Size, store->Index, "xyzw"[comp], t->Level);
    }
    else {
       /* XXX improve swizzled for size=2/3, use for writemask... */
+#if 1
+      if (store->Size == 2) {
+         store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
+                                        SWIZZLE_NIL, SWIZZLE_NIL);
+      }
+#endif
       store->Swizzle = SWIZZLE_NOOP;
-      if (dbg) printf("Alloc temp sz %d at %d.xyzw (level %d)\n",
-                      store->Size, store->Index, t->Level);
    }
+
+   if (dbg) printf("Alloc temp sz %d at %d.%s (level %d) store %p\n",
+                   store->Size, store->Index,
+                   _mesa_swizzle_string(store->Swizzle, 0, 0), t->Level,
+                   (void *) store);
+
    return GL_TRUE;
 }
 
@@ -309,7 +321,10 @@ _slang_free_temp(slang_var_table *vt, slang_ir_storage *store)
    assert(store->Size > 0);
    assert(r >= 0);
    assert(r + store->Size <= vt->MaxRegisters * 4);
-   if (dbg) printf("Free temp sz %d at %d (level %d)\n", store->Size, r, t->Level);
+   if (dbg) printf("Free temp sz %d at %d.%s (level %d) store %p\n",
+                   store->Size, r,
+                   _mesa_swizzle_string(store->Swizzle, 0, 0),
+                   t->Level, (void *) store);
    if (store->Size == 1) {
       const GLuint comp = GET_SWZ(store->Swizzle, 0);
       /* we can actually fail some of these assertions because of the
@@ -319,8 +334,8 @@ _slang_free_temp(slang_var_table *vt, slang_ir_storage *store)
       assert(store->Swizzle == MAKE_SWIZZLE4(comp, comp, comp, comp));
       assert(comp < 4);
       assert(t->ValSize[r * 4 + comp] == 1);
-      assert(t->Temps[r * 4 + comp] == TEMP);
 #endif
+      assert(t->Temps[r * 4 + comp] == TEMP);
       t->Temps[r * 4 + comp] = FREE;
    }
    else {