From: Brian Paul Date: Tue, 5 Aug 2008 22:18:39 +0000 (-0600) Subject: mesa: glsl: re-org of intermediate/temp storage X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1308ca6d2168c5c2f81a8e675687e9d9a4db1a28;p=mesa.git mesa: glsl: re-org of intermediate/temp storage Simplify the code for allocating storage for intermediate results. Use fewer temps in some cases. Also, use new asm vec4_move intrinsic instead of regular assigments in various constructors. For example: float f; vec3 v; v.xyz = f; is not legal GLSL, so do this instead: __asm vec4_move v.xyz, f; // note: f will auto-expand into f.xxxx Plus, fix assorted bugs in structure comparison. --- diff --git a/src/mesa/shader/slang/library/slang_common_builtin.gc b/src/mesa/shader/slang/library/slang_common_builtin.gc index 3726335471f..f41d1fb6037 100644 --- a/src/mesa/shader/slang/library/slang_common_builtin.gc +++ b/src/mesa/shader/slang/library/slang_common_builtin.gc @@ -1418,7 +1418,7 @@ bvec4 greaterThan(const vec4 u, const vec4 v) bvec2 greaterThan(const ivec2 u, const ivec2 v) { - __asm vec4_sgt __retVal.xy, u, v; + __asm vec4_sgt __retVal.xy, u.xy, v.xy; } bvec3 greaterThan(const ivec3 u, const ivec3 v) diff --git a/src/mesa/shader/slang/library/slang_core.gc b/src/mesa/shader/slang/library/slang_core.gc index 218383e001d..840a0814c5d 100644 --- a/src/mesa/shader/slang/library/slang_core.gc +++ b/src/mesa/shader/slang/library/slang_core.gc @@ -107,7 +107,7 @@ int __constructor(const float f) { - __asm float_to_int __retVal, f; + __asm vec4_to_ivec4 __retVal, f; } int __constructor(const bool b) @@ -122,14 +122,12 @@ int __constructor(const int i) bool __constructor(const int i) { - const float zero = 0.0; - __asm vec4_sne __retVal, i, zero; + __asm vec4_sne __retVal, i, 0.0; } bool __constructor(const float f) { - const float zero = 0.0; - __asm vec4_sne __retVal, f, zero; + __asm vec4_sne __retVal, f, 0.0; } bool __constructor(const bool b) @@ -139,12 +137,12 @@ bool __constructor(const bool b) float __constructor(const int i) { - __asm int_to_float __retVal, i; + __asm ivec4_to_vec4 __retVal, i; } float __constructor(const bool b) { - __retVal = b; + __asm ivec4_to_vec4 __retVal, b; } float __constructor(const float f) @@ -163,32 +161,33 @@ vec2 __constructor(const float x, const float y) vec2 __constructor(const float f) { - __retVal.xy = f.xx; + __asm vec4_move __retVal.xy, f; } vec2 __constructor(const int i) { - __retVal.xy = i.xx; + __asm ivec4_to_vec4 __retVal.xy, i; } vec2 __constructor(const bool b) { - __retVal.xy = b.xx; + __asm ivec4_to_vec4 __retVal.xy, b; } vec2 __constructor(const bvec2 b) { - __retVal = b; +// __retVal = b; + __asm ivec4_to_vec4 __retVal.xy, b; } vec2 __constructor(const vec3 v) { - __retVal.xy = v.xy; + __asm vec4_move __retVal.xy, v.xy; } vec2 __constructor(const vec4 v) { - __retVal.st = v.xy; + __asm vec4_move __retVal.xy, v.xy; } @@ -203,27 +202,28 @@ vec3 __constructor(const float x, const float y, const float z) vec3 __constructor(const float f) { - __retVal.xyz = f.xxx; + // Note: this could be "__retVal.xyz = f" but that's an illegal assignment + __asm vec4_move __retVal.xyz, f; } vec3 __constructor(const int i) { - __asm int_to_float __retVal.xyz, i.xxx; + __asm ivec4_to_vec4 __retVal.xyz, i; } vec3 __constructor(const bool b) { - __retVal.xyz = b.xxx; + __asm ivec4_to_vec4 __retVal.xyz, b; } vec3 __constructor(const bvec3 b) { - __retVal = b; + __asm ivec4_to_vec4 __retVal.xyz, b; } vec3 __constructor(const vec4 v) { - __retVal.xyz = v.xyz; + __asm vec4_move __retVal.xyz, v; } @@ -239,27 +239,28 @@ vec4 __constructor(const float x, const float y, const float z, const float w) vec4 __constructor(const float f) { - __retVal = f.xxxx; + // Note: this could be "__retVal = f" but that's an illegal assignment + __asm vec4_move __retVal, f; } vec4 __constructor(const int i) { - __retVal = i.xxxx; + __asm ivec4_to_vec4 __retVal, i; } vec4 __constructor(const bool b) { - __retVal = b.xxxx; + __asm ivec4_to_vec4 __retVal, b; } vec4 __constructor(const bvec4 b) { - __retVal = b; + __asm ivec4_to_vec4 __retVal, b; } vec4 __constructor(const ivec4 i) { - __retVal = i; + __asm ivec4_to_vec4 __retVal, i; } vec4 __constructor(const vec3 v3, const float f) @@ -288,17 +289,17 @@ ivec2 __constructor(const int i, const int j) ivec2 __constructor(const int i) { - __retVal.xy = i.xx; + __asm vec4_move __retVal.xy, i; } ivec2 __constructor(const float f) { - __asm float_to_int __retVal.xy, f.xx; + __asm vec4_to_ivec4 __retVal.xy, f; } ivec2 __constructor(const bool b) { - __asm float_to_int __retVal.xy, b.xx; + __asm vec4_to_ivec4 __retVal.xy, b; } @@ -313,17 +314,17 @@ ivec3 __constructor(const int i, const int j, const int k) ivec3 __constructor(const int i) { - __retVal.xyz = i.xxx; + __asm vec4_move __retVal.xyz, i; } ivec3 __constructor(const float f) { - __retVal.xyz = f.xxx; + __asm vec4_to_ivec4 __retVal.xyz, f; } ivec3 __constructor(const bool b) { - __retVal.xyz = b.xxx; + __asm vec4_move __retVal.xyz, b; } @@ -339,17 +340,17 @@ ivec4 __constructor(const int x, const int y, const int z, const int w) ivec4 __constructor(const int i) { - __retVal = i.xxxx; + __asm vec4_move __retVal, i; } ivec4 __constructor(const float f) { - __asm float_to_int __retVal, f.xxxx; + __asm vec4_to_ivec4 __retVal, f; } ivec4 __constructor(const bool b) { - __retVal = b.xxxx; + __asm vec4_to_ivec4 __retVal, b; } @@ -363,19 +364,17 @@ bvec2 __constructor(const bool b1, const bool b2) bvec2 __constructor(const bool b) { - __retVal.xy = b.xx; + __asm vec4_move __retVal.xy, b; } bvec2 __constructor(const float f) { - const vec2 zero = vec2(0.0, 0.0); - __asm vec4_sne __retVal.xy, f.xx, zero; + __asm vec4_sne __retVal.xy, f, 0.0; } bvec2 __constructor(const int i) { - const ivec2 zero = ivec2(0, 0); - __asm vec4_sne __retVal.xy, i.xx, zero; + __asm vec4_sne __retVal.xy, i, 0.0; } bvec2 __constructor(const vec2 v) @@ -385,8 +384,7 @@ bvec2 __constructor(const vec2 v) bvec2 __constructor(const ivec2 v) { - const ivec2 zero = ivec2(0, 0); - __asm vec4_sne __retVal.xy, v, zero; + __asm vec4_sne __retVal.xy, v, 0.0; } @@ -402,31 +400,27 @@ bvec3 __constructor(const bool b1, const bool b2, const bool b3) bvec3 __constructor(const bool b) { - __retVal.xyz = b.xxx; + __asm vec4_move __retVal.xyz, b; } bvec3 __constructor(const float f) { - const vec3 zero = vec3(0.0, 0.0, 0.0); - __asm vec4_sne __retVal.xyz, f.xxx, zero; + __asm vec4_sne __retVal.xyz, f, 0.0; } bvec3 __constructor(const int i) { - const ivec3 zero = ivec3(0, 0, 0); - __asm vec4_sne __retVal.xyz, i.xxx, zero; + __asm vec4_sne __retVal.xyz, i, 0.0; } bvec3 __constructor(const vec3 v) { - const vec3 zero = vec3(0.0, 0.0, 0.0); - __asm vec4_sne __retVal.xyz, v, zero; + __asm vec4_sne __retVal.xyz, v, 0.0; } bvec3 __constructor(const ivec3 v) { - const ivec3 zero = ivec3(0, 0, 0); - __asm vec4_sne __retVal.xyz, v, zero; + __asm vec4_sne __retVal.xyz, v, 0.0; } @@ -452,31 +446,27 @@ bvec4 __constructor(const float f1, const float f2, const float f3, const float bvec4 __constructor(const bool b) { - __retVal.xyzw = b.xxxx; + __asm vec4_move __retVal.xyzw, b; } bvec4 __constructor(const float f) { - const vec4 zero = vec4(0.0, 0.0, 0.0, 0.0); - __asm vec4_sne __retVal, f.xxxx, zero; + __asm vec4_sne __retVal.xyzw, f, 0.0; } bvec4 __constructor(const int i) { - const ivec4 zero = ivec4(0, 0, 0, 0); - __asm vec4_sne __retVal, i.xxxx, zero; + __asm vec4_sne __retVal.xyzw, i, 0.0; } bvec4 __constructor(const vec4 v) { - const vec4 zero = vec4(0.0, 0.0, 0.0, 0.0); - __asm vec4_sne __retVal, v, zero; + __asm vec4_sne __retVal.xyzw, v, 0.0; } bvec4 __constructor(const ivec4 v) { - const ivec4 zero = ivec4(0, 0, 0, 0); - __asm vec4_sne __retVal, v, zero; + __asm vec4_sne __retVal.xyzw, v, 0.0; } @@ -619,30 +609,17 @@ mat4 __constructor(const vec4 c0, const vec4 c1, const vec4 c2, const vec4 c3) int __operator + (const int a, const int b) { -// XXX If we ever have int registers, we'll do something like this: -// XXX For now, mostly treat ints as floats. -// float x, y; -// __asm int_to_float x, a; -// __asm int_to_float y, b; -// __asm vec4_add x.x, x.x, y.x; -// __asm float_to_int __retVal, x; - float x; - __asm vec4_add x, a, b; - __asm float_to_int __retVal, x; + __asm vec4_add __retVal, a, b; } int __operator - (const int a, const int b) { - float x; - __asm vec4_subtract x, a, b; - __asm float_to_int __retVal, x; + __asm vec4_subtract __retVal, a, b; } int __operator * (const int a, const int b) { - float x; - __asm vec4_multiply x, a, b; - __asm float_to_int __retVal, x; + __asm vec4_multiply __retVal, a, b; } int __operator / (const int a, const int b) @@ -650,7 +627,7 @@ int __operator / (const int a, const int b) float bInv, x; __asm float_rcp bInv, b; __asm vec4_multiply x, a, bInv; - __asm float_to_int __retVal, x; + __asm vec4_to_ivec4 __retVal, x; } @@ -658,23 +635,17 @@ int __operator / (const int a, const int b) ivec2 __operator + (const ivec2 a, const ivec2 b) { - vec2 x; - __asm vec4_add x, a, b; - __asm float_to_int __retVal, x; + __asm vec4_add __retVal, a, b; } ivec2 __operator - (const ivec2 a, const ivec2 b) { - vec2 x; - __asm vec4_subtract x, a, b; - __asm float_to_int __retVal, x; + __asm vec4_subtract __retVal, a, b; } ivec2 __operator * (const ivec2 a, const ivec2 b) { - vec2 x; - __asm vec4_multiply x, a, b; - __asm float_to_int __retVal, x; + __asm vec4_multiply __retVal, a, b; } ivec2 __operator / (const ivec2 a, const ivec2 b) @@ -683,7 +654,7 @@ ivec2 __operator / (const ivec2 a, const ivec2 b) __asm float_rcp bInv.x, b.x; __asm float_rcp bInv.y, b.y; __asm vec4_multiply x, a, bInv; - __asm float_to_int __retVal, x; + __asm vec4_to_ivec4 __retVal, x; } @@ -691,23 +662,17 @@ ivec2 __operator / (const ivec2 a, const ivec2 b) ivec3 __operator + (const ivec3 a, const ivec3 b) { - vec3 x; - __asm vec4_add x, a, b; - __asm float_to_int __retVal, x; + __asm vec4_add __retVal, a, b; } ivec3 __operator - (const ivec3 a, const ivec3 b) { - vec3 x; - __asm vec4_subtract x, a, b; - __asm float_to_int __retVal, x; + __asm vec4_subtract __retVal, a, b; } ivec3 __operator * (const ivec3 a, const ivec3 b) { - vec3 x; - __asm vec4_multiply x, a, b; - __asm float_to_int __retVal, x; + __asm vec4_multiply __retVal, a, b; } ivec3 __operator / (const ivec3 a, const ivec3 b) @@ -717,7 +682,7 @@ ivec3 __operator / (const ivec3 a, const ivec3 b) __asm float_rcp bInv.y, b.y; __asm float_rcp bInv.z, b.z; __asm vec4_multiply x, a, bInv; - __asm float_to_int __retVal, x; + __asm vec4_to_ivec4 __retVal, x; } @@ -725,23 +690,17 @@ ivec3 __operator / (const ivec3 a, const ivec3 b) ivec4 __operator + (const ivec4 a, const ivec4 b) { - vec3 x; - __asm vec4_add x, a, b; - __asm float_to_int __retVal, x; + __asm vec4_add __retVal, a, b; } ivec4 __operator - (const ivec4 a, const ivec4 b) { - vec4 x; - __asm vec4_subtract x, a, b; - __asm float_to_int __retVal, x; + __asm vec4_subtract __retVal, a, b; } ivec4 __operator * (const ivec4 a, const ivec4 b) { - vec4 x; - __asm vec4_multiply x, a, b; - __asm float_to_int __retVal, x; + __asm vec4_multiply __retVal, a, b; } ivec4 __operator / (const ivec4 a, const ivec4 b) @@ -752,7 +711,7 @@ ivec4 __operator / (const ivec4 a, const ivec4 b) __asm float_rcp bInv.z, b.z; __asm float_rcp bInv.w, b.w; __asm vec4_multiply x, a, bInv; - __asm float_to_int __retVal, x; + __asm vec4_to_ivec4 __retVal, x; } @@ -760,24 +719,24 @@ ivec4 __operator / (const ivec4 a, const ivec4 b) float __operator + (const float a, const float b) { - __asm vec4_add __retVal.x, a, b; + __asm vec4_add __retVal, a, b; } float __operator - (const float a, const float b) { - __asm vec4_subtract __retVal.x, a, b; + __asm vec4_subtract __retVal, a, b; } float __operator * (const float a, const float b) { - __asm vec4_multiply __retVal.x, a, b; + __asm vec4_multiply __retVal, a, b; } float __operator / (const float a, const float b) { float bInv; - __asm float_rcp bInv.x, b.x; - __asm vec4_multiply __retVal.x, a, bInv; + __asm float_rcp bInv.x, b; + __asm vec4_multiply __retVal, a, bInv; } @@ -868,32 +827,32 @@ vec4 __operator / (const vec4 v, const vec4 u) vec2 __operator + (const float a, const vec2 u) { - __asm vec4_add __retVal.xy, a.xx, u.xy; + __asm vec4_add __retVal.xy, a, u.xy; } vec2 __operator + (const vec2 v, const float b) { - __asm vec4_add __retVal.xy, v.xy, b.xx; + __asm vec4_add __retVal.xy, v.xy, b; } vec2 __operator - (const float a, const vec2 u) { - __asm vec4_subtract __retVal.xy, a.xx, u.xy; + __asm vec4_subtract __retVal.xy, a, u.xy; } vec2 __operator - (const vec2 v, const float b) { - __asm vec4_subtract __retVal.xy, v.xy, b.xx; + __asm vec4_subtract __retVal.xy, v.xy, b; } vec2 __operator * (const float a, const vec2 u) { - __asm vec4_multiply __retVal.xy, a.xx, u.xy; + __asm vec4_multiply __retVal.xy, a, u.xy; } vec2 __operator * (const vec2 v, const float b) { - __asm vec4_multiply __retVal.xy, v.xy, b.xx; + __asm vec4_multiply __retVal.xy, v.xy, b; } vec2 __operator / (const float a, const vec2 u) @@ -901,14 +860,14 @@ vec2 __operator / (const float a, const vec2 u) vec2 invU; __asm float_rcp invU.x, u.x; __asm float_rcp invU.y, u.y; - __asm vec4_multiply __retVal.xy, a.xx, invU.xy; + __asm vec4_multiply __retVal.xy, a, invU.xy; } vec2 __operator / (const vec2 v, const float b) { float invB; __asm float_rcp invB, b; - __asm vec4_multiply __retVal.xy, v.xy, invB.xx; + __asm vec4_multiply __retVal.xy, v.xy, invB; } @@ -916,32 +875,32 @@ vec2 __operator / (const vec2 v, const float b) vec3 __operator + (const float a, const vec3 u) { - __asm vec4_add __retVal.xyz, a.xxx, u.xyz; + __asm vec4_add __retVal.xyz, a, u.xyz; } vec3 __operator + (const vec3 v, const float b) { - __asm vec4_add __retVal.xyz, v.xyz, b.xxx; + __asm vec4_add __retVal.xyz, v.xyz, b; } vec3 __operator - (const float a, const vec3 u) { - __asm vec4_subtract __retVal.xyz, a.xxx, u.xyz; + __asm vec4_subtract __retVal.xyz, a, u.xyz; } vec3 __operator - (const vec3 v, const float b) { - __asm vec4_subtract __retVal.xyz, v.xyz, b.xxx; + __asm vec4_subtract __retVal.xyz, v.xyz, b; } vec3 __operator * (const float a, const vec3 u) { - __asm vec4_multiply __retVal.xyz, a.xxx, u.xyz; + __asm vec4_multiply __retVal.xyz, a, u.xyz; } vec3 __operator * (const vec3 v, const float b) { - __asm vec4_multiply __retVal.xyz, v.xyz, b.xxx; + __asm vec4_multiply __retVal.xyz, v.xyz, b; } vec3 __operator / (const float a, const vec3 u) @@ -950,14 +909,14 @@ vec3 __operator / (const float a, const vec3 u) __asm float_rcp invU.x, u.x; __asm float_rcp invU.y, u.y; __asm float_rcp invU.z, u.z; - __asm vec4_multiply __retVal.xyz, a.xxx, invU.xyz; + __asm vec4_multiply __retVal.xyz, a, invU.xyz; } vec3 __operator / (const vec3 v, const float b) { float invB; __asm float_rcp invB, b; - __asm vec4_multiply __retVal.xyz, v.xyz, invB.xxx; + __asm vec4_multiply __retVal.xyz, v.xyz, invB; } @@ -965,32 +924,32 @@ vec3 __operator / (const vec3 v, const float b) vec4 __operator + (const float a, const vec4 u) { - __asm vec4_add __retVal, a.xxxx, u; + __asm vec4_add __retVal, a, u; } vec4 __operator + (const vec4 v, const float b) { - __asm vec4_add __retVal, v, b.xxxx; + __asm vec4_add __retVal, v, b; } vec4 __operator - (const float a, const vec4 u) { - __asm vec4_subtract __retVal, a.xxxx, u; + __asm vec4_subtract __retVal, a, u; } vec4 __operator - (const vec4 v, const float b) { - __asm vec4_subtract __retVal, v, b.xxxx; + __asm vec4_subtract __retVal, v, b; } vec4 __operator * (const float a, const vec4 u) { - __asm vec4_multiply __retVal, a.xxxx, u; + __asm vec4_multiply __retVal, a, u; } vec4 __operator * (const vec4 v, const float b) { - __asm vec4_multiply __retVal, v, b.xxxx; + __asm vec4_multiply __retVal, v, b; } vec4 __operator / (const float a, const vec4 u) @@ -1000,14 +959,14 @@ vec4 __operator / (const float a, const vec4 u) __asm float_rcp invU.y, u.y; __asm float_rcp invU.z, u.z; __asm float_rcp invU.w, u.w; - __asm vec4_multiply __retVal, a.xxxx, invU; + __asm vec4_multiply __retVal, a, invU; } vec4 __operator / (const vec4 v, const float b) { float invB; __asm float_rcp invB, b; - __asm vec4_multiply __retVal, v, invB.xxxx; + __asm vec4_multiply __retVal, v, invB; } @@ -1254,7 +1213,7 @@ void __operator /= (inout int a, const int b) float invB; __asm float_rcp invB, b; __asm vec4_multiply a, a, invB; - __asm float_to_int a, a; + __asm vec4_to_ivec4 a, a; } @@ -1281,7 +1240,7 @@ void __operator /= (inout ivec2 v, const ivec2 u) __asm float_rcp inv.x, u.x; __asm float_rcp inv.y, u.y; __asm vec4_multiply z, v, inv; - __asm float_to_int v, z; + __asm vec4_to_ivec4 v, z; } @@ -1308,7 +1267,7 @@ void __operator /= (inout ivec3 v, const ivec3 u) __asm float_rcp inv.x, u.x; __asm float_rcp inv.y, u.y; __asm vec4_multiply z, v, inv; - __asm float_to_int v, z; + __asm vec4_to_ivec4 v, z; } @@ -1335,7 +1294,7 @@ void __operator /= (inout ivec4 v, const ivec4 u) __asm float_rcp inv.x, u.x; __asm float_rcp inv.y, u.y; __asm vec4_multiply z, v, inv; - __asm float_to_int v, z; + __asm vec4_to_ivec4 v, z; } @@ -1450,17 +1409,17 @@ void __operator /= (inout vec4 v, const vec4 u) void __operator += (inout ivec2 v, const int a) { - __asm vec4_add v.xy, v.xy, a.xx; + __asm vec4_add v.xy, v.xy, a; } void __operator -= (inout ivec2 v, const int a) { - __asm vec4_subtract v.xy, v.xy, a.xx; + __asm vec4_subtract v.xy, v.xy, a; } void __operator *= (inout ivec2 v, const int a) { - __asm vec4_multiply v.xy, v.xy, a.xx; + __asm vec4_multiply v.xy, v.xy, a; v.x *= a; v.y *= a; } @@ -1477,17 +1436,17 @@ void __operator /= (inout ivec2 v, const int a) void __operator += (inout ivec3 v, const int a) { - __asm vec4_add v.xyz, v.xyz, a.xxx; + __asm vec4_add v.xyz, v.xyz, a; } void __operator -= (inout ivec3 v, const int a) { - __asm vec4_subtract v.xyz, v.xyz, a.xxx; + __asm vec4_subtract v.xyz, v.xyz, a; } void __operator *= (inout ivec3 v, const int a) { - __asm vec4_multiply v.xyz, v.xyz, a.xxx; + __asm vec4_multiply v.xyz, v.xyz, a; } void __operator /= (inout ivec3 v, const int a) @@ -1503,17 +1462,17 @@ void __operator /= (inout ivec3 v, const int a) void __operator += (inout ivec4 v, const int a) { - __asm vec4_add v, v, a.xxxx; + __asm vec4_add v, v, a; } void __operator -= (inout ivec4 v, const int a) { - __asm vec4_subtract v, v, a.xxxx; + __asm vec4_subtract v, v, a; } void __operator *= (inout ivec4 v, const int a) { - __asm vec4_multiply v, v, a.xxxx; + __asm vec4_multiply v, v, a; } void __operator /= (inout ivec4 v, const int a) @@ -1530,24 +1489,24 @@ void __operator /= (inout ivec4 v, const int a) void __operator += (inout vec2 v, const float a) { - __asm vec4_add v.xy, v, a.xx; + __asm vec4_add v.xy, v, a; } void __operator -= (inout vec2 v, const float a) { - __asm vec4_subtract v.xy, v, a.xx; + __asm vec4_subtract v.xy, v, a; } void __operator *= (inout vec2 v, const float a) { - __asm vec4_multiply v.xy, v, a.xx; + __asm vec4_multiply v.xy, v, a; } void __operator /= (inout vec2 v, const float a) { float invA; __asm float_rcp invA, a; - __asm vec4_multiply v.xy, v.xy, invA.xx; + __asm vec4_multiply v.xy, v.xy, invA; } @@ -1555,24 +1514,24 @@ void __operator /= (inout vec2 v, const float a) void __operator += (inout vec3 v, const float a) { - __asm vec4_add v.xyz, v, a.xxx; + __asm vec4_add v.xyz, v, a; } void __operator -= (inout vec3 v, const float a) { - __asm vec4_subtract v.xyz, v, a.xxx; + __asm vec4_subtract v.xyz, v, a; } void __operator *= (inout vec3 v, const float a) { - __asm vec4_multiply v.xyz, v, a.xxx; + __asm vec4_multiply v.xyz, v, a; } void __operator /= (inout vec3 v, const float a) { float invA; __asm float_rcp invA, a; - __asm vec4_multiply v.xyz, v.xyz, invA.xxx; + __asm vec4_multiply v.xyz, v.xyz, invA; } @@ -1580,24 +1539,24 @@ void __operator /= (inout vec3 v, const float a) void __operator += (inout vec4 v, const float a) { - __asm vec4_add v, v, a.xxxx; + __asm vec4_add v, v, a; } void __operator -= (inout vec4 v, const float a) { - __asm vec4_subtract v, v, a.xxxx; + __asm vec4_subtract v, v, a; } void __operator *= (inout vec4 v, const float a) { - __asm vec4_multiply v, v, a.xxxx; + __asm vec4_multiply v, v, a; } void __operator /= (inout vec4 v, const float a) { float invA; __asm float_rcp invA, a; - __asm vec4_multiply v, v, invA.xxxx; + __asm vec4_multiply v, v, invA; } diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c index 363c0a10c2b..582b47c2392 100644 --- a/src/mesa/shader/slang/slang_codegen.c +++ b/src/mesa/shader/slang/slang_codegen.c @@ -425,6 +425,7 @@ static slang_asm_info AsmInfo[] = { { "vec4_sle", IR_SLE, 1, 2 }, { "vec4_slt", IR_SLT, 1, 2 }, /* vec4 unary */ + { "vec4_move", IR_MOVE, 1, 1 }, { "vec4_floor", IR_FLOOR, 1, 1 }, { "vec4_frac", IR_FRAC, 1, 1 }, { "vec4_abs", IR_ABS, 1, 1 }, @@ -448,8 +449,8 @@ static slang_asm_info AsmInfo[] = { { "vec4_texp_rect", IR_TEX, 1, 2 },/* rectangle w/ projection */ /* unary op */ - { "int_to_float", IR_I_TO_F, 1, 1 }, - { "float_to_int", IR_F_TO_I, 1, 1 }, + { "ivec4_to_vec4", IR_I_TO_F, 1, 1 }, /* int[4] to float[4] */ + { "vec4_to_ivec4", IR_F_TO_I, 1, 1 }, /* float[4] to int[4] */ { "float_exp", IR_EXP, 1, 1 }, { "float_exp2", IR_EXP2, 1, 1 }, { "float_log2", IR_LOG2, 1, 1 }, @@ -1674,11 +1675,9 @@ _slang_gen_asm(slang_assemble_ctx *A, slang_operation *oper, return NULL; assert(!n->Store); - n->Store = get_store(n0); - n->Writemask = writemask; + n->Store = n0->Store; - assert(n->Store->File != PROGRAM_UNDEFINED || - n->Store->Parent); + assert(n->Store->File != PROGRAM_UNDEFINED || n->Store->Parent); _slang_free(n0); } @@ -1957,6 +1956,7 @@ _slang_gen_function_call_name(slang_assemble_ctx *A, const char *name, slang_atom atom; slang_function *fun; GLboolean error; + slang_ir_node *n; atom = slang_atom_pool_atom(A->atoms, name); if (atom == SLANG_ATOM_NULL) @@ -2008,7 +2008,17 @@ _slang_gen_function_call_name(slang_assemble_ctx *A, const char *name, assert(fun); } - return _slang_gen_function_call(A, fun, oper, dest); + n = _slang_gen_function_call(A, fun, oper, dest); + + if (n && !n->Store && !dest + && fun->header.type.specifier.type != SLANG_SPEC_VOID) { + /* setup n->Store for the result of the function call */ + GLint size = _slang_sizeof_type_specifier(&fun->header.type.specifier); + n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, size); + /*printf("Alloc storage for function result, size %d \n", size);*/ + } + + return n; } @@ -2524,13 +2534,13 @@ _slang_gen_select(slang_assemble_ctx *A, slang_operation *oper) tmpVar = new_node0(IR_VAR); tmpVar->Store = tmpDecl->Store; trueExpr = _slang_gen_operation(A, &oper->children[1]); - trueNode = new_node2(IR_MOVE, tmpVar, trueExpr); + trueNode = new_node2(IR_COPY, tmpVar, trueExpr); /* if-false body (child 2) */ tmpVar = new_node0(IR_VAR); tmpVar->Store = tmpDecl->Store; falseExpr = _slang_gen_operation(A, &oper->children[2]); - falseNode = new_node2(IR_MOVE, tmpVar, falseExpr); + falseNode = new_node2(IR_COPY, tmpVar, falseExpr); ifNode = new_if(cond, trueNode, falseNode); @@ -2711,7 +2721,7 @@ _slang_gen_declaration(slang_assemble_ctx *A, slang_operation *oper) assert(oper->num_children == 0 || oper->num_children == 1); v = _slang_locate_variable(oper->locals, oper->a_id, GL_TRUE); - /*printf("Declare %s at %p\n", varName, v);*/ + /*printf("Declare %s at %p\n", varName, (void *) v);*/ assert(v); #if 0 @@ -2736,7 +2746,8 @@ _slang_gen_declaration(slang_assemble_ctx *A, slang_operation *oper) rhs = _slang_gen_operation(A, &oper->children[0]); if (!rhs) return NULL; /* must have found an error */ - init = new_node2(IR_MOVE, var, rhs); + init = new_node2(IR_COPY, var, rhs); + /*assert(rhs->Opcode != IR_SEQ);*/ n = new_seq(varDecl, init); } @@ -2775,12 +2786,14 @@ _slang_gen_declaration(slang_assemble_ctx *A, slang_operation *oper) if (!rhs) return NULL; + /*assert(rhs->Store);*/ + if (rhs->Store && var->Store->Size != rhs->Store->Size) { slang_info_log_error(A->log, "invalid assignment (wrong types)"); return NULL; } - init = new_node2(IR_MOVE, var, rhs); + init = new_node2(IR_COPY, var, rhs); n = new_seq(varDecl, init); } else { @@ -2851,12 +2864,14 @@ _slang_assignment_compatible(const slang_typeinfo *t0, const slang_typeinfo *t1) { -#if 0 +#if 1 GLuint sz0 = _slang_sizeof_type_specifier(&t0->spec); GLuint sz1 = _slang_sizeof_type_specifier(&t1->spec); - if (sz0 != sz1) + if (sz0 != sz1) { + printf("size mismatch %u vs %u\n", sz0, sz1); return GL_FALSE; + } #endif if (t0->spec.type == SLANG_SPEC_STRUCT && @@ -2974,7 +2989,7 @@ _slang_gen_assignment(slang_assemble_ctx * A, slang_operation *oper) */ rhs = _slang_gen_swizzle(rhs, newSwizzle); } - n = new_node2(IR_MOVE, lhs, rhs); + n = new_node2(IR_COPY, lhs, rhs); n->Writemask = writemask; return n; } @@ -3208,6 +3223,7 @@ _slang_gen_compare(slang_assemble_ctx *A, slang_operation *oper, slang_ir_opcode opcode) { slang_typeinfo t0, t1; + slang_ir_node *n; slang_typeinfo_construct(&t0); _slang_typeof_operation(A, &oper->children[0], &t0); @@ -3221,9 +3237,14 @@ _slang_gen_compare(slang_assemble_ctx *A, slang_operation *oper, return NULL; } - return new_node2(opcode, - _slang_gen_operation(A, &oper->children[0]), - _slang_gen_operation(A, &oper->children[1])); + n = new_node2(opcode, + _slang_gen_operation(A, &oper->children[0]), + _slang_gen_operation(A, &oper->children[1])); + + /* result is a bool (size 1) */ + n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1); + + return n; } @@ -3726,7 +3747,7 @@ _slang_codegen_global_variable(slang_assemble_ctx *A, slang_variable *var, if (var->initializer) { slang_ir_node *lhs, *rhs, *init; - /* Generate IR_MOVE instruction to initialize the variable */ + /* Generate IR_COPY instruction to initialize the variable */ lhs = new_node0(IR_VAR); lhs->Var = var; lhs->Store = n->Store; @@ -3736,7 +3757,7 @@ _slang_codegen_global_variable(slang_assemble_ctx *A, slang_variable *var, rhs = _slang_gen_operation(A, var->initializer); assert(rhs); - init = new_node2(IR_MOVE, lhs, rhs); + init = new_node2(IR_COPY, lhs, rhs); n = new_seq(n, init); } diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c index b1ebad7f177..b902da84570 100644 --- a/src/mesa/shader/slang/slang_emit.c +++ b/src/mesa/shader/slang/slang_emit.c @@ -127,22 +127,33 @@ _slang_swizzle_swizzle(GLuint swz1, GLuint swz2) /** - * Allocate temporary storage for an intermediate result (such as for - * a multiply or add, etc. + * Allocate storage for the given node (if it hasn't already been allocated). + * + * Typically this is temporary storage for an intermediate result (such as + * for a multiply or add, etc). + * + * If n->Store does not exist it will be created and will be of the size + * specified by defaultSize. */ static GLboolean -alloc_temp_storage(slang_emit_info *emitInfo, slang_ir_node *n, GLint size) +alloc_node_storage(slang_emit_info *emitInfo, slang_ir_node *n, + GLint defaultSize) { assert(!n->Var); - assert(!n->Store); - assert(size > 0); - n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, size); - if (!_slang_alloc_temp(emitInfo->vt, n->Store)) { - slang_info_log_error(emitInfo->log, - "Ran out of registers, too many temporaries"); - _slang_free(n->Store); - n->Store = NULL; - return GL_FALSE; + if (!n->Store) { + assert(defaultSize > 0); + n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, defaultSize); + } + + /* now allocate actual register(s). I.e. set n->Store->Index >= 0 */ + if (n->Store->Index < 0) { + if (!_slang_alloc_temp(emitInfo->vt, n->Store)) { + slang_info_log_error(emitInfo->log, + "Ran out of registers, too many temporaries"); + _slang_free(n->Store); + n->Store = NULL; + return GL_FALSE; + } } return GL_TRUE; } @@ -153,7 +164,7 @@ alloc_temp_storage(slang_emit_info *emitInfo, slang_ir_node *n, GLint size) * Otherwise, no-op. */ static void -free_temp_storage(slang_var_table *vt, slang_ir_node *n) +free_node_storage(slang_var_table *vt, slang_ir_node *n) { if (n->Store->File == PROGRAM_TEMPORARY && n->Store->Index >= 0 && @@ -167,6 +178,22 @@ free_temp_storage(slang_var_table *vt, slang_ir_node *n) } +/** + * Helper function to allocate a short-term temporary. + * Free it with _slang_free_temp(). + */ +static GLboolean +alloc_local_temp(slang_emit_info *emitInfo, slang_ir_storage *temp, GLint size) +{ + assert(size >= 1); + assert(size <= 4); + _mesa_bzero(temp, sizeof(*temp)); + temp->Size = size; + temp->File = PROGRAM_TEMPORARY; + temp->Index = -1; + return _slang_alloc_temp(emitInfo->vt, temp); +} + /** * Remove any SWIZZLE_NIL terms from given swizzle mask. @@ -584,19 +611,15 @@ emit_arith(slang_emit_info *emitInfo, slang_ir_node *n) } /* result storage */ - if (!n->Store) { - GLint size = info->ResultSize; - if (!alloc_temp_storage(emitInfo, n, size)) - return NULL; -#if 0000 /* this should work, but doesn't yet */ - if (size == 2) - n->Writemask = WRITEMASK_XY; - else if (size == 3) - n->Writemask = WRITEMASK_XYZ; - else if (size == 1) - n->Writemask = WRITEMASK_X << GET_SWZ(n->Store->Swizzle,0); -#endif - } + alloc_node_storage(emitInfo, n, -1); + assert(n->Store->Index >= 0); + if (n->Store->Size == 2) + n->Writemask = WRITEMASK_XY; + else if (n->Store->Size == 3) + n->Writemask = WRITEMASK_XYZ; + else if (n->Store->Size == 1) + n->Writemask = WRITEMASK_X << GET_SWZ(n->Store->Swizzle, 0); + storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); @@ -608,7 +631,7 @@ emit_arith(slang_emit_info *emitInfo, slang_ir_node *n) /* really free temps now */ for (i = 0; i < 3; i++) if (temps[i]) - free_temp_storage(emitInfo->vt, temps[i]); + free_node_storage(emitInfo->vt, temps[i]); /*_mesa_print_instruction(inst);*/ return inst; @@ -636,16 +659,15 @@ emit_compare(slang_emit_info *emitInfo, slang_ir_node *n) return NULL; } + /* final result is 1 bool */ + if (!alloc_node_storage(emitInfo, n, 1)) + return NULL; + size = n->Children[0]->Store->Size; if (size == 1) { gl_inst_opcode opcode; - if (!n->Store) { - if (!alloc_temp_storage(emitInfo, n, 1)) /* 1 bool */ - return NULL; - } - opcode = n->Opcode == IR_EQUAL ? OPCODE_SEQ : OPCODE_SNE; inst = new_instruction(emitInfo, opcode); storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store); @@ -655,11 +677,11 @@ emit_compare(slang_emit_info *emitInfo, slang_ir_node *n) else if (size <= 4) { GLuint swizzle; gl_inst_opcode dotOp; - - assert(!n->Store); - if (!n->Store) { - if (!alloc_temp_storage(emitInfo, n, size)) /* 'size' bools */ - return NULL; + slang_ir_storage tempStore; + + if (!alloc_local_temp(emitInfo, &tempStore, 4)) { + return NULL; + /* out of temps */ } if (size == 4) { @@ -676,26 +698,25 @@ emit_compare(slang_emit_info *emitInfo, slang_ir_node *n) swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y); } - /* Compute equality, inequality (tmp1 = (A ?= B)) */ + /* Compute inequality (temp = (A != B)) */ inst = new_instruction(emitInfo, OPCODE_SNE); + storage_to_dst_reg(&inst->DstReg, &tempStore, n->Writemask); storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store); storage_to_src_reg(&inst->SrcReg[1], n->Children[1]->Store); - storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); inst->Comment = _mesa_strdup("Compare values"); - /* Compute tmp2 = DOT(tmp1, tmp1) (reduction) */ + /* Compute val = DOT(temp, temp) (reduction) */ inst = new_instruction(emitInfo, dotOp); - storage_to_src_reg(&inst->SrcReg[0], n->Store); - storage_to_src_reg(&inst->SrcReg[1], n->Store); - inst->SrcReg[0].Swizzle = inst->SrcReg[1].Swizzle = swizzle; /*override*/ - free_temp_storage(emitInfo->vt, n); /* free tmp1 */ - if (!alloc_temp_storage(emitInfo, n, 1)) /* alloc tmp2 */ - return NULL; storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); + storage_to_src_reg(&inst->SrcReg[0], &tempStore); + storage_to_src_reg(&inst->SrcReg[1], &tempStore); + inst->SrcReg[0].Swizzle = inst->SrcReg[1].Swizzle = swizzle; /*override*/ inst->Comment = _mesa_strdup("Reduce vec to bool"); + _slang_free_temp(emitInfo->vt, &tempStore); /* free temp */ + if (n->Opcode == IR_EQUAL) { - /* compute tmp2.x = !tmp2.x via tmp2.x = (tmp2.x == 0) */ + /* compute val = !val.x with SEQ val, val, 0; */ inst = new_instruction(emitInfo, OPCODE_SEQ); storage_to_src_reg(&inst->SrcReg[0], n->Store); constant_to_src_reg(&inst->SrcReg[1], 0.0, emitInfo); @@ -708,73 +729,59 @@ emit_compare(slang_emit_info *emitInfo, slang_ir_node *n) * XXX this won't work reliably for structs with padding!! */ GLint i, num = (n->Children[0]->Store->Size + 3) / 4; - slang_ir_storage accTemp; + slang_ir_storage accTemp, sneTemp; - if (!n->Store) { - if (!alloc_temp_storage(emitInfo, n, 4)) - return NULL; - } + if (!alloc_local_temp(emitInfo, &accTemp, 4)) + return NULL; - accTemp.Size = 4; - accTemp.File = PROGRAM_TEMPORARY; - if (!_slang_alloc_temp(emitInfo->vt, &accTemp)) { + if (!alloc_local_temp(emitInfo, &sneTemp, 4)) return NULL; - /* out of temps */ - } for (i = 0; i < num; i++) { - /* SNE t0, left[i], right[i] */ + /* SNE sneTemp, left[i], right[i] */ inst = new_instruction(emitInfo, OPCODE_SNE); storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store); storage_to_src_reg(&inst->SrcReg[1], n->Children[1]->Store); inst->SrcReg[0].Index += i; inst->SrcReg[1].Index += i; if (i == 0) { - inst->DstReg.File = accTemp.File; - inst->DstReg.Index = accTemp.Index; + storage_to_dst_reg(&inst->DstReg, &accTemp, WRITEMASK_XYZW); inst->Comment = _mesa_strdup("Begin struct/array comparison"); } else { - inst->DstReg.File = n->Store->File; - inst->DstReg.Index = n->Store->Index; - } - if (i > 0) { - /* ADD accTemp, accTemp, temp; # like logical-OR */ + storage_to_dst_reg(&inst->DstReg, &sneTemp, WRITEMASK_XYZW); + + /* ADD accTemp, accTemp, sneTemp; # like logical-OR */ inst = new_instruction(emitInfo, OPCODE_ADD); - inst->SrcReg[0].File = accTemp.File; - inst->SrcReg[0].Index = accTemp.Index; - inst->SrcReg[1].File = n->Store->File; - inst->SrcReg[1].Index = n->Store->Index; - inst->DstReg.File = accTemp.File; - inst->DstReg.Index = accTemp.Index; + storage_to_dst_reg(&inst->DstReg, &accTemp, WRITEMASK_XYZW); + storage_to_src_reg(&inst->SrcReg[0], &accTemp); + storage_to_src_reg(&inst->SrcReg[1], &sneTemp); } } /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */ inst = new_instruction(emitInfo, OPCODE_DP4); - inst->SrcReg[0].File = accTemp.File; - inst->SrcReg[0].Index = accTemp.Index; - inst->SrcReg[1].File = accTemp.File; - inst->SrcReg[1].Index = accTemp.Index; - inst->DstReg.File = n->Store->File; - inst->DstReg.Index = n->Store->Index; + storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); + storage_to_src_reg(&inst->SrcReg[0], &accTemp); + storage_to_src_reg(&inst->SrcReg[1], &accTemp); inst->Comment = _mesa_strdup("End struct/array comparison"); if (n->Opcode == IR_EQUAL) { /* compute tmp.x = !tmp.x via tmp.x = (tmp.x == 0) */ inst = new_instruction(emitInfo, OPCODE_SEQ); + storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); storage_to_src_reg(&inst->SrcReg[0], n->Store); constant_to_src_reg(&inst->SrcReg[1], 0.0, emitInfo); - storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); inst->Comment = _mesa_strdup("Invert true/false"); } _slang_free_temp(emitInfo->vt, &accTemp); + _slang_free_temp(emitInfo->vt, &sneTemp); } /* free temps */ - free_temp_storage(emitInfo->vt, n->Children[0]); - free_temp_storage(emitInfo->vt, n->Children[1]); + free_node_storage(emitInfo->vt, n->Children[0]); + free_node_storage(emitInfo->vt, n->Children[1]); return inst; } @@ -827,9 +834,8 @@ emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n) } #endif - if (!n->Store) - if (!alloc_temp_storage(emitInfo, n, n->Children[0]->Store->Size)) - return NULL; + if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size)) + return NULL; emit(emitInfo, n->Children[1]); emit(emitInfo, n->Children[2]); @@ -839,7 +845,7 @@ emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n) * the intermediate result. Use a temp register instead. */ _mesa_bzero(&tmpNode, sizeof(tmpNode)); - alloc_temp_storage(emitInfo, &tmpNode, n->Store->Size); + alloc_node_storage(emitInfo, &tmpNode, n->Store->Size); /* tmp = max(ch[0], ch[1]) */ inst = new_instruction(emitInfo, OPCODE_MAX); @@ -853,7 +859,7 @@ emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n) storage_to_src_reg(&inst->SrcReg[0], tmpNode.Store); storage_to_src_reg(&inst->SrcReg[1], n->Children[2]->Store); - free_temp_storage(emitInfo->vt, &tmpNode); + free_node_storage(emitInfo->vt, &tmpNode); return inst; } @@ -870,9 +876,8 @@ emit_negation(slang_emit_info *emitInfo, slang_ir_node *n) emit(emitInfo, n->Children[0]); - if (!n->Store) - if (!alloc_temp_storage(emitInfo, n, n->Children[0]->Store->Size)) - return NULL; + if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size)) + return NULL; inst = new_instruction(emitInfo, OPCODE_MOV); storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); @@ -1014,9 +1019,8 @@ emit_tex(slang_emit_info *emitInfo, slang_ir_node *n) inst = new_instruction(emitInfo, OPCODE_TXP); } - if (!n->Store) - if (!alloc_temp_storage(emitInfo, n, 4)) - return NULL; + if (!alloc_node_storage(emitInfo, n, 4)) + return NULL; storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); @@ -1045,12 +1049,15 @@ emit_tex(slang_emit_info *emitInfo, slang_ir_node *n) } +/** + * Assignment/copy + */ static struct prog_instruction * -emit_move(slang_emit_info *emitInfo, slang_ir_node *n) +emit_copy(slang_emit_info *emitInfo, slang_ir_node *n) { struct prog_instruction *inst; - assert(n->Opcode == IR_MOVE); + assert(n->Opcode == IR_COPY); /* lhs */ emit(emitInfo, n->Children[0]); @@ -1115,7 +1122,7 @@ emit_move(slang_emit_info *emitInfo, slang_ir_node *n) srcStore.Size = 4; while (size >= 4) { inst = new_instruction(emitInfo, OPCODE_MOV); - inst->Comment = _mesa_strdup("IR_MOVE block"); + inst->Comment = _mesa_strdup("IR_COPY block"); storage_to_dst_reg(&inst->DstReg, &dstStore, n->Writemask); storage_to_src_reg(&inst->SrcReg[0], &srcStore); srcStore.Index++; @@ -1135,7 +1142,7 @@ emit_move(slang_emit_info *emitInfo, slang_ir_node *n) inst->Comment = instruction_annotation(inst->Opcode, dstAnnot, srcAnnot, NULL, NULL); } - free_temp_storage(emitInfo->vt, n->Children[1]); + free_node_storage(emitInfo->vt, n->Children[1]); return inst; } } @@ -1184,7 +1191,7 @@ emit_cond(slang_emit_info *emitInfo, slang_ir_node *n) * is normally generated for the expression "i". * Generate a move instruction just to set condition codes. */ - if (!alloc_temp_storage(emitInfo, n, 1)) + if (!alloc_node_storage(emitInfo, n, 1)) return NULL; inst = new_instruction(emitInfo, OPCODE_MOV); inst->CondUpdate = GL_TRUE; @@ -1240,15 +1247,14 @@ emit_not(slang_emit_info *emitInfo, slang_ir_node *n) #endif /* else, invert using SEQ (v = v == 0) */ - if (!n->Store) - if (!alloc_temp_storage(emitInfo, n, n->Children[0]->Store->Size)) - return NULL; + if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size)) + return NULL; inst = new_instruction(emitInfo, OPCODE_SEQ); storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store); constant_to_src_reg(&inst->SrcReg[1], 0.0, emitInfo); - free_temp_storage(emitInfo->vt, n->Children[0]); + free_node_storage(emitInfo->vt, n->Children[0]); inst->Comment = _mesa_strdup("NOT"); return inst; @@ -1731,27 +1737,15 @@ emit(slang_emit_info *emitInfo, slang_ir_node *n) case IR_SWIZZLE: return emit_swizzle(emitInfo, n); - case IR_I_TO_F: - /* just move */ - emit(emitInfo, n->Children[0]); - inst = new_instruction(emitInfo, OPCODE_MOV); - if (!n->Store) { - if (!alloc_temp_storage(emitInfo, n, 1)) - return NULL; - } - storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); - storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store); - if (emitInfo->EmitComments) - inst->Comment = _mesa_strdup("int to float"); - return NULL; - /* Simple arithmetic */ /* unary */ + case IR_MOVE: case IR_RSQ: case IR_RCP: case IR_FLOOR: case IR_FRAC: case IR_F_TO_I: + case IR_I_TO_F: case IR_ABS: case IR_SIN: case IR_COS: @@ -1808,8 +1802,8 @@ emit(slang_emit_info *emitInfo, slang_ir_node *n) } return NULL; - case IR_MOVE: - return emit_move(emitInfo, n); + case IR_COPY: + return emit_copy(emitInfo, n); case IR_COND: return emit_cond(emitInfo, n); diff --git a/src/mesa/shader/slang/slang_ir.c b/src/mesa/shader/slang/slang_ir.c index 23d554234ee..3a0b8bf3a0d 100644 --- a/src/mesa/shader/slang/slang_ir.c +++ b/src/mesa/shader/slang/slang_ir.c @@ -54,7 +54,8 @@ static const slang_ir_info IrInfo[] = { { IR_NOTEQUAL, "IR_NOTEQUAL", OPCODE_NOP, 1, 2 }, /* unary ops */ - { IR_I_TO_F, "IR_I_TO_F", OPCODE_NOP, 1, 1 }, + { IR_MOVE, "IR_MOVE", OPCODE_MOV, 4, 1 }, + { IR_I_TO_F, "IR_I_TO_F", OPCODE_MOV, 4, 1 }, /* int[4] to float[4] */ { IR_F_TO_I, "IR_F_TO_I", OPCODE_INT, 4, 1 }, /* 4 floats to 4 ints */ { IR_EXP, "IR_EXP", OPCODE_EXP, 1, 1 }, { IR_EXP2, "IR_EXP2", OPCODE_EX2, 1, 1 }, @@ -82,7 +83,7 @@ static const slang_ir_info IrInfo[] = { { IR_KILL, "IR_KILL", OPCODE_NOP, 0, 0 }, { IR_COND, "IR_COND", OPCODE_NOP, 0, 0 }, { IR_CALL, "IR_CALL", OPCODE_NOP, 0, 0 }, - { IR_MOVE, "IR_MOVE", OPCODE_NOP, 0, 1 }, + { IR_COPY, "IR_COPY", OPCODE_NOP, 0, 1 }, { IR_NOT, "IR_NOT", OPCODE_NOP, 1, 1 }, { IR_VAR, "IR_VAR", OPCODE_NOP, 0, 0 }, { IR_VAR_DECL, "IR_VAR_DECL", OPCODE_NOP, 0, 0 }, @@ -326,8 +327,8 @@ _slang_print_ir_tree(const slang_ir_node *n, int indent) assert(!n->Children[1]); _slang_print_ir_tree(n->Children[0], indent + 3); break; - case IR_MOVE: - printf("MOVE (writemask = %s)\n", writemask_string(n->Writemask)); + case IR_COPY: + printf("COPY (writemask = %s)\n", writemask_string(n->Writemask)); _slang_print_ir_tree(n->Children[0], indent+3); _slang_print_ir_tree(n->Children[1], indent+3); break; diff --git a/src/mesa/shader/slang/slang_ir.h b/src/mesa/shader/slang/slang_ir.h index e4697ba3b47..f64f9a93b78 100644 --- a/src/mesa/shader/slang/slang_ir.h +++ b/src/mesa/shader/slang/slang_ir.h @@ -70,14 +70,14 @@ typedef enum /* n->Parent = ptr to parent IR_LOOP Node */ IR_BREAK, /* break loop */ - IR_BREAK_IF_TRUE, + IR_BREAK_IF_TRUE, /**< Children[0] = the condition expression */ IR_CONT_IF_TRUE, - /* Children[0] = the condition expression */ - IR_MOVE, + IR_COPY, /**< assignment/copy */ + IR_MOVE, /**< assembly MOV instruction */ /* vector ops: */ - IR_ADD, + IR_ADD, /**< assembly ADD instruction */ IR_SUB, IR_MUL, IR_DIV, diff --git a/src/mesa/shader/slang/slang_vartable.c b/src/mesa/shader/slang/slang_vartable.c index 68b4e00be03..9b607e64037 100644 --- a/src/mesa/shader/slang/slang_vartable.c +++ b/src/mesa/shader/slang/slang_vartable.c @@ -110,9 +110,10 @@ _slang_pop_var_table(slang_var_table *vt) slang_ir_storage *store = (slang_ir_storage *) t->Vars[i]->aux; GLint j; GLuint comp; - if (dbg) printf(" Free var %s, size %d at %d\n", + if (dbg) printf(" Free var %s, size %d at %d.%s\n", (char*) t->Vars[i]->a_name, store->Size, - store->Index); + store->Index, + _mesa_swizzle_string(store->Swizzle, 0, 0)); if (store->Size == 1) comp = GET_SWZ(store->Swizzle, 0); @@ -159,7 +160,7 @@ _slang_add_variable(slang_var_table *vt, slang_variable *v) assert(vt); t = vt->Top; assert(t); - if (dbg) printf("Adding var %s\n", (char *) v->a_name); + if (dbg) printf("Adding var %s, store %p\n", (char *) v->a_name, v->aux); t->Vars = (slang_variable **) _slang_realloc(t->Vars, t->NumVars * sizeof(slang_variable *), @@ -262,10 +263,11 @@ _slang_alloc_var(slang_var_table *vt, slang_ir_storage *store) } if (dbg) - printf("Alloc var sz %d at %d.%s (level %d)\n", + printf("Alloc var storage sz %d at %d.%s (level %d) store %p\n", store->Size, store->Index, _mesa_swizzle_string(store->Swizzle, 0, 0), - t->Level); + t->Level, + (void*) store); return GL_TRUE; } @@ -283,19 +285,29 @@ _slang_alloc_temp(slang_var_table *vt, slang_ir_storage *store) if (i < 0) return GL_FALSE; + assert(store->Index < 0); + store->Index = i / 4; if (store->Size == 1) { const GLuint comp = i % 4; store->Swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); - if (dbg) printf("Alloc temp sz %d at %d.%c (level %d)\n", - store->Size, store->Index, "xyzw"[comp], t->Level); } else { /* XXX improve swizzled for size=2/3, use for writemask... */ +#if 1 + if (store->Size == 2) { + store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, + SWIZZLE_NIL, SWIZZLE_NIL); + } +#endif store->Swizzle = SWIZZLE_NOOP; - if (dbg) printf("Alloc temp sz %d at %d.xyzw (level %d)\n", - store->Size, store->Index, t->Level); } + + if (dbg) printf("Alloc temp sz %d at %d.%s (level %d) store %p\n", + store->Size, store->Index, + _mesa_swizzle_string(store->Swizzle, 0, 0), t->Level, + (void *) store); + return GL_TRUE; } @@ -309,7 +321,10 @@ _slang_free_temp(slang_var_table *vt, slang_ir_storage *store) assert(store->Size > 0); assert(r >= 0); assert(r + store->Size <= vt->MaxRegisters * 4); - if (dbg) printf("Free temp sz %d at %d (level %d)\n", store->Size, r, t->Level); + if (dbg) printf("Free temp sz %d at %d.%s (level %d) store %p\n", + store->Size, r, + _mesa_swizzle_string(store->Swizzle, 0, 0), + t->Level, (void *) store); if (store->Size == 1) { const GLuint comp = GET_SWZ(store->Swizzle, 0); /* we can actually fail some of these assertions because of the @@ -319,8 +334,8 @@ _slang_free_temp(slang_var_table *vt, slang_ir_storage *store) assert(store->Swizzle == MAKE_SWIZZLE4(comp, comp, comp, comp)); assert(comp < 4); assert(t->ValSize[r * 4 + comp] == 1); - assert(t->Temps[r * 4 + comp] == TEMP); #endif + assert(t->Temps[r * 4 + comp] == TEMP); t->Temps[r * 4 + comp] = FREE; } else {