From: Jacob Lifshay <programmerjake@gmail.com>
Date: Sun, 11 Jun 2017 10:34:57 +0000 (-0700)
Subject: shader fully translated
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8e87ee5b5cd4937b775173fe73f87ff0fdc1beff;p=kazan.git

shader fully translated
---

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f7d2a05
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+shader.bc
+shader.o
diff --git a/shader.cpp b/shader.cpp
index 9b9b612..0e93206 100644
--- a/shader.cpp
+++ b/shader.cpp
@@ -1,12 +1,46 @@
 #include <cstdint>
 #include <cmath>
+#include <limits>
 
-// shader translated from SuperTuxKart data/shaders/rh.frag
-// https://github.com/supertuxkart/stk-code/blob/20ea7ca2711f0cbe5320b4877a5d332b3b935893/data/shaders/rh.frag
+namespace shader
+{
+constexpr float max(float a, float b) noexcept // returns a when a > b, otherwise b
+{
+    return a > b ? a : b;
+}
 
-// From http://graphics.cs.aueb.gr/graphics/research_illumination.html
-// "Real-Time Diffuse Global Illumination Using Radiance Hints"
-// paper and shader code
+struct vec2 // two-component float vector with component-wise arithmetic
+{
+    float x;
+    float y;
+    vec2() = default; // defaulted: default-initialization leaves x and y unset
+    constexpr vec2(float v) noexcept : x(v), y(v) // broadcast v to both components; not explicit, so a float converts implicitly
+    {
+    }
+    constexpr vec2(float x, float y) noexcept : x(x), y(y) // build from individual components
+    {
+    }
+    friend constexpr vec2 operator *(vec2 a, float b) noexcept // scale each component by b
+    {
+        return vec2(a.x * b, a.y * b);
+    }
+    friend constexpr vec2 operator *(float a, vec2 b) noexcept // same scaling with the scalar on the left
+    {
+        return vec2(a * b.x, a * b.y);
+    }
+    friend constexpr vec2 operator +(vec2 a, vec2 b) noexcept // component-wise sum
+    {
+        return vec2(a.x + b.x, a.y + b.y);
+    }
+    friend constexpr vec2 operator -(vec2 a, vec2 b) noexcept // component-wise difference
+    {
+        return vec2(a.x - b.x, a.y - b.y);
+    }
+    friend constexpr vec2 operator /(vec2 a, vec2 b) noexcept // component-wise quotient; divisors are not checked for zero
+    {
+        return vec2(a.x / b.x, a.y / b.y);
+    }
+};
 
 struct vec3
 {
@@ -17,15 +51,433 @@ struct vec3
     constexpr vec3(float v) noexcept : x(v), y(v), z(v)
     {
     }
+    constexpr vec3(float x, float y, float z) noexcept : x(x), y(y), z(z) // build from individual components
+    {
+    }
+    constexpr vec3(vec2 xy, float z) noexcept : x(xy.x), y(xy.y), z(z) // x and y come from xy, z is given separately
+    {
+    }
+    constexpr vec3 xzy() const noexcept // copy with the y and z components swapped
+    {
+        return vec3(x, z, y);
+    }
+    constexpr vec3 xyz() const noexcept // plain copy of all three components, in order
+    {
+        return vec3(x, y, z);
+    }
+    constexpr vec3 rgb() const noexcept // colour-named accessor; returns the same value as xyz()
+    {
+        return vec3(x, y, z);
+    }
+    constexpr float r() const noexcept // colour-named read of x
+    {
+        return x;
+    }
+    constexpr float g() const noexcept // colour-named read of y
+    {
+        return y;
+    }
+    constexpr float b() const noexcept // colour-named read of z
+    {
+        return z;
+    }
+    friend constexpr vec3 operator /(vec3 a, vec3 b) noexcept // component-wise quotient; divisors are not checked for zero
+    {
+        return vec3(a.x / b.x, a.y / b.y, a.z / b.z);
+    }
+    friend constexpr vec3 operator *(vec3 a, vec3 b) noexcept // component-wise product (not a dot or cross product)
+    {
+        return vec3(a.x * b.x, a.y * b.y, a.z * b.z);
+    }
+    friend constexpr vec3 operator +(vec3 a, vec3 b) noexcept // component-wise sum
+    {
+        return vec3(a.x + b.x, a.y + b.y, a.z + b.z);
+    }
+    friend constexpr vec3 operator -(vec3 a, vec3 b) noexcept // component-wise difference
+    {
+        return vec3(a.x - b.x, a.y - b.y, a.z - b.z);
+    }
 };
 
+struct vec4 // four-component float vector with component-wise arithmetic
+{
+    float x;
+    float y;
+    float z;
+    float w;
+    vec4() = default; // defaulted: default-initialization leaves the components unset
+    constexpr vec4(float v) noexcept : x(v), y(v), z(v), w(v) // broadcast v to all four components; not explicit, so a float converts implicitly
+    {
+    }
+    constexpr vec4(float x, float y, float z, float w) noexcept : x(x), y(y), z(z), w(w) // build from individual components
+    {
+    }
+    constexpr vec4(vec2 xy, float z, float w) noexcept : x(xy.x), y(xy.y), z(z), w(w) // x and y come from xy
+    {
+    }
+    constexpr vec4(vec3 xyz, float w) noexcept : x(xyz.x), y(xyz.y), z(xyz.z), w(w) // x, y and z come from xyz
+    {
+    }
+    constexpr vec3 xyz() const noexcept // first three components as a vec3
+    {
+        return {x, y, z};
+    }
+    constexpr vec2 xy() const noexcept // first two components as a vec2
+    {
+        return {x, y};
+    }
+    friend constexpr vec4 operator *(vec4 a, float b) noexcept // scale each component by b
+    {
+        return vec4(a.x * b, a.y * b, a.z * b, a.w * b);
+    }
+    friend constexpr vec4 operator *(float a, vec4 b) noexcept // same scaling with the scalar on the left
+    {
+        return vec4(a * b.x, a * b.y, a * b.z, a * b.w);
+    }
+    friend constexpr vec4 operator /(vec4 a, float b) noexcept // divide each component by b; b is not checked for zero
+    {
+        return vec4(a.x / b, a.y / b, a.z / b, a.w / b);
+    }
+    constexpr vec4 &operator /=(float v) noexcept // in-place form of operator /(vec4, float)
+    {
+        return *this = *this / v;
+    }
+    friend constexpr vec4 operator +(vec4 a, vec4 b) noexcept // component-wise sum
+    {
+        return vec4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
+    }
+    friend constexpr vec4 operator -(vec4 a, vec4 b) noexcept // component-wise difference
+    {
+        return vec4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
+    }
+    constexpr vec4 &operator +=(vec4 v) noexcept // in-place form of operator +(vec4, vec4)
+    {
+        return *this = *this + v;
+    }
+};
+
+constexpr float dot(vec3 a, vec3 b) noexcept // dot product: sum of the component-wise products
+{
+    return a.x * b.x + a.y * b.y + a.z * b.z;
+}
+
+inline float length(vec3 v) noexcept // Euclidean length: square root of dot(v, v)
+{
+    return std::sqrt(dot(v, v));
+}
+
+inline float distance(vec3 a, vec3 b) noexcept // length of the difference vector a - b
+{
+    return length(a - b);
+}
+
+inline vec3 normalize(vec3 v) noexcept // v divided by its own length; a zero-length v divides by zero (no guard)
+{
+    return v / length(v); // the float length converts to a vec3 via the broadcasting constructor
+}
+
+struct mat4 // 4x4 float matrix stored as values[i][j]
+{
+    float values[4][4] = {{1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, {0, 0, 0, 1}}; // default member initializer: identity matrix
+    constexpr mat4() noexcept {} // keeps the identity default above
+    constexpr mat4(float value_0_0, float value_0_1, float value_0_2, float value_0_3,
+                   float value_1_0, float value_1_1, float value_1_2, float value_1_3,
+                   float value_2_0, float value_2_1, float value_2_2, float value_2_3,
+                   float value_3_0, float value_3_1, float value_3_2, float value_3_3) noexcept // value_I_J is stored in values[I][J]
+        : values{
+            {value_0_0, value_0_1, value_0_2, value_0_3},
+            {value_1_0, value_1_1, value_1_2, value_1_3},
+            {value_2_0, value_2_1, value_2_2, value_2_3},
+            {value_3_0, value_3_1, value_3_2, value_3_3},
+        }
+    {
+    }
+    friend constexpr mat4 operator *(float a, const mat4 &b) noexcept // scale every element of b by a
+    {
+        return mat4(a * b.values[0][0], a * b.values[0][1], a * b.values[0][2], a * b.values[0][3],
+                    a * b.values[1][0], a * b.values[1][1], a * b.values[1][2], a * b.values[1][3],
+                    a * b.values[2][0], a * b.values[2][1], a * b.values[2][2], a * b.values[2][3],
+                    a * b.values[3][0], a * b.values[3][1], a * b.values[3][2], a * b.values[3][3]);
+    }
+    friend constexpr mat4 operator *(const mat4 &a, float b) noexcept // same scaling with the scalar on the right
+    {
+        return mat4(a.values[0][0] * b, a.values[0][1] * b, a.values[0][2] * b, a.values[0][3] * b,
+                    a.values[1][0] * b, a.values[1][1] * b, a.values[1][2] * b, a.values[1][3] * b,
+                    a.values[2][0] * b, a.values[2][1] * b, a.values[2][2] * b, a.values[2][3] * b,
+                    a.values[3][0] * b, a.values[3][1] * b, a.values[3][2] * b, a.values[3][3] * b);
+    }
+    friend constexpr vec4 operator *(const mat4 &m, vec4 v) noexcept // matrix * vector: output component i is the sum over j of values[i][j] * v[j]
+    {
+        return vec4(m.values[0][2]*v.z+m.values[0][1]*v.y+m.values[0][0]*v.x
+                         +m.values[0][3]*v.w,
+ m.values[1][2]*v.z+m.values[1][1]*v.y+m.values[1][0]*v.x
+                         +m.values[1][3]*v.w,
+ m.values[2][2]*v.z+m.values[2][1]*v.y+m.values[2][0]*v.x
+                         +m.values[2][3]*v.w,
+ m.values[3][2]*v.z+m.values[3][1]*v.y+m.values[3][0]*v.x
+                         +m.values[3][3]*v.w);
+    }
+};
+
+constexpr float determinant(const mat4 &m) noexcept // 4x4 determinant as one closed-form expression: four 3x3 sub-expressions, each multiplied by one element of values[3][*]
+{
+    return ((m.values[0][1]*m.values[1][2]
+ -m.values[0][2]*m.values[1][1])
+ *m.values[2][0]
+ +(m.values[0][2]*m.values[1][0]
+  -m.values[0][0]*m.values[1][2])
+  *m.values[2][1]
+ +(m.values[0][0]*m.values[1][1]
+  -m.values[0][1]*m.values[1][0])
+  *m.values[2][2])
+ *m.values[3][3]
+ +((m.values[0][3]*m.values[1][1]
+  -m.values[0][1]*m.values[1][3])
+  *m.values[2][0]
+  +(m.values[0][0]*m.values[1][3]
+   -m.values[0][3]*m.values[1][0])
+   *m.values[2][1]
+  +(m.values[0][1]*m.values[1][0]
+   -m.values[0][0]*m.values[1][1])
+   *m.values[2][3])
+  *m.values[3][2]
+ +((m.values[0][2]*m.values[1][3]
+  -m.values[0][3]*m.values[1][2])
+  *m.values[2][0]
+  +(m.values[0][3]*m.values[1][0]
+   -m.values[0][0]*m.values[1][3])
+   *m.values[2][2]
+  +(m.values[0][0]*m.values[1][2]
+   -m.values[0][2]*m.values[1][0])
+   *m.values[2][3])
+  *m.values[3][1]
+ +((m.values[0][3]*m.values[1][2]
+  -m.values[0][2]*m.values[1][3])
+  *m.values[2][1]
+  +(m.values[0][1]*m.values[1][3]
+   -m.values[0][3]*m.values[1][1])
+   *m.values[2][2]
+  +(m.values[0][2]*m.values[1][1]
+   -m.values[0][1]*m.values[1][2])
+   *m.values[2][3])
+  *m.values[3][0];
+}
+
+constexpr mat4 inverse(const mat4 &m) noexcept // matrix inverse: a matrix of 16 closed-form 3x3 expressions scaled by 1 / determinant(m)
+{
+    return 1.0f / determinant(m) * mat4((m.values[1][1]*m.values[2][2] // no singularity check: a zero determinant divides by zero
+         -m.values[1][2]*m.values[2][1])
+         *m.values[3][3]
+         +(m.values[1][3]*m.values[2][1]
+          -m.values[1][1]*m.values[2][3])
+          *m.values[3][2]
+         +(m.values[1][2]*m.values[2][3]
+          -m.values[1][3]*m.values[2][2])
+          *m.values[3][1],
+        (m.values[0][2]*m.values[2][1]
+         -m.values[0][1]*m.values[2][2])
+         *m.values[3][3]
+         +(m.values[0][1]*m.values[2][3]
+          -m.values[0][3]*m.values[2][1])
+          *m.values[3][2]
+         +(m.values[0][3]*m.values[2][2]
+          -m.values[0][2]*m.values[2][3])
+          *m.values[3][1],
+        (m.values[0][1]*m.values[1][2]
+         -m.values[0][2]*m.values[1][1])
+         *m.values[3][3]
+         +(m.values[0][3]*m.values[1][1]
+          -m.values[0][1]*m.values[1][3])
+          *m.values[3][2]
+         +(m.values[0][2]*m.values[1][3]
+          -m.values[0][3]*m.values[1][2])
+          *m.values[3][1],
+        (m.values[0][2]*m.values[1][1]
+         -m.values[0][1]*m.values[1][2])
+         *m.values[2][3]
+         +(m.values[0][1]*m.values[1][3]
+          -m.values[0][3]*m.values[1][1])
+          *m.values[2][2]
+         +(m.values[0][3]*m.values[1][2]
+          -m.values[0][2]*m.values[1][3])
+          *m.values[2][1],
+        (m.values[1][2]*m.values[2][0]
+         -m.values[1][0]*m.values[2][2])
+         *m.values[3][3]
+         +(m.values[1][0]*m.values[2][3]
+          -m.values[1][3]*m.values[2][0])
+          *m.values[3][2]
+         +(m.values[1][3]*m.values[2][2]
+          -m.values[1][2]*m.values[2][3])
+          *m.values[3][0],
+        (m.values[0][0]*m.values[2][2]
+         -m.values[0][2]*m.values[2][0])
+         *m.values[3][3]
+         +(m.values[0][3]*m.values[2][0]
+          -m.values[0][0]*m.values[2][3])
+          *m.values[3][2]
+         +(m.values[0][2]*m.values[2][3]
+          -m.values[0][3]*m.values[2][2])
+          *m.values[3][0],
+        (m.values[0][2]*m.values[1][0]
+         -m.values[0][0]*m.values[1][2])
+         *m.values[3][3]
+         +(m.values[0][0]*m.values[1][3]
+          -m.values[0][3]*m.values[1][0])
+          *m.values[3][2]
+         +(m.values[0][3]*m.values[1][2]
+          -m.values[0][2]*m.values[1][3])
+          *m.values[3][0],
+        (m.values[0][0]*m.values[1][2]
+         -m.values[0][2]*m.values[1][0])
+         *m.values[2][3]
+         +(m.values[0][3]*m.values[1][0]
+          -m.values[0][0]*m.values[1][3])
+          *m.values[2][2]
+         +(m.values[0][2]*m.values[1][3]
+          -m.values[0][3]*m.values[1][2])
+          *m.values[2][0],
+        (m.values[1][0]*m.values[2][1]
+         -m.values[1][1]*m.values[2][0])
+         *m.values[3][3]
+         +(m.values[1][3]*m.values[2][0]
+          -m.values[1][0]*m.values[2][3])
+          *m.values[3][1]
+         +(m.values[1][1]*m.values[2][3]
+          -m.values[1][3]*m.values[2][1])
+          *m.values[3][0],
+        (m.values[0][1]*m.values[2][0]
+         -m.values[0][0]*m.values[2][1])
+         *m.values[3][3]
+         +(m.values[0][0]*m.values[2][3]
+          -m.values[0][3]*m.values[2][0])
+          *m.values[3][1]
+         +(m.values[0][3]*m.values[2][1]
+          -m.values[0][1]*m.values[2][3])
+          *m.values[3][0],
+        (m.values[0][0]*m.values[1][1]
+         -m.values[0][1]*m.values[1][0])
+         *m.values[3][3]
+         +(m.values[0][3]*m.values[1][0]
+          -m.values[0][0]*m.values[1][3])
+          *m.values[3][1]
+         +(m.values[0][1]*m.values[1][3]
+          -m.values[0][3]*m.values[1][1])
+          *m.values[3][0],
+        (m.values[0][1]*m.values[1][0]
+         -m.values[0][0]*m.values[1][1])
+         *m.values[2][3]
+         +(m.values[0][0]*m.values[1][3]
+          -m.values[0][3]*m.values[1][0])
+          *m.values[2][1]
+         +(m.values[0][3]*m.values[1][1]
+          -m.values[0][1]*m.values[1][3])
+          *m.values[2][0],
+        (m.values[1][1]*m.values[2][0]
+         -m.values[1][0]*m.values[2][1])
+         *m.values[3][2]
+         +(m.values[1][0]*m.values[2][2]
+          -m.values[1][2]*m.values[2][0])
+          *m.values[3][1]
+         +(m.values[1][2]*m.values[2][1]
+          -m.values[1][1]*m.values[2][2])
+          *m.values[3][0],
+        (m.values[0][0]*m.values[2][1]
+         -m.values[0][1]*m.values[2][0])
+         *m.values[3][2]
+         +(m.values[0][2]*m.values[2][0]
+          -m.values[0][0]*m.values[2][2])
+          *m.values[3][1]
+         +(m.values[0][1]*m.values[2][2]
+          -m.values[0][2]*m.values[2][1])
+          *m.values[3][0],
+        (m.values[0][1]*m.values[1][0]
+         -m.values[0][0]*m.values[1][1])
+         *m.values[3][2]
+         +(m.values[0][0]*m.values[1][2]
+          -m.values[0][2]*m.values[1][0])
+          *m.values[3][1]
+         +(m.values[0][2]*m.values[1][1]
+          -m.values[0][1]*m.values[1][2])
+          *m.values[3][0],
+        (m.values[0][0]*m.values[1][1]
+         -m.values[0][1]*m.values[1][0])
+         *m.values[2][2]
+         +(m.values[0][2]*m.values[1][0]
+          -m.values[0][0]*m.values[1][2])
+          *m.values[2][1]
+         +(m.values[0][1]*m.values[1][2]
+          -m.values[0][2]*m.values[1][1])
+          *m.values[2][0]);
+}
+
+struct pixel // one texel made of four 8-bit channels
+{
+    std::uint8_t r, g, b, a;
+    constexpr operator vec4() const noexcept // implicit conversion: each channel is scaled from 0..255 to 0.0..1.0
+    {
+        constexpr float scale_factor = 1.0 / std::numeric_limits<std::uint8_t>::max(); // 1 / 255
+        return vec4(r, g, b, a) * scale_factor;
+    }
+};
+
+struct sampler2D // non-owning view of a width x height image of pixel values; width and height must be at least 1 (width - 1 wraps otherwise)
+{
+    const pixel *pixels; // texel (x, y) lives at pixels[x + width * y]
+    std::size_t width;
+    std::size_t height;
+    vec4 get_pixel_int(int x, int y) const noexcept // fetch one texel as a vec4, clamping x and y to the image edges
+    {
+        if(x < 0)
+            x = 0;
+        else if(static_cast<std::size_t>(x) > width - 1)
+            x = static_cast<int>(width - 1);
+        if(y < 0)
+            y = 0;
+        else if(static_cast<std::size_t>(y) > height - 1)
+            y = static_cast<int>(height - 1);
+        return pixels[static_cast<std::size_t>(x) + width * static_cast<std::size_t>(y)]; // pixel converts to vec4 implicitly
+    }
+    vec4 get_pixel(vec2 position) const noexcept // bilinear sample; position is in texel units, not normalized
+    {
+        // bilinear interpolation
+        int min_x = static_cast<int>(std::floor(position.x)); // floor, not truncation: keeps the weight in [0, 1) for negative positions too
+        int max_x = min_x + 1;
+        position.x -= min_x; // now the horizontal blend weight
+        int min_y = static_cast<int>(std::floor(position.y)); // floor for the same reason as min_x
+        int max_y = min_y + 1;
+        position.y -= min_y; // now the vertical blend weight
+        vec4 min_min_value = get_pixel_int(min_x, min_y);
+        vec4 max_min_value = get_pixel_int(max_x, min_y);
+        vec4 min_max_value = get_pixel_int(min_x, max_y);
+        vec4 max_max_value = get_pixel_int(max_x, max_y);
+        vec4 min_interp_value = min_min_value + position.y * (min_max_value - min_min_value); // blend along y at min_x
+        vec4 max_interp_value = max_min_value + position.y * (max_max_value - max_min_value); // blend along y at max_x
+        return min_interp_value + position.x * (max_interp_value - min_interp_value); // then blend along x
+    }
+};
+
+vec4 texture(const sampler2D &sampler, vec2 uv) noexcept // GLSL-style lookup: uv is normalized, so (0,0)..(1,1) spans the whole image
+{
+    return sampler.get_pixel(vec2(uv.x * sampler.width, uv.y * sampler.height)); // scale to the texel units get_pixel() indexes in; no half-texel centre offset is applied
+}
+
+extern vec4 gl_FragCoord; // declaration only (no definition is visible in this patch); shader_main() reads its xy()
+
+// shader translated from SuperTuxKart data/shaders/rh.frag
+// https://github.com/supertuxkart/stk-code/blob/20ea7ca2711f0cbe5320b4877a5d332b3b935893/data/shaders/rh.frag
+
+// From http://graphics.cs.aueb.gr/graphics/research_illumination.html
+// "Real-Time Diffuse Global Illumination Using Radiance Hints"
+// paper and shader code
+
 float R_wcs = 10.f;            // Rmax: maximum sampling distance (in WCS units)
 vec3 extents;
 mat4 RHMatrix;
 mat4 RSMMatrix;
-sampler2D dtex;
-sampler2D ctex;
-sampler2D ntex;
+extern sampler2D dtex; // declaration only (no definition is visible in this patch); loop() reads depth from the .x of its texels
+extern sampler2D ctex; // declaration only; loop() reads RSMAlbedo from it
+extern sampler2D ntex; // declaration only; loop() reads the encoded normal from it
 vec3 suncol;
 
 int slice;
@@ -66,37 +518,37 @@ static void loop(int i,
     float depth = texture(dtex, uv).x;
     vec4 RSMPos = inverse(RSMMatrix) * (2.f * vec4(uv, depth, 1.f) - 1.f);
     RSMPos /= RSMPos.w;
-    vec3 RSMAlbedo = texture(ctex, uv).xyz;
-    vec3 normal = normalize(2.f * texture(ntex, uv).xyz - 1.f);
+    vec3 RSMAlbedo = texture(ctex, uv).xyz();
+    vec3 normal = normalize(2.f * texture(ntex, uv).xyz() - 1.f);
 
     // Sampled location inside the RH cell
     vec3 offset3d = vec3(uv, 0);
-    vec3 SamplePos = RHcenter + .5f * offset3d.xzy * RHCellSize;
+    vec3 SamplePos = RHcenter + .5f * offset3d.xzy() * RHCellSize;
 
     // Normalize distance to RSM sample
-    float dist = distance(SamplePos, RSMPos.xyz) / R_wcs;
+    float dist = distance(SamplePos, RSMPos.xyz()) / R_wcs;
     // Determine the incident direction.
     // Avoid very close samples (and numerical instability problems)
-    vec3 RSM_to_RH_dir = (dist <= 0.1f) ? vec3(0.) : normalize(SamplePos - RSMPos.xyz);
-    float dotprod = max(dot(RSM_to_RH_dir, normal.xyz), 0.f);
+    vec3 RSM_to_RH_dir = (dist <= 0.1f) ? vec3(0.) : normalize(SamplePos - RSMPos.xyz());
+    float dotprod = max(dot(RSM_to_RH_dir, normal.xyz()), 0.f);
     float factor = dotprod / (0.1f + dist * dist);
 
-    vec3 color = RSMAlbedo.rgb * factor * suncol.rgb;
+    vec3 color = RSMAlbedo.rgb() * factor * suncol.rgb();
 
-    SHr += DirToSh(RSM_to_RH_dir, color.r);
-    SHg += DirToSh(RSM_to_RH_dir, color.g);
-    SHb += DirToSh(RSM_to_RH_dir, color.b);
+    SHr += DirToSh(RSM_to_RH_dir, color.r());
+    SHg += DirToSh(RSM_to_RH_dir, color.g());
+    SHb += DirToSh(RSM_to_RH_dir, color.b());
 }
 
-void shader_main(void) noexcept
+extern "C" void shader_main(void) noexcept
 {
-    vec3 normalizedRHCenter = 2.f * vec3(gl_FragCoord.xy, slice) / resolution - 1.f;
-    vec3 RHcenter = (RHMatrix * vec4(normalizedRHCenter * extents, 1.f)).xyz;
+    vec3 normalizedRHCenter = 2.f * vec3(gl_FragCoord.xy(), slice) / resolution - 1.f;
+    vec3 RHcenter = (RHMatrix * vec4(normalizedRHCenter * extents, 1.f)).xyz();
 
     vec4 ShadowProjectedRH = RSMMatrix * vec4(RHcenter, 1.f);
 
     vec3 RHCellSize = extents / resolution;
-    vec2 RHuv = .5f * ShadowProjectedRH.xy / ShadowProjectedRH.w + .5f;
+    vec2 RHuv = .5f * ShadowProjectedRH.xy() / ShadowProjectedRH.w + .5f;
     float RHdepth = .5f * ShadowProjectedRH.z / ShadowProjectedRH.w + .5f;
 
     vec4  SHr = vec4(0.f);
@@ -131,3 +583,4 @@ void shader_main(void) noexcept
     SHGreen = SHg;
     SHBlue = SHb;
 }
+}