From: Jacob Lifshay Date: Sun, 11 Jun 2017 10:34:57 +0000 (-0700) Subject: shader fully translated X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8e87ee5b5cd4937b775173fe73f87ff0fdc1beff;p=kazan.git shader fully translated --- diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f7d2a05 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +shader.bc +shader.o diff --git a/shader.cpp b/shader.cpp index 9b9b612..0e93206 100644 --- a/shader.cpp +++ b/shader.cpp @@ -1,12 +1,46 @@ #include #include +#include -// shader translated from SuperTuxKart data/shaders/rh.frag -// https://github.com/supertuxkart/stk-code/blob/20ea7ca2711f0cbe5320b4877a5d332b3b935893/data/shaders/rh.frag +namespace shader +{ +constexpr float max(float a, float b) noexcept +{ + return a > b ? a : b; +} -// From http://graphics.cs.aueb.gr/graphics/research_illumination.html -// "Real-Time Diffuse Global Illumination Using Radiance Hints" -// paper and shader code +struct vec2 +{ + float x; + float y; + vec2() = default; + constexpr vec2(float v) noexcept : x(v), y(v) + { + } + constexpr vec2(float x, float y) noexcept : x(x), y(y) + { + } + friend constexpr vec2 operator *(vec2 a, float b) noexcept + { + return vec2(a.x * b, a.y * b); + } + friend constexpr vec2 operator *(float a, vec2 b) noexcept + { + return vec2(a * b.x, a * b.y); + } + friend constexpr vec2 operator +(vec2 a, vec2 b) noexcept + { + return vec2(a.x + b.x, a.y + b.y); + } + friend constexpr vec2 operator -(vec2 a, vec2 b) noexcept + { + return vec2(a.x - b.x, a.y - b.y); + } + friend constexpr vec2 operator /(vec2 a, vec2 b) noexcept + { + return vec2(a.x / b.x, a.y / b.y); + } +}; struct vec3 { @@ -17,15 +51,433 @@ struct vec3 constexpr vec3(float v) noexcept : x(v), y(v), z(v) { } + constexpr vec3(float x, float y, float z) noexcept : x(x), y(y), z(z) + { + } + constexpr vec3(vec2 xy, float z) noexcept : x(xy.x), y(xy.y), z(z) + { + } + constexpr vec3 xzy() const noexcept + { + return vec3(x, z, y); + 
} + constexpr vec3 xyz() const noexcept + { + return vec3(x, y, z); + } + constexpr vec3 rgb() const noexcept + { + return vec3(x, y, z); + } + constexpr float r() const noexcept + { + return x; + } + constexpr float g() const noexcept + { + return y; + } + constexpr float b() const noexcept + { + return z; + } + friend constexpr vec3 operator /(vec3 a, vec3 b) noexcept + { + return vec3(a.x / b.x, a.y / b.y, a.z / b.z); + } + friend constexpr vec3 operator *(vec3 a, vec3 b) noexcept + { + return vec3(a.x * b.x, a.y * b.y, a.z * b.z); + } + friend constexpr vec3 operator +(vec3 a, vec3 b) noexcept + { + return vec3(a.x + b.x, a.y + b.y, a.z + b.z); + } + friend constexpr vec3 operator -(vec3 a, vec3 b) noexcept + { + return vec3(a.x - b.x, a.y - b.y, a.z - b.z); + } }; +struct vec4 +{ + float x; + float y; + float z; + float w; + vec4() = default; + constexpr vec4(float v) noexcept : x(v), y(v), z(v), w(v) + { + } + constexpr vec4(float x, float y, float z, float w) noexcept : x(x), y(y), z(z), w(w) + { + } + constexpr vec4(vec2 xy, float z, float w) noexcept : x(xy.x), y(xy.y), z(z), w(w) + { + } + constexpr vec4(vec3 xyz, float w) noexcept : x(xyz.x), y(xyz.y), z(xyz.z), w(w) + { + } + constexpr vec3 xyz() const noexcept + { + return {x, y, z}; + } + constexpr vec2 xy() const noexcept + { + return {x, y}; + } + friend constexpr vec4 operator *(vec4 a, float b) noexcept + { + return vec4(a.x * b, a.y * b, a.z * b, a.w * b); + } + friend constexpr vec4 operator *(float a, vec4 b) noexcept + { + return vec4(a * b.x, a * b.y, a * b.z, a * b.w); + } + friend constexpr vec4 operator /(vec4 a, float b) noexcept + { + return vec4(a.x / b, a.y / b, a.z / b, a.w / b); + } + constexpr vec4 &operator /=(float v) noexcept + { + return *this = *this / v; + } + friend constexpr vec4 operator +(vec4 a, vec4 b) noexcept + { + return vec4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + } + friend constexpr vec4 operator -(vec4 a, vec4 b) noexcept + { + return vec4(a.x - b.x, a.y - 
b.y, a.z - b.z, a.w - b.w); + } + constexpr vec4 &operator +=(vec4 v) noexcept + { + return *this = *this + v; + } +}; + +constexpr float dot(vec3 a, vec3 b) noexcept +{ + return a.x * b.x + a.y * b.y + a.z * b.z; +} + +inline float length(vec3 v) noexcept +{ + return std::sqrt(dot(v, v)); +} + +inline float distance(vec3 a, vec3 b) noexcept +{ + return length(a - b); +} + +inline vec3 normalize(vec3 v) noexcept +{ + return v / length(v); +} + +struct mat4 +{ + float values[4][4] = {{1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, {0, 0, 0, 1}}; + constexpr mat4() noexcept {} + constexpr mat4(float value_0_0, float value_0_1, float value_0_2, float value_0_3, + float value_1_0, float value_1_1, float value_1_2, float value_1_3, + float value_2_0, float value_2_1, float value_2_2, float value_2_3, + float value_3_0, float value_3_1, float value_3_2, float value_3_3) noexcept + : values{ + {value_0_0, value_0_1, value_0_2, value_0_3}, + {value_1_0, value_1_1, value_1_2, value_1_3}, + {value_2_0, value_2_1, value_2_2, value_2_3}, + {value_3_0, value_3_1, value_3_2, value_3_3}, + } + { + } + friend constexpr mat4 operator *(float a, const mat4 &b) noexcept + { + return mat4(a * b.values[0][0], a * b.values[0][1], a * b.values[0][2], a * b.values[0][3], + a * b.values[1][0], a * b.values[1][1], a * b.values[1][2], a * b.values[1][3], + a * b.values[2][0], a * b.values[2][1], a * b.values[2][2], a * b.values[2][3], + a * b.values[3][0], a * b.values[3][1], a * b.values[3][2], a * b.values[3][3]); + } + friend constexpr mat4 operator *(const mat4 &a, float b) noexcept + { + return mat4(a.values[0][0] * b, a.values[0][1] * b, a.values[0][2] * b, a.values[0][3] * b, + a.values[1][0] * b, a.values[1][1] * b, a.values[1][2] * b, a.values[1][3] * b, + a.values[2][0] * b, a.values[2][1] * b, a.values[2][2] * b, a.values[2][3] * b, + a.values[3][0] * b, a.values[3][1] * b, a.values[3][2] * b, a.values[3][3] * b); + } + friend constexpr vec4 operator *(const mat4 &m, vec4 v) noexcept + 
{ + return vec4(m.values[0][2]*v.z+m.values[0][1]*v.y+m.values[0][0]*v.x + +m.values[0][3]*v.w, + m.values[1][2]*v.z+m.values[1][1]*v.y+m.values[1][0]*v.x + +m.values[1][3]*v.w, + m.values[2][2]*v.z+m.values[2][1]*v.y+m.values[2][0]*v.x + +m.values[2][3]*v.w, + m.values[3][2]*v.z+m.values[3][1]*v.y+m.values[3][0]*v.x + +m.values[3][3]*v.w); + } +}; + +constexpr float determinant(const mat4 &m) noexcept +{ + return ((m.values[0][1]*m.values[1][2] + -m.values[0][2]*m.values[1][1]) + *m.values[2][0] + +(m.values[0][2]*m.values[1][0] + -m.values[0][0]*m.values[1][2]) + *m.values[2][1] + +(m.values[0][0]*m.values[1][1] + -m.values[0][1]*m.values[1][0]) + *m.values[2][2]) + *m.values[3][3] + +((m.values[0][3]*m.values[1][1] + -m.values[0][1]*m.values[1][3]) + *m.values[2][0] + +(m.values[0][0]*m.values[1][3] + -m.values[0][3]*m.values[1][0]) + *m.values[2][1] + +(m.values[0][1]*m.values[1][0] + -m.values[0][0]*m.values[1][1]) + *m.values[2][3]) + *m.values[3][2] + +((m.values[0][2]*m.values[1][3] + -m.values[0][3]*m.values[1][2]) + *m.values[2][0] + +(m.values[0][3]*m.values[1][0] + -m.values[0][0]*m.values[1][3]) + *m.values[2][2] + +(m.values[0][0]*m.values[1][2] + -m.values[0][2]*m.values[1][0]) + *m.values[2][3]) + *m.values[3][1] + +((m.values[0][3]*m.values[1][2] + -m.values[0][2]*m.values[1][3]) + *m.values[2][1] + +(m.values[0][1]*m.values[1][3] + -m.values[0][3]*m.values[1][1]) + *m.values[2][2] + +(m.values[0][2]*m.values[1][1] + -m.values[0][1]*m.values[1][2]) + *m.values[2][3]) + *m.values[3][0]; +} + +constexpr mat4 inverse(const mat4 &m) noexcept +{ + return 1.0f / determinant(m) * mat4((m.values[1][1]*m.values[2][2] + -m.values[1][2]*m.values[2][1]) + *m.values[3][3] + +(m.values[1][3]*m.values[2][1] + -m.values[1][1]*m.values[2][3]) + *m.values[3][2] + +(m.values[1][2]*m.values[2][3] + -m.values[1][3]*m.values[2][2]) + *m.values[3][1], + (m.values[0][2]*m.values[2][1] + -m.values[0][1]*m.values[2][2]) + *m.values[3][3] + +(m.values[0][1]*m.values[2][3] + 
-m.values[0][3]*m.values[2][1]) + *m.values[3][2] + +(m.values[0][3]*m.values[2][2] + -m.values[0][2]*m.values[2][3]) + *m.values[3][1], + (m.values[0][1]*m.values[1][2] + -m.values[0][2]*m.values[1][1]) + *m.values[3][3] + +(m.values[0][3]*m.values[1][1] + -m.values[0][1]*m.values[1][3]) + *m.values[3][2] + +(m.values[0][2]*m.values[1][3] + -m.values[0][3]*m.values[1][2]) + *m.values[3][1], + (m.values[0][2]*m.values[1][1] + -m.values[0][1]*m.values[1][2]) + *m.values[2][3] + +(m.values[0][1]*m.values[1][3] + -m.values[0][3]*m.values[1][1]) + *m.values[2][2] + +(m.values[0][3]*m.values[1][2] + -m.values[0][2]*m.values[1][3]) + *m.values[2][1], + (m.values[1][2]*m.values[2][0] + -m.values[1][0]*m.values[2][2]) + *m.values[3][3] + +(m.values[1][0]*m.values[2][3] + -m.values[1][3]*m.values[2][0]) + *m.values[3][2] + +(m.values[1][3]*m.values[2][2] + -m.values[1][2]*m.values[2][3]) + *m.values[3][0], + (m.values[0][0]*m.values[2][2] + -m.values[0][2]*m.values[2][0]) + *m.values[3][3] + +(m.values[0][3]*m.values[2][0] + -m.values[0][0]*m.values[2][3]) + *m.values[3][2] + +(m.values[0][2]*m.values[2][3] + -m.values[0][3]*m.values[2][2]) + *m.values[3][0], + (m.values[0][2]*m.values[1][0] + -m.values[0][0]*m.values[1][2]) + *m.values[3][3] + +(m.values[0][0]*m.values[1][3] + -m.values[0][3]*m.values[1][0]) + *m.values[3][2] + +(m.values[0][3]*m.values[1][2] + -m.values[0][2]*m.values[1][3]) + *m.values[3][0], + (m.values[0][0]*m.values[1][2] + -m.values[0][2]*m.values[1][0]) + *m.values[2][3] + +(m.values[0][3]*m.values[1][0] + -m.values[0][0]*m.values[1][3]) + *m.values[2][2] + +(m.values[0][2]*m.values[1][3] + -m.values[0][3]*m.values[1][2]) + *m.values[2][0], + (m.values[1][0]*m.values[2][1] + -m.values[1][1]*m.values[2][0]) + *m.values[3][3] + +(m.values[1][3]*m.values[2][0] + -m.values[1][0]*m.values[2][3]) + *m.values[3][1] + +(m.values[1][1]*m.values[2][3] + -m.values[1][3]*m.values[2][1]) + *m.values[3][0], + (m.values[0][1]*m.values[2][0] + 
-m.values[0][0]*m.values[2][1]) + *m.values[3][3] + +(m.values[0][0]*m.values[2][3] + -m.values[0][3]*m.values[2][0]) + *m.values[3][1] + +(m.values[0][3]*m.values[2][1] + -m.values[0][1]*m.values[2][3]) + *m.values[3][0], + (m.values[0][0]*m.values[1][1] + -m.values[0][1]*m.values[1][0]) + *m.values[3][3] + +(m.values[0][3]*m.values[1][0] + -m.values[0][0]*m.values[1][3]) + *m.values[3][1] + +(m.values[0][1]*m.values[1][3] + -m.values[0][3]*m.values[1][1]) + *m.values[3][0], + (m.values[0][1]*m.values[1][0] + -m.values[0][0]*m.values[1][1]) + *m.values[2][3] + +(m.values[0][0]*m.values[1][3] + -m.values[0][3]*m.values[1][0]) + *m.values[2][1] + +(m.values[0][3]*m.values[1][1] + -m.values[0][1]*m.values[1][3]) + *m.values[2][0], + (m.values[1][1]*m.values[2][0] + -m.values[1][0]*m.values[2][1]) + *m.values[3][2] + +(m.values[1][0]*m.values[2][2] + -m.values[1][2]*m.values[2][0]) + *m.values[3][1] + +(m.values[1][2]*m.values[2][1] + -m.values[1][1]*m.values[2][2]) + *m.values[3][0], + (m.values[0][0]*m.values[2][1] + -m.values[0][1]*m.values[2][0]) + *m.values[3][2] + +(m.values[0][2]*m.values[2][0] + -m.values[0][0]*m.values[2][2]) + *m.values[3][1] + +(m.values[0][1]*m.values[2][2] + -m.values[0][2]*m.values[2][1]) + *m.values[3][0], + (m.values[0][1]*m.values[1][0] + -m.values[0][0]*m.values[1][1]) + *m.values[3][2] + +(m.values[0][0]*m.values[1][2] + -m.values[0][2]*m.values[1][0]) + *m.values[3][1] + +(m.values[0][2]*m.values[1][1] + -m.values[0][1]*m.values[1][2]) + *m.values[3][0], + (m.values[0][0]*m.values[1][1] + -m.values[0][1]*m.values[1][0]) + *m.values[2][2] + +(m.values[0][2]*m.values[1][0] + -m.values[0][0]*m.values[1][2]) + *m.values[2][1] + +(m.values[0][1]*m.values[1][2] + -m.values[0][2]*m.values[1][1]) + *m.values[2][0]); +} + +struct pixel +{ + std::uint8_t r, g, b, a; + constexpr operator vec4() const noexcept + { + constexpr float scale_factor = 1.0 / std::numeric_limits<std::uint8_t>::max(); + return vec4(r, g, b, a) * scale_factor; + } +}; + +struct 
sampler2D +{ + const pixel *pixels; + std::size_t width; + std::size_t height; + vec4 get_pixel_int(int x, int y) const noexcept + { + if(x < 0) + x = 0; + else if(static_cast<std::size_t>(x) > width - 1) + x = width - 1; + if(y < 0) + y = 0; + else if(static_cast<std::size_t>(y) > height - 1) + y = height - 1; + return pixels[static_cast<std::size_t>(x) + width * static_cast<std::size_t>(y)]; + } + vec4 get_pixel(vec2 position) const noexcept + { + // bilinear interpolation + int min_x = position.x; // works if position.x >= 0 + int max_x = min_x + 1; + position.x -= min_x; + int min_y = position.y; // works if position.y >= 0 + int max_y = min_y + 1; + position.y -= min_y; + vec4 min_min_value = get_pixel_int(min_x, min_y); + vec4 max_min_value = get_pixel_int(max_x, min_y); + vec4 min_max_value = get_pixel_int(min_x, max_y); + vec4 max_max_value = get_pixel_int(max_x, max_y); + vec4 min_interp_value = min_min_value + position.y * (min_max_value - min_min_value); + vec4 max_interp_value = max_min_value + position.y * (max_max_value - max_min_value); + return min_interp_value + position.x * (max_interp_value - min_interp_value); + } +}; + +vec4 texture(const sampler2D &sampler, vec2 uv) noexcept +{ + return sampler.get_pixel(uv); +} + +extern vec4 gl_FragCoord; + +// shader translated from SuperTuxKart data/shaders/rh.frag +// https://github.com/supertuxkart/stk-code/blob/20ea7ca2711f0cbe5320b4877a5d332b3b935893/data/shaders/rh.frag + +// From http://graphics.cs.aueb.gr/graphics/research_illumination.html +// "Real-Time Diffuse Global Illumination Using Radiance Hints" +// paper and shader code + float R_wcs = 10.f; // Rmax: maximum sampling distance (in WCS units) vec3 extents; mat4 RHMatrix; mat4 RSMMatrix; -sampler2D dtex; -sampler2D ctex; -sampler2D ntex; +extern sampler2D dtex; +extern sampler2D ctex; +extern sampler2D ntex; vec3 suncol; int slice; @@ -66,37 +518,37 @@ static void loop(int i, float depth = texture(dtex, uv).x; vec4 RSMPos = inverse(RSMMatrix) * (2.f * vec4(uv, depth, 1.f) - 1.f); RSMPos /= 
RSMPos.w; - vec3 RSMAlbedo = texture(ctex, uv).xyz; - vec3 normal = normalize(2.f * texture(ntex, uv).xyz - 1.f); + vec3 RSMAlbedo = texture(ctex, uv).xyz(); + vec3 normal = normalize(2.f * texture(ntex, uv).xyz() - 1.f); // Sampled location inside the RH cell vec3 offset3d = vec3(uv, 0); - vec3 SamplePos = RHcenter + .5f * offset3d.xzy * RHCellSize; + vec3 SamplePos = RHcenter + .5f * offset3d.xzy() * RHCellSize; // Normalize distance to RSM sample - float dist = distance(SamplePos, RSMPos.xyz) / R_wcs; + float dist = distance(SamplePos, RSMPos.xyz()) / R_wcs; // Determine the incident direction. // Avoid very close samples (and numerical instability problems) - vec3 RSM_to_RH_dir = (dist <= 0.1f) ? vec3(0.) : normalize(SamplePos - RSMPos.xyz); - float dotprod = max(dot(RSM_to_RH_dir, normal.xyz), 0.f); + vec3 RSM_to_RH_dir = (dist <= 0.1f) ? vec3(0.) : normalize(SamplePos - RSMPos.xyz()); + float dotprod = max(dot(RSM_to_RH_dir, normal.xyz()), 0.f); float factor = dotprod / (0.1f + dist * dist); - vec3 color = RSMAlbedo.rgb * factor * suncol.rgb; + vec3 color = RSMAlbedo.rgb() * factor * suncol.rgb(); - SHr += DirToSh(RSM_to_RH_dir, color.r); - SHg += DirToSh(RSM_to_RH_dir, color.g); - SHb += DirToSh(RSM_to_RH_dir, color.b); + SHr += DirToSh(RSM_to_RH_dir, color.r()); + SHg += DirToSh(RSM_to_RH_dir, color.g()); + SHb += DirToSh(RSM_to_RH_dir, color.b()); } -void shader_main(void) noexcept +extern "C" void shader_main(void) noexcept { - vec3 normalizedRHCenter = 2.f * vec3(gl_FragCoord.xy, slice) / resolution - 1.f; - vec3 RHcenter = (RHMatrix * vec4(normalizedRHCenter * extents, 1.f)).xyz; + vec3 normalizedRHCenter = 2.f * vec3(gl_FragCoord.xy(), slice) / resolution - 1.f; + vec3 RHcenter = (RHMatrix * vec4(normalizedRHCenter * extents, 1.f)).xyz(); vec4 ShadowProjectedRH = RSMMatrix * vec4(RHcenter, 1.f); vec3 RHCellSize = extents / resolution; - vec2 RHuv = .5f * ShadowProjectedRH.xy / ShadowProjectedRH.w + .5f; + vec2 RHuv = .5f * ShadowProjectedRH.xy() / 
ShadowProjectedRH.w + .5f; float RHdepth = .5f * ShadowProjectedRH.z / ShadowProjectedRH.w + .5f; vec4 SHr = vec4(0.f); @@ -131,3 +583,4 @@ void shader_main(void) noexcept SHGreen = SHg; SHBlue = SHb; } +}