nir/from_ssa: Don't lower constant SSA values to registers
[mesa.git] / src / glsl / builtin_functions.cpp
index 26ea9231f73c6deb7375401dfa03b807eb9d214d..bb7fbcdc17dbf1b147c0e126e5c4e0cfa86caa23 100644 (file)
 #include "program/prog_instruction.h"
 #include <limits>
 
+#define M_PIf   ((float) M_PI)
+#define M_PI_2f ((float) M_PI_2)
+#define M_PI_4f ((float) M_PI_4)
+
 using namespace ir_builder;
 
 /**
@@ -221,6 +225,14 @@ shader_packing_or_gpu_shader5(const _mesa_glsl_parse_state *state)
           gpu_shader5(state);
 }
 
+static bool
+fs_gpu_shader5(const _mesa_glsl_parse_state *state)
+{
+   return state->stage == MESA_SHADER_FRAGMENT &&
+          (state->is_version(400, 0) || state->ARB_gpu_shader5_enable);
+}
+
+
 static bool
 texture_array_lod(const _mesa_glsl_parse_state *state)
 {
@@ -305,6 +317,14 @@ fs_oes_derivatives(const _mesa_glsl_parse_state *state)
            state->OES_standard_derivatives_enable);
 }
 
+static bool
+fs_derivative_control(const _mesa_glsl_parse_state *state)
+{
+   return state->stage == MESA_SHADER_FRAGMENT &&
+          (state->is_version(450, 0) ||
+           state->ARB_derivative_control_enable);
+}
+
 static bool
 tex1d_lod(const _mesa_glsl_parse_state *state)
 {
@@ -355,6 +375,12 @@ shader_image_load_store(const _mesa_glsl_parse_state *state)
            state->ARB_shader_image_load_store_enable);
 }
 
+static bool
+gs_streams(const _mesa_glsl_parse_state *state)
+{
+   return gpu_shader5(state) && gs_only(state);
+}
+
 /** @} */
 
 /******************************************************************************/
@@ -416,6 +442,7 @@ private:
    ir_swizzle *matrix_elt(ir_variable *var, int col, int row);
 
    ir_expression *asin_expr(ir_variable *x);
+   void do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x);
 
    /**
     * Call function \param f with parameters specified as the linked
@@ -590,12 +617,22 @@ private:
 
    B0(EmitVertex)
    B0(EndPrimitive)
+   ir_function_signature *_EmitStreamVertex(builtin_available_predicate avail,
+                                            const glsl_type *stream_type);
+   ir_function_signature *_EndStreamPrimitive(builtin_available_predicate avail,
+                                              const glsl_type *stream_type);
 
    B2(textureQueryLod);
    B1(textureQueryLevels);
    B1(dFdx);
    B1(dFdy);
    B1(fwidth);
+   B1(dFdxCoarse);
+   B1(dFdyCoarse);
+   B1(fwidthCoarse);
+   B1(dFdxFine);
+   B1(dFdyFine);
+   B1(fwidthFine);
    B1(noise1);
    B1(noise2);
    B1(noise3);
@@ -613,6 +650,9 @@ private:
    B1(uaddCarry)
    B1(usubBorrow)
    B1(mulExtended)
+   B1(interpolateAtCentroid)
+   B1(interpolateAtOffset)
+   B1(interpolateAtSample)
 
    ir_function_signature *_atomic_intrinsic(builtin_available_predicate avail);
    ir_function_signature *_atomic_op(const char *intrinsic,
@@ -681,7 +721,8 @@ builtin_builder::find(_mesa_glsl_parse_state *state,
    if (f == NULL)
       return NULL;
 
-   ir_function_signature *sig = f->matching_signature(state, actual_parameters);
+   ir_function_signature *sig =
+      f->matching_signature(state, actual_parameters, true);
    if (sig == NULL)
       return NULL;
 
@@ -1704,6 +1745,14 @@ builtin_builder::create_builtins()
 
    add_function("EmitVertex",   _EmitVertex(),   NULL);
    add_function("EndPrimitive", _EndPrimitive(), NULL);
+   add_function("EmitStreamVertex",
+                _EmitStreamVertex(gs_streams, glsl_type::uint_type),
+                _EmitStreamVertex(gs_streams, glsl_type::int_type),
+                NULL);
+   add_function("EndStreamPrimitive",
+                _EndStreamPrimitive(gs_streams, glsl_type::uint_type),
+                _EndStreamPrimitive(gs_streams, glsl_type::int_type),
+                NULL);
 
    add_function("textureQueryLOD",
                 _textureQueryLod(glsl_type::sampler1D_type,  glsl_type::float_type),
@@ -1834,8 +1883,8 @@ builtin_builder::create_builtins()
                 NULL);
 
    add_function("texture2DProjLod",
-                _texture(ir_txl, v110_lod, glsl_type::vec4_type,  glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txl, v110_lod, glsl_type::vec4_type,  glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
+                _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type,  glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
+                _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type,  glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
                 NULL);
 
    add_function("texture3D",
@@ -1862,7 +1911,7 @@ builtin_builder::create_builtins()
                 NULL);
 
    add_function("textureCubeLod",
-                _texture(ir_txl, v110_lod, glsl_type::vec4_type,  glsl_type::samplerCube_type, glsl_type::vec3_type),
+                _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type,  glsl_type::samplerCube_type, glsl_type::vec3_type),
                 NULL);
 
    add_function("texture2DRect",
@@ -2114,6 +2163,12 @@ builtin_builder::create_builtins()
    F(dFdx)
    F(dFdy)
    F(fwidth)
+   F(dFdxCoarse)
+   F(dFdyCoarse)
+   F(fwidthCoarse)
+   F(dFdxFine)
+   F(dFdyFine)
+   F(fwidthFine)
    F(noise1)
    F(noise2)
    F(noise3)
@@ -2164,6 +2219,24 @@ builtin_builder::create_builtins()
                 _mulExtended(glsl_type::uvec3_type),
                 _mulExtended(glsl_type::uvec4_type),
                 NULL);
+   add_function("interpolateAtCentroid",
+                _interpolateAtCentroid(glsl_type::float_type),
+                _interpolateAtCentroid(glsl_type::vec2_type),
+                _interpolateAtCentroid(glsl_type::vec3_type),
+                _interpolateAtCentroid(glsl_type::vec4_type),
+                NULL);
+   add_function("interpolateAtOffset",
+                _interpolateAtOffset(glsl_type::float_type),
+                _interpolateAtOffset(glsl_type::vec2_type),
+                _interpolateAtOffset(glsl_type::vec3_type),
+                _interpolateAtOffset(glsl_type::vec4_type),
+                NULL);
+   add_function("interpolateAtSample",
+                _interpolateAtSample(glsl_type::float_type),
+                _interpolateAtSample(glsl_type::vec2_type),
+                _interpolateAtSample(glsl_type::vec3_type),
+                _interpolateAtSample(glsl_type::vec4_type),
+                NULL);
 
    add_function("atomicCounter",
                 _atomic_op("__intrinsic_atomic_read",
@@ -2538,11 +2611,11 @@ ir_expression *
 builtin_builder::asin_expr(ir_variable *x)
 {
    return mul(sign(x),
-              sub(imm(1.5707964f),
+              sub(imm(M_PI_2f),
                   mul(sqrt(sub(imm(1.0f), abs(x))),
-                      add(imm(1.5707964f),
+                      add(imm(M_PI_2f),
                           mul(abs(x),
-                              add(imm(-0.21460183f),
+                              add(imm(M_PI_4f - 1.0f),
                                   mul(abs(x),
                                       add(imm(0.086566724f),
                                           mul(abs(x), imm(-0.03102955f))))))))));
@@ -2553,8 +2626,7 @@ builtin_builder::call(ir_function *f, ir_variable *ret, exec_list params)
 {
    exec_list actual_params;
 
-   foreach_list(node, &params) {
-      ir_variable *var = (ir_variable *) node;
+   foreach_in_list(ir_variable, var, &params) {
       actual_params.push_tail(var_ref(var));
    }
 
@@ -2586,7 +2658,7 @@ builtin_builder::_acos(const glsl_type *type)
    ir_variable *x = in_var(type, "x");
    MAKE_SIG(type, always_available, 1, x);
 
-   body.emit(ret(sub(imm(1.5707964f), asin_expr(x))));
+   body.emit(ret(sub(imm(M_PI_2f), asin_expr(x))));
 
    return sig;
 }
@@ -2613,23 +2685,19 @@ builtin_builder::_atan2(const glsl_type *type)
       ir_factory outer_then(&outer_if->then_instructions, mem_ctx);
 
       /* Then...call atan(y/x) */
-      ir_variable *y_over_x = outer_then.make_temp(glsl_type::float_type, "y_over_x");
-      outer_then.emit(assign(y_over_x, div(y, x)));
-      outer_then.emit(assign(r, mul(y_over_x, rsq(add(mul(y_over_x, y_over_x),
-                                                      imm(1.0f))))));
-      outer_then.emit(assign(r, asin_expr(r)));
+      do_atan(body, glsl_type::float_type, r, div(y, x));
 
       /*     ...and fix it up: */
       ir_if *inner_if = new(mem_ctx) ir_if(less(x, imm(0.0f)));
       inner_if->then_instructions.push_tail(
          if_tree(gequal(y, imm(0.0f)),
-                 assign(r, add(r, imm(3.141593f))),
-                 assign(r, sub(r, imm(3.141593f)))));
+                 assign(r, add(r, imm(M_PIf))),
+                 assign(r, sub(r, imm(M_PIf)))));
       outer_then.emit(inner_if);
 
       /* Else... */
       outer_if->else_instructions.push_tail(
-         assign(r, mul(sign(y), imm(1.5707965f))));
+         assign(r, mul(sign(y), imm(M_PI_2f))));
 
       body.emit(outer_if);
 
@@ -2640,17 +2708,65 @@ builtin_builder::_atan2(const glsl_type *type)
    return sig;
 }
 
+void
+builtin_builder::do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x)
+{
+   /*
+    * range-reduction, first step:
+    *
+    *      / y_over_x         if |y_over_x| <= 1.0;
+    * x = <
+    *      \ 1.0 / y_over_x   otherwise
+    */
+   ir_variable *x = body.make_temp(type, "atan_x");
+   body.emit(assign(x, div(min2(abs(y_over_x),
+                                imm(1.0f)),
+                           max2(abs(y_over_x),
+                                imm(1.0f)))));
+
+   /*
+    * approximate atan by evaluating polynomial:
+    *
+    * x   * 0.9999793128310355 - x^3  * 0.3326756418091246 +
+    * x^5 * 0.1938924977115610 - x^7  * 0.1173503194786851 +
+    * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
+    */
+   ir_variable *tmp = body.make_temp(type, "atan_tmp");
+   body.emit(assign(tmp, mul(x, x)));
+   body.emit(assign(tmp, mul(add(mul(sub(mul(add(mul(sub(mul(add(mul(imm(-0.0121323213173444f),
+                                                                     tmp),
+                                                                 imm(0.0536813784310406f)),
+                                                             tmp),
+                                                         imm(0.1173503194786851f)),
+                                                     tmp),
+                                                 imm(0.1938924977115610f)),
+                                             tmp),
+                                         imm(0.3326756418091246f)),
+                                     tmp),
+                                 imm(0.9999793128310355f)),
+                             x)));
+
+   /* range-reduction fixup */
+   body.emit(assign(tmp, add(tmp,
+                             mul(b2f(greater(abs(y_over_x),
+                                          imm(1.0f, type->components()))),
+                                  add(mul(tmp,
+                                          imm(-2.0f)),
+                                      imm(M_PI_2f))))));
+
+   /* sign fixup */
+   body.emit(assign(res, mul(tmp, sign(y_over_x))));
+}
+
 ir_function_signature *
 builtin_builder::_atan(const glsl_type *type)
 {
    ir_variable *y_over_x = in_var(type, "y_over_x");
    MAKE_SIG(type, always_available, 1, y_over_x);
 
-   ir_variable *t = body.make_temp(type, "t");
-   body.emit(assign(t, mul(y_over_x, rsq(add(mul(y_over_x, y_over_x),
-                                             imm(1.0f))))));
-
-   body.emit(ret(asin_expr(t)));
+   ir_variable *tmp = body.make_temp(type, "tmp");
+   do_atan(body, type, tmp, y_over_x);
+   body.emit(ret(tmp));
 
    return sig;
 }
@@ -3868,7 +3984,28 @@ builtin_builder::_EmitVertex()
 {
    MAKE_SIG(glsl_type::void_type, gs_only, 0);
 
-   body.emit(new(mem_ctx) ir_emit_vertex());
+   ir_rvalue *stream = new(mem_ctx) ir_constant(0, 1);
+   body.emit(new(mem_ctx) ir_emit_vertex(stream));
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_EmitStreamVertex(builtin_available_predicate avail,
+                                   const glsl_type *stream_type)
+{
+   /* Section 8.12 (Geometry Shader Functions) of the GLSL 4.0 spec says:
+    *
+    *     "Emit the current values of output variables to the current output
+    *     primitive on stream stream. The argument to stream must be a constant
+    *     integral expression."
+    */
+   ir_variable *stream =
+      new(mem_ctx) ir_variable(stream_type, "stream", ir_var_const_in);
+
+   MAKE_SIG(glsl_type::void_type, avail, 1, stream);
+
+   body.emit(new(mem_ctx) ir_emit_vertex(var_ref(stream)));
 
    return sig;
 }
@@ -3878,7 +4015,28 @@ builtin_builder::_EndPrimitive()
 {
    MAKE_SIG(glsl_type::void_type, gs_only, 0);
 
-   body.emit(new(mem_ctx) ir_end_primitive());
+   ir_rvalue *stream = new(mem_ctx) ir_constant(0, 1);
+   body.emit(new(mem_ctx) ir_end_primitive(stream));
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_EndStreamPrimitive(builtin_available_predicate avail,
+                                     const glsl_type *stream_type)
+{
+   /* Section 8.12 (Geometry Shader Functions) of the GLSL 4.0 spec says:
+    *
+    *     "Completes the current output primitive on stream stream and starts
+    *     a new one. The argument to stream must be a constant integral
+    *     expression."
+    */
+   ir_variable *stream =
+      new(mem_ctx) ir_variable(stream_type, "stream", ir_var_const_in);
+
+   MAKE_SIG(glsl_type::void_type, avail, 1, stream);
+
+   body.emit(new(mem_ctx) ir_end_primitive(var_ref(stream)));
 
    return sig;
 }
@@ -3917,7 +4075,11 @@ builtin_builder::_textureQueryLevels(const glsl_type *sampler_type)
 }
 
 UNOP(dFdx, ir_unop_dFdx, fs_oes_derivatives)
+UNOP(dFdxCoarse, ir_unop_dFdx_coarse, fs_derivative_control)
+UNOP(dFdxFine, ir_unop_dFdx_fine, fs_derivative_control)
 UNOP(dFdy, ir_unop_dFdy, fs_oes_derivatives)
+UNOP(dFdyCoarse, ir_unop_dFdy_coarse, fs_derivative_control)
+UNOP(dFdyFine, ir_unop_dFdy_fine, fs_derivative_control)
 
 ir_function_signature *
 builtin_builder::_fwidth(const glsl_type *type)
@@ -3930,6 +4092,30 @@ builtin_builder::_fwidth(const glsl_type *type)
    return sig;
 }
 
+ir_function_signature *
+builtin_builder::_fwidthCoarse(const glsl_type *type)
+{
+   ir_variable *p = in_var(type, "p");
+   MAKE_SIG(type, fs_derivative_control, 1, p);
+
+   body.emit(ret(add(abs(expr(ir_unop_dFdx_coarse, p)),
+                     abs(expr(ir_unop_dFdy_coarse, p)))));
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_fwidthFine(const glsl_type *type)
+{
+   ir_variable *p = in_var(type, "p");
+   MAKE_SIG(type, fs_derivative_control, 1, p);
+
+   body.emit(ret(add(abs(expr(ir_unop_dFdx_fine, p)),
+                     abs(expr(ir_unop_dFdy_fine, p)))));
+
+   return sig;
+}
+
 ir_function_signature *
 builtin_builder::_noise1(const glsl_type *type)
 {
@@ -4196,6 +4382,44 @@ builtin_builder::_mulExtended(const glsl_type *type)
    return sig;
 }
 
+ir_function_signature *
+builtin_builder::_interpolateAtCentroid(const glsl_type *type)
+{
+   ir_variable *interpolant = in_var(type, "interpolant");
+   interpolant->data.must_be_shader_input = 1;
+   MAKE_SIG(type, fs_gpu_shader5, 1, interpolant);
+
+   body.emit(ret(interpolate_at_centroid(interpolant)));
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_interpolateAtOffset(const glsl_type *type)
+{
+   ir_variable *interpolant = in_var(type, "interpolant");
+   interpolant->data.must_be_shader_input = 1;
+   ir_variable *offset = in_var(glsl_type::vec2_type, "offset");
+   MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, offset);
+
+   body.emit(ret(interpolate_at_offset(interpolant, offset)));
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_interpolateAtSample(const glsl_type *type)
+{
+   ir_variable *interpolant = in_var(type, "interpolant");
+   interpolant->data.must_be_shader_input = 1;
+   ir_variable *sample_num = in_var(glsl_type::int_type, "sample_num");
+   MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, sample_num);
+
+   body.emit(ret(interpolate_at_sample(interpolant, sample_num)));
+
+   return sig;
+}
+
 ir_function_signature *
 builtin_builder::_atomic_intrinsic(builtin_available_predicate avail)
 {
@@ -4286,9 +4510,11 @@ builtin_builder::_image_prototype(const glsl_type *image_type,
       sig->parameters.push_tail(in_var(glsl_type::int_type, "sample"));
 
    /* Data arguments. */
-   for (unsigned i = 0; i < num_arguments; ++i)
-      sig->parameters.push_tail(in_var(data_type,
-                                       ralloc_asprintf(NULL, "arg%d", i)));
+   for (unsigned i = 0; i < num_arguments; ++i) {
+      char *arg_name = ralloc_asprintf(NULL, "arg%d", i);
+      sig->parameters.push_tail(in_var(data_type, arg_name));
+      ralloc_free(arg_name);
+   }
 
    /* Set the maximal set of qualifiers allowed for this image
     * built-in.  Function calls with arguments having fewer
@@ -4297,11 +4523,11 @@ builtin_builder::_image_prototype(const glsl_type *image_type,
     * accept everything that needs to be accepted, and reject cases
     * like loads from write-only or stores to read-only images.
     */
-   image->data.image.read_only = flags & IMAGE_FUNCTION_READ_ONLY;
-   image->data.image.write_only = flags & IMAGE_FUNCTION_WRITE_ONLY;
-   image->data.image.coherent = true;
-   image->data.image._volatile = true;
-   image->data.image.restrict_flag = true;
+   image->data.image_read_only = (flags & IMAGE_FUNCTION_READ_ONLY) != 0;
+   image->data.image_write_only = (flags & IMAGE_FUNCTION_WRITE_ONLY) != 0;
+   image->data.image_coherent = true;
+   image->data.image_volatile = true;
+   image->data.image_restrict = true;
 
    return sig;
 }