*/
#include "main/macros.h"
+#include "main/mtypes.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_builder.h"
#include "ir_rvalue_visitor.h"
#include "util/half_float.h"
#include "util/set.h"
+#include "util/hash_table.h"
#include <vector>
namespace {
class find_precision_visitor : public ir_rvalue_enter_visitor {
public:
- find_precision_visitor();
+ find_precision_visitor(const struct gl_shader_compiler_options *options);
~find_precision_visitor();
virtual void handle_rvalue(ir_rvalue **rvalue);
+ virtual ir_visitor_status visit_enter(ir_call *ir);
- bool progress;
+ ir_function_signature *map_builtin(ir_function_signature *sig);
/* Set of rvalues that can be lowered. This will be filled in by
* find_lowerable_rvalues_visitor. Only the root node of a lowerable section
* will be added to this set.
*/
struct set *lowerable_rvalues;
+
+ /**
+ * A mapping of builtin signature functions to lowered versions. This is
+ * filled in lazily when a lowered version is needed.
+ */
+ struct hash_table *lowered_builtins;
+ /**
+ * A temporary hash table only used in order to clone functions.
+ */
+ struct hash_table *clone_ht;
+
+ void *lowered_builtin_mem_ctx;
+
+ const struct gl_shader_compiler_options *options;
};
class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
std::vector<ir_instruction *> lowerable_children;
};
- find_lowerable_rvalues_visitor(struct set *result);
+ find_lowerable_rvalues_visitor(struct set *result,
+ const struct gl_shader_compiler_options *options);
static void stack_enter(class ir_instruction *ir, void *data);
static void stack_leave(class ir_instruction *ir, void *data);
virtual ir_visitor_status visit_leave(ir_assignment *ir);
virtual ir_visitor_status visit_leave(ir_call *ir);
- static can_lower_state handle_precision(const glsl_type *type,
- int precision);
+ can_lower_state handle_precision(const glsl_type *type,
+ int precision) const;
static parent_relation get_parent_relation(ir_instruction *parent,
ir_instruction *child);
std::vector<stack_entry> stack;
struct set *lowerable_rvalues;
+ const struct gl_shader_compiler_options *options;
void pop_stack_entry();
void add_lowerable_children(const stack_entry &entry);
virtual ir_visitor_status visit_leave(ir_expression *);
};
-bool
-can_lower_type(const glsl_type *type)
+static bool
+can_lower_type(const struct gl_shader_compiler_options *options,
+ const glsl_type *type)
{
/* Don’t lower any expressions involving non-float types except bool and
* texture samplers. This will rule out operations that change the type such
* boolean types so that it will do comparisons as 16-bit.
*/
- switch (type->base_type) {
- case GLSL_TYPE_FLOAT:
+ switch (type->without_array()->base_type) {
+ /* TODO: should we do anything for these two with regard to Int16 vs FP16
+ * support?
+ */
case GLSL_TYPE_BOOL:
case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
return true;
+ case GLSL_TYPE_FLOAT:
+ return options->LowerPrecisionFloat16;
+
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ return options->LowerPrecisionInt16;
+
default:
return false;
}
}
-find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res)
+find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
+ const struct gl_shader_compiler_options *opts)
{
lowerable_rvalues = res;
+ options = opts;
callback_enter = stack_enter;
callback_leave = stack_leave;
data_enter = this;
void
find_lowerable_rvalues_visitor::pop_stack_entry()
{
- const stack_entry &entry = stack.end()[-1];
+ const stack_entry &entry = stack.back();
if (stack.size() >= 2) {
/* Combine this state into the parent state, unless the parent operation
enum find_lowerable_rvalues_visitor::can_lower_state
find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
- int precision)
+ int precision) const
{
- if (!can_lower_type(type))
+ if (!can_lower_type(options, type))
return CANT_LOWER;
switch (precision) {
{
stack_enter(ir, this);
- if (!can_lower_type(ir->type))
- stack.end()[-1].state = CANT_LOWER;
+ if (!can_lower_type(options, ir->type))
+ stack.back().state = CANT_LOWER;
stack_leave(ir, this);
{
stack_enter(ir, this);
- if (stack.end()[-1].state == UNKNOWN)
- stack.end()[-1].state = handle_precision(ir->type, ir->precision());
+ if (stack.back().state == UNKNOWN)
+ stack.back().state = handle_precision(ir->type, ir->precision());
stack_leave(ir, this);
{
ir_hierarchical_visitor::visit_enter(ir);
- if (stack.end()[-1].state == UNKNOWN)
- stack.end()[-1].state = handle_precision(ir->type, ir->precision());
+ if (stack.back().state == UNKNOWN)
+ stack.back().state = handle_precision(ir->type, ir->precision());
return visit_continue;
}
{
ir_hierarchical_visitor::visit_enter(ir);
- if (stack.end()[-1].state == UNKNOWN)
- stack.end()[-1].state = handle_precision(ir->type, ir->precision());
+ if (stack.back().state == UNKNOWN)
+ stack.back().state = handle_precision(ir->type, ir->precision());
return visit_continue;
}
{
ir_hierarchical_visitor::visit_enter(ir);
- if (stack.end()[-1].state == UNKNOWN) {
- /* The precision of the sample value depends on the precision of the
- * sampler.
- */
- stack.end()[-1].state = handle_precision(ir->type,
- ir->sampler->precision());
- }
-
+ /* The precision of the sample value depends on the precision of the
+ * sampler.
+ */
+ stack.back().state = handle_precision(ir->type,
+ ir->sampler->precision());
return visit_continue;
}
{
ir_hierarchical_visitor::visit_enter(ir);
- if (!can_lower_type(ir->type))
- stack.end()[-1].state = CANT_LOWER;
+ if (!can_lower_type(options, ir->type))
+ stack.back().state = CANT_LOWER;
/* Don't lower precision for derivative calculations */
- if (ir->operation == ir_unop_dFdx ||
- ir->operation == ir_unop_dFdx_coarse ||
- ir->operation == ir_unop_dFdx_fine ||
- ir->operation == ir_unop_dFdy ||
- ir->operation == ir_unop_dFdy_coarse ||
- ir->operation == ir_unop_dFdy_fine) {
- stack.end()[-1].state = CANT_LOWER;
+ if (!options->LowerPrecisionDerivatives &&
+ (ir->operation == ir_unop_dFdx ||
+ ir->operation == ir_unop_dFdx_coarse ||
+ ir->operation == ir_unop_dFdx_fine ||
+ ir->operation == ir_unop_dFdy ||
+ ir->operation == ir_unop_dFdy_coarse ||
+ ir->operation == ir_unop_dFdy_fine)) {
+ stack.back().state = CANT_LOWER;
}
return visit_continue;
}
+/* Return true for builtins whose return value is always mediump or lowp
+ * regardless of the precision of their arguments: bit positions/counts and
+ * unpacked 8- or 16-bit components always fit in 16 bits.
+ */
+static bool
+function_always_returns_mediump_or_lowp(const char *name)
+{
+ return !strcmp(name, "bitCount") ||
+ !strcmp(name, "findLSB") ||
+ !strcmp(name, "findMSB") ||
+ !strcmp(name, "unpackHalf2x16") ||
+ !strcmp(name, "unpackUnorm4x8") ||
+ !strcmp(name, "unpackSnorm4x8");
+}
+
+/* Determine the precision to assume for the return value of a call.
+ *
+ * \param ir                 the call instruction
+ * \param lowerable_rvalues  set filled in by find_lowerable_rvalues_visitor,
+ *                           consulted to decide whether a builtin's
+ *                           arguments are themselves lowerable
+ * \return a GLSL_PRECISION_* value: the declared precision for user
+ *         functions, or a precision derived from the image format, the
+ *         sampler operand, or per-builtin rules for builtin functions
+ */
+static unsigned
+handle_call(ir_call *ir, const struct set *lowerable_rvalues)
+{
+ /* The intrinsic call is inside the wrapper imageLoad function that will
+ * be inlined. We have to handle both of them.
+ */
+ if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
+ (ir->callee->is_builtin() &&
+ !strcmp(ir->callee_name(), "imageLoad"))) {
+ ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
+ ir_variable *resource = param->variable_referenced();
+
+ assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
+ assert(resource->type->without_array()->is_image());
+
+ /* GLSL ES 3.20 requires that images have a precision modifier, but if
+ * you set one, it doesn't do anything, because all intrinsics are
+ * defined with highp. This seems to be a spec bug.
+ *
+ * In theory we could set the return value to mediump if the image
+ * format has a lower precision. This appears to be the most sensible
+ * thing to do.
+ */
+ const struct util_format_description *desc =
+ util_format_description(resource->data.image_format);
+ int i =
+ util_format_get_first_non_void_channel(resource->data.image_format);
+ bool mediump;
+
+ assert(i >= 0);
+
+ if (desc->channel[i].pure_integer ||
+ desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
+ mediump = desc->channel[i].size <= 16;
+ else
+ mediump = desc->channel[i].size <= 10; /* unorm/snorm */
+
+ return mediump ? GLSL_PRECISION_MEDIUM : GLSL_PRECISION_HIGH;
+ }
+
+ /* Return the declared precision for user-defined functions. */
+ if (!ir->callee->is_builtin())
+ return ir->callee->return_precision;
+
+ /* Handle special calls. */
+ if (ir->callee->is_builtin() && ir->actual_parameters.length()) {
+ ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
+ ir_variable *var = param->variable_referenced();
+
+ /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
+ * be inlined by lower_precision() if we return true here, so that we can
+ * get to ir_texture later and do proper lowering.
+ *
+ * We should lower the type of the return value if the sampler type
+ * uses lower precision. The function parameters don't matter.
+ */
+ if (var && var->type->without_array()->is_sampler()) {
+ /* textureSize always returns highp. */
+ if (!strcmp(ir->callee_name(), "textureSize"))
+ return GLSL_PRECISION_HIGH;
+
+ return var->data.precision;
+ }
+ }
+
+ if (/* Parameters are always highp: */
+ !strcmp(ir->callee_name(), "floatBitsToInt") ||
+ !strcmp(ir->callee_name(), "floatBitsToUint") ||
+ !strcmp(ir->callee_name(), "intBitsToFloat") ||
+ !strcmp(ir->callee_name(), "uintBitsToFloat") ||
+ !strcmp(ir->callee_name(), "bitfieldReverse") ||
+ !strcmp(ir->callee_name(), "frexp") ||
+ !strcmp(ir->callee_name(), "ldexp") ||
+ /* Parameters and outputs are always highp: */
+ /* TODO: The operations are highp, but carry and borrow outputs are lowp. */
+ !strcmp(ir->callee_name(), "uaddCarry") ||
+ !strcmp(ir->callee_name(), "usubBorrow") ||
+ !strcmp(ir->callee_name(), "imulExtended") ||
+ !strcmp(ir->callee_name(), "umulExtended") ||
+ !strcmp(ir->callee_name(), "unpackUnorm2x16") ||
+ !strcmp(ir->callee_name(), "unpackSnorm2x16") ||
+ /* Outputs are highp: */
+ !strcmp(ir->callee_name(), "packUnorm2x16") ||
+ !strcmp(ir->callee_name(), "packSnorm2x16") ||
+ /* Parameters are mediump and outputs are highp. The parameters should
+ * be optimized in NIR, not here, e.g:
+ * - packHalf2x16 can just be a bitcast from f16vec2 to uint32
+ * - Other opcodes don't have to convert parameters to highp if the hw
+ * has f16 versions. Optimize in NIR accordingly.
+ */
+ !strcmp(ir->callee_name(), "packHalf2x16") ||
+ !strcmp(ir->callee_name(), "packUnorm4x8") ||
+ !strcmp(ir->callee_name(), "packSnorm4x8") ||
+ /* Atomic functions are not lowered. */
+ strstr(ir->callee_name(), "atomic") == ir->callee_name())
+ return GLSL_PRECISION_HIGH;
+
+ assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
+
+ /* Number of parameters to check if they are lowerable. */
+ unsigned check_parameters = ir->actual_parameters.length();
+
+ /* Interpolation functions only consider the precision of the interpolant. */
+ /* Bitfield functions ignore the precision of "offset" and "bits". */
+ if (!strcmp(ir->callee_name(), "interpolateAtOffset") ||
+ !strcmp(ir->callee_name(), "interpolateAtSample") ||
+ !strcmp(ir->callee_name(), "bitfieldExtract")) {
+ check_parameters = 1;
+ } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) {
+ check_parameters = 2;
+ /* NOTE(review): plain "if" after "}" (not "else if"). Behavior is the
+ * same because the names tested above never satisfy this predicate,
+ * but "else if" would read more clearly.
+ */
+ } if (function_always_returns_mediump_or_lowp(ir->callee_name())) {
+ /* These only lower the return value. Parameters keep their precision,
+ * which is preserved in map_builtin.
+ */
+ check_parameters = 0;
+ }
+
+ /* If the call is to a builtin, then the function won’t have a return
+ * precision and we should determine it from the precision of the arguments.
+ */
+ foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
+ if (!check_parameters)
+ break;
+
+ if (!param->as_constant() &&
+ _mesa_set_search(lowerable_rvalues, param) == NULL)
+ return GLSL_PRECISION_HIGH;
+
+ --check_parameters;
+ }
+
+ return GLSL_PRECISION_MEDIUM;
+}
+
ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
{
assert(var->data.mode == ir_var_temporary);
+ unsigned return_precision = handle_call(ir, lowerable_rvalues);
+
can_lower_state lower_state =
- handle_precision(var->type, ir->callee->return_precision);
+ handle_precision(var->type, return_precision);
if (lower_state == SHOULD_LOWER) {
/* There probably shouldn’t be any situations where multiple ir_call
}
void
-find_lowerable_rvalues(exec_list *instructions,
+find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
+ exec_list *instructions,
struct set *result)
{
- find_lowerable_rvalues_visitor v(result);
+ find_lowerable_rvalues_visitor v(result, options);
visit_list_elements(&v, instructions);
assert(v.stack.empty());
}
+/* Return the 32-bit type corresponding to a 16-bit type (up == true) or the
+ * 16-bit type corresponding to a 32-bit type (up == false), recursing
+ * through array types. Only float/int/uint and their 16-bit variants are
+ * handled; any other base type hits unreachable().
+ */
+static const glsl_type *
+convert_type(bool up, const glsl_type *type)
+{
+ if (type->is_array()) {
+ return glsl_type::get_array_instance(convert_type(up, type->fields.array),
+ type->array_size(),
+ type->explicit_stride);
+ }
+
+ glsl_base_type new_base_type;
+
+ if (up) {
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT16:
+ new_base_type = GLSL_TYPE_FLOAT;
+ break;
+ case GLSL_TYPE_INT16:
+ new_base_type = GLSL_TYPE_INT;
+ break;
+ case GLSL_TYPE_UINT16:
+ new_base_type = GLSL_TYPE_UINT;
+ break;
+ default:
+ unreachable("invalid type");
+ return NULL;
+ }
+ } else {
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ new_base_type = GLSL_TYPE_FLOAT16;
+ break;
+ case GLSL_TYPE_INT:
+ new_base_type = GLSL_TYPE_INT16;
+ break;
+ case GLSL_TYPE_UINT:
+ new_base_type = GLSL_TYPE_UINT16;
+ break;
+ default:
+ unreachable("invalid type");
+ return NULL;
+ }
+ }
+
+ /* Preserve vector/matrix shape and layout; only the base type changes. */
+ return glsl_type::get_instance(new_base_type,
+ type->vector_elements,
+ type->matrix_columns,
+ type->explicit_stride,
+ type->interface_row_major);
+}
+
+/* Shorthand for converting a 32-bit type to its 16-bit counterpart. */
+static const glsl_type *
+lower_glsl_type(const glsl_type *type)
+{
+ return convert_type(false, type);
+}
+
static ir_rvalue *
-convert_precision(int op, ir_rvalue *ir)
+convert_precision(bool up, ir_rvalue *ir)
{
- unsigned base_type = (op == ir_unop_f2fmp ?
- GLSL_TYPE_FLOAT16 : GLSL_TYPE_FLOAT);
- const glsl_type *desired_type;
- desired_type = glsl_type::get_instance(base_type,
- ir->type->vector_elements,
- ir->type->matrix_columns);
+ unsigned op;
+
+ if (up) {
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT16:
+ op = ir_unop_f162f;
+ break;
+ case GLSL_TYPE_INT16:
+ op = ir_unop_i2i;
+ break;
+ case GLSL_TYPE_UINT16:
+ op = ir_unop_u2u;
+ break;
+ default:
+ unreachable("invalid type");
+ return NULL;
+ }
+ } else {
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ op = ir_unop_f2fmp;
+ break;
+ case GLSL_TYPE_INT:
+ op = ir_unop_i2imp;
+ break;
+ case GLSL_TYPE_UINT:
+ op = ir_unop_u2ump;
+ break;
+ default:
+ unreachable("invalid type");
+ return NULL;
+ }
+ }
+ const glsl_type *desired_type = convert_type(up, ir->type);
void *mem_ctx = ralloc_parent(ir);
return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
}
if (ir->as_dereference()) {
if (!ir->type->is_boolean())
- *rvalue = convert_precision(ir_unop_f2fmp, ir);
- } else if (ir->type->is_float()) {
- ir->type = glsl_type::get_instance(GLSL_TYPE_FLOAT16,
- ir->type->vector_elements,
- ir->type->matrix_columns,
- ir->type->explicit_stride,
- ir->type->interface_row_major);
+ *rvalue = convert_precision(false, ir);
+ } else if (ir->type->is_32bit()) {
+ ir->type = lower_glsl_type(ir->type);
ir_constant *const_ir = ir->as_constant();
if (const_ir) {
ir_constant_data value;
- for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
- value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
+ if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
+ for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
+ value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
+ } else if (ir->type->base_type == GLSL_TYPE_INT16) {
+ for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
+ value.i16[i] = const_ir->value.i[i];
+ } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
+ for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
+ value.u16[i] = const_ir->value.u[i];
+ } else {
+ unreachable("invalid type");
+ }
const_ir->value = value;
}
case ir_unop_f2b:
ir->operation = ir_unop_f162b;
break;
+ case ir_unop_b2i:
+ case ir_unop_i2b:
+ /* Nothing to do - they both support int16. */
+ break;
default:
break;
}
/* Checking the precision of rvalue can be lowered first throughout
* find_lowerable_rvalues_visitor.
* Once it found the precision of rvalue can be lowered, then we can
- * add conversion f2fmp through lower_precision_visitor.
+ * add conversion f2fmp, etc. through lower_precision_visitor.
*/
if (*rvalue == NULL)
return;
/* We don’t need to add the final conversion if the final type has been
* converted to bool
*/
- if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL)
- *rvalue = convert_precision(ir_unop_f162f, *rvalue);
+ if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL) {
+ *rvalue = convert_precision(true, *rvalue);
+ }
+}
+
+/* Inline calls to builtins whose (temporary) return variable was lowered to
+ * mediump/lowp, after swapping in a lowered clone of the builtin via
+ * map_builtin(), so the inlined body gets lowered too. Intrinsics and
+ * image_load are left untouched.
+ */
+ir_visitor_status
+find_precision_visitor::visit_enter(ir_call *ir)
+{
+ ir_rvalue_enter_visitor::visit_enter(ir);
+
+ ir_variable *return_var =
+ ir->return_deref ? ir->return_deref->variable_referenced() : NULL;
+
+ /* Don't do anything for image_load here. We have only changed the return
+ * value to mediump/lowp, so that following instructions can use reduced
+ * precision.
+ *
+ * The return value type of the intrinsic itself isn't changed here, but
+ * can be changed in NIR if all users use the *2*mp opcode.
+ */
+ if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
+ return visit_continue;
+
+ /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
+ * overrode the precision of the temporary return variable, then we can
+ * replace the builtin implementation with a lowered version.
+ */
+
+ if (!ir->callee->is_builtin() ||
+ ir->callee->is_intrinsic() ||
+ return_var == NULL ||
+ (return_var->data.precision != GLSL_PRECISION_MEDIUM &&
+ return_var->data.precision != GLSL_PRECISION_LOW))
+ return visit_continue;
- progress = true;
+ ir->callee = map_builtin(ir->callee);
+ ir->generate_inline(ir);
+ /* The call has been expanded inline; drop the original instruction and
+ * skip the children (they belong to the now-removed call).
+ */
+ ir->remove();
+
+ return visit_continue_with_parent;
+}
+
+/* Return a lowered clone of a builtin signature, creating and caching it on
+ * first use. The clone's parameters are marked mediump (except for builtins
+ * that always return mediump/lowp, whose parameters may legitimately stay
+ * highp) and its body is recursively run through lower_precision(). Clones
+ * live in their own ralloc context, freed in the destructor.
+ */
+ir_function_signature *
+find_precision_visitor::map_builtin(ir_function_signature *sig)
+{
+ if (lowered_builtins == NULL) {
+ lowered_builtins = _mesa_pointer_hash_table_create(NULL);
+ clone_ht =_mesa_pointer_hash_table_create(NULL);
+ lowered_builtin_mem_ctx = ralloc_context(NULL);
+ } else {
+ struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
+ if (entry)
+ return (ir_function_signature *) entry->data;
+ }
+
+ ir_function_signature *lowered_sig =
+ sig->clone(lowered_builtin_mem_ctx, clone_ht);
+
+ /* Functions that always return mediump or lowp should keep their
+ * parameters intact, because they can be highp. NIR can lower
+ * the up-conversion for parameters if needed.
+ */
+ if (!function_always_returns_mediump_or_lowp(sig->function_name())) {
+ foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
+ param->data.precision = GLSL_PRECISION_MEDIUM;
+ }
+ }
+
+ lower_precision(options, &lowered_sig->body);
+
+ /* clone_ht is only needed during a single clone; reset it for the next
+ * map_builtin call.
+ */
+ _mesa_hash_table_clear(clone_ht, NULL);
+
+ _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);
+
+ return lowered_sig;
}
-find_precision_visitor::find_precision_visitor()
- : progress(false),
- lowerable_rvalues(_mesa_pointer_set_create(NULL))
+find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
+ : lowerable_rvalues(_mesa_pointer_set_create(NULL)),
+ lowered_builtins(NULL),
+ clone_ht(NULL),
+ lowered_builtin_mem_ctx(NULL),
+ options(options)
{
}
find_precision_visitor::~find_precision_visitor()
{
_mesa_set_destroy(lowerable_rvalues, NULL);
+
+ if (lowered_builtins) {
+ _mesa_hash_table_destroy(lowered_builtins, NULL);
+ _mesa_hash_table_destroy(clone_ht, NULL);
+ ralloc_free(lowered_builtin_mem_ctx);
+ }
+}
+
+/* Lowering opcodes to 16 bits is not enough for programs with control flow
+ * (and the ?: operator, which is represented by if-then-else in the IR),
+ * because temporary variables, which are used for passing values between
+ * code blocks, are not lowered, resulting in 32-bit phis in NIR.
+ *
+ * First change the variable types to 16 bits, then change all ir_dereference
+ * types to 16 bits.
+ */
+/* Visitor that retypes eligible mediump/lowp local variables to 16 bits and
+ * fixes every dereference, assignment, call and return that touches them,
+ * inserting 16<->32-bit conversions where lowered and non-lowered code meet.
+ */
+class lower_variables_visitor : public ir_rvalue_enter_visitor {
+public:
+ lower_variables_visitor(const struct gl_shader_compiler_options *options)
+ : options(options) {
+ lower_vars = _mesa_pointer_set_create(NULL);
+ }
+
+ virtual ~lower_variables_visitor()
+ {
+ _mesa_set_destroy(lower_vars, NULL);
+ }
+
+ virtual ir_visitor_status visit(ir_variable *var);
+ virtual ir_visitor_status visit_enter(ir_assignment *ir);
+ virtual ir_visitor_status visit_enter(ir_return *ir);
+ virtual ir_visitor_status visit_enter(ir_call *ir);
+ virtual void handle_rvalue(ir_rvalue **rvalue);
+
+ void fix_types_in_deref_chain(ir_dereference *ir);
+ void convert_split_assignment(ir_dereference *lhs, ir_rvalue *rhs,
+ bool insert_before);
+
+ /* Compiler options deciding which base types may be lowered. */
+ const struct gl_shader_compiler_options *options;
+ /* Set of ir_variables whose type has been rewritten to 16 bits. */
+ set *lower_vars;
+};
+
+/* Rewrite an ir_constant in place (recursing through arrays) from a 32-bit
+ * type to the matching 16-bit type, converting the stored values.
+ */
+static void
+lower_constant(ir_constant *ir)
+{
+ if (ir->type->is_array()) {
+ for (int i = 0; i < ir->type->array_size(); i++)
+ lower_constant(ir->get_array_element(i));
+
+ ir->type = lower_glsl_type(ir->type);
+ return;
+ }
+
+ ir->type = lower_glsl_type(ir->type);
+ ir_constant_data value;
+
+ /* ir->type has already been lowered, so switch on the 16-bit types. */
+ if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
+ for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
+ value.f16[i] = _mesa_float_to_half(ir->value.f[i]);
+ } else if (ir->type->base_type == GLSL_TYPE_INT16) {
+ for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
+ value.i16[i] = ir->value.i[i];
+ } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
+ for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
+ value.u16[i] = ir->value.u[i];
+ } else {
+ unreachable("invalid type");
+ }
+
+ ir->value = value;
+}
+
+/* Decide whether a variable can be lowered to 16 bits and, if so, rewrite
+ * its type (and constant value/initializer) and record it in lower_vars.
+ * Only local (temporary/auto) mediump/lowp 32-bit variables whose base type
+ * the options allow are eligible.
+ */
+ir_visitor_status
+lower_variables_visitor::visit(ir_variable *var)
+{
+ if ((var->data.mode != ir_var_temporary &&
+ var->data.mode != ir_var_auto) ||
+ !var->type->without_array()->is_32bit() ||
+ (var->data.precision != GLSL_PRECISION_MEDIUM &&
+ var->data.precision != GLSL_PRECISION_LOW) ||
+ !can_lower_type(options, var->type))
+ return visit_continue;
+
+ /* Lower constant initializers. */
+ if (var->constant_value &&
+ var->type == var->constant_value->type) {
+ if (!options->LowerPrecisionConstants)
+ return visit_continue;
+ /* Clone before lowering — presumably the constant may be shared with
+ * other IR; TODO confirm.
+ */
+ var->constant_value =
+ var->constant_value->clone(ralloc_parent(var), NULL);
+ lower_constant(var->constant_value);
+ }
+
+ if (var->constant_initializer &&
+ var->type == var->constant_initializer->type) {
+ if (!options->LowerPrecisionConstants)
+ return visit_continue;
+ var->constant_initializer =
+ var->constant_initializer->clone(ralloc_parent(var), NULL);
+ lower_constant(var->constant_initializer);
+ }
+
+ var->type = lower_glsl_type(var->type);
+ _mesa_set_add(lower_vars, var);
+
+ return visit_continue;
+}
+
+/* Rewrite the still-32-bit types in a dereference (and, for arrays, in the
+ * whole chain of nested array dereferences) of a variable that has already
+ * been lowered to 16 bits.
+ */
+void
+lower_variables_visitor::fix_types_in_deref_chain(ir_dereference *ir)
+{
+ assert(ir->type->without_array()->is_32bit());
+ assert(_mesa_set_search(lower_vars, ir->variable_referenced()));
+
+ /* Fix the type in the dereference node. */
+ ir->type = lower_glsl_type(ir->type);
+
+ /* If it's an array, fix the types in the whole dereference chain. */
+ for (ir_dereference_array *deref_array = ir->as_dereference_array();
+ deref_array;
+ deref_array = deref_array->array->as_dereference_array()) {
+ assert(deref_array->array->type->without_array()->is_32bit());
+ deref_array->array->type = lower_glsl_type(deref_array->array->type);
+ }
+}
+
+/* Emit element-wise assignments converting between 16-bit and 32-bit
+ * aggregates (arrays are recursed into). The conversion direction is
+ * derived from the LHS type; the generated assignments are inserted before
+ * or after the instruction currently being visited (base_ir).
+ */
+void
+lower_variables_visitor::convert_split_assignment(ir_dereference *lhs,
+ ir_rvalue *rhs,
+ bool insert_before)
+{
+ void *mem_ctx = ralloc_parent(lhs);
+
+ if (lhs->type->is_array()) {
+ for (unsigned i = 0; i < lhs->type->length; i++) {
+ ir_dereference *l, *r;
+
+ l = new(mem_ctx) ir_dereference_array(lhs->clone(mem_ctx, NULL),
+ new(mem_ctx) ir_constant(i));
+ r = new(mem_ctx) ir_dereference_array(rhs->clone(mem_ctx, NULL),
+ new(mem_ctx) ir_constant(i));
+ convert_split_assignment(l, r, insert_before);
+ }
+ return;
+ }
+
+ /* At the leaf level exactly one side is 16-bit and the other 32-bit. */
+ assert(lhs->type->is_16bit() || lhs->type->is_32bit());
+ assert(rhs->type->is_16bit() || rhs->type->is_32bit());
+ assert(lhs->type->is_16bit() != rhs->type->is_16bit());
+
+ ir_assignment *assign =
+ new(mem_ctx) ir_assignment(lhs, convert_precision(lhs->type->is_32bit(), rhs));
+
+ if (insert_before)
+ base_ir->insert_before(assign);
+ else
+ base_ir->insert_after(assign);
+}
+
+/* Fix up assignments that mix lowered (16-bit) and non-lowered (32-bit)
+ * operands: array copies are split into converting element assignments, and
+ * scalar/vector LHS/RHS types are retyped or converted as needed.
+ */
+ir_visitor_status
+lower_variables_visitor::visit_enter(ir_assignment *ir)
+{
+ ir_dereference *lhs = ir->lhs;
+ ir_variable *var = lhs->variable_referenced();
+ ir_dereference *rhs_deref = ir->rhs->as_dereference();
+ ir_variable *rhs_var = rhs_deref ? rhs_deref->variable_referenced() : NULL;
+ ir_constant *rhs_const = ir->rhs->as_constant();
+
+ /* Legalize array assignments between lowered and non-lowered variables. */
+ if (lhs->type->is_array() &&
+ (rhs_var || rhs_const) &&
+ (!rhs_var ||
+ (var &&
+ var->type->without_array()->is_16bit() !=
+ rhs_var->type->without_array()->is_16bit())) &&
+ (!rhs_const ||
+ (var &&
+ var->type->without_array()->is_16bit() &&
+ rhs_const->type->without_array()->is_32bit()))) {
+ assert(ir->rhs->type->is_array());
+
+ /* Fix array assignments from lowered to non-lowered. */
+ if (rhs_var && _mesa_set_search(lower_vars, rhs_var)) {
+ fix_types_in_deref_chain(rhs_deref);
+ /* Convert to 32 bits for LHS. */
+ convert_split_assignment(lhs, rhs_deref, true);
+ ir->remove();
+ return visit_continue;
+ }
+
+ /* Fix array assignments from non-lowered to lowered. */
+ if (var &&
+ _mesa_set_search(lower_vars, var) &&
+ ir->rhs->type->without_array()->is_32bit()) {
+ fix_types_in_deref_chain(lhs);
+ /* Convert to 16 bits for LHS. */
+ convert_split_assignment(lhs, ir->rhs, true);
+ ir->remove();
+ return visit_continue;
+ }
+ }
+
+ /* Fix assignment types. */
+ if (var &&
+ _mesa_set_search(lower_vars, var)) {
+ /* Fix the LHS type. */
+ if (lhs->type->without_array()->is_32bit())
+ fix_types_in_deref_chain(lhs);
+
+ /* Fix the RHS type if it's a lowered variable. */
+ if (rhs_var &&
+ _mesa_set_search(lower_vars, rhs_var) &&
+ rhs_deref->type->without_array()->is_32bit())
+ fix_types_in_deref_chain(rhs_deref);
+
+ /* Fix the RHS type if it's a non-array expression. */
+ if (ir->rhs->type->is_32bit()) {
+ ir_expression *expr = ir->rhs->as_expression();
+
+ /* Convert the RHS to the LHS type. */
+ if (expr &&
+ (expr->operation == ir_unop_f162f ||
+ expr->operation == ir_unop_i2i ||
+ expr->operation == ir_unop_u2u) &&
+ expr->operands[0]->type->is_16bit()) {
+ /* If there is an "up" conversion, just remove it.
+ * This is optional. We could as well execute the else statement and
+ * let NIR eliminate the up+down conversions.
+ */
+ ir->rhs = expr->operands[0];
+ } else {
+ /* Add a "down" conversion operation to fix the type of RHS. */
+ ir->rhs = convert_precision(false, ir->rhs);
+ }
+ }
+ }
+
+ return ir_rvalue_enter_visitor::visit_enter(ir);
}
+/* If a lowered (16-bit) variable is returned from a function whose declared
+ * return type is 32-bit, route it through a 32-bit temporary with a
+ * converting assignment so the return value keeps its original type.
+ */
+ir_visitor_status
+lower_variables_visitor::visit_enter(ir_return *ir)
+{
+ void *mem_ctx = ralloc_parent(ir);
+
+ ir_dereference *deref = ir->value ? ir->value->as_dereference() : NULL;
+ if (deref) {
+ ir_variable *var = deref->variable_referenced();
+
+ /* Fix the type of the return value. */
+ if (var &&
+ _mesa_set_search(lower_vars, var) &&
+ deref->type->without_array()->is_32bit()) {
+ /* Create a 32-bit temporary variable. */
+ ir_variable *new_var =
+ new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
+ base_ir->insert_before(new_var);
+
+ /* Fix types in dereferences. */
+ fix_types_in_deref_chain(deref);
+
+ /* Convert to 32 bits for the return value. */
+ convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
+ deref, true);
+ ir->value = new(mem_ctx) ir_dereference_variable(new_var);
+ }
+ }
+
+ return ir_rvalue_enter_visitor::visit_enter(ir);
}
-bool
-lower_precision(exec_list *instructions)
+void lower_variables_visitor::handle_rvalue(ir_rvalue **rvalue)
{
- find_precision_visitor v;
+ ir_rvalue *ir = *rvalue;
+
+ if (in_assignee || ir == NULL)
+ return;
+
+ ir_expression *expr = ir->as_expression();
+ ir_dereference *expr_op0_deref = expr ? expr->operands[0]->as_dereference() : NULL;
+
+ /* Remove f2fmp(float16). Same for int16 and uint16. */
+ if (expr &&
+ expr_op0_deref &&
+ (expr->operation == ir_unop_f2fmp ||
+ expr->operation == ir_unop_i2imp ||
+ expr->operation == ir_unop_u2ump ||
+ expr->operation == ir_unop_f2f16 ||
+ expr->operation == ir_unop_i2i ||
+ expr->operation == ir_unop_u2u) &&
+ expr->type->without_array()->is_16bit() &&
+ expr_op0_deref->type->without_array()->is_32bit() &&
+ expr_op0_deref->variable_referenced() &&
+ _mesa_set_search(lower_vars, expr_op0_deref->variable_referenced())) {
+ fix_types_in_deref_chain(expr_op0_deref);
+
+ /* Remove f2fmp/i2imp/u2ump. */
+ *rvalue = expr_op0_deref;
+ return;
+ }
+
+ ir_dereference *deref = ir->as_dereference();
- find_lowerable_rvalues(instructions, v.lowerable_rvalues);
+ if (deref) {
+ ir_variable *var = deref->variable_referenced();
+ /* var can be NULL if we are dereferencing ir_constant. */
+ if (var &&
+ _mesa_set_search(lower_vars, var) &&
+ deref->type->without_array()->is_32bit()) {
+ void *mem_ctx = ralloc_parent(ir);
+
+ /* Create a 32-bit temporary variable. */
+ ir_variable *new_var =
+ new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
+ base_ir->insert_before(new_var);
+
+ /* Fix types in dereferences. */
+ fix_types_in_deref_chain(deref);
+
+ /* Convert to 32 bits for the rvalue. */
+ convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
+ deref, true);
+ *rvalue = new(mem_ctx) ir_dereference_variable(new_var);
+ }
+ }
+}
+
+/* Repair calls that pass lowered (16-bit) variables where the callee expects
+ * 32-bit parameters or return storage: a 32-bit temporary is substituted,
+ * with converting assignments inserted before the call (for in/inout) and
+ * after it (for out/inout and the return value).
+ */
+ir_visitor_status
+lower_variables_visitor::visit_enter(ir_call *ir)
+{
+ void *mem_ctx = ralloc_parent(ir);
+
+ /* We can't pass 16-bit variables as 32-bit inout/out parameters. */
+ foreach_two_lists(formal_node, &ir->callee->parameters,
+ actual_node, &ir->actual_parameters) {
+ ir_dereference *param_deref =
+ ((ir_rvalue *)actual_node)->as_dereference();
+ ir_variable *param = (ir_variable *)formal_node;
+
+ if (!param_deref)
+ continue;
+
+ ir_variable *var = param_deref->variable_referenced();
+
+ /* var can be NULL if we are dereferencing ir_constant. */
+ if (var &&
+ _mesa_set_search(lower_vars, var) &&
+ param->type->without_array()->is_32bit()) {
+ fix_types_in_deref_chain(param_deref);
+
+ /* Create a 32-bit temporary variable for the parameter. */
+ ir_variable *new_var =
+ new(mem_ctx) ir_variable(param->type, "lowerp", ir_var_temporary);
+ base_ir->insert_before(new_var);
+
+ /* Replace the parameter. */
+ actual_node->replace_with(new(mem_ctx) ir_dereference_variable(new_var));
+
+ if (param->data.mode == ir_var_function_in ||
+ param->data.mode == ir_var_function_inout) {
+ /* Convert to 32 bits for passing in. */
+ convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
+ param_deref->clone(mem_ctx, NULL), true);
+ }
+ if (param->data.mode == ir_var_function_out ||
+ param->data.mode == ir_var_function_inout) {
+ /* Convert to 16 bits after returning. */
+ convert_split_assignment(param_deref,
+ new(mem_ctx) ir_dereference_variable(new_var),
+ false);
+ }
+ }
+ }
+
+ /* Fix the type of return value dereferencies. */
+ ir_dereference_variable *ret_deref = ir->return_deref;
+ ir_variable *ret_var = ret_deref ? ret_deref->variable_referenced() : NULL;
+
+ if (ret_var &&
+ _mesa_set_search(lower_vars, ret_var) &&
+ ret_deref->type->without_array()->is_32bit()) {
+ /* Create a 32-bit temporary variable. */
+ ir_variable *new_var =
+ new(mem_ctx) ir_variable(ir->callee->return_type, "lowerp",
+ ir_var_temporary);
+ base_ir->insert_before(new_var);
+
+ /* Replace the return variable. */
+ ret_deref->var = new_var;
+
+ /* Convert to 16 bits after returning. */
+ convert_split_assignment(new(mem_ctx) ir_dereference_variable(ret_var),
+ new(mem_ctx) ir_dereference_variable(new_var),
+ false);
+ }
+
+ return ir_rvalue_enter_visitor::visit_enter(ir);
+}
+
+}
+
+/* Entry point: lower mediump/lowp rvalues and local variables in the given
+ * IR to 16-bit types, as permitted by the driver's compiler options.
+ */
+void
+lower_precision(const struct gl_shader_compiler_options *options,
+ exec_list *instructions)
+{
+ find_precision_visitor v(options);
+ find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);
visit_list_elements(&v, instructions);
- return v.progress;
+ /* Second pass: retype variables so 16-bit values survive control flow. */
+ lower_variables_visitor vars(options);
+ visit_list_elements(&vars, instructions);
}