#include "st_format.h"
#include "st_nir.h"
#include "st_shader_cache.h"
-#include "st_glsl_to_tgsi_private.h"
+#include "st_glsl_to_tgsi_temprename.h"
#include "util/hash_table.h"
#include <algorithm>
class immediate_storage : public exec_node {
public:
- immediate_storage(gl_constant_value *values, int size32, int type)
+ immediate_storage(gl_constant_value *values, int size32, GLenum type)
{
memcpy(this->values, values, size32 * sizeof(gl_constant_value));
this->size32 = size32;
/* doubles are stored across 2 gl_constant_values */
gl_constant_value values[4];
int size32; /**< Number of 32-bit components (1-4) */
- int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
+ GLenum type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
};
static const st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
return GLSL_TYPE_ERROR;
}
+/** Bookkeeping for one atomic counter variable placed in the PROGRAM_HW_ATOMIC file. */
+struct hwatomic_decl {
+ unsigned location; /**< copied from the counter variable's data.location */
+ unsigned binding; /**< copied from the counter variable's data.binding */
+ unsigned size; /**< arrays_of_arrays_size() of the variable's type, forced to at least 1 */
+ unsigned array_id; /**< 0 until the counter is accessed with a non-constant offset, then a 1-based id */
+};
+
struct glsl_to_tgsi_visitor : public ir_visitor {
public:
glsl_to_tgsi_visitor();
unsigned num_outputs;
unsigned num_output_arrays;
+ struct hwatomic_decl atomic_info[PIPE_MAX_HW_ATOMIC_BUFFERS];
+ unsigned num_atomics;
+ unsigned num_atomic_arrays;
int num_address_regs;
uint32_t samplers_used;
glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
- int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */
+ enum tgsi_texture_type sampler_targets[PIPE_MAX_SAMPLERS];
int images_used;
int image_targets[PIPE_MAX_SHADER_IMAGES];
- unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
+ enum pipe_format image_formats[PIPE_MAX_SHADER_IMAGES];
bool indirect_addr_consts;
int wpos_transform_const;
- int glsl_version;
bool native_integers;
bool have_sqrt;
bool have_fma;
bool use_shared_memory;
bool has_tex_txf_lz;
bool precise;
+ bool need_uarl;
variable_storage *find_variable_storage(ir_variable *var);
int add_constant(gl_register_file file, gl_constant_value values[8],
- int size, int datatype, uint16_t *swizzle_out);
+ int size, GLenum datatype, uint16_t *swizzle_out);
st_src_reg get_temp(const glsl_type *type);
void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
st_src_reg st_src_reg_for_double(double val);
st_src_reg st_src_reg_for_float(float val);
st_src_reg st_src_reg_for_int(int val);
+ st_src_reg st_src_reg_for_int64(int64_t val);
st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val);
/**
ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args);
va_end(args);
- prog->data->LinkStatus = linking_failure;
+ prog->data->LinkStatus = LINKING_FAILURE;
}
int
* sources into temps.
*/
num_reladdr += dst.reladdr != NULL || dst.reladdr2;
- num_reladdr += dst1.reladdr != NULL || dst1.reladdr2;
+ assert(!dst1.reladdr); /* should be lowered in earlier passes */
num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
emit_arl(ir, address_reg2, *dst.reladdr2);
num_reladdr--;
}
- if (dst1.reladdr) {
- emit_arl(ir, address_reg, *dst1.reladdr);
- num_reladdr--;
- }
+
assert(num_reladdr == 0);
/* inst->op has only 8 bits. */
if (swz > 1) {
dinst->src[j].double_reg2 = true;
dinst->src[j].index++;
- }
+ }
if (swz & 1)
dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
} else {
/* some opcodes are special case in what they use as sources
- - [FUI]2D/[UI]2I64 is a float/[u]int src0, DLDEXP is integer src1 */
+ - [FUI]2D/[UI]2I64 is a float/[u]int src0, (D)LDEXP is integer src1 */
if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D || op == TGSI_OPCODE_I2D ||
op == TGSI_OPCODE_I2I64 || op == TGSI_OPCODE_U2I64 ||
- op == TGSI_OPCODE_DLDEXP ||
+ op == TGSI_OPCODE_DLDEXP || op == TGSI_OPCODE_LDEXP ||
(op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) {
dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
}
{
int op = TGSI_OPCODE_ARL;
- if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
+ if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) {
+ if (!this->need_uarl && src0.is_legal_tgsi_address_operand())
+ return;
+
op = TGSI_OPCODE_UARL;
+ }
assert(dst.file == PROGRAM_ADDRESS);
if (dst.index >= this->num_address_regs)
int
glsl_to_tgsi_visitor::add_constant(gl_register_file file,
- gl_constant_value values[8], int size, int datatype,
+ gl_constant_value values[8], int size,
+ GLenum datatype,
uint16_t *swizzle_out)
{
if (file == PROGRAM_CONSTANT) {
return src;
}
+/**
+ * Build an immediate source register holding a 64-bit integer constant.
+ *
+ * The value is copied bit-for-bit into two gl_constant_value slots and
+ * registered through add_constant() as a single GL_DOUBLE-typed component.
+ * The swizzle reported by add_constant() is then replaced with XYXY.
+ *
+ * \param val  the 64-bit value to materialize
+ * \return a PROGRAM_IMMEDIATE register of type GLSL_TYPE_INT64
+ */
+st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_int64(int64_t val)
+{
+   union gl_constant_value halves[2];
+   memcpy(halves, &val, sizeof(halves));
+
+   st_src_reg imm(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT64);
+   imm.index = add_constant(imm.file, halves, 1, GL_DOUBLE, &imm.swizzle);
+
+   /* Overrides whatever swizzle add_constant() wrote above. */
+   imm.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
+   return imm;
+}
+
st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_type(enum glsl_base_type type, int val)
{
return type->count_attribute_slots(false);
}
+/**
+ * Attach a buffer resource (and, when given, its access flags) to a
+ * load/store instruction and to the sibling instructions emitted with it.
+ *
+ * emit_asm() might have split the operation into pieces, e.g. for double
+ * stores.  Starting at \p inst the list is therefore walked towards its
+ * head, and every earlier instruction that has the same opcode and no
+ * resource yet is patched as well.  One UADD found between two pieces is
+ * stepped over.
+ *
+ * \param inst          instruction to start from
+ * \param buf           resource register copied into each patched instruction
+ * \param instructions  list that contains \p inst
+ * \param access        constant carrying the access flags, or NULL
+ */
+static void
+add_buffer_to_load_and_stores(glsl_to_tgsi_instruction *inst, st_src_reg *buf,
+                              exec_list *instructions, ir_constant *access)
+{
+   const unsigned wanted_op = inst->op;
+
+   for (;;) {
+      inst->resource = *buf;
+      if (access != NULL)
+         inst->buffer_access = access->value.u[0];
+
+      /* Nothing precedes the head of the list. */
+      if (inst == instructions->get_head_raw())
+         return;
+      inst = (glsl_to_tgsi_instruction *)inst->get_prev();
+
+      /* Look past one intervening UADD, unless it is the head itself. */
+      if (inst->op == TGSI_OPCODE_UADD) {
+         if (inst == instructions->get_head_raw())
+            return;
+         inst = (glsl_to_tgsi_instruction *)inst->get_prev();
+      }
+
+      /* Continue only while still looking at an unpatched piece of the op. */
+      const bool another_piece = inst->op == wanted_op &&
+                                 inst->resource.file == PROGRAM_UNDEFINED;
+      if (!another_piece)
+         return;
+   }
+}
+
/**
* If the given GLSL type is an array or matrix or a structure containing
* an array/matrix member, return true. Else return false.
if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2);
if (*num_reladdr != 1) {
- st_src_reg temp = get_temp(reg->type == GLSL_TYPE_DOUBLE ? glsl_type::dvec4_type : glsl_type::vec4_type);
+ st_src_reg temp = get_temp(glsl_type::get_instance(reg->type, 4, 1));
emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
*reg = temp;
st_dst_reg result_dst;
int vector_elements = ir->operands[0]->type->vector_elements;
- if (ir->operands[1]) {
+ if (ir->operands[1] &&
+ ir->operation != ir_binop_interpolate_at_offset &&
+ ir->operation != ir_binop_interpolate_at_sample) {
+ st_src_reg *swz_op = NULL;
+ if (vector_elements > ir->operands[1]->type->vector_elements) {
+ assert(ir->operands[1]->type->vector_elements == 1);
+ swz_op = &op[1];
+ } else if (vector_elements < ir->operands[1]->type->vector_elements) {
+ assert(ir->operands[0]->type->vector_elements == 1);
+ swz_op = &op[0];
+ }
+ if (swz_op) {
+ uint16_t swizzle_x = GET_SWZ(swz_op->swizzle, 0);
+ swz_op->swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x,
+ swizzle_x, swizzle_x);
+ }
vector_elements = MAX2(vector_elements,
ir->operands[1]->type->vector_elements);
}
+ if (ir->operands[2] &&
+ ir->operands[2]->type->vector_elements != vector_elements) {
+ /* This can happen with ir_triop_lrp, i.e. glsl mix */
+ assert(ir->operands[2]->type->vector_elements == 1);
+ uint16_t swizzle_x = GET_SWZ(op[2].swizzle, 0);
+ op[2].swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x,
+ swizzle_x, swizzle_x);
+ }
this->result.file = PROGRAM_UNDEFINED;
case ir_binop_less:
emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
break;
- case ir_binop_greater:
- emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
- break;
- case ir_binop_lequal:
- emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
- break;
case ir_binop_gequal:
emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
break;
break;
case ir_binop_ubo_load: {
- ir_constant *const_uniform_block = ir->operands[0]->as_constant();
- ir_constant *const_offset_ir = ir->operands[1]->as_constant();
- unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
- unsigned const_block = const_uniform_block ? const_uniform_block->value.u[0] + 1 : 1;
- st_src_reg index_reg = get_temp(glsl_type::uint_type);
- st_src_reg cbuf;
-
- cbuf.type = ir->type->base_type;
- cbuf.file = PROGRAM_CONSTANT;
- cbuf.index = 0;
- cbuf.reladdr = NULL;
- cbuf.negate = 0;
- cbuf.abs = 0;
- cbuf.index2D = const_block;
-
- assert(ir->type->is_vector() || ir->type->is_scalar());
-
- if (const_offset_ir) {
- /* Constant index into constant buffer */
- cbuf.reladdr = NULL;
- cbuf.index = const_offset / 16;
- }
- else {
- ir_expression *offset_expr = ir->operands[1]->as_expression();
- st_src_reg offset = op[1];
-
- /* The OpenGL spec is written in such a way that accesses with
- * non-constant offset are almost always vec4-aligned. The only
- * exception to this are members of structs in arrays of structs:
- * each struct in an array of structs is at least vec4-aligned,
- * but single-element and [ui]vec2 members of the struct may be at
- * an offset that is not a multiple of 16 bytes.
- *
- * Here, we extract that offset, relying on previous passes to always
- * generate offset expressions of the form (+ expr constant_offset).
- *
- * Note that the std430 layout, which allows more cases of alignment
- * less than vec4 in arrays, is not supported for uniform blocks, so
- * we do not have to deal with it here.
- */
- if (offset_expr && offset_expr->operation == ir_binop_add) {
- const_offset_ir = offset_expr->operands[1]->as_constant();
- if (const_offset_ir) {
- const_offset = const_offset_ir->value.u[0];
- cbuf.index = const_offset / 16;
- offset_expr->operands[0]->accept(this);
- offset = this->result;
- }
+ if (ctx->Const.UseSTD430AsDefaultPacking) {
+ ir_rvalue *block = ir->operands[0];
+ ir_rvalue *offset = ir->operands[1];
+ ir_constant *const_block = block->as_constant();
+
+ st_src_reg cbuf(PROGRAM_CONSTANT,
+ (const_block ? const_block->value.u[0] + 1 : 1),
+ ir->type->base_type);
+
+ cbuf.has_index2 = true;
+
+ if (!const_block) {
+ block->accept(this);
+ cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
+ *cbuf.reladdr = this->result;
+ emit_arl(ir, sampler_reladdr, this->result);
}
- /* Relative/variable index into constant buffer */
- emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset,
- st_src_reg_for_int(4));
- cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
- memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
- }
+ /* Calculate the surface offset */
+ offset->accept(this);
+ st_src_reg off = this->result;
- if (const_uniform_block) {
- /* Constant constant buffer */
- cbuf.reladdr2 = NULL;
- }
- else {
- /* Relative/variable constant buffer */
- cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
- memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
- }
- cbuf.has_index2 = true;
-
- cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
- if (glsl_base_type_is_64bit(cbuf.type))
- cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
- const_offset % 16 / 8,
- const_offset % 16 / 8,
- const_offset % 16 / 8);
- else
- cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
- const_offset % 16 / 4,
- const_offset % 16 / 4,
- const_offset % 16 / 4);
+ glsl_to_tgsi_instruction *inst =
+ emit_asm(ir, TGSI_OPCODE_LOAD, result_dst, off);
+
+ if (result_dst.type == GLSL_TYPE_BOOL)
+ emit_asm(ir, TGSI_OPCODE_USNE, result_dst, st_src_reg(result_dst),
+ st_src_reg_for_int(0));
- if (ir->type->is_boolean()) {
- emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
+ add_buffer_to_load_and_stores(inst, &cbuf, &this->instructions,
+ NULL);
} else {
- emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
+ ir_constant *const_uniform_block = ir->operands[0]->as_constant();
+ ir_constant *const_offset_ir = ir->operands[1]->as_constant();
+ unsigned const_offset = const_offset_ir ?
+ const_offset_ir->value.u[0] : 0;
+ unsigned const_block = const_uniform_block ?
+ const_uniform_block->value.u[0] + 1 : 1;
+ st_src_reg index_reg = get_temp(glsl_type::uint_type);
+ st_src_reg cbuf;
+
+ cbuf.type = ir->type->base_type;
+ cbuf.file = PROGRAM_CONSTANT;
+ cbuf.index = 0;
+ cbuf.reladdr = NULL;
+ cbuf.negate = 0;
+ cbuf.abs = 0;
+ cbuf.index2D = const_block;
+
+ assert(ir->type->is_vector() || ir->type->is_scalar());
+
+ if (const_offset_ir) {
+ /* Constant index into constant buffer */
+ cbuf.reladdr = NULL;
+ cbuf.index = const_offset / 16;
+ } else {
+ ir_expression *offset_expr = ir->operands[1]->as_expression();
+ st_src_reg offset = op[1];
+
+ /* The OpenGL spec is written in such a way that accesses with
+ * non-constant offset are almost always vec4-aligned. The only
+ * exception to this are members of structs in arrays of structs:
+ * each struct in an array of structs is at least vec4-aligned,
+ * but single-element and [ui]vec2 members of the struct may be at
+ * an offset that is not a multiple of 16 bytes.
+ *
+ * Here, we extract that offset, relying on previous passes to
+ * always generate offset expressions of the form
+ * (+ expr constant_offset).
+ *
+ * Note that the std430 layout, which allows more cases of
+ * alignment less than vec4 in arrays, is not supported for
+ * uniform blocks, so we do not have to deal with it here.
+ */
+ if (offset_expr && offset_expr->operation == ir_binop_add) {
+ const_offset_ir = offset_expr->operands[1]->as_constant();
+ if (const_offset_ir) {
+ const_offset = const_offset_ir->value.u[0];
+ cbuf.index = const_offset / 16;
+ offset_expr->operands[0]->accept(this);
+ offset = this->result;
+ }
+ }
+
+ /* Relative/variable index into constant buffer */
+ emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset,
+ st_src_reg_for_int(4));
+ cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
+ memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
+ }
+
+ if (const_uniform_block) {
+ /* Constant constant buffer */
+ cbuf.reladdr2 = NULL;
+ } else {
+ /* Relative/variable constant buffer */
+ cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
+ memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
+ }
+ cbuf.has_index2 = true;
+
+ cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
+ if (glsl_base_type_is_64bit(cbuf.type))
+ cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
+ const_offset % 16 / 8,
+ const_offset % 16 / 8,
+ const_offset % 16 / 8);
+ else
+ cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
+ const_offset % 16 / 4,
+ const_offset % 16 / 4,
+ const_offset % 16 / 4);
+
+ if (ir->type->is_boolean()) {
+ emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf,
+ st_src_reg_for_int(0));
+ } else {
+ emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
+ }
}
break;
}
case ir_binop_ldexp:
if (ir->operands[0]->type->is_double()) {
emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]);
+ } else if (ir->operands[0]->type->is_float()) {
+ emit_asm(ir, TGSI_OPCODE_LDEXP, result_dst, op[0], op[1]);
} else {
assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()");
}
case ir_unop_get_buffer_size: {
ir_constant *const_offset = ir->operands[0]->as_constant();
+ int buf_base = ctx->st->has_hw_atomics ? 0 : ctx->Const.Program[shader->Stage].MaxAtomicBuffers;
st_src_reg buffer(
PROGRAM_BUFFER,
- ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
- (const_offset ? const_offset->value.u[0] : 0),
+ buf_base + (const_offset ? const_offset->value.u[0] : 0),
GLSL_TYPE_UINT);
if (!const_offset) {
buffer.reladdr = ralloc(mem_ctx, st_src_reg);
st_src_reg temp = get_temp(glsl_type::uvec4_type);
st_dst_reg temp_dst = st_dst_reg(temp);
unsigned orig_swz = op[0].swizzle;
- /*
+ /*
* To convert unsigned to 64-bit:
* zero Y channel, copy X channel.
*/
break;
}
case ir_unop_i642b:
- emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int(0));
+ emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int64(0));
break;
case ir_unop_i642f:
emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]);
ir->array->accept(this);
src = this->result;
- if (ir->array->ir_type != ir_type_dereference_array) {
+ if (!src.has_index2) {
switch (this->prog->Target) {
case GL_TESS_CONTROL_PROGRAM_NV:
is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) &&
if (index) {
if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
- src.file == PROGRAM_INPUT)
- element_size = attrib_type_size(ir->type, true);
+ src.file == PROGRAM_INPUT)
+ element_size = attrib_type_size(ir->type, true);
if (is_2D) {
src.index2D = index->value.i[0];
src.has_index2 = true;
/* a is - 0 + - 0 +
* (a < 0) T F F ( a < 0) T F F
* (0 < a) F F T (-a < 0) F F T
- * (a <= 0) T T F (-a < 0) F F T (swap order of other operands)
- * (0 <= a) F T T ( a < 0) T F F (swap order of other operands)
- * (a > 0) F F T (-a < 0) F F T
- * (0 > a) T F F ( a < 0) T F F
* (a >= 0) F T T ( a < 0) T F F (swap order of other operands)
* (0 >= a) T T F (-a < 0) F F T (swap order of other operands)
*
negate = zero_on_left;
break;
- case ir_binop_greater:
- switch_order = false;
- negate = !zero_on_left;
- break;
-
- case ir_binop_lequal:
- switch_order = true;
- negate = !zero_on_left;
- break;
-
case ir_binop_gequal:
switch_order = true;
negate = zero_on_left;
r->type = type->base_type;
if (cond) {
st_src_reg l_src = st_src_reg(*l);
- l_src.swizzle = swizzle_for_size(type->vector_elements);
+
+ if (l_src.file == PROGRAM_OUTPUT &&
+ this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+ (l_src.index == FRAG_RESULT_DEPTH || l_src.index == FRAG_RESULT_STENCIL)) {
+ /* This is a special case because the source swizzles will be shifted
+ * later to account for the difference between GLSL (where they're
+ * plain floats) and TGSI (where they're Z and Y components). */
+ l_src.swizzle = SWIZZLE_XXXX;
+ }
if (native_integers) {
emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond,
if (type->is_dual_slot()) {
l->index++;
if (r->is_double_vertex_input == false)
- r->index++;
+ r->index++;
}
}
inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]);
new_inst->saturate = inst->saturate;
+ new_inst->resource = inst->resource;
inst->dead_mask = inst->dst[0].writemask;
} else {
emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false);
st_src_reg temp_base = get_temp(ir->type);
st_dst_reg temp = st_dst_reg(temp_base);
- foreach_in_list(ir_constant, field_value, &ir->components) {
+ for (i = 0; i < ir->type->length; i++) {
+ ir_constant *const field_value = ir->get_record_field(i);
int size = type_size(field_value->type);
assert(size > 0);
field_value->accept(this);
src = this->result;
- for (i = 0; i < (unsigned int)size; i++) {
+ for (unsigned j = 0; j < (unsigned int)size; j++) {
emit_asm(ir, TGSI_OPCODE_MOV, temp, src);
src.index++;
in_array++;
for (i = 0; i < ir->type->length; i++) {
- ir->array_elements[i]->accept(this);
+ ir->const_elements[i]->accept(this);
src = this->result;
for (int j = 0; j < size; j++) {
emit_asm(ir, TGSI_OPCODE_MOV, temp, src);
exec_node *param = ir->actual_parameters.get_head();
ir_dereference *deref = static_cast<ir_dereference *>(param);
ir_variable *location = deref->variable_referenced();
-
- st_src_reg buffer(
- PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT);
-
+ bool has_hw_atomics = st_context(ctx)->has_hw_atomics;
/* Calculate the surface offset */
st_src_reg offset;
unsigned array_size = 0, base = 0;
uint16_t index = 0;
+ st_src_reg resource;
get_deref_offsets(deref, &array_size, &base, &index, &offset, false);
- if (offset.file != PROGRAM_UNDEFINED) {
- emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
- offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
- emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
- offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE));
+ if (has_hw_atomics) {
+ variable_storage *entry = find_variable_storage(location);
+ st_src_reg buffer(PROGRAM_HW_ATOMIC, 0, GLSL_TYPE_ATOMIC_UINT, location->data.binding);
+
+ if (!entry) {
+ entry = new(mem_ctx) variable_storage(location, PROGRAM_HW_ATOMIC,
+ num_atomics);
+ _mesa_hash_table_insert(this->variables, location, entry);
+
+ atomic_info[num_atomics].location = location->data.location;
+ atomic_info[num_atomics].binding = location->data.binding;
+ atomic_info[num_atomics].size = location->type->arrays_of_arrays_size();
+ if (atomic_info[num_atomics].size == 0)
+ atomic_info[num_atomics].size = 1;
+ atomic_info[num_atomics].array_id = 0;
+ num_atomics++;
+ }
+
+ if (offset.file != PROGRAM_UNDEFINED) {
+ if (atomic_info[entry->index].array_id == 0) {
+ num_atomic_arrays++;
+ atomic_info[entry->index].array_id = num_atomic_arrays;
+ }
+ buffer.array_id = atomic_info[entry->index].array_id;
+ }
+
+ buffer.index = index;
+ buffer.index += location->data.offset / ATOMIC_COUNTER_SIZE;
+ buffer.has_index2 = true;
+
+ if (offset.file != PROGRAM_UNDEFINED) {
+ buffer.reladdr = ralloc(mem_ctx, st_src_reg);
+ *buffer.reladdr = offset;
+ emit_arl(ir, sampler_reladdr, offset);
+ }
+ offset = st_src_reg_for_int(0);
+
+ resource = buffer;
} else {
- offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE);
+ st_src_reg buffer(PROGRAM_BUFFER, location->data.binding,
+ GLSL_TYPE_ATOMIC_UINT);
+
+ if (offset.file != PROGRAM_UNDEFINED) {
+ emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
+ offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
+ emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
+ offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE));
+ } else {
+ offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE);
+ }
+ resource = buffer;
}
ir->return_deref->accept(this);
inst = emit_asm(ir, opcode, dst, offset, data, data2);
}
- inst->resource = buffer;
+ inst->resource = resource;
}
void
ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
ir_constant *const_block = block->as_constant();
-
+ int buf_base = st_context(ctx)->has_hw_atomics ? 0 : ctx->Const.Program[shader->Stage].MaxAtomicBuffers;
st_src_reg buffer(
PROGRAM_BUFFER,
- ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
- (const_block ? const_block->value.u[0] : 0),
+ buf_base + (const_block ? const_block->value.u[0] : 0),
GLSL_TYPE_UINT);
if (!const_block) {
assert(access);
}
- /* The emit_asm() might have actually split the op into pieces, e.g. for
- * double stores. We have to go back and fix up all the generated ops.
- */
- unsigned op = inst->op;
- do {
- inst->resource = buffer;
- if (access)
- inst->buffer_access = access->value.u[0];
-
- if (inst == this->instructions.get_head_raw())
- break;
- inst = (glsl_to_tgsi_instruction *)inst->get_prev();
-
- if (inst->op == TGSI_OPCODE_UADD) {
- if (inst == this->instructions.get_head_raw())
- break;
- inst = (glsl_to_tgsi_instruction *)inst->get_prev();
- }
- } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED);
+ add_buffer_to_load_and_stores(inst, &buffer, &this->instructions, access);
}
void
glsl_to_tgsi_instruction *inst;
+ st_src_reg bindless;
+ if (imgvar->contains_bindless()) {
+ img->accept(this);
+ bindless = this->result;
+ }
+
if (ir->callee->intrinsic_id == ir_intrinsic_image_size) {
dst.writemask = WRITEMASK_XYZ;
inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst);
}
if (imgvar->contains_bindless()) {
- img->accept(this);
- inst->resource = this->result;
+ inst->resource = bindless;
inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
SWIZZLE_X, SWIZZLE_Y);
} else {
emit_arl(ir, sampler_reladdr, reladdr);
}
+ st_src_reg bindless;
+ if (var->contains_bindless()) {
+ ir->sampler->accept(this);
+ bindless = this->result;
+ }
+
if (opcode == TGSI_OPCODE_TXD)
inst = emit_asm(ir, opcode, result_dst, coord, dx, dy);
else if (opcode == TGSI_OPCODE_TXQ) {
inst->tex_shadow = GL_TRUE;
if (var->contains_bindless()) {
- ir->sampler->accept(this);
- inst->resource = this->result;
+ inst->resource = bindless;
inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
SWIZZLE_X, SWIZZLE_Y);
} else {
num_outputs = 0;
num_input_arrays = 0;
num_output_arrays = 0;
+ num_atomics = 0;
+ num_atomic_arrays = 0;
num_immediates = 0;
num_address_regs = 0;
samplers_used = 0;
images_used = 0;
indirect_addr_consts = false;
wpos_transform_const = -1;
- glsl_version = 0;
native_integers = false;
mem_ctx = ralloc_context(NULL);
ctx = NULL;
{
v->samplers_used = 0;
v->images_used = 0;
+ prog->info.textures_used_by_txf = 0;
foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
if (inst->info->is_tex) {
v->sampler_targets[idx] =
st_translate_texture_target(inst->tex_target, inst->tex_shadow);
- if (inst->tex_shadow) {
- prog->ShadowSamplers |= 1 << (inst->resource.index + i);
+ if (inst->op == TGSI_OPCODE_TXF || inst->op == TGSI_OPCODE_TXF_LZ) {
+ prog->info.textures_used_by_txf |= 1u << idx;
}
}
}
free(tempWrites);
}
+/**
+ * Apply the rename table to one source register.
+ *
+ * \param renames  table indexed by the old temporary register index
+ * \param src      register to update in place; may be NULL (callers pass
+ *                 reladdr pointers straight through)
+ *
+ * Only PROGRAM_TEMPORARY registers whose table entry is marked valid are
+ * rewritten; everything else is left untouched.
+ */
+static void
+rename_temp_handle_src(struct rename_reg_pair *renames, st_src_reg *src)
+{
+   if (!src || src->file != PROGRAM_TEMPORARY)
+      return;
+
+   const struct rename_reg_pair &entry = renames[src->index];
+   if (entry.valid)
+      src->index = entry.new_reg;
+}
+
/* Replaces all references to a temporary register index with another index. */
void
glsl_to_tgsi_visitor::rename_temp_registers(struct rename_reg_pair *renames)
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
unsigned j;
for (j = 0; j < num_inst_src_regs(inst); j++) {
- if (inst->src[j].file == PROGRAM_TEMPORARY) {
- int old_idx = inst->src[j].index;
- if (renames[old_idx].valid)
- inst->src[j].index = renames[old_idx].new_reg;
- }
+ rename_temp_handle_src(renames, &inst->src[j]);
+ rename_temp_handle_src(renames, inst->src[j].reladdr);
+ rename_temp_handle_src(renames, inst->src[j].reladdr2);
}
for (j = 0; j < inst->tex_offset_num_offset; j++) {
- if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
- int old_idx = inst->tex_offsets[j].index;
- if (renames[old_idx].valid)
- inst->tex_offsets[j].index = renames[old_idx].new_reg;
- }
+ rename_temp_handle_src(renames, &inst->tex_offsets[j]);
+ rename_temp_handle_src(renames, inst->tex_offsets[j].reladdr);
+ rename_temp_handle_src(renames, inst->tex_offsets[j].reladdr2);
}
- if (inst->resource.file == PROGRAM_TEMPORARY) {
- int old_idx = inst->resource.index;
- if (renames[old_idx].valid)
- inst->resource.index = renames[old_idx].new_reg;
- }
+ rename_temp_handle_src(renames, &inst->resource);
+ rename_temp_handle_src(renames, inst->resource.reladdr);
+ rename_temp_handle_src(renames, inst->resource.reladdr2);
for (j = 0; j < num_inst_dst_regs(inst); j++) {
if (inst->dst[j].file == PROGRAM_TEMPORARY) {
int old_idx = inst->dst[j].index;
if (renames[old_idx].valid)
- inst->dst[j].index = renames[old_idx].new_reg;}
+ inst->dst[j].index = renames[old_idx].new_reg;
+ }
+ rename_temp_handle_src(renames, inst->dst[j].reladdr);
+ rename_temp_handle_src(renames, inst->dst[j].reladdr2);
}
}
}
!inst->dst[0].reladdr2 &&
!inst->saturate &&
inst->src[0].file != PROGRAM_ARRAY &&
+ (inst->src[0].file != PROGRAM_OUTPUT ||
+ this->shader->Stage != MESA_SHADER_TESS_CTRL) &&
!inst->src[0].reladdr &&
!inst->src[0].reladdr2 &&
!inst->src[0].negate &&
ralloc_free(acp);
}
+/**
+ * Record that a relative-address register is read.
+ *
+ * \param writes   table with four slots per temporary register index
+ * \param reladdr  address operand, or NULL when the access is not relative
+ *
+ * If \p reladdr is a PROGRAM_TEMPORARY, the slot for the channel selected by
+ * the first swizzle component is cleared to NULL; anything else is ignored.
+ */
+static void
+dead_code_handle_reladdr(glsl_to_tgsi_instruction **writes, st_src_reg *reladdr)
+{
+   if (!reladdr || reladdr->file != PROGRAM_TEMPORARY)
+      return;
+
+   /* Clear where it's used as src. */
+   const int chan = GET_SWZ(reladdr->swizzle, 0);
+   writes[4 * reladdr->index + chan] = NULL;
+}
+
/*
* On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
* code elimination.
writes[4 * inst->src[i].index + c] = NULL;
}
}
+ dead_code_handle_reladdr(writes, inst->src[i].reladdr);
+ dead_code_handle_reladdr(writes, inst->src[i].reladdr2);
}
for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) {
if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){
writes[4 * inst->tex_offsets[i].index + c] = NULL;
}
}
+ dead_code_handle_reladdr(writes, inst->tex_offsets[i].reladdr);
+ dead_code_handle_reladdr(writes, inst->tex_offsets[i].reladdr2);
}
if (inst->resource.file == PROGRAM_TEMPORARY) {
writes[4 * inst->resource.index + c] = NULL;
}
}
+ dead_code_handle_reladdr(writes, inst->resource.reladdr);
+ dead_code_handle_reladdr(writes, inst->resource.reladdr2);
+ for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
+ dead_code_handle_reladdr(writes, inst->dst[i].reladdr);
+ dead_code_handle_reladdr(writes, inst->dst[i].reladdr2);
+ }
break;
}
void
glsl_to_tgsi_visitor::merge_two_dsts(void)
{
- foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
+ /* We never delete inst, but we may delete its successor. */
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
glsl_to_tgsi_instruction *inst2;
- bool merged;
+ unsigned defined;
+
if (num_inst_dst_regs(inst) != 2)
continue;
inst->dst[1].file != PROGRAM_UNDEFINED)
continue;
- inst2 = (glsl_to_tgsi_instruction *) inst->next;
- do {
+ assert(inst->dst[0].file != PROGRAM_UNDEFINED ||
+ inst->dst[1].file != PROGRAM_UNDEFINED);
- if (inst->src[0].file == inst2->src[0].file &&
+ if (inst->dst[0].file == PROGRAM_UNDEFINED)
+ defined = 1;
+ else
+ defined = 0;
+
+ inst2 = (glsl_to_tgsi_instruction *) inst->next;
+ while (!inst2->is_tail_sentinel()) {
+ if (inst->op == inst2->op &&
+ inst2->dst[defined].file == PROGRAM_UNDEFINED &&
+ inst->src[0].file == inst2->src[0].file &&
inst->src[0].index == inst2->src[0].index &&
inst->src[0].type == inst2->src[0].type &&
inst->src[0].swizzle == inst2->src[0].swizzle)
break;
inst2 = (glsl_to_tgsi_instruction *) inst2->next;
- } while (inst2);
+ }
- if (!inst2)
+ if (inst2->is_tail_sentinel()) {
+ /* Undefined destinations are not allowed, substitute with an unused
+ * temporary register.
+ */
+ st_src_reg tmp = get_temp(glsl_type::vec4_type);
+ inst->dst[defined ^ 1] = st_dst_reg(tmp);
+ inst->dst[defined ^ 1].writemask = 0;
continue;
- merged = false;
- if (inst->dst[0].file == PROGRAM_UNDEFINED) {
- merged = true;
- inst->dst[0] = inst2->dst[0];
- } else if (inst->dst[1].file == PROGRAM_UNDEFINED) {
- inst->dst[1] = inst2->dst[1];
- merged = true;
}
- if (merged) {
- inst2->remove();
- delete inst2;
- }
+ inst->dst[defined ^ 1] = inst2->dst[defined ^ 1];
+ inst2->remove();
+ delete inst2;
}
}
void
glsl_to_tgsi_visitor::merge_registers(void)
{
- int *last_reads = ralloc_array(mem_ctx, int, this->next_temp);
- int *first_writes = ralloc_array(mem_ctx, int, this->next_temp);
- struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
- int i, j;
-
- /* Read the indices of the last read and first write to each temp register
- * into an array so that we don't have to traverse the instruction list as
- * much. */
- for (i = 0; i < this->next_temp; i++) {
- last_reads[i] = -1;
- first_writes[i] = -1;
- }
- get_last_temp_read_first_temp_write(last_reads, first_writes);
+ /* One lifetime entry per temporary index below next_temp; filled in by
+ * get_temp_registers_required_lifetimes() from the instruction list. */
+ struct lifetime *lifetimes =
+ rzalloc_array(mem_ctx, struct lifetime, this->next_temp);
- /* Start looking for registers with non-overlapping usages that can be
- * merged together. */
- for (i = 0; i < this->next_temp; i++) {
- /* Don't touch unused registers. */
- if (last_reads[i] < 0 || first_writes[i] < 0) continue;
-
- for (j = 0; j < this->next_temp; j++) {
- /* Don't touch unused registers. */
- if (last_reads[j] < 0 || first_writes[j] < 0) continue;
-
- /* We can merge the two registers if the first write to j is after or
- * in the same instruction as the last read from i. Note that the
- * register at index i will always be used earlier or at the same time
- * as the register at index j. */
- if (first_writes[i] <= first_writes[j] &&
- last_reads[i] <= first_writes[j]) {
- renames[j].new_reg = i;
- renames[j].valid = true;
-
- /* Update the first_writes and last_reads arrays with the new
- * values for the merged register index, and mark the newly unused
- * register index as such. */
- assert(last_reads[j] >= last_reads[i]);
- last_reads[i] = last_reads[j];
- first_writes[j] = -1;
- last_reads[j] = -1;
- }
- }
+ /* Registers are renamed only when the lifetime query returns true;
+ * otherwise the instruction list is left as it is. */
+ if (get_temp_registers_required_lifetimes(mem_ctx, &this->instructions,
+ this->next_temp, lifetimes)) {
+ struct rename_reg_pair *renames =
+ rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
+ get_temp_registers_remapping(mem_ctx, this->next_temp, lifetimes, renames);
+ rename_temp_registers(renames);
+ ralloc_free(renames);
}
- rename_temp_registers(renames);
- ralloc_free(renames);
- ralloc_free(last_reads);
- ralloc_free(first_writes);
+ ralloc_free(lifetimes);
}
/* Reassign indices to temporary registers by reusing unused indices created
struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];
struct ureg_src images[PIPE_MAX_SHADER_IMAGES];
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
+ struct ureg_src hw_atomics[PIPE_MAX_HW_ATOMIC_BUFFERS];
struct ureg_src shared_memory;
unsigned *array_sizes;
struct inout_decl *input_decls;
const ubyte *outputMapping;
unsigned procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */
+ bool need_uarl;
};
/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
static struct ureg_src
emit_immediate(struct st_translate *t,
gl_constant_value values[4],
- int type, int size)
+ GLenum type, int size)
{
struct ureg_program *ureg = t->ureg;
}
}
+static struct ureg_src
+translate_src(struct st_translate *t, const st_src_reg *src_reg);
+
+/**
+ * Pick the register used as the indirect-address operand for \p reladdr.
+ *
+ * Yields t->address[addr_index] when t->need_uarl is set, or when
+ * \p reladdr does not report itself as a legal TGSI address operand;
+ * otherwise yields the direct translation of \p reladdr.
+ */
+static struct ureg_src
+translate_addr(struct st_translate *t, const st_src_reg *reladdr,
+               unsigned addr_index)
+{
+   /* need_uarl is tested first so the operand query is skipped when set. */
+   const bool use_address_reg =
+      t->need_uarl || !reladdr->is_legal_tgsi_address_operand();
+
+   return use_address_reg ? ureg_src(t->address[addr_index])
+                          : translate_src(t, reladdr);
+}
+
/**
* Create a TGSI ureg_dst register from an st_dst_reg.
*/
if (dst_reg->reladdr != NULL) {
assert(dst_reg->file != PROGRAM_TEMPORARY);
- dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
+ dst = ureg_dst_indirect(dst, translate_addr(t, dst_reg->reladdr, 0));
}
if (dst_reg->has_index2) {
if (dst_reg->reladdr2)
- dst = ureg_dst_dimension_indirect(dst, ureg_src(t->address[1]),
+ dst = ureg_dst_dimension_indirect(dst,
+ translate_addr(t, dst_reg->reladdr2, 1),
dst_reg->index2D);
else
dst = ureg_dst_dimension(dst, dst_reg->index2D);
src = t->systemValues[src_reg->index];
break;
+ case PROGRAM_HW_ATOMIC:
+ src = ureg_src_array_register(TGSI_FILE_HW_ATOMIC, src_reg->index,
+ src_reg->array_id);
+ break;
+
default:
assert(!"unknown src register file");
return ureg_src_undef();
* and UBO constant buffers (buffer, position).
*/
if (src_reg->reladdr2)
- src = ureg_src_dimension_indirect(src, ureg_src(t->address[1]),
+ src = ureg_src_dimension_indirect(src,
+ translate_addr(t, src_reg->reladdr2, 1),
src_reg->index2D);
else
src = ureg_src_dimension(src, src_reg->index2D);
if (src_reg->reladdr != NULL) {
assert(src_reg->file != PROGRAM_TEMPORARY);
- src = ureg_src_indirect(src, ureg_src(t->address[0]));
+ src = ureg_src_indirect(src, translate_addr(t, src_reg->reladdr, 0));
}
return src;
assert(src[num_src].File != TGSI_FILE_NULL);
if (inst->resource.reladdr)
src[num_src] =
- ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
+ ureg_src_indirect(src[num_src],
+ translate_addr(t, inst->resource.reladdr, 2));
num_src++;
for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
src[0] = t->shared_memory;
} else if (inst->resource.file == PROGRAM_BUFFER) {
src[0] = t->buffers[inst->resource.index];
+ } else if (inst->resource.file == PROGRAM_HW_ATOMIC) {
+ src[0] = translate_src(t, &inst->resource);
+ } else if (inst->resource.file == PROGRAM_CONSTANT) {
+ assert(inst->resource.has_index2);
+ src[0] = ureg_src_register(TGSI_FILE_CONSTBUF, inst->resource.index);
} else {
+ assert(inst->resource.file != PROGRAM_UNDEFINED);
if (inst->resource.file == PROGRAM_IMAGE) {
src[0] = t->images[inst->resource.index];
} else {
tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
}
if (inst->resource.reladdr)
- src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2]));
+ src[0] = ureg_src_indirect(src[0],
+ translate_addr(t, inst->resource.reladdr, 2));
assert(src[0].File != TGSI_FILE_NULL);
ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
inst->buffer_access,
}
dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask);
if (inst->resource.reladdr)
- dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2]));
+ dst[0] = ureg_dst_indirect(dst[0],
+ translate_addr(t, inst->resource.reladdr, 2));
assert(dst[0].File != TGSI_FILE_NULL);
ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
inst->buffer_access,
const ubyte outputSemanticName[],
const ubyte outputSemanticIndex[])
{
+ struct pipe_screen *screen = st_context(ctx)->pipe->screen;
struct st_translate *t;
unsigned i;
struct gl_program_constants *frag_const =
assert(numInputs <= ARRAY_SIZE(t->inputs));
assert(numOutputs <= ARRAY_SIZE(t->outputs));
+ ASSERT_BITFIELD_SIZE(st_src_reg, type, GLSL_TYPE_ERROR);
+ ASSERT_BITFIELD_SIZE(st_dst_reg, type, GLSL_TYPE_ERROR);
+ ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_type, GLSL_TYPE_ERROR);
+ ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format, PIPE_FORMAT_COUNT);
+ ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_target,
+ (gl_texture_index) (NUM_TEXTURE_TARGETS - 1));
+ ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format,
+ (enum pipe_format) (PIPE_FORMAT_COUNT - 1));
+ ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, op, TGSI_OPCODE_LAST - 1);
+
t = CALLOC_STRUCT(st_translate);
if (!t) {
ret = PIPE_ERROR_OUT_OF_MEMORY;
}
t->procType = procType;
+ t->need_uarl = !screen->get_param(screen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS);
t->inputMapping = inputMapping;
t->outputMapping = outputMapping;
t->ureg = ureg;
/* texture samplers */
for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
if (program->samplers_used & (1u << i)) {
- unsigned type = st_translate_texture_type(program->sampler_types[i]);
+ enum tgsi_return_type type =
+ st_translate_texture_type(program->sampler_types[i]);
t->samplers[i] = ureg_DECL_sampler(ureg, i);
{
struct gl_program *prog = program->prog;
- for (i = 0; i < prog->info.num_abos; i++) {
- unsigned index = prog->sh.AtomicBuffers[i]->Binding;
- assert(index < frag_const->MaxAtomicBuffers);
- t->buffers[index] = ureg_DECL_buffer(ureg, index, true);
+ if (!st_context(ctx)->has_hw_atomics) {
+ for (i = 0; i < prog->info.num_abos; i++) {
+ unsigned index = prog->sh.AtomicBuffers[i]->Binding;
+ assert(index < frag_const->MaxAtomicBuffers);
+ t->buffers[index] = ureg_DECL_buffer(ureg, index, true);
+ }
+ } else {
+ for (i = 0; i < program->num_atomics; i++) {
+ struct hwatomic_decl *ainfo = &program->atomic_info[i];
+ gl_uniform_storage *uni_storage = &prog->sh.data->UniformStorage[ainfo->location];
+ int base = uni_storage->offset / ATOMIC_COUNTER_SIZE;
+ ureg_DECL_hw_atomic(ureg, base, base + ainfo->size - 1, ainfo->binding,
+ ainfo->array_id);
+ }
}
assert(prog->info.num_ssbos <= frag_const->MaxShaderStorageBlocks);
for (i = 0; i < prog->info.num_ssbos; i++) {
- unsigned index = frag_const->MaxAtomicBuffers + i;
+ unsigned index = i;
+ if (!st_context(ctx)->has_hw_atomics)
+ index += frag_const->MaxAtomicBuffers;
+
t->buffers[index] = ureg_DECL_buffer(ureg, index, false);
}
}
v->shader_program = shader_program;
v->shader = shader;
v->options = options;
- v->glsl_version = ctx->Const.GLSLVersion;
v->native_integers = ctx->Const.NativeIntegers;
v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
v->has_tex_txf_lz = pscreen->get_param(pscreen,
PIPE_CAP_TGSI_TEX_TXF_LZ);
+ v->need_uarl = !pscreen->get_param(pscreen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS);
v->variables = _mesa_hash_table_create(v->mem_ctx, _mesa_hash_pointer,
_mesa_key_pointer_equal);
/* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
v->simplify_cmp();
-
- if (shader->Stage != MESA_SHADER_TESS_CTRL &&
- shader->Stage != MESA_SHADER_TESS_EVAL)
- v->copy_propagate();
+ v->copy_propagate();
while (v->eliminate_dead_code());
_mesa_copy_linked_program_data(shader_program, shader);
shrink_array_declarations(v->inputs, v->num_inputs,
&prog->info.inputs_read,
- prog->info.double_inputs_read,
+ prog->info.vs.double_inputs_read,
&prog->info.patch_inputs_read);
shrink_array_declarations(v->outputs, v->num_outputs,
&prog->info.outputs_written, 0ULL,
struct pipe_screen *pscreen = ctx->st->pipe->screen;
assert(prog->data->LinkStatus);
+ bool use_nir = false;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
continue;
PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED);
+ bool have_ldexp = pscreen->get_shader_param(pscreen, ptarget,
+ PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED);
unsigned if_threshold = pscreen->get_shader_param(pscreen, ptarget,
PIPE_SHADER_CAP_LOWER_IF_THRESHOLD);
+ enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
+ pscreen->get_shader_param(pscreen, ptarget,
+ PIPE_SHADER_CAP_PREFERRED_IR);
+ if (preferred_ir == PIPE_SHADER_IR_NIR)
+ use_nir = true;
+
/* If there are forms of indirect addressing that the driver
* cannot handle, perform the lowering pass.
*/
FDIV_TO_MUL_RCP |
EXP_TO_EXP2 |
LOG_TO_LOG2 |
- LDEXP_TO_ARITH |
+ (have_ldexp ? 0 : LDEXP_TO_ARITH) |
(have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
CARRY_TO_ARITH |
BORROW_TO_ARITH |
build_program_resource_list(ctx, prog);
+ if (use_nir)
+ return st_link_nir(ctx, prog);
+
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_linked_shader *shader = prog->_LinkedShaders[i];
if (shader == NULL)
continue;
- enum pipe_shader_type ptarget =
- pipe_shader_type_from_mesa(shader->Stage);
- enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
- pscreen->get_shader_param(pscreen, ptarget,
- PIPE_SHADER_CAP_PREFERRED_IR);
-
- struct gl_program *linked_prog = NULL;
- if (preferred_ir == PIPE_SHADER_IR_NIR) {
- /* TODO only for GLSL VS/FS/CS for now: */
- switch (shader->Stage) {
- case MESA_SHADER_VERTEX:
- case MESA_SHADER_FRAGMENT:
- case MESA_SHADER_COMPUTE:
- linked_prog = st_nir_get_mesa_program(ctx, prog, shader);
- default:
- break;
- }
- } else {
- linked_prog = get_mesa_program_tgsi(ctx, prog, shader);
- }
+ struct gl_program *linked_prog =
+ get_mesa_program_tgsi(ctx, prog, shader);
+ st_set_prog_affected_state_flags(linked_prog);
if (linked_prog) {
- st_set_prog_affected_state_flags(linked_prog);
if (!ctx->Driver.ProgramStringNotify(ctx,
_mesa_shader_stage_to_program(i),
linked_prog)) {