#include "tgsi/tgsi_info.h"
#include "util/u_math.h"
#include "util/u_memory.h"
-#include "st_glsl_types.h"
#include "st_program.h"
#include "st_mesa_to_tgsi.h"
#include "st_format.h"
-#include "st_nir.h"
-#include "st_shader_cache.h"
#include "st_glsl_to_tgsi_temprename.h"
#include "util/hash_table.h"
#define MAX_GLSL_TEXTURE_OFFSET 4
+#ifndef NDEBUG
+#include "util/u_atomic.h"
+#include "util/simple_mtx.h"
+#include <fstream>
+#include <ios>
+
+/* Prepare to make it possible to specify log file */
+static std::ofstream stats_log;
+
+/* Serializes the one-time setup of the statistics log. */
+static simple_mtx_t print_stats_mutex = _SIMPLE_MTX_INITIALIZER_NP;
+
+/**
+ * Check whether statistics of the shader conversion should be written.
+ *
+ * On the first call the environment variable GLSL_TO_TGSI_PRINT_STATS is
+ * read. If it is set, its value is used as the name of the log file, which
+ * is opened in append mode; a CSV header line is written when the file
+ * could not be opened for reading beforehand. The outcome is cached, so
+ * later calls only test the cached state.
+ *
+ * \return true if the log stream was opened successfully, false otherwise.
+ */
+static inline bool print_stats_enabled ()
+{
+   /* 0: not initialized yet, 1: logging enabled, -1: logging disabled */
+   static int stats_enabled = 0;
+
+   if (!p_atomic_read(&stats_enabled)) {
+      simple_mtx_lock(&print_stats_mutex);
+      if (!p_atomic_read(&stats_enabled)) {
+         int state = -1;
+         const char *stats_filename = getenv("GLSL_TO_TGSI_PRINT_STATS");
+         if (stats_filename) {
+            /* Probe before opening for append: opening creates the file. */
+            bool write_header = std::ifstream(stats_filename).fail();
+            stats_log.open(stats_filename,
+                           std::ios_base::out | std::ios_base::app);
+            if (stats_log.good()) {
+               state = 1;
+               if (write_header)
+                  stats_log << "arrays,temps,temps in arrays,total,instructions\n";
+            }
+         }
+         /* Publish the state only after the header has been written, so
+          * that a caller that skips the lock never sees "enabled" while
+          * the log is still being set up.
+          */
+         p_atomic_set(&stats_enabled, state);
+      }
+      simple_mtx_unlock(&print_stats_mutex);
+   }
+   return p_atomic_read(&stats_enabled) > 0;
+}
+/* Execute statement X only when statistics logging is enabled. The whole
+ * expansion is one do/while statement without a trailing semicolon, so
+ * "PRINT_STATS(x);" behaves like an ordinary statement and can be used as
+ * the body of an if/else without capturing the else.
+ */
+#define PRINT_STATS(X) do { if (print_stats_enabled()) { X; } } while (false)
+#else
+#define PRINT_STATS(X)
+#endif
+
+
static unsigned is_precise(const ir_variable *ir)
{
if (!ir)
class immediate_storage : public exec_node {
public:
- immediate_storage(gl_constant_value *values, int size32, int type)
+ immediate_storage(gl_constant_value *values, int size32, GLenum type)
{
memcpy(this->values, values, size32 * sizeof(gl_constant_value));
this->size32 = size32;
/* doubles are stored across 2 gl_constant_values */
gl_constant_value values[4];
int size32; /**< Number of 32-bit components (1-4) */
- int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
+ GLenum type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
};
static const st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
enum glsl_interp_mode interp;
enum glsl_base_type base_type;
ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */
+ bool invariant;
};
static struct inout_decl *
int num_address_regs;
uint32_t samplers_used;
glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
- int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */
+ enum tgsi_texture_type sampler_targets[PIPE_MAX_SAMPLERS];
int images_used;
- int image_targets[PIPE_MAX_SHADER_IMAGES];
- unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
+ enum tgsi_texture_type image_targets[PIPE_MAX_SHADER_IMAGES];
+ enum pipe_format image_formats[PIPE_MAX_SHADER_IMAGES];
+ bool image_wr[PIPE_MAX_SHADER_IMAGES];
bool indirect_addr_consts;
int wpos_transform_const;
bool has_tex_txf_lz;
bool precise;
bool need_uarl;
+ bool tg4_component_in_swizzle;
variable_storage *find_variable_storage(ir_variable *var);
int add_constant(gl_register_file file, gl_constant_value values[8],
- int size, int datatype, uint16_t *swizzle_out);
+ int size, GLenum datatype, uint16_t *swizzle_out);
st_src_reg get_temp(const glsl_type *type);
void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
virtual void visit(ir_call *);
virtual void visit(ir_return *);
virtual void visit(ir_discard *);
+ virtual void visit(ir_demote *);
virtual void visit(ir_texture *);
virtual void visit(ir_if *);
virtual void visit(ir_emit_vertex *);
void visit_membar_intrinsic(ir_call *);
void visit_shared_intrinsic(ir_call *);
void visit_image_intrinsic(ir_call *);
- void visit_generic_intrinsic(ir_call *, unsigned op);
+ void visit_generic_intrinsic(ir_call *, enum tgsi_opcode op);
st_src_reg result;
/** List of glsl_to_tgsi_instruction */
exec_list instructions;
- glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
+ glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, enum tgsi_opcode op,
st_dst_reg dst = undef_dst,
st_src_reg src0 = undef_src,
st_src_reg src1 = undef_src,
st_src_reg src2 = undef_src,
st_src_reg src3 = undef_src);
- glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
+ glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, enum tgsi_opcode op,
st_dst_reg dst, st_dst_reg dst1,
st_src_reg src0 = undef_src,
st_src_reg src1 = undef_src,
st_src_reg src2 = undef_src,
st_src_reg src3 = undef_src);
- unsigned get_opcode(unsigned op,
- st_dst_reg dst,
- st_src_reg src0, st_src_reg src1);
+ enum tgsi_opcode get_opcode(enum tgsi_opcode op,
+ st_dst_reg dst,
+ st_src_reg src0, st_src_reg src1);
/**
* Emit the correct dot-product instruction for the type of arguments
st_src_reg src1,
unsigned elements);
- void emit_scalar(ir_instruction *ir, unsigned op,
+ void emit_scalar(ir_instruction *ir, enum tgsi_opcode op,
st_dst_reg dst, st_src_reg src0);
- void emit_scalar(ir_instruction *ir, unsigned op,
+ void emit_scalar(ir_instruction *ir, enum tgsi_opcode op,
st_dst_reg dst, st_src_reg src0, st_src_reg src1);
void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
st_src_reg *indirect,
unsigned *location);
st_src_reg canonicalize_gather_offset(st_src_reg offset);
+ bool handle_bound_deref(ir_dereference *ir);
bool try_emit_mad(ir_expression *ir,
int mul_operand);
void copy_propagate(void);
int eliminate_dead_code(void);
+ void split_arrays(void);
void merge_two_dsts(void);
void merge_registers(void);
void renumber_registers(void);
st_dst_reg *l, st_src_reg *r,
st_src_reg *cond, bool cond_swap);
+ void print_stats();
+
void *mem_ctx;
};
-static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
-static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
-static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);
+static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X,
+ GLSL_TYPE_FLOAT, 0);
+static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X,
+ GLSL_TYPE_FLOAT, 1);
+static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X,
+ GLSL_TYPE_FLOAT, 2);
static void
-fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
+fail_link(struct gl_shader_program *prog, const char *fmt, ...)
+ PRINTFLIKE(2, 3);
static void
fail_link(struct gl_shader_program *prog, const char *fmt, ...)
ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args);
va_end(args);
- prog->data->LinkStatus = linking_failure;
+ prog->data->LinkStatus = LINKING_FAILURE;
}
int
glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
+glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, enum tgsi_opcode op,
st_dst_reg dst, st_dst_reg dst1,
st_src_reg src0, st_src_reg src1,
st_src_reg src2, st_src_reg src3)
/* Update indirect addressing status used by TGSI */
if (dst.reladdr || dst.reladdr2) {
- switch(dst.file) {
+ switch (dst.file) {
case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT:
case PROGRAM_UNIFORM:
}
else {
for (i = 0; i < 4; i++) {
- if(inst->src[i].reladdr) {
- switch(inst->src[i].file) {
+ if (inst->src[i].reladdr) {
+ switch (inst->src[i].file) {
case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT:
case PROGRAM_UNIFORM:
*/
for (j = 0; j < 2; j++) {
dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type);
- if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) {
- enum glsl_base_type type = find_array_type(this->outputs, this->num_outputs, inst->dst[j].array_id);
+ if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT &&
+ inst->dst[j].type == GLSL_TYPE_ARRAY) {
+ enum glsl_base_type type = find_array_type(this->outputs,
+ this->num_outputs,
+ inst->dst[j].array_id);
if (glsl_base_type_is_64bit(type))
dst_is_64bit[j] = true;
}
int initial_src_swz[4], initial_src_idx[4];
int initial_dst_idx[2], initial_dst_writemask[2];
/* select the writemask for dst0 or dst1 */
- unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask;
+ unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED
+ ? inst->dst[0].writemask : inst->dst[1].writemask;
/* copy out the writemask, index and swizzles for all src/dsts. */
for (j = 0; j < 2; j++) {
int i = u_bit_scan(&writemask);
- /* before emitting the instruction, see if we have to adjust load / store
- * address */
- if (i > 1 && (inst->op == TGSI_OPCODE_LOAD || inst->op == TGSI_OPCODE_STORE) &&
+ /* before emitting the instruction, see if we have to adjust
+ * load / store address */
+ if (i > 1 && (inst->op == TGSI_OPCODE_LOAD ||
+ inst->op == TGSI_OPCODE_STORE) &&
addr.file == PROGRAM_UNDEFINED) {
/* We have to advance the buffer address by 16 */
addr = get_temp(glsl_type::uint_type);
dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
dinst->dst[j].index = initial_dst_idx[j];
if (i > 1) {
- if (dinst->op == TGSI_OPCODE_LOAD || dinst->op == TGSI_OPCODE_STORE)
+ if (dinst->op == TGSI_OPCODE_LOAD ||
+ dinst->op == TGSI_OPCODE_STORE)
dinst->src[0] = addr;
if (dinst->op != TGSI_OPCODE_STORE)
dinst->dst[j].index++;
}
} else {
- /* if we aren't writing to a double, just get the bit of the initial writemask
- for this channel */
+ /* if we aren't writing to a double, just get the bit of the
+ * initial writemask for this channel
+ */
dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i);
}
}
}
if (swz & 1)
- dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
+ dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W,
+ SWIZZLE_Z, SWIZZLE_W);
else
- dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
+ dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
+ SWIZZLE_X, SWIZZLE_Y);
} else {
/* some opcodes are special case in what they use as sources
- - [FUI]2D/[UI]2I64 is a float/[u]int src0, (D)LDEXP is integer src1 */
- if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D || op == TGSI_OPCODE_I2D ||
+ * - [FUI]2D/[UI]2I64 is a float/[u]int src0, (D)LDEXP is
+ * integer src1
+ */
+ if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D ||
+ op == TGSI_OPCODE_I2D ||
op == TGSI_OPCODE_I2I64 || op == TGSI_OPCODE_U2I64 ||
op == TGSI_OPCODE_DLDEXP || op == TGSI_OPCODE_LDEXP ||
(op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) {
}
glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
+glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, enum tgsi_opcode op,
st_dst_reg dst,
st_src_reg src0, st_src_reg src1,
st_src_reg src2, st_src_reg src3)
* Determines whether to use an integer, unsigned integer, or float opcode
* based on the operands and input opcode, then emits the result.
*/
-unsigned
-glsl_to_tgsi_visitor::get_opcode(unsigned op,
+enum tgsi_opcode
+glsl_to_tgsi_visitor::get_opcode(enum tgsi_opcode op,
st_dst_reg dst,
st_src_reg src0, st_src_reg src1)
{
op = TGSI_OPCODE_##c; \
break;
- switch(op) {
+ switch (op) {
/* Some instructions are initially selected without considering the type.
* This fixes the type:
*
case7(ISHR, LAST, ISHR, USHR, LAST, I64SHR, U64SHR);
case7(ATOMIMAX,LAST, ATOMIMAX,ATOMUMAX,LAST, LAST, LAST);
case7(ATOMIMIN,LAST, ATOMIMIN,ATOMUMIN,LAST, LAST, LAST);
+ case7(ATOMUADD,ATOMFADD,ATOMUADD,ATOMUADD,LAST, LAST, LAST);
casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ);
casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE);
casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE);
casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT);
- default: break;
+ default:
+ break;
}
assert(op != TGSI_OPCODE_LAST);
st_dst_reg dst, st_src_reg src0, st_src_reg src1,
unsigned elements)
{
- static const unsigned dot_opcodes[] = {
+ static const enum tgsi_opcode dot_opcodes[] = {
TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
};
* to produce dest channels.
*/
void
-glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum tgsi_opcode op,
st_dst_reg dst,
st_src_reg orig_src0, st_src_reg orig_src1)
{
}
void
-glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum tgsi_opcode op,
st_dst_reg dst, st_src_reg src0)
{
st_src_reg undef = undef_src;
glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
st_dst_reg dst, st_src_reg src0)
{
- int op = TGSI_OPCODE_ARL;
+ enum tgsi_opcode op = TGSI_OPCODE_ARL;
if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) {
if (!this->need_uarl && src0.is_legal_tgsi_address_operand())
int
glsl_to_tgsi_visitor::add_constant(gl_register_file file,
- gl_constant_value values[8], int size, int datatype,
+ gl_constant_value values[8], int size,
+ GLenum datatype,
uint16_t *swizzle_out)
{
if (file == PROGRAM_CONSTANT) {
GLuint swizzle = swizzle_out ? *swizzle_out : 0;
- int result = _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
- size, datatype, &swizzle);
+ int result = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+ values, size, datatype,
+ &swizzle);
if (swizzle_out)
*swizzle_out = swizzle;
return result;
immediate_storage *entry;
int size32 = size * ((datatype == GL_DOUBLE ||
datatype == GL_INT64_ARB ||
- datatype == GL_UNSIGNED_INT64_ARB)? 2 : 1);
+ datatype == GL_UNSIGNED_INT64_ARB) ? 2 : 1);
int i;
/* Search immediate storage to see if we already have an identical
for (i = 0; i * 4 < size32; i++) {
int slot_size = MIN2(size32 - (i * 4), 4);
/* Add this immediate to the list. */
- entry = new(mem_ctx) immediate_storage(&values[i * 4], slot_size, datatype);
+ entry = new(mem_ctx) immediate_storage(&values[i * 4],
+ slot_size, datatype);
this->immediates.push_tail(entry);
this->num_immediates++;
}
* emit_asm() might have actually split the op into pieces, e.g. for
* double stores. We have to go back and fix up all the generated ops.
*/
- unsigned op = inst->op;
+ enum tgsi_opcode op = inst->op;
do {
inst->resource = *buf;
if (access)
if (type->is_array() || type->is_matrix())
return true;
- if (type->is_record()) {
+ if (type->is_struct()) {
for (unsigned i = 0; i < type->length; i++) {
if (type_has_array_or_matrix(type->fields.structure[i].type)) {
return true;
next_temp += type_size(type);
}
- if (type->is_array() || type->is_record()) {
+ if (type->is_array() || type->is_struct()) {
src.swizzle = SWIZZLE_NOOP;
} else {
src.swizzle = swizzle_for_size(type->vector_elements);
void
glsl_to_tgsi_visitor::visit(ir_variable *ir)
{
- if (strcmp(ir->name, "gl_FragCoord") == 0) {
- this->prog->OriginUpperLeft = ir->data.origin_upper_left;
- this->prog->PixelCenterInteger = ir->data.pixel_center_integer;
- }
-
if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
unsigned int i;
const ir_state_slot *const slots = ir->get_state_slots();
for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
int index = _mesa_add_state_reference(this->prog->Parameters,
- (gl_state_index *)slots[i].tokens);
+ slots[i].tokens);
if (storage->file == PROGRAM_STATE_VAR) {
if (storage->index == -1) {
st_src_reg a, b, c;
st_dst_reg result_dst;
+ // there is no TGSI opcode for this
+ if (ir->type->is_integer_64())
+ return false;
+
ir_expression *expr = ir->operands[mul_operand]->as_expression();
if (!expr || expr->operation != ir_binop_mul)
return false;
* instruction.
*/
bool
-glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir,
+ int try_operand)
{
const int other_operand = 1 - try_operand;
st_src_reg a, b;
if (!reg->reladdr && !reg->reladdr2)
return;
- if (reg->reladdr) emit_arl(ir, address_reg, *reg->reladdr);
- if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2);
+ if (reg->reladdr)
+ emit_arl(ir, address_reg, *reg->reladdr);
+ if (reg->reladdr2)
+ emit_arl(ir, address_reg2, *reg->reladdr2);
if (*num_reladdr != 1) {
st_src_reg temp = get_temp(glsl_type::get_instance(reg->type, 4, 1));
st_dst_reg result_dst;
int vector_elements = ir->operands[0]->type->vector_elements;
- if (ir->operands[1]) {
+ if (ir->operands[1] &&
+ ir->operation != ir_binop_interpolate_at_offset &&
+ ir->operation != ir_binop_interpolate_at_sample) {
+ st_src_reg *swz_op = NULL;
+ if (vector_elements > ir->operands[1]->type->vector_elements) {
+ assert(ir->operands[1]->type->vector_elements == 1);
+ swz_op = &op[1];
+ } else if (vector_elements < ir->operands[1]->type->vector_elements) {
+ assert(ir->operands[0]->type->vector_elements == 1);
+ swz_op = &op[0];
+ }
+ if (swz_op) {
+ uint16_t swizzle_x = GET_SWZ(swz_op->swizzle, 0);
+ swz_op->swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x,
+ swizzle_x, swizzle_x);
+ }
vector_elements = MAX2(vector_elements,
ir->operands[1]->type->vector_elements);
}
+ if (ir->operands[2] &&
+ ir->operands[2]->type->vector_elements != vector_elements) {
+ /* This can happen with ir_triop_lrp, i.e. glsl mix */
+ assert(ir->operands[2]->type->vector_elements == 1);
+ uint16_t swizzle_x = GET_SWZ(op[2].swizzle, 0);
+ op[2].swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x,
+ swizzle_x, swizzle_x);
+ }
this->result.file = PROGRAM_UNDEFINED;
* 0.0 and 1.0, 1-x also implements !x.
*/
op[0].negate = ~op[0].negate;
- emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
+ emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0],
+ st_src_reg_for_float(1.0));
}
break;
case ir_unop_neg:
- if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
+ if (result_dst.type == GLSL_TYPE_INT64 ||
+ result_dst.type == GLSL_TYPE_UINT64)
emit_asm(ir, TGSI_OPCODE_I64NEG, result_dst, op[0]);
- else if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
+ else if (result_dst.type == GLSL_TYPE_INT ||
+ result_dst.type == GLSL_TYPE_UINT)
emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
else if (result_dst.type == GLSL_TYPE_DOUBLE)
emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs());
else if (result_dst.type == GLSL_TYPE_DOUBLE)
emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]);
- else if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
+ else if (result_dst.type == GLSL_TYPE_INT64 ||
+ result_dst.type == GLSL_TYPE_UINT64)
emit_asm(ir, TGSI_OPCODE_I64ABS, result_dst, op[0]);
else
emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]);
* is a FBO or the window system buffer, respectively.
* It is then multiplied with the source operand of DDY.
*/
- static const gl_state_index transform_y_state[STATE_LENGTH]
+ static const gl_state_index16 transform_y_state[STATE_LENGTH]
= { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
unsigned transform_y_index =
emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]);
break;
- case ir_unop_noise: {
- /* At some point, a motivated person could add a better
- * implementation of noise. Currently not even the nvidia
- * binary drivers do anything more than this. In any case, the
- * place to do this is in the GL state tracker, not the poor
- * driver.
- */
- emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
- break;
- }
-
case ir_binop_add:
emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
break;
*/
st_src_reg sge_src = result_src;
sge_src.negate = ~sge_src.negate;
- emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
+ emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src,
+ st_src_reg_for_float(0.0));
}
} else {
emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
*/
dp->saturate = true;
} else {
- /* Negating the result of the dot-product gives values on the range
- * [-4, 0]. Zero stays zero, and negative values become 1.0. This
- * achieved using SLT.
+ /* Negating the result of the dot-product gives values on the
+ * range [-4, 0]. Zero stays zero, and negative values become
+ * 1.0. This is achieved using SLT.
*/
st_src_reg slt_src = result_src;
slt_src.negate = ~slt_src.negate;
- emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+ emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src,
+ st_src_reg_for_float(0.0));
}
}
} else {
emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
/* The clamping to [0,1] can be done for free in the fragment
- * shader with a saturate if floats are being used as boolean values.
+ * shader with a saturate if floats are being used as boolean
+ * values.
*/
add->saturate = true;
} else {
/* Negating the result of the addition gives values on the range
- * [-2, 0]. Zero stays zero, and negative values become 1.0. This
- * is achieved using SLT.
+ * [-2, 0]. Zero stays zero, and negative values become 1.0.
+ * This is achieved using SLT.
*/
st_src_reg slt_src = result_src;
slt_src.negate = ~slt_src.negate;
- emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+ emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src,
+ st_src_reg_for_float(0.0));
}
}
break;
/* fallthrough to next case otherwise */
case ir_unop_b2f:
if (native_integers) {
- emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
+ emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0],
+ st_src_reg_for_float(1.0));
break;
}
/* fallthrough to next case otherwise */
* GLSL requires that int(bool) return 1 for true and 0 for false.
* This conversion is done with AND, but it could be done with NEG.
*/
- emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
+ emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0],
+ st_src_reg_for_int(1));
} else {
/* Booleans and integers are both stored as floats when native
* integers are disabled.
result_src.type = GLSL_TYPE_FLOAT;
break;
case ir_unop_f2b:
- emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
+ emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0],
+ st_src_reg_for_float(0.0));
break;
case ir_unop_d2b:
- emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0));
+ emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0],
+ st_src_reg_for_double(0.0));
break;
case ir_unop_i2b:
if (native_integers)
- emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0));
+ emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0],
+ st_src_reg_for_int(0));
else
- emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
+ emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0],
+ st_src_reg_for_float(0.0));
break;
case ir_unop_bitcast_u642d:
case ir_unop_bitcast_i642d:
case ir_binop_lshift:
case ir_binop_rshift:
if (native_integers) {
- unsigned opcode = ir->operation == ir_binop_lshift ? TGSI_OPCODE_SHL
- : TGSI_OPCODE_ISHR;
+ enum tgsi_opcode opcode = ir->operation == ir_binop_lshift
+ ? TGSI_OPCODE_SHL : TGSI_OPCODE_ISHR;
st_src_reg count;
if (glsl_base_type_is_64bit(op[0].type)) {
/* GLSL shift operations have 32-bit shift counts, but TGSI uses
* 64 bits.
*/
- count = get_temp(glsl_type::u64vec(ir->operands[1]->type->components()));
+ count = get_temp(glsl_type::u64vec(ir->operands[1]
+ ->type->components()));
emit_asm(ir, TGSI_OPCODE_U2I64, st_dst_reg(count), op[1]);
} else {
count = op[1];
emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset,
st_src_reg_for_int(4));
cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
- memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
+ *cbuf.reladdr = index_reg;
}
if (const_uniform_block) {
} else {
/* Relative/variable constant buffer */
cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
- memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
+ *cbuf.reladdr2 = op[0];
}
cbuf.has_index2 = true;
break;
case ir_binop_interpolate_at_offset: {
/* The y coordinate needs to be flipped for the default fb */
- static const gl_state_index transform_y_state[STATE_LENGTH]
+ static const gl_state_index16 transform_y_state[STATE_LENGTH]
= { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
unsigned transform_y_index =
ir_constant *const_offset = ir->operands[0]->as_constant();
st_src_reg buffer(
PROGRAM_BUFFER,
- ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
- (const_offset ? const_offset->value.u[0] : 0),
+ const_offset ? const_offset->value.u[0] : 0,
GLSL_TYPE_UINT);
if (!const_offset) {
buffer.reladdr = ralloc(mem_ctx, st_src_reg);
temp_dst.writemask = WRITEMASK_X;
if (vector_elements > 3)
temp_dst.writemask |= WRITEMASK_Z;
- op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), GET_SWZ(orig_swz, 2),
- GET_SWZ(orig_swz, 3), GET_SWZ(orig_swz, 3));
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2),
+ GET_SWZ(orig_swz, 2),
+ GET_SWZ(orig_swz, 3),
+ GET_SWZ(orig_swz, 3));
if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64)
emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
else
- emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1));
+ emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0],
+ st_src_reg_for_int(1));
}
break;
}
for (el = 0; el < vector_elements; el++) {
unsigned swz = GET_SWZ(orig_swz, el);
if (swz & 1)
- op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z);
+ op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z,
+ SWIZZLE_Z, SWIZZLE_Z);
else
- op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
+ op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X,
+ SWIZZLE_X, SWIZZLE_X);
if (swz > 2)
op[0].index = orig_idx + 1;
op[0].type = GLSL_TYPE_UINT;
break;
}
case ir_unop_i642b:
- emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int64(0));
+ emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0],
+ st_src_reg_for_int64(0));
break;
case ir_unop_i642f:
emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]);
case ir_binop_carry:
case ir_binop_borrow:
case ir_unop_ssbo_unsized_array_length:
+ case ir_unop_atan:
+ case ir_binop_atan2:
+ case ir_unop_clz:
+ case ir_binop_add_sat:
+ case ir_binop_sub_sat:
+ case ir_binop_abs_sub:
+ case ir_binop_avg:
+ case ir_binop_avg_round:
+ case ir_binop_mul_32x16:
+ case ir_unop_f162f:
+ case ir_unop_f2f16:
+ case ir_unop_f2fmp:
+ case ir_unop_f162b:
+ case ir_unop_b2f16:
/* This operation is not supported, or should have already been handled.
*/
assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
{
st_src_reg src;
int i;
- int swizzle[4];
+ int swizzle[4] = {0};
/* Note that this is only swizzles in expressions, not those on the left
* hand side of an assignment, which do write masking. See ir_assignment
void
glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
{
- variable_storage *entry = find_variable_storage(ir->var);
+ variable_storage *entry;
ir_variable *var = ir->var;
bool remove_array;
+ if (handle_bound_deref(ir->as_dereference()))
+ return;
+
+ entry = find_variable_storage(ir->var);
+
if (!entry) {
switch (var->data.mode) {
case ir_var_uniform:
unsigned num_components;
num_outputs++;
+ decl->invariant = var->data.invariant;
+
if (type_without_array->is_64bit())
component = component / 2;
if (type_without_array->vector_elements)
}
}
+
+/**
+ * Mark every slot of each array input/output declaration as used.
+ *
+ * Declarations with a zero array_id are ignored. For all others, one bit
+ * per array element (decl->size of them, starting at decl->mesa_index) is
+ * set: in *patch_usage_mask, relative to VARYING_SLOT_PATCH0, when the
+ * declaration starts at or after VARYING_SLOT_PATCH0, and in *usage_mask
+ * otherwise. Elements that are never accessed are marked as well, to
+ * ensure that the input/output mapping logic doesn't get confused.
+ *
+ * TODO Arrays that are never accessed indirectly (some game ports do this,
+ * at least eON-based ones) could be replaced by non-array declarations of
+ * just the slots that are actually used.
+ *
+ * \param decls              declarations to scan
+ * \param count              number of entries in \p decls
+ * \param usage_mask         receives the bits of non-patch slots
+ * \param double_usage_mask  not referenced by this function
+ * \param patch_usage_mask   receives the bits of patch slots
+ */
+static void
+mark_array_io(struct inout_decl *decls, unsigned count,
+              GLbitfield64* usage_mask,
+              GLbitfield64 double_usage_mask,
+              GLbitfield* patch_usage_mask)
+{
+   for (unsigned d = 0; d < count; d++) {
+      struct inout_decl *decl = &decls[d];
+
+      if (decl->array_id) {
+         const int num_slots = (int)decl->size;
+
+         if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
+            /* Patch slots are numbered relative to VARYING_SLOT_PATCH0. */
+            for (int slot = 0; slot < num_slots; slot++)
+               *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + slot);
+         } else {
+            for (int slot = 0; slot < num_slots; slot++)
+               *usage_mask |= BITFIELD64_BIT(decl->mesa_index + slot);
+         }
+      }
+   }
+}
+
void
glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
{
bool is_2D = false;
ir_variable *var = ir->variable_referenced();
- /* We only need the logic provided by st_glsl_storage_type_size()
+ if (handle_bound_deref(ir->as_dereference()))
+ return;
+
+ /* We only need the logic provided by count_vec4_slots()
* for arrays of structs. Indirect sampler and image indexing is handled
* elsewhere.
*/
- int element_size = ir->type->without_array()->is_record() ?
- st_glsl_storage_type_size(ir->type, var->data.bindless) :
+ int element_size = ir->type->without_array()->is_struct() ?
+ ir->type->count_vec4_slots(false, var->data.bindless) :
type_size(ir->type);
index = ir->array_index->constant_expression_value(ralloc_parent(ir));
if (is_2D) {
src.reladdr2 = ralloc(mem_ctx, st_src_reg);
- memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
+ *src.reladdr2 = index_reg;
src.index2D = 0;
src.has_index2 = true;
} else {
src.reladdr = ralloc(mem_ctx, st_src_reg);
- memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+ *src.reladdr = index_reg;
}
}
ir_variable *var = ir->record->variable_referenced();
int offset = 0;
+ if (handle_bound_deref(ir->as_dereference()))
+ return;
+
ir->record->accept(this);
assert(ir->field_idx >= 0);
if (i == (unsigned) ir->field_idx)
break;
const glsl_type *member_type = struct_type->fields.structure[i].type;
- offset += st_glsl_storage_type_size(member_type, var->data.bindless);
+ offset += member_type->count_vec4_slots(false, var->data.bindless);
}
/* If the type is smaller than a vec4, replicate the last channel out. */
st_dst_reg *l, st_src_reg *r,
st_src_reg *cond, bool cond_swap)
{
- if (type->is_record()) {
+ if (type->is_struct()) {
for (unsigned int i = 0; i < type->length; i++) {
emit_block_mov(ir, type->fields.structure[i].type, l, r,
cond, cond_swap);
if (type->is_matrix()) {
const struct glsl_type *vec_type;
- vec_type = glsl_type::get_instance(type->is_double() ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT,
+ vec_type = glsl_type::get_instance(type->is_double()
+ ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT,
type->vector_elements, 1);
for (int i = 0; i < type->matrix_columns; i++) {
if (l_src.file == PROGRAM_OUTPUT &&
this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
- (l_src.index == FRAG_RESULT_DEPTH || l_src.index == FRAG_RESULT_STENCIL)) {
+ (l_src.index == FRAG_RESULT_DEPTH ||
+ l_src.index == FRAG_RESULT_STENCIL)) {
/* This is a special case because the source swizzles will be shifted
* later to account for the difference between GLSL (where they're
* plain floats) and TGSI (where they're Z and Y components). */
} else if (ir->write_mask == 0) {
assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
- unsigned num_elements = ir->lhs->type->without_array()->vector_elements;
+ unsigned num_elements =
+ ir->lhs->type->without_array()->vector_elements;
if (num_elements) {
l.writemask = u_bit_consecutive(0, num_elements);
GLdouble stack_vals[4] = { 0 };
gl_constant_value *values = (gl_constant_value *) stack_vals;
GLenum gl_type = GL_NONE;
- unsigned int i;
+ unsigned int i, elements;
static int in_array = 0;
gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
* aggregate constant and move each constant value into it. If we
* get lucky, copy propagation will eliminate the extra moves.
*/
- if (ir->type->is_record()) {
+ if (ir->type->is_struct()) {
st_src_reg temp_base = get_temp(ir->type);
st_dst_reg temp = st_dst_reg(temp_base);
for (i = 0; i < ir->type->matrix_columns; i++) {
switch (ir->type->base_type) {
case GLSL_TYPE_FLOAT:
- values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
+ values = (gl_constant_value *)
+ &ir->value.f[i * ir->type->vector_elements];
src = st_src_reg(file, -1, ir->type->base_type);
src.index = add_constant(file,
emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
break;
case GLSL_TYPE_DOUBLE:
- values = (gl_constant_value *) &ir->value.d[i * ir->type->vector_elements];
+ values = (gl_constant_value *)
+ &ir->value.d[i * ir->type->vector_elements];
src = st_src_reg(file, -1, ir->type->base_type);
src.index = add_constant(file,
values,
&src.swizzle);
if (ir->type->vector_elements >= 2) {
mat_column.writemask = WRITEMASK_XY;
- src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
+ src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
+ SWIZZLE_X, SWIZZLE_Y);
emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
} else {
mat_column.writemask = WRITEMASK_X;
- src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
+ src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X,
+ SWIZZLE_X, SWIZZLE_X);
emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
}
src.index++;
if (ir->type->vector_elements > 2) {
if (ir->type->vector_elements == 4) {
mat_column.writemask = WRITEMASK_ZW;
- src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
+ src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
+ SWIZZLE_X, SWIZZLE_Y);
emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
} else {
mat_column.writemask = WRITEMASK_Z;
- src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
+ src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y,
+ SWIZZLE_Y, SWIZZLE_Y);
emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
mat_column.writemask = WRITEMASK_XYZW;
src.swizzle = SWIZZLE_XYZW;
return;
}
+ elements = ir->type->vector_elements;
switch (ir->type->base_type) {
case GLSL_TYPE_FLOAT:
gl_type = GL_FLOAT;
values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0;
}
break;
+ case GLSL_TYPE_SAMPLER:
+ case GLSL_TYPE_IMAGE:
+ gl_type = GL_UNSIGNED_INT;
+ elements = 2;
+ values[0].u = ir->value.u64[0] & 0xffffffff;
+ values[1].u = ir->value.u64[0] >> 32;
+ break;
default:
- assert(!"Non-float/uint/int/bool constant");
+ assert(!"Non-float/uint/int/bool/sampler/image constant");
}
this->result = st_src_reg(file, -1, ir->type);
this->result.index = add_constant(file,
values,
- ir->type->vector_elements,
+ elements,
gl_type,
&this->result.swizzle);
}
if (has_hw_atomics) {
variable_storage *entry = find_variable_storage(location);
- st_src_reg buffer(PROGRAM_HW_ATOMIC, 0, GLSL_TYPE_ATOMIC_UINT, location->data.binding);
+ st_src_reg buffer(PROGRAM_HW_ATOMIC, 0, GLSL_TYPE_ATOMIC_UINT,
+ location->data.binding);
if (!entry) {
entry = new(mem_ctx) variable_storage(location, PROGRAM_HW_ATOMIC,
resource = buffer;
} else {
- st_src_reg buffer(PROGRAM_BUFFER, location->data.binding,
+ st_src_reg buffer(PROGRAM_BUFFER,
+ prog->info.num_ssbos +
+ location->data.binding,
GLSL_TYPE_ATOMIC_UINT);
if (offset.file != PROGRAM_UNDEFINED) {
val->accept(this);
st_src_reg data = this->result, data2 = undef_src;
- unsigned opcode;
+ enum tgsi_opcode opcode;
switch (ir->callee->intrinsic_id) {
case ir_intrinsic_atomic_counter_add:
opcode = TGSI_OPCODE_ATOMUADD;
ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
ir_constant *const_block = block->as_constant();
-
st_src_reg buffer(
PROGRAM_BUFFER,
- ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
- (const_block ? const_block->value.u[0] : 0),
+ const_block ? const_block->value.u[0] : 0,
GLSL_TYPE_UINT);
if (!const_block) {
if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_load) {
inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
if (dst.type == GLSL_TYPE_BOOL)
- emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0));
+ emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst),
+ st_src_reg_for_int(0));
} else if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_store) {
param = param->get_next();
ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
val->accept(this);
st_src_reg data = this->result, data2 = undef_src;
- unsigned opcode;
+ enum tgsi_opcode opcode;
switch (ir->callee->intrinsic_id) {
case ir_intrinsic_ssbo_atomic_add:
opcode = TGSI_OPCODE_ATOMUADD;
val->accept(this);
st_src_reg data = this->result, data2 = undef_src;
- unsigned opcode;
+ enum tgsi_opcode opcode;
switch (ir->callee->intrinsic_id) {
case ir_intrinsic_shared_atomic_add:
opcode = TGSI_OPCODE_ATOMUADD;
static void
get_image_qualifiers(ir_dereference *ir, const glsl_type **type,
bool *memory_coherent, bool *memory_volatile,
- bool *memory_restrict, unsigned *image_format)
+ bool *memory_restrict, bool *memory_read_only,
+ enum pipe_format *image_format)
{
switch (ir->ir_type) {
struct_type->fields.structure[fild_idx].memory_volatile;
*memory_restrict =
struct_type->fields.structure[fild_idx].memory_restrict;
+ *memory_read_only =
+ struct_type->fields.structure[fild_idx].memory_read_only;
*image_format =
struct_type->fields.structure[fild_idx].image_format;
break;
ir_dereference_array *deref_arr = ir->as_dereference_array();
get_image_qualifiers((ir_dereference *)deref_arr->array, type,
memory_coherent, memory_volatile, memory_restrict,
- image_format);
+ memory_read_only, image_format);
break;
}
*memory_coherent = var->data.memory_coherent;
*memory_volatile = var->data.memory_volatile;
*memory_restrict = var->data.memory_restrict;
+ *memory_read_only = var->data.memory_read_only;
*image_format = var->data.image_format;
break;
}
ir_dereference *img = (ir_dereference *)param;
const ir_variable *imgvar = img->variable_referenced();
unsigned sampler_array_size = 1, sampler_base = 0;
- bool memory_coherent = false, memory_volatile = false, memory_restrict = false;
- unsigned image_format = 0;
+ bool memory_coherent = false, memory_volatile = false,
+ memory_restrict = false, memory_read_only = false;
+ enum pipe_format image_format = PIPE_FORMAT_NONE;
const glsl_type *type = NULL;
get_image_qualifiers(img, &type, &memory_coherent, &memory_volatile,
- &memory_restrict, &image_format);
+ &memory_restrict, &memory_read_only, &image_format);
st_src_reg reladdr;
st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT);
assert(param->is_tail_sentinel());
- unsigned opcode;
+ enum tgsi_opcode opcode;
switch (ir->callee->intrinsic_id) {
case ir_intrinsic_image_load:
opcode = TGSI_OPCODE_LOAD;
case ir_intrinsic_image_atomic_comp_swap:
opcode = TGSI_OPCODE_ATOMCAS;
break;
+ case ir_intrinsic_image_atomic_inc_wrap: {
+ /* There's a bit of disagreement between GLSL and the hardware. The
+ * hardware wants to wrap after the given wrap value, while GLSL
+ * wants to wrap at the value. Subtract 1 to make up the difference.
+ */
+ st_src_reg wrap = get_temp(glsl_type::uint_type);
+ emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(wrap),
+ arg1, st_src_reg_for_int(-1));
+ arg1 = wrap;
+ opcode = TGSI_OPCODE_ATOMINC_WRAP;
+ break;
+ }
+ case ir_intrinsic_image_atomic_dec_wrap:
+ opcode = TGSI_OPCODE_ATOMDEC_WRAP;
+ break;
default:
assert(!"Unexpected intrinsic");
return;
}
inst->tex_target = type->sampler_index();
- inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx),
- _mesa_get_shader_image_format(image_format));
+ inst->image_format = image_format;
+ inst->read_only = memory_read_only;
if (memory_coherent)
inst->buffer_access |= TGSI_MEMORY_COHERENT;
}
void
-glsl_to_tgsi_visitor::visit_generic_intrinsic(ir_call *ir, unsigned op)
+glsl_to_tgsi_visitor::visit_generic_intrinsic(ir_call *ir, enum tgsi_opcode op)
{
ir->return_deref->accept(this);
st_dst_reg dst = st_dst_reg(this->result);
case ir_intrinsic_image_atomic_comp_swap:
case ir_intrinsic_image_size:
case ir_intrinsic_image_samples:
+ case ir_intrinsic_image_atomic_inc_wrap:
+ case ir_intrinsic_image_atomic_dec_wrap:
visit_image_intrinsic(ir);
return;
visit_generic_intrinsic(ir, TGSI_OPCODE_READ_INVOC);
return;
+ case ir_intrinsic_helper_invocation:
+ visit_generic_intrinsic(ir, TGSI_OPCODE_READ_HELPER);
+ return;
+
case ir_intrinsic_invalid:
case ir_intrinsic_generic_load:
case ir_intrinsic_generic_store:
case ir_intrinsic_generic_atomic_max:
case ir_intrinsic_generic_atomic_exchange:
case ir_intrinsic_generic_atomic_comp_swap:
+ case ir_intrinsic_begin_invocation_interlock:
+ case ir_intrinsic_end_invocation_interlock:
unreachable("Invalid intrinsic");
}
}
calc_deref_offsets(deref_record->record->as_dereference(), array_elements, index, indirect, location);
assert(field_index >= 0);
- *location += struct_type->record_location_offset(field_index);
+ *location += struct_type->struct_location_offset(field_index);
break;
}
unsigned location = 0;
ir_variable *var = ir->variable_referenced();
- memset(reladdr, 0, sizeof(*reladdr));
- reladdr->file = PROGRAM_UNDEFINED;
+ reladdr->reset();
*base = 0;
*array_size = 1;
st_src_reg
glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset)
{
- if (offset.reladdr || offset.reladdr2) {
+ if (offset.reladdr || offset.reladdr2 ||
+ offset.has_index2 ||
+ offset.file == PROGRAM_UNIFORM ||
+ offset.file == PROGRAM_CONSTANT ||
+ offset.file == PROGRAM_STATE_VAR) {
st_src_reg tmp = get_temp(glsl_type::ivec2_type);
st_dst_reg tmp_dst = st_dst_reg(tmp);
tmp_dst.writemask = WRITEMASK_XY;
return offset;
}
+
+/**
+ * Convert a dereference of a bound sampler/image uniform into a handle.
+ *
+ * Returns false, emitting nothing, unless \p ir references a uniform
+ * variable that is not bindless and whose dereferenced type is an image or
+ * a sampler.  Otherwise an IMG2HND / SAMP2HND instruction is emitted that
+ * writes the .xy channels of a new uvec2 temporary; that temporary is left
+ * in this->result and true is returned.
+ */
+bool
+glsl_to_tgsi_visitor::handle_bound_deref(ir_dereference *ir)
+{
+   ir_variable *const var = ir->variable_referenced();
+
+   if (!var)
+      return false;
+   if (var->data.mode != ir_var_uniform || var->data.bindless)
+      return false;
+   if (!ir->type->is_image() && !ir->type->is_sampler())
+      return false;
+
+   /* Convert from bound sampler/image to bindless handle. */
+   const bool is_image = ir->type->is_image();
+   const gl_register_file resource_file =
+      is_image ? PROGRAM_IMAGE : PROGRAM_SAMPLER;
+   st_src_reg resource(resource_file, 0, GLSL_TYPE_UINT);
+
+   unsigned array_size = 1;
+   unsigned base = 0;
+   uint16_t index = 0;
+   st_src_reg reladdr;
+   get_deref_offsets(ir, &array_size, &base, &index, &reladdr, true);
+
+   resource.index = index;
+   if (reladdr.file != PROGRAM_UNDEFINED) {
+      /* Indirectly indexed resource: keep a copy of the relative address on
+       * the resource register and load it through emit_arl().
+       */
+      resource.reladdr = ralloc(mem_ctx, st_src_reg);
+      *resource.reladdr = reladdr;
+      emit_arl(ir, sampler_reladdr, reladdr);
+   }
+
+   /* The handle is written to the first two channels of a uvec2 temp. */
+   this->result = get_temp(glsl_type::uvec2_type);
+   st_dst_reg handle_dst(this->result);
+   handle_dst.writemask = WRITEMASK_XY;
+
+   const enum tgsi_opcode to_handle =
+      is_image ? TGSI_OPCODE_IMG2HND : TGSI_OPCODE_SAMP2HND;
+   glsl_to_tgsi_instruction *inst = emit_asm(ir, to_handle, handle_dst);
+
+   inst->tex_target = ir->type->sampler_index();
+   inst->resource = resource;
+   inst->sampler_array_size = array_size;
+   inst->sampler_base = base;
+
+   return true;
+}
void
glsl_to_tgsi_visitor::visit(ir_texture *ir)
st_src_reg levels_src, reladdr;
st_dst_reg result_dst, coord_dst, cube_sc_dst;
glsl_to_tgsi_instruction *inst = NULL;
- unsigned opcode = TGSI_OPCODE_NOP;
+ enum tgsi_opcode opcode = TGSI_OPCODE_NOP;
const glsl_type *sampler_type = ir->sampler->type;
unsigned sampler_array_size = 1, sampler_base = 0;
- bool is_cube_array = false, is_cube_shadow = false;
+ bool is_cube_array = false;
ir_variable *var = ir->sampler->variable_referenced();
unsigned i;
/* if we are a cube array sampler or a cube shadow */
if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
is_cube_array = sampler_type->sampler_array;
- is_cube_shadow = sampler_type->sampler_shadow;
}
if (ir->coordinate) {
}
break;
case ir_txb:
- if (is_cube_array || is_cube_shadow) {
+ if (is_cube_array ||
+ (sampler_type->sampler_shadow && sampler_type->coordinate_components() >= 3)) {
opcode = TGSI_OPCODE_TXB2;
}
else {
if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) {
opcode = TGSI_OPCODE_TEX_LZ;
} else {
- opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
+ opcode = (is_cube_array || (sampler_type->sampler_shadow && sampler_type->coordinate_components() >= 3)) ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
ir->lod_info.lod->accept(this);
lod_info = this->result;
}
}
}
- /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
- * comparator was put in the correct place (and projected) by the code,
- * above, that handles by-hand projection.
+ /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the
+ * shadow comparator was put in the correct place (and projected) by the
+ * code, above, that handles by-hand projection.
*/
if (ir->shadow_comparator && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
/* Slot the shadow value in as the second to last component of the
ir->shadow_comparator->accept(this);
if (is_cube_array) {
- cube_sc = get_temp(glsl_type::float_type);
- cube_sc_dst = st_dst_reg(cube_sc);
- cube_sc_dst.writemask = WRITEMASK_X;
+ if (lod_info.file != PROGRAM_UNDEFINED) {
+ // If we have both a cube array *and* a bias/lod, stick the
+ // comparator into the .Y of the second argument.
+ st_src_reg tmp = get_temp(glsl_type::vec2_type);
+ cube_sc_dst = st_dst_reg(tmp);
+ cube_sc_dst.writemask = WRITEMASK_X;
+ emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, lod_info);
+ lod_info = tmp;
+ cube_sc_dst.writemask = WRITEMASK_Y;
+ } else {
+ cube_sc = get_temp(glsl_type::float_type);
+ cube_sc_dst = st_dst_reg(cube_sc);
+ cube_sc_dst.writemask = WRITEMASK_X;
+ }
emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result);
- cube_sc_dst.writemask = WRITEMASK_X;
}
else {
if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
if (is_cube_array && ir->shadow_comparator) {
inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
} else {
- inst = emit_asm(ir, opcode, result_dst, coord, component);
+ if (this->tg4_component_in_swizzle) {
+ inst = emit_asm(ir, opcode, result_dst, coord);
+ int idx = 0;
+ foreach_in_list(immediate_storage, entry, &this->immediates) {
+ if (component.index == idx) {
+ gl_constant_value value = entry->values[component.swizzle];
+ inst->gather_component = value.i;
+ break;
+ }
+ idx++;
+ }
+ } else {
+ inst = emit_asm(ir, opcode, result_dst, coord, component);
+ }
}
} else
inst = emit_asm(ir, opcode, result_dst, coord);
if (ir->offset) {
if (!inst->tex_offsets)
- inst->tex_offsets = rzalloc_array(inst, st_src_reg, MAX_GLSL_TEXTURE_OFFSET);
+ inst->tex_offsets = rzalloc_array(inst, st_src_reg,
+ MAX_GLSL_TEXTURE_OFFSET);
- for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++)
+ for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET &&
+ offset[i].file != PROGRAM_UNDEFINED; i++)
inst->tex_offsets[i] = offset[i];
inst->tex_offset_num_offset = i;
}
}
}
+/**
+ * Emit TGSI_OPCODE_DEMOTE, with no destination and no sources, for an
+ * ir_demote node.
+ */
+void
+glsl_to_tgsi_visitor::visit(ir_demote *ir)
+{
+   this->emit_asm(ir, TGSI_OPCODE_DEMOTE);
+}
+
void
glsl_to_tgsi_visitor::visit(ir_if *ir)
{
- unsigned if_opcode;
+ enum tgsi_opcode if_opcode;
glsl_to_tgsi_instruction *if_inst;
ir->condition->accept(this);
ctx = NULL;
prog = NULL;
precise = 0;
+ need_uarl = false;
+ tg4_component_in_swizzle = false;
shader_program = NULL;
shader = NULL;
options = NULL;
v->image_targets[idx] =
st_translate_texture_target(inst->tex_target, false);
v->image_formats[idx] = inst->image_format;
+ v->image_wr[idx] = !inst->read_only;
}
}
}
&& !(inst->dst[0].writemask & prevWriteMask)
&& inst->src[2].file == inst->dst[0].file
&& inst->src[2].index == inst->dst[0].index
- && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) {
+ && inst->dst[0].writemask ==
+ get_src_arg_mask(inst->dst[0], inst->src[2])) {
inst->op = TGSI_OPCODE_MOV;
inst->info = tgsi_get_opcode_info(inst->op);
}
if (inst->op == TGSI_OPCODE_BGNLOOP) {
- if(depth++ == 0)
+ if (depth++ == 0)
loop_start = i;
} else if (inst->op == TGSI_OPCODE_ENDLOOP) {
if (--depth == 0)
}
}
if (inst->op == TGSI_OPCODE_BGNLOOP) {
- if(depth++ == 0)
+ if (depth++ == 0)
loop_start = i;
} else if (inst->op == TGSI_OPCODE_ENDLOOP) {
if (--depth == 0)
last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2;
}
if (inst->op == TGSI_OPCODE_BGNLOOP) {
- if(depth++ == 0)
+ if (depth++ == 0)
loop_start = i;
} else if (inst->op == TGSI_OPCODE_ENDLOOP) {
if (--depth == 0) {
write_level[4 * r + c] = level-1;
}
}
- if(inst->op == TGSI_OPCODE_ENDIF)
+ if (inst->op == TGSI_OPCODE_ENDIF)
--level;
break;
defined = 0;
inst2 = (glsl_to_tgsi_instruction *) inst->next;
- do {
+ while (!inst2->is_tail_sentinel()) {
if (inst->op == inst2->op &&
inst2->dst[defined].file == PROGRAM_UNDEFINED &&
inst->src[0].file == inst2->src[0].file &&
inst->src[0].swizzle == inst2->src[0].swizzle)
break;
inst2 = (glsl_to_tgsi_instruction *) inst2->next;
- } while (inst2);
+ }
- if (!inst2) {
+ if (inst2->is_tail_sentinel()) {
/* Undefined destinations are not allowed, substitute with an unused
* temporary register.
*/
}
}
+/**
+ * Flag arrays that are addressed indirectly.
+ *
+ * If \p reg lives in PROGRAM_ARRAY and carries a reladdr, a reladdr2 or a
+ * second index, has_indirect_access[reg.array_id] is set and the
+ * relative-address registers are examined recursively.  Registers in other
+ * files, and directly addressed array registers, change nothing.
+ *
+ * \param reg                  register to inspect (source or destination)
+ * \param has_indirect_access  per-array flags, indexed by array_id
+ */
+template <typename st_reg>
+void test_indirect_access(const st_reg& reg, bool *has_indirect_access)
+{
+   if (reg.file != PROGRAM_ARRAY)
+      return;
+
+   const bool addressed_indirectly =
+      reg.reladdr || reg.reladdr2 || reg.has_index2;
+   if (!addressed_indirectly)
+      return;
+
+   has_indirect_access[reg.array_id] = true;
+
+   /* The address operands may themselves be array elements. */
+   if (reg.reladdr)
+      test_indirect_access(*reg.reladdr, has_indirect_access);
+   if (reg.reladdr2)
+      test_indirect_access(*reg.reladdr2, has_indirect_access);
+}
+
+/**
+ * Rewrite a PROGRAM_ARRAY register according to the array remapping table.
+ *
+ * When has_indirect_access[array_id] is false the register is turned into a
+ * PROGRAM_TEMPORARY: its index is offset by array_remap_info[array_id] and
+ * its array_id is cleared.  Otherwise only array_id is replaced by
+ * array_remap_info[array_id].  The relative-address registers of an array
+ * register are then rewritten recursively.  Registers in any other file are
+ * left untouched.
+ *
+ * NOTE(review): reladdr/reladdr2 are only visited when the outer register
+ * is itself PROGRAM_ARRAY - confirm that registers of other files can never
+ * carry an array-based relative address at this point.
+ */
+template <typename st_reg>
+void remap_array(st_reg& reg, const int *array_remap_info,
+                 const bool *has_indirect_access)
+{
+   if (reg.file != PROGRAM_ARRAY)
+      return;
+
+   if (has_indirect_access[reg.array_id]) {
+      /* Array survives: the table entry is its new array ID. */
+      reg.array_id = array_remap_info[reg.array_id];
+   } else {
+      /* Array was split: the table entry is the base temporary index. */
+      reg.file = PROGRAM_TEMPORARY;
+      reg.index = reg.index + array_remap_info[reg.array_id];
+      reg.array_id = 0;
+   }
+
+   if (reg.reladdr)
+      remap_array(*reg.reladdr, array_remap_info, has_indirect_access);
+   if (reg.reladdr2)
+      remap_array(*reg.reladdr2, array_remap_info, has_indirect_access);
+}
+
+/* One-dimensional arrays whose elements are only accessed directly are
+ * replaced by an according set of temporary registers that then can become
+ * subject to further optimization steps like copy propagation and
+ * register merging.
+ *
+ * The pass does nothing when next_array is zero.  Otherwise it
+ *  1. scans the sources, texture offsets, destinations and resource of
+ *     every instruction and marks each array that is accessed indirectly,
+ *  2. assigns every unmarked array a base index past the current
+ *     next_temp and compacts array_sizes / renumbers the marked arrays,
+ *  3. rewrites all registers when at least one array was split, and
+ *  4. grows next_temp by the number of registers taken over from split
+ *     arrays and sets next_array to the number of arrays that remain.
+ *
+ * Array IDs are 1-based here: ID i uses array_sizes[i - 1].
+ */
+void
+glsl_to_tgsi_visitor::split_arrays(void)
+{
+ if (!next_array)
+ return;
+
+ bool *has_indirect_access = rzalloc_array(mem_ctx, bool, next_array + 1); /* +1: slot 0 is unused by the 1-based IDs below */
+
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
+ for (unsigned j = 0; j < num_inst_src_regs(inst); j++)
+ test_indirect_access(inst->src[j], has_indirect_access);
+
+ for (unsigned j = 0; j < inst->tex_offset_num_offset; j++)
+ test_indirect_access(inst->tex_offsets[j], has_indirect_access);
+
+ for (unsigned j = 0; j < num_inst_dst_regs(inst); j++)
+ test_indirect_access(inst->dst[j], has_indirect_access);
+
+ test_indirect_access(inst->resource, has_indirect_access);
+ }
+
+ unsigned array_offset = 0; /* registers consumed so far by split arrays */
+ unsigned n_remaining_arrays = 0; /* arrays that keep PROGRAM_ARRAY storage */
+
+ /* Double use: For arrays that get split this value will contain
+ * the base index of the temporary registers this array is replaced
+ * with. For arrays that remain it contains the new array ID.
+ *
+ * The table is allocated with has_indirect_access as its ralloc
+ * parent; only has_indirect_access is passed to ralloc_free() below.
+ */
+ int *array_remap_info = rzalloc_array(has_indirect_access, int,
+ next_array + 1);
+
+ for (unsigned i = 1; i <= next_array; ++i) {
+ if (!has_indirect_access[i]) {
+ array_remap_info[i] = this->next_temp + array_offset;
+ array_offset += array_sizes[i - 1];
+ } else {
+ array_sizes[n_remaining_arrays] = array_sizes[i-1]; /* compact in place; n_remaining_arrays never exceeds i - 1 */
+ array_remap_info[i] = ++n_remaining_arrays; /* new IDs start at 1 */
+ }
+ }
+
+ if (next_array != n_remaining_arrays) { /* rewrite only if something was split */
+ foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
+ for (unsigned j = 0; j < num_inst_src_regs(inst); j++)
+ remap_array(inst->src[j], array_remap_info, has_indirect_access);
+
+ for (unsigned j = 0; j < inst->tex_offset_num_offset; j++)
+ remap_array(inst->tex_offsets[j], array_remap_info, has_indirect_access);
+
+ for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) {
+ remap_array(inst->dst[j], array_remap_info, has_indirect_access);
+ }
+ remap_array(inst->resource, array_remap_info, has_indirect_access);
+ }
+ }
+
+ ralloc_free(has_indirect_access);
+ this->next_temp += array_offset; /* temporaries now include the split arrays */
+ next_array = n_remaining_arrays;
+}
+
/* Merges temporary registers together where possible to reduce the number of
* registers needed to run a program.
*
void
glsl_to_tgsi_visitor::merge_registers(void)
{
- assert(need_uarl);
- struct lifetime *lifetimes =
- rzalloc_array(mem_ctx, struct lifetime, this->next_temp);
+ class array_live_range *arr_live_ranges = NULL;
+
+ struct register_live_range *reg_live_ranges =
+ rzalloc_array(mem_ctx, struct register_live_range, this->next_temp);
- if (get_temp_registers_required_lifetimes(mem_ctx, &this->instructions,
- this->next_temp, lifetimes)) {
+ if (this->next_array > 0) {
+ arr_live_ranges = new array_live_range[this->next_array];
+ for (unsigned i = 0; i < this->next_array; ++i)
+ arr_live_ranges[i] = array_live_range(i+1, this->array_sizes[i]);
+ }
+
+
+ if (get_temp_registers_required_live_ranges(reg_live_ranges, &this->instructions,
+ this->next_temp, reg_live_ranges,
+ this->next_array, arr_live_ranges)) {
struct rename_reg_pair *renames =
- rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
- get_temp_registers_remapping(mem_ctx, this->next_temp, lifetimes, renames);
+ rzalloc_array(reg_live_ranges, struct rename_reg_pair, this->next_temp);
+ get_temp_registers_remapping(reg_live_ranges, this->next_temp,
+ reg_live_ranges, renames);
rename_temp_registers(renames);
- ralloc_free(renames);
+
+ this->next_array = merge_arrays(this->next_array, this->array_sizes,
+ &this->instructions, arr_live_ranges);
}
- ralloc_free(lifetimes);
+ if (arr_live_ranges)
+ delete[] arr_live_ranges;
+
+ ralloc_free(reg_live_ranges);
}
/* Reassign indices to temporary registers by reusing unused indices created
ralloc_free(first_writes);
}
+#ifndef NDEBUG
+/**
+ * Append one line of shader statistics to stats_log.
+ *
+ * The line holds, separated by ", ": next_array, next_temp, the sum of the
+ * first next_array entries of array_sizes, next_temp plus that sum, and the
+ * number of entries in the instruction list.  The write is done while
+ * holding print_stats_mutex.
+ */
+void glsl_to_tgsi_visitor::print_stats()
+{
+   int ninstructions = 0;
+   foreach_in_list(glsl_to_tgsi_instruction, inst, &instructions)
+      ++ninstructions;
+
+   int narray_registers = 0;
+   for (unsigned arr = 0; arr != this->next_array; ++arr)
+      narray_registers += this->array_sizes[arr];
+
+   simple_mtx_lock(&print_stats_mutex);
+   stats_log << next_array << ", ";
+   stats_log << next_temp << ", ";
+   stats_log << narray_registers << ", ";
+   stats_log << next_temp + narray_registers << ", ";
+   stats_log << ninstructions << "\n";
+   simple_mtx_unlock(&print_stats_mutex);
+}
+#endif
/* ------------------------- TGSI conversion stuff -------------------------- */
/**
const ubyte *inputMapping;
const ubyte *outputMapping;
- unsigned procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */
+ enum pipe_shader_type procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */
bool need_uarl;
+ bool tg4_component_in_swizzle;
};
-/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
-unsigned
-_mesa_sysval_to_semantic(unsigned sysval)
-{
- switch (sysval) {
- /* Vertex shader */
- case SYSTEM_VALUE_VERTEX_ID:
- return TGSI_SEMANTIC_VERTEXID;
- case SYSTEM_VALUE_INSTANCE_ID:
- return TGSI_SEMANTIC_INSTANCEID;
- case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
- return TGSI_SEMANTIC_VERTEXID_NOBASE;
- case SYSTEM_VALUE_BASE_VERTEX:
- return TGSI_SEMANTIC_BASEVERTEX;
- case SYSTEM_VALUE_BASE_INSTANCE:
- return TGSI_SEMANTIC_BASEINSTANCE;
- case SYSTEM_VALUE_DRAW_ID:
- return TGSI_SEMANTIC_DRAWID;
-
- /* Geometry shader */
- case SYSTEM_VALUE_INVOCATION_ID:
- return TGSI_SEMANTIC_INVOCATIONID;
-
- /* Fragment shader */
- case SYSTEM_VALUE_FRAG_COORD:
- return TGSI_SEMANTIC_POSITION;
- case SYSTEM_VALUE_FRONT_FACE:
- return TGSI_SEMANTIC_FACE;
- case SYSTEM_VALUE_SAMPLE_ID:
- return TGSI_SEMANTIC_SAMPLEID;
- case SYSTEM_VALUE_SAMPLE_POS:
- return TGSI_SEMANTIC_SAMPLEPOS;
- case SYSTEM_VALUE_SAMPLE_MASK_IN:
- return TGSI_SEMANTIC_SAMPLEMASK;
- case SYSTEM_VALUE_HELPER_INVOCATION:
- return TGSI_SEMANTIC_HELPER_INVOCATION;
-
- /* Tessellation shader */
- case SYSTEM_VALUE_TESS_COORD:
- return TGSI_SEMANTIC_TESSCOORD;
- case SYSTEM_VALUE_VERTICES_IN:
- return TGSI_SEMANTIC_VERTICESIN;
- case SYSTEM_VALUE_PRIMITIVE_ID:
- return TGSI_SEMANTIC_PRIMID;
- case SYSTEM_VALUE_TESS_LEVEL_OUTER:
- return TGSI_SEMANTIC_TESSOUTER;
- case SYSTEM_VALUE_TESS_LEVEL_INNER:
- return TGSI_SEMANTIC_TESSINNER;
-
- /* Compute shader */
- case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
- return TGSI_SEMANTIC_THREAD_ID;
- case SYSTEM_VALUE_WORK_GROUP_ID:
- return TGSI_SEMANTIC_BLOCK_ID;
- case SYSTEM_VALUE_NUM_WORK_GROUPS:
- return TGSI_SEMANTIC_GRID_SIZE;
- case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
- return TGSI_SEMANTIC_BLOCK_SIZE;
-
- /* ARB_shader_ballot */
- case SYSTEM_VALUE_SUBGROUP_SIZE:
- return TGSI_SEMANTIC_SUBGROUP_SIZE;
- case SYSTEM_VALUE_SUBGROUP_INVOCATION:
- return TGSI_SEMANTIC_SUBGROUP_INVOCATION;
- case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
- return TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
- case SYSTEM_VALUE_SUBGROUP_GE_MASK:
- return TGSI_SEMANTIC_SUBGROUP_GE_MASK;
- case SYSTEM_VALUE_SUBGROUP_GT_MASK:
- return TGSI_SEMANTIC_SUBGROUP_GT_MASK;
- case SYSTEM_VALUE_SUBGROUP_LE_MASK:
- return TGSI_SEMANTIC_SUBGROUP_LE_MASK;
- case SYSTEM_VALUE_SUBGROUP_LT_MASK:
- return TGSI_SEMANTIC_SUBGROUP_LT_MASK;
-
- /* Unhandled */
- case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
- case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
- case SYSTEM_VALUE_VERTEX_CNT:
- default:
- assert(!"Unexpected SYSTEM_VALUE_ enum");
- return TGSI_SEMANTIC_COUNT;
- }
-}
-
/**
* Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
*/
static struct ureg_src
emit_immediate(struct st_translate *t,
gl_constant_value values[4],
- int type, int size)
+ GLenum type, int size)
{
struct ureg_program *ureg = t->ureg;
- switch(type)
- {
+ switch (type) {
case GL_FLOAT:
return ureg_DECL_immediate(ureg, &values[0].f, size);
case GL_DOUBLE:
{
unsigned array;
- switch(file) {
+ switch (file) {
case PROGRAM_UNDEFINED:
return ureg_dst_undef();
return t->outputs[t->outputMapping[index]];
}
else {
- struct inout_decl *decl = find_inout_array(t->output_decls, t->num_output_decls, array_id);
+ struct inout_decl *decl =
+ find_inout_array(t->output_decls,
+ t->num_output_decls, array_id);
unsigned mesa_index = decl->mesa_index;
int slot = t->outputMapping[mesa_index];
int index = src_reg->index;
int double_reg2 = src_reg->double_reg2 ? 1 : 0;
- switch(src_reg->file) {
+ switch (src_reg->file) {
case PROGRAM_UNDEFINED:
src = ureg_imm4f(t->ureg, 0, 0, 0, 0);
break;
case PROGRAM_TEMPORARY:
case PROGRAM_ARRAY:
- src = ureg_src(dst_register(t, src_reg->file, src_reg->index, src_reg->array_id));
+ src = ureg_src(dst_register(t, src_reg->file, src_reg->index,
+ src_reg->array_id));
break;
case PROGRAM_OUTPUT: {
- struct ureg_dst dst = dst_register(t, src_reg->file, src_reg->index, src_reg->array_id);
+ struct ureg_dst dst = dst_register(t, src_reg->file, src_reg->index,
+ src_reg->array_id);
assert(dst.WriteMask != 0);
unsigned shift = ffs(dst.WriteMask) - 1;
src = ureg_swizzle(ureg_src(dst),
src = t->inputs[t->inputMapping[index] + double_reg2];
}
else {
- struct inout_decl *decl = find_inout_array(t->input_decls, t->num_input_decls,
+ struct inout_decl *decl = find_inout_array(t->input_decls,
+ t->num_input_decls,
src_reg->array_id);
unsigned mesa_index = decl->mesa_index;
int slot = t->inputMapping[mesa_index];
int num_dst;
int num_src;
- unsigned tex_target = 0;
+ enum tgsi_texture_type tex_target = TGSI_TEXTURE_BUFFER;
num_dst = num_inst_dst_regs(inst);
num_src = num_inst_src_regs(inst);
for (i = 0; i < num_src; i++)
src[i] = translate_src(t, &inst->src[i]);
- switch(inst->op) {
+ switch (inst->op) {
case TGSI_OPCODE_BGNLOOP:
case TGSI_OPCODE_ELSE:
case TGSI_OPCODE_ENDLOOP:
case TGSI_OPCODE_TXL2:
case TGSI_OPCODE_TG4:
case TGSI_OPCODE_LODQ:
+ case TGSI_OPCODE_SAMP2HND:
if (inst->resource.file == PROGRAM_SAMPLER) {
src[num_src] = t->samplers[inst->resource.index];
+ if (t->tg4_component_in_swizzle && inst->op == TGSI_OPCODE_TG4)
+ src[num_src].SwizzleX = inst->gather_component;
} else {
/* Bindless samplers. */
src[num_src] = translate_src(t, &inst->resource);
case TGSI_OPCODE_ATOMUMAX:
case TGSI_OPCODE_ATOMIMIN:
case TGSI_OPCODE_ATOMIMAX:
+ case TGSI_OPCODE_ATOMFADD:
+ case TGSI_OPCODE_IMG2HND:
+ case TGSI_OPCODE_ATOMINC_WRAP:
+ case TGSI_OPCODE_ATOMDEC_WRAP:
for (i = num_src - 1; i >= 0; i--)
src[i + 1] = src[i];
num_src++;
}
}
+/**
+ * Invert SamplePos.y when rendering to the default framebuffer.
+ *
+ * Builds a temporary whose .x is a copy of the SAMPLE_POS system value's .x
+ * and whose .y is chosen at run time: the unmodified samplepos.y when the
+ * .x channel of constant \p wpos_y_transform equals 1, and
+ * 1 - samplepos.y otherwise.  t->systemValues[SYSTEM_VALUE_SAMPLE_POS] is
+ * then redirected to that temporary.
+ *
+ * \param t                 translation state; its ureg program receives the
+ *                          emitted instructions
+ * \param wpos_y_transform  index of the constant to test; must be >= 0
+ */
+static void
+emit_samplepos_adjustment(struct st_translate *t, int wpos_y_transform)
+{
+   struct ureg_program *const ureg = t->ureg;
+
+   assert(wpos_y_transform >= 0);
+   const struct ureg_src transform = ureg_DECL_constant(ureg, wpos_y_transform);
+   const struct ureg_src sample_pos = t->systemValues[SYSTEM_VALUE_SAMPLE_POS];
+   const struct ureg_dst adjusted = ureg_DECL_temporary(ureg);
+   const struct ureg_dst selector = ureg_DECL_temporary(ureg);
+
+   const struct ureg_dst adjusted_x = ureg_writemask(adjusted, TGSI_WRITEMASK_X);
+   const struct ureg_dst adjusted_y = ureg_writemask(adjusted, TGSI_WRITEMASK_Y);
+   const struct ureg_dst selector_y = ureg_writemask(selector, TGSI_WRITEMASK_Y);
+
+   /* adjusted.y = 1 - samplepos.y */
+   ureg_ADD(ureg, adjusted_y, ureg_imm1f(ureg, 1), ureg_negate(sample_pos));
+
+   /* If trans.x == 1, use samplepos.y, else use 1 - samplepos.y. */
+   ureg_FSEQ(ureg, selector_y,
+             ureg_scalar(transform, TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1));
+   ureg_UCMP(ureg, adjusted_y,
+             ureg_src(selector), sample_pos, ureg_src(adjusted));
+
+   /* The x coordinate is passed through unchanged. */
+   ureg_MOV(ureg, adjusted_x, sample_pos);
+
+   /* Use the result in place of the system value. */
+   t->systemValues[SYSTEM_VALUE_SAMPLE_POS] = ureg_src(adjusted);
+}
+
+
/**
* Emit the TGSI instructions for inverting and adjusting WPOS.
* This code is unavoidable because it also depends on whether
* where T = INPUT[WPOS] is inverted by Y.
*/
struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const);
- struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
+ struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
struct ureg_src *wpos =
ctx->Const.GLSLFragCoordIsSysVal ?
&t->systemValues[SYSTEM_VALUE_FRAG_COORD] :
} else {
/* MOV wpos_temp, input[wpos]
*/
- ureg_MOV( ureg, wpos_temp, wpos_input );
+ ureg_MOV(ureg, wpos_temp, wpos_input);
}
/* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
if (invert) {
/* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
*/
- ureg_MAD( ureg,
- ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
- wpos_input,
- ureg_scalar(wpostrans, 0),
- ureg_scalar(wpostrans, 1));
+ ureg_MAD(ureg,
+ ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+ wpos_input,
+ ureg_scalar(wpostrans, 0),
+ ureg_scalar(wpostrans, 1));
} else {
/* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
*/
- ureg_MAD( ureg,
- ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
- wpos_input,
- ureg_scalar(wpostrans, 2),
- ureg_scalar(wpostrans, 3));
+ ureg_MAD(ureg,
+ ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+ wpos_input,
+ ureg_scalar(wpostrans, 2),
+ ureg_scalar(wpostrans, 3));
}
/* Use wpos_temp as position input from here on:
* u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
* u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
*/
- if (program->OriginUpperLeft) {
+ if (program->info.fs.origin_upper_left) {
/* Fragment shader wants origin in upper-left */
if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
/* the driver supports upper-left origin */
assert(0);
}
- if (program->PixelCenterInteger) {
+ if (program->info.fs.pixel_center_integer) {
/* Fragment shader wants pixel center integer */
if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
/* the driver supports pixel center integer */
std::sort(decls, decls + count, sorter);
}
-static unsigned
+static enum tgsi_interpolate_mode
st_translate_interp(enum glsl_interp_mode glsl_qual, GLuint varying)
{
switch (glsl_qual) {
extern "C" enum pipe_error
st_translate_program(
struct gl_context *ctx,
- uint procType,
+ enum pipe_shader_type procType,
struct ureg_program *ureg,
glsl_to_tgsi_visitor *program,
const struct gl_program *proginfo,
assert(numInputs <= ARRAY_SIZE(t->inputs));
assert(numOutputs <= ARRAY_SIZE(t->outputs));
+ ASSERT_BITFIELD_SIZE(st_src_reg, type, GLSL_TYPE_ERROR);
+ ASSERT_BITFIELD_SIZE(st_dst_reg, type, GLSL_TYPE_ERROR);
+ ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_type, GLSL_TYPE_ERROR);
+ ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format, PIPE_FORMAT_COUNT);
+ ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_target,
+ (gl_texture_index) (NUM_TEXTURE_TARGETS - 1));
+ ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format,
+ (enum pipe_format) (PIPE_FORMAT_COUNT - 1));
+ ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, op,
+ (enum tgsi_opcode) (TGSI_OPCODE_LAST - 1));
+
t = CALLOC_STRUCT(st_translate);
if (!t) {
ret = PIPE_ERROR_OUT_OF_MEMORY;
t->procType = procType;
t->need_uarl = !screen->get_param(screen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS);
+ t->tg4_component_in_swizzle = screen->get_param(screen, PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE);
t->inputMapping = inputMapping;
t->outputMapping = outputMapping;
t->ureg = ureg;
tgsi_usage_mask = TGSI_WRITEMASK_XYZW;
}
- unsigned interp_mode = 0;
- unsigned interp_location = 0;
+ enum tgsi_interpolate_mode interp_mode = TGSI_INTERPOLATE_CONSTANT;
+ enum tgsi_interpolate_loc interp_location = TGSI_INTERPOLATE_LOC_CENTER;
if (procType == PIPE_SHADER_FRAGMENT) {
assert(interpMode);
interp_mode = interpMode[slot] != TGSI_INTERPOLATE_COUNT ?
- interpMode[slot] :
+ (enum tgsi_interpolate_mode) interpMode[slot] :
st_translate_interp(decl->interp, inputSlotToAttr[slot]);
- interp_location = decl->interp_loc;
+ interp_location = (enum tgsi_interpolate_loc) decl->interp_loc;
}
src = ureg_DECL_fs_input_cyl_centroid_layout(ureg,
- inputSemanticName[slot], inputSemanticIndex[slot],
+ (enum tgsi_semantic) inputSemanticName[slot],
+ inputSemanticIndex[slot],
interp_mode, 0, interp_location, slot, tgsi_usage_mask,
decl->array_id, decl->size);
}
dst = ureg_DECL_output_layout(ureg,
- outputSemanticName[slot], outputSemanticIndex[slot],
+ (enum tgsi_semantic) outputSemanticName[slot],
+ outputSemanticIndex[slot],
decl->gs_out_streams,
- slot, tgsi_usage_mask, decl->array_id, decl->size);
-
+ slot, tgsi_usage_mask, decl->array_id, decl->size, decl->invariant);
+ dst.Invariant = decl->invariant;
for (unsigned j = 0; j < decl->size; ++j) {
if (t->outputs[slot + j].File != TGSI_FILE_OUTPUT) {
/* The ArrayID is set up in dst_register */
t->outputs[slot + j] = dst;
t->outputs[slot + j].ArrayID = 0;
t->outputs[slot + j].Index += j;
+ t->outputs[slot + j].Invariant = decl->invariant;
}
}
}
emit_compute_block_size(proginfo, ureg);
}
+ if (program->shader->Program->info.layer_viewport_relative)
+ ureg_property(ureg, TGSI_PROPERTY_LAYER_VIEWPORT_RELATIVE, 1);
+
/* Declare address register.
*/
if (program->num_address_regs > 0) {
/* Declare misc input registers
*/
{
- GLbitfield sysInputs = proginfo->info.system_values_read;
+ GLbitfield64 sysInputs = proginfo->info.system_values_read;
for (i = 0; sysInputs; i++) {
- if (sysInputs & (1 << i)) {
- unsigned semName = _mesa_sysval_to_semantic(i);
+ if (sysInputs & (1ull << i)) {
+ enum tgsi_semantic semName = tgsi_get_sysval_semantic(i);
t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
(void) pscreen;
if (!ctx->Const.NativeIntegers) {
struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
- ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
+ ureg_U2F(t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X),
+ t->systemValues[i]);
t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
}
}
emit_wpos(st_context(ctx), t, proginfo, ureg,
program->wpos_transform_const);
- sysInputs &= ~(1 << i);
+ if (procType == PIPE_SHADER_FRAGMENT &&
+ semName == TGSI_SEMANTIC_SAMPLEPOS)
+ emit_samplepos_adjustment(t, program->wpos_transform_const);
+
+ sysInputs &= ~(1ull << i);
}
}
}
t->num_constants = proginfo->Parameters->NumParameters;
for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
+ unsigned pvo = proginfo->Parameters->ParameterValueOffset[i];
+
switch (proginfo->Parameters->Parameters[i].Type) {
case PROGRAM_STATE_VAR:
case PROGRAM_UNIFORM:
t->constants[i] = ureg_DECL_constant(ureg, i);
else
t->constants[i] = emit_immediate(t,
- proginfo->Parameters->ParameterValues[i],
+ proginfo->Parameters->ParameterValues + pvo,
proginfo->Parameters->Parameters[i].DataType,
4);
break;
/* texture samplers */
for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
if (program->samplers_used & (1u << i)) {
- unsigned type = st_translate_texture_type(program->sampler_types[i]);
+ enum tgsi_return_type type =
+ st_translate_texture_type(program->sampler_types[i]);
t->samplers[i] = ureg_DECL_sampler(ureg, i);
- ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i],
- type, type, type, type );
+ ureg_DECL_sampler_view(ureg, i, program->sampler_targets[i],
+ type, type, type, type);
}
}
struct gl_program *prog = program->prog;
if (!st_context(ctx)->has_hw_atomics) {
- for (i = 0; i < prog->info.num_abos; i++) {
- unsigned index = prog->sh.AtomicBuffers[i]->Binding;
- assert(index < frag_const->MaxAtomicBuffers);
+ for (i = 0; i < prog->info.num_abos; i++) {
+ unsigned index = (prog->info.num_ssbos +
+ prog->sh.AtomicBuffers[i]->Binding);
+ assert(prog->sh.AtomicBuffers[i]->Binding <
+ frag_const->MaxAtomicBuffers);
t->buffers[index] = ureg_DECL_buffer(ureg, index, true);
}
} else {
assert(prog->info.num_ssbos <= frag_const->MaxShaderStorageBlocks);
for (i = 0; i < prog->info.num_ssbos; i++) {
- unsigned index = frag_const->MaxAtomicBuffers + i;
- t->buffers[index] = ureg_DECL_buffer(ureg, index, false);
+ t->buffers[i] = ureg_DECL_buffer(ureg, i, false);
}
}
t->images[i] = ureg_DECL_image(ureg, i,
program->image_targets[i],
program->image_formats[i],
- true, false);
+ program->image_wr[i],
+ false);
}
}
}
}
break;
+ default:
+ ; /* nothing - silence compiler warning */
}
out:
PIPE_CAP_TGSI_TEX_TXF_LZ);
v->need_uarl = !pscreen->get_param(pscreen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS);
+ v->tg4_component_in_swizzle = pscreen->get_param(pscreen, PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE);
v->variables = _mesa_hash_table_create(v->mem_ctx, _mesa_hash_pointer,
_mesa_key_pointer_equal);
skip_merge_registers =
while (v->eliminate_dead_code());
v->merge_two_dsts();
- if (!skip_merge_registers)
+
+ if (!skip_merge_registers) {
+ v->split_arrays();
+ v->copy_propagate();
+ while (v->eliminate_dead_code());
+
v->merge_registers();
+ v->copy_propagate();
+ while (v->eliminate_dead_code());
+ }
+
v->renumber_registers();
/* Write the END instruction. */
}
do_set_program_inouts(shader->ir, prog, shader->Stage);
+
_mesa_copy_linked_program_data(shader_program, shader);
- shrink_array_declarations(v->inputs, v->num_inputs,
- &prog->info.inputs_read,
- prog->info.double_inputs_read,
- &prog->info.patch_inputs_read);
- shrink_array_declarations(v->outputs, v->num_outputs,
- &prog->info.outputs_written, 0ULL,
- &prog->info.patch_outputs_written);
+
+ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS)) {
+ mark_array_io(v->inputs, v->num_inputs,
+ &prog->info.inputs_read,
+ prog->DualSlotInputs,
+ &prog->info.patch_inputs_read);
+
+ mark_array_io(v->outputs, v->num_outputs,
+ &prog->info.outputs_written, 0ULL,
+ &prog->info.patch_outputs_written);
+ } else {
+ shrink_array_declarations(v->inputs, v->num_inputs,
+ &prog->info.inputs_read,
+ prog->DualSlotInputs,
+ &prog->info.patch_inputs_read);
+ shrink_array_declarations(v->outputs, v->num_outputs,
+ &prog->info.outputs_written, 0ULL,
+ &prog->info.patch_outputs_written);
+ }
+
count_resources(v, prog);
/* The GLSL IR won't be needed anymore. */
/* This must be done before the uniform storage is associated. */
if (shader->Stage == MESA_SHADER_FRAGMENT &&
(prog->info.inputs_read & VARYING_BIT_POS ||
- prog->info.system_values_read & (1 << SYSTEM_VALUE_FRAG_COORD))) {
- static const gl_state_index wposTransformState[STATE_LENGTH] = {
+ prog->info.system_values_read & (1ull << SYSTEM_VALUE_FRAG_COORD) ||
+ prog->info.system_values_read & (1ull << SYSTEM_VALUE_SAMPLE_POS))) {
+ static const gl_state_index16 wposTransformState[STATE_LENGTH] = {
STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
};
* prog->ParameterValues to get reallocated (e.g., anything that adds a
* program constant) has to happen before creating this linkage.
*/
- _mesa_associate_uniform_storage(ctx, shader_program, prog, true);
+ _mesa_associate_uniform_storage(ctx, shader_program, prog);
if (!shader_program->data->LinkStatus) {
free_glsl_to_tgsi_visitor(v);
_mesa_reference_program(ctx, &shader->Program, NULL);
return NULL;
}
- struct st_vertex_program *stvp;
- struct st_fragment_program *stfp;
- struct st_common_program *stp;
- struct st_compute_program *stcp;
+ st_program(prog)->glsl_to_tgsi = v;
- switch (shader->Stage) {
- case MESA_SHADER_VERTEX:
- stvp = (struct st_vertex_program *)prog;
- stvp->glsl_to_tgsi = v;
- break;
- case MESA_SHADER_FRAGMENT:
- stfp = (struct st_fragment_program *)prog;
- stfp->glsl_to_tgsi = v;
- break;
- case MESA_SHADER_TESS_CTRL:
- case MESA_SHADER_TESS_EVAL:
- case MESA_SHADER_GEOMETRY:
- stp = st_common_program(prog);
- stp->glsl_to_tgsi = v;
- break;
- case MESA_SHADER_COMPUTE:
- stcp = (struct st_compute_program *)prog;
- stcp->glsl_to_tgsi = v;
- break;
- default:
- assert(!"should not be reached");
- return NULL;
- }
+ PRINT_STATS(v->print_stats());
return prog;
}
return visitor.unsupported;
}
-extern "C" {
-
/**
* Link a shader.
- * Called via ctx->Driver.LinkShader()
* This actually involves converting GLSL IR into an intermediate TGSI-like IR
* with code lowering and other optimizations.
*/
GLboolean
-st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
+st_link_tgsi(struct gl_context *ctx, struct gl_shader_program *prog)
{
- /* Return early if we are loading the shader from on-disk cache */
- if (st_load_tgsi_from_disk_cache(ctx, prog)) {
- return GL_TRUE;
- }
-
struct pipe_screen *pscreen = ctx->st->pipe->screen;
- assert(prog->data->LinkStatus);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- if (prog->_LinkedShaders[i] == NULL)
+ struct gl_linked_shader *shader = prog->_LinkedShaders[i];
+ if (shader == NULL)
continue;
- struct gl_linked_shader *shader = prog->_LinkedShaders[i];
exec_list *ir = shader->ir;
gl_shader_stage stage = shader->Stage;
+ enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage);
const struct gl_shader_compiler_options *options =
&ctx->Const.ShaderCompilerOptions[stage];
- enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage);
- bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
- PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
- bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
- PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED);
- bool have_ldexp = pscreen->get_shader_param(pscreen, ptarget,
- PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED);
+
unsigned if_threshold = pscreen->get_shader_param(pscreen, ptarget,
PIPE_SHADER_CAP_LOWER_IF_THRESHOLD);
-
- /* If there are forms of indirect addressing that the driver
- * cannot handle, perform the lowering pass.
- */
- if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
- options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
- lower_variable_index_to_cond_assign(stage, ir,
- options->EmitNoIndirectInput,
- options->EmitNoIndirectOutput,
- options->EmitNoIndirectTemp,
- options->EmitNoIndirectUniform);
- }
-
- if (!pscreen->get_param(pscreen, PIPE_CAP_INT64_DIVMOD))
- lower_64bit_integer_instructions(ir, DIV64 | MOD64);
-
- if (ctx->Extensions.ARB_shading_language_packing) {
- unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
- LOWER_UNPACK_SNORM_2x16 |
- LOWER_PACK_UNORM_2x16 |
- LOWER_UNPACK_UNORM_2x16 |
- LOWER_PACK_SNORM_4x8 |
- LOWER_UNPACK_SNORM_4x8 |
- LOWER_UNPACK_UNORM_4x8 |
- LOWER_PACK_UNORM_4x8;
-
- if (ctx->Extensions.ARB_gpu_shader5)
- lower_inst |= LOWER_PACK_USE_BFI |
- LOWER_PACK_USE_BFE;
- if (!ctx->st->has_half_float_packing)
- lower_inst |= LOWER_PACK_HALF_2x16 |
- LOWER_UNPACK_HALF_2x16;
-
- lower_packing_builtins(ir, lower_inst);
- }
-
- if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
- lower_offset_arrays(ir);
- do_mat_op_to_vec(ir);
-
- if (stage == MESA_SHADER_FRAGMENT)
- lower_blend_equation_advanced(shader);
-
- lower_instructions(ir,
- MOD_TO_FLOOR |
- FDIV_TO_MUL_RCP |
- EXP_TO_EXP2 |
- LOG_TO_LOG2 |
- (have_ldexp ? 0 : LDEXP_TO_ARITH) |
- (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
- CARRY_TO_ARITH |
- BORROW_TO_ARITH |
- (have_dround ? 0 : DOPS_TO_DFRAC) |
- (options->EmitNoPow ? POW_TO_EXP2 : 0) |
- (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
- (options->EmitNoSat ? SAT_TO_CLAMP : 0) |
- (ctx->Const.ForceGLSLAbsSqrt ? SQRT_TO_ABS_SQRT : 0) |
- /* Assume that if ARB_gpu_shader5 is not supported
- * then all of the extended integer functions need
- * lowering. It may be necessary to add some caps
- * for individual instructions.
- */
- (!ctx->Extensions.ARB_gpu_shader5
- ? BIT_COUNT_TO_MATH |
- EXTRACT_TO_SHIFTS |
- INSERT_TO_SHIFTS |
- REVERSE_TO_SHIFTS |
- FIND_LSB_TO_FLOAT_CAST |
- FIND_MSB_TO_FLOAT_CAST |
- IMUL_HIGH_TO_MUL
- : 0));
-
- do_vec_index_to_cond_assign(ir);
- lower_vector_insert(ir, true);
- lower_quadop_vector(ir, false);
- lower_noise(ir);
- if (options->MaxIfDepth == 0) {
- lower_discard(ir);
- }
-
if (ctx->Const.GLSLOptimizeConservatively) {
/* Do it once and repeat only if there's unsupported control flow. */
do {
} while (progress);
}
- validate_ir_tree(ir);
- }
-
- build_program_resource_list(ctx, prog);
-
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- struct gl_linked_shader *shader = prog->_LinkedShaders[i];
- if (shader == NULL)
- continue;
+ /* Do this again to lower ir_binop_vector_extract introduced
+ * by optimization passes.
+ */
+ do_vec_index_to_cond_assign(ir);
- enum pipe_shader_type ptarget =
- pipe_shader_type_from_mesa(shader->Stage);
- enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
- pscreen->get_shader_param(pscreen, ptarget,
- PIPE_SHADER_CAP_PREFERRED_IR);
+ validate_ir_tree(ir);
- struct gl_program *linked_prog = NULL;
- if (preferred_ir == PIPE_SHADER_IR_NIR) {
- linked_prog = st_nir_get_mesa_program(ctx, prog, shader);
- } else {
- linked_prog = get_mesa_program_tgsi(ctx, prog, shader);
- st_set_prog_affected_state_flags(linked_prog);
- }
+ struct gl_program *linked_prog =
+ get_mesa_program_tgsi(ctx, prog, shader);
+ st_set_prog_affected_state_flags(linked_prog);
if (linked_prog) {
+ /* This is really conservative: */
+ linked_prog->info.writes_memory =
+ linked_prog->info.num_ssbos ||
+ linked_prog->info.num_images ||
+ ctx->Extensions.ARB_bindless_texture ||
+ (linked_prog->sh.LinkedTransformFeedback &&
+ linked_prog->sh.LinkedTransformFeedback->NumVarying);
+
if (!ctx->Driver.ProgramStringNotify(ctx,
_mesa_shader_stage_to_program(i),
linked_prog)) {
return GL_TRUE;
}
-
-void
-st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
- const ubyte outputMapping[],
- struct pipe_stream_output_info *so)
-{
- if (!glsl_to_tgsi->shader_program->last_vert_prog)
- return;
-
- struct gl_transform_feedback_info *info =
- glsl_to_tgsi->shader_program->last_vert_prog->sh.LinkedTransformFeedback;
- st_translate_stream_output_info2(info, outputMapping, so);
-}
-
-void
-st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
- const ubyte outputMapping[],
- struct pipe_stream_output_info *so)
-{
- unsigned i;
-
- for (i = 0; i < info->NumOutputs; i++) {
- so->output[i].register_index =
- outputMapping[info->Outputs[i].OutputRegister];
- so->output[i].start_component = info->Outputs[i].ComponentOffset;
- so->output[i].num_components = info->Outputs[i].NumComponents;
- so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
- so->output[i].dst_offset = info->Outputs[i].DstOffset;
- so->output[i].stream = info->Outputs[i].StreamId;
- }
-
- for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
- so->stride[i] = info->Buffers[i].Stride;
- }
- so->num_outputs = info->NumOutputs;
-}
-
-} /* extern "C" */