#include "st_format.h"
#include "st_glsl_types.h"
#include "st_nir.h"
+#include "st_shader_cache.h"
#include <algorithm>
this->index = index;
this->swizzle = swizzle_for_type(type, component);
this->negate = 0;
+ this->abs = 0;
this->index2D = 0;
this->type = type ? type->base_type : GLSL_TYPE_ERROR;
this->reladdr = NULL;
this->index2D = 0;
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
+ this->abs = 0;
this->reladdr = NULL;
this->reladdr2 = NULL;
this->has_index2 = false;
this->index2D = index2D;
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
+ this->abs = 0;
this->reladdr = NULL;
this->reladdr2 = NULL;
this->has_index2 = false;
this->index2D = 0;
this->swizzle = 0;
this->negate = 0;
+ this->abs = 0;
this->reladdr = NULL;
this->reladdr2 = NULL;
this->has_index2 = false;
int16_t index2D;
uint16_t swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
int negate:4; /**< NEGATE_XYZW mask from mesa */
- enum glsl_base_type type:4; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+ unsigned abs:1;
+ enum glsl_base_type type:5; /**< GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
unsigned has_index2:1;
gl_register_file file:5; /**< PROGRAM_* from Mesa */
/** Register index should be offset by the integer in this reg. */
st_src_reg *reladdr;
st_src_reg *reladdr2;
+
+ st_src_reg get_abs()
+ {
+ st_src_reg reg = *this;
+ reg.negate = 0;
+ reg.abs = 1;
+ return reg;
+ }
};
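/* Illustrative sketch, not part of the change: the new abs bit replaces the
 * dedicated TGSI ABS opcode. get_abs() returns a copy of the register with
 * abs set and negate cleared (|-x| == |x|), so an absolute value becomes a
 * plain MOV with a source modifier:
 *
 *    emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs());
 *
 * translate_src() then applies ureg_abs() when it sees the bit set.
 */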
class st_dst_reg {
int16_t index2D;
gl_register_file file:5; /**< PROGRAM_* from Mesa */
unsigned writemask:4; /**< Bitfield of WRITEMASK_[XYZW] */
- enum glsl_base_type type:4; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+ enum glsl_base_type type:5; /**< GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
unsigned has_index2:1;
unsigned array_id:10;
this->index = reg.index;
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
+ this->abs = 0;
this->reladdr = reg.reladdr;
this->index2D = reg.index2D;
this->reladdr2 = reg.reladdr2;
unsigned sampler_base:5;
unsigned sampler_array_size:6; /**< 1-based size of sampler array, 1 if not array */
unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */
- glsl_base_type tex_type:4;
+ glsl_base_type tex_type:5;
unsigned tex_shadow:1;
unsigned image_format:9;
unsigned tex_offset_num_offset:3;
unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */
unsigned size;
unsigned interp_loc;
+ unsigned gs_out_streams; /**< stream index per output component, 2 bits each */
enum glsl_interp_mode interp;
enum glsl_base_type base_type;
ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */
ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args);
va_end(args);
- prog->data->LinkStatus = GL_FALSE;
+ prog->data->LinkStatus = linking_failure;
}
static int
if (is_resource_instruction(op))
type = src1.type;
+ else if (src0.type == GLSL_TYPE_INT64 || src1.type == GLSL_TYPE_INT64)
+ type = GLSL_TYPE_INT64;
+ else if (src0.type == GLSL_TYPE_UINT64 || src1.type == GLSL_TYPE_UINT64)
+ type = GLSL_TYPE_UINT64;
else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
type = GLSL_TYPE_DOUBLE;
else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
else if (native_integers)
type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
+#define case7(c, f, i, u, d, i64, ui64) \
+ case TGSI_OPCODE_##c: \
+ if (type == GLSL_TYPE_UINT64) \
+ op = TGSI_OPCODE_##ui64; \
+ else if (type == GLSL_TYPE_INT64) \
+ op = TGSI_OPCODE_##i64; \
+ else if (type == GLSL_TYPE_DOUBLE) \
+ op = TGSI_OPCODE_##d; \
+ else if (type == GLSL_TYPE_INT) \
+ op = TGSI_OPCODE_##i; \
+ else if (type == GLSL_TYPE_UINT) \
+ op = TGSI_OPCODE_##u; \
+ else \
+ op = TGSI_OPCODE_##f; \
+ break;
#define case5(c, f, i, u, d) \
case TGSI_OPCODE_##c: \
if (type == GLSL_TYPE_DOUBLE) \
break;
#define case3(f, i, u) case4(f, f, i, u)
-#define case4d(f, i, u, d) case5(f, f, i, u, d)
+#define case6d(f, i, u, d, i64, u64) case7(f, f, i, u, d, i64, u64)
#define case3fid(f, i, d) case5(f, f, i, i, d)
+#define case3fid64(f, i, d, i64) case7(f, f, i, i, d, i64, i64)
#define case2fi(f, i) case4(f, f, i, i)
#define case2iu(i, u) case4(i, LAST, i, u)
-#define casecomp(c, f, i, u, d) \
+#define case2iu64(i, i64) case7(i, LAST, i, i, LAST, i64, i64)
+#define case4iu64(i, u, i64, u64) case7(i, LAST, i, u, LAST, i64, u64)
+
+#define casecomp(c, f, i, u, d, i64, ui64) \
case TGSI_OPCODE_##c: \
- if (type == GLSL_TYPE_DOUBLE) \
+ if (type == GLSL_TYPE_INT64) \
+ op = TGSI_OPCODE_##i64; \
+ else if (type == GLSL_TYPE_UINT64) \
+ op = TGSI_OPCODE_##ui64; \
+ else if (type == GLSL_TYPE_DOUBLE) \
op = TGSI_OPCODE_##d; \
else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \
op = TGSI_OPCODE_##i; \
break;
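/* Illustrative expansion, not in the source: case3fid64(ADD, UADD, DADD,
 * U64ADD) forwards to case7(ADD, ADD, UADD, UADD, DADD, U64ADD, U64ADD),
 * which expands to:
 *
 *    case TGSI_OPCODE_ADD:
 *       if (type == GLSL_TYPE_UINT64)
 *          op = TGSI_OPCODE_U64ADD;
 *       else if (type == GLSL_TYPE_INT64)
 *          op = TGSI_OPCODE_U64ADD;
 *       else if (type == GLSL_TYPE_DOUBLE)
 *          op = TGSI_OPCODE_DADD;
 *       else if (type == GLSL_TYPE_INT)
 *          op = TGSI_OPCODE_UADD;
 *       else if (type == GLSL_TYPE_UINT)
 *          op = TGSI_OPCODE_UADD;
 *       else
 *          op = TGSI_OPCODE_ADD;
 *       break;
 *
 * Addition is sign-agnostic in two's complement, which is why one U64ADD
 * opcode serves both the int64 and uint64 slots.
 */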
switch(op) {
- case3fid(ADD, UADD, DADD);
- case3fid(MUL, UMUL, DMUL);
+ case3fid64(ADD, UADD, DADD, U64ADD);
+ case3fid64(MUL, UMUL, DMUL, U64MUL);
case3fid(MAD, UMAD, DMAD);
case3fid(FMA, UMAD, DFMA);
- case3(DIV, IDIV, UDIV);
- case4d(MAX, IMAX, UMAX, DMAX);
- case4d(MIN, IMIN, UMIN, DMIN);
- case2iu(MOD, UMOD);
+ case6d(DIV, IDIV, UDIV, DDIV, I64DIV, U64DIV);
+ case6d(MAX, IMAX, UMAX, DMAX, I64MAX, U64MAX);
+ case6d(MIN, IMIN, UMIN, DMIN, I64MIN, U64MIN);
+ case4iu64(MOD, UMOD, I64MOD, U64MOD);
- casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ);
- casecomp(SNE, FSNE, USNE, USNE, DSNE);
- casecomp(SGE, FSGE, ISGE, USGE, DSGE);
- casecomp(SLT, FSLT, ISLT, USLT, DSLT);
+ casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ);
+ casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE);
+ casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE);
+ casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT);
- case2iu(ISHR, USHR);
+ case2iu64(SHL, U64SHL);
+ case4iu64(ISHR, USHR, I64SHR, U64SHR);
- case3fid(SSG, ISSG, DSSG);
- case3fid(ABS, IABS, DABS);
+ case3fid64(SSG, ISSG, DSSG, I64SSG);
case2iu(IBFE, UBFE);
case2iu(IMSB, UMSB);
int index = 0;
immediate_storage *entry;
- int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
+ int size32 = size * ((datatype == GL_DOUBLE ||
+ datatype == GL_INT64_ARB ||
+ datatype == GL_UNSIGNED_INT64_ARB) ? 2 : 1);
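/* Worked example, assuming size counts vector components: immediates are
 * stored in 32-bit slots, so 64-bit types take two slots per component.
 * An i64vec2 immediate has size = 2 and datatype = GL_INT64_ARB, giving
 * size32 = 2 * 2 = 4 slots.
 */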
int i;
/* Search immediate storage to see if we already have an identical
src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
src.reladdr = NULL;
src.negate = 0;
+ src.abs = 0;
if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) {
if (next_array >= max_num_arrays) {
}
break;
case ir_unop_neg:
- if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
+ if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
+ emit_asm(ir, TGSI_OPCODE_I64NEG, result_dst, op[0]);
+ else if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
else if (result_dst.type == GLSL_TYPE_DOUBLE)
emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
break;
case ir_unop_abs:
- emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
+ if (result_dst.type == GLSL_TYPE_FLOAT)
+ emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs());
+ else if (result_dst.type == GLSL_TYPE_DOUBLE)
+ emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]);
+ else if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
+ emit_asm(ir, TGSI_OPCODE_I64ABS, result_dst, op[0]);
+ else
+ emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]);
break;
case ir_unop_sign:
emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
break;
case ir_binop_sub:
- emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
+ op[1].negate = ~op[1].negate;
+ emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
break;
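/* Illustrative only: with TGSI_OPCODE_SUB gone, a - b is emitted as
 * a + (-b). negate is the 4-bit per-channel NEGATE_XYZW mask, and ~mask
 * flips every channel, so channels of b that were already negated become
 * positive again (e.g. 0b0000 -> 0b1111, 0b0101 -> 0b1010).
 */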
case ir_binop_mul:
emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
break;
case ir_binop_div:
- if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
- assert(!"not reached: should be handled by ir_div_to_mul_rcp");
- else
- emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
+ emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
break;
case ir_binop_mod:
if (result_dst.type == GLSL_TYPE_FLOAT)
* we want, I choose to use ABS to match DX9 and pre-GLSL RSQ
* behavior.
*/
- emit_scalar(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
- emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, result_src);
+ emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0].get_abs());
emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, result_src);
}
break;
/* fallthrough to next case otherwise */
case ir_unop_i2u:
case ir_unop_u2i:
+ case ir_unop_i642u64:
+ case ir_unop_u642i64:
/* Converting between signed and unsigned integers is a no-op. */
result_src = op[0];
result_src.type = result_dst.type;
case ir_unop_bitcast_f2i:
case ir_unop_bitcast_f2u:
/* Make sure we don't propagate the negate modifier to integer opcodes. */
- if (op[0].negate)
+ if (op[0].negate || op[0].abs)
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
else
result_src = op[0];
else
emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
break;
+ case ir_unop_bitcast_u642d:
+ case ir_unop_bitcast_i642d:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_DOUBLE;
+ break;
+ case ir_unop_bitcast_d2i64:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_INT64;
+ break;
+ case ir_unop_bitcast_d2u64:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_UINT64;
+ break;
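/* These bitcasts emit no instruction: doubles and 64-bit integers share
 * the same two-32-bit-channel storage layout in this backend, so retyping
 * the source register is sufficient.
 */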
case ir_unop_trunc:
emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
break;
cbuf.index = 0;
cbuf.reladdr = NULL;
cbuf.negate = 0;
+ cbuf.abs = 0;
assert(ir->type->is_vector() || ir->type->is_scalar());
break;
case ir_unop_unpack_double_2x32:
case ir_unop_pack_double_2x32:
+ case ir_unop_unpack_int_2x32:
+ case ir_unop_pack_int_2x32:
+ case ir_unop_unpack_uint_2x32:
+ case ir_unop_pack_uint_2x32:
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
break;
case ir_unop_vote_eq:
emit_asm(ir, TGSI_OPCODE_VOTE_EQ, result_dst, op[0]);
break;
-
+ case ir_unop_u2i64:
+ case ir_unop_u2u64:
+ case ir_unop_b2i64: {
+ st_src_reg temp = get_temp(glsl_type::uvec4_type);
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ unsigned orig_swz = op[0].swizzle;
+ /*
+ * To convert unsigned to 64-bit: zero the upper 32 bits (the Y/W
+ * channels) and copy the value into the lower 32 bits (the X/Z
+ * channels).
+ */
+ temp_dst.writemask = WRITEMASK_Y;
+ if (vector_elements > 1)
+ temp_dst.writemask |= WRITEMASK_W;
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0));
+ temp_dst.writemask = WRITEMASK_X;
+ if (vector_elements > 1)
+ temp_dst.writemask |= WRITEMASK_Z;
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 0), GET_SWZ(orig_swz, 0),
+ GET_SWZ(orig_swz, 1), GET_SWZ(orig_swz, 1));
+ if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64)
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
+ else
+ emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1));
+ result_src = temp;
+ result_src.type = GLSL_TYPE_UINT64;
+ if (vector_elements > 2) {
+ /* Subtle: We rely on the fact that get_temp here returns the next
+ * TGSI temporary register directly after the temp register used for
+ * the first two components, so that the result gets picked up
+ * automatically.
+ */
+ st_src_reg temp = get_temp(glsl_type::uvec4_type);
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ temp_dst.writemask = WRITEMASK_Y;
+ if (vector_elements > 3)
+ temp_dst.writemask |= WRITEMASK_W;
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0));
+
+ temp_dst.writemask = WRITEMASK_X;
+ if (vector_elements > 3)
+ temp_dst.writemask |= WRITEMASK_Z;
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), GET_SWZ(orig_swz, 2),
+ GET_SWZ(orig_swz, 3), GET_SWZ(orig_swz, 3));
+ if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64)
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
+ else
+ emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1));
+ }
+ break;
+ }
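/* Illustrative layout, not in the source: 64-bit values occupy channel
 * pairs, low dword in X/Z and high dword in Y/W. For a uvec2 in r0.xy
 * converted to u64vec2, the block above emits roughly:
 *
 *    MOV r1.yw, 0          ; zero the high dwords
 *    MOV r1.xz, r0.xxyy    ; low dwords from the 32-bit source
 *
 * Elements 2 and 3 of wider vectors land in a second temporary allocated
 * immediately afterwards, as the "Subtle" comment above notes.
 */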
+ case ir_unop_i642i:
+ case ir_unop_u642i:
+ case ir_unop_u642u:
+ case ir_unop_i642u: {
+ st_src_reg temp = get_temp(glsl_type::uvec4_type);
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ unsigned orig_swz = op[0].swizzle;
+ unsigned orig_idx = op[0].index;
+ int el;
+ temp_dst.writemask = WRITEMASK_X;
+
+ for (el = 0; el < vector_elements; el++) {
+ unsigned swz = GET_SWZ(orig_swz, el);
+ if (swz & 1)
+ op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z);
+ else
+ op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
+ if (swz > 2)
+ op[0].index = orig_idx + 1;
+ op[0].type = GLSL_TYPE_UINT;
+ temp_dst.writemask = WRITEMASK_X << el;
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
+ }
+ result_src = temp;
+ if (ir->operation == ir_unop_u642u || ir->operation == ir_unop_i642u)
+ result_src.type = GLSL_TYPE_UINT;
+ else
+ result_src.type = GLSL_TYPE_INT;
+ break;
+ }
+ case ir_unop_i642b:
+ emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int(0));
+ break;
+ case ir_unop_i642f:
+ emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]);
+ break;
+ case ir_unop_u642f:
+ emit_asm(ir, TGSI_OPCODE_U642F, result_dst, op[0]);
+ break;
+ case ir_unop_i642d:
+ emit_asm(ir, TGSI_OPCODE_I642D, result_dst, op[0]);
+ break;
+ case ir_unop_u642d:
+ emit_asm(ir, TGSI_OPCODE_U642D, result_dst, op[0]);
+ break;
+ case ir_unop_i2i64:
+ emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]);
+ break;
+ case ir_unop_f2i64:
+ emit_asm(ir, TGSI_OPCODE_F2I64, result_dst, op[0]);
+ break;
+ case ir_unop_d2i64:
+ emit_asm(ir, TGSI_OPCODE_D2I64, result_dst, op[0]);
+ break;
+ case ir_unop_i2u64:
+ emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]);
+ break;
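/* Note on the case above: I2I64 for ir_unop_i2u64 appears deliberate;
 * sign-extending the 32-bit value first matches C-style int -> uint64
 * conversion, so no unsigned variant is needed here.
 */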
+ case ir_unop_f2u64:
+ emit_asm(ir, TGSI_OPCODE_F2U64, result_dst, op[0]);
+ break;
+ case ir_unop_d2u64:
+ emit_asm(ir, TGSI_OPCODE_D2U64, result_dst, op[0]);
+ break;
+ /* these might be needed */
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_unorm_2x16:
case ir_unop_pack_snorm_4x8:
decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index;
decl->base_type = type_without_array->base_type;
decl->usage_mask = u_bit_consecutive(component, num_components);
+ if (var->data.stream & (1u << 31)) {
+ decl->gs_out_streams = var->data.stream & ~(1u << 31);
+ } else {
+ assert(var->data.stream < 4);
+ decl->gs_out_streams = 0;
+ for (unsigned i = 0; i < num_components; ++i)
+ decl->gs_out_streams |= var->data.stream << (2 * (component + i));
+ }
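/* Worked example: stream indices are packed two bits per component. For
 * var->data.stream == 2 covering components 1..2, the loop yields
 * (2 << 2) | (2 << 4) == 0b101000: stream 2 for components 1 and 2,
 * stream 0 elsewhere. The (1u << 31) flag marks a stream value that is
 * already packed in this per-component form.
 */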
if (is_inout_array(shader->Stage, var, &remove_array)) {
decl->array_id = num_output_arrays + 1;
else
decl->size = type_size(var->type);
- entry = new(mem_ctx) variable_storage(var,
- PROGRAM_OUTPUT,
- decl->mesa_index,
- decl->array_id);
+ if (var->data.fb_fetch_output) {
+ st_dst_reg dst = st_dst_reg(get_temp(var->type));
+ st_src_reg src = st_src_reg(PROGRAM_OUTPUT, decl->mesa_index,
+ var->type, component, decl->array_id);
+ emit_asm(NULL, TGSI_OPCODE_FBFETCH, dst, src);
+ entry = new(mem_ctx) variable_storage(var, dst.file, dst.index,
+ dst.array_id);
+ } else {
+ entry = new(mem_ctx) variable_storage(var,
+ PROGRAM_OUTPUT,
+ decl->mesa_index,
+ decl->array_id);
+ }
entry->component = component;
this->variables.push_tail(entry);
memcpy(&values[i * 2], &ir->value.d[i], sizeof(double));
}
break;
+ case GLSL_TYPE_INT64:
+ gl_type = GL_INT64_ARB;
+ for (i = 0; i < ir->type->vector_elements; i++) {
+ memcpy(&values[i * 2], &ir->value.i64[i], sizeof(int64_t));
+ }
+ break;
+ case GLSL_TYPE_UINT64:
+ gl_type = GL_UNSIGNED_INT64_ARB;
+ for (i = 0; i < ir->type->vector_elements; i++) {
+ memcpy(&values[i * 2], &ir->value.u64[i], sizeof(uint64_t));
+ }
+ break;
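/* Illustrative only: values[] holds 32-bit slots, so each 64-bit component
 * is memcpy'd across a slot pair, i.e. on a little-endian host:
 *
 *    values[2 * i]     = low dword of component i
 *    values[2 * i + 1] = high dword of component i
 */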
case GLSL_TYPE_UINT:
gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
for (i = 0; i < ir->type->vector_elements; i++) {
inst->resource = buffer;
if (access)
inst->buffer_access = access->value.u[0];
+
+ if (inst == this->instructions.get_head_raw())
+ break;
inst = (glsl_to_tgsi_instruction *)inst->get_prev();
- if (inst->op == TGSI_OPCODE_UADD)
+
+ if (inst->op == TGSI_OPCODE_UADD) {
+ if (inst == this->instructions.get_head_raw())
+ break;
inst = (glsl_to_tgsi_instruction *)inst->get_prev();
- } while (inst && inst->op == op && inst->resource.file == PROGRAM_UNDEFINED);
+ }
+ } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED);
}
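/* The get_head_raw() checks above stop the backwards walk at the first
 * instruction; previously the loop could call get_prev() on the head and
 * then read the list sentinel through inst->op.
 */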
void
switch (ir->op) {
case ir_tex:
- opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
+ opcode = (is_cube_array && ir->shadow_comparator) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
if (ir->offset) {
ir->offset->accept(this);
offset[0] = this->result;
* the shadow comparator value must also be projected.
*/
st_src_reg tmp_src = coord;
- if (ir->shadow_comparitor) {
+ if (ir->shadow_comparator) {
/* Slot the shadow value in as the second to last component of the
* coord.
*/
- ir->shadow_comparitor->accept(this);
+ ir->shadow_comparator->accept(this);
tmp_src = get_temp(glsl_type::vec4_type);
st_dst_reg tmp_dst = st_dst_reg(tmp_src);
* comparator was put in the correct place (and projected) by the code,
* above, that handles by-hand projection.
*/
- if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
+ if (ir->shadow_comparator && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
/* Slot the shadow value in as the second to last component of the
* coord.
*/
- ir->shadow_comparitor->accept(this);
+ ir->shadow_comparator->accept(this);
if (is_cube_array) {
cube_sc = get_temp(glsl_type::float_type);
} else if (opcode == TGSI_OPCODE_TEX2) {
inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
} else if (opcode == TGSI_OPCODE_TG4) {
- if (is_cube_array && ir->shadow_comparitor) {
+ if (is_cube_array && ir->shadow_comparator) {
inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
} else {
inst = emit_asm(ir, opcode, result_dst, coord, component);
} else
inst = emit_asm(ir, opcode, result_dst, coord);
- if (ir->shadow_comparitor)
+ if (ir->shadow_comparator)
inst->tex_shadow = GL_TRUE;
inst->resource.index = sampler_index;
inst->src[0].file != PROGRAM_ARRAY &&
!inst->src[0].reladdr &&
!inst->src[0].reladdr2 &&
- !inst->src[0].negate) {
+ !inst->src[0].negate &&
+ !inst->src[0].abs) {
for (int i = 0; i < 4; i++) {
if (inst->dst[0].writemask & (1 << i)) {
acp[4 * inst->dst[0].index + i] = inst;
return ureg_DECL_immediate(ureg, &values[0].f, size);
case GL_DOUBLE:
return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size);
+ case GL_INT64_ARB:
+ return ureg_DECL_immediate_int64(ureg, (int64_t *)&values[0].f, size);
+ case GL_UNSIGNED_INT64_ARB:
+ return ureg_DECL_immediate_uint64(ureg, (uint64_t *)&values[0].f, size);
case GL_INT:
return ureg_DECL_immediate_int(ureg, &values[0].i, size);
case GL_UNSIGNED_INT:
GET_SWZ(src_reg->swizzle, 2) & 0x3,
GET_SWZ(src_reg->swizzle, 3) & 0x3);
+ if (src_reg->abs)
+ src = ureg_abs(src);
+
if ((src_reg->negate & 0xf) == NEGATE_XYZW)
src = ureg_negate(src);
dst = ureg_DECL_output_layout(ureg,
outputSemanticName[slot], outputSemanticIndex[slot],
+ decl->gs_out_streams,
slot, tgsi_usage_mask, decl->array_id, decl->size);
for (unsigned j = 0; j < decl->size; ++j) {
}
if (procType == PIPE_SHADER_FRAGMENT) {
- if (program->shader->info.EarlyFragmentTests)
+ if (program->shader->Program->info.fs.early_fragment_tests)
ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);
if (proginfo->info.inputs_read & VARYING_BIT_POS) {
}
}
- if (program->shader) {
- unsigned num_ubos = program->shader->NumUniformBlocks;
-
- for (i = 0; i < num_ubos; i++) {
- unsigned size = program->shader->UniformBlocks[i]->UniformBufferSize;
- unsigned num_const_vecs = (size + 15) / 16;
- unsigned first, last;
- assert(num_const_vecs > 0);
- first = 0;
- last = num_const_vecs > 0 ? num_const_vecs - 1 : 0;
- ureg_DECL_constant2D(t->ureg, first, last, i + 1);
- }
+ for (i = 0; i < proginfo->info.num_ubos; i++) {
+ unsigned size = proginfo->sh.UniformBlocks[i]->UniformBufferSize;
+ unsigned num_const_vecs = (size + 15) / 16;
+ unsigned first, last;
+ assert(num_const_vecs > 0);
+ first = 0;
+ last = num_const_vecs > 0 ? num_const_vecs - 1 : 0;
+ ureg_DECL_constant2D(t->ureg, first, last, i + 1);
}
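/* Worked example: a UBO with UniformBufferSize == 40 bytes needs
 * num_const_vecs = (40 + 15) / 16 = 3 vec4 constants, declared as the 2D
 * range [0..2] in constant buffer i + 1 (buffer 0 holds the ordinary
 * uniforms).
 */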
/* Emit immediate values.
if (program->use_shared_memory)
t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
- for (i = 0; i < program->shader->NumImages; i++) {
+ for (i = 0; i < program->shader->Program->info.num_images; i++) {
if (program->images_used & (1 << i)) {
t->images[i] = ureg_DECL_image(ureg, i,
program->image_targets[i],
prog->Parameters);
/* Remove reads from output registers. */
- lower_output_reads(shader->Stage, shader->ir);
+ if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS))
+ lower_output_reads(shader->Stage, shader->ir);
/* Emit intermediate IR for main(). */
visit_exec_list(shader->ir, v);
return prog;
}
-static void
-set_affected_state_flags(uint64_t *states,
- struct gl_program *prog,
- struct gl_linked_shader *shader,
- uint64_t new_constants,
- uint64_t new_sampler_views,
- uint64_t new_samplers,
- uint64_t new_images,
- uint64_t new_ubos,
- uint64_t new_ssbos,
- uint64_t new_atomics)
-{
- if (prog->Parameters->NumParameters)
- *states |= new_constants;
-
- if (shader->num_samplers)
- *states |= new_sampler_views | new_samplers;
-
- if (shader->NumImages)
- *states |= new_images;
-
- if (shader->NumUniformBlocks)
- *states |= new_ubos;
+/* See if there are unsupported control flow statements. */
+class ir_control_flow_info_visitor : public ir_hierarchical_visitor {
+private:
+ const struct gl_shader_compiler_options *options;
+public:
+ ir_control_flow_info_visitor(const struct gl_shader_compiler_options *options)
+ : options(options),
+ unsupported(false)
+ {
+ }
- if (shader->NumShaderStorageBlocks)
- *states |= new_ssbos;
+ virtual ir_visitor_status visit_enter(ir_function *ir)
+ {
+ /* Other functions are skipped (same as glsl_to_tgsi). */
+ if (strcmp(ir->name, "main") == 0)
+ return visit_continue;
- if (prog->info.num_abos)
- *states |= new_atomics;
-}
+ return visit_continue_with_parent;
+ }
-static struct gl_program *
-get_mesa_program(struct gl_context *ctx,
- struct gl_shader_program *shader_program,
- struct gl_linked_shader *shader)
-{
- struct pipe_screen *pscreen = ctx->st->pipe->screen;
- enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage);
- enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
- pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_PREFERRED_IR);
- struct gl_program *prog = NULL;
-
- if (preferred_ir == PIPE_SHADER_IR_NIR) {
- /* TODO only for GLSL VS/FS for now: */
- switch (shader->Stage) {
- case MESA_SHADER_VERTEX:
- case MESA_SHADER_FRAGMENT:
- prog = st_nir_get_mesa_program(ctx, shader_program, shader);
- default:
- break;
+ virtual ir_visitor_status visit_enter(ir_call *ir)
+ {
+ if (!ir->callee->is_intrinsic()) {
+ unsupported = true; /* it's a function call */
+ return visit_stop;
}
- } else {
- prog = get_mesa_program_tgsi(ctx, shader_program, shader);
+ return visit_continue;
}
- if (prog) {
- uint64_t *states;
-
- /* This determines which states will be updated when the shader is
- * bound.
- */
- switch (shader->Stage) {
- case MESA_SHADER_VERTEX:
- states = &((struct st_vertex_program*)prog)->affected_states;
-
- *states = ST_NEW_VS_STATE |
- ST_NEW_RASTERIZER |
- ST_NEW_VERTEX_ARRAYS;
-
- set_affected_state_flags(states, prog, shader,
- ST_NEW_VS_CONSTANTS,
- ST_NEW_VS_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_VS_IMAGES,
- ST_NEW_VS_UBOS,
- ST_NEW_VS_SSBOS,
- ST_NEW_VS_ATOMICS);
- break;
-
- case MESA_SHADER_TESS_CTRL:
- states = &((struct st_tessctrl_program*)prog)->affected_states;
-
- *states = ST_NEW_TCS_STATE;
-
- set_affected_state_flags(states, prog, shader,
- ST_NEW_TCS_CONSTANTS,
- ST_NEW_TCS_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_TCS_IMAGES,
- ST_NEW_TCS_UBOS,
- ST_NEW_TCS_SSBOS,
- ST_NEW_TCS_ATOMICS);
- break;
-
- case MESA_SHADER_TESS_EVAL:
- states = &((struct st_tesseval_program*)prog)->affected_states;
-
- *states = ST_NEW_TES_STATE |
- ST_NEW_RASTERIZER;
-
- set_affected_state_flags(states, prog, shader,
- ST_NEW_TES_CONSTANTS,
- ST_NEW_TES_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_TES_IMAGES,
- ST_NEW_TES_UBOS,
- ST_NEW_TES_SSBOS,
- ST_NEW_TES_ATOMICS);
- break;
-
- case MESA_SHADER_GEOMETRY:
- states = &((struct st_geometry_program*)prog)->affected_states;
-
- *states = ST_NEW_GS_STATE |
- ST_NEW_RASTERIZER;
-
- set_affected_state_flags(states, prog, shader,
- ST_NEW_GS_CONSTANTS,
- ST_NEW_GS_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_GS_IMAGES,
- ST_NEW_GS_UBOS,
- ST_NEW_GS_SSBOS,
- ST_NEW_GS_ATOMICS);
- break;
-
- case MESA_SHADER_FRAGMENT:
- states = &((struct st_fragment_program*)prog)->affected_states;
-
- /* gl_FragCoord and glDrawPixels always use constants. */
- *states = ST_NEW_FS_STATE |
- ST_NEW_SAMPLE_SHADING |
- ST_NEW_FS_CONSTANTS;
-
- set_affected_state_flags(states, prog, shader,
- ST_NEW_FS_CONSTANTS,
- ST_NEW_FS_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_FS_IMAGES,
- ST_NEW_FS_UBOS,
- ST_NEW_FS_SSBOS,
- ST_NEW_FS_ATOMICS);
- break;
-
- case MESA_SHADER_COMPUTE:
- states = &((struct st_compute_program*)prog)->affected_states;
-
- *states = ST_NEW_CS_STATE;
-
- set_affected_state_flags(states, prog, shader,
- ST_NEW_CS_CONSTANTS,
- ST_NEW_CS_SAMPLER_VIEWS,
- ST_NEW_CS_SAMPLERS,
- ST_NEW_CS_IMAGES,
- ST_NEW_CS_UBOS,
- ST_NEW_CS_SSBOS,
- ST_NEW_CS_ATOMICS);
- break;
-
- default:
- unreachable("unhandled shader stage");
+ virtual ir_visitor_status visit_enter(ir_return *ir)
+ {
+ if (options->EmitNoMainReturn) {
+ unsupported = true;
+ return visit_stop;
}
+ return visit_continue;
}
- return prog;
-}
+ bool unsupported;
+};
+static bool
+has_unsupported_control_flow(exec_list *ir,
+ const struct gl_shader_compiler_options *options)
+{
+ ir_control_flow_info_visitor visitor(options);
+ visit_list_elements(&visitor, ir);
+ return visitor.unsupported;
+}
extern "C" {
GLboolean
st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
+ /* Return early if the shader is being loaded from the on-disk cache. */
+ if (st_load_tgsi_from_disk_cache(ctx, prog)) {
+ return GL_TRUE;
+ }
+
struct pipe_screen *pscreen = ctx->st->pipe->screen;
assert(prog->data->LinkStatus);
if (prog->_LinkedShaders[i] == NULL)
continue;
- bool progress;
- exec_list *ir = prog->_LinkedShaders[i]->ir;
- gl_shader_stage stage = prog->_LinkedShaders[i]->Stage;
+ struct gl_linked_shader *shader = prog->_LinkedShaders[i];
+ exec_list *ir = shader->ir;
+ gl_shader_stage stage = shader->Stage;
const struct gl_shader_compiler_options *options =
&ctx->Const.ShaderCompilerOptions[stage];
enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(stage);
*/
if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
- lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
+ lower_variable_index_to_cond_assign(stage, ir,
options->EmitNoIndirectInput,
options->EmitNoIndirectOutput,
options->EmitNoIndirectTemp,
options->EmitNoIndirectUniform);
}
+ if (!pscreen->get_param(pscreen, PIPE_CAP_INT64_DIVMOD))
+ lower_64bit_integer_instructions(ir, DIV64 | MOD64);
+
if (ctx->Extensions.ARB_shading_language_packing) {
unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
LOWER_UNPACK_SNORM_2x16 |
if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
lower_offset_arrays(ir);
do_mat_op_to_vec(ir);
+
+ if (stage == MESA_SHADER_FRAGMENT)
+ lower_blend_equation_advanced(shader);
+
lower_instructions(ir,
MOD_TO_FLOOR |
- DIV_TO_MUL_RCP |
+ FDIV_TO_MUL_RCP |
EXP_TO_EXP2 |
LOG_TO_LOG2 |
LDEXP_TO_ARITH |
lower_discard(ir);
}
- do {
- progress = false;
-
- progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
-
- progress = do_common_optimization(ir, true, true, options,
- ctx->Const.NativeIntegers)
- || progress;
-
- progress = lower_if_to_cond_assign((gl_shader_stage)i, ir,
- options->MaxIfDepth, if_threshold) ||
- progress;
-
- } while (progress);
+ if (ctx->Const.GLSLOptimizeConservatively) {
+ /* Do it once and repeat only if there's unsupported control flow. */
+ do {
+ do_common_optimization(ir, true, true, options,
+ ctx->Const.NativeIntegers);
+ lower_if_to_cond_assign((gl_shader_stage)i, ir,
+ options->MaxIfDepth, if_threshold);
+ } while (has_unsupported_control_flow(ir, options));
+ } else {
+ /* Repeat it until it stops making changes. */
+ bool progress;
+ do {
+ progress = do_common_optimization(ir, true, true, options,
+ ctx->Const.NativeIntegers);
+ progress |= lower_if_to_cond_assign((gl_shader_stage)i, ir,
+ options->MaxIfDepth, if_threshold);
+ } while (progress);
+ }
validate_ir_tree(ir);
}
build_program_resource_list(ctx, prog);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- struct gl_program *linked_prog;
-
- if (prog->_LinkedShaders[i] == NULL)
+ struct gl_linked_shader *shader = prog->_LinkedShaders[i];
+ if (shader == NULL)
continue;
- linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+ enum pipe_shader_type ptarget =
+ st_shader_stage_to_ptarget(shader->Stage);
+ enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
+ pscreen->get_shader_param(pscreen, ptarget,
+ PIPE_SHADER_CAP_PREFERRED_IR);
+
+ struct gl_program *linked_prog = NULL;
+ if (preferred_ir == PIPE_SHADER_IR_NIR) {
+ /* TODO only for GLSL VS/FS for now: */
+ switch (shader->Stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_FRAGMENT:
+ linked_prog = st_nir_get_mesa_program(ctx, prog, shader);
+ default:
+ break;
+ }
+ } else {
+ linked_prog = get_mesa_program_tgsi(ctx, prog, shader);
+ }
if (linked_prog) {
+ st_set_prog_affected_state_flags(linked_prog);
if (!ctx->Driver.ProgramStringNotify(ctx,
_mesa_shader_stage_to_program(i),
linked_prog)) {
- _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
- NULL);
+ _mesa_reference_program(ctx, &shader->Program, NULL);
return GL_FALSE;
}
}
const GLuint outputMapping[],
struct pipe_stream_output_info *so)
{
+ if (!glsl_to_tgsi->shader_program->last_vert_prog)
+ return;
+
struct gl_transform_feedback_info *info =
- &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
+ glsl_to_tgsi->shader_program->last_vert_prog->sh.LinkedTransformFeedback;
st_translate_stream_output_info2(info, outputMapping, so);
}