#include "st_format.h"
#include "st_glsl_types.h"
#include "st_nir.h"
+#include "st_shader_cache.h"
#include <algorithm>
uint16_t swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
int negate:4; /**< NEGATE_XYZW mask from mesa */
unsigned abs:1;
- enum glsl_base_type type:4; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+ enum glsl_base_type type:5; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
unsigned has_index2:1;
gl_register_file file:5; /**< PROGRAM_* from Mesa */
/*
int16_t index2D;
gl_register_file file:5; /**< PROGRAM_* from Mesa */
unsigned writemask:4; /**< Bitfield of WRITEMASK_[XYZW] */
- enum glsl_base_type type:4; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+ enum glsl_base_type type:5; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
unsigned has_index2:1;
unsigned array_id:10;
unsigned sampler_base:5;
unsigned sampler_array_size:6; /**< 1-based size of sampler array, 1 if not array */
unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */
- glsl_base_type tex_type:4;
+ glsl_base_type tex_type:5;
unsigned tex_shadow:1;
unsigned image_format:9;
unsigned tex_offset_num_offset:3;
int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
};
-static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
-static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
+static const st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
+static const st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
struct inout_decl {
unsigned mesa_index;
uint32_t samplers_used;
glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */
- int buffers_used;
int images_used;
int image_targets[PIPE_MAX_SHADER_IMAGES];
unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
bool have_sqrt;
bool have_fma;
bool use_shared_memory;
+ bool has_tex_txf_lz;
variable_storage *find_variable_storage(ir_variable *var);
void visit_membar_intrinsic(ir_call *);
void visit_shared_intrinsic(ir_call *);
void visit_image_intrinsic(ir_call *);
+ void visit_generic_intrinsic(ir_call *, unsigned op);
st_src_reg result;
void rename_temp_registers(int num_renames, struct rename_reg_pair *renames);
void get_first_temp_read(int *first_reads);
+ void get_first_temp_write(int *first_writes);
void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
void get_last_temp_write(int *last_writes);
ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args);
va_end(args);
- prog->data->LinkStatus = GL_FALSE;
+ prog->data->LinkStatus = linking_failure;
}
static int
if (is_resource_instruction(op))
type = src1.type;
+ else if (src0.type == GLSL_TYPE_INT64 || src1.type == GLSL_TYPE_INT64)
+ type = GLSL_TYPE_INT64;
+ else if (src0.type == GLSL_TYPE_UINT64 || src1.type == GLSL_TYPE_UINT64)
+ type = GLSL_TYPE_UINT64;
else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
type = GLSL_TYPE_DOUBLE;
else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
else if (native_integers)
type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
+#define case7(c, f, i, u, d, i64, ui64) \
+ case TGSI_OPCODE_##c: \
+ if (type == GLSL_TYPE_UINT64) \
+ op = TGSI_OPCODE_##ui64; \
+ else if (type == GLSL_TYPE_INT64) \
+ op = TGSI_OPCODE_##i64; \
+ else if (type == GLSL_TYPE_DOUBLE) \
+ op = TGSI_OPCODE_##d; \
+ else if (type == GLSL_TYPE_INT) \
+ op = TGSI_OPCODE_##i; \
+ else if (type == GLSL_TYPE_UINT) \
+ op = TGSI_OPCODE_##u; \
+ else \
+ op = TGSI_OPCODE_##f; \
+ break;
#define case5(c, f, i, u, d) \
case TGSI_OPCODE_##c: \
if (type == GLSL_TYPE_DOUBLE) \
break;
#define case3(f, i, u) case4(f, f, i, u)
-#define case4d(f, i, u, d) case5(f, f, i, u, d)
+#define case6d(f, i, u, d, i64, u64) case7(f, f, i, u, d, i64, u64)
#define case3fid(f, i, d) case5(f, f, i, i, d)
+#define case3fid64(f, i, d, i64) case7(f, f, i, i, d, i64, i64)
#define case2fi(f, i) case4(f, f, i, i)
#define case2iu(i, u) case4(i, LAST, i, u)
-#define casecomp(c, f, i, u, d) \
+#define case2iu64(i, i64) case7(i, LAST, i, i, LAST, i64, i64)
+#define case4iu64(i, u, i64, u64) case7(i, LAST, i, u, LAST, i64, u64)
+
+#define casecomp(c, f, i, u, d, i64, ui64) \
case TGSI_OPCODE_##c: \
- if (type == GLSL_TYPE_DOUBLE) \
+ if (type == GLSL_TYPE_INT64) \
+ op = TGSI_OPCODE_##i64; \
+ else if (type == GLSL_TYPE_UINT64) \
+ op = TGSI_OPCODE_##ui64; \
+ else if (type == GLSL_TYPE_DOUBLE) \
op = TGSI_OPCODE_##d; \
else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \
op = TGSI_OPCODE_##i; \
break;
switch(op) {
- case3fid(ADD, UADD, DADD);
- case3fid(MUL, UMUL, DMUL);
+ case3fid64(ADD, UADD, DADD, U64ADD);
+ case3fid64(MUL, UMUL, DMUL, U64MUL);
case3fid(MAD, UMAD, DMAD);
case3fid(FMA, UMAD, DFMA);
- case3(DIV, IDIV, UDIV);
- case4d(MAX, IMAX, UMAX, DMAX);
- case4d(MIN, IMIN, UMIN, DMIN);
- case2iu(MOD, UMOD);
+ case6d(DIV, IDIV, UDIV, DDIV, I64DIV, U64DIV);
+ case6d(MAX, IMAX, UMAX, DMAX, I64MAX, U64MAX);
+ case6d(MIN, IMIN, UMIN, DMIN, I64MIN, U64MIN);
+ case4iu64(MOD, UMOD, I64MOD, U64MOD);
- casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ);
- casecomp(SNE, FSNE, USNE, USNE, DSNE);
- casecomp(SGE, FSGE, ISGE, USGE, DSGE);
- casecomp(SLT, FSLT, ISLT, USLT, DSLT);
+ casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ);
+ casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE);
+ casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE);
+ casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT);
- case2iu(ISHR, USHR);
+ case2iu64(SHL, U64SHL);
+ case4iu64(ISHR, USHR, I64SHR, U64SHR);
- case3fid(SSG, ISSG, DSSG);
+ case3fid64(SSG, ISSG, DSSG, I64SSG);
case2iu(IBFE, UBFE);
case2iu(IMSB, UMSB);
int index = 0;
immediate_storage *entry;
- int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
+ int size32 = size * ((datatype == GL_DOUBLE ||
+ datatype == GL_INT64_ARB ||
+ datatype == GL_UNSIGNED_INT64_ARB)? 2 : 1);
int i;
/* Search immediate storage to see if we already have an identical
}
break;
case ir_unop_neg:
- if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
+ if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
+ emit_asm(ir, TGSI_OPCODE_I64NEG, result_dst, op[0]);
+ else if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
else if (result_dst.type == GLSL_TYPE_DOUBLE)
emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs());
else if (result_dst.type == GLSL_TYPE_DOUBLE)
emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]);
+ else if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
+ emit_asm(ir, TGSI_OPCODE_I64ABS, result_dst, op[0]);
else
emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]);
break;
emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
break;
case ir_unop_exp:
+ assert(!"not reached: should be handled by exp_to_exp2");
+ break;
case ir_unop_log:
- assert(!"not reached: should be handled by ir_explog_to_explog2");
+ assert(!"not reached: should be handled by log_to_log2");
break;
case ir_unop_log2:
emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
break;
case ir_binop_div:
- if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
- assert(!"not reached: should be handled by ir_div_to_mul_rcp");
- else
- emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
+ emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
break;
case ir_binop_mod:
if (result_dst.type == GLSL_TYPE_FLOAT)
/* fallthrough to next case otherwise */
case ir_unop_i2u:
case ir_unop_u2i:
+ case ir_unop_i642u64:
+ case ir_unop_u642i64:
/* Converting between signed and unsigned integers is a no-op. */
result_src = op[0];
result_src.type = result_dst.type;
else
emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
break;
+ case ir_unop_bitcast_u642d:
+ case ir_unop_bitcast_i642d:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_DOUBLE;
+ break;
+ case ir_unop_bitcast_d2i64:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_INT64;
+ break;
+ case ir_unop_bitcast_d2u64:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_UINT64;
+ break;
case ir_unop_trunc:
emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
break;
break;
}
case ir_binop_lshift:
- if (native_integers) {
- emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
- break;
- }
case ir_binop_rshift:
if (native_integers) {
- emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
+ unsigned opcode = ir->operation == ir_binop_lshift ? TGSI_OPCODE_SHL
+ : TGSI_OPCODE_ISHR;
+ st_src_reg count;
+
+ if (glsl_base_type_is_64bit(op[0].type)) {
+ /* GLSL shift operations have 32-bit shift counts, but TGSI uses
+ * 64 bits.
+ */
+ count = get_temp(glsl_type::u64vec(ir->operands[1]->type->components()));
+ emit_asm(ir, TGSI_OPCODE_U2I64, st_dst_reg(count), op[1]);
+ } else {
+ count = op[1];
+ }
+
+ emit_asm(ir, opcode, result_dst, op[0], count);
break;
}
case ir_binop_bit_and:
const_offset % 16 / 4,
const_offset % 16 / 4);
- if (ir->type->base_type == GLSL_TYPE_BOOL) {
+ if (ir->type->is_boolean()) {
emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
} else {
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
break;
case ir_unop_unpack_double_2x32:
case ir_unop_pack_double_2x32:
+ case ir_unop_unpack_int_2x32:
+ case ir_unop_pack_int_2x32:
+ case ir_unop_unpack_uint_2x32:
+ case ir_unop_pack_uint_2x32:
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
break;
case ir_binop_ldexp:
- if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) {
+ if (ir->operands[0]->type->is_double()) {
emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]);
} else {
assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()");
break;
}
- case ir_unop_vote_any:
- emit_asm(ir, TGSI_OPCODE_VOTE_ANY, result_dst, op[0]);
+ case ir_unop_u2i64:
+ case ir_unop_u2u64:
+ case ir_unop_b2i64: {
+ st_src_reg temp = get_temp(glsl_type::uvec4_type);
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ unsigned orig_swz = op[0].swizzle;
+ /*
+ * To convert unsigned to 64-bit:
+ * zero Y channel, copy X channel.
+ */
+ temp_dst.writemask = WRITEMASK_Y;
+ if (vector_elements > 1)
+ temp_dst.writemask |= WRITEMASK_W;
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0));
+ temp_dst.writemask = WRITEMASK_X;
+ if (vector_elements > 1)
+ temp_dst.writemask |= WRITEMASK_Z;
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 0), GET_SWZ(orig_swz, 0),
+ GET_SWZ(orig_swz, 1), GET_SWZ(orig_swz, 1));
+ if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64)
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
+ else
+ emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1));
+ result_src = temp;
+ result_src.type = GLSL_TYPE_UINT64;
+ if (vector_elements > 2) {
+ /* Subtle: We rely on the fact that get_temp here returns the next
+ * TGSI temporary register directly after the temp register used for
+ * the first two components, so that the result gets picked up
+ * automatically.
+ */
+ st_src_reg temp = get_temp(glsl_type::uvec4_type);
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ temp_dst.writemask = WRITEMASK_Y;
+ if (vector_elements > 3)
+ temp_dst.writemask |= WRITEMASK_W;
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0));
+
+ temp_dst.writemask = WRITEMASK_X;
+ if (vector_elements > 3)
+ temp_dst.writemask |= WRITEMASK_Z;
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), GET_SWZ(orig_swz, 2),
+ GET_SWZ(orig_swz, 3), GET_SWZ(orig_swz, 3));
+ if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64)
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
+ else
+ emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1));
+ }
break;
- case ir_unop_vote_all:
- emit_asm(ir, TGSI_OPCODE_VOTE_ALL, result_dst, op[0]);
+ }
+ case ir_unop_i642i:
+ case ir_unop_u642i:
+ case ir_unop_u642u:
+ case ir_unop_i642u: {
+ st_src_reg temp = get_temp(glsl_type::uvec4_type);
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ unsigned orig_swz = op[0].swizzle;
+ unsigned orig_idx = op[0].index;
+ int el;
+ temp_dst.writemask = WRITEMASK_X;
+
+ for (el = 0; el < vector_elements; el++) {
+ unsigned swz = GET_SWZ(orig_swz, el);
+ if (swz & 1)
+ op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z);
+ else
+ op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
+ if (swz > 2)
+ op[0].index = orig_idx + 1;
+ op[0].type = GLSL_TYPE_UINT;
+ temp_dst.writemask = WRITEMASK_X << el;
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
+ }
+ result_src = temp;
+ if (ir->operation == ir_unop_u642u || ir->operation == ir_unop_i642u)
+ result_src.type = GLSL_TYPE_UINT;
+ else
+ result_src.type = GLSL_TYPE_INT;
break;
- case ir_unop_vote_eq:
- emit_asm(ir, TGSI_OPCODE_VOTE_EQ, result_dst, op[0]);
+ }
+ case ir_unop_i642b:
+ emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int(0));
break;
-
+ case ir_unop_i642f:
+ emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]);
+ break;
+ case ir_unop_u642f:
+ emit_asm(ir, TGSI_OPCODE_U642F, result_dst, op[0]);
+ break;
+ case ir_unop_i642d:
+ emit_asm(ir, TGSI_OPCODE_I642D, result_dst, op[0]);
+ break;
+ case ir_unop_u642d:
+ emit_asm(ir, TGSI_OPCODE_U642D, result_dst, op[0]);
+ break;
+ case ir_unop_i2i64:
+ emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]);
+ break;
+ case ir_unop_f2i64:
+ emit_asm(ir, TGSI_OPCODE_F2I64, result_dst, op[0]);
+ break;
+ case ir_unop_d2i64:
+ emit_asm(ir, TGSI_OPCODE_D2I64, result_dst, op[0]);
+ break;
+ case ir_unop_i2u64:
+ emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]);
+ break;
+ case ir_unop_f2u64:
+ emit_asm(ir, TGSI_OPCODE_F2U64, result_dst, op[0]);
+ break;
+ case ir_unop_d2u64:
+ emit_asm(ir, TGSI_OPCODE_D2U64, result_dst, op[0]);
+ break;
+ /* these might be needed */
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_unorm_2x16:
case ir_unop_pack_snorm_4x8:
case ir_unop_unpack_snorm_4x8:
case ir_unop_unpack_unorm_4x8:
+ case ir_unop_unpack_sampler_2x32:
+ case ir_unop_pack_sampler_2x32:
+ case ir_unop_unpack_image_2x32:
+ case ir_unop_pack_image_2x32:
+
case ir_quadop_vector:
case ir_binop_vector_extract:
case ir_triop_vector_insert:
st_dst_reg *l, st_src_reg *r,
st_src_reg *cond, bool cond_swap)
{
- if (type->base_type == GLSL_TYPE_STRUCT) {
+ if (type->is_record()) {
for (unsigned int i = 0; i < type->length; i++) {
emit_block_mov(ir, type->fields.structure[i].type, l, r,
cond, cond_swap);
* aggregate constant and move each constant value into it. If we
* get lucky, copy propagation will eliminate the extra moves.
*/
- if (ir->type->base_type == GLSL_TYPE_STRUCT) {
+ if (ir->type->is_record()) {
st_src_reg temp_base = get_temp(ir->type);
st_dst_reg temp = st_dst_reg(temp_base);
memcpy(&values[i * 2], &ir->value.d[i], sizeof(double));
}
break;
+ case GLSL_TYPE_INT64:
+ gl_type = GL_INT64_ARB;
+ for (i = 0; i < ir->type->vector_elements; i++) {
+ memcpy(&values[i * 2], &ir->value.d[i], sizeof(int64_t));
+ }
+ break;
+ case GLSL_TYPE_UINT64:
+ gl_type = GL_UNSIGNED_INT64_ARB;
+ for (i = 0; i < ir->type->vector_elements; i++) {
+ memcpy(&values[i * 2], &ir->value.d[i], sizeof(uint64_t));
+ }
+ break;
case GLSL_TYPE_UINT:
gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
for (i = 0; i < ir->type->vector_elements; i++) {
inst->resource = buffer;
if (access)
inst->buffer_access = access->value.u[0];
+
+ if (inst == this->instructions.get_head_raw())
+ break;
inst = (glsl_to_tgsi_instruction *)inst->get_prev();
- if (inst->op == TGSI_OPCODE_UADD)
+
+ if (inst->op == TGSI_OPCODE_UADD) {
+ if (inst == this->instructions.get_head_raw())
+ break;
inst = (glsl_to_tgsi_instruction *)inst->get_prev();
- } while (inst && inst->op == op && inst->resource.file == PROGRAM_UNDEFINED);
+ }
+ } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED);
}
void
inst->sampler_array_size = sampler_array_size;
inst->sampler_base = sampler_base;
- switch (type->sampler_dimensionality) {
- case GLSL_SAMPLER_DIM_1D:
- inst->tex_target = (type->sampler_array)
- ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
- break;
- case GLSL_SAMPLER_DIM_2D:
- inst->tex_target = (type->sampler_array)
- ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
- break;
- case GLSL_SAMPLER_DIM_3D:
- inst->tex_target = TEXTURE_3D_INDEX;
- break;
- case GLSL_SAMPLER_DIM_CUBE:
- inst->tex_target = (type->sampler_array)
- ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
- break;
- case GLSL_SAMPLER_DIM_RECT:
- inst->tex_target = TEXTURE_RECT_INDEX;
- break;
- case GLSL_SAMPLER_DIM_BUF:
- inst->tex_target = TEXTURE_BUFFER_INDEX;
- break;
- case GLSL_SAMPLER_DIM_EXTERNAL:
- inst->tex_target = TEXTURE_EXTERNAL_INDEX;
- break;
- case GLSL_SAMPLER_DIM_MS:
- inst->tex_target = (type->sampler_array)
- ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
- break;
- default:
- assert(!"Should not get here.");
- }
-
+ inst->tex_target = type->sampler_index();
inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx),
_mesa_get_shader_image_format(imgvar->data.image_format));
- if (imgvar->data.image_coherent)
+ if (imgvar->data.memory_coherent)
inst->buffer_access |= TGSI_MEMORY_COHERENT;
- if (imgvar->data.image_restrict)
+ if (imgvar->data.memory_restrict)
inst->buffer_access |= TGSI_MEMORY_RESTRICT;
- if (imgvar->data.image_volatile)
+ if (imgvar->data.memory_volatile)
inst->buffer_access |= TGSI_MEMORY_VOLATILE;
}
+/**
+ * Emit a single TGSI instruction for a "generic" intrinsic ir_call:
+ * the call's return dereference becomes the destination register and
+ * each actual parameter becomes one source operand, in order.
+ *
+ * \param ir  the intrinsic call; must have a return_deref and at most
+ *            4 actual parameters (asserted below).
+ * \param op  the TGSI opcode to emit (e.g. TGSI_OPCODE_BALLOT).
+ */
+void
+glsl_to_tgsi_visitor::visit_generic_intrinsic(ir_call *ir, unsigned op)
+{
+   /* Visiting the return deref leaves its register in this->result. */
+   ir->return_deref->accept(this);
+   st_dst_reg dst = st_dst_reg(this->result);
+
+   /* Unused source slots stay undef_src so emit_asm sees well-defined
+    * (PROGRAM_UNDEFINED) operands for arities below 4.
+    */
+   st_src_reg src[4] = { undef_src, undef_src, undef_src, undef_src };
+   unsigned num_src = 0;
+   foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
+      assert(num_src < ARRAY_SIZE(src));
+
+      /* Reset result.file so we can assert the visit actually produced
+       * a value for this parameter.
+       */
+      this->result.file = PROGRAM_UNDEFINED;
+      param->accept(this);
+      assert(this->result.file != PROGRAM_UNDEFINED);
+
+      src[num_src] = this->result;
+      num_src++;
+   }
+
+   emit_asm(ir, op, dst, src[0], src[1], src[2], src[3]);
+}
+
void
glsl_to_tgsi_visitor::visit(ir_call *ir)
{
visit_image_intrinsic(ir);
return;
+ case ir_intrinsic_shader_clock:
+ visit_generic_intrinsic(ir, TGSI_OPCODE_CLOCK);
+ return;
+
+ case ir_intrinsic_vote_all:
+ visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_ALL);
+ return;
+ case ir_intrinsic_vote_any:
+ visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_ANY);
+ return;
+ case ir_intrinsic_vote_eq:
+ visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_EQ);
+ return;
+ case ir_intrinsic_ballot:
+ visit_generic_intrinsic(ir, TGSI_OPCODE_BALLOT);
+ return;
+ case ir_intrinsic_read_first_invocation:
+ visit_generic_intrinsic(ir, TGSI_OPCODE_READ_FIRST);
+ return;
+ case ir_intrinsic_read_invocation:
+ visit_generic_intrinsic(ir, TGSI_OPCODE_READ_INVOC);
+ return;
+
case ir_intrinsic_invalid:
case ir_intrinsic_generic_load:
case ir_intrinsic_generic_store:
case ir_intrinsic_generic_atomic_max:
case ir_intrinsic_generic_atomic_exchange:
case ir_intrinsic_generic_atomic_comp_swap:
- case ir_intrinsic_shader_clock:
unreachable("Invalid intrinsic");
}
}
unsigned opcode = TGSI_OPCODE_NOP;
const glsl_type *sampler_type = ir->sampler->type;
unsigned sampler_array_size = 1, sampler_base = 0;
- uint16_t sampler_index = 0;
- bool is_cube_array = false;
+ bool is_cube_array = false, is_cube_shadow = false;
unsigned i;
- /* if we are a cube array sampler */
- if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
- sampler_type->sampler_array)) {
- is_cube_array = true;
+ /* if we are a cube array sampler or a cube shadow */
+ if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
+ is_cube_array = sampler_type->sampler_array;
+ is_cube_shadow = sampler_type->sampler_shadow;
}
if (ir->coordinate) {
*/
result_src = get_temp(ir->type);
result_dst = st_dst_reg(result_src);
+ result_dst.writemask = (1 << ir->type->vector_elements) - 1;
switch (ir->op) {
case ir_tex:
}
break;
case ir_txb:
- if (is_cube_array ||
- sampler_type == glsl_type::samplerCubeShadow_type) {
+ if (is_cube_array || is_cube_shadow) {
opcode = TGSI_OPCODE_TXB2;
}
else {
}
break;
case ir_txl:
- opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
- ir->lod_info.lod->accept(this);
- lod_info = this->result;
+ if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) {
+ opcode = TGSI_OPCODE_TEX_LZ;
+ } else {
+ opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
+ ir->lod_info.lod->accept(this);
+ lod_info = this->result;
+ }
if (ir->offset) {
ir->offset->accept(this);
offset[0] = this->result;
levels_src = get_temp(ir->type);
break;
case ir_txf:
- opcode = TGSI_OPCODE_TXF;
- ir->lod_info.lod->accept(this);
- lod_info = this->result;
+ if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) {
+ opcode = TGSI_OPCODE_TXF_LZ;
+ } else {
+ opcode = TGSI_OPCODE_TXF;
+ ir->lod_info.lod->accept(this);
+ lod_info = this->result;
+ }
if (ir->offset) {
ir->offset->accept(this);
offset[0] = this->result;
component = this->result;
if (ir->offset) {
ir->offset->accept(this);
- if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) {
+ if (ir->offset->type->is_array()) {
const glsl_type *elt_type = ir->offset->type->fields.array;
for (i = 0; i < ir->offset->type->length; i++) {
offset[i] = this->result;
coord_dst.writemask = WRITEMASK_XYZW;
}
+ st_src_reg sampler(PROGRAM_SAMPLER, 0, GLSL_TYPE_UINT);
+
get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base,
- &sampler_index, &reladdr, true);
- if (reladdr.file != PROGRAM_UNDEFINED)
+ (uint16_t *)&sampler.index, &reladdr, true);
+
+ if (reladdr.file != PROGRAM_UNDEFINED) {
+ sampler.reladdr = ralloc(mem_ctx, st_src_reg);
+ *sampler.reladdr = reladdr;
emit_arl(ir, sampler_reladdr, reladdr);
+ }
if (opcode == TGSI_OPCODE_TXD)
inst = emit_asm(ir, opcode, result_dst, coord, dx, dy);
inst = emit_asm(ir, opcode, result_dst, lod_info);
} else if (opcode == TGSI_OPCODE_TXQS) {
inst = emit_asm(ir, opcode, result_dst);
- } else if (opcode == TGSI_OPCODE_TXF) {
- inst = emit_asm(ir, opcode, result_dst, coord);
} else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
inst = emit_asm(ir, opcode, result_dst, coord, lod_info);
} else if (opcode == TGSI_OPCODE_TEX2) {
if (ir->shadow_comparator)
inst->tex_shadow = GL_TRUE;
- inst->resource.index = sampler_index;
+ inst->resource = sampler;
inst->sampler_array_size = sampler_array_size;
inst->sampler_base = sampler_base;
- if (reladdr.file != PROGRAM_UNDEFINED) {
- inst->resource.reladdr = ralloc(mem_ctx, st_src_reg);
- memcpy(inst->resource.reladdr, &reladdr, sizeof(reladdr));
- }
-
if (ir->offset) {
if (!inst->tex_offsets)
inst->tex_offsets = rzalloc_array(inst, st_src_reg, MAX_GLSL_TEXTURE_OFFSET);
inst->tex_offset_num_offset = i;
}
- switch (sampler_type->sampler_dimensionality) {
- case GLSL_SAMPLER_DIM_1D:
- inst->tex_target = (sampler_type->sampler_array)
- ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
- break;
- case GLSL_SAMPLER_DIM_2D:
- inst->tex_target = (sampler_type->sampler_array)
- ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
- break;
- case GLSL_SAMPLER_DIM_3D:
- inst->tex_target = TEXTURE_3D_INDEX;
- break;
- case GLSL_SAMPLER_DIM_CUBE:
- inst->tex_target = (sampler_type->sampler_array)
- ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
- break;
- case GLSL_SAMPLER_DIM_RECT:
- inst->tex_target = TEXTURE_RECT_INDEX;
- break;
- case GLSL_SAMPLER_DIM_BUF:
- inst->tex_target = TEXTURE_BUFFER_INDEX;
- break;
- case GLSL_SAMPLER_DIM_EXTERNAL:
- inst->tex_target = TEXTURE_EXTERNAL_INDEX;
- break;
- case GLSL_SAMPLER_DIM_MS:
- inst->tex_target = (sampler_type->sampler_array)
- ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
- break;
- default:
- assert(!"Should not get here.");
- }
-
+ inst->tex_target = sampler_type->sampler_index();
inst->tex_type = ir->type->base_type;
this->result = result_src;
num_immediates = 0;
num_address_regs = 0;
samplers_used = 0;
- buffers_used = 0;
images_used = 0;
indirect_addr_consts = false;
wpos_transform_const = -1;
have_sqrt = false;
have_fma = false;
use_shared_memory = false;
+ has_tex_txf_lz = false;
}
glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
{
v->samplers_used = 0;
- v->buffers_used = 0;
v->images_used = 0;
foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
if (inst->resource.file != PROGRAM_UNDEFINED && (
is_resource_instruction(inst->op) ||
inst->op == TGSI_OPCODE_STORE)) {
- if (inst->resource.file == PROGRAM_BUFFER) {
- v->buffers_used |= 1 << inst->resource.index;
- } else if (inst->resource.file == PROGRAM_MEMORY) {
+ if (inst->resource.file == PROGRAM_MEMORY) {
v->use_shared_memory = true;
- } else {
- assert(inst->resource.file == PROGRAM_IMAGE);
+ } else if (inst->resource.file == PROGRAM_IMAGE) {
for (int i = 0; i < inst->sampler_array_size; i++) {
unsigned idx = inst->sampler_base + i;
v->images_used |= 1 << idx;
/* Give up if we encounter relative addressing or flow control. */
if (inst->dst[0].reladdr || inst->dst[0].reladdr2 ||
inst->dst[1].reladdr || inst->dst[1].reladdr2 ||
- tgsi_get_opcode_info(inst->op)->is_branch ||
+ inst->info->is_branch ||
inst->op == TGSI_OPCODE_CONT ||
inst->op == TGSI_OPCODE_END ||
inst->op == TGSI_OPCODE_RET) {
}
}
+/**
+ * Record, for each PROGRAM_TEMPORARY register, the index of the first
+ * instruction that writes it.
+ *
+ * \param first_writes  array indexed by temp register; the caller must
+ *                      pre-fill every entry with -1 ("never written").
+ *
+ * A write that happens inside a loop is attributed to the index of the
+ * outermost enclosing BGNLOOP rather than the writing instruction itself.
+ */
+void
+glsl_to_tgsi_visitor::get_first_temp_write(int *first_writes)
+{
+   int depth = 0; /* loop depth */
+   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
+   unsigned i = 0, j;
+
+   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
+      for (j = 0; j < num_inst_dst_regs(inst); j++) {
+         if (inst->dst[j].file == PROGRAM_TEMPORARY) {
+            /* Only the first write counts; inside a loop, credit the
+             * outermost BGNLOOP's index instead of this instruction's.
+             */
+            if (first_writes[inst->dst[j].index] == -1)
+               first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
+         }
+      }
+
+      /* Track only the outermost loop boundary: loop_start is set when
+       * entering depth 1 and cleared when leaving it.
+       */
+      if (inst->op == TGSI_OPCODE_BGNLOOP) {
+         if(depth++ == 0)
+            loop_start = i;
+      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
+         if (--depth == 0)
+            loop_start = -1;
+      }
+      assert(depth >= 0);
+      i++;
+   }
+}
+
void
glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
{
void
glsl_to_tgsi_visitor::merge_registers(void)
{
- int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
- int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
+ int *last_reads = ralloc_array(mem_ctx, int, this->next_temp);
+ int *first_writes = ralloc_array(mem_ctx, int, this->next_temp);
struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
int i, j;
int num_renames = 0;
{
int i = 0;
int new_index = 0;
- int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp);
+ int *first_writes = ralloc_array(mem_ctx, int, this->next_temp);
struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
int num_renames = 0;
+
for (i = 0; i < this->next_temp; i++) {
- first_reads[i] = -1;
+ first_writes[i] = -1;
}
- get_first_temp_read(first_reads);
+ get_first_temp_write(first_writes);
for (i = 0; i < this->next_temp; i++) {
- if (first_reads[i] < 0) continue;
+ if (first_writes[i] < 0) continue;
if (i != new_index) {
renames[num_renames].old_reg = i;
renames[num_renames].new_reg = new_index;
rename_temp_registers(num_renames, renames);
this->next_temp = new_index;
ralloc_free(renames);
- ralloc_free(first_reads);
+ ralloc_free(first_writes);
}
/* ------------------------- TGSI conversion stuff -------------------------- */
struct inout_decl *output_decls;
unsigned num_output_decls;
- const GLuint *inputMapping;
- const GLuint *outputMapping;
+ const ubyte *inputMapping;
+ const ubyte *outputMapping;
unsigned procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */
};
case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
return TGSI_SEMANTIC_BLOCK_SIZE;
+ /* ARB_shader_ballot */
+ case SYSTEM_VALUE_SUBGROUP_SIZE:
+ return TGSI_SEMANTIC_SUBGROUP_SIZE;
+ case SYSTEM_VALUE_SUBGROUP_INVOCATION:
+ return TGSI_SEMANTIC_SUBGROUP_INVOCATION;
+ case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
+ return TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
+ case SYSTEM_VALUE_SUBGROUP_GE_MASK:
+ return TGSI_SEMANTIC_SUBGROUP_GE_MASK;
+ case SYSTEM_VALUE_SUBGROUP_GT_MASK:
+ return TGSI_SEMANTIC_SUBGROUP_GT_MASK;
+ case SYSTEM_VALUE_SUBGROUP_LE_MASK:
+ return TGSI_SEMANTIC_SUBGROUP_LE_MASK;
+ case SYSTEM_VALUE_SUBGROUP_LT_MASK:
+ return TGSI_SEMANTIC_SUBGROUP_LT_MASK;
+
/* Unhandled */
case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
return ureg_DECL_immediate(ureg, &values[0].f, size);
case GL_DOUBLE:
return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size);
+ case GL_INT64_ARB:
+ return ureg_DECL_immediate_int64(ureg, (int64_t *)&values[0].f, size);
+ case GL_UNSIGNED_INT64_ARB:
+ return ureg_DECL_immediate_uint64(ureg, (uint64_t *)&values[0].f, size);
case GL_INT:
return ureg_DECL_immediate_int(ureg, &values[0].i, size);
case GL_UNSIGNED_INT:
return;
case TGSI_OPCODE_TEX:
+ case TGSI_OPCODE_TEX_LZ:
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXD:
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXQ:
case TGSI_OPCODE_TXQS:
case TGSI_OPCODE_TXF:
+ case TGSI_OPCODE_TXF_LZ:
case TGSI_OPCODE_TEX2:
case TGSI_OPCODE_TXB2:
case TGSI_OPCODE_TXL2:
inst->op,
dst, num_dst,
tex_target,
+ st_translate_texture_type(inst->tex_type),
texoffsets, inst->tex_offset_num_offset,
src, num_src);
return;
return mapping[a.mesa_index] < mapping[b.mesa_index];
}
- const GLuint *mapping;
+ const ubyte *mapping;
};
/* Sort the given array of decls by the corresponding slot (TGSI file index).
static void
sort_inout_decls_by_slot(struct inout_decl *decls,
unsigned count,
- const GLuint mapping[])
+ const ubyte mapping[])
{
sort_inout_decls sorter;
sorter.mapping = mapping;
glsl_to_tgsi_visitor *program,
const struct gl_program *proginfo,
GLuint numInputs,
- const GLuint inputMapping[],
- const GLuint inputSlotToAttr[],
+ const ubyte inputMapping[],
+ const ubyte inputSlotToAttr[],
const ubyte inputSemanticName[],
const ubyte inputSemanticIndex[],
- const GLuint interpMode[],
+ const ubyte interpMode[],
GLuint numOutputs,
- const GLuint outputMapping[],
- const GLuint outputSlotToAttr[],
+ const ubyte outputMapping[],
const ubyte outputSemanticName[],
const ubyte outputSemanticIndex[])
{
/* texture samplers */
for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
if (program->samplers_used & (1u << i)) {
- unsigned type;
+ unsigned type = st_translate_texture_type(program->sampler_types[i]);
t->samplers[i] = ureg_DECL_sampler(ureg, i);
- switch (program->sampler_types[i]) {
- case GLSL_TYPE_INT:
- type = TGSI_RETURN_TYPE_SINT;
- break;
- case GLSL_TYPE_UINT:
- type = TGSI_RETURN_TYPE_UINT;
- break;
- case GLSL_TYPE_FLOAT:
- type = TGSI_RETURN_TYPE_FLOAT;
- break;
- default:
- unreachable("not reached");
- }
-
ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i],
type, type, type, type );
}
}
- for (i = 0; i < frag_const->MaxAtomicBuffers; i++) {
- if (program->buffers_used & (1 << i)) {
- t->buffers[i] = ureg_DECL_buffer(ureg, i, true);
+ /* Declare atomic and shader storage buffers. */
+ {
+ struct gl_program *prog = program->prog;
+
+ for (i = 0; i < prog->info.num_abos; i++) {
+ unsigned index = prog->sh.AtomicBuffers[i]->Binding;
+ assert(index < frag_const->MaxAtomicBuffers);
+ t->buffers[index] = ureg_DECL_buffer(ureg, index, true);
}
- }
- for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks;
- i++) {
- if (program->buffers_used & (1 << i)) {
- t->buffers[i] = ureg_DECL_buffer(ureg, i, false);
+ assert(prog->info.num_ssbos <= frag_const->MaxShaderStorageBlocks);
+ for (i = 0; i < prog->info.num_ssbos; i++) {
+ unsigned index = frag_const->MaxAtomicBuffers + i;
+ t->buffers[index] = ureg_DECL_buffer(ureg, index, false);
}
}
&ctx->Const.ShaderCompilerOptions[shader->Stage];
struct pipe_screen *pscreen = ctx->st->pipe->screen;
enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage);
+ unsigned skip_merge_registers;
validate_ir_tree(shader->ir);
PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
+ v->has_tex_txf_lz = pscreen->get_param(pscreen,
+ PIPE_CAP_TGSI_TEX_TXF_LZ);
+ skip_merge_registers =
+ pscreen->get_shader_param(pscreen, ptarget,
+ PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS);
_mesa_generate_parameters_list_for_uniforms(shader_program, shader,
prog->Parameters);
* optimization passes. */
{
int i;
- int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
- int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
- int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
- int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
+ int *first_writes = ralloc_array(v->mem_ctx, int, v->next_temp);
+ int *first_reads = ralloc_array(v->mem_ctx, int, v->next_temp);
+ int *last_writes = ralloc_array(v->mem_ctx, int, v->next_temp);
+ int *last_reads = ralloc_array(v->mem_ctx, int, v->next_temp);
for (i = 0; i < v->next_temp; i++) {
first_writes[i] = -1;
while (v->eliminate_dead_code());
v->merge_two_dsts();
- v->merge_registers();
+ if (!skip_merge_registers)
+ v->merge_registers();
v->renumber_registers();
/* Write the END instruction. */
* prog->ParameterValues to get reallocated (e.g., anything that adds a
* program constant) has to happen before creating this linkage.
*/
- _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
+ _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters,
+ true);
if (!shader_program->data->LinkStatus) {
free_glsl_to_tgsi_visitor(v);
_mesa_reference_program(ctx, &shader->Program, NULL);
struct st_vertex_program *stvp;
struct st_fragment_program *stfp;
- struct st_geometry_program *stgp;
- struct st_tessctrl_program *sttcp;
- struct st_tesseval_program *sttep;
+ struct st_common_program *stp;
struct st_compute_program *stcp;
switch (shader->Stage) {
stfp = (struct st_fragment_program *)prog;
stfp->glsl_to_tgsi = v;
break;
- case MESA_SHADER_GEOMETRY:
- stgp = (struct st_geometry_program *)prog;
- stgp->glsl_to_tgsi = v;
- break;
case MESA_SHADER_TESS_CTRL:
- sttcp = (struct st_tessctrl_program *)prog;
- sttcp->glsl_to_tgsi = v;
- break;
case MESA_SHADER_TESS_EVAL:
- sttep = (struct st_tesseval_program *)prog;
- sttep->glsl_to_tgsi = v;
+ case MESA_SHADER_GEOMETRY:
+ stp = st_common_program(prog);
+ stp->glsl_to_tgsi = v;
break;
case MESA_SHADER_COMPUTE:
stcp = (struct st_compute_program *)prog;
return prog;
}
-static void
-set_affected_state_flags(uint64_t *states,
- struct gl_program *prog,
- uint64_t new_constants,
- uint64_t new_sampler_views,
- uint64_t new_samplers,
- uint64_t new_images,
- uint64_t new_ubos,
- uint64_t new_ssbos,
- uint64_t new_atomics)
-{
- if (prog->Parameters->NumParameters)
- *states |= new_constants;
-
- if (prog->info.num_textures)
- *states |= new_sampler_views | new_samplers;
-
- if (prog->info.num_images)
- *states |= new_images;
-
- if (prog->info.num_ubos)
- *states |= new_ubos;
-
- if (prog->info.num_ssbos)
- *states |= new_ssbos;
-
- if (prog->info.num_abos)
- *states |= new_atomics;
-}
-
-static struct gl_program *
-get_mesa_program(struct gl_context *ctx,
- struct gl_shader_program *shader_program,
- struct gl_linked_shader *shader)
-{
- struct pipe_screen *pscreen = ctx->st->pipe->screen;
- enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage);
- enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
- pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_PREFERRED_IR);
- struct gl_program *prog = NULL;
-
- if (preferred_ir == PIPE_SHADER_IR_NIR) {
- /* TODO only for GLSL VS/FS for now: */
- switch (shader->Stage) {
- case MESA_SHADER_VERTEX:
- case MESA_SHADER_FRAGMENT:
- prog = st_nir_get_mesa_program(ctx, shader_program, shader);
- default:
- break;
- }
- } else {
- prog = get_mesa_program_tgsi(ctx, shader_program, shader);
- }
-
- if (prog) {
- uint64_t *states;
-
- /* This determines which states will be updated when the shader is
- * bound.
- */
- switch (shader->Stage) {
- case MESA_SHADER_VERTEX:
- states = &((struct st_vertex_program*)prog)->affected_states;
-
- *states = ST_NEW_VS_STATE |
- ST_NEW_RASTERIZER |
- ST_NEW_VERTEX_ARRAYS;
-
- set_affected_state_flags(states, prog,
- ST_NEW_VS_CONSTANTS,
- ST_NEW_VS_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_VS_IMAGES,
- ST_NEW_VS_UBOS,
- ST_NEW_VS_SSBOS,
- ST_NEW_VS_ATOMICS);
- break;
-
- case MESA_SHADER_TESS_CTRL:
- states = &((struct st_tessctrl_program*)prog)->affected_states;
-
- *states = ST_NEW_TCS_STATE;
-
- set_affected_state_flags(states, prog,
- ST_NEW_TCS_CONSTANTS,
- ST_NEW_TCS_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_TCS_IMAGES,
- ST_NEW_TCS_UBOS,
- ST_NEW_TCS_SSBOS,
- ST_NEW_TCS_ATOMICS);
- break;
-
- case MESA_SHADER_TESS_EVAL:
- states = &((struct st_tesseval_program*)prog)->affected_states;
-
- *states = ST_NEW_TES_STATE |
- ST_NEW_RASTERIZER;
-
- set_affected_state_flags(states, prog,
- ST_NEW_TES_CONSTANTS,
- ST_NEW_TES_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_TES_IMAGES,
- ST_NEW_TES_UBOS,
- ST_NEW_TES_SSBOS,
- ST_NEW_TES_ATOMICS);
- break;
-
- case MESA_SHADER_GEOMETRY:
- states = &((struct st_geometry_program*)prog)->affected_states;
-
- *states = ST_NEW_GS_STATE |
- ST_NEW_RASTERIZER;
-
- set_affected_state_flags(states, prog,
- ST_NEW_GS_CONSTANTS,
- ST_NEW_GS_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_GS_IMAGES,
- ST_NEW_GS_UBOS,
- ST_NEW_GS_SSBOS,
- ST_NEW_GS_ATOMICS);
- break;
-
- case MESA_SHADER_FRAGMENT:
- states = &((struct st_fragment_program*)prog)->affected_states;
-
- /* gl_FragCoord and glDrawPixels always use constants. */
- *states = ST_NEW_FS_STATE |
- ST_NEW_SAMPLE_SHADING |
- ST_NEW_FS_CONSTANTS;
-
- set_affected_state_flags(states, prog,
- ST_NEW_FS_CONSTANTS,
- ST_NEW_FS_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_FS_IMAGES,
- ST_NEW_FS_UBOS,
- ST_NEW_FS_SSBOS,
- ST_NEW_FS_ATOMICS);
- break;
-
- case MESA_SHADER_COMPUTE:
- states = &((struct st_compute_program*)prog)->affected_states;
-
- *states = ST_NEW_CS_STATE;
-
- set_affected_state_flags(states, prog,
- ST_NEW_CS_CONSTANTS,
- ST_NEW_CS_SAMPLER_VIEWS,
- ST_NEW_CS_SAMPLERS,
- ST_NEW_CS_IMAGES,
- ST_NEW_CS_UBOS,
- ST_NEW_CS_SSBOS,
- ST_NEW_CS_ATOMICS);
- break;
-
- default:
- unreachable("unhandled shader stage");
- }
- }
-
- return prog;
-}
-
/* See if there are unsupported control flow statements. */
class ir_control_flow_info_visitor : public ir_hierarchical_visitor {
private:
GLboolean
st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
+ /* Return early if we are loading the shader from on-disk cache */
+ if (st_load_tgsi_from_disk_cache(ctx, prog)) {
+ return GL_TRUE;
+ }
+
struct pipe_screen *pscreen = ctx->st->pipe->screen;
assert(prog->data->LinkStatus);
options->EmitNoIndirectUniform);
}
+ if (!pscreen->get_param(pscreen, PIPE_CAP_INT64_DIVMOD))
+ lower_64bit_integer_instructions(ir, DIV64 | MOD64);
+
if (ctx->Extensions.ARB_shading_language_packing) {
unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
LOWER_UNPACK_SNORM_2x16 |
lower_instructions(ir,
MOD_TO_FLOOR |
- DIV_TO_MUL_RCP |
+ FDIV_TO_MUL_RCP |
EXP_TO_EXP2 |
LOG_TO_LOG2 |
LDEXP_TO_ARITH |
(options->EmitNoPow ? POW_TO_EXP2 : 0) |
(!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
(options->EmitNoSat ? SAT_TO_CLAMP : 0) |
+ (ctx->Const.ForceGLSLAbsSqrt ? SQRT_TO_ABS_SQRT : 0) |
/* Assume that if ARB_gpu_shader5 is not supported
* then all of the extended integer functions need
* lowering. It may be necessary to add some caps
build_program_resource_list(ctx, prog);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- struct gl_program *linked_prog;
-
- if (prog->_LinkedShaders[i] == NULL)
+ struct gl_linked_shader *shader = prog->_LinkedShaders[i];
+ if (shader == NULL)
continue;
- linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+ enum pipe_shader_type ptarget =
+ st_shader_stage_to_ptarget(shader->Stage);
+ enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
+ pscreen->get_shader_param(pscreen, ptarget,
+ PIPE_SHADER_CAP_PREFERRED_IR);
+
+ struct gl_program *linked_prog = NULL;
+ if (preferred_ir == PIPE_SHADER_IR_NIR) {
+ /* TODO only for GLSL VS/FS/CS for now: */
+ switch (shader->Stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_COMPUTE:
+ linked_prog = st_nir_get_mesa_program(ctx, prog, shader);
+ default:
+ break;
+ }
+ } else {
+ linked_prog = get_mesa_program_tgsi(ctx, prog, shader);
+ }
if (linked_prog) {
+ st_set_prog_affected_state_flags(linked_prog);
if (!ctx->Driver.ProgramStringNotify(ctx,
_mesa_shader_stage_to_program(i),
linked_prog)) {
- _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
- NULL);
+ _mesa_reference_program(ctx, &shader->Program, NULL);
return GL_FALSE;
}
}
void
st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
- const GLuint outputMapping[],
+ const ubyte outputMapping[],
struct pipe_stream_output_info *so)
{
+ if (!glsl_to_tgsi->shader_program->last_vert_prog)
+ return;
+
struct gl_transform_feedback_info *info =
- glsl_to_tgsi->shader_program->xfb_program->sh.LinkedTransformFeedback;
+ glsl_to_tgsi->shader_program->last_vert_prog->sh.LinkedTransformFeedback;
st_translate_stream_output_info2(info, outputMapping, so);
}
void
st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
- const GLuint outputMapping[],
+ const ubyte outputMapping[],
struct pipe_stream_output_info *so)
{
unsigned i;