this->has_index2 = false;
this->double_reg2 = false;
this->array_id = 0;
+ this->is_double_vertex_input = false;
}
st_src_reg(gl_register_file file, int index, int type)
this->has_index2 = false;
this->double_reg2 = false;
this->array_id = 0;
+ this->is_double_vertex_input = false;
}
st_src_reg(gl_register_file file, int index, int type, int index2D)
this->has_index2 = false;
this->double_reg2 = false;
this->array_id = 0;
+ this->is_double_vertex_input = false;
}
st_src_reg()
this->has_index2 = false;
this->double_reg2 = false;
this->array_id = 0;
+ this->is_double_vertex_input = false;
}
explicit st_src_reg(st_dst_reg reg);
*/
bool double_reg2;
unsigned array_id;
+ bool is_double_vertex_input;
};
class st_dst_reg {
this->has_index2 = reg.has_index2;
this->double_reg2 = false;
this->array_id = reg.array_id;
+ this->is_double_vertex_input = false;
}
st_dst_reg::st_dst_reg(st_src_reg reg)
int dead_mask; /**< Used in dead code elimination */
class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
+ const struct tgsi_opcode_info *info;
};
class variable_storage : public exec_node {
unsigned mesa_index;
unsigned array_id;
unsigned array_size;
+ unsigned array_type;
+};
+
+/**
+ * Look up the array_type stored in the declaration whose array_id equals
+ * \p array_id.
+ *
+ * \param arrays    array declarations to search
+ * \param count     number of entries in \p arrays
+ * \param array_id  identifier compared against each entry's array_id
+ * \return the array_type of the first matching entry, or GLSL_TYPE_ERROR
+ *         when no entry matches
+ */
+static unsigned
+find_array_type(struct array_decl *arrays, unsigned count, unsigned array_id)
+{
+   for (unsigned n = 0; n != count; ++n) {
+      if (arrays[n].array_id == array_id)
+         return arrays[n].array_type;
+   }
+
+   /* Nothing in the list carries this id. */
+   return GLSL_TYPE_ERROR;
+}
+
+struct rename_reg_pair { /* one temporary-register renaming: old index -> new index */
+ int old_reg; /* index compared against PROGRAM_TEMPORARY register indices */
+ int new_reg; /* index stored in place of old_reg on a match */
};
struct glsl_to_tgsi_visitor : public ir_visitor {
void simplify_cmp(void);
- void rename_temp_register(int index, int new_index);
- int get_first_temp_read(int index);
- int get_first_temp_write(int index);
- int get_last_temp_read(int index);
- int get_last_temp_write(int index);
+ void rename_temp_registers(int num_renames, struct rename_reg_pair *renames);
+ void get_first_temp_read(int *first_reads);
+ void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
+ void get_last_temp_write(int *last_writes);
void copy_propagate(void);
int eliminate_dead_code(void);
return size_swizzles[size - 1];
}
-static bool
-is_tex_instruction(unsigned opcode)
-{
- const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
- return info->is_tex;
-}
-
static unsigned
-num_inst_dst_regs(unsigned opcode)
+num_inst_dst_regs(const glsl_to_tgsi_instruction *op) /* number of destination registers of instruction op */
{
- const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
- return info->num_dst;
+ return op->info->num_dst; /* read through the tgsi_opcode_info pointer stored in the instruction */
}
static unsigned
-num_inst_src_regs(unsigned opcode)
+num_inst_src_regs(const glsl_to_tgsi_instruction *op) /* number of source registers of instruction op */
{
- const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
- return info->is_tex ? info->num_src - 1 : info->num_src;
+ return op->info->is_tex ? op->info->num_src - 1 : op->info->num_src; /* texture opcodes report one source fewer; presumably the sampler operand is excluded -- TODO confirm */
}
glsl_to_tgsi_instruction *
{
glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
int num_reladdr = 0, i, j;
+ bool dst_is_double[2];
op = get_opcode(ir, op, dst, src0, src1);
assert(num_reladdr == 0);
inst->op = op;
+ inst->info = tgsi_get_opcode_info(op);
inst->dst[0] = dst;
inst->dst[1] = dst1;
inst->src[0] = src0;
* GLSL [0].z -> TGSI [1].xy
* GLSL [0].w -> TGSI [1].zw
*/
- if (inst->dst[0].type == GLSL_TYPE_DOUBLE || inst->dst[1].type == GLSL_TYPE_DOUBLE ||
+ for (j = 0; j < 2; j++) {
+ dst_is_double[j] = false;
+ if (inst->dst[j].type == GLSL_TYPE_DOUBLE)
+ dst_is_double[j] = true;
+ else if (inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) {
+ unsigned type = find_array_type(this->output_arrays, this->num_output_arrays, inst->dst[j].array_id);
+ if (type == GLSL_TYPE_DOUBLE)
+ dst_is_double[j] = true;
+ }
+ }
+
+ if (dst_is_double[0] || dst_is_double[1] ||
inst->src[0].type == GLSL_TYPE_DOUBLE) {
glsl_to_tgsi_instruction *dinst = NULL;
int initial_src_swz[4], initial_src_idx[4];
/* modify the destination if we are splitting */
for (j = 0; j < 2; j++) {
- if (dinst->dst[j].type == GLSL_TYPE_DOUBLE) {
+ if (dst_is_double[j]) {
dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
dinst->dst[j].index = initial_dst_idx[j];
if (i > 1)
- F2D is a float src0, DLDEXP is integer src1 */
if (op == TGSI_OPCODE_F2D ||
op == TGSI_OPCODE_DLDEXP ||
- (op == TGSI_OPCODE_UCMP && dinst->dst[0].type == GLSL_TYPE_DOUBLE)) {
+ (op == TGSI_OPCODE_UCMP && dst_is_double[0])) {
dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
}
}
}
static int
-type_size(const struct glsl_type *type)
+attrib_type_size(const struct glsl_type *type, bool is_vs_input)
{
unsigned int i;
int size;
break;
case GLSL_TYPE_DOUBLE:
if (type->is_matrix()) {
- if (type->vector_elements <= 2)
+ if (type->vector_elements <= 2 || is_vs_input)
return type->matrix_columns;
else
return type->matrix_columns * 2;
/* For doubles if we have a double or dvec2 they fit in one
* vec4, else they need 2 vec4s.
*/
- if (type->vector_elements <= 2)
+ if (type->vector_elements <= 2 || is_vs_input)
return 1;
else
return 2;
break;
case GLSL_TYPE_ARRAY:
assert(type->length > 0);
- return type_size(type->fields.array) * type->length;
+ return attrib_type_size(type->fields.array, is_vs_input) * type->length;
case GLSL_TYPE_STRUCT:
size = 0;
for (i = 0; i < type->length; i++) {
- size += type_size(type->fields.structure[i].type);
+ size += attrib_type_size(type->fields.structure[i].type, is_vs_input);
}
return size;
case GLSL_TYPE_SAMPLER:
return 0;
}
+/**
+ * Size of \p type as computed by attrib_type_size() with the
+ * vertex-shader-input flag cleared.
+ */
+static int
+type_size(const struct glsl_type *type)
+{
+   const bool is_vs_input = false;
+
+   return attrib_type_size(type, is_vs_input);
+}
/**
* If the given GLSL type is an array or matrix or a structure containing
if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2);
if (*num_reladdr != 1) {
- st_src_reg temp = get_temp(glsl_type::vec4_type);
+ st_src_reg temp = get_temp(reg->type == GLSL_TYPE_DOUBLE ? glsl_type::dvec4_type : glsl_type::vec4_type);
emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
*reg = temp;
st_dst_reg temp_dst = st_dst_reg(temp);
st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
- emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
+ if (ir->operands[0]->type->is_boolean() &&
+ ir->operands[1]->as_constant() &&
+ ir->operands[1]->as_constant()->is_one()) {
+ emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]);
+ } else {
+ emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
+ }
/* Emit 1-3 AND operations to combine the SEQ results. */
switch (ir->operands[0]->type->vector_elements) {
st_src_reg temp = get_temp(native_integers ?
glsl_type::uvec4_type :
glsl_type::vec4_type);
- emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+ if (ir->operands[0]->type->is_boolean() &&
+ ir->operands[1]->as_constant() &&
+ ir->operands[1]->as_constant()->is_zero()) {
+ emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]);
+ } else {
+ emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+ }
if (native_integers) {
st_dst_reg temp_dst = st_dst_reg(temp);
}
break;
- case ir_unop_any: {
- assert(ir->operands[0]->type->is_vector());
-
- if (native_integers) {
- int dst_swizzle = 0, op0_swizzle, i;
- st_src_reg accum = op[0];
-
- op0_swizzle = op[0].swizzle;
- accum.swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 0),
- GET_SWZ(op0_swizzle, 0),
- GET_SWZ(op0_swizzle, 0),
- GET_SWZ(op0_swizzle, 0));
- for (i = 0; i < 4; i++) {
- if (result_dst.writemask & (1 << i)) {
- dst_swizzle = MAKE_SWIZZLE4(i, i, i, i);
- break;
- }
- }
- assert(i != 4);
- assert(ir->operands[0]->type->is_boolean());
-
- /* OR all the components together, since they should be either 0 or ~0
- */
- switch (ir->operands[0]->type->vector_elements) {
- case 4:
- op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 3),
- GET_SWZ(op0_swizzle, 3),
- GET_SWZ(op0_swizzle, 3),
- GET_SWZ(op0_swizzle, 3));
- emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
- accum = st_src_reg(result_dst);
- accum.swizzle = dst_swizzle;
- /* fallthrough */
- case 3:
- op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 2),
- GET_SWZ(op0_swizzle, 2),
- GET_SWZ(op0_swizzle, 2),
- GET_SWZ(op0_swizzle, 2));
- emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
- accum = st_src_reg(result_dst);
- accum.swizzle = dst_swizzle;
- /* fallthrough */
- case 2:
- op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 1),
- GET_SWZ(op0_swizzle, 1),
- GET_SWZ(op0_swizzle, 1),
- GET_SWZ(op0_swizzle, 1));
- emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
- break;
- default:
- assert(!"Unexpected vector size");
- break;
- }
- } else {
- /* After the dot-product, the value will be an integer on the
- * range [0,4]. Zero stays zero, and positive values become 1.0.
- */
- glsl_to_tgsi_instruction *const dp =
- emit_dp(ir, result_dst, op[0], op[0],
- ir->operands[0]->type->vector_elements);
- if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
- result_dst.type == GLSL_TYPE_FLOAT) {
- /* The clamping to [0,1] can be done for free in the fragment
- * shader with a saturate.
- */
- dp->saturate = true;
- } else if (result_dst.type == GLSL_TYPE_FLOAT) {
- /* Negating the result of the dot-product gives values on the range
- * [-4, 0]. Zero stays zero, and negative values become 1.0. This
- * is achieved using SLT.
- */
- st_src_reg slt_src = result_src;
- slt_src.negate = ~slt_src.negate;
- emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
- }
- else {
- /* Use SNE 0 if integers are being used as boolean values. */
- emit_asm(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
- }
- }
- break;
- }
-
case ir_binop_logic_xor:
if (native_integers)
emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
}
break;
+ case ir_unop_pack_half_2x16:
+ emit_asm(ir, TGSI_OPCODE_PK2H, result_dst, op[0]);
+ break;
+ case ir_unop_unpack_half_2x16:
+ emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]);
+ break;
+
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_unorm_2x16:
- case ir_unop_pack_half_2x16:
case ir_unop_pack_snorm_4x8:
case ir_unop_pack_unorm_4x8:
case ir_unop_unpack_snorm_2x16:
case ir_unop_unpack_unorm_2x16:
- case ir_unop_unpack_half_2x16:
case ir_unop_unpack_half_2x16_split_x:
case ir_unop_unpack_half_2x16_split_y:
case ir_unop_unpack_snorm_4x8:
case ir_triop_vector_insert:
case ir_binop_carry:
case ir_binop_borrow:
+ case ir_unop_ssbo_unsized_array_length:
/* This operation is not supported, or should have already been handled.
*/
assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
break;
+
+ case ir_unop_get_buffer_size:
+ assert(!"Not implemented yet");
+ break;
}
this->result = result_src;
decl->mesa_index = var->data.location;
decl->array_id = num_input_arrays + 1;
- if (is_2d)
+ if (is_2d) {
decl->array_size = type_size(var->type->fields.array);
- else
+ decl->array_type = var->type->fields.array->without_array()->base_type;
+ } else {
decl->array_size = type_size(var->type);
+ decl->array_type = var->type->without_array()->base_type;
+ }
num_input_arrays++;
entry = new(mem_ctx) variable_storage(var,
decl->mesa_index = var->data.location;
decl->array_id = num_output_arrays + 1;
- if (is_2d)
+ if (is_2d) {
decl->array_size = type_size(var->type->fields.array);
- else
+ decl->array_type = var->type->fields.array->without_array()->base_type;
+ } else {
decl->array_size = type_size(var->type);
+ decl->array_type = var->type->without_array()->base_type;
+ }
num_output_arrays++;
entry = new(mem_ctx) variable_storage(var,
this->result = st_src_reg(entry->file, entry->index, var->type);
this->result.array_id = entry->array_id;
+ if (this->shader->Stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in && var->type->is_double())
+ this->result.is_double_vertex_input = true;
if (!native_integers)
this->result.type = GLSL_TYPE_FLOAT;
}
static void
shrink_array_declarations(struct array_decl *arrays, unsigned count,
GLbitfield64 usage_mask,
+ GLbitfield64 double_usage_mask,
GLbitfield patch_usage_mask)
{
unsigned i, j;
else {
if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
break;
+ if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1))
+ break;
}
decl->mesa_index++;
else {
if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
break;
+ if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1))
+ break;
}
decl->array_size--;
element_size = 1;
if (index) {
+
+ if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
+ src.file == PROGRAM_INPUT)
+ element_size = attrib_type_size(ir->type, true);
if (is_2D) {
src.index2D = index->value.i[0];
src.has_index2 = true;
if (type->is_matrix()) {
const struct glsl_type *vec_type;
- vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+ vec_type = glsl_type::get_instance(type->is_double() ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT,
type->vector_elements, 1);
for (int i = 0; i < type->matrix_columns; i++) {
}
l->index++;
r->index++;
+ if (type->is_dual_slot_double()) {
+ l->index++;
+ if (r->is_double_vertex_input == false)
+ r->index++;
+ }
}
void
*/
if (ir->write_mask == 0) {
assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
- l.writemask = WRITEMASK_XYZW;
+
+ if (ir->lhs->type->is_array() || ir->lhs->type->without_array()->is_matrix()) {
+ if (ir->lhs->type->without_array()->is_double()) {
+ switch (ir->lhs->type->without_array()->vector_elements) {
+ case 1:
+ l.writemask = WRITEMASK_X;
+ break;
+ case 2:
+ l.writemask = WRITEMASK_XY;
+ break;
+ case 3:
+ l.writemask = WRITEMASK_XYZ;
+ break;
+ case 4:
+ l.writemask = WRITEMASK_XYZW;
+ break;
+ }
+ } else
+ l.writemask = WRITEMASK_XYZW;
+ }
} else if (ir->lhs->type->is_scalar() &&
!ir->lhs->type->is_double() &&
ir->lhs->variable_referenced()->data.mode == ir_var_shader_out) {
st_dst_reg mat_column = st_dst_reg(mat);
for (i = 0; i < ir->type->matrix_columns; i++) {
- assert(ir->type->base_type == GLSL_TYPE_FLOAT);
- values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
-
- src = st_src_reg(file, -1, ir->type->base_type);
- src.index = add_constant(file,
- values,
- ir->type->vector_elements,
- GL_FLOAT,
- &src.swizzle);
- emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
+ src = st_src_reg(file, -1, ir->type->base_type);
+ src.index = add_constant(file,
+ values,
+ ir->type->vector_elements,
+ GL_FLOAT,
+ &src.swizzle);
+ emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
+ break;
+ case GLSL_TYPE_DOUBLE:
+ values = (gl_constant_value *) &ir->value.d[i * ir->type->vector_elements];
+ src = st_src_reg(file, -1, ir->type->base_type);
+ src.index = add_constant(file,
+ values,
+ ir->type->vector_elements,
+ GL_DOUBLE,
+ &src.swizzle);
+ if (ir->type->vector_elements >= 2) {
+ mat_column.writemask = WRITEMASK_XY;
+ src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
+ emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
+ } else {
+ mat_column.writemask = WRITEMASK_X;
+ src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
+ emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
+ }
+ src.index++;
+ if (ir->type->vector_elements > 2) {
+ if (ir->type->vector_elements == 4) {
+ mat_column.writemask = WRITEMASK_ZW;
+ src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
+ emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
+ } else {
+ mat_column.writemask = WRITEMASK_Z;
+ src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
+ emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
+ mat_column.writemask = WRITEMASK_XYZW;
+ src.swizzle = SWIZZLE_XYZW;
+ }
+ mat_column.index++;
+ }
+ break;
+ default:
+ unreachable("Illegal matrix constant type.\n");
+ break;
+ }
mat_column.index++;
}
-
this->result = mat;
return;
}
case ir_lod:
opcode = TGSI_OPCODE_LODQ;
break;
+ case ir_texture_samples:
+ opcode = TGSI_OPCODE_TXQS;
+ break;
+ case ir_samples_identical:
+ unreachable("Unexpected ir_samples_identical opcode");
}
if (ir->projector) {
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
} else
inst = emit_asm(ir, opcode, result_dst, lod_info);
+ } else if (opcode == TGSI_OPCODE_TXQS) {
+ inst = emit_asm(ir, opcode, result_dst);
} else if (opcode == TGSI_OPCODE_TXF) {
inst = emit_asm(ir, opcode, result_dst, coord);
} else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
v->samplers_used = 0;
foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
- if (is_tex_instruction(inst->op)) {
+ if (inst->info->is_tex) {
for (int i = 0; i < inst->sampler_array_size; i++) {
unsigned idx = inst->sampler.index + i;
v->samplers_used |= 1 << idx;
/* Replaces all references to a temporary register index with another index. */
void
-glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
+glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_pair *renames)
{
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
unsigned j;
-
- for (j = 0; j < num_inst_src_regs(inst->op); j++) {
- if (inst->src[j].file == PROGRAM_TEMPORARY &&
- inst->src[j].index == index) {
- inst->src[j].index = new_index;
- }
+ int k; /* Index into renames[].
+ *
+ * For every instruction in the list, each PROGRAM_TEMPORARY source,
+ * texture-offset and destination register has its index compared against
+ * the old_reg of all num_renames pairs and replaced by the matching
+ * new_reg, so the whole set of renames is applied in one walk. */
+ for (j = 0; j < num_inst_src_regs(inst); j++) {
+ if (inst->src[j].file == PROGRAM_TEMPORARY)
+ for (k = 0; k < num_renames; k++) /* no early exit: later pairs are tested against the already-updated index */
+ if (inst->src[j].index == renames[k].old_reg)
+ inst->src[j].index = renames[k].new_reg;
}
for (j = 0; j < inst->tex_offset_num_offset; j++) {
- if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
- inst->tex_offsets[j].index == index) {
- inst->tex_offsets[j].index = new_index;
- }
+ if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
+ for (k = 0; k < num_renames; k++) /* same scan for texture offset registers */
+ if (inst->tex_offsets[j].index == renames[k].old_reg)
+ inst->tex_offsets[j].index = renames[k].new_reg;
}
- for (j = 0; j < num_inst_dst_regs(inst->op); j++) {
- if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) {
- inst->dst[j].index = new_index;
- }
+ for (j = 0; j < num_inst_dst_regs(inst); j++) {
+ if (inst->dst[j].file == PROGRAM_TEMPORARY)
+ for (k = 0; k < num_renames; k++) /* same scan for destination registers */
+ if (inst->dst[j].index == renames[k].old_reg)
+ inst->dst[j].index = renames[k].new_reg;
}
}
}
-int
-glsl_to_tgsi_visitor::get_first_temp_read(int index)
+void
+glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
{
int depth = 0; /* loop depth */
int loop_start = -1; /* index of the first active BGNLOOP (if any) */
unsigned i = 0, j;
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
- for (j = 0; j < num_inst_src_regs(inst->op); j++) {
- if (inst->src[j].file == PROGRAM_TEMPORARY &&
- inst->src[j].index == index) {
- return (depth == 0) ? i : loop_start;
+ for (j = 0; j < num_inst_src_regs(inst); j++) {
+ if (inst->src[j].file == PROGRAM_TEMPORARY) {
+ if (first_reads[inst->src[j].index] == -1)
+ first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start;
}
}
for (j = 0; j < inst->tex_offset_num_offset; j++) {
- if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
- inst->tex_offsets[j].index == index) {
- return (depth == 0) ? i : loop_start;
+ if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
+ if (first_reads[inst->tex_offsets[j].index] == -1)
+ first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start;
}
}
if (inst->op == TGSI_OPCODE_BGNLOOP) {
assert(depth >= 0);
i++;
}
- return -1;
}
-int
-glsl_to_tgsi_visitor::get_first_temp_write(int index)
+void
+glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes)
{
int depth = 0; /* loop depth */
int loop_start = -1; /* index of the first active BGNLOOP (if any) */
- int i = 0;
- unsigned j;
-
+ unsigned i = 0, j; /* i: position of the current instruction in the list */
+ int k; /* Temporary index used when an outermost loop ends.
+ *
+ * One pass over the instruction list that fills, per PROGRAM_TEMPORARY
+ * index, last_reads[] (position of the last instruction reading it through
+ * src or tex_offsets) and first_writes[] (position of the first instruction
+ * writing it). first_writes[] is only stored while the entry is still -1,
+ * so the caller is expected to pre-fill it with -1; entries of temporaries
+ * that are never read or written keep the caller's value. Both arrays are
+ * indexed by temporary index; the fix-up loop below touches last_reads[]
+ * for every index below this->next_temp. */
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
- for (j = 0; j < num_inst_dst_regs(inst->op); j++) {
- if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) {
- return (depth == 0) ? i : loop_start;
- }
+ for (j = 0; j < num_inst_src_regs(inst); j++) {
+ if (inst->src[j].file == PROGRAM_TEMPORARY)
+ last_reads[inst->src[j].index] = (depth == 0) ? i : -2; /* -2 marks a read inside a loop; resolved when the outermost loop ends */
+ }
+ for (j = 0; j < num_inst_dst_regs(inst); j++) {
+ if (inst->dst[j].file == PROGRAM_TEMPORARY)
+ if (first_writes[inst->dst[j].index] == -1)
+ first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; /* inside a loop the write is attributed to the outermost BGNLOOP */
+ }
+ for (j = 0; j < inst->tex_offset_num_offset; j++) {
+ if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
+ last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2; /* texture offsets count as reads as well */
}
if (inst->op == TGSI_OPCODE_BGNLOOP) {
if(depth++ == 0)
loop_start = i;
} else if (inst->op == TGSI_OPCODE_ENDLOOP) {
- if (--depth == 0)
+ if (--depth == 0) { /* leaving the outermost loop */
loop_start = -1;
- }
- assert(depth >= 0);
- i++;
- }
- return -1;
-}
-
-int
-glsl_to_tgsi_visitor::get_last_temp_read(int index)
-{
- int depth = 0; /* loop depth */
- int last = -1; /* index of last instruction that reads the temporary */
- unsigned i = 0, j;
-
- foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
- for (j = 0; j < num_inst_src_regs(inst->op); j++) {
- if (inst->src[j].file == PROGRAM_TEMPORARY &&
- inst->src[j].index == index) {
- last = (depth == 0) ? i : -2;
+ for (k = 0; k < this->next_temp; k++) {
+ if (last_reads[k] == -2) {
+ last_reads[k] = i; /* reads made inside the loop are extended to this ENDLOOP */
+ }
+ }
}
}
- for (j = 0; j < inst->tex_offset_num_offset; j++) {
- if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
- inst->tex_offsets[j].index == index)
- last = (depth == 0) ? i : -2;
- }
- if (inst->op == TGSI_OPCODE_BGNLOOP)
- depth++;
- else if (inst->op == TGSI_OPCODE_ENDLOOP)
- if (--depth == 0 && last == -2)
- last = i;
assert(depth >= 0);
i++;
}
- assert(last >= -1);
- return last;
}
-int
-glsl_to_tgsi_visitor::get_last_temp_write(int index)
+void
+glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
{
int depth = 0; /* loop depth */
- int last = -1; /* index of last instruction that writes to the temporary */
- int i = 0;
+ int i = 0, k; /* i: position of the current instruction; k: temporary index for the fix-up loop.
+ *
+ * One pass over the instruction list that stores in last_writes[], per
+ * PROGRAM_TEMPORARY index, the position of the last instruction writing
+ * it. Entries of temporaries that are never written are left untouched.
+ * The fix-up loop below touches last_writes[] for every index below
+ * this->next_temp. */
unsigned j;
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
- for (j = 0; j < num_inst_dst_regs(inst->op); j++) {
- if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index)
- last = (depth == 0) ? i : -2;
+ for (j = 0; j < num_inst_dst_regs(inst); j++) {
+ if (inst->dst[j].file == PROGRAM_TEMPORARY)
+ last_writes[inst->dst[j].index] = (depth == 0) ? i : -2; /* -2 marks a write inside a loop; resolved when the outermost loop ends */
}
if (inst->op == TGSI_OPCODE_BGNLOOP)
depth++;
else if (inst->op == TGSI_OPCODE_ENDLOOP)
- if (--depth == 0 && last == -2)
- last = i;
+ if (--depth == 0) { /* outermost loop closed */
+ for (k = 0; k < this->next_temp; k++) {
+ if (last_writes[k] == -2) {
+ last_writes[k] = i; /* writes made inside the loop are extended to this ENDLOOP */
+ }
+ }
+ }
assert(depth >= 0);
i++;
}
- assert(last >= -1);
- return last;
}
/*
*/
for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
if (inst->dst[i].file == PROGRAM_TEMPORARY &&
- !inst->dst[i].reladdr &&
- !inst->saturate) {
+ !inst->dst[i].reladdr) {
for (int c = 0; c < 4; c++) {
if (inst->dst[i].writemask & (1 << c)) {
if (writes[4 * inst->dst[i].index + c]) {
foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
glsl_to_tgsi_instruction *inst2;
bool merged;
- if (num_inst_dst_regs(inst->op) != 2)
+ if (num_inst_dst_regs(inst) != 2)
continue;
if (inst->dst[0].file != PROGRAM_UNDEFINED &&
{
int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
+ struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
int i, j;
+ int num_renames = 0;
/* Read the indices of the last read and first write to each temp register
* into an array so that we don't have to traverse the instruction list as
* much. */
for (i = 0; i < this->next_temp; i++) {
- last_reads[i] = get_last_temp_read(i);
- first_writes[i] = get_first_temp_write(i);
+ last_reads[i] = -1;
+ first_writes[i] = -1;
}
+ get_last_temp_read_first_temp_write(last_reads, first_writes);
/* Start looking for registers with non-overlapping usages that can be
* merged together. */
* as the register at index j. */
if (first_writes[i] <= first_writes[j] &&
last_reads[i] <= first_writes[j]) {
- rename_temp_register(j, i); /* Replace all references to j with i.*/
+ renames[num_renames].old_reg = j;
+ renames[num_renames].new_reg = i;
+ num_renames++;
/* Update the first_writes and last_reads arrays with the new
* values for the merged register index, and mark the newly unused
}
}
+ rename_temp_registers(num_renames, renames);
+ ralloc_free(renames);
ralloc_free(last_reads);
ralloc_free(first_writes);
}
{
int i = 0;
int new_index = 0;
-
+ int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp);
+ struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
+ int num_renames = 0;
for (i = 0; i < this->next_temp; i++) {
- if (get_first_temp_read(i) < 0) continue;
- if (i != new_index)
- rename_temp_register(i, new_index);
- new_index++;
+ first_reads[i] = -1;
}
+ get_first_temp_read(first_reads);
- this->next_temp = new_index;
-}
-
-/**
- * Returns a fragment program which implements the current pixel transfer ops.
- * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
- */
-extern "C" void
-get_pixel_transfer_visitor(struct st_fragment_program *fp,
- glsl_to_tgsi_visitor *original,
- int scale_and_bias, int pixel_maps)
-{
- glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
- struct st_context *st = st_context(original->ctx);
- struct gl_program *prog = &fp->Base.Base;
- struct gl_program_parameter_list *params = _mesa_new_parameter_list();
- st_src_reg coord, src0;
- st_dst_reg dst0;
- glsl_to_tgsi_instruction *inst;
-
- /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
- v->ctx = original->ctx;
- v->prog = prog;
- v->shader_program = NULL;
- v->shader = NULL;
- v->glsl_version = original->glsl_version;
- v->native_integers = original->native_integers;
- v->options = original->options;
- v->next_temp = original->next_temp;
- v->num_address_regs = original->num_address_regs;
- v->samplers_used = prog->SamplersUsed = original->samplers_used;
- v->indirect_addr_consts = original->indirect_addr_consts;
- memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
- v->num_immediates = original->num_immediates;
-
- /*
- * Get initial pixel color from the texture.
- * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
- */
- coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
- src0 = v->get_temp(glsl_type::vec4_type);
- dst0 = st_dst_reg(src0);
- inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord);
- inst->sampler_array_size = 1;
- inst->tex_target = TEXTURE_2D_INDEX;
-
- prog->InputsRead |= VARYING_BIT_TEX0;
- prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
- v->samplers_used |= (1 << 0);
-
- if (scale_and_bias) {
- static const gl_state_index scale_state[STATE_LENGTH] =
- { STATE_INTERNAL, STATE_PT_SCALE,
- (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
- static const gl_state_index bias_state[STATE_LENGTH] =
- { STATE_INTERNAL, STATE_PT_BIAS,
- (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
- GLint scale_p, bias_p;
- st_src_reg scale, bias;
-
- scale_p = _mesa_add_state_reference(params, scale_state);
- bias_p = _mesa_add_state_reference(params, bias_state);
-
- /* MAD colorTemp, colorTemp, scale, bias; */
- scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
- bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
- inst = v->emit_asm(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
- }
-
- if (pixel_maps) {
- st_src_reg temp = v->get_temp(glsl_type::vec4_type);
- st_dst_reg temp_dst = st_dst_reg(temp);
-
- assert(st->pixel_xfer.pixelmap_texture);
- (void) st;
-
- /* With a little effort, we can do four pixel map look-ups with
- * two TEX instructions:
- */
-
- /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
- temp_dst.writemask = WRITEMASK_XY; /* write R,G */
- inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
- inst->sampler.index = 1;
- inst->sampler_array_size = 1;
- inst->tex_target = TEXTURE_2D_INDEX;
-
- /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
- src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
- temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
- inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
- inst->sampler.index = 1;
- inst->sampler_array_size = 1;
- inst->tex_target = TEXTURE_2D_INDEX;
-
- prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
- v->samplers_used |= (1 << 1);
-
- /* MOV colorTemp, temp; */
- inst = v->emit_asm(NULL, TGSI_OPCODE_MOV, dst0, temp);
- }
-
- /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
- * new visitor. */
- foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
- glsl_to_tgsi_instruction *newinst;
- st_src_reg src_regs[4];
-
- if (inst->dst[0].file == PROGRAM_OUTPUT)
- prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);
-
- for (int i = 0; i < 4; i++) {
- src_regs[i] = inst->src[i];
- if (src_regs[i].file == PROGRAM_INPUT &&
- src_regs[i].index == VARYING_SLOT_COL0) {
- src_regs[i].file = PROGRAM_TEMPORARY;
- src_regs[i].index = src0.index;
- }
- else if (src_regs[i].file == PROGRAM_INPUT)
- prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
- }
-
- newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
- newinst->tex_target = inst->tex_target;
- newinst->sampler_array_size = inst->sampler_array_size;
- }
-
- /* Make modifications to fragment program info. */
- prog->Parameters = _mesa_combine_parameter_lists(params,
- original->prog->Parameters);
- _mesa_free_parameter_list(params);
- count_resources(v, prog);
- fp->glsl_to_tgsi = v;
-}
-
-/**
- * Make fragment program for glBitmap:
- * Sample the texture and kill the fragment if the bit is 0.
- * This program will be combined with the user's fragment program.
- *
- * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
- */
-extern "C" void
-get_bitmap_visitor(struct st_fragment_program *fp,
- glsl_to_tgsi_visitor *original, int samplerIndex)
-{
- glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
- struct st_context *st = st_context(original->ctx);
- struct gl_program *prog = &fp->Base.Base;
- st_src_reg coord, src0;
- st_dst_reg dst0;
- glsl_to_tgsi_instruction *inst;
-
- /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
- v->ctx = original->ctx;
- v->prog = prog;
- v->shader_program = NULL;
- v->shader = NULL;
- v->glsl_version = original->glsl_version;
- v->native_integers = original->native_integers;
- v->options = original->options;
- v->next_temp = original->next_temp;
- v->num_address_regs = original->num_address_regs;
- v->samplers_used = prog->SamplersUsed = original->samplers_used;
- v->indirect_addr_consts = original->indirect_addr_consts;
- memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
- v->num_immediates = original->num_immediates;
-
- /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
- coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
- src0 = v->get_temp(glsl_type::vec4_type);
- dst0 = st_dst_reg(src0);
- inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord);
- inst->sampler.index = samplerIndex;
- inst->sampler_array_size = 1;
- inst->tex_target = TEXTURE_2D_INDEX;
-
- prog->InputsRead |= VARYING_BIT_TEX0;
- prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
- v->samplers_used |= (1 << samplerIndex);
-
- /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
- src0.negate = NEGATE_XYZW;
- if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
- src0.swizzle = SWIZZLE_XXXX;
- inst = v->emit_asm(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0);
-
- /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
- * new visitor. */
- foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
- glsl_to_tgsi_instruction *newinst;
- st_src_reg src_regs[4];
-
- if (inst->dst[0].file == PROGRAM_OUTPUT)
- prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);
-
- for (int i = 0; i < 4; i++) {
- src_regs[i] = inst->src[i];
- if (src_regs[i].file == PROGRAM_INPUT)
- prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
+ for (i = 0; i < this->next_temp; i++) {
+ if (first_reads[i] < 0) continue;
+ if (i != new_index) {
+ renames[num_renames].old_reg = i;
+ renames[num_renames].new_reg = new_index;
+ num_renames++;
}
-
- newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
- newinst->tex_target = inst->tex_target;
- newinst->sampler_array_size = inst->sampler_array_size;
+ new_index++;
}
- /* Make modifications to fragment program info. */
- prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
- count_resources(v, prog);
- fp->glsl_to_tgsi = v;
+ rename_temp_registers(num_renames, renames);
+ this->next_temp = new_index;
+ ralloc_free(renames);
+ ralloc_free(first_reads);
}
/* ------------------------- TGSI conversion stuff -------------------------- */
TGSI_SEMANTIC_INSTANCEID,
TGSI_SEMANTIC_VERTEXID_NOBASE,
TGSI_SEMANTIC_BASEVERTEX,
+ TGSI_SEMANTIC_BASEINSTANCE,
+ TGSI_SEMANTIC_DRAWID,
/* Geometry shader
*/
TGSI_SEMANTIC_SAMPLEID,
TGSI_SEMANTIC_SAMPLEPOS,
TGSI_SEMANTIC_SAMPLEMASK,
+ TGSI_SEMANTIC_HELPER_INVOCATION,
/* Tessellation shaders
*/
if (!reg->array_id) {
assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL);
- return t->inputs[t->inputMapping[index]];
+ return t->inputs[t->inputMapping[index] + double_reg2];
}
else {
struct array_decl *decl = &t->input_arrays[reg->array_id-1];
assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT);
assert(t->inputs[slot].ArrayID == reg->array_id);
- return ureg_src_array_offset(t->inputs[slot], index - mesa_index);
+ return ureg_src_array_offset(t->inputs[slot], index + double_reg2 - mesa_index);
}
case PROGRAM_ADDRESS:
static struct ureg_dst
translate_dst(struct st_translate *t,
const st_dst_reg *dst_reg,
- bool saturate, bool clamp_color)
+ bool saturate)
{
struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index,
dst_reg->array_id);
if (saturate)
dst = ureg_saturate(dst);
- else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
- /* Clamp colors for ARB_color_buffer_float. */
- switch (t->procType) {
- case TGSI_PROCESSOR_VERTEX:
- /* This can only occur with a compatibility profile, which doesn't
- * support geometry shaders. */
- if (dst_reg->index == VARYING_SLOT_COL0 ||
- dst_reg->index == VARYING_SLOT_COL1 ||
- dst_reg->index == VARYING_SLOT_BFC0 ||
- dst_reg->index == VARYING_SLOT_BFC1) {
- dst = ureg_saturate(dst);
- }
- break;
-
- case TGSI_PROCESSOR_FRAGMENT:
- if (dst_reg->index == FRAG_RESULT_COLOR ||
- dst_reg->index >= FRAG_RESULT_DATA0) {
- dst = ureg_saturate(dst);
- }
- break;
- }
- }
if (dst_reg->reladdr != NULL) {
assert(dst_reg->file != PROGRAM_TEMPORARY);
static void
compile_tgsi_instruction(struct st_translate *t,
- const glsl_to_tgsi_instruction *inst,
- bool clamp_dst_color_output)
+ const glsl_to_tgsi_instruction *inst)
{
struct ureg_program *ureg = t->ureg;
GLuint i;
unsigned num_src;
unsigned tex_target;
- num_dst = num_inst_dst_regs(inst->op);
- num_src = num_inst_src_regs(inst->op);
+ num_dst = num_inst_dst_regs(inst);
+ num_src = num_inst_src_regs(inst);
for (i = 0; i < num_dst; i++)
dst[i] = translate_dst(t,
&inst->dst[i],
- inst->saturate,
- clamp_dst_color_output);
+ inst->saturate);
for (i = 0; i < num_src; i++)
src[i] = translate_src(t, &inst->src[i]);
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXP:
case TGSI_OPCODE_TXQ:
+ case TGSI_OPCODE_TXQS:
case TGSI_OPCODE_TXF:
case TGSI_OPCODE_TEX2:
case TGSI_OPCODE_TXB2:
t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
}
-static void
-emit_edgeflags(struct st_translate *t)
-{
- struct ureg_program *ureg = t->ureg;
- struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]];
- struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
-
- ureg_MOV(ureg, edge_dst, edge_src);
-}
-
static bool
find_array(unsigned attr, struct array_decl *arrays, unsigned count,
unsigned *array_id, unsigned *array_size)
const GLuint outputMapping[],
const GLuint outputSlotToAttr[],
const ubyte outputSemanticName[],
- const ubyte outputSemanticIndex[],
- boolean passthrough_edgeflags,
- boolean clamp_color)
+ const ubyte outputSemanticIndex[])
{
struct st_translate *t;
unsigned i;
TGSI_SEMANTIC_BASEVERTEX);
assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_TESS_COORD] ==
TGSI_SEMANTIC_TESSCOORD);
+ assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_HELPER_INVOCATION] ==
+ TGSI_SEMANTIC_HELPER_INVOCATION);
t = CALLOC_STRUCT(st_translate);
if (!t) {
t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
}
}
- if (passthrough_edgeflags)
- emit_edgeflags(t);
}
/* Declare address register.
*/
{
GLbitfield sysInputs = proginfo->SystemValuesRead;
- unsigned numSys = 0;
+
for (i = 0; sysInputs; i++) {
if (sysInputs & (1 << i)) {
unsigned semName = _mesa_sysval_to_semantic[i];
- t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
+
+ t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
+
if (semName == TGSI_SEMANTIC_INSTANCEID ||
semName == TGSI_SEMANTIC_VERTEXID) {
/* From Gallium perspective, these system values are always
t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
}
}
- numSys++;
+
sysInputs &= ~(1 << i);
}
}
unsigned num_ubos = program->shader->NumUniformBlocks;
for (i = 0; i < num_ubos; i++) {
- unsigned size = program->shader->UniformBlocks[i].UniformBufferSize;
+ unsigned size = program->shader->UniformBlocks[i]->UniformBufferSize;
unsigned num_const_vecs = (size + 15) / 16;
unsigned first, last;
assert(num_const_vecs > 0);
*/
foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) {
set_insn_start(t, ureg_get_instruction_number(ureg));
- compile_tgsi_instruction(t, inst, clamp_color);
+ compile_tgsi_instruction(t, inst);
}
/* Fix up all emitted labels:
#if 0
/* Print out some information (for debugging purposes) used by the
* optimization passes. */
- for (i = 0; i < v->next_temp; i++) {
- int fr = v->get_first_temp_read(i);
- int fw = v->get_first_temp_write(i);
- int lr = v->get_last_temp_read(i);
- int lw = v->get_last_temp_write(i);
-
- printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
- assert(fw <= fr);
+ {
+ int i;
+ int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
+ int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
+ int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
+ int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
+
+ for (i = 0; i < v->next_temp; i++) {
+ first_writes[i] = -1;
+ first_reads[i] = -1;
+ last_writes[i] = -1;
+ last_reads[i] = -1;
+ }
+ v->get_first_temp_read(first_reads);
+ v->get_last_temp_read_first_temp_write(last_reads, first_writes);
+ v->get_last_temp_write(last_writes);
+ for (i = 0; i < v->next_temp; i++)
+ printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
+ first_writes[i],
+ last_reads[i],
+ last_writes[i]);
+ ralloc_free(first_writes);
+ ralloc_free(first_reads);
+ ralloc_free(last_writes);
+ ralloc_free(last_reads);
}
#endif
do_set_program_inouts(shader->ir, prog, shader->Stage);
shrink_array_declarations(v->input_arrays, v->num_input_arrays,
- prog->InputsRead, prog->PatchInputsRead);
+ prog->InputsRead, prog->DoubleInputsRead, prog->PatchInputsRead);
shrink_array_declarations(v->output_arrays, v->num_output_arrays,
- prog->OutputsWritten, prog->PatchOutputsWritten);
+ prog->OutputsWritten, 0ULL, prog->PatchOutputsWritten);
count_resources(v, prog);
/* This must be done before the uniform storage is associated. */
_mesa_reference_program(ctx, &shader->Program, prog);
+ /* Avoid reallocation of the program parameter list, because the uniform
+ * storage is only associated with the original parameter list.
+ * This should be enough for Bitmap and DrawPixels constants.
+ */
+ _mesa_reserve_parameter_storage(prog->Parameters, 8);
+
/* This has to be done last. Any operation that can cause
* prog->ParameterValues to get reallocated (e.g., anything that adds a
* program constant) has to happen before creating this linkage.
LOWER_PACK_SNORM_4x8 |
LOWER_UNPACK_SNORM_4x8 |
LOWER_UNPACK_UNORM_4x8 |
- LOWER_PACK_UNORM_4x8 |
- LOWER_PACK_HALF_2x16 |
- LOWER_UNPACK_HALF_2x16;
+ LOWER_PACK_UNORM_4x8;
if (ctx->Extensions.ARB_gpu_shader5)
- lower_inst |= LOWER_PACK_USE_BFI;
+ lower_inst |= LOWER_PACK_USE_BFI |
+ LOWER_PACK_USE_BFE;
+ if (!ctx->st->has_half_float_packing)
+ lower_inst |= LOWER_PACK_HALF_2x16 |
+ LOWER_UNPACK_HALF_2x16;
lower_packing_builtins(ir, lower_inst);
}
(!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
(options->EmitNoSat ? SAT_TO_CLAMP : 0));
- lower_ubo_reference(prog->_LinkedShaders[i], ir);
do_vec_index_to_cond_assign(ir);
lower_vector_insert(ir, true);
lower_quadop_vector(ir, false);