*/
dst_reg output_reg[VERT_RESULT_MAX];
int uniform_size[MAX_UNIFORMS];
+ int uniform_vector_size[MAX_UNIFORMS];
int uniforms;
struct hash_table *variable_ht;
void reg_allocate_trivial();
void reg_allocate();
void move_grf_array_access_to_scratch();
+ void move_uniform_array_access_to_pull_constants();
void calculate_live_intervals();
bool dead_code_eliminate();
bool virtual_grf_interferes(int a, int b);
src_reg get_scratch_offset(vec4_instruction *inst,
src_reg *reladdr, int reg_offset);
+ src_reg get_pull_constant_offset(vec4_instruction *inst,
+ src_reg *reladdr, int reg_offset);
void emit_scratch_read(vec4_instruction *inst,
dst_reg dst,
src_reg orig_src,
src_reg temp,
dst_reg orig_dst,
int base_offset);
+ void emit_pull_constant_load(vec4_instruction *inst,
+ dst_reg dst,
+ src_reg orig_src,
+ int base_offset);
GLboolean try_emit_sat(ir_expression *ir);
void generate_scratch_read(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg index);
+ void generate_pull_constant_load(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg index);
};
} /* namespace brw */
* matter what, or the GPU would hang.
*/
if (intel->gen < 6 && this->uniforms == 0) {
- this->uniform_size[this->uniforms] = 1;
+ this->uniform_vector_size[this->uniforms] = 1;
for (unsigned int i = 0; i < 4; i++) {
unsigned int slot = this->uniforms * 4 + i;
brw_reg = brw_abs(brw_reg);
if (src[i].negate)
brw_reg = negate(brw_reg);
+
+ /* This should have been moved to pull constants. */
+ assert(!src[i].reladdr);
break;
case HW_REG:
false /* commit */);
}
+/**
+ * Emits the native code for a VS pull constant load: a MOV of @index
+ * into the message register after inst->base_mrf, followed by a SEND
+ * that issues an OWORD dual block read of SURF_INDEX_VERT_CONST_BUFFER
+ * with its result landing in @dst.
+ */
+void
+vec4_visitor::generate_pull_constant_load(vec4_instruction *inst,
+                                          struct brw_reg dst,
+                                          struct brw_reg index)
+{
+   /* The read message type is encoded differently per hardware
+    * generation; start from the original encoding and override it for
+    * the newer parts.
+    */
+   uint32_t read_msg = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+   if (intel->gen >= 6)
+      read_msg = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+   else if (intel->gen == 5 || intel->is_g4x)
+      read_msg = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+
+   struct brw_reg header = brw_vec8_grf(0, 0);
+   gen6_resolve_implied_move(p, &header, inst->base_mrf);
+
+   /* The offset to fetch goes in the second register of the message. */
+   struct brw_reg offset_mrf = retype(brw_message_reg(inst->base_mrf + 1),
+                                      BRW_REGISTER_TYPE_D);
+   brw_MOV(p, offset_mrf, index);
+
+   /* Each of the 8 channel enables is considered for whether each
+    * dword is written.
+    */
+   struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_set_dest(p, insn, dst);
+   brw_set_src0(p, insn, header);
+   brw_set_dp_read_message(p, insn,
+                           SURF_INDEX_VERT_CONST_BUFFER,
+                           BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+                           read_msg,
+                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+                           2, /* mlen */
+                           1 /* rlen */);
+}
+
void
vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
struct brw_reg dst,
generate_scratch_write(inst, dst, src[0], src[1]);
break;
+ case VS_OPCODE_PULL_CONSTANT_LOAD:
+ generate_pull_constant_load(inst, dst, src[0]);
+ break;
+
default:
if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
fail("unsupported opcode in `%s' in VS\n",
* often do repeated subexpressions for those.
*/
move_grf_array_access_to_scratch();
+ move_uniform_array_access_to_pull_constants();
bool progress;
do {
c->prog_data.param[this->uniforms * 4 + i] = &zero;
}
- this->uniform_size[this->uniforms] = type->vector_elements;
+ /* Track the size of this uniform vector, for future packing of
+ * uniforms.
+ */
+ this->uniform_vector_size[this->uniforms] = type->vector_elements;
this->uniforms++;
return 1;
(gl_state_index *)slots[i].tokens);
float *values = &this->vp->Base.Parameters->ParameterValues[index][0].f;
- this->uniform_size[this->uniforms] = 0;
+ this->uniform_vector_size[this->uniforms] = 0;
/* Add each of the unique swizzled channels of the element.
* This will end up matching the size of the glsl_type of this field.
*/
c->prog_data.param[this->uniforms * 4 + j] = &values[swiz];
if (swiz <= last_swiz)
- this->uniform_size[this->uniforms]++;
+ this->uniform_vector_size[this->uniforms]++;
}
this->uniforms++;
}
case ir_var_uniform:
reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
+ /* Track how big the whole uniform variable is, in case we need to put a
+ * copy of its data into pull constants for array access.
+ */
+ this->uniform_size[this->uniforms] = type_size(ir->type);
+
if (!strncmp(ir->name, "gl_", 3)) {
setup_builtin_uniform_values(ir);
} else {
}
}
+/**
+ * Builds the offset operand for a pull constant load of @reg_offset.
+ *
+ * Without @reladdr the result is an immediate. With @reladdr,
+ * instructions are inserted before @inst that compute
+ * (*reladdr + reg_offset) into a new temporary, which is returned.
+ * In both cases the value is scaled by 16 on pre-gen6 hardware.
+ */
+src_reg
+vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
+                                       src_reg *reladdr, int reg_offset)
+{
+   /* Pre-gen6, the message header uses byte offsets instead of vec4
+    * (16-byte) offset units.
+    */
+   const bool byte_offsets = intel->gen < 6;
+
+   if (!reladdr)
+      return src_reg(reg_offset * (byte_offsets ? 16 : 1));
+
+   src_reg offset = src_reg(this, glsl_type::int_type);
+
+   vec4_instruction *sum = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_ADD,
+                                                         dst_reg(offset),
+                                                         *reladdr,
+                                                         src_reg(reg_offset));
+   sum->ir = inst->ir;
+   sum->annotation = inst->annotation;
+   inst->insert_before(sum);
+
+   if (byte_offsets) {
+      vec4_instruction *scale = new(mem_ctx) vec4_instruction(this,
+                                                              BRW_OPCODE_MUL,
+                                                              dst_reg(offset),
+                                                              offset,
+                                                              src_reg(16));
+      scale->ir = inst->ir;
+      scale->annotation = inst->annotation;
+      inst->insert_before(scale);
+   }
+
+   return offset;
+}
+
/**
* Emits an instruction before @inst to load the value named by @orig_src
* from scratch space at @base_offset to @temp.
}
}
+/**
+ * Inserts a VS_OPCODE_PULL_CONSTANT_LOAD before @inst that fetches the
+ * value named by @orig_src from the pull constant buffer (surface)
+ * into @temp.  The location read is @base_offset plus @orig_src's
+ * reg_offset, plus its reladdr when it has one.
+ */
+void
+vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
+                                      dst_reg temp, src_reg orig_src,
+                                      int base_offset)
+{
+   src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr,
+                                             base_offset + orig_src.reg_offset);
+
+   vec4_instruction *pull =
+      new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
+                                    temp, offset);
+   pull->ir = inst->ir;
+   pull->annotation = inst->annotation;
+   pull->base_mrf = 14;
+   pull->mlen = 1;
+   inst->insert_before(pull);
+}
+
+/**
+ * Implements array access of uniforms by inserting a
+ * PULL_CONSTANT_LOAD instruction.
+ *
+ * Unlike temporary GRF array access (where we don't support it due to
+ * the difficulty of doing relative addressing on instruction
+ * destinations), we could potentially do array access of uniforms
+ * that were loaded in GRF space as push constants.  In real-world
+ * usage we've seen, though, the arrays being used are always larger
+ * than we could load as push constants, so just always move all
+ * uniform array access out to a pull constant buffer.
+ *
+ * Every source operand that is a UNIFORM with a reladdr is rewritten to
+ * read from a fresh vec4 temporary, which is filled by a pull constant
+ * load inserted just before the instruction.
+ */
+void
+vec4_visitor::move_uniform_array_access_to_pull_constants()
+{
+   /* Location of each uniform within the pull constant buffer, in vec4
+    * units, or -1 if it has not been copied there yet.
+    */
+   int pull_constant_loc[this->uniforms];
+
+   for (int i = 0; i < this->uniforms; i++) {
+      pull_constant_loc[i] = -1;
+   }
+
+   /* Walk through and find array access of uniforms.  Put a copy of that
+    * uniform in the pull constant buffer.
+    *
+    * Note that we don't move constant-indexed accesses to arrays.  No
+    * testing has been done of the performance impact of this choice.
+    */
+   foreach_list_safe(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      for (int i = 0 ; i < 3; i++) {
+         if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
+            continue;
+
+         int uniform = inst->src[i].reg;
+
+         /* If this array isn't already present in the pull constant buffer,
+          * add it.
+          */
+         if (pull_constant_loc[uniform] == -1) {
+            const float **values = &prog_data->param[uniform * 4];
+
+            /* nr_pull_params counts individual float params (four per
+             * vec4), while the location is used below as a base offset
+             * that gets added to the vec4-unit reg_offset.  Convert to
+             * vec4 units so that arrays after the first one in the
+             * buffer are addressed correctly.
+             */
+            pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
+
+            for (int j = 0; j < uniform_size[uniform] * 4; j++) {
+               prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
+            }
+         }
+
+         /* Set up the annotation tracking for new generated instructions. */
+         base_ir = inst->ir;
+         current_annotation = inst->annotation;
+
+         dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+
+         emit_pull_constant_load(inst, temp, inst->src[i],
+                                 pull_constant_loc[uniform]);
+
+         /* Redirect the operand to the loaded copy; the relative
+          * addressing has been folded into the load's offset.
+          */
+         inst->src[i].file = temp.file;
+         inst->src[i].reg = temp.reg;
+         inst->src[i].reg_offset = temp.reg_offset;
+         inst->src[i].reladdr = NULL;
+      }
+   }
+}
vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
struct gl_shader_program *prog,