Fixes glsl-fs-uniform-array-5, but not 6 which fails in ir_to_mesa.
GLuint id; /**< serial no. to identify frag progs, never re-used */
GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */
- GLboolean use_const_buffer;
-
/** for debugging, which texture units are referenced */
GLbitfield tex_units_used;
};
GLuint total_scratch;
GLuint nr_params; /**< number of float params/constants */
+ GLuint nr_pull_params;
GLboolean error;
/* Pointer to tracked values (only valid once
* _mesa_load_state_parameters has been called at runtime).
*/
- const GLfloat *param[BRW_MAX_CURBE];
+ const GLfloat *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
+ const GLfloat *pull_param[MAX_UNIFORMS * 4];
};
struct brw_sf_prog_data {
void brw_oword_block_read(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
- int num_regs,
- GLuint offset);
+ uint32_t offset,
+ uint32_t bind_table_index);
-void brw_dp_READ_4( struct brw_compile *p,
- struct brw_reg dest,
- GLboolean relAddr,
- GLuint location,
- GLuint bind_table_index );
+void brw_oword_block_read_scratch(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ int num_regs,
+ GLuint offset);
+
+void brw_oword_block_write_scratch(struct brw_compile *p,
+ struct brw_reg mrf,
+ int num_regs,
+ GLuint offset);
void brw_dp_READ_4_vs( struct brw_compile *p,
struct brw_reg dest,
GLuint offset,
GLuint bind_table_index);
-void brw_oword_block_write(struct brw_compile *p,
- struct brw_reg mrf,
- int num_regs,
- GLuint offset);
-
/* If/else/endif. Works by manipulating the execution flags on each
* channel.
*/
* The offset must be aligned to oword size (16 bytes). Used for
* register spilling.
*/
-void brw_oword_block_write(struct brw_compile *p,
- struct brw_reg mrf,
- int num_regs,
- GLuint offset)
+void brw_oword_block_write_scratch(struct brw_compile *p,
+ struct brw_reg mrf,
+ int num_regs,
+ GLuint offset)
{
struct intel_context *intel = &p->brw->intel;
uint32_t msg_control;
* spilling.
*/
void
-brw_oword_block_read(struct brw_compile *p,
- struct brw_reg dest,
- struct brw_reg mrf,
- int num_regs,
- GLuint offset)
+brw_oword_block_read_scratch(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ int num_regs,
+ GLuint offset)
{
uint32_t msg_control;
int rlen;
}
}
-
/**
* Read a float[4] vector from the data port Data Cache (const buffer).
* Location (in buffer) should be a multiple of 16.
* Used for fetching shader constants.
- * If relAddr is true, we'll do an indirect fetch using the address register.
+ *
+ * dest receives the data; it is retyped to a vec8 of unsigned words before
+ * being set as the SEND destination.
+ * mrf is the message register used for the one-register message header; it
+ * is overwritten with a copy of g0 and its element 2 is then set to offset.
+ * bind_table_index is passed through to brw_set_dp_read_message().
+ *
+ * The instruction state is pushed on entry and popped on exit, and the
+ * header setup and the SEND are emitted with predication and compression
+ * disabled and mask control set to BRW_MASK_DISABLE.
*/
-void brw_dp_READ_4( struct brw_compile *p,
- struct brw_reg dest,
- GLboolean relAddr,
- GLuint location,
- GLuint bind_table_index )
+void brw_oword_block_read(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ uint32_t offset,
+ uint32_t bind_table_index)
{
- /* XXX: relAddr not implemented */
- GLuint msg_reg_nr = 1;
- {
- struct brw_reg b;
- brw_push_insn_state(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
+ mrf = retype(mrf, BRW_REGISTER_TYPE_UD); /* the header is written as unsigned dwords */
- /* Setup MRF[1] with location/offset into const buffer */
- b = brw_message_reg(msg_reg_nr);
- b = retype(b, BRW_REGISTER_TYPE_UD);
- /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
- * when the docs say only dword[2] should be set. Hmmm. But it works.
- */
- brw_MOV(p, b, brw_imm_ud(location));
- brw_pop_insn_state(p);
- }
+ brw_push_insn_state(p);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
- {
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
-
- insn->header.predicate_control = BRW_PREDICATE_NONE;
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditionalmod = msg_reg_nr;
- insn->header.mask_control = BRW_MASK_DISABLE;
-
- /* cast dest to a uword[8] vector */
- dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+ brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* seed the header with a copy of g0 */
- brw_set_dest(insn, dest);
- brw_set_src0(insn, brw_null_reg());
+ /* set message header global offset field (reg 0, element 2) */
+ brw_MOV(p,
+ retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
+ mrf.nr,
+ 2), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(offset));
- brw_set_dp_read_message(p->brw,
- insn,
- bind_table_index,
- 0, /* msg_control (0 means 1 Oword) */
- BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
- 0, /* source cache = data cache */
- 1, /* msg_length */
- 1, /* response_length (1 Oword) */
- 0); /* eot */
- }
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.destreg__conditionalmod = mrf.nr; /* the removed code stored its message register number in this field */
+
+ /* cast dest to a uword[8] vector */
+ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+
+ brw_set_dest(insn, dest);
+ brw_set_src0(insn, brw_null_reg());
+
+ brw_set_dp_read_message(p->brw,
+ insn,
+ bind_table_index,
+ BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
+ 0, /* source cache = data cache */
+ 1, /* msg_length */
+ 1, /* response_length (1 reg, 2 owords!) */
+ 0); /* eot */
+
+ brw_pop_insn_state(p); /* pairs with the brw_push_insn_state() above */
}
+
/**
* Read float[4] constant(s) from VS constant buffer.
* For relative addressing, two float[4] constants will be read into 'dest'.
case GLSL_TYPE_BOOL:
vec_values = fp->Base.Parameters->ParameterValues[loc];
for (unsigned int i = 0; i < type->vector_elements; i++) {
+ assert(c->prog_data.nr_params < ARRAY_SIZE(c->prog_data.param));
c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
}
return 1;
brw_MOV(p,
retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
retype(src, BRW_REGISTER_TYPE_UD));
- brw_oword_block_write(p, brw_message_reg(inst->base_mrf), 1, inst->offset);
+ brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
+ inst->offset);
}
void
if (intel->gen == 4 && !intel->is_g4x)
brw_MOV(p, brw_null_reg(), dst);
- brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), 1,
- inst->offset);
+ brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
+ inst->offset);
+
+ if (intel->gen == 4 && !intel->is_g4x) {
+ /* gen4 errata: destination from a send can't be used as a
+ * destination until it's been read. Just read it so we don't
+ * have to worry.
+ */
+ brw_MOV(p, brw_null_reg(), dst);
+ }
+}
+
+
+void
+fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
+{
+ assert(inst->mlen != 0);
+
+ /* Clear any post destination dependencies that would be ignored by
+ * the block read. See the B-Spec for pre-gen5 send instruction.
+ *
+ * This could use a better solution, since texture sampling and
+ * math reads could potentially run into it as well -- anywhere
+ * that we have a SEND with a destination that is a register that
+ * was written but not read within the last N instructions (what's
+ * N? unsure). This is rare because of dead code elimination, but
+ * not impossible.
+ */
+ if (intel->gen == 4 && !intel->is_g4x)
+ brw_MOV(p, brw_null_reg(), dst);
+
+ brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
+ inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);
if (intel->gen == 4 && !intel->is_g4x) {
/* gen4 errata: destination from a send can't be used as a
}
}
+/**
+ * Choose accesses from the UNIFORM file to demote to using the pull
+ * constant buffer.
+ *
+ * We allow a fragment shader to have more than the specified minimum
+ * maximum number of fragment shader uniform components (64). If
+ * there are too many of these, they'd fill up all of register space.
+ * So, this will push some of them out to the pull constant buffer and
+ * update the program to load them.
+ *
+ * Nothing is done when prog_data.nr_params is at most 128. Otherwise
+ * every UNIFORM source whose index is 128 or above is rewritten to read
+ * a new float fs_reg that is filled by an FS_OPCODE_PULL_CONSTANT_LOAD
+ * inserted just before the instruction. The matching tail of
+ * prog_data.param is then copied to prog_data.pull_param, nr_params is
+ * reduced accordingly and nr_pull_params is set to the demoted count.
+ */
+void
+fs_visitor::setup_pull_constants()
+{
+ /* Only allow 16 registers (128 uniform components) as push constants. */
+ unsigned int max_uniform_components = 16 * 8;
+ if (c->prog_data.nr_params <= max_uniform_components)
+ return;
+
+ /* Just demote the end of the list. We could probably do better
+ * here, demoting things that are rarely used in the program first.
+ *
+ * Note that a separate load is emitted for every demoted source
+ * operand; loads of the same offset are not shared here.
+ */
+ int pull_uniform_base = max_uniform_components;
+ int pull_uniform_count = c->prog_data.nr_params - pull_uniform_base;
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ fs_inst *inst = (fs_inst *)iter.get();
+
+ for (int i = 0; i < 3; i++) { /* look at each of the three source slots */
+ if (inst->src[i].file != UNIFORM)
+ continue;
+
+ int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; /* compared below against the push-constant limit */
+ if (uniform_nr < pull_uniform_base)
+ continue; /* below the limit: left as a UNIFORM source */
+
+ fs_reg dst = fs_reg(this, glsl_type::float_type);
+ fs_inst *pull = new(mem_ctx) fs_inst(FS_OPCODE_PULL_CONSTANT_LOAD,
+ dst);
+ pull->offset = ((uniform_nr - pull_uniform_base) * 4) & ~15; /* index * 4, rounded down to a multiple of 16 */
+ pull->ir = inst->ir;
+ pull->annotation = inst->annotation;
+ pull->base_mrf = 14; /* message register passed to brw_oword_block_read() at generate time */
+ pull->mlen = 1; /* generate_pull_constant_load() asserts mlen != 0 */
+
+ inst->insert_before(pull);
+
+ inst->src[i].file = GRF; /* read the loaded register instead of the uniform */
+ inst->src[i].reg = dst.reg;
+ inst->src[i].reg_offset = 0;
+ inst->src[i].smear = (uniform_nr - pull_uniform_base) & 3; /* channel (0-3) of the loaded register to smear to all channels */
+ }
+ }
+
+ for (int i = 0; i < pull_uniform_count; i++) { /* copy the demoted tail of param[] into pull_param[] */
+ c->prog_data.pull_param[i] = c->prog_data.param[pull_uniform_base + i];
+ }
+ c->prog_data.nr_params -= pull_uniform_count; /* leaves max_uniform_components push constants */
+ c->prog_data.nr_pull_params = pull_uniform_count;
+}
+
void
fs_visitor::calculate_live_intervals()
{
scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
scan_inst->src[i].abs |= inst->src[0].abs;
scan_inst->src[i].negate ^= inst->src[0].negate;
+ scan_inst->src[i].smear = inst->src[0].smear;
}
}
}
inst->predicated ||
inst->dst.file != MRF || inst->src[0].file != GRF ||
inst->dst.type != inst->src[0].type ||
- inst->src[0].abs || inst->src[0].negate)
+ inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != -1)
continue;
/* Can't compute-to-MRF this GRF if someone else was going to
case GRF:
case ARF:
case MRF:
- brw_reg = brw_vec8_reg(reg->file,
- reg->hw_reg, 0);
+ if (reg->smear == -1) {
+ brw_reg = brw_vec8_reg(reg->file,
+ reg->hw_reg, 0);
+ } else {
+ brw_reg = brw_vec1_reg(reg->file,
+ reg->hw_reg, reg->smear);
+ }
brw_reg = retype(brw_reg, reg->type);
break;
case IMM:
generate_unspill(inst, dst);
break;
+ case FS_OPCODE_PULL_CONSTANT_LOAD:
+ generate_pull_constant_load(inst, dst);
+ break;
+
case FS_OPCODE_FB_WRITE:
generate_fb_write(inst);
break;
v.emit_fb_writes();
v.split_virtual_grfs();
+ v.setup_pull_constants();
v.assign_curb_setup();
v.assign_urb_setup();
FS_OPCODE_DISCARD_AND,
FS_OPCODE_SPILL,
FS_OPCODE_UNSPILL,
+ FS_OPCODE_PULL_CONSTANT_LOAD,
};
this->negate = 0;
this->abs = 0;
this->hw_reg = -1;
+ this->smear = -1;
}
/** Generic unset register constructor. */
bool negate;
bool abs;
struct brw_reg fixed_hw_reg;
+ int smear; /* -1, or a channel of the reg to smear to all channels. */
/** Value for file == BRW_IMMMEDIATE_FILE */
union {
int choose_spill_reg(struct ra_graph *g);
void spill_reg(int spill_reg);
void split_virtual_grfs();
+ void setup_pull_constants();
void calculate_live_intervals();
bool propagate_constants();
bool register_coalesce();
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_spill(fs_inst *inst, struct brw_reg src);
void generate_unspill(fs_inst *inst, struct brw_reg dst);
+ void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst);
void emit_dummy_fs();
fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
spill_src.reg_offset = 0;
spill_src.abs = false;
spill_src.negate = false;
+ spill_src.smear = -1;
for (int chan = 0; chan < size; chan++) {
fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL,
mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask }
send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 }
*/
- brw_oword_block_write(p, brw_message_reg(1), 2, slot);
+ brw_oword_block_write_scratch(p, brw_message_reg(1), 2, slot);
}
/* use a real constant buffer, or just use a section of the GRF? */
/* XXX this heuristic may need adjustment... */
- if ((nr_params + nr_temps) * 4 + reg_index > 80)
- c->fp->use_const_buffer = GL_TRUE;
- else
- c->fp->use_const_buffer = GL_FALSE;
- /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
+ if ((nr_params + nr_temps) * 4 + reg_index > 80) {
+ for (i = 0; i < nr_params; i++) {
+ float *pv = c->fp->program.Base.Parameters->ParameterValues[i];
+ for (j = 0; j < 4; j++) {
+ c->prog_data.pull_param[c->prog_data.nr_pull_params] = &pv[j];
+ c->prog_data.nr_pull_params++;
+ }
+ }
- if (c->fp->use_const_buffer) {
- /* We'll use a real constant buffer and fetch constants from
- * it with a dataport read message.
- */
+ c->prog_data.nr_params = 0;
+ }
+ /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
- /* number of float constants in CURBE */
- c->prog_data.nr_params = 0;
- }
- else {
+ if (!c->prog_data.nr_pull_params) {
const struct gl_program_parameter_list *plist =
c->fp->program.Base.Parameters;
int index = 0;
* They'll be found in these registers.
* XXX alloc these on demand!
*/
- if (c->fp->use_const_buffer) {
+ if (c->prog_data.nr_pull_params) {
for (i = 0; i < 3; i++) {
c->current_const[i].index = -1;
c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
#endif
/* need to fetch the constant now */
- brw_dp_READ_4(p,
- c->current_const[i].reg, /* writeback dest */
- src->RelAddr, /* relative indexing? */
- 16 * src->Index, /* byte offset */
- SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */
- );
+ brw_oword_block_read(p,
+ c->current_const[i].reg,
+ brw_message_reg(1),
+ 16 * src->Index,
+ SURF_INDEX_FRAG_CONST_BUFFER);
}
}
}
}
}
- if (c->fp->use_const_buffer &&
+ if (c->prog_data.nr_pull_params &&
(src->File == PROGRAM_STATE_VAR ||
src->File == PROGRAM_CONSTANT ||
src->File == PROGRAM_UNIFORM)) {
#endif
/* fetch any constants that this instruction needs */
- if (c->fp->use_const_buffer)
+ if (c->prog_data.nr_pull_params)
fetch_constants(c, inst);
if (inst->Opcode != OPCODE_ARL) {
struct intel_context *intel = &brw->intel;
struct brw_fragment_program *fp =
(struct brw_fragment_program *) brw->fragment_program;
- const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
- const int size = params->NumParameters * 4 * sizeof(GLfloat);
+ const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
+ float *constants;
+ unsigned int i;
_mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
/* BRW_NEW_FRAGMENT_PROGRAM */
- if (!fp->use_const_buffer) {
+ if (brw->wm.prog_data->nr_pull_params == 0) {
if (brw->wm.const_bo) {
drm_intel_bo_unreference(brw->wm.const_bo);
brw->wm.const_bo = NULL;
}
drm_intel_bo_unreference(brw->wm.const_bo);
- brw->wm.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
+ brw->wm.const_bo = drm_intel_bo_alloc(intel->bufmgr, "WM const bo",
size, 64);
/* _NEW_PROGRAM_CONSTANTS */
- drm_intel_bo_subdata(brw->wm.const_bo, 0, size, params->ParameterValues);
+ drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
+ constants = brw->wm.const_bo->virtual;
+ for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
+ constants[i] = *brw->wm.prog_data->pull_param[i];
+ }
+ drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
+
+ brw->state.dirty.brw |= BRW_NEW_WM_CONSTBUF;
}
const struct brw_tracked_state brw_wm_constants = {
brw_fragment_program_const(brw->fragment_program);
uint32_t dw2, dw4, dw5, dw6;
- if (fp->use_const_buffer || brw->wm.prog_data->nr_params == 0) {
+ if (brw->wm.prog_data->nr_params == 0) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2));