}
brw_inst *
-gen6_HALT(struct brw_codegen *p)
+brw_HALT(struct brw_codegen *p)
{
const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_HALT);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- if (devinfo->gen < 8) {
+ if (devinfo->gen < 6) {
+ /* From the Gen4 PRM:
+ *
+ * "IP register must be put (for example, by the assembler) at <dst>
+ * and <src0> locations."
+ */
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0)); /* exitcode updated later. */
+ } else if (devinfo->gen < 8) {
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
} else if (devinfo->gen < 12) {
/* Patches every HALT recorded in discard_halt_patches with its jump distance
 * and then empties the list.
 *
 * Returns false, emitting nothing, when no HALTs were recorded; returns true
 * otherwise.
 *
 * On gen >= 6 a trailing HALT is emitted first and each recorded HALT gets
 * its UIP set; on gen < 6 the distance is written into src1 instead, and
 * extra MOVs are emitted afterwards to restore AMask (and, on original gen4
 * that is not g4x, to clear the mask stack).
 *
 * NOTE(review): this is a patch excerpt; the loop header that introduces
 * patch_ip is not visible in this view.
 */
bool
fs_generator::patch_discard_jumps_to_fb_writes()
{
- if (devinfo->gen < 6 || this->discard_halt_patches.is_empty())
+ if (this->discard_halt_patches.is_empty())
return false;
int scale = brw_jump_scale(p->devinfo);
- /* There is a somewhat strange undocumented requirement of using
- * HALT, according to the simulator. If some channel has HALTed to
- * a particular UIP, then by the end of the program, every channel
- * must have HALTed to that UIP. Furthermore, the tracking is a
- * stack, so you can't do the final halt of a UIP after starting
- * halting to a new UIP.
- *
- * Symptoms of not emitting this instruction on actual hardware
- * included GPU hangs and sparkly rendering on the piglit discard
- * tests.
- */
- brw_inst *last_halt = gen6_HALT(p);
- brw_inst_set_uip(p->devinfo, last_halt, 1 * scale);
- brw_inst_set_jip(p->devinfo, last_halt, 1 * scale);
+ if (devinfo->gen >= 6) {
+ /* There is a somewhat strange undocumented requirement of using
+ * HALT, according to the simulator. If some channel has HALTed to
+ * a particular UIP, then by the end of the program, every channel
+ * must have HALTed to that UIP. Furthermore, the tracking is a
+ * stack, so you can't do the final halt of a UIP after starting
+ * halting to a new UIP.
+ *
+ * Symptoms of not emitting this instruction on actual hardware
+ * included GPU hangs and sparkly rendering on the piglit discard
+ * tests.
+ */
+ brw_inst *last_halt = brw_HALT(p);
+ brw_inst_set_uip(p->devinfo, last_halt, 1 * scale);
+ brw_inst_set_jip(p->devinfo, last_halt, 1 * scale);
+ }
/* Jump target for the recorded HALTs: the distances written below are
 * measured from each recorded HALT's ip to this value, which is read after
 * the optional trailing HALT above has been emitted.
 */
int ip = p->nr_insn;
brw_inst *patch = &p->store[patch_ip->ip];
assert(brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_HALT);
- /* HALT takes a half-instruction distance from the pre-incremented IP. */
- brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale);
+ if (devinfo->gen >= 6) {
+ /* HALT takes a half-instruction distance from the pre-incremented IP. */
+ brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale);
+ } else {
+ brw_set_src1(p, patch, brw_imm_d((ip - patch_ip->ip) * scale));
+ }
}
this->discard_halt_patches.make_empty();
+
+ if (devinfo->gen < 6) {
+ /* From the g965 PRM:
+ *
+ * "As DMask is not automatically reloaded into AMask upon completion
+ * of this instruction, software has to manually restore AMask upon
+ * completion."
+ *
+ * DMask lives in the bottom 16 bits of sr0.1.
+ */
+ brw_inst *reset = brw_MOV(p, brw_mask_reg(BRW_AMASK),
+ retype(brw_sr0_reg(1), BRW_REGISTER_TYPE_UW));
+ brw_inst_set_exec_size(devinfo, reset, BRW_EXECUTE_1);
+ brw_inst_set_mask_control(devinfo, reset, BRW_MASK_DISABLE);
+ brw_inst_set_qtr_control(devinfo, reset, BRW_COMPRESSION_NONE);
+ brw_inst_set_thread_control(devinfo, reset, BRW_THREAD_SWITCH);
+ }
+
+ if (devinfo->gen == 4 && !devinfo->is_g4x) {
+ /* From the g965 PRM:
+ *
+ * "[DevBW, DevCL] Erratum: The subfields in mask stack register are
+ * reset to zero during graphics reset, however, they are not
+ * initialized at thread dispatch. These subfields will retain the
+ * values from the previous thread. Software should make sure the
+ * mask stack is empty (reset to zero) before terminating the thread.
+ * In case that this is not practical, software may have to reset the
+ * mask stack at the beginning of each kernel, which will impact the
+ * performance."
+ *
+ * Luckily we can rely on:
+ *
+ * "[DevBW, DevCL] This register access restriction is not
+ * applicable, hardware does ensure execution pipeline coherency,
+ * when a mask stack register is used as an explicit source and/or
+ * destination."
+ */
+ brw_push_insn_state(p);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+
+ brw_set_default_exec_size(p, BRW_EXECUTE_2);
+ brw_MOV(p, vec2(brw_mask_stack_depth_reg(0)), brw_imm_uw(0));
+
+ brw_set_default_exec_size(p, BRW_EXECUTE_16);
+ /* Reset the if stack. */
+ brw_MOV(p, retype(brw_mask_stack_reg(0), BRW_REGISTER_TYPE_UW),
+ brw_imm_uw(0));
+
+ brw_pop_insn_state(p);
+ }
+
return true;
}
/* Emits a HALT for a discard jump and records the value of p->nr_insn taken
 * just before emission in discard_halt_patches, so the HALT can be located
 * and patched later. The fs_inst argument is unused.
 */
void
fs_generator::generate_discard_jump(fs_inst *)
{
- assert(devinfo->gen >= 6);
-
/* This HALT will be patched up at FB write time to point UIP at the end of
* the program, and at brw_uip_jip() JIP will be set to the end of the
* current block (or the program).
*
* On gen < 6 the patch step writes the jump distance into src1 of the HALT
* instead of setting UIP.
*/
this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn));
- gen6_HALT(p);
+ brw_HALT(p);
}
void