*/
int
ir3_delayslots(struct ir3_instruction *assigner,
- struct ir3_instruction *consumer, unsigned n)
+ struct ir3_instruction *consumer, unsigned n, bool soft)
{
if (ignore_dep(assigner, consumer, n))
return 0;
if (is_meta(assigner) || is_meta(consumer))
return 0;
- if (writes_addr(assigner))
+ if (writes_addr0(assigner) || writes_addr1(assigner))
return 6;
+ /* On a6xx, the number of delay slots it takes to get an SFU result
+ * back (ie. using nop's instead of (ss)) is:
+ *
+ * 8 - single warp
+ * 9 - two warps
+ * 10 - four warps
+ *
+ * and so on. Not quite sure where it tapers out (ie. how many
+ * warps share an SFU unit). But 10 seems like a reasonable #
+ * to choose:
+ */
+ if (soft && is_sfu(assigner))
+ return 10;
+
/* handled via sync flags: */
if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
return 0;
* find the worst case (shortest) distance (only possible after
* individual blocks are all scheduled)
*/
-unsigned
-ir3_distance(struct ir3_block *block, struct ir3_instruction *instr,
+static unsigned
+distance(struct ir3_block *block, struct ir3_instruction *instr,
unsigned maxd, bool pred)
{
unsigned d = 0;
struct ir3_block *pred = (struct ir3_block *)entry->key;
unsigned n;
- n = ir3_distance(pred, instr, min, pred);
+ n = distance(pred, instr, min, pred);
min = MIN2(min, n);
}
delay = MAX2(delay, d);
}
} else {
- if (soft) {
- if (is_sfu(assigner)) {
- delay = 4;
- } else {
- delay = ir3_delayslots(assigner, consumer, srcn);
- }
- } else {
- delay = ir3_delayslots(assigner, consumer, srcn);
- }
- delay -= ir3_distance(block, assigner, delay, pred);
+ delay = ir3_delayslots(assigner, consumer, srcn, soft);
+ delay -= distance(block, assigner, delay, pred);
}
return delay;
return delay;
}
+
+/**
+ * Remove nop instructions. The scheduler can insert placeholder nop's
+ * so that ir3_delay_calc() can account for nop's that won't be needed
+ * due to nop's triggered by a previous instruction. However, before
+ * legalize, we want to remove these. The legalize pass can insert
+ * some nop's if needed to hold (for example) sync flags. These final
+ * remaining nops are inserted by legalize after this.
+ *
+ * Walks every block in ir->block_list and unlinks each instruction
+ * whose opcode is OPC_NOP from that block's instr_list. Only
+ * list_del() is called here; nothing in this function frees the
+ * unlinked instruction.
+ */
+void
+ir3_remove_nops(struct ir3 *ir)
+{
+ foreach_block (block, &ir->block_list) {
+ foreach_instr_safe (instr, &block->instr_list) { /* NOTE(review): presumably the _safe variant tolerates unlinking the current entry -- confirm */
+ if (instr->opc == OPC_NOP) {
+ list_del(&instr->node); /* unlink only; the instruction object itself is left untouched */
+ }
+ }
+ }
+
+}