* ignore uniforms accesses, because qir_reorder_uniforms() happens
* after this.
*/
- for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ for (int i = 0; i < qir_get_nsrc(inst); i++) {
switch (inst->src[i].file) {
case QFILE_TEMP:
add_dep(dir,
add_dep(dir, state->last_vary_read, n);
break;
- case QOP_TEX_S:
- case QOP_TEX_T:
- case QOP_TEX_R:
- case QOP_TEX_B:
- case QOP_TEX_DIRECT:
- /* Texturing setup gets scheduled in order, because
- * the uniforms referenced by them have to land in a
- * specific order.
- */
- add_write_dep(dir, &state->last_tex_coord, n);
- break;
-
case QOP_TEX_RESULT:
/* Results have to be fetched in order. */
add_write_dep(dir, &state->last_tex_result, n);
add_write_dep(dir, &state->last_tlb, n);
break;
+ case QFILE_TEX_S_DIRECT:
+ case QFILE_TEX_S:
+ case QFILE_TEX_T:
+ case QFILE_TEX_R:
+ case QFILE_TEX_B:
+ /* Texturing setup gets scheduled in order, because
+ * the uniforms referenced by them have to land in a
+ * specific order.
+ */
+ add_write_dep(dir, &state->last_tex_coord, n);
+ break;
+
default:
break;
}
calculate_deps(&state, n);
- for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ for (int i = 0; i < qir_get_nsrc(inst); i++) {
switch (inst->src[i].file) {
case QFILE_UNIF:
add_dep(state.dir, state.last_uniforms_reset, n);
}
}
- switch (inst->op) {
- case QOP_TEX_S:
- case QOP_TEX_T:
- case QOP_TEX_R:
- case QOP_TEX_B:
- case QOP_TEX_DIRECT:
+ switch (inst->dst.file) {
+ case QFILE_TEX_S_DIRECT:
+ case QFILE_TEX_S:
+ case QFILE_TEX_T:
+ case QFILE_TEX_R:
+ case QFILE_TEX_B:
/* From the VC4 spec:
*
* "The TFREQ input FIFO holds two full lots of s,
* If the texture result fifo is full, block adding
* any more to it until the last QOP_TEX_RESULT.
*/
- if (inst->op == QOP_TEX_S ||
- inst->op == QOP_TEX_DIRECT) {
+ if (inst->dst.file == QFILE_TEX_S ||
+ inst->dst.file == QFILE_TEX_S_DIRECT) {
if (state.tfrcv_count ==
(c->fs_threaded ? 2 : 4))
block_until_tex_result(&state, n);
state.tfreq_count++;
break;
+ default:
+ break;
+ }
+
+ switch (inst->op) {
case QOP_TEX_RESULT:
/* Results have to be fetched after the
* coordinate setup. Note that we're assuming
break;
default:
- assert(!qir_is_tex(inst));
break;
}
}
state->temp_writes[inst->dst.index] == 1)
cost--;
- for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
- if (inst->src[i].file == QFILE_TEMP &&
- !BITSET_TEST(state->temp_live, inst->src[i].index)) {
- cost++;
+ for (int i = 0; i < qir_get_nsrc(inst); i++) {
+ if (inst->src[i].file != QFILE_TEMP ||
+ BITSET_TEST(state->temp_live, inst->src[i].index)) {
+ continue;
+ }
+
+ bool already_counted = false;
+ for (int j = 0; j < i; j++) {
+ if (inst->src[i].file == inst->src[j].file &&
+ inst->src[i].index == inst->src[j].index) {
+ already_counted = true;
+ }
}
+ if (!already_counted)
+ cost++;
}
return cost;
/**
 * Returns a latency estimate for scheduling \p after relative to \p before.
 *
 * Three cases are distinguished, in this order:
 *
 *  - 100 when \p before is a texture S-coordinate write (identified by its
 *    destination file being QFILE_TEX_S or QFILE_TEX_S_DIRECT in the added
 *    lines; by the QOP_TEX_S / QOP_TEX_DIRECT opcode in the removed lines)
 *    and \p after is a QOP_TEX_RESULT.
 *  - 4 when \p before is one of QOP_RCP, QOP_RSQ, QOP_EXP2 or QOP_LOG2 and
 *    any source of \p after has the same file and index as the destination
 *    of \p before, i.e. \p after reads the math result.
 *  - 1 otherwise.
 *
 * NOTE(review): the unit appears to be QIR instructions (see the comment on
 * the math-result case); the value 100 for texture fetches is not explained
 * in this view -- confirm against the scheduler's cost model.
 *
 * \param before  node whose instruction produces the value/request.
 * \param after   node whose instruction is checked as the consumer.
 * \return the latency estimate described above.
 */
static uint32_t
latency_between(struct schedule_node *before, struct schedule_node *after)
{
- if ((before->inst->op == QOP_TEX_S ||
- before->inst->op == QOP_TEX_DIRECT) &&
+ if ((before->inst->dst.file == QFILE_TEX_S ||
+ before->inst->dst.file == QFILE_TEX_S_DIRECT) &&
after->inst->op == QOP_TEX_RESULT)
return 100;
+ switch (before->inst->op) {
+ case QOP_RCP:
+ case QOP_RSQ:
+ case QOP_EXP2:
+ case QOP_LOG2:
+ for (int i = 0; i < qir_get_nsrc(after->inst); i++) {
+ if (after->inst->src[i].file ==
+ before->inst->dst.file &&
+ after->inst->src[i].index ==
+ before->inst->dst.index) {
+ /* There are two QPU delay slots before we can
+ * read a math result, which could be up to 4
+ * QIR instructions if they packed well.
+ */
+ return 4;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
return 1;
}
compute_delay(n->children[i]);
n->delay = MAX2(n->delay,
n->children[i]->delay +
- latency_between(n, n->children[i]));
+ latency_between(n->children[i], n));
}
}
}
child->unblocked_time = MAX2(child->unblocked_time,
state->time +
- latency_between(chosen,
- child));
+ latency_between(child,
+ chosen));
child->parent_count--;
if (child->parent_count == 0)
list_add(&child->link, &state->worklist);
}
/* Update our tracking of register pressure. */
- for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ for (int i = 0; i < qir_get_nsrc(inst); i++) {
if (inst->src[i].file == QFILE_TEMP)
BITSET_SET(state->temp_live, inst->src[i].index);
}