+ /* We need to emit our TLB reads after we have acquired the scoreboard
+ * lock, or the GPU will hang. Usually, we do our scoreboard locking on
+ * the last thread switch to improve parallelism, however, that is only
+ * guaranteed to happen before the tlb color writes.
+ *
+ * To fix that, we make sure we always emit a thread switch before the
+ * first tlb color read. If that happens to be the last thread switch
+ * we emit, then everything is fine, but otherwise, if any code after
+ * this point needs to emit additional thread switches, then we will
+ * switch the strategy to locking the scoreboard on the first thread
+ * switch instead -- see vir_emit_thrsw().
+ */
+ /* Only the first pass through here (emitted_tlb_load still false) does
+ * any work; the flag is set at the bottom of this block.
+ */
+ if (!c->emitted_tlb_load) {
+ /* Emit a thread switch now unless last_thrsw_at_top_level is
+ * already set (NOTE(review): exact meaning of that field is
+ * inferred from its name -- confirm against its declaration).
+ */
+ if (!c->last_thrsw_at_top_level) {
+ /* NOTE(review): presumably this path can only be reached
+ * when devinfo->ver >= 41 -- confirm why older versions
+ * never get here.
+ */
+ assert(c->devinfo->ver >= 41);
+ vir_emit_thrsw(c);
+ }
+
+ /* Set only after the vir_emit_thrsw() call above.
+ * NOTE(review): that helper may consult this flag when picking
+ * the scoreboard-lock strategy -- confirm before reordering.
+ */
+ c->emitted_tlb_load = true;
+ }
+