#define SF 3
#define CS 4
-/* XXX: Are the min_entry_size numbers useful?
+/** @file brw_urb.c
+ *
+ * Manages the division of the URB space between the various fixed-function
+ * units.
+ *
+ * See the Thread Initiation Management section of the GEN4 B-Spec, and
+ * the individual *_STATE structures for restrictions on numbers of
+ * entries and threads.
+ */
+
+/*
+ * Generally, a unit requires a min_nr_entries based on how many entries
+ * it produces before the downstream unit gets unblocked and can use and
+ * dereference some of its handles.
+ *
+ * The SF unit preallocates a PUE at the start of thread dispatch, and only
+ * uses that one. So it requires one entry per thread.
+ *
+ * For CLIP, the SF unit will hold the previous primitive while the
+ * next is getting assembled, meaning that linestrips require 3 CLIP VUEs
+ * (vertices) to ensure continued processing, trifans require 4, and tristrips
+ * require 5. There can be 1 or 2 threads, and each has the same requirement.
+ *
+ * GS has the same requirement as CLIP, but it never handles tristrips,
+ * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
+ * We only run it single-threaded.
+ *
+ * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
+ * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
+ * get streamed down as soon as threads processing earlier vertices get
+ * theirs accepted.
+ *
+ * Each unit will take the number of URB entries we give it (based on the
+ * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
+ * and brw_curbe.c for the CURBEs) and decide its maximum number of
+ * threads it can support based on that. in brw_*_state.c.
+ *
+ * XXX: Are the min_entry_size numbers useful?
* XXX: Verify min_nr_entries, esp for VS.
* XXX: Verify SF min_entry_size.
*/
} limits[CS+1] = {
{ 16, 32, 1, 5 }, /* vs */
{ 4, 8, 1, 5 }, /* gs */
- { 6, 8, 1, 5 }, /* clp */
+ { 5, 10, 1, 5 }, /* clp */
{ 1, 8, 1, 12 }, /* sf */
{ 1, 4, 1, 32 } /* cs */
};
-static GLboolean check_urb_layout( struct brw_context *brw )
+static bool check_urb_layout(struct brw_context *brw)
{
brw->urb.vs_start = 0;
brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
- return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= URB_SIZES(brw);
+ return brw->urb.cs_start + brw->urb.nr_cs_entries *
+ brw->urb.csize <= brw->urb.size;
}
/* Most minimal update, forces re-emit of URB fence packet after GS
static void recalculate_urb_fence( struct brw_context *brw )
{
GLuint csize = brw->curbe.total_size;
- GLuint vsize = brw->vs.prog_data->urb_entry_size;
+ GLuint vsize = brw->vs.prog_data->base.urb_entry_size;
GLuint sfsize = brw->sf.prog_data->urb_entry_size;
if (csize < limits[CS].min_entry_size)
brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
-
+
+ brw->urb.constrained = 0;
+
+ if (brw->gen == 5) {
+ brw->urb.nr_vs_entries = 128;
+ brw->urb.nr_sf_entries = 48;
+ if (check_urb_layout(brw)) {
+ goto done;
+ } else {
+ brw->urb.constrained = 1;
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+ }
+ } else if (brw->is_g4x) {
+ brw->urb.nr_vs_entries = 64;
+ if (check_urb_layout(brw)) {
+ goto done;
+ } else {
+ brw->urb.constrained = 1;
+ brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+ }
+ }
+
if (!check_urb_layout(brw)) {
brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
* entries and the values for minimum nr of entries
* provided above.
*/
- _mesa_printf("couldn't calculate URB layout!\n");
+ printf("couldn't calculate URB layout!\n");
exit(1);
}
- if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
- _mesa_printf("URB CONSTRAINED\n");
+ if (unlikely(INTEL_DEBUG & (DEBUG_URB|DEBUG_PERF)))
+ printf("URB CONSTRAINED\n");
}
- else
- brw->urb.constrained = 0;
- if (INTEL_DEBUG & DEBUG_URB)
- _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+done:
+ if (unlikely(INTEL_DEBUG & DEBUG_URB))
+ printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
brw->urb.vs_start,
brw->urb.gs_start,
brw->urb.clip_start,
brw->urb.sf_start,
brw->urb.cs_start,
- URB_SIZES(brw));
+ brw->urb.size);
brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
}
.cache = (CACHE_NEW_VS_PROG |
CACHE_NEW_SF_PROG)
},
- .prepare = recalculate_urb_fence
+ .emit = recalculate_urb_fence
};
uf.bits0.gs_fence = brw->urb.clip_start;
uf.bits0.clp_fence = brw->urb.sf_start;
uf.bits1.sf_fence = brw->urb.cs_start;
- uf.bits1.cs_fence = URB_SIZES(brw);
+ uf.bits1.cs_fence = brw->urb.size;
+
+ /* erratum: URB_FENCE must not cross a 64byte cacheline */
+ if ((brw->batch.used & 15) > 12) {
+ int pad = 16 - (brw->batch.used & 15);
+ do
+ brw->batch.map[brw->batch.used++] = MI_NOOP;
+ while (--pad);
+ }
BRW_BATCH_STRUCT(brw, &uf);
}