Merge ../mesa into vulkan
[mesa.git] / src / mesa / drivers / dri / i965 / brw_binding_tables.c
1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
25 * \file brw_binding_tables.c
26 *
27 * State atoms which upload the "binding table" for each shader stage.
28 *
29 * Binding tables map a numeric "surface index" to the SURFACE_STATE structure
30 * for a currently bound surface. This allows SEND messages (such as sampler
31 * or data port messages) to refer to a particular surface by number, rather
32 * than by pointer.
33 *
34 * The binding table is stored as a (sparse) array of offsets to SURFACE_STATE
35 * entries; surface indexes are simply indexes into that array. The ordering of the
36 * entries is entirely left up to software; see the SURF_INDEX_* macros in
37 * brw_context.h to see our current layout.
38 */
39
40 #include "main/mtypes.h"
41
42 #include "brw_context.h"
43 #include "brw_defines.h"
44 #include "brw_state.h"
45 #include "intel_batchbuffer.h"
46
47 static const GLuint stage_to_bt_edit[] = {
48 [MESA_SHADER_VERTEX] = _3DSTATE_BINDING_TABLE_EDIT_VS,
49 [MESA_SHADER_GEOMETRY] = _3DSTATE_BINDING_TABLE_EDIT_GS,
50 [MESA_SHADER_FRAGMENT] = _3DSTATE_BINDING_TABLE_EDIT_PS,
51 };
52
53 static uint32_t
54 reserve_hw_bt_space(struct brw_context *brw, unsigned bytes)
55 {
56 /* From the Broadwell PRM, Volume 16, "Workarounds",
57 * WaStateBindingTableOverfetch:
58 * "HW over-fetches two cache lines of binding table indices. When
59 * using the resource streamer, SW needs to pad binding table pointer
60 * updates with an additional two cache lines."
61 *
62 * Cache lines are 64 bytes, so we subtract 128 bytes from the size of
63 * the binding table pool buffer.
64 */
65 if (brw->hw_bt_pool.next_offset + bytes >= brw->hw_bt_pool.bo->size - 128) {
66 gen7_reset_hw_bt_pool_offsets(brw);
67 }
68
69 uint32_t offset = brw->hw_bt_pool.next_offset;
70
71 /* From the Haswell PRM, Volume 2b: Command Reference: Instructions,
72 * 3DSTATE_BINDING_TABLE_POINTERS_xS:
73 *
74 * "If HW Binding Table is enabled, the offset is relative to the
75 * Binding Table Pool Base Address and the alignment is 64 bytes."
76 */
77 brw->hw_bt_pool.next_offset += ALIGN(bytes, 64);
78
79 return offset;
80 }
81
82 /**
83 * Upload a shader stage's binding table as indirect state.
84 *
85 * This copies brw_stage_state::surf_offset[] into the indirect state section
86 * of the batchbuffer (allocated by brw_state_batch()).
87 */
88 void
89 brw_upload_binding_table(struct brw_context *brw,
90 uint32_t packet_name,
91 const struct brw_stage_prog_data *prog_data,
92 struct brw_stage_state *stage_state)
93 {
94 if (prog_data->binding_table.size_bytes == 0) {
95 /* There are no surfaces; skip making the binding table altogether. */
96 if (stage_state->bind_bo_offset == 0 && brw->gen < 9)
97 return;
98
99 stage_state->bind_bo_offset = 0;
100 } else {
101 /* Upload a new binding table. */
102 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
103 brw->vtbl.emit_buffer_surface_state(
104 brw, &stage_state->surf_offset[
105 prog_data->binding_table.shader_time_start],
106 brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
107 brw->shader_time.bo->size, 1, true);
108 }
109 /* When RS is enabled use hw-binding table uploads, otherwise fallback to
110 * software-uploads.
111 */
112 if (brw->use_resource_streamer) {
113 gen7_update_binding_table_from_array(brw, stage_state->stage,
114 stage_state->surf_offset,
115 prog_data->binding_table
116 .size_bytes / 4);
117 } else {
118 uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
119 prog_data->binding_table.size_bytes,
120 32,
121 &stage_state->bind_bo_offset);
122
123 /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
124 memcpy(bind, stage_state->surf_offset,
125 prog_data->binding_table.size_bytes);
126 }
127 }
128
129 brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
130
131 if (brw->gen >= 7) {
132 if (brw->use_resource_streamer) {
133 stage_state->bind_bo_offset =
134 reserve_hw_bt_space(brw, prog_data->binding_table.size_bytes);
135 }
136 BEGIN_BATCH(2);
137 OUT_BATCH(packet_name << 16 | (2 - 2));
138 /* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field
139 * when hw-generated binding table is enabled.
140 */
141 OUT_BATCH(brw->use_resource_streamer ?
142 (stage_state->bind_bo_offset >> 1) :
143 stage_state->bind_bo_offset);
144 ADVANCE_BATCH();
145 }
146 }
147
148 /**
149 * State atoms which upload the binding table for a particular shader stage.
150 * @{
151 */
152
153 /** Upload the VS binding table. */
154 static void
155 brw_vs_upload_binding_table(struct brw_context *brw)
156 {
157 /* BRW_NEW_VS_PROG_DATA */
158 const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
159 brw_upload_binding_table(brw,
160 _3DSTATE_BINDING_TABLE_POINTERS_VS,
161 prog_data,
162 &brw->vs.base);
163 }
164
165 const struct brw_tracked_state brw_vs_binding_table = {
166 .dirty = {
167 .mesa = 0,
168 .brw = BRW_NEW_BATCH |
169 BRW_NEW_VS_CONSTBUF |
170 BRW_NEW_VS_PROG_DATA |
171 BRW_NEW_SURFACES,
172 },
173 .emit = brw_vs_upload_binding_table,
174 };
175
176
177 /** Upload the PS binding table. */
178 static void
179 brw_upload_wm_binding_table(struct brw_context *brw)
180 {
181 /* BRW_NEW_FS_PROG_DATA */
182 const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
183 brw_upload_binding_table(brw,
184 _3DSTATE_BINDING_TABLE_POINTERS_PS,
185 prog_data,
186 &brw->wm.base);
187 }
188
189 const struct brw_tracked_state brw_wm_binding_table = {
190 .dirty = {
191 .mesa = 0,
192 .brw = BRW_NEW_BATCH |
193 BRW_NEW_FS_PROG_DATA |
194 BRW_NEW_SURFACES,
195 },
196 .emit = brw_upload_wm_binding_table,
197 };
198
199 /** Upload the TCS binding table (if tessellation stages are active). */
200 static void
201 brw_tcs_upload_binding_table(struct brw_context *brw)
202 {
203 /* Skip if the tessellation stages are disabled. */
204 if (brw->tess_eval_program == NULL)
205 return;
206
207 /* BRW_NEW_TCS_PROG_DATA */
208 const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
209 brw_upload_binding_table(brw,
210 _3DSTATE_BINDING_TABLE_POINTERS_HS,
211 prog_data,
212 &brw->tcs.base);
213 }
214
215 const struct brw_tracked_state brw_tcs_binding_table = {
216 .dirty = {
217 .mesa = 0,
218 .brw = BRW_NEW_BATCH |
219 BRW_NEW_DEFAULT_TESS_LEVELS |
220 BRW_NEW_SURFACES |
221 BRW_NEW_TCS_CONSTBUF |
222 BRW_NEW_TCS_PROG_DATA,
223 },
224 .emit = brw_tcs_upload_binding_table,
225 };
226
227 /** Upload the TES binding table (if TES is active). */
228 static void
229 brw_tes_upload_binding_table(struct brw_context *brw)
230 {
231 /* If there's no TES, skip changing anything. */
232 if (brw->tess_eval_program == NULL)
233 return;
234
235 /* BRW_NEW_TES_PROG_DATA */
236 const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
237 brw_upload_binding_table(brw,
238 _3DSTATE_BINDING_TABLE_POINTERS_DS,
239 prog_data,
240 &brw->tes.base);
241 }
242
243 const struct brw_tracked_state brw_tes_binding_table = {
244 .dirty = {
245 .mesa = 0,
246 .brw = BRW_NEW_BATCH |
247 BRW_NEW_SURFACES |
248 BRW_NEW_TES_CONSTBUF |
249 BRW_NEW_TES_PROG_DATA,
250 },
251 .emit = brw_tes_upload_binding_table,
252 };
253
254 /** Upload the GS binding table (if GS is active). */
255 static void
256 brw_gs_upload_binding_table(struct brw_context *brw)
257 {
258 /* If there's no GS, skip changing anything. */
259 if (brw->geometry_program == NULL)
260 return;
261
262 /* BRW_NEW_GS_PROG_DATA */
263 const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
264 brw_upload_binding_table(brw,
265 _3DSTATE_BINDING_TABLE_POINTERS_GS,
266 prog_data,
267 &brw->gs.base);
268 }
269
270 const struct brw_tracked_state brw_gs_binding_table = {
271 .dirty = {
272 .mesa = 0,
273 .brw = BRW_NEW_BATCH |
274 BRW_NEW_GS_CONSTBUF |
275 BRW_NEW_GS_PROG_DATA |
276 BRW_NEW_SURFACES,
277 },
278 .emit = brw_gs_upload_binding_table,
279 };
280
281 /**
282 * Edit a single entry in a hardware-generated binding table
283 */
284 void
285 gen7_edit_hw_binding_table_entry(struct brw_context *brw,
286 gl_shader_stage stage,
287 uint32_t index,
288 uint32_t surf_offset)
289 {
290 assert(stage < ARRAY_SIZE(stage_to_bt_edit));
291 assert(stage_to_bt_edit[stage]);
292
293 uint32_t dw2 = SET_FIELD(index, BRW_BINDING_TABLE_INDEX) |
294 (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(surf_offset) :
295 HSW_SURFACE_STATE_EDIT(surf_offset));
296
297 BEGIN_BATCH(3);
298 OUT_BATCH(stage_to_bt_edit[stage] << 16 | (3 - 2));
299 OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
300 OUT_BATCH(dw2);
301 ADVANCE_BATCH();
302 }
303
304 /**
305 * Upload a whole hardware binding table for the given stage.
306 *
307 * Takes an array of surface offsets and the number of binding table
308 * entries.
309 */
310 void
311 gen7_update_binding_table_from_array(struct brw_context *brw,
312 gl_shader_stage stage,
313 const uint32_t* binding_table,
314 int num_surfaces)
315 {
316 uint32_t dw2 = 0;
317
318 assert(stage < ARRAY_SIZE(stage_to_bt_edit));
319 assert(stage_to_bt_edit[stage]);
320
321 BEGIN_BATCH(num_surfaces + 2);
322 OUT_BATCH(stage_to_bt_edit[stage] << 16 | num_surfaces);
323 OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
324 for (int i = 0; i < num_surfaces; i++) {
325 dw2 = SET_FIELD(i, BRW_BINDING_TABLE_INDEX) |
326 (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(binding_table[i]) :
327 HSW_SURFACE_STATE_EDIT(binding_table[i]));
328 OUT_BATCH(dw2);
329 }
330 ADVANCE_BATCH();
331 }
332
333 /**
334 * Disable hardware binding table support, falling back to the
335 * older software-generated binding table mechanism.
336 */
337 void
338 gen7_disable_hw_binding_tables(struct brw_context *brw)
339 {
340 if (!brw->use_resource_streamer)
341 return;
342 /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
343 * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
344 *
345 * "When switching between HW and SW binding table generation, SW must
346 * issue a state cache invalidate."
347 */
348 brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
349
350 int pkt_len = brw->gen >= 8 ? 4 : 3;
351
352 BEGIN_BATCH(pkt_len);
353 OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
354 if (brw->gen >= 8) {
355 OUT_BATCH(0);
356 OUT_BATCH(0);
357 OUT_BATCH(0);
358 } else {
359 OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE);
360 OUT_BATCH(0);
361 }
362 ADVANCE_BATCH();
363 }
364
365 /**
366 * Enable hardware binding tables and set up the binding table pool.
367 */
368 static void
369 gen7_enable_hw_binding_tables(struct brw_context *brw)
370 {
371 if (!brw->use_resource_streamer)
372 return;
373
374 if (!brw->hw_bt_pool.bo) {
375 /* We use a single re-usable buffer object for the lifetime of the
376 * context and size it to maximum allowed binding tables that can be
377 * programmed per batch:
378 *
379 * From the Haswell PRM, Volume 7: 3D Media GPGPU,
380 * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
381 * "A maximum of 16,383 Binding tables are allowed in any batch buffer"
382 */
383 static const int max_size = 16383 * 4;
384 brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
385 max_size, 64);
386 brw->hw_bt_pool.next_offset = 0;
387 }
388
389 /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
390 * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
391 *
392 * "When switching between HW and SW binding table generation, SW must
393 * issue a state cache invalidate."
394 */
395 brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
396
397 int pkt_len = brw->gen >= 8 ? 4 : 3;
398 uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE;
399 if (brw->is_haswell) {
400 dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
401 HSW_BT_POOL_ALLOC_MUST_BE_ONE;
402 } else if (brw->gen >= 8) {
403 dw1 |= BDW_MOCS_WB;
404 }
405
406 BEGIN_BATCH(pkt_len);
407 OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
408 if (brw->gen >= 8) {
409 OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
410 OUT_BATCH(brw->hw_bt_pool.bo->size);
411 } else {
412 OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
413 OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
414 brw->hw_bt_pool.bo->size);
415 }
416 ADVANCE_BATCH();
417 }
418
419 void
420 gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
421 {
422 brw->hw_bt_pool.next_offset = 0;
423 }
424
425 const struct brw_tracked_state gen7_hw_binding_tables = {
426 .dirty = {
427 .mesa = 0,
428 .brw = BRW_NEW_BATCH,
429 },
430 .emit = gen7_enable_hw_binding_tables
431 };
432
433 /** @} */
434
435 /**
436 * State atoms which emit 3DSTATE packets to update the binding table pointers.
437 * @{
438 */
439
440 /**
441 * (Gen4-5) Upload the binding table pointers for all shader stages.
442 *
443 * The binding table pointers are relative to the surface state base address,
444 * which points at the batchbuffer containing the streamed batch state.
445 */
446 static void
447 gen4_upload_binding_table_pointers(struct brw_context *brw)
448 {
449 BEGIN_BATCH(6);
450 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
451 OUT_BATCH(brw->vs.base.bind_bo_offset);
452 OUT_BATCH(0); /* gs */
453 OUT_BATCH(0); /* clip */
454 OUT_BATCH(0); /* sf */
455 OUT_BATCH(brw->wm.base.bind_bo_offset);
456 ADVANCE_BATCH();
457 }
458
459 const struct brw_tracked_state brw_binding_table_pointers = {
460 .dirty = {
461 .mesa = 0,
462 .brw = BRW_NEW_BATCH |
463 BRW_NEW_BINDING_TABLE_POINTERS |
464 BRW_NEW_STATE_BASE_ADDRESS,
465 },
466 .emit = gen4_upload_binding_table_pointers,
467 };
468
469 /**
470 * (Sandybridge Only) Upload the binding table pointers for all shader stages.
471 *
472 * The binding table pointers are relative to the surface state base address,
473 * which points at the batchbuffer containing the streamed batch state.
474 */
475 static void
476 gen6_upload_binding_table_pointers(struct brw_context *brw)
477 {
478 BEGIN_BATCH(4);
479 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
480 GEN6_BINDING_TABLE_MODIFY_VS |
481 GEN6_BINDING_TABLE_MODIFY_GS |
482 GEN6_BINDING_TABLE_MODIFY_PS |
483 (4 - 2));
484 OUT_BATCH(brw->vs.base.bind_bo_offset); /* vs */
485 if (brw->ff_gs.prog_active)
486 OUT_BATCH(brw->ff_gs.bind_bo_offset); /* gs */
487 else
488 OUT_BATCH(brw->gs.base.bind_bo_offset); /* gs */
489 OUT_BATCH(brw->wm.base.bind_bo_offset); /* wm/ps */
490 ADVANCE_BATCH();
491 }
492
493 const struct brw_tracked_state gen6_binding_table_pointers = {
494 .dirty = {
495 .mesa = 0,
496 .brw = BRW_NEW_BATCH |
497 BRW_NEW_BINDING_TABLE_POINTERS |
498 BRW_NEW_STATE_BASE_ADDRESS,
499 },
500 .emit = gen6_upload_binding_table_pointers,
501 };
502
503 /** @} */