i965/fs: Lower 32x32 bit multiplication on BXT.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_binding_tables.c
1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
25 * \file brw_binding_tables.c
26 *
27 * State atoms which upload the "binding table" for each shader stage.
28 *
29 * Binding tables map a numeric "surface index" to the SURFACE_STATE structure
30 * for a currently bound surface. This allows SEND messages (such as sampler
31 * or data port messages) to refer to a particular surface by number, rather
32 * than by pointer.
33 *
34 * The binding table is stored as a (sparse) array of SURFACE_STATE entries;
35 * surface indexes are simply indexes into the array. The ordering of the
36 * entries is entirely left up to software; see the SURF_INDEX_* macros in
37 * brw_context.h to see our current layout.
38 */
39
40 #include "main/mtypes.h"
41
42 #include "brw_context.h"
43 #include "brw_defines.h"
44 #include "brw_state.h"
45 #include "intel_batchbuffer.h"
46
47 static const GLuint stage_to_bt_edit[MESA_SHADER_FRAGMENT + 1] = {
48 _3DSTATE_BINDING_TABLE_EDIT_VS,
49 _3DSTATE_BINDING_TABLE_EDIT_GS,
50 _3DSTATE_BINDING_TABLE_EDIT_PS,
51 };
52
53 static uint32_t
54 reserve_hw_bt_space(struct brw_context *brw, unsigned bytes)
55 {
56 /* From the Broadwell PRM, Volume 16, "Workarounds",
57 * WaStateBindingTableOverfetch:
58 * "HW over-fetches two cache lines of binding table indices. When
59 * using the resource streamer, SW needs to pad binding table pointer
60 * updates with an additional two cache lines."
61 *
62 * Cache lines are 64 bytes, so we subtract 128 bytes from the size of
63 * the binding table pool buffer.
64 */
65 if (brw->hw_bt_pool.next_offset + bytes >= brw->hw_bt_pool.bo->size - 128) {
66 gen7_reset_hw_bt_pool_offsets(brw);
67 }
68
69 uint32_t offset = brw->hw_bt_pool.next_offset;
70
71 /* From the Haswell PRM, Volume 2b: Command Reference: Instructions,
72 * 3DSTATE_BINDING_TABLE_POINTERS_xS:
73 *
74 * "If HW Binding Table is enabled, the offset is relative to the
75 * Binding Table Pool Base Address and the alignment is 64 bytes."
76 */
77 brw->hw_bt_pool.next_offset += ALIGN(bytes, 64);
78
79 return offset;
80 }
81
82 /**
83 * Upload a shader stage's binding table as indirect state.
84 *
85 * This copies brw_stage_state::surf_offset[] into the indirect state section
86 * of the batchbuffer (allocated by brw_state_batch()).
87 */
88 void
89 brw_upload_binding_table(struct brw_context *brw,
90 uint32_t packet_name,
91 GLbitfield brw_new_binding_table,
92 const struct brw_stage_prog_data *prog_data,
93 struct brw_stage_state *stage_state)
94 {
95 if (prog_data->binding_table.size_bytes == 0) {
96 /* There are no surfaces; skip making the binding table altogether. */
97 if (stage_state->bind_bo_offset == 0 && brw->gen < 9)
98 return;
99
100 stage_state->bind_bo_offset = 0;
101 } else {
102 /* Upload a new binding table. */
103 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
104 brw->vtbl.emit_buffer_surface_state(
105 brw, &stage_state->surf_offset[
106 prog_data->binding_table.shader_time_start],
107 brw->shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
108 brw->shader_time.bo->size, 1, true);
109 }
110 /* When RS is enabled use hw-binding table uploads, otherwise fallback to
111 * software-uploads.
112 */
113 if (brw->use_resource_streamer) {
114 gen7_update_binding_table_from_array(brw, stage_state->stage,
115 stage_state->surf_offset,
116 prog_data->binding_table
117 .size_bytes / 4);
118 } else {
119 uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
120 prog_data->binding_table.size_bytes,
121 32,
122 &stage_state->bind_bo_offset);
123
124 /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
125 memcpy(bind, stage_state->surf_offset,
126 prog_data->binding_table.size_bytes);
127 }
128 }
129
130 brw->ctx.NewDriverState |= brw_new_binding_table;
131
132 if (brw->gen >= 7) {
133 if (brw->use_resource_streamer) {
134 stage_state->bind_bo_offset =
135 reserve_hw_bt_space(brw, prog_data->binding_table.size_bytes);
136 }
137 BEGIN_BATCH(2);
138 OUT_BATCH(packet_name << 16 | (2 - 2));
139 /* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table field
140 * when hw-generated binding table is enabled.
141 */
142 OUT_BATCH(brw->use_resource_streamer ?
143 (stage_state->bind_bo_offset >> 1) :
144 stage_state->bind_bo_offset);
145 ADVANCE_BATCH();
146 }
147 }
148
149 /**
150 * State atoms which upload the binding table for a particular shader stage.
151 * @{
152 */
153
154 /** Upload the VS binding table. */
155 static void
156 brw_vs_upload_binding_table(struct brw_context *brw)
157 {
158 /* BRW_NEW_VS_PROG_DATA */
159 const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
160 brw_upload_binding_table(brw,
161 _3DSTATE_BINDING_TABLE_POINTERS_VS,
162 BRW_NEW_VS_BINDING_TABLE, prog_data,
163 &brw->vs.base);
164 }
165
166 const struct brw_tracked_state brw_vs_binding_table = {
167 .dirty = {
168 .mesa = 0,
169 .brw = BRW_NEW_BATCH |
170 BRW_NEW_VS_CONSTBUF |
171 BRW_NEW_VS_PROG_DATA |
172 BRW_NEW_SURFACES,
173 },
174 .emit = brw_vs_upload_binding_table,
175 };
176
177
178 /** Upload the PS binding table. */
179 static void
180 brw_upload_wm_binding_table(struct brw_context *brw)
181 {
182 /* BRW_NEW_FS_PROG_DATA */
183 const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
184 brw_upload_binding_table(brw,
185 _3DSTATE_BINDING_TABLE_POINTERS_PS,
186 BRW_NEW_PS_BINDING_TABLE, prog_data,
187 &brw->wm.base);
188 }
189
190 const struct brw_tracked_state brw_wm_binding_table = {
191 .dirty = {
192 .mesa = 0,
193 .brw = BRW_NEW_BATCH |
194 BRW_NEW_FS_PROG_DATA |
195 BRW_NEW_SURFACES,
196 },
197 .emit = brw_upload_wm_binding_table,
198 };
199
200 /** Upload the GS binding table (if GS is active). */
201 static void
202 brw_gs_upload_binding_table(struct brw_context *brw)
203 {
204 /* If there's no GS, skip changing anything. */
205 if (brw->geometry_program == NULL)
206 return;
207
208 /* BRW_NEW_GS_PROG_DATA */
209 const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
210 brw_upload_binding_table(brw,
211 _3DSTATE_BINDING_TABLE_POINTERS_GS,
212 BRW_NEW_GS_BINDING_TABLE, prog_data,
213 &brw->gs.base);
214 }
215
216 const struct brw_tracked_state brw_gs_binding_table = {
217 .dirty = {
218 .mesa = 0,
219 .brw = BRW_NEW_BATCH |
220 BRW_NEW_GS_CONSTBUF |
221 BRW_NEW_GS_PROG_DATA |
222 BRW_NEW_SURFACES,
223 },
224 .emit = brw_gs_upload_binding_table,
225 };
226
227 /**
228 * Edit a single entry in a hardware-generated binding table
229 */
230 void
231 gen7_edit_hw_binding_table_entry(struct brw_context *brw,
232 gl_shader_stage stage,
233 uint32_t index,
234 uint32_t surf_offset)
235 {
236 assert(stage <= MESA_SHADER_FRAGMENT);
237
238 uint32_t dw2 = SET_FIELD(index, BRW_BINDING_TABLE_INDEX) |
239 (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(surf_offset) :
240 HSW_SURFACE_STATE_EDIT(surf_offset));
241
242 BEGIN_BATCH(3);
243 OUT_BATCH(stage_to_bt_edit[stage] << 16 | (3 - 2));
244 OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
245 OUT_BATCH(dw2);
246 ADVANCE_BATCH();
247 }
248
249 /**
250 * Upload a whole hardware binding table for the given stage.
251 *
252 * Takes an array of surface offsets and the number of binding table
253 * entries.
254 */
255 void
256 gen7_update_binding_table_from_array(struct brw_context *brw,
257 gl_shader_stage stage,
258 const uint32_t* binding_table,
259 int num_surfaces)
260 {
261 uint32_t dw2 = 0;
262 assert(stage <= MESA_SHADER_FRAGMENT);
263
264 BEGIN_BATCH(num_surfaces + 2);
265 OUT_BATCH(stage_to_bt_edit[stage] << 16 | num_surfaces);
266 OUT_BATCH(BRW_BINDING_TABLE_EDIT_TARGET_ALL);
267 for (int i = 0; i < num_surfaces; i++) {
268 dw2 = SET_FIELD(i, BRW_BINDING_TABLE_INDEX) |
269 (brw->gen >= 8 ? GEN8_SURFACE_STATE_EDIT(binding_table[i]) :
270 HSW_SURFACE_STATE_EDIT(binding_table[i]));
271 OUT_BATCH(dw2);
272 }
273 ADVANCE_BATCH();
274 }
275
276 /**
277 * Disable hardware binding table support, falling back to the
278 * older software-generated binding table mechanism.
279 */
280 void
281 gen7_disable_hw_binding_tables(struct brw_context *brw)
282 {
283 if (!brw->use_resource_streamer)
284 return;
285 /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
286 * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
287 *
288 * "When switching between HW and SW binding table generation, SW must
289 * issue a state cache invalidate."
290 */
291 brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
292
293 int pkt_len = brw->gen >= 8 ? 4 : 3;
294
295 BEGIN_BATCH(pkt_len);
296 OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
297 if (brw->gen >= 8) {
298 OUT_BATCH(0);
299 OUT_BATCH(0);
300 OUT_BATCH(0);
301 } else {
302 OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE);
303 OUT_BATCH(0);
304 }
305 ADVANCE_BATCH();
306 }
307
308 /**
309 * Enable hardware binding tables and set up the binding table pool.
310 */
311 void
312 gen7_enable_hw_binding_tables(struct brw_context *brw)
313 {
314 if (!brw->use_resource_streamer)
315 return;
316
317 if (!brw->hw_bt_pool.bo) {
318 /* We use a single re-usable buffer object for the lifetime of the
319 * context and size it to maximum allowed binding tables that can be
320 * programmed per batch:
321 *
322 * From the Haswell PRM, Volume 7: 3D Media GPGPU,
323 * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
324 * "A maximum of 16,383 Binding tables are allowed in any batch buffer"
325 */
326 static const int max_size = 16383 * 4;
327 brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
328 max_size, 64);
329 brw->hw_bt_pool.next_offset = 0;
330 }
331
332 /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
333 * 3DSTATE_BINDING_TABLE_POOL_ALLOC > Programming Note:
334 *
335 * "When switching between HW and SW binding table generation, SW must
336 * issue a state cache invalidate."
337 */
338 brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
339
340 int pkt_len = brw->gen >= 8 ? 4 : 3;
341 uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE;
342 if (brw->is_haswell) {
343 dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
344 HSW_BT_POOL_ALLOC_MUST_BE_ONE;
345 } else if (brw->gen >= 8) {
346 dw1 |= BDW_MOCS_WB;
347 }
348
349 BEGIN_BATCH(pkt_len);
350 OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (pkt_len - 2));
351 if (brw->gen >= 8) {
352 OUT_RELOC64(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
353 OUT_BATCH(brw->hw_bt_pool.bo->size);
354 } else {
355 OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
356 OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
357 brw->hw_bt_pool.bo->size);
358 }
359 ADVANCE_BATCH();
360 }
361
362 void
363 gen7_reset_hw_bt_pool_offsets(struct brw_context *brw)
364 {
365 brw->hw_bt_pool.next_offset = 0;
366 }
367
368 const struct brw_tracked_state gen7_hw_binding_tables = {
369 .dirty = {
370 .mesa = 0,
371 .brw = BRW_NEW_BATCH,
372 },
373 .emit = gen7_enable_hw_binding_tables
374 };
375
376 /** @} */
377
378 /**
379 * State atoms which emit 3DSTATE packets to update the binding table pointers.
380 * @{
381 */
382
383 /**
384 * (Gen4-5) Upload the binding table pointers for all shader stages.
385 *
386 * The binding table pointers are relative to the surface state base address,
387 * which points at the batchbuffer containing the streamed batch state.
388 */
389 static void
390 gen4_upload_binding_table_pointers(struct brw_context *brw)
391 {
392 BEGIN_BATCH(6);
393 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
394 OUT_BATCH(brw->vs.base.bind_bo_offset);
395 OUT_BATCH(0); /* gs */
396 OUT_BATCH(0); /* clip */
397 OUT_BATCH(0); /* sf */
398 OUT_BATCH(brw->wm.base.bind_bo_offset);
399 ADVANCE_BATCH();
400 }
401
402 const struct brw_tracked_state brw_binding_table_pointers = {
403 .dirty = {
404 .mesa = 0,
405 .brw = BRW_NEW_BATCH |
406 BRW_NEW_GS_BINDING_TABLE |
407 BRW_NEW_PS_BINDING_TABLE |
408 BRW_NEW_STATE_BASE_ADDRESS |
409 BRW_NEW_VS_BINDING_TABLE,
410 },
411 .emit = gen4_upload_binding_table_pointers,
412 };
413
414 /**
415 * (Sandybridge Only) Upload the binding table pointers for all shader stages.
416 *
417 * The binding table pointers are relative to the surface state base address,
418 * which points at the batchbuffer containing the streamed batch state.
419 */
420 static void
421 gen6_upload_binding_table_pointers(struct brw_context *brw)
422 {
423 BEGIN_BATCH(4);
424 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
425 GEN6_BINDING_TABLE_MODIFY_VS |
426 GEN6_BINDING_TABLE_MODIFY_GS |
427 GEN6_BINDING_TABLE_MODIFY_PS |
428 (4 - 2));
429 OUT_BATCH(brw->vs.base.bind_bo_offset); /* vs */
430 if (brw->ff_gs.prog_active)
431 OUT_BATCH(brw->ff_gs.bind_bo_offset); /* gs */
432 else
433 OUT_BATCH(brw->gs.base.bind_bo_offset); /* gs */
434 OUT_BATCH(brw->wm.base.bind_bo_offset); /* wm/ps */
435 ADVANCE_BATCH();
436 }
437
438 const struct brw_tracked_state gen6_binding_table_pointers = {
439 .dirty = {
440 .mesa = 0,
441 .brw = BRW_NEW_BATCH |
442 BRW_NEW_GS_BINDING_TABLE |
443 BRW_NEW_PS_BINDING_TABLE |
444 BRW_NEW_STATE_BASE_ADDRESS |
445 BRW_NEW_VS_BINDING_TABLE,
446 },
447 .emit = gen6_upload_binding_table_pointers,
448 };
449
450 /** @} */