i965/state: Use ISL for emitting image surfaces
mesa.git: src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "main/teximage.h"
39 #include "program/prog_parameter.h"
40 #include "program/prog_instruction.h"
41 #include "main/framebuffer.h"
42
43 #include "isl/isl.h"
44
45 #include "intel_mipmap_tree.h"
46 #include "intel_batchbuffer.h"
47 #include "intel_tex.h"
48 #include "intel_fbo.h"
49 #include "intel_buffer_objects.h"
50
51 #include "brw_context.h"
52 #include "brw_state.h"
53 #include "brw_defines.h"
54 #include "brw_wm.h"
55
56 struct surface_state_info {
57 unsigned num_dwords;
58 unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */
59 unsigned reloc_dw;
60 unsigned aux_reloc_dw;
61 unsigned tex_mocs;
62 unsigned rb_mocs;
63 };
64
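/* Per-gen layout parameters for RENDER_SURFACE_STATE, indexed by brw->gen:
 * reloc_dw and aux_reloc_dw give the DWORD holding the surface and auxiliary
 * surface addresses (the relocation targets), and tex_mocs/rb_mocs are the
 * cacheability settings used for texturing and render targets respectively.
 */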
65 static const struct surface_state_info surface_state_infos[] = {
66 [4] = {6, 32, 1, 0},
67 [5] = {6, 32, 1, 0},
68 [6] = {6, 32, 1, 0},
69 [7] = {8, 32, 1, 6, GEN7_MOCS_L3, GEN7_MOCS_L3},
70 [8] = {13, 64, 8, 10, BDW_MOCS_WB, BDW_MOCS_PTE},
71 [9] = {16, 64, 8, 10, SKL_MOCS_WB, SKL_MOCS_PTE},
72 };
73
74 void
75 brw_emit_surface_state(struct brw_context *brw,
76 struct intel_mipmap_tree *mt,
77 const struct isl_view *view,
78 uint32_t mocs, bool for_gather,
79 uint32_t *surf_offset, int surf_index,
80 unsigned read_domains, unsigned write_domains)
81 {
82 const struct surface_state_info ss_info = surface_state_infos[brw->gen];
83
84 struct isl_surf surf;
85 intel_miptree_get_isl_surf(brw, mt, &surf);
86
87 union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
88
89 struct isl_surf *aux_surf = NULL, aux_surf_s;
90 uint64_t aux_offset = 0;
91 enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
92 if (mt->mcs_mt &&
93 ((view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) ||
94 mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED)) {
95 intel_miptree_get_aux_isl_surf(brw, mt, &aux_surf_s, &aux_usage);
96 aux_surf = &aux_surf_s;
97 assert(mt->mcs_mt->offset == 0);
98 aux_offset = mt->mcs_mt->bo->offset64;
99
100 /* We only really need a clear color if we also have an auxiliary
101 * surface. Without one, it does nothing.
102 */
103 clear_color = intel_miptree_get_isl_clear_color(brw, mt);
104 }
105
106 uint32_t *dw = __brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
107 ss_info.num_dwords * 4, ss_info.ss_align,
108 surf_index, surf_offset);
109
110 isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = view,
111 .address = mt->bo->offset64 + mt->offset,
112 .aux_surf = aux_surf, .aux_usage = aux_usage,
113 .aux_address = aux_offset,
114 .mocs = mocs, .clear_color = clear_color);
115
116 drm_intel_bo_emit_reloc(brw->batch.bo,
117 *surf_offset + 4 * ss_info.reloc_dw,
118 mt->bo, mt->offset,
119 read_domains, write_domains);
120
121 if (aux_surf) {
122 /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
123 * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
124 * contain other control information. Since buffer addresses are always
125 * on 4k boundaries (and thus have their lower 12 bits zero), we can use
126 * an ordinary reloc to do the necessary address translation.
127 */
128 assert((aux_offset & 0xfff) == 0);
129 drm_intel_bo_emit_reloc(brw->batch.bo,
130 *surf_offset + 4 * ss_info.aux_reloc_dw,
131 mt->mcs_mt->bo, dw[ss_info.aux_reloc_dw] & 0xfff,
132 read_domains, write_domains);
133 }
134 }
135
136 GLuint
137 translate_tex_target(GLenum target)
138 {
139 switch (target) {
140 case GL_TEXTURE_1D:
141 case GL_TEXTURE_1D_ARRAY_EXT:
142 return BRW_SURFACE_1D;
143
144 case GL_TEXTURE_RECTANGLE_NV:
145 return BRW_SURFACE_2D;
146
147 case GL_TEXTURE_2D:
148 case GL_TEXTURE_2D_ARRAY_EXT:
149 case GL_TEXTURE_EXTERNAL_OES:
150 case GL_TEXTURE_2D_MULTISAMPLE:
151 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
152 return BRW_SURFACE_2D;
153
154 case GL_TEXTURE_3D:
155 return BRW_SURFACE_3D;
156
157 case GL_TEXTURE_CUBE_MAP:
158 case GL_TEXTURE_CUBE_MAP_ARRAY:
159 return BRW_SURFACE_CUBE;
160
161 default:
162 unreachable("not reached");
163 }
164 }
165
166 uint32_t
167 brw_get_surface_tiling_bits(uint32_t tiling)
168 {
169 switch (tiling) {
170 case I915_TILING_X:
171 return BRW_SURFACE_TILED;
172 case I915_TILING_Y:
173 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
174 default:
175 return 0;
176 }
177 }
178
179
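/* Map a sample count onto the SURFACE_STATE "Number of Multisamples"
 * encoding used by the pre-gen7 paths in this file. Anything above one
 * sample is programmed as 4x, the only multisampled mode these parts
 * support.
 */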
180 uint32_t
181 brw_get_surface_num_multisamples(unsigned num_samples)
182 {
183 if (num_samples > 1)
184 return BRW_SURFACE_MULTISAMPLECOUNT_4;
185 else
186 return BRW_SURFACE_MULTISAMPLECOUNT_1;
187 }
188
189 /**
190 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
191 * swizzling.
192 */
193 int
194 brw_get_texture_swizzle(const struct gl_context *ctx,
195 const struct gl_texture_object *t)
196 {
197 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
198
199 int swizzles[SWIZZLE_NIL + 1] = {
200 SWIZZLE_X,
201 SWIZZLE_Y,
202 SWIZZLE_Z,
203 SWIZZLE_W,
204 SWIZZLE_ZERO,
205 SWIZZLE_ONE,
206 SWIZZLE_NIL
207 };
208
209 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
210 img->_BaseFormat == GL_DEPTH_STENCIL) {
211 GLenum depth_mode = t->DepthMode;
212
213 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
214 * with depth component data specified with a sized internal format.
215 * Otherwise, it's left at the old default, GL_LUMINANCE.
216 */
217 if (_mesa_is_gles3(ctx) &&
218 img->InternalFormat != GL_DEPTH_COMPONENT &&
219 img->InternalFormat != GL_DEPTH_STENCIL) {
220 depth_mode = GL_RED;
221 }
222
223 switch (depth_mode) {
224 case GL_ALPHA:
225 swizzles[0] = SWIZZLE_ZERO;
226 swizzles[1] = SWIZZLE_ZERO;
227 swizzles[2] = SWIZZLE_ZERO;
228 swizzles[3] = SWIZZLE_X;
229 break;
230 case GL_LUMINANCE:
231 swizzles[0] = SWIZZLE_X;
232 swizzles[1] = SWIZZLE_X;
233 swizzles[2] = SWIZZLE_X;
234 swizzles[3] = SWIZZLE_ONE;
235 break;
236 case GL_INTENSITY:
237 swizzles[0] = SWIZZLE_X;
238 swizzles[1] = SWIZZLE_X;
239 swizzles[2] = SWIZZLE_X;
240 swizzles[3] = SWIZZLE_X;
241 break;
242 case GL_RED:
243 swizzles[0] = SWIZZLE_X;
244 swizzles[1] = SWIZZLE_ZERO;
245 swizzles[2] = SWIZZLE_ZERO;
246 swizzles[3] = SWIZZLE_ONE;
247 break;
248 }
249 }
250
251 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
252
253 /* If the texture's format is alpha-only, force R, G, and B to
254 * 0.0. Similarly, if the texture's format has no alpha channel,
255 * force the alpha value read to 1.0. This allows for the
256 * implementation to use an RGBA texture for any of these formats
257 * without leaking any unexpected values.
258 */
259 switch (img->_BaseFormat) {
260 case GL_ALPHA:
261 swizzles[0] = SWIZZLE_ZERO;
262 swizzles[1] = SWIZZLE_ZERO;
263 swizzles[2] = SWIZZLE_ZERO;
264 break;
265 case GL_LUMINANCE:
266 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
267 swizzles[0] = SWIZZLE_X;
268 swizzles[1] = SWIZZLE_X;
269 swizzles[2] = SWIZZLE_X;
270 swizzles[3] = SWIZZLE_ONE;
271 }
272 break;
273 case GL_LUMINANCE_ALPHA:
274 if (datatype == GL_SIGNED_NORMALIZED) {
275 swizzles[0] = SWIZZLE_X;
276 swizzles[1] = SWIZZLE_X;
277 swizzles[2] = SWIZZLE_X;
278 swizzles[3] = SWIZZLE_W;
279 }
280 break;
281 case GL_INTENSITY:
282 if (datatype == GL_SIGNED_NORMALIZED) {
283 swizzles[0] = SWIZZLE_X;
284 swizzles[1] = SWIZZLE_X;
285 swizzles[2] = SWIZZLE_X;
286 swizzles[3] = SWIZZLE_X;
287 }
288 break;
289 case GL_RED:
290 case GL_RG:
291 case GL_RGB:
292 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
293 swizzles[3] = SWIZZLE_ONE;
294 break;
295 }
296
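/* Compose the application-supplied swizzle (t->_Swizzle) with the
 * format-derived swizzles computed above: each component of the user
 * swizzle selects which computed value feeds that channel.
 */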
297 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
298 swizzles[GET_SWZ(t->_Swizzle, 1)],
299 swizzles[GET_SWZ(t->_Swizzle, 2)],
300 swizzles[GET_SWZ(t->_Swizzle, 3)]);
301 }
302
303 static void
304 gen4_emit_buffer_surface_state(struct brw_context *brw,
305 uint32_t *out_offset,
306 drm_intel_bo *bo,
307 unsigned buffer_offset,
308 unsigned surface_format,
309 unsigned buffer_size,
310 unsigned pitch,
311 bool rw)
312 {
313 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
314 6 * 4, 32, out_offset);
315 memset(surf, 0, 6 * 4);
316
317 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
318 surface_format << BRW_SURFACE_FORMAT_SHIFT |
319 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
320 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
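/* The buffer extent is encoded as (buffer_size - 1) split across the Width
 * (7 bits), Height (13 bits) and Depth (7 bits) fields. For example, a
 * hypothetical buffer_size of 1000 gives 999 = 0x3e7, i.e. Width = 0x67,
 * Height = 0x7, Depth = 0.
 */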
321 surf[2] = ((buffer_size - 1) & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
322 (((buffer_size - 1) >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
323 surf[3] = (((buffer_size - 1) >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
324 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
325
326 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
327 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
328 * physical cache. It is mapped in hardware to the sampler cache."
329 */
330 if (bo) {
331 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
332 bo, buffer_offset,
333 I915_GEM_DOMAIN_SAMPLER,
334 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
335 }
336 }
337
338 void
339 brw_update_buffer_texture_surface(struct gl_context *ctx,
340 unsigned unit,
341 uint32_t *surf_offset)
342 {
343 struct brw_context *brw = brw_context(ctx);
344 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
345 struct intel_buffer_object *intel_obj =
346 intel_buffer_object(tObj->BufferObject);
347 uint32_t size = tObj->BufferSize;
348 drm_intel_bo *bo = NULL;
349 mesa_format format = tObj->_BufferObjectFormat;
350 uint32_t brw_format = brw_format_for_mesa_format(format);
351 int texel_size = _mesa_get_format_bytes(format);
352
353 if (intel_obj) {
354 size = MIN2(size, intel_obj->Base.Size);
355 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
356 }
357
358 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
359 _mesa_problem(NULL, "bad format %s for texture buffer\n",
360 _mesa_get_format_name(format));
361 }
362
363 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
364 tObj->BufferOffset,
365 brw_format,
366 size / texel_size,
367 texel_size,
368 false /* rw */);
369 }
370
371 static void
372 brw_update_texture_surface(struct gl_context *ctx,
373 unsigned unit,
374 uint32_t *surf_offset,
375 bool for_gather,
376 uint32_t plane)
377 {
378 struct brw_context *brw = brw_context(ctx);
379 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
380 struct intel_texture_object *intelObj = intel_texture_object(tObj);
381 struct intel_mipmap_tree *mt = intelObj->mt;
382 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
383 uint32_t *surf;
384
385 /* BRW_NEW_TEXTURE_BUFFER */
386 if (tObj->Target == GL_TEXTURE_BUFFER) {
387 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
388 return;
389 }
390
391 if (plane > 0) {
392 if (mt->plane[plane - 1] == NULL)
393 return;
394 mt = mt->plane[plane - 1];
395 }
396
397 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
398 6 * 4, 32, surf_offset);
399
400 mesa_format mesa_fmt = plane == 0 ? intelObj->_Format : mt->format;
401 uint32_t tex_format = translate_tex_format(brw, mesa_fmt,
402 sampler->sRGBDecode);
403
404 if (for_gather) {
405 /* Sandybridge's gather4 message is broken for integer formats.
406 * To work around this, we pretend the surface is UNORM for
407 * 8 or 16-bit formats, and emit shader instructions to recover
408 * the real INT/UINT value. For 32-bit formats, we pretend
409 * the surface is FLOAT, and simply reinterpret the resulting
410 * bits.
411 */
412 switch (tex_format) {
413 case BRW_SURFACEFORMAT_R8_SINT:
414 case BRW_SURFACEFORMAT_R8_UINT:
415 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
416 break;
417
418 case BRW_SURFACEFORMAT_R16_SINT:
419 case BRW_SURFACEFORMAT_R16_UINT:
420 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
421 break;
422
423 case BRW_SURFACEFORMAT_R32_SINT:
424 case BRW_SURFACEFORMAT_R32_UINT:
425 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
426 break;
427
428 default:
429 break;
430 }
431 }
432
433 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
434 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
435 BRW_SURFACE_CUBEFACE_ENABLES |
436 tex_format << BRW_SURFACE_FORMAT_SHIFT);
437
438 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
439
440 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
441 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
442 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
443
444 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
445 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
446 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
447
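/* MinLevel and MinLayer are non-zero only for texture views, so for an
 * ordinary texture this reduces to BaseLevel relative to the miptree's
 * first level.
 */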
448 const unsigned min_lod = tObj->MinLevel + tObj->BaseLevel - mt->first_level;
449 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
450 SET_FIELD(min_lod, BRW_SURFACE_MIN_LOD) |
451 SET_FIELD(tObj->MinLayer, BRW_SURFACE_MIN_ARRAY_ELEMENT));
452
453 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
454
455 /* Emit relocation to surface contents */
456 drm_intel_bo_emit_reloc(brw->batch.bo,
457 *surf_offset + 4,
458 mt->bo,
459 surf[1] - mt->bo->offset64,
460 I915_GEM_DOMAIN_SAMPLER, 0);
461 }
462
463 /**
464 * Create the constant buffer surface. Vertex/fragment shader constants will be
465 * read from this buffer with Data Port Read instructions/messages.
466 */
467 void
468 brw_create_constant_surface(struct brw_context *brw,
469 drm_intel_bo *bo,
470 uint32_t offset,
471 uint32_t size,
472 uint32_t *out_offset)
473 {
474 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
475 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
476 size, 1, false);
477 }
478
479 /**
480 * Create the buffer surface. Shader buffer variables will be
481 * read from or written to this buffer with Data Port Read/Write
482 * instructions/messages.
483 */
484 void
485 brw_create_buffer_surface(struct brw_context *brw,
486 drm_intel_bo *bo,
487 uint32_t offset,
488 uint32_t size,
489 uint32_t *out_offset)
490 {
491 /* Use a raw surface so we can reuse existing untyped read/write/atomic
492 * messages. We need these specifically for the fragment shader since they
493 * include a pixel mask header that we need to ensure correct behavior
494 * with helper invocations, which cannot write to the buffer.
495 */
496 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
497 BRW_SURFACEFORMAT_RAW,
498 size, 1, true);
499 }
500
501 /**
502 * Set up a binding table entry for use by stream output logic (transform
503 * feedback).
504 *
505 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
506 */
507 void
508 brw_update_sol_surface(struct brw_context *brw,
509 struct gl_buffer_object *buffer_obj,
510 uint32_t *out_offset, unsigned num_vector_components,
511 unsigned stride_dwords, unsigned offset_dwords)
512 {
513 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
514 uint32_t offset_bytes = 4 * offset_dwords;
515 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
516 offset_bytes,
517 buffer_obj->Size - offset_bytes);
518 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
519 out_offset);
520 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
521 size_t size_dwords = buffer_obj->Size / 4;
522 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
523
524 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
525 * too big to map using a single binding table entry?
526 */
527 assert((size_dwords - offset_dwords) / stride_dwords
528 <= BRW_MAX_NUM_BUFFER_ENTRIES);
529
530 if (size_dwords > offset_dwords + num_vector_components) {
531 /* There is room for at least 1 transform feedback output in the buffer.
532 * Compute the number of additional transform feedback outputs the
533 * buffer has room for.
534 */
535 buffer_size_minus_1 =
536 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
537 } else {
538 /* There isn't even room for a single transform feedback output in the
539 * buffer. We can't configure the binding table entry to prevent output
540 * entirely; we'll have to rely on the geometry shader to detect
541 * overflow. But to minimize the damage in case of a bug, set up the
542 * binding table entry to just allow a single output.
543 */
544 buffer_size_minus_1 = 0;
545 }
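/* Split the entry count across the Width (7 bits), Height (13 bits) and
 * Depth (7 bits) fields of the surface state; together they hold a 27-bit
 * value, which is the limit the assertion above guards against.
 */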
546 width = buffer_size_minus_1 & 0x7f;
547 height = (buffer_size_minus_1 & 0xfff80) >> 7;
548 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
549
550 switch (num_vector_components) {
551 case 1:
552 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
553 break;
554 case 2:
555 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
556 break;
557 case 3:
558 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
559 break;
560 case 4:
561 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
562 break;
563 default:
564 unreachable("Invalid vector size for transform feedback output");
565 }
566
567 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
568 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
569 surface_format << BRW_SURFACE_FORMAT_SHIFT |
570 BRW_SURFACE_RC_READ_WRITE;
571 surf[1] = bo->offset64 + offset_bytes; /* reloc */
572 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
573 height << BRW_SURFACE_HEIGHT_SHIFT);
574 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
575 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
576 surf[4] = 0;
577 surf[5] = 0;
578
579 /* Emit relocation to surface contents. */
580 drm_intel_bo_emit_reloc(brw->batch.bo,
581 *out_offset + 4,
582 bo, offset_bytes,
583 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
584 }
585
586 /* Creates a new WM constant buffer reflecting the current fragment program's
587 * constants, if needed by the fragment program.
588 *
589 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
590 * state atom.
591 */
592 static void
593 brw_upload_wm_pull_constants(struct brw_context *brw)
594 {
595 struct brw_stage_state *stage_state = &brw->wm.base;
596 /* BRW_NEW_FRAGMENT_PROGRAM */
597 struct brw_fragment_program *fp =
598 (struct brw_fragment_program *) brw->fragment_program;
599 /* BRW_NEW_FS_PROG_DATA */
600 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
601
602 /* _NEW_PROGRAM_CONSTANTS */
603 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
604 stage_state, prog_data);
605 }
606
607 const struct brw_tracked_state brw_wm_pull_constants = {
608 .dirty = {
609 .mesa = _NEW_PROGRAM_CONSTANTS,
610 .brw = BRW_NEW_BATCH |
611 BRW_NEW_BLORP |
612 BRW_NEW_FRAGMENT_PROGRAM |
613 BRW_NEW_FS_PROG_DATA,
614 },
615 .emit = brw_upload_wm_pull_constants,
616 };
617
618 /**
619 * Creates a null renderbuffer surface.
620 *
621 * This is used when the shader doesn't write to any color output. An FB
622 * write to target 0 will still be emitted, because that's how the thread is
623 * terminated (and computed depth is returned), so we need to have the
624 * hardware discard the target 0 color output..
625 */
626 static void
627 brw_emit_null_surface_state(struct brw_context *brw,
628 unsigned width,
629 unsigned height,
630 unsigned samples,
631 uint32_t *out_offset)
632 {
633 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
634 * Notes):
635 *
636 * A null surface will be used in instances where an actual surface is
637 * not bound. When a write message is generated to a null surface, no
638 * actual surface is written to. When a read message (including any
639 * sampling engine message) is generated to a null surface, the result
640 * is all zeros. Note that a null surface type is allowed to be used
641 * with all messages, even if it is not specifically indicated as
642 * supported. All of the remaining fields in surface state are ignored
643 * for null surfaces, with the following exceptions:
644 *
645 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
646 * depth buffer’s corresponding state for all render target surfaces,
647 * including null.
648 *
649 * - Surface Format must be R8G8B8A8_UNORM.
650 */
651 unsigned surface_type = BRW_SURFACE_NULL;
652 drm_intel_bo *bo = NULL;
653 unsigned pitch_minus_1 = 0;
654 uint32_t multisampling_state = 0;
655 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
656 out_offset);
657
658 if (samples > 1) {
659 /* On Gen6, null render targets seem to cause GPU hangs when
660 * multisampling. So work around this problem by rendering into a dummy
661 * color buffer.
662 *
663 * To decrease the amount of memory needed by the workaround buffer, we
664 * set its pitch to 128 bytes (the width of a Y tile). This means that
665 * the amount of memory needed for the workaround buffer is
666 * (width_in_tiles + height_in_tiles - 1) tiles.
667 *
668 * Note that since the workaround buffer will be interpreted by the
669 * hardware as an interleaved multisampled buffer, we need to compute
670 * width_in_tiles and height_in_tiles by dividing the width and height
671 * by 16 rather than the normal Y-tile size of 32.
672 */
673 unsigned width_in_tiles = ALIGN(width, 16) / 16;
674 unsigned height_in_tiles = ALIGN(height, 16) / 16;
675 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
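/* For example, a hypothetical 1920x1080 target gives width_in_tiles = 120
 * and height_in_tiles = 68, so size_needed = 187 * 4096 bytes.
 */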
676 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
677 size_needed);
678 bo = brw->wm.multisampled_null_render_target_bo;
679 surface_type = BRW_SURFACE_2D;
680 pitch_minus_1 = 127;
681 multisampling_state = brw_get_surface_num_multisamples(samples);
682 }
683
684 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
685 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
686 if (brw->gen < 6) {
687 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
688 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
689 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
690 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
691 }
692 surf[1] = bo ? bo->offset64 : 0;
693 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
694 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
695
696 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
697 * Notes):
698 *
699 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
700 */
701 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
702 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
703 surf[4] = multisampling_state;
704 surf[5] = 0;
705
706 if (bo) {
707 drm_intel_bo_emit_reloc(brw->batch.bo,
708 *out_offset + 4,
709 bo, 0,
710 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
711 }
712 }
713
714 /**
715 * Sets up a surface state structure to point at the given region.
716 * While it is only used for the front/back buffer currently, it should be
717 * usable for further buffers when doing ARB_draw_buffers support.
718 */
719 static uint32_t
720 brw_update_renderbuffer_surface(struct brw_context *brw,
721 struct gl_renderbuffer *rb,
722 bool layered, unsigned unit,
723 uint32_t surf_index)
724 {
725 struct gl_context *ctx = &brw->ctx;
726 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
727 struct intel_mipmap_tree *mt = irb->mt;
728 uint32_t *surf;
729 uint32_t tile_x, tile_y;
730 uint32_t format = 0;
731 uint32_t offset;
732 /* _NEW_BUFFERS */
733 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
734 /* BRW_NEW_FS_PROG_DATA */
735
736 assert(!layered);
737
738 if (rb->TexImage && !brw->has_surface_tile_offset) {
739 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
740
741 if (tile_x != 0 || tile_y != 0) {
742 /* Original gen4 hardware couldn't draw to a non-tile-aligned
743 * destination in a miptree unless you actually set up your renderbuffer
744 * as a miptree and used the fragile lod/array_index/etc. controls to
745 * select the image. So, instead, we just make a new single-level
746 * miptree and render into that.
747 */
748 intel_renderbuffer_move_to_temp(brw, irb, false);
749 mt = irb->mt;
750 }
751 }
752
753 intel_miptree_used_for_rendering(irb->mt);
754
755 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
756
757 format = brw->render_target_format[rb_format];
758 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
759 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
760 __func__, _mesa_get_format_name(rb_format));
761 }
762
763 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
764 format << BRW_SURFACE_FORMAT_SHIFT);
765
766 /* reloc */
767 assert(mt->offset % mt->cpp == 0);
768 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
769 mt->bo->offset64 + mt->offset);
770
771 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
772 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
773
774 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
775 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
776
777 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
778
779 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
780 /* Note that the low bits of these fields are missing: X is stored in units
781 * of 4 pixels and Y in units of 2 rows, so misaligned tile offsets can't be expressed.
782 */
783 assert(tile_x % 4 == 0);
784 assert(tile_y % 2 == 0);
785 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
786 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
787 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
788
789 if (brw->gen < 6) {
790 /* _NEW_COLOR */
791 if (!ctx->Color.ColorLogicOpEnabled &&
792 (ctx->Color.BlendEnabled & (1 << unit)))
793 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
794
795 if (!ctx->Color.ColorMask[unit][0])
796 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
797 if (!ctx->Color.ColorMask[unit][1])
798 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
799 if (!ctx->Color.ColorMask[unit][2])
800 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
801
802 /* Also disable writes to the alpha component when the
803 * renderbuffer is XRGB.
804 */
805 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
806 !ctx->Color.ColorMask[unit][3]) {
807 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
808 }
809 }
810
811 drm_intel_bo_emit_reloc(brw->batch.bo,
812 offset + 4,
813 mt->bo,
814 surf[1] - mt->bo->offset64,
815 I915_GEM_DOMAIN_RENDER,
816 I915_GEM_DOMAIN_RENDER);
817
818 return offset;
819 }
820
821 /**
822 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
823 */
824 void
825 brw_update_renderbuffer_surfaces(struct brw_context *brw,
826 const struct gl_framebuffer *fb,
827 uint32_t render_target_start,
828 uint32_t *surf_offset)
829 {
830 GLuint i;
831 const unsigned int w = _mesa_geometric_width(fb);
832 const unsigned int h = _mesa_geometric_height(fb);
833 const unsigned int s = _mesa_geometric_samples(fb);
834
835 /* Update surfaces for drawing buffers */
836 if (fb->_NumColorDrawBuffers >= 1) {
837 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
838 const uint32_t surf_index = render_target_start + i;
839
840 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
841 surf_offset[surf_index] =
842 brw->vtbl.update_renderbuffer_surface(
843 brw, fb->_ColorDrawBuffers[i],
844 _mesa_geometric_layers(fb) > 0, i, surf_index);
845 } else {
846 brw->vtbl.emit_null_surface_state(brw, w, h, s,
847 &surf_offset[surf_index]);
848 }
849 }
850 } else {
851 const uint32_t surf_index = render_target_start;
852 brw->vtbl.emit_null_surface_state(brw, w, h, s,
853 &surf_offset[surf_index]);
854 }
855 }
856
857 static void
858 update_renderbuffer_surfaces(struct brw_context *brw)
859 {
860 const struct gl_context *ctx = &brw->ctx;
861
862 /* _NEW_BUFFERS | _NEW_COLOR */
863 const struct gl_framebuffer *fb = ctx->DrawBuffer;
864 brw_update_renderbuffer_surfaces(
865 brw, fb,
866 brw->wm.prog_data->binding_table.render_target_start,
867 brw->wm.base.surf_offset);
868 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
869 }
870
871 const struct brw_tracked_state brw_renderbuffer_surfaces = {
872 .dirty = {
873 .mesa = _NEW_BUFFERS |
874 _NEW_COLOR,
875 .brw = BRW_NEW_BATCH |
876 BRW_NEW_BLORP |
877 BRW_NEW_FS_PROG_DATA,
878 },
879 .emit = update_renderbuffer_surfaces,
880 };
881
882 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
883 .dirty = {
884 .mesa = _NEW_BUFFERS,
885 .brw = BRW_NEW_BATCH |
886 BRW_NEW_BLORP,
887 },
888 .emit = update_renderbuffer_surfaces,
889 };
890
891
892 static void
893 update_stage_texture_surfaces(struct brw_context *brw,
894 const struct gl_program *prog,
895 struct brw_stage_state *stage_state,
896 bool for_gather, uint32_t plane)
897 {
898 if (!prog)
899 return;
900
901 struct gl_context *ctx = &brw->ctx;
902
903 uint32_t *surf_offset = stage_state->surf_offset;
904
905 /* BRW_NEW_*_PROG_DATA */
906 if (for_gather)
907 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
908 else
909 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
910
911 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
912 for (unsigned s = 0; s < num_samplers; s++) {
913 surf_offset[s] = 0;
914
915 if (prog->SamplersUsed & (1 << s)) {
916 const unsigned unit = prog->SamplerUnits[s];
917
918 /* _NEW_TEXTURE */
919 if (ctx->Texture.Unit[unit]._Current) {
920 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
921 }
922 }
923 }
924 }
925
926
927 /**
928 * Construct SURFACE_STATE objects for enabled textures.
929 */
930 static void
931 brw_update_texture_surfaces(struct brw_context *brw)
932 {
933 /* BRW_NEW_VERTEX_PROGRAM */
934 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
935
936 /* BRW_NEW_TESS_PROGRAMS */
937 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
938 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
939
940 /* BRW_NEW_GEOMETRY_PROGRAM */
941 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
942
943 /* BRW_NEW_FRAGMENT_PROGRAM */
944 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
945
946 /* _NEW_TEXTURE */
947 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
948 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
949 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
950 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
951 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
952
953 /* Emit an alternate set of surface state for gather. This
954 * allows the surface format to be overridden for only the
955 * gather4 messages. */
956 if (brw->gen < 8) {
957 if (vs && vs->UsesGather)
958 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
959 if (tcs && tcs->UsesGather)
960 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
961 if (tes && tes->UsesGather)
962 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
963 if (gs && gs->UsesGather)
964 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
965 if (fs && fs->UsesGather)
966 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
967 }
968
969 if (fs) {
970 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
971 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
972 }
973
974 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
975 }
976
977 const struct brw_tracked_state brw_texture_surfaces = {
978 .dirty = {
979 .mesa = _NEW_TEXTURE,
980 .brw = BRW_NEW_BATCH |
981 BRW_NEW_BLORP |
982 BRW_NEW_FRAGMENT_PROGRAM |
983 BRW_NEW_FS_PROG_DATA |
984 BRW_NEW_GEOMETRY_PROGRAM |
985 BRW_NEW_GS_PROG_DATA |
986 BRW_NEW_TESS_PROGRAMS |
987 BRW_NEW_TCS_PROG_DATA |
988 BRW_NEW_TES_PROG_DATA |
989 BRW_NEW_TEXTURE_BUFFER |
990 BRW_NEW_VERTEX_PROGRAM |
991 BRW_NEW_VS_PROG_DATA,
992 },
993 .emit = brw_update_texture_surfaces,
994 };
995
996 static void
997 brw_update_cs_texture_surfaces(struct brw_context *brw)
998 {
999 /* BRW_NEW_COMPUTE_PROGRAM */
1000 struct gl_program *cs = (struct gl_program *) brw->compute_program;
1001
1002 /* _NEW_TEXTURE */
1003 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
1004
1005 /* Emit an alternate set of surface state for gather. This
1006 * allows the surface format to be overridden for only the
1007 * gather4 messages.
1008 */
1009 if (brw->gen < 8) {
1010 if (cs && cs->UsesGather)
1011 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
1012 }
1013
1014 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1015 }
1016
1017 const struct brw_tracked_state brw_cs_texture_surfaces = {
1018 .dirty = {
1019 .mesa = _NEW_TEXTURE,
1020 .brw = BRW_NEW_BATCH |
1021 BRW_NEW_BLORP |
1022 BRW_NEW_COMPUTE_PROGRAM,
1023 },
1024 .emit = brw_update_cs_texture_surfaces,
1025 };
1026
1027
1028 void
1029 brw_upload_ubo_surfaces(struct brw_context *brw,
1030 struct gl_linked_shader *shader,
1031 struct brw_stage_state *stage_state,
1032 struct brw_stage_prog_data *prog_data)
1033 {
1034 struct gl_context *ctx = &brw->ctx;
1035
1036 if (!shader)
1037 return;
1038
1039 uint32_t *ubo_surf_offsets =
1040 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
1041
1042 for (int i = 0; i < shader->NumUniformBlocks; i++) {
1043 struct gl_uniform_buffer_binding *binding =
1044 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
1045
1046 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1047 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
1048 } else {
1049 struct intel_buffer_object *intel_bo =
1050 intel_buffer_object(binding->BufferObject);
1051 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1052 if (!binding->AutomaticSize)
1053 size = MIN2(size, binding->Size);
1054 drm_intel_bo *bo =
1055 intel_bufferobj_buffer(brw, intel_bo,
1056 binding->Offset,
1057 size);
1058 brw_create_constant_surface(brw, bo, binding->Offset,
1059 size,
1060 &ubo_surf_offsets[i]);
1061 }
1062 }
1063
1064 uint32_t *ssbo_surf_offsets =
1065 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1066
1067 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
1068 struct gl_shader_storage_buffer_binding *binding =
1069 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
1070
1071 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1072 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1073 } else {
1074 struct intel_buffer_object *intel_bo =
1075 intel_buffer_object(binding->BufferObject);
1076 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1077 if (!binding->AutomaticSize)
1078 size = MIN2(size, binding->Size);
1079 drm_intel_bo *bo =
1080 intel_bufferobj_buffer(brw, intel_bo,
1081 binding->Offset,
1082 size);
1083 brw_create_buffer_surface(brw, bo, binding->Offset,
1084 size,
1085 &ssbo_surf_offsets[i]);
1086 }
1087 }
1088
1089 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1090 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1091 }
1092
1093 static void
1094 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1095 {
1096 struct gl_context *ctx = &brw->ctx;
1097 /* _NEW_PROGRAM */
1098 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1099
1100 if (!prog)
1101 return;
1102
1103 /* BRW_NEW_FS_PROG_DATA */
1104 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1105 &brw->wm.base, &brw->wm.prog_data->base);
1106 }
1107
1108 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1109 .dirty = {
1110 .mesa = _NEW_PROGRAM,
1111 .brw = BRW_NEW_BATCH |
1112 BRW_NEW_BLORP |
1113 BRW_NEW_FS_PROG_DATA |
1114 BRW_NEW_UNIFORM_BUFFER,
1115 },
1116 .emit = brw_upload_wm_ubo_surfaces,
1117 };
1118
1119 static void
1120 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1121 {
1122 struct gl_context *ctx = &brw->ctx;
1123 /* _NEW_PROGRAM */
1124 struct gl_shader_program *prog =
1125 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1126
1127 if (!prog)
1128 return;
1129
1130 /* BRW_NEW_CS_PROG_DATA */
1131 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1132 &brw->cs.base, &brw->cs.prog_data->base);
1133 }
1134
1135 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1136 .dirty = {
1137 .mesa = _NEW_PROGRAM,
1138 .brw = BRW_NEW_BATCH |
1139 BRW_NEW_BLORP |
1140 BRW_NEW_CS_PROG_DATA |
1141 BRW_NEW_UNIFORM_BUFFER,
1142 },
1143 .emit = brw_upload_cs_ubo_surfaces,
1144 };
1145
1146 void
1147 brw_upload_abo_surfaces(struct brw_context *brw,
1148 struct gl_linked_shader *shader,
1149 struct brw_stage_state *stage_state,
1150 struct brw_stage_prog_data *prog_data)
1151 {
1152 struct gl_context *ctx = &brw->ctx;
1153 uint32_t *surf_offsets =
1154 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1155
1156 if (shader && shader->NumAtomicBuffers) {
1157 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1158 struct gl_atomic_buffer_binding *binding =
1159 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1160 struct intel_buffer_object *intel_bo =
1161 intel_buffer_object(binding->BufferObject);
1162 drm_intel_bo *bo = intel_bufferobj_buffer(
1163 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1164
1165 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1166 binding->Offset, BRW_SURFACEFORMAT_RAW,
1167 bo->size - binding->Offset, 1, true);
1168 }
1169
1170 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1171 }
1172 }
1173
1174 static void
1175 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1176 {
1177 struct gl_context *ctx = &brw->ctx;
1178 /* _NEW_PROGRAM */
1179 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1180
1181 if (prog) {
1182 /* BRW_NEW_FS_PROG_DATA */
1183 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1184 &brw->wm.base, &brw->wm.prog_data->base);
1185 }
1186 }
1187
1188 const struct brw_tracked_state brw_wm_abo_surfaces = {
1189 .dirty = {
1190 .mesa = _NEW_PROGRAM,
1191 .brw = BRW_NEW_ATOMIC_BUFFER |
1192 BRW_NEW_BLORP |
1193 BRW_NEW_BATCH |
1194 BRW_NEW_FS_PROG_DATA,
1195 },
1196 .emit = brw_upload_wm_abo_surfaces,
1197 };
1198
1199 static void
1200 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1201 {
1202 struct gl_context *ctx = &brw->ctx;
1203 /* _NEW_PROGRAM */
1204 struct gl_shader_program *prog =
1205 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1206
1207 if (prog) {
1208 /* BRW_NEW_CS_PROG_DATA */
1209 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1210 &brw->cs.base, &brw->cs.prog_data->base);
1211 }
1212 }
1213
1214 const struct brw_tracked_state brw_cs_abo_surfaces = {
1215 .dirty = {
1216 .mesa = _NEW_PROGRAM,
1217 .brw = BRW_NEW_ATOMIC_BUFFER |
1218 BRW_NEW_BLORP |
1219 BRW_NEW_BATCH |
1220 BRW_NEW_CS_PROG_DATA,
1221 },
1222 .emit = brw_upload_cs_abo_surfaces,
1223 };
1224
1225 static void
1226 brw_upload_cs_image_surfaces(struct brw_context *brw)
1227 {
1228 struct gl_context *ctx = &brw->ctx;
1229 /* _NEW_PROGRAM */
1230 struct gl_shader_program *prog =
1231 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1232
1233 if (prog) {
1234 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1235 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1236 &brw->cs.base, &brw->cs.prog_data->base);
1237 }
1238 }
1239
1240 const struct brw_tracked_state brw_cs_image_surfaces = {
1241 .dirty = {
1242 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1243 .brw = BRW_NEW_BATCH |
1244 BRW_NEW_BLORP |
1245 BRW_NEW_CS_PROG_DATA |
1246 BRW_NEW_IMAGE_UNITS
1247 },
1248 .emit = brw_upload_cs_image_surfaces,
1249 };
1250
1251 static uint32_t
1252 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1253 {
1254 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1255 uint32_t hw_format = brw_format_for_mesa_format(format);
1256 if (access == GL_WRITE_ONLY) {
1257 return hw_format;
1258 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1259 /* Typed surface reads support a very limited subset of the shader
1260 * image formats. Translate it into the closest format the
1261 * hardware supports.
1262 */
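/* The lowered format typically has fewer channels but the same size per
 * texel; the matching shader-side packing/unpacking is emitted by
 * brw_fs_surface_builder.cpp.
 */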
1263 return isl_lower_storage_image_format(devinfo, hw_format);
1264 } else {
1265 /* The hardware doesn't actually support a typed format that we can use
1266 * so we have to fall back to untyped read/write messages.
1267 */
1268 return BRW_SURFACEFORMAT_RAW;
1269 }
1270 }
1271
1272 static void
1273 update_default_image_param(struct brw_context *brw,
1274 struct gl_image_unit *u,
1275 unsigned surface_idx,
1276 struct brw_image_param *param)
1277 {
1278 memset(param, 0, sizeof(*param));
1279 param->surface_idx = surface_idx;
1280 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1281 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1282 * detailed explanation of these parameters.
1283 */
1284 param->swizzling[0] = 0xff;
1285 param->swizzling[1] = 0xff;
1286 }
1287
1288 static void
1289 update_buffer_image_param(struct brw_context *brw,
1290 struct gl_image_unit *u,
1291 unsigned surface_idx,
1292 struct brw_image_param *param)
1293 {
1294 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1295
1296 update_default_image_param(brw, u, surface_idx, param);
1297
1298 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1299 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1300 }
1301
1302 static void
1303 update_texture_image_param(struct brw_context *brw,
1304 struct gl_image_unit *u,
1305 unsigned surface_idx,
1306 struct brw_image_param *param)
1307 {
1308 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1309
1310 update_default_image_param(brw, u, surface_idx, param);
1311
1312 param->size[0] = minify(mt->logical_width0, u->Level);
1313 param->size[1] = minify(mt->logical_height0, u->Level);
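/* size[2] is the number of layers addressable by the shader: 1 for
 * non-layered bindings, 6 for cube maps, the minified depth for 3D
 * textures, and the full array length otherwise.
 */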
1314 param->size[2] = (!u->Layered ? 1 :
1315 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1316 u->TexObj->Target == GL_TEXTURE_3D ?
1317 minify(mt->logical_depth0, u->Level) :
1318 mt->logical_depth0);
1319
1320 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1321 &param->offset[0],
1322 &param->offset[1]);
1323
1324 param->stride[0] = mt->cpp;
1325 param->stride[1] = mt->pitch / mt->cpp;
1326 param->stride[2] =
1327 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1328 param->stride[3] =
1329 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1330
1331 if (mt->tiling == I915_TILING_X) {
1332 /* An X tile is a rectangular block of 512x8 bytes. */
1333 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1334 param->tiling[1] = _mesa_logbase2(8);
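/* For example, with a 4-byte texel this gives tiling[0] = log2(128) = 7
 * and tiling[1] = log2(8) = 3.
 */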
1335
1336 if (brw->has_swizzling) {
1337 /* Right shifts required to swizzle bits 9 and 10 of the memory
1338 * address with bit 6.
1339 */
1340 param->swizzling[0] = 3;
1341 param->swizzling[1] = 4;
1342 }
1343 } else if (mt->tiling == I915_TILING_Y) {
1344 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1345 * different from the layout of an X-tiled surface; we simply pretend that
1346 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1347 * one arranged in X-major order, just as is the case for X-tiling.
1348 */
1349 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1350 param->tiling[1] = _mesa_logbase2(32);
1351
1352 if (brw->has_swizzling) {
1353 /* Right shift required to swizzle bit 9 of the memory address with
1354 * bit 6.
1355 */
1356 param->swizzling[0] = 3;
1357 }
1358 }
1359
1360 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1361 * address calculation algorithm (emit_address_calculation() in
1362 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1363 * modulus equal to the LOD.
1364 */
1365 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1366 0);
1367 }
1368
1369 static void
1370 update_image_surface(struct brw_context *brw,
1371 struct gl_image_unit *u,
1372 GLenum access,
1373 unsigned surface_idx,
1374 uint32_t *surf_offset,
1375 struct brw_image_param *param)
1376 {
1377 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1378 struct gl_texture_object *obj = u->TexObj;
1379 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1380
1381 if (obj->Target == GL_TEXTURE_BUFFER) {
1382 struct intel_buffer_object *intel_obj =
1383 intel_buffer_object(obj->BufferObject);
1384 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1385 _mesa_get_format_bytes(u->_ActualFormat));
1386
1387 brw->vtbl.emit_buffer_surface_state(
1388 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1389 format, intel_obj->Base.Size / texel_size, texel_size,
1390 access != GL_READ_ONLY);
1391
1392 update_buffer_image_param(brw, u, surface_idx, param);
1393
1394 } else {
1395 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1396 struct intel_mipmap_tree *mt = intel_obj->mt;
1397
1398 if (format == BRW_SURFACEFORMAT_RAW) {
1399 brw->vtbl.emit_buffer_surface_state(
1400 brw, surf_offset, mt->bo, mt->offset,
1401 format, mt->bo->size - mt->offset, 1 /* pitch */,
1402 access != GL_READ_ONLY);
1403
1404 } else {
1405 const unsigned num_layers = (!u->Layered ? 1 :
1406 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1407 mt->logical_depth0);
1408
1409 struct isl_view view = {
1410 .format = format,
1411 .base_level = obj->MinLevel + u->Level,
1412 .levels = 1,
1413 .base_array_layer = obj->MinLayer + u->_Layer,
1414 .array_len = num_layers,
1415 .channel_select = {
1416 ISL_CHANNEL_SELECT_RED,
1417 ISL_CHANNEL_SELECT_GREEN,
1418 ISL_CHANNEL_SELECT_BLUE,
1419 ISL_CHANNEL_SELECT_ALPHA,
1420 },
1421 .usage = ISL_SURF_USAGE_STORAGE_BIT,
1422 };
1423
1424 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1425
1426 brw_emit_surface_state(brw, mt, &view,
1427 surface_state_infos[brw->gen].rb_mocs, false,
1428 surf_offset, surf_index,
1429 I915_GEM_DOMAIN_SAMPLER,
1430 access == GL_READ_ONLY ? 0 :
1431 I915_GEM_DOMAIN_SAMPLER);
1432 }
1433
1434 update_texture_image_param(brw, u, surface_idx, param);
1435 }
1436
1437 } else {
1438 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1439 update_default_image_param(brw, u, surface_idx, param);
1440 }
1441 }
1442
1443 void
1444 brw_upload_image_surfaces(struct brw_context *brw,
1445 struct gl_linked_shader *shader,
1446 struct brw_stage_state *stage_state,
1447 struct brw_stage_prog_data *prog_data)
1448 {
1449 struct gl_context *ctx = &brw->ctx;
1450
1451 if (shader && shader->NumImages) {
1452 for (unsigned i = 0; i < shader->NumImages; i++) {
1453 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1454 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1455
1456 update_image_surface(brw, u, shader->ImageAccess[i],
1457 surf_idx,
1458 &stage_state->surf_offset[surf_idx],
1459 &prog_data->image_param[i]);
1460 }
1461
1462 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1463 /* This may have changed the image metadata that depends on the context
1464 * image unit state and is passed to the program as uniforms, so make sure
1465 * that push and pull constants are re-uploaded.
1466 */
1467 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1468 }
1469 }
1470
1471 static void
1472 brw_upload_wm_image_surfaces(struct brw_context *brw)
1473 {
1474 struct gl_context *ctx = &brw->ctx;
1475 /* BRW_NEW_FRAGMENT_PROGRAM */
1476 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1477
1478 if (prog) {
1479 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1480 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1481 &brw->wm.base, &brw->wm.prog_data->base);
1482 }
1483 }
1484
1485 const struct brw_tracked_state brw_wm_image_surfaces = {
1486 .dirty = {
1487 .mesa = _NEW_TEXTURE,
1488 .brw = BRW_NEW_BATCH |
1489 BRW_NEW_BLORP |
1490 BRW_NEW_FRAGMENT_PROGRAM |
1491 BRW_NEW_FS_PROG_DATA |
1492 BRW_NEW_IMAGE_UNITS
1493 },
1494 .emit = brw_upload_wm_image_surfaces,
1495 };
1496
1497 void
1498 gen4_init_vtable_surface_functions(struct brw_context *brw)
1499 {
1500 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1501 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1502 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1503 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1504 }
1505
1506 static void
1507 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1508 {
1509 struct gl_context *ctx = &brw->ctx;
1510 /* _NEW_PROGRAM */
1511 struct gl_shader_program *prog =
1512 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1513
1514 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1515 const unsigned surf_idx =
1516 brw->cs.prog_data->binding_table.work_groups_start;
1517 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1518 drm_intel_bo *bo;
1519 uint32_t bo_offset;
1520
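/* For a direct glDispatchCompute the work group counts are only available
 * in brw->compute.num_work_groups, so upload them to a buffer here; for
 * indirect dispatches they already live in num_work_groups_bo.
 */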
1521 if (brw->compute.num_work_groups_bo == NULL) {
1522 bo = NULL;
1523 intel_upload_data(brw,
1524 (void *)brw->compute.num_work_groups,
1525 3 * sizeof(GLuint),
1526 sizeof(GLuint),
1527 &bo,
1528 &bo_offset);
1529 } else {
1530 bo = brw->compute.num_work_groups_bo;
1531 bo_offset = brw->compute.num_work_groups_offset;
1532 }
1533
1534 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1535 bo, bo_offset,
1536 BRW_SURFACEFORMAT_RAW,
1537 3 * sizeof(GLuint), 1, true);
1538 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1539 }
1540 }
1541
1542 const struct brw_tracked_state brw_cs_work_groups_surface = {
1543 .dirty = {
1544 .brw = BRW_NEW_BLORP |
1545 BRW_NEW_CS_WORK_GROUPS
1546 },
1547 .emit = brw_upload_cs_work_groups_surface,
1548 };