i965: Emit surface states for extra planes prior to gen8
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_instruction.h"
40 #include "main/framebuffer.h"
41
42 #include "isl/isl.h"
43
44 #include "intel_mipmap_tree.h"
45 #include "intel_batchbuffer.h"
46 #include "intel_tex.h"
47 #include "intel_fbo.h"
48 #include "intel_buffer_objects.h"
49
50 #include "brw_context.h"
51 #include "brw_state.h"
52 #include "brw_defines.h"
53 #include "brw_wm.h"
54
55 GLuint
56 translate_tex_target(GLenum target)
57 {
58 switch (target) {
59 case GL_TEXTURE_1D:
60 case GL_TEXTURE_1D_ARRAY_EXT:
61 return BRW_SURFACE_1D;
62
63 case GL_TEXTURE_RECTANGLE_NV:
64 return BRW_SURFACE_2D;
65
66 case GL_TEXTURE_2D:
67 case GL_TEXTURE_2D_ARRAY_EXT:
68 case GL_TEXTURE_EXTERNAL_OES:
69 case GL_TEXTURE_2D_MULTISAMPLE:
70 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
71 return BRW_SURFACE_2D;
72
73 case GL_TEXTURE_3D:
74 return BRW_SURFACE_3D;
75
76 case GL_TEXTURE_CUBE_MAP:
77 case GL_TEXTURE_CUBE_MAP_ARRAY:
78 return BRW_SURFACE_CUBE;
79
80 default:
81 unreachable("not reached");
82 }
83 }
84
85 uint32_t
86 brw_get_surface_tiling_bits(uint32_t tiling)
87 {
88 switch (tiling) {
89 case I915_TILING_X:
90 return BRW_SURFACE_TILED;
91 case I915_TILING_Y:
92 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
93 default:
94 return 0;
95 }
96 }
97
98
99 uint32_t
100 brw_get_surface_num_multisamples(unsigned num_samples)
101 {
102 if (num_samples > 1)
103 return BRW_SURFACE_MULTISAMPLECOUNT_4;
104 else
105 return BRW_SURFACE_MULTISAMPLECOUNT_1;
106 }
107
108 void
109 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
110 bool is_render_target,
111 unsigned *width, unsigned *height,
112 unsigned *pitch, uint32_t *tiling, unsigned *format)
113 {
114 static const unsigned halign_stencil = 8;
115
116 /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
117 * there are half as many rows.
118 * In addition, mip-levels are accessed manually by the program, and
119 * therefore the surface is set up to cover all the mip-levels for one slice.
120 * (Hardware is still used to access individual slices.)
121 */
122 *tiling = I915_TILING_Y;
123 *pitch = mt->pitch * 2;
124 *width = ALIGN(mt->total_width, halign_stencil) * 2;
125 *height = (mt->total_height / mt->physical_depth0) / 2;
126
127 if (is_render_target) {
128 *format = BRW_SURFACEFORMAT_R8_UINT;
129 }
130 }
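/* Worked example of the mapping above, using hypothetical numbers: a
 * single-slice stencil miptree with pitch 128, total_width 100 and
 * total_height 64 is exposed as a Y-tiled view with
 * pitch = 128 * 2 = 256, width = ALIGN(100, 8) * 2 = 208 and
 * height = (64 / 1) / 2 = 32, i.e. twice as wide and half as tall as the
 * underlying W-tiled data.
 */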
131
132
133 /**
134 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
135 * swizzling.
136 */
137 int
138 brw_get_texture_swizzle(const struct gl_context *ctx,
139 const struct gl_texture_object *t)
140 {
141 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
142
143 int swizzles[SWIZZLE_NIL + 1] = {
144 SWIZZLE_X,
145 SWIZZLE_Y,
146 SWIZZLE_Z,
147 SWIZZLE_W,
148 SWIZZLE_ZERO,
149 SWIZZLE_ONE,
150 SWIZZLE_NIL
151 };
152
153 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
154 img->_BaseFormat == GL_DEPTH_STENCIL) {
155 GLenum depth_mode = t->DepthMode;
156
157 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
158 * with depth component data specified with a sized internal format.
159 * Otherwise, it's left at the old default, GL_LUMINANCE.
160 */
161 if (_mesa_is_gles3(ctx) &&
162 img->InternalFormat != GL_DEPTH_COMPONENT &&
163 img->InternalFormat != GL_DEPTH_STENCIL) {
164 depth_mode = GL_RED;
165 }
166
167 switch (depth_mode) {
168 case GL_ALPHA:
169 swizzles[0] = SWIZZLE_ZERO;
170 swizzles[1] = SWIZZLE_ZERO;
171 swizzles[2] = SWIZZLE_ZERO;
172 swizzles[3] = SWIZZLE_X;
173 break;
174 case GL_LUMINANCE:
175 swizzles[0] = SWIZZLE_X;
176 swizzles[1] = SWIZZLE_X;
177 swizzles[2] = SWIZZLE_X;
178 swizzles[3] = SWIZZLE_ONE;
179 break;
180 case GL_INTENSITY:
181 swizzles[0] = SWIZZLE_X;
182 swizzles[1] = SWIZZLE_X;
183 swizzles[2] = SWIZZLE_X;
184 swizzles[3] = SWIZZLE_X;
185 break;
186 case GL_RED:
187 swizzles[0] = SWIZZLE_X;
188 swizzles[1] = SWIZZLE_ZERO;
189 swizzles[2] = SWIZZLE_ZERO;
190 swizzles[3] = SWIZZLE_ONE;
191 break;
192 }
193 }
194
195 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
196
197 /* If the texture's format is alpha-only, force R, G, and B to
198 * 0.0. Similarly, if the texture's format has no alpha channel,
199 * force the alpha value read to 1.0. This allows for the
200 * implementation to use an RGBA texture for any of these formats
201 * without leaking any unexpected values.
202 */
203 switch (img->_BaseFormat) {
204 case GL_ALPHA:
205 swizzles[0] = SWIZZLE_ZERO;
206 swizzles[1] = SWIZZLE_ZERO;
207 swizzles[2] = SWIZZLE_ZERO;
208 break;
209 case GL_LUMINANCE:
210 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
211 swizzles[0] = SWIZZLE_X;
212 swizzles[1] = SWIZZLE_X;
213 swizzles[2] = SWIZZLE_X;
214 swizzles[3] = SWIZZLE_ONE;
215 }
216 break;
217 case GL_LUMINANCE_ALPHA:
218 if (datatype == GL_SIGNED_NORMALIZED) {
219 swizzles[0] = SWIZZLE_X;
220 swizzles[1] = SWIZZLE_X;
221 swizzles[2] = SWIZZLE_X;
222 swizzles[3] = SWIZZLE_W;
223 }
224 break;
225 case GL_INTENSITY:
226 if (datatype == GL_SIGNED_NORMALIZED) {
227 swizzles[0] = SWIZZLE_X;
228 swizzles[1] = SWIZZLE_X;
229 swizzles[2] = SWIZZLE_X;
230 swizzles[3] = SWIZZLE_X;
231 }
232 break;
233 case GL_RED:
234 case GL_RG:
235 case GL_RGB:
236 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
237 swizzles[3] = SWIZZLE_ONE;
238 break;
239 }
240
241 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
242 swizzles[GET_SWZ(t->_Swizzle, 1)],
243 swizzles[GET_SWZ(t->_Swizzle, 2)],
244 swizzles[GET_SWZ(t->_Swizzle, 3)]);
245 }
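/* Illustrative composition of the result above, assuming a hypothetical
 * GL_DEPTH_COMPONENT texture with the legacy GL_LUMINANCE depth mode and an
 * identity application swizzle: the function returns
 * MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE), i.e. the depth
 * value is replicated to RGB and alpha reads back as 1.0.
 */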
246
247 static void
248 gen4_emit_buffer_surface_state(struct brw_context *brw,
249 uint32_t *out_offset,
250 drm_intel_bo *bo,
251 unsigned buffer_offset,
252 unsigned surface_format,
253 unsigned buffer_size,
254 unsigned pitch,
255 bool rw)
256 {
257 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
258 6 * 4, 32, out_offset);
259 memset(surf, 0, 6 * 4);
260
261 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
262 surface_format << BRW_SURFACE_FORMAT_SHIFT |
263 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
264 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
265 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
266 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
267 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
268 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
269
270 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
271 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
272 * physical cache. It is mapped in hardware to the sampler cache."
273 */
274 if (bo) {
275 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
276 bo, buffer_offset,
277 I915_GEM_DOMAIN_SAMPLER,
278 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
279 }
280 }
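/* The buffer size above is split across the width (bits 6:0), height
 * (bits 19:7) and depth (bits 26:20) fields of the surface state. Worked
 * example with a hypothetical buffer_size of 1000000 elements:
 * width = 1000000 & 0x7f = 64, height = (1000000 >> 7) & 0x1fff = 7812,
 * depth = (1000000 >> 20) & 0x7f = 0.
 */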
281
282 void
283 brw_update_buffer_texture_surface(struct gl_context *ctx,
284 unsigned unit,
285 uint32_t *surf_offset)
286 {
287 struct brw_context *brw = brw_context(ctx);
288 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
289 struct intel_buffer_object *intel_obj =
290 intel_buffer_object(tObj->BufferObject);
291 uint32_t size = tObj->BufferSize;
292 drm_intel_bo *bo = NULL;
293 mesa_format format = tObj->_BufferObjectFormat;
294 uint32_t brw_format = brw_format_for_mesa_format(format);
295 int texel_size = _mesa_get_format_bytes(format);
296
297 if (intel_obj) {
298 size = MIN2(size, intel_obj->Base.Size);
299 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
300 }
301
302 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
303 _mesa_problem(NULL, "bad format %s for texture buffer\n",
304 _mesa_get_format_name(format));
305 }
306
307 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
308 tObj->BufferOffset,
309 brw_format,
310 size / texel_size,
311 texel_size,
312 false /* rw */);
313 }
314
315 static void
316 brw_update_texture_surface(struct gl_context *ctx,
317 unsigned unit,
318 uint32_t *surf_offset,
319 bool for_gather,
320 uint32_t plane)
321 {
322 struct brw_context *brw = brw_context(ctx);
323 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
324 struct intel_texture_object *intelObj = intel_texture_object(tObj);
325 struct intel_mipmap_tree *mt = intelObj->mt;
326 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
327 uint32_t *surf;
328
329 /* BRW_NEW_TEXTURE_BUFFER */
330 if (tObj->Target == GL_TEXTURE_BUFFER) {
331 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
332 return;
333 }
334
335 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
336 6 * 4, 32, surf_offset);
337
338 uint32_t tex_format = translate_tex_format(brw, mt->format,
339 sampler->sRGBDecode);
340
341 if (tObj->Target == GL_TEXTURE_EXTERNAL_OES) {
342 if (plane > 0)
343 mt = mt->plane[plane - 1];
344 if (mt == NULL)
345 return;
346
347 tex_format = translate_tex_format(brw, mt->format, sampler->sRGBDecode);
348 }
349
350 if (for_gather) {
351 /* Sandybridge's gather4 message is broken for integer formats.
352 * To work around this, we pretend the surface is UNORM for
353 * 8 or 16-bit formats, and emit shader instructions to recover
354 * the real INT/UINT value. For 32-bit formats, we pretend
355 * the surface is FLOAT, and simply reinterpret the resulting
356 * bits.
357 */
358 switch (tex_format) {
359 case BRW_SURFACEFORMAT_R8_SINT:
360 case BRW_SURFACEFORMAT_R8_UINT:
361 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
362 break;
363
364 case BRW_SURFACEFORMAT_R16_SINT:
365 case BRW_SURFACEFORMAT_R16_UINT:
366 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
367 break;
368
369 case BRW_SURFACEFORMAT_R32_SINT:
370 case BRW_SURFACEFORMAT_R32_UINT:
371 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
372 break;
373
374 default:
375 break;
376 }
377 }
378
379 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
380 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
381 BRW_SURFACE_CUBEFACE_ENABLES |
382 tex_format << BRW_SURFACE_FORMAT_SHIFT);
383
384 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
385
386 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
387 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
388 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
389
390 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
391 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
392 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
393
394 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
395 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
396
397 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
398
399 /* Emit relocation to surface contents */
400 drm_intel_bo_emit_reloc(brw->batch.bo,
401 *surf_offset + 4,
402 mt->bo,
403 surf[1] - mt->bo->offset64,
404 I915_GEM_DOMAIN_SAMPLER, 0);
405 }
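/* Summary of the six dwords emitted above (gen4-style SURFACE_STATE):
 * surf[0] = surface type, mip layout, cube face enables and format;
 * surf[1] = base address (relocated);
 * surf[2] = LOD count, width and height;
 * surf[3] = tiling, depth and pitch;
 * surf[4] = multisample count and minimum LOD;
 * surf[5] = vertical alignment flag.
 */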
406
407 /**
408 * Create the constant buffer surface. Vertex/fragment shader constants will be
409 * read from this buffer with Data Port Read instructions/messages.
410 */
411 void
412 brw_create_constant_surface(struct brw_context *brw,
413 drm_intel_bo *bo,
414 uint32_t offset,
415 uint32_t size,
416 uint32_t *out_offset)
417 {
418 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
419 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
420 size, 1, false);
421 }
422
423 /**
424 * Create the buffer surface. Shader buffer variables will be
425 * read from / written to this buffer with Data Port Read/Write
426 * instructions/messages.
427 */
428 void
429 brw_create_buffer_surface(struct brw_context *brw,
430 drm_intel_bo *bo,
431 uint32_t offset,
432 uint32_t size,
433 uint32_t *out_offset)
434 {
435 /* Use a raw surface so we can reuse existing untyped read/write/atomic
436 * messages. We need these specifically for the fragment shader since they
437 * include a pixel mask header that we need in order to ensure correct
438 * behavior with helper invocations, which must not write to the buffer.
439 */
440 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
441 BRW_SURFACEFORMAT_RAW,
442 size, 1, true);
443 }
444
445 /**
446 * Set up a binding table entry for use by stream output logic (transform
447 * feedback).
448 *
449 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
450 */
451 void
452 brw_update_sol_surface(struct brw_context *brw,
453 struct gl_buffer_object *buffer_obj,
454 uint32_t *out_offset, unsigned num_vector_components,
455 unsigned stride_dwords, unsigned offset_dwords)
456 {
457 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
458 uint32_t offset_bytes = 4 * offset_dwords;
459 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
460 offset_bytes,
461 buffer_obj->Size - offset_bytes);
462 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
463 out_offset);
464 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
465 size_t size_dwords = buffer_obj->Size / 4;
466 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
467
468 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
469 * too big to map using a single binding table entry?
470 */
471 assert((size_dwords - offset_dwords) / stride_dwords
472 <= BRW_MAX_NUM_BUFFER_ENTRIES);
473
474 if (size_dwords > offset_dwords + num_vector_components) {
475 /* There is room for at least 1 transform feedback output in the buffer.
476 * Compute the number of additional transform feedback outputs the
477 * buffer has room for.
478 */
479 buffer_size_minus_1 =
480 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
481 } else {
482 /* There isn't even room for a single transform feedback output in the
483 * buffer. We can't configure the binding table entry to prevent output
484 * entirely; we'll have to rely on the geometry shader to detect
485 * overflow. But to minimize the damage in case of a bug, set up the
486 * binding table entry to just allow a single output.
487 */
488 buffer_size_minus_1 = 0;
489 }
490 width = buffer_size_minus_1 & 0x7f;
491 height = (buffer_size_minus_1 & 0xfff80) >> 7;
492 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
493
494 switch (num_vector_components) {
495 case 1:
496 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
497 break;
498 case 2:
499 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
500 break;
501 case 3:
502 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
503 break;
504 case 4:
505 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
506 break;
507 default:
508 unreachable("Invalid vector size for transform feedback output");
509 }
510
511 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
512 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
513 surface_format << BRW_SURFACE_FORMAT_SHIFT |
514 BRW_SURFACE_RC_READ_WRITE;
515 surf[1] = bo->offset64 + offset_bytes; /* reloc */
516 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
517 height << BRW_SURFACE_HEIGHT_SHIFT);
518 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
519 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
520 surf[4] = 0;
521 surf[5] = 0;
522
523 /* Emit relocation to surface contents. */
524 drm_intel_bo_emit_reloc(brw->batch.bo,
525 *out_offset + 4,
526 bo, offset_bytes,
527 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
528 }
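/* Worked example for the encoding above, assuming a hypothetical stream
 * output setup: a 4096-byte buffer (1024 dwords) bound with
 * offset_dwords = 0, stride_dwords = 4 and num_vector_components = 4 gives
 * buffer_size_minus_1 = (1024 - 0 - 4) / 4 = 255, which splits into
 * width = 255 & 0x7f = 127, height = 1, depth = 0 with format
 * R32G32B32A32_FLOAT.
 */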
529
530 /* Creates a new WM constant buffer reflecting the current fragment program's
531 * constants, if needed by the fragment program.
532 *
533 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
534 * state atom.
535 */
536 static void
537 brw_upload_wm_pull_constants(struct brw_context *brw)
538 {
539 struct brw_stage_state *stage_state = &brw->wm.base;
540 /* BRW_NEW_FRAGMENT_PROGRAM */
541 struct brw_fragment_program *fp =
542 (struct brw_fragment_program *) brw->fragment_program;
543 /* BRW_NEW_FS_PROG_DATA */
544 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
545
546 /* _NEW_PROGRAM_CONSTANTS */
547 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
548 stage_state, prog_data);
549 }
550
551 const struct brw_tracked_state brw_wm_pull_constants = {
552 .dirty = {
553 .mesa = _NEW_PROGRAM_CONSTANTS,
554 .brw = BRW_NEW_BATCH |
555 BRW_NEW_BLORP |
556 BRW_NEW_FRAGMENT_PROGRAM |
557 BRW_NEW_FS_PROG_DATA,
558 },
559 .emit = brw_upload_wm_pull_constants,
560 };
561
562 /**
563 * Creates a null renderbuffer surface.
564 *
565 * This is used when the shader doesn't write to any color output. An FB
566 * write to target 0 will still be emitted, because that's how the thread is
567 * terminated (and computed depth is returned), so we need to have the
568 * hardware discard the target 0 color output.
569 */
570 static void
571 brw_emit_null_surface_state(struct brw_context *brw,
572 unsigned width,
573 unsigned height,
574 unsigned samples,
575 uint32_t *out_offset)
576 {
577 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
578 * Notes):
579 *
580 * A null surface will be used in instances where an actual surface is
581 * not bound. When a write message is generated to a null surface, no
582 * actual surface is written to. When a read message (including any
583 * sampling engine message) is generated to a null surface, the result
584 * is all zeros. Note that a null surface type is allowed to be used
585 * with all messages, even if it is not specifically indicated as
586 * supported. All of the remaining fields in surface state are ignored
587 * for null surfaces, with the following exceptions:
588 *
589 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
590 * depth buffer’s corresponding state for all render target surfaces,
591 * including null.
592 *
593 * - Surface Format must be R8G8B8A8_UNORM.
594 */
595 unsigned surface_type = BRW_SURFACE_NULL;
596 drm_intel_bo *bo = NULL;
597 unsigned pitch_minus_1 = 0;
598 uint32_t multisampling_state = 0;
599 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
600 out_offset);
601
602 if (samples > 1) {
603 /* On Gen6, null render targets seem to cause GPU hangs when
604 * multisampling. So work around this problem by rendering into a dummy
605 * color buffer.
606 *
607 * To decrease the amount of memory needed by the workaround buffer, we
608 * set its pitch to 128 bytes (the width of a Y tile). This means that
609 * the amount of memory needed for the workaround buffer is
610 * (width_in_tiles + height_in_tiles - 1) tiles.
611 *
612 * Note that since the workaround buffer will be interpreted by the
613 * hardware as an interleaved multisampled buffer, we need to compute
614 * width_in_tiles and height_in_tiles by dividing the width and height
615 * by 16 rather than the normal Y-tile size of 32.
616 */
617 unsigned width_in_tiles = ALIGN(width, 16) / 16;
618 unsigned height_in_tiles = ALIGN(height, 16) / 16;
619 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
620 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
621 size_needed);
622 bo = brw->wm.multisampled_null_render_target_bo;
623 surface_type = BRW_SURFACE_2D;
624 pitch_minus_1 = 127;
625 multisampling_state = brw_get_surface_num_multisamples(samples);
626 }
627
628 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
629 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
630 if (brw->gen < 6) {
631 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
632 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
633 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
634 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
635 }
636 surf[1] = bo ? bo->offset64 : 0;
637 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
638 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
639
640 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
641 * Notes):
642 *
643 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
644 */
645 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
646 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
647 surf[4] = multisampling_state;
648 surf[5] = 0;
649
650 if (bo) {
651 drm_intel_bo_emit_reloc(brw->batch.bo,
652 *out_offset + 4,
653 bo, 0,
654 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
655 }
656 }
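/* Sizing example for the multisampled-null workaround above, assuming a
 * hypothetical 1920x1080 target: width_in_tiles = ALIGN(1920, 16) / 16 = 120,
 * height_in_tiles = ALIGN(1080, 16) / 16 = 68, so the dummy buffer needs
 * (120 + 68 - 1) * 4096 = 765952 bytes regardless of the sample count.
 */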
657
658 /**
659 * Sets up a surface state structure to point at the given region.
660 * While it is only used for the front/back buffer currently, it should be
661 * usable for further buffers when doing ARB_draw_buffers support.
662 */
663 static uint32_t
664 brw_update_renderbuffer_surface(struct brw_context *brw,
665 struct gl_renderbuffer *rb,
666 bool layered, unsigned unit,
667 uint32_t surf_index)
668 {
669 struct gl_context *ctx = &brw->ctx;
670 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
671 struct intel_mipmap_tree *mt = irb->mt;
672 uint32_t *surf;
673 uint32_t tile_x, tile_y;
674 uint32_t format = 0;
675 uint32_t offset;
676 /* _NEW_BUFFERS */
677 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
678 /* BRW_NEW_FS_PROG_DATA */
679
680 assert(!layered);
681
682 if (rb->TexImage && !brw->has_surface_tile_offset) {
683 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
684
685 if (tile_x != 0 || tile_y != 0) {
686 /* Original gen4 hardware couldn't draw to a non-tile-aligned
687 * destination in a miptree unless you actually set up your renderbuffer
688 * as a miptree and used the fragile lod/array_index/etc. controls to
689 * select the image. So, instead, we just make a new single-level
690 * miptree and render into that.
691 */
692 intel_renderbuffer_move_to_temp(brw, irb, false);
693 mt = irb->mt;
694 }
695 }
696
697 intel_miptree_used_for_rendering(irb->mt);
698
699 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
700
701 format = brw->render_target_format[rb_format];
702 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
703 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
704 __func__, _mesa_get_format_name(rb_format));
705 }
706
707 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
708 format << BRW_SURFACE_FORMAT_SHIFT);
709
710 /* reloc */
711 assert(mt->offset % mt->cpp == 0);
712 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
713 mt->bo->offset64 + mt->offset);
714
715 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
716 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
717
718 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
719 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
720
721 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
722
723 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
724 /* Note that the low bits of these fields are missing, so
725 * there's the possibility of getting in trouble.
726 */
727 assert(tile_x % 4 == 0);
728 assert(tile_y % 2 == 0);
729 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
730 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
731 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
732
733 if (brw->gen < 6) {
734 /* _NEW_COLOR */
735 if (!ctx->Color.ColorLogicOpEnabled &&
736 (ctx->Color.BlendEnabled & (1 << unit)))
737 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
738
739 if (!ctx->Color.ColorMask[unit][0])
740 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
741 if (!ctx->Color.ColorMask[unit][1])
742 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
743 if (!ctx->Color.ColorMask[unit][2])
744 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
745
746 /* Disable writes to the alpha component when the
747 * renderbuffer is XRGB.
748 */
749 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
750 !ctx->Color.ColorMask[unit][3]) {
751 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
752 }
753 }
754
755 drm_intel_bo_emit_reloc(brw->batch.bo,
756 offset + 4,
757 mt->bo,
758 surf[1] - mt->bo->offset64,
759 I915_GEM_DOMAIN_RENDER,
760 I915_GEM_DOMAIN_RENDER);
761
762 return offset;
763 }
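/* Example of the surf[5] tile-offset encoding above, with hypothetical
 * offsets: tile_x = 32 and tile_y = 16 are stored as 32 / 4 = 8 and
 * 16 / 2 = 8, which is why the asserts require tile_x to be a multiple of 4
 * and tile_y a multiple of 2.
 */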
764
765 /**
766 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
767 */
768 void
769 brw_update_renderbuffer_surfaces(struct brw_context *brw,
770 const struct gl_framebuffer *fb,
771 uint32_t render_target_start,
772 uint32_t *surf_offset)
773 {
774 GLuint i;
775 const unsigned int w = _mesa_geometric_width(fb);
776 const unsigned int h = _mesa_geometric_height(fb);
777 const unsigned int s = _mesa_geometric_samples(fb);
778
779 /* Update surfaces for drawing buffers */
780 if (fb->_NumColorDrawBuffers >= 1) {
781 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
782 const uint32_t surf_index = render_target_start + i;
783
784 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
785 surf_offset[surf_index] =
786 brw->vtbl.update_renderbuffer_surface(
787 brw, fb->_ColorDrawBuffers[i],
788 _mesa_geometric_layers(fb) > 0, i, surf_index);
789 } else {
790 brw->vtbl.emit_null_surface_state(brw, w, h, s,
791 &surf_offset[surf_index]);
792 }
793 }
794 } else {
795 const uint32_t surf_index = render_target_start;
796 brw->vtbl.emit_null_surface_state(brw, w, h, s,
797 &surf_offset[surf_index]);
798 }
799 }
800
801 static void
802 update_renderbuffer_surfaces(struct brw_context *brw)
803 {
804 const struct gl_context *ctx = &brw->ctx;
805
806 /* _NEW_BUFFERS | _NEW_COLOR */
807 const struct gl_framebuffer *fb = ctx->DrawBuffer;
808 brw_update_renderbuffer_surfaces(
809 brw, fb,
810 brw->wm.prog_data->binding_table.render_target_start,
811 brw->wm.base.surf_offset);
812 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
813 }
814
815 const struct brw_tracked_state brw_renderbuffer_surfaces = {
816 .dirty = {
817 .mesa = _NEW_BUFFERS |
818 _NEW_COLOR,
819 .brw = BRW_NEW_BATCH |
820 BRW_NEW_BLORP |
821 BRW_NEW_FS_PROG_DATA,
822 },
823 .emit = update_renderbuffer_surfaces,
824 };
825
826 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
827 .dirty = {
828 .mesa = _NEW_BUFFERS,
829 .brw = BRW_NEW_BATCH |
830 BRW_NEW_BLORP,
831 },
832 .emit = update_renderbuffer_surfaces,
833 };
834
835
836 static void
837 update_stage_texture_surfaces(struct brw_context *brw,
838 const struct gl_program *prog,
839 struct brw_stage_state *stage_state,
840 bool for_gather, uint32_t plane)
841 {
842 if (!prog)
843 return;
844
845 struct gl_context *ctx = &brw->ctx;
846
847 uint32_t *surf_offset = stage_state->surf_offset;
848
849 /* BRW_NEW_*_PROG_DATA */
850 if (for_gather)
851 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
852 else
853 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
854
855 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
856 for (unsigned s = 0; s < num_samplers; s++) {
857 surf_offset[s] = 0;
858
859 if (prog->SamplersUsed & (1 << s)) {
860 const unsigned unit = prog->SamplerUnits[s];
861
862 /* _NEW_TEXTURE */
863 if (ctx->Texture.Unit[unit]._Current) {
864 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
865 }
866 }
867 }
868 }
869
870
871 /**
872 * Construct SURFACE_STATE objects for enabled textures.
873 */
874 static void
875 brw_update_texture_surfaces(struct brw_context *brw)
876 {
877 /* BRW_NEW_VERTEX_PROGRAM */
878 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
879
880 /* BRW_NEW_TESS_PROGRAMS */
881 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
882 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
883
884 /* BRW_NEW_GEOMETRY_PROGRAM */
885 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
886
887 /* BRW_NEW_FRAGMENT_PROGRAM */
888 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
889
890 /* _NEW_TEXTURE */
891 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
892 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
893 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
894 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
895 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
896
897 /* Emit an alternate set of surface state for gather. This
898 * allows the surface format to be overridden for only the
899 * gather4 messages. */
900 if (brw->gen < 8) {
901 if (vs && vs->UsesGather)
902 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
903 if (tcs && tcs->UsesGather)
904 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
905 if (tes && tes->UsesGather)
906 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
907 if (gs && gs->UsesGather)
908 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
909 if (fs && fs->UsesGather)
910 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
911 }
912
913 if (fs) {
914 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
915 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
916 }
917
918 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
919 }
920
921 const struct brw_tracked_state brw_texture_surfaces = {
922 .dirty = {
923 .mesa = _NEW_TEXTURE,
924 .brw = BRW_NEW_BATCH |
925 BRW_NEW_BLORP |
926 BRW_NEW_FRAGMENT_PROGRAM |
927 BRW_NEW_FS_PROG_DATA |
928 BRW_NEW_GEOMETRY_PROGRAM |
929 BRW_NEW_GS_PROG_DATA |
930 BRW_NEW_TESS_PROGRAMS |
931 BRW_NEW_TCS_PROG_DATA |
932 BRW_NEW_TES_PROG_DATA |
933 BRW_NEW_TEXTURE_BUFFER |
934 BRW_NEW_VERTEX_PROGRAM |
935 BRW_NEW_VS_PROG_DATA,
936 },
937 .emit = brw_update_texture_surfaces,
938 };
939
940 static void
941 brw_update_cs_texture_surfaces(struct brw_context *brw)
942 {
943 /* BRW_NEW_COMPUTE_PROGRAM */
944 struct gl_program *cs = (struct gl_program *) brw->compute_program;
945
946 /* _NEW_TEXTURE */
947 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
948
949 /* Emit an alternate set of surface state for gather. This
950 * allows the surface format to be overridden for only the
951 * gather4 messages.
952 */
953 if (brw->gen < 8) {
954 if (cs && cs->UsesGather)
955 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
956 }
957
958 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
959 }
960
961 const struct brw_tracked_state brw_cs_texture_surfaces = {
962 .dirty = {
963 .mesa = _NEW_TEXTURE,
964 .brw = BRW_NEW_BATCH |
965 BRW_NEW_BLORP |
966 BRW_NEW_COMPUTE_PROGRAM,
967 },
968 .emit = brw_update_cs_texture_surfaces,
969 };
970
971
972 void
973 brw_upload_ubo_surfaces(struct brw_context *brw,
974 struct gl_shader *shader,
975 struct brw_stage_state *stage_state,
976 struct brw_stage_prog_data *prog_data)
977 {
978 struct gl_context *ctx = &brw->ctx;
979
980 if (!shader)
981 return;
982
983 uint32_t *ubo_surf_offsets =
984 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
985
986 for (int i = 0; i < shader->NumUniformBlocks; i++) {
987 struct gl_uniform_buffer_binding *binding =
988 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
989
990 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
991 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
992 } else {
993 struct intel_buffer_object *intel_bo =
994 intel_buffer_object(binding->BufferObject);
995 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
996 if (!binding->AutomaticSize)
997 size = MIN2(size, binding->Size);
998 drm_intel_bo *bo =
999 intel_bufferobj_buffer(brw, intel_bo,
1000 binding->Offset,
1001 size);
1002 brw_create_constant_surface(brw, bo, binding->Offset,
1003 size,
1004 &ubo_surf_offsets[i]);
1005 }
1006 }
1007
1008 uint32_t *ssbo_surf_offsets =
1009 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1010
1011 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
1012 struct gl_shader_storage_buffer_binding *binding =
1013 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
1014
1015 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1016 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1017 } else {
1018 struct intel_buffer_object *intel_bo =
1019 intel_buffer_object(binding->BufferObject);
1020 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1021 if (!binding->AutomaticSize)
1022 size = MIN2(size, binding->Size);
1023 drm_intel_bo *bo =
1024 intel_bufferobj_buffer(brw, intel_bo,
1025 binding->Offset,
1026 size);
1027 brw_create_buffer_surface(brw, bo, binding->Offset,
1028 size,
1029 &ssbo_surf_offsets[i]);
1030 }
1031 }
1032
1033 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1034 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1035 }
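/* Binding-table layout sketch for the loops above, assuming a hypothetical
 * shader with two uniform blocks and one shader storage block: the UBOs land
 * in surf_offset[ubo_start + 0] and surf_offset[ubo_start + 1] as RGBA32F
 * constant surfaces, while the SSBO lands in surf_offset[ssbo_start + 0] as a
 * RAW surface suitable for untyped read/write/atomic messages.
 */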
1036
1037 static void
1038 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1039 {
1040 struct gl_context *ctx = &brw->ctx;
1041 /* _NEW_PROGRAM */
1042 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1043
1044 if (!prog)
1045 return;
1046
1047 /* BRW_NEW_FS_PROG_DATA */
1048 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1049 &brw->wm.base, &brw->wm.prog_data->base);
1050 }
1051
1052 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1053 .dirty = {
1054 .mesa = _NEW_PROGRAM,
1055 .brw = BRW_NEW_BATCH |
1056 BRW_NEW_BLORP |
1057 BRW_NEW_FS_PROG_DATA |
1058 BRW_NEW_UNIFORM_BUFFER,
1059 },
1060 .emit = brw_upload_wm_ubo_surfaces,
1061 };
1062
1063 static void
1064 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1065 {
1066 struct gl_context *ctx = &brw->ctx;
1067 /* _NEW_PROGRAM */
1068 struct gl_shader_program *prog =
1069 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1070
1071 if (!prog)
1072 return;
1073
1074 /* BRW_NEW_CS_PROG_DATA */
1075 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1076 &brw->cs.base, &brw->cs.prog_data->base);
1077 }
1078
1079 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1080 .dirty = {
1081 .mesa = _NEW_PROGRAM,
1082 .brw = BRW_NEW_BATCH |
1083 BRW_NEW_BLORP |
1084 BRW_NEW_CS_PROG_DATA |
1085 BRW_NEW_UNIFORM_BUFFER,
1086 },
1087 .emit = brw_upload_cs_ubo_surfaces,
1088 };
1089
1090 void
1091 brw_upload_abo_surfaces(struct brw_context *brw,
1092 struct gl_shader *shader,
1093 struct brw_stage_state *stage_state,
1094 struct brw_stage_prog_data *prog_data)
1095 {
1096 struct gl_context *ctx = &brw->ctx;
1097 uint32_t *surf_offsets =
1098 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1099
1100 if (shader && shader->NumAtomicBuffers) {
1101 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1102 struct gl_atomic_buffer_binding *binding =
1103 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1104 struct intel_buffer_object *intel_bo =
1105 intel_buffer_object(binding->BufferObject);
1106 drm_intel_bo *bo = intel_bufferobj_buffer(
1107 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1108
1109 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1110 binding->Offset, BRW_SURFACEFORMAT_RAW,
1111 bo->size - binding->Offset, 1, true);
1112 }
1113
1114 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1115 }
1116 }
1117
1118 static void
1119 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1120 {
1121 struct gl_context *ctx = &brw->ctx;
1122 /* _NEW_PROGRAM */
1123 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1124
1125 if (prog) {
1126 /* BRW_NEW_FS_PROG_DATA */
1127 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1128 &brw->wm.base, &brw->wm.prog_data->base);
1129 }
1130 }
1131
1132 const struct brw_tracked_state brw_wm_abo_surfaces = {
1133 .dirty = {
1134 .mesa = _NEW_PROGRAM,
1135 .brw = BRW_NEW_ATOMIC_BUFFER |
1136 BRW_NEW_BLORP |
1137 BRW_NEW_BATCH |
1138 BRW_NEW_FS_PROG_DATA,
1139 },
1140 .emit = brw_upload_wm_abo_surfaces,
1141 };
1142
1143 static void
1144 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1145 {
1146 struct gl_context *ctx = &brw->ctx;
1147 /* _NEW_PROGRAM */
1148 struct gl_shader_program *prog =
1149 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1150
1151 if (prog) {
1152 /* BRW_NEW_CS_PROG_DATA */
1153 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1154 &brw->cs.base, &brw->cs.prog_data->base);
1155 }
1156 }
1157
1158 const struct brw_tracked_state brw_cs_abo_surfaces = {
1159 .dirty = {
1160 .mesa = _NEW_PROGRAM,
1161 .brw = BRW_NEW_ATOMIC_BUFFER |
1162 BRW_NEW_BLORP |
1163 BRW_NEW_BATCH |
1164 BRW_NEW_CS_PROG_DATA,
1165 },
1166 .emit = brw_upload_cs_abo_surfaces,
1167 };
1168
1169 static void
1170 brw_upload_cs_image_surfaces(struct brw_context *brw)
1171 {
1172 struct gl_context *ctx = &brw->ctx;
1173 /* _NEW_PROGRAM */
1174 struct gl_shader_program *prog =
1175 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1176
1177 if (prog) {
1178 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1179 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1180 &brw->cs.base, &brw->cs.prog_data->base);
1181 }
1182 }
1183
1184 const struct brw_tracked_state brw_cs_image_surfaces = {
1185 .dirty = {
1186 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1187 .brw = BRW_NEW_BATCH |
1188 BRW_NEW_BLORP |
1189 BRW_NEW_CS_PROG_DATA |
1190 BRW_NEW_IMAGE_UNITS
1191 },
1192 .emit = brw_upload_cs_image_surfaces,
1193 };
1194
1195 static uint32_t
1196 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1197 {
1198 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1199 uint32_t hw_format = brw_format_for_mesa_format(format);
1200 if (access == GL_WRITE_ONLY) {
1201 return hw_format;
1202 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1203 /* Typed surface reads support a very limited subset of the shader
1204 * image formats. Translate the requested format into the closest one the
1205 * hardware supports.
1206 */
1207 return isl_lower_storage_image_format(devinfo, hw_format);
1208 } else {
1209 /* The hardware doesn't actually support a typed format that we can use
1210 * so we have to fall back to untyped read/write messages.
1211 */
1212 return BRW_SURFACEFORMAT_RAW;
1213 }
1214 }
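/* In short: write-only images keep their exact format, readable images are
 * lowered to the nearest format with typed-read support, and anything the
 * hardware cannot read back as a typed surface falls back to RAW plus
 * untyped read/write messages.
 */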
1215
1216 static void
1217 update_default_image_param(struct brw_context *brw,
1218 struct gl_image_unit *u,
1219 unsigned surface_idx,
1220 struct brw_image_param *param)
1221 {
1222 memset(param, 0, sizeof(*param));
1223 param->surface_idx = surface_idx;
1224 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1225 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1226 * detailed explanation of these parameters.
1227 */
1228 param->swizzling[0] = 0xff;
1229 param->swizzling[1] = 0xff;
1230 }
1231
1232 static void
1233 update_buffer_image_param(struct brw_context *brw,
1234 struct gl_image_unit *u,
1235 unsigned surface_idx,
1236 struct brw_image_param *param)
1237 {
1238 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1239
1240 update_default_image_param(brw, u, surface_idx, param);
1241
1242 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1243 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1244 }
1245
1246 static void
1247 update_texture_image_param(struct brw_context *brw,
1248 struct gl_image_unit *u,
1249 unsigned surface_idx,
1250 struct brw_image_param *param)
1251 {
1252 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1253
1254 update_default_image_param(brw, u, surface_idx, param);
1255
1256 param->size[0] = minify(mt->logical_width0, u->Level);
1257 param->size[1] = minify(mt->logical_height0, u->Level);
1258 param->size[2] = (!u->Layered ? 1 :
1259 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1260 u->TexObj->Target == GL_TEXTURE_3D ?
1261 minify(mt->logical_depth0, u->Level) :
1262 mt->logical_depth0);
1263
1264 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1265 &param->offset[0],
1266 &param->offset[1]);
1267
1268 param->stride[0] = mt->cpp;
1269 param->stride[1] = mt->pitch / mt->cpp;
1270 param->stride[2] =
1271 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1272 param->stride[3] =
1273 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1274
1275 if (mt->tiling == I915_TILING_X) {
1276 /* An X tile is a rectangular block of 512x8 bytes. */
1277 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1278 param->tiling[1] = _mesa_logbase2(8);
1279
1280 if (brw->has_swizzling) {
1281 /* Right shifts required to swizzle bits 9 and 10 of the memory
1282 * address with bit 6.
1283 */
1284 param->swizzling[0] = 3;
1285 param->swizzling[1] = 4;
1286 }
1287 } else if (mt->tiling == I915_TILING_Y) {
1288 /* The layout of a Y-tiled surface in memory isn't fundamentally
1289 * different from the layout of an X-tiled surface; we simply pretend that
1290 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1291 * one arranged in X-major order, just as is the case for X-tiling.
1292 */
1293 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1294 param->tiling[1] = _mesa_logbase2(32);
1295
1296 if (brw->has_swizzling) {
1297 /* Right shift required to swizzle bit 9 of the memory address with
1298 * bit 6.
1299 */
1300 param->swizzling[0] = 3;
1301 }
1302 }
1303
1304 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1305 * address calculation algorithm (emit_address_calculation() in
1306 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1307 * modulus equal to the LOD.
1308 */
1309 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1310 0);
1311 }
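/* Tiling parameter example for the code above, assuming a hypothetical
 * 32bpp surface (cpp = 4): X-tiling gives tiling[0] = log2(512 / 4) = 7 and
 * tiling[1] = log2(8) = 3, while Y-tiling gives tiling[0] = log2(16 / 4) = 2
 * and tiling[1] = log2(32) = 5; the swizzling shifts are only set when the
 * hardware swizzles bit 6 of the memory address.
 */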
1312
1313 static void
1314 update_image_surface(struct brw_context *brw,
1315 struct gl_image_unit *u,
1316 GLenum access,
1317 unsigned surface_idx,
1318 uint32_t *surf_offset,
1319 struct brw_image_param *param)
1320 {
1321 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1322 struct gl_texture_object *obj = u->TexObj;
1323 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1324
1325 if (obj->Target == GL_TEXTURE_BUFFER) {
1326 struct intel_buffer_object *intel_obj =
1327 intel_buffer_object(obj->BufferObject);
1328 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1329 _mesa_get_format_bytes(u->_ActualFormat));
1330
1331 brw->vtbl.emit_buffer_surface_state(
1332 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1333 format, intel_obj->Base.Size / texel_size, texel_size,
1334 access != GL_READ_ONLY);
1335
1336 update_buffer_image_param(brw, u, surface_idx, param);
1337
1338 } else {
1339 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1340 struct intel_mipmap_tree *mt = intel_obj->mt;
1341
1342 if (format == BRW_SURFACEFORMAT_RAW) {
1343 brw->vtbl.emit_buffer_surface_state(
1344 brw, surf_offset, mt->bo, mt->offset,
1345 format, mt->bo->size - mt->offset, 1 /* pitch */,
1346 access != GL_READ_ONLY);
1347
1348 } else {
1349 const unsigned min_layer = obj->MinLayer + u->_Layer;
1350 const unsigned min_level = obj->MinLevel + u->Level;
1351 const unsigned num_layers = (!u->Layered ? 1 :
1352 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1353 mt->logical_depth0);
1354 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1355 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1356 GL_TEXTURE_2D_ARRAY : obj->Target);
1357 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1358
1359 brw->vtbl.emit_texture_surface_state(
1360 brw, mt, target,
1361 min_layer, min_layer + num_layers,
1362 min_level, min_level + 1,
1363 format, SWIZZLE_XYZW,
1364 surf_offset, surf_index, access != GL_READ_ONLY, false);
1365 }
1366
1367 update_texture_image_param(brw, u, surface_idx, param);
1368 }
1369
1370 } else {
1371 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1372 update_default_image_param(brw, u, surface_idx, param);
1373 }
1374 }
1375
1376 void
1377 brw_upload_image_surfaces(struct brw_context *brw,
1378 struct gl_shader *shader,
1379 struct brw_stage_state *stage_state,
1380 struct brw_stage_prog_data *prog_data)
1381 {
1382 struct gl_context *ctx = &brw->ctx;
1383
1384 if (shader && shader->NumImages) {
1385 for (unsigned i = 0; i < shader->NumImages; i++) {
1386 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1387 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1388
1389 update_image_surface(brw, u, shader->ImageAccess[i],
1390 surf_idx,
1391 &stage_state->surf_offset[surf_idx],
1392 &prog_data->image_param[i]);
1393 }
1394
1395 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1396 /* This may have changed the image metadata that depends on the context
1397 * image unit state and is passed to the program as uniforms, so make sure
1398 * that push and pull constants are reuploaded.
1399 */
1400 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1401 }
1402 }
1403
1404 static void
1405 brw_upload_wm_image_surfaces(struct brw_context *brw)
1406 {
1407 struct gl_context *ctx = &brw->ctx;
1408 /* BRW_NEW_FRAGMENT_PROGRAM */
1409 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1410
1411 if (prog) {
1412 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1413 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1414 &brw->wm.base, &brw->wm.prog_data->base);
1415 }
1416 }
1417
1418 const struct brw_tracked_state brw_wm_image_surfaces = {
1419 .dirty = {
1420 .mesa = _NEW_TEXTURE,
1421 .brw = BRW_NEW_BATCH |
1422 BRW_NEW_BLORP |
1423 BRW_NEW_FRAGMENT_PROGRAM |
1424 BRW_NEW_FS_PROG_DATA |
1425 BRW_NEW_IMAGE_UNITS
1426 },
1427 .emit = brw_upload_wm_image_surfaces,
1428 };
1429
1430 void
1431 gen4_init_vtable_surface_functions(struct brw_context *brw)
1432 {
1433 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1434 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1435 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1436 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1437 }
1438
1439 static void
1440 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1441 {
1442 struct gl_context *ctx = &brw->ctx;
1443 /* _NEW_PROGRAM */
1444 struct gl_shader_program *prog =
1445 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1446
1447 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1448 const unsigned surf_idx =
1449 brw->cs.prog_data->binding_table.work_groups_start;
1450 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1451 drm_intel_bo *bo;
1452 uint32_t bo_offset;
1453
1454 if (brw->compute.num_work_groups_bo == NULL) {
1455 bo = NULL;
1456 intel_upload_data(brw,
1457 (void *)brw->compute.num_work_groups,
1458 3 * sizeof(GLuint),
1459 sizeof(GLuint),
1460 &bo,
1461 &bo_offset);
1462 } else {
1463 bo = brw->compute.num_work_groups_bo;
1464 bo_offset = brw->compute.num_work_groups_offset;
1465 }
1466
1467 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1468 bo, bo_offset,
1469 BRW_SURFACEFORMAT_RAW,
1470 3 * sizeof(GLuint), 1, true);
1471 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1472 }
1473 }
1474
1475 const struct brw_tracked_state brw_cs_work_groups_surface = {
1476 .dirty = {
1477 .brw = BRW_NEW_BLORP |
1478 BRW_NEW_CS_WORK_GROUPS
1479 },
1480 .emit = brw_upload_cs_work_groups_surface,
1481 };