i965/compute: Fix uniform init issue when SIMD8 is skipped
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_instruction.h"
40 #include "main/framebuffer.h"
41
42 #include "isl/isl.h"
43
44 #include "intel_mipmap_tree.h"
45 #include "intel_batchbuffer.h"
46 #include "intel_tex.h"
47 #include "intel_fbo.h"
48 #include "intel_buffer_objects.h"
49
50 #include "brw_context.h"
51 #include "brw_state.h"
52 #include "brw_defines.h"
53 #include "brw_wm.h"
54
55 GLuint
56 translate_tex_target(GLenum target)
57 {
58 switch (target) {
59 case GL_TEXTURE_1D:
60 case GL_TEXTURE_1D_ARRAY_EXT:
61 return BRW_SURFACE_1D;
62
63 case GL_TEXTURE_RECTANGLE_NV:
64 return BRW_SURFACE_2D;
65
66 case GL_TEXTURE_2D:
67 case GL_TEXTURE_2D_ARRAY_EXT:
68 case GL_TEXTURE_EXTERNAL_OES:
69 case GL_TEXTURE_2D_MULTISAMPLE:
70 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
71 return BRW_SURFACE_2D;
72
73 case GL_TEXTURE_3D:
74 return BRW_SURFACE_3D;
75
76 case GL_TEXTURE_CUBE_MAP:
77 case GL_TEXTURE_CUBE_MAP_ARRAY:
78 return BRW_SURFACE_CUBE;
79
80 default:
81 unreachable("not reached");
82 }
83 }
84
85 uint32_t
86 brw_get_surface_tiling_bits(uint32_t tiling)
87 {
88 switch (tiling) {
89 case I915_TILING_X:
90 return BRW_SURFACE_TILED;
91 case I915_TILING_Y:
92 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
93 default:
94 return 0;
95 }
96 }
97
98
99 uint32_t
100 brw_get_surface_num_multisamples(unsigned num_samples)
101 {
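/* Gen6 is the only pre-Gen7 part with multisampling support, and it only
 * supports 4x, so any multisampled surface can use the 4x encoding here.
 */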
102 if (num_samples > 1)
103 return BRW_SURFACE_MULTISAMPLECOUNT_4;
104 else
105 return BRW_SURFACE_MULTISAMPLECOUNT_1;
106 }
107
108 void
109 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
110 bool is_render_target,
111 unsigned *width, unsigned *height,
112 unsigned *pitch, uint32_t *tiling, unsigned *format)
113 {
114 static const unsigned halign_stencil = 8;
115
116 /* In Y-tiling, a row is twice as wide as in W-tiling, and consequently
117 * there are half as many rows.
118 * In addition, mip-levels are accessed manually by the program and
119 * therefore the surface is set up to cover all the mip-levels for one slice.
120 * (Hardware is still used to access individual slices).
121 */
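/* For reference: a W tile is 64 bytes x 64 rows while a Y tile is
 * 128 bytes x 32 rows, which is why the pitch doubles and the row count
 * halves when the stencil miptree is presented to the hardware as Y-tiled.
 */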
122 *tiling = I915_TILING_Y;
123 *pitch = mt->pitch * 2;
124 *width = ALIGN(mt->total_width, halign_stencil) * 2;
125 *height = (mt->total_height / mt->physical_depth0) / 2;
126
127 if (is_render_target) {
128 *format = BRW_SURFACEFORMAT_R8_UINT;
129 }
130 }
131
132
133 /**
134 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
135 * swizzling.
136 */
137 int
138 brw_get_texture_swizzle(const struct gl_context *ctx,
139 const struct gl_texture_object *t)
140 {
141 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
142
143 int swizzles[SWIZZLE_NIL + 1] = {
144 SWIZZLE_X,
145 SWIZZLE_Y,
146 SWIZZLE_Z,
147 SWIZZLE_W,
148 SWIZZLE_ZERO,
149 SWIZZLE_ONE,
150 SWIZZLE_NIL
151 };
152
153 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
154 img->_BaseFormat == GL_DEPTH_STENCIL) {
155 GLenum depth_mode = t->DepthMode;
156
157 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
158 * with depth component data specified with a sized internal format.
159 * Otherwise, it's left at the old default, GL_LUMINANCE.
160 */
161 if (_mesa_is_gles3(ctx) &&
162 img->InternalFormat != GL_DEPTH_COMPONENT &&
163 img->InternalFormat != GL_DEPTH_STENCIL) {
164 depth_mode = GL_RED;
165 }
166
167 switch (depth_mode) {
168 case GL_ALPHA:
169 swizzles[0] = SWIZZLE_ZERO;
170 swizzles[1] = SWIZZLE_ZERO;
171 swizzles[2] = SWIZZLE_ZERO;
172 swizzles[3] = SWIZZLE_X;
173 break;
174 case GL_LUMINANCE:
175 swizzles[0] = SWIZZLE_X;
176 swizzles[1] = SWIZZLE_X;
177 swizzles[2] = SWIZZLE_X;
178 swizzles[3] = SWIZZLE_ONE;
179 break;
180 case GL_INTENSITY:
181 swizzles[0] = SWIZZLE_X;
182 swizzles[1] = SWIZZLE_X;
183 swizzles[2] = SWIZZLE_X;
184 swizzles[3] = SWIZZLE_X;
185 break;
186 case GL_RED:
187 swizzles[0] = SWIZZLE_X;
188 swizzles[1] = SWIZZLE_ZERO;
189 swizzles[2] = SWIZZLE_ZERO;
190 swizzles[3] = SWIZZLE_ONE;
191 break;
192 }
193 }
194
195 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
196
197 /* If the texture's format is alpha-only, force R, G, and B to
198 * 0.0. Similarly, if the texture's format has no alpha channel,
199 * force the alpha value read to 1.0. This allows for the
200 * implementation to use an RGBA texture for any of these formats
201 * without leaking any unexpected values.
202 */
203 switch (img->_BaseFormat) {
204 case GL_ALPHA:
205 swizzles[0] = SWIZZLE_ZERO;
206 swizzles[1] = SWIZZLE_ZERO;
207 swizzles[2] = SWIZZLE_ZERO;
208 break;
209 case GL_LUMINANCE:
210 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
211 swizzles[0] = SWIZZLE_X;
212 swizzles[1] = SWIZZLE_X;
213 swizzles[2] = SWIZZLE_X;
214 swizzles[3] = SWIZZLE_ONE;
215 }
216 break;
217 case GL_LUMINANCE_ALPHA:
218 if (datatype == GL_SIGNED_NORMALIZED) {
219 swizzles[0] = SWIZZLE_X;
220 swizzles[1] = SWIZZLE_X;
221 swizzles[2] = SWIZZLE_X;
222 swizzles[3] = SWIZZLE_W;
223 }
224 break;
225 case GL_INTENSITY:
226 if (datatype == GL_SIGNED_NORMALIZED) {
227 swizzles[0] = SWIZZLE_X;
228 swizzles[1] = SWIZZLE_X;
229 swizzles[2] = SWIZZLE_X;
230 swizzles[3] = SWIZZLE_X;
231 }
232 break;
233 case GL_RED:
234 case GL_RG:
235 case GL_RGB:
236 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
237 swizzles[3] = SWIZZLE_ONE;
238 break;
239 }
240
241 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
242 swizzles[GET_SWZ(t->_Swizzle, 1)],
243 swizzles[GET_SWZ(t->_Swizzle, 2)],
244 swizzles[GET_SWZ(t->_Swizzle, 3)]);
245 }
246
247 static void
248 gen4_emit_buffer_surface_state(struct brw_context *brw,
249 uint32_t *out_offset,
250 drm_intel_bo *bo,
251 unsigned buffer_offset,
252 unsigned surface_format,
253 unsigned buffer_size,
254 unsigned pitch,
255 bool rw)
256 {
257 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
258 6 * 4, 32, out_offset);
259 memset(surf, 0, 6 * 4);
260
261 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
262 surface_format << BRW_SURFACE_FORMAT_SHIFT |
263 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
264 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
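/* The buffer size is programmed as the element count minus one, split
 * across the 7-bit width, 13-bit height and 7-bit depth fields below.
 * E.g. a 1000-element buffer stores 999: width = 103, height = 7, depth = 0.
 */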
265 surf[2] = ((buffer_size - 1) & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
266 (((buffer_size - 1) >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
267 surf[3] = (((buffer_size - 1) >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
268 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
269
270 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
271 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
272 * physical cache. It is mapped in hardware to the sampler cache."
273 */
274 if (bo) {
275 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
276 bo, buffer_offset,
277 I915_GEM_DOMAIN_SAMPLER,
278 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
279 }
280 }
281
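/* Sets up a SURFACE_STATE for a GL_TEXTURE_BUFFER texture: the backing
 * buffer object is exposed to the sampler as a buffer surface whose element
 * count and stride are derived from the buffer format's texel size.
 */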
282 void
283 brw_update_buffer_texture_surface(struct gl_context *ctx,
284 unsigned unit,
285 uint32_t *surf_offset)
286 {
287 struct brw_context *brw = brw_context(ctx);
288 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
289 struct intel_buffer_object *intel_obj =
290 intel_buffer_object(tObj->BufferObject);
291 uint32_t size = tObj->BufferSize;
292 drm_intel_bo *bo = NULL;
293 mesa_format format = tObj->_BufferObjectFormat;
294 uint32_t brw_format = brw_format_for_mesa_format(format);
295 int texel_size = _mesa_get_format_bytes(format);
296
297 if (intel_obj) {
298 size = MIN2(size, intel_obj->Base.Size);
299 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
300 }
301
302 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
303 _mesa_problem(NULL, "bad format %s for texture buffer\n",
304 _mesa_get_format_name(format));
305 }
306
307 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
308 tObj->BufferOffset,
309 brw_format,
310 size / texel_size,
311 texel_size,
312 false /* rw */);
313 }
314
315 static void
316 brw_update_texture_surface(struct gl_context *ctx,
317 unsigned unit,
318 uint32_t *surf_offset,
319 bool for_gather,
320 uint32_t plane)
321 {
322 struct brw_context *brw = brw_context(ctx);
323 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
324 struct intel_texture_object *intelObj = intel_texture_object(tObj);
325 struct intel_mipmap_tree *mt = intelObj->mt;
326 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
327 uint32_t *surf;
328
329 /* BRW_NEW_TEXTURE_BUFFER */
330 if (tObj->Target == GL_TEXTURE_BUFFER) {
331 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
332 return;
333 }
334
335 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
336 6 * 4, 32, surf_offset);
337
338 uint32_t tex_format = translate_tex_format(brw, mt->format,
339 sampler->sRGBDecode);
340
341 if (for_gather) {
342 /* Sandybridge's gather4 message is broken for integer formats.
343 * To work around this, we pretend the surface is UNORM for
344 * 8 or 16-bit formats, and emit shader instructions to recover
345 * the real INT/UINT value. For 32-bit formats, we pretend
346 * the surface is FLOAT, and simply reinterpret the resulting
347 * bits.
348 */
349 switch (tex_format) {
350 case BRW_SURFACEFORMAT_R8_SINT:
351 case BRW_SURFACEFORMAT_R8_UINT:
352 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
353 break;
354
355 case BRW_SURFACEFORMAT_R16_SINT:
356 case BRW_SURFACEFORMAT_R16_UINT:
357 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
358 break;
359
360 case BRW_SURFACEFORMAT_R32_SINT:
361 case BRW_SURFACEFORMAT_R32_UINT:
362 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
363 break;
364
365 default:
366 break;
367 }
368 }
369
370 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
371 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
372 BRW_SURFACE_CUBEFACE_ENABLES |
373 tex_format << BRW_SURFACE_FORMAT_SHIFT);
374
375 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
376
377 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
378 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
379 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
380
381 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
382 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
383 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
384
385 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
386 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
387
388 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
389
390 /* Emit relocation to surface contents */
391 drm_intel_bo_emit_reloc(brw->batch.bo,
392 *surf_offset + 4,
393 mt->bo,
394 surf[1] - mt->bo->offset64,
395 I915_GEM_DOMAIN_SAMPLER, 0);
396 }
397
398 /**
399 * Create the constant buffer surface. Vertex/fragment shader constants will be
400 * read from this buffer with Data Port Read instructions/messages.
401 */
402 void
403 brw_create_constant_surface(struct brw_context *brw,
404 drm_intel_bo *bo,
405 uint32_t offset,
406 uint32_t size,
407 uint32_t *out_offset)
408 {
409 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
410 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
411 size, 1, false);
412 }
413
414 /**
415 * Create the buffer surface. Shader buffer variables will be
416 * read from / written to this buffer with Data Port Read/Write
417 * instructions/messages.
418 */
419 void
420 brw_create_buffer_surface(struct brw_context *brw,
421 drm_intel_bo *bo,
422 uint32_t offset,
423 uint32_t size,
424 uint32_t *out_offset)
425 {
426 /* Use a raw surface so we can reuse existing untyped read/write/atomic
427 * messages. We need these specifically for the fragment shader since they
428 * include a pixel mask header that we need to ensure correct behavior
429 * with helper invocations, which cannot write to the buffer.
430 */
431 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
432 BRW_SURFACEFORMAT_RAW,
433 size, 1, true);
434 }
435
436 /**
437 * Set up a binding table entry for use by stream output logic (transform
438 * feedback).
439 *
440 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
441 */
442 void
443 brw_update_sol_surface(struct brw_context *brw,
444 struct gl_buffer_object *buffer_obj,
445 uint32_t *out_offset, unsigned num_vector_components,
446 unsigned stride_dwords, unsigned offset_dwords)
447 {
448 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
449 uint32_t offset_bytes = 4 * offset_dwords;
450 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
451 offset_bytes,
452 buffer_obj->Size - offset_bytes);
453 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
454 out_offset);
455 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
456 size_t size_dwords = buffer_obj->Size / 4;
457 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
458
459 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
460 * too big to map using a single binding table entry?
461 */
462 assert((size_dwords - offset_dwords) / stride_dwords
463 <= BRW_MAX_NUM_BUFFER_ENTRIES);
464
465 if (size_dwords > offset_dwords + num_vector_components) {
466 /* There is room for at least 1 transform feedback output in the buffer.
467 * Compute the number of additional transform feedback outputs the
468 * buffer has room for.
469 */
470 buffer_size_minus_1 =
471 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
472 } else {
473 /* There isn't even room for a single transform feedback output in the
474 * buffer. We can't configure the binding table entry to prevent output
475 * entirely; we'll have to rely on the geometry shader to detect
476 * overflow. But to minimize the damage in case of a bug, set up the
477 * binding table entry to just allow a single output.
478 */
479 buffer_size_minus_1 = 0;
480 }
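/* buffer_size_minus_1 is split across the 7-bit width, 13-bit height and
 * 7-bit depth fields of the surface state; e.g. a value of 0x12345 gives
 * width = 0x45, height = 0x246, depth = 0.
 */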
481 width = buffer_size_minus_1 & 0x7f;
482 height = (buffer_size_minus_1 & 0xfff80) >> 7;
483 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
484
485 switch (num_vector_components) {
486 case 1:
487 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
488 break;
489 case 2:
490 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
491 break;
492 case 3:
493 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
494 break;
495 case 4:
496 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
497 break;
498 default:
499 unreachable("Invalid vector size for transform feedback output");
500 }
501
502 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
503 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
504 surface_format << BRW_SURFACE_FORMAT_SHIFT |
505 BRW_SURFACE_RC_READ_WRITE;
506 surf[1] = bo->offset64 + offset_bytes; /* reloc */
507 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
508 height << BRW_SURFACE_HEIGHT_SHIFT);
509 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
510 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
511 surf[4] = 0;
512 surf[5] = 0;
513
514 /* Emit relocation to surface contents. */
515 drm_intel_bo_emit_reloc(brw->batch.bo,
516 *out_offset + 4,
517 bo, offset_bytes,
518 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
519 }
520
521 /* Creates a new WM constant buffer reflecting the current fragment program's
522 * constants, if needed by the fragment program.
523 *
524 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
525 * state atom.
526 */
527 static void
528 brw_upload_wm_pull_constants(struct brw_context *brw)
529 {
530 struct brw_stage_state *stage_state = &brw->wm.base;
531 /* BRW_NEW_FRAGMENT_PROGRAM */
532 struct brw_fragment_program *fp =
533 (struct brw_fragment_program *) brw->fragment_program;
534 /* BRW_NEW_FS_PROG_DATA */
535 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
536
537 /* _NEW_PROGRAM_CONSTANTS */
538 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
539 stage_state, prog_data);
540 }
541
542 const struct brw_tracked_state brw_wm_pull_constants = {
543 .dirty = {
544 .mesa = _NEW_PROGRAM_CONSTANTS,
545 .brw = BRW_NEW_BATCH |
546 BRW_NEW_BLORP |
547 BRW_NEW_FRAGMENT_PROGRAM |
548 BRW_NEW_FS_PROG_DATA,
549 },
550 .emit = brw_upload_wm_pull_constants,
551 };
552
553 /**
554 * Creates a null renderbuffer surface.
555 *
556 * This is used when the shader doesn't write to any color output. An FB
557 * write to target 0 will still be emitted, because that's how the thread is
558 * terminated (and computed depth is returned), so we need to have the
559 * hardware discard the target 0 color output.
560 */
561 static void
562 brw_emit_null_surface_state(struct brw_context *brw,
563 unsigned width,
564 unsigned height,
565 unsigned samples,
566 uint32_t *out_offset)
567 {
568 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
569 * Notes):
570 *
571 * A null surface will be used in instances where an actual surface is
572 * not bound. When a write message is generated to a null surface, no
573 * actual surface is written to. When a read message (including any
574 * sampling engine message) is generated to a null surface, the result
575 * is all zeros. Note that a null surface type is allowed to be used
576 * with all messages, even if it is not specifically indicated as
577 * supported. All of the remaining fields in surface state are ignored
578 * for null surfaces, with the following exceptions:
579 *
580 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
581 * depth buffer’s corresponding state for all render target surfaces,
582 * including null.
583 *
584 * - Surface Format must be R8G8B8A8_UNORM.
585 */
586 unsigned surface_type = BRW_SURFACE_NULL;
587 drm_intel_bo *bo = NULL;
588 unsigned pitch_minus_1 = 0;
589 uint32_t multisampling_state = 0;
590 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
591 out_offset);
592
593 if (samples > 1) {
594 /* On Gen6, null render targets seem to cause GPU hangs when
595 * multisampling. So work around this problem by rendering into a dummy
596 * color buffer.
597 *
598 * To decrease the amount of memory needed by the workaround buffer, we
599 * set its pitch to 128 bytes (the width of a Y tile). This means that
600 * the amount of memory needed for the workaround buffer is
601 * (width_in_tiles + height_in_tiles - 1) tiles.
602 *
603 * Note that since the workaround buffer will be interpreted by the
604 * hardware as an interleaved multisampled buffer, we need to compute
605 * width_in_tiles and height_in_tiles by dividing the width and height
606 * by 16 rather than the normal Y-tile size of 32.
607 */
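/* For example, a hypothetical 1920x1080 multisampled target gives
 * width_in_tiles = 120 and height_in_tiles = 68, so the workaround buffer
 * only needs (120 + 68 - 1) * 4096 bytes, i.e. about 748 KiB.
 */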
608 unsigned width_in_tiles = ALIGN(width, 16) / 16;
609 unsigned height_in_tiles = ALIGN(height, 16) / 16;
610 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
611 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
612 size_needed);
613 bo = brw->wm.multisampled_null_render_target_bo;
614 surface_type = BRW_SURFACE_2D;
615 pitch_minus_1 = 127;
616 multisampling_state = brw_get_surface_num_multisamples(samples);
617 }
618
619 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
620 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
621 if (brw->gen < 6) {
622 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
623 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
624 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
625 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
626 }
627 surf[1] = bo ? bo->offset64 : 0;
628 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
629 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
630
631 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
632 * Notes):
633 *
634 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
635 */
636 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
637 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
638 surf[4] = multisampling_state;
639 surf[5] = 0;
640
641 if (bo) {
642 drm_intel_bo_emit_reloc(brw->batch.bo,
643 *out_offset + 4,
644 bo, 0,
645 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
646 }
647 }
648
649 /**
650 * Sets up a surface state structure to point at the given region.
651 * While it is only used for the front/back buffer currently, it should be
652 * usable for further buffers when doing ARB_draw_buffers support.
653 */
654 static uint32_t
655 brw_update_renderbuffer_surface(struct brw_context *brw,
656 struct gl_renderbuffer *rb,
657 bool layered, unsigned unit,
658 uint32_t surf_index)
659 {
660 struct gl_context *ctx = &brw->ctx;
661 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
662 struct intel_mipmap_tree *mt = irb->mt;
663 uint32_t *surf;
664 uint32_t tile_x, tile_y;
665 uint32_t format = 0;
666 uint32_t offset;
667 /* _NEW_BUFFERS */
668 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
669 /* BRW_NEW_FS_PROG_DATA */
670
671 assert(!layered);
672
673 if (rb->TexImage && !brw->has_surface_tile_offset) {
674 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
675
676 if (tile_x != 0 || tile_y != 0) {
677 /* Original gen4 hardware couldn't draw to a non-tile-aligned
678 * destination in a miptree unless you actually set up your renderbuffer
679 * as a miptree and used the fragile lod/array_index/etc. controls to
680 * select the image. So, instead, we just make a new single-level
681 * miptree and render into that.
682 */
683 intel_renderbuffer_move_to_temp(brw, irb, false);
684 mt = irb->mt;
685 }
686 }
687
688 intel_miptree_used_for_rendering(irb->mt);
689
690 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
691
692 format = brw->render_target_format[rb_format];
693 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
694 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
695 __func__, _mesa_get_format_name(rb_format));
696 }
697
698 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
699 format << BRW_SURFACE_FORMAT_SHIFT);
700
701 /* reloc */
702 assert(mt->offset % mt->cpp == 0);
703 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
704 mt->bo->offset64 + mt->offset);
705
706 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
707 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
708
709 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
710 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
711
712 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
713
714 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
715 /* Note that the low bits of these fields are missing, so
716 * there's the possibility of getting in trouble.
717 */
718 assert(tile_x % 4 == 0);
719 assert(tile_y % 2 == 0);
720 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
721 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
722 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
723
724 if (brw->gen < 6) {
725 /* _NEW_COLOR */
726 if (!ctx->Color.ColorLogicOpEnabled &&
727 (ctx->Color.BlendEnabled & (1 << unit)))
728 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
729
730 if (!ctx->Color.ColorMask[unit][0])
731 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
732 if (!ctx->Color.ColorMask[unit][1])
733 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
734 if (!ctx->Color.ColorMask[unit][2])
735 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
736
737 /* As mentioned above, disable writes to the alpha component when the
738 * renderbuffer is XRGB.
739 */
740 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
741 !ctx->Color.ColorMask[unit][3]) {
742 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
743 }
744 }
745
746 drm_intel_bo_emit_reloc(brw->batch.bo,
747 offset + 4,
748 mt->bo,
749 surf[1] - mt->bo->offset64,
750 I915_GEM_DOMAIN_RENDER,
751 I915_GEM_DOMAIN_RENDER);
752
753 return offset;
754 }
755
756 /**
757 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
758 */
759 void
760 brw_update_renderbuffer_surfaces(struct brw_context *brw,
761 const struct gl_framebuffer *fb,
762 uint32_t render_target_start,
763 uint32_t *surf_offset)
764 {
765 GLuint i;
766 const unsigned int w = _mesa_geometric_width(fb);
767 const unsigned int h = _mesa_geometric_height(fb);
768 const unsigned int s = _mesa_geometric_samples(fb);
769
770 /* Update surfaces for drawing buffers */
771 if (fb->_NumColorDrawBuffers >= 1) {
772 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
773 const uint32_t surf_index = render_target_start + i;
774
775 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
776 surf_offset[surf_index] =
777 brw->vtbl.update_renderbuffer_surface(
778 brw, fb->_ColorDrawBuffers[i],
779 _mesa_geometric_layers(fb) > 0, i, surf_index);
780 } else {
781 brw->vtbl.emit_null_surface_state(brw, w, h, s,
782 &surf_offset[surf_index]);
783 }
784 }
785 } else {
786 const uint32_t surf_index = render_target_start;
787 brw->vtbl.emit_null_surface_state(brw, w, h, s,
788 &surf_offset[surf_index]);
789 }
790 }
791
792 static void
793 update_renderbuffer_surfaces(struct brw_context *brw)
794 {
795 const struct gl_context *ctx = &brw->ctx;
796
797 /* _NEW_BUFFERS | _NEW_COLOR */
798 const struct gl_framebuffer *fb = ctx->DrawBuffer;
799 brw_update_renderbuffer_surfaces(
800 brw, fb,
801 brw->wm.prog_data->binding_table.render_target_start,
802 brw->wm.base.surf_offset);
803 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
804 }
805
806 const struct brw_tracked_state brw_renderbuffer_surfaces = {
807 .dirty = {
808 .mesa = _NEW_BUFFERS |
809 _NEW_COLOR,
810 .brw = BRW_NEW_BATCH |
811 BRW_NEW_BLORP |
812 BRW_NEW_FS_PROG_DATA,
813 },
814 .emit = update_renderbuffer_surfaces,
815 };
816
817 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
818 .dirty = {
819 .mesa = _NEW_BUFFERS,
820 .brw = BRW_NEW_BATCH |
821 BRW_NEW_BLORP,
822 },
823 .emit = update_renderbuffer_surfaces,
824 };
825
826
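/* Walks the samplers used by one shader stage and emits a SURFACE_STATE for
 * each one that has a texture bound, writing the resulting offsets into the
 * stage's binding table region (regular, gather or per-plane, as selected by
 * the for_gather/plane arguments).
 */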
827 static void
828 update_stage_texture_surfaces(struct brw_context *brw,
829 const struct gl_program *prog,
830 struct brw_stage_state *stage_state,
831 bool for_gather, uint32_t plane)
832 {
833 if (!prog)
834 return;
835
836 struct gl_context *ctx = &brw->ctx;
837
838 uint32_t *surf_offset = stage_state->surf_offset;
839
840 /* BRW_NEW_*_PROG_DATA */
841 if (for_gather)
842 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
843 else
844 surf_offset += stage_state->prog_data->binding_table.plane_start[plane];
845
846 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
847 for (unsigned s = 0; s < num_samplers; s++) {
848 surf_offset[s] = 0;
849
850 if (prog->SamplersUsed & (1 << s)) {
851 const unsigned unit = prog->SamplerUnits[s];
852
853 /* _NEW_TEXTURE */
854 if (ctx->Texture.Unit[unit]._Current) {
855 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather, plane);
856 }
857 }
858 }
859 }
860
861
862 /**
863 * Construct SURFACE_STATE objects for enabled textures.
864 */
865 static void
866 brw_update_texture_surfaces(struct brw_context *brw)
867 {
868 /* BRW_NEW_VERTEX_PROGRAM */
869 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
870
871 /* BRW_NEW_TESS_PROGRAMS */
872 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
873 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
874
875 /* BRW_NEW_GEOMETRY_PROGRAM */
876 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
877
878 /* BRW_NEW_FRAGMENT_PROGRAM */
879 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
880
881 /* _NEW_TEXTURE */
882 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false, 0);
883 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false, 0);
884 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false, 0);
885 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false, 0);
886 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 0);
887
888 /* Emit an alternate set of surface state for gather. This
889 * allows the surface format to be overridden for only the
890 * gather4 messages. */
891 if (brw->gen < 8) {
892 if (vs && vs->UsesGather)
893 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true, 0);
894 if (tcs && tcs->UsesGather)
895 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true, 0);
896 if (tes && tes->UsesGather)
897 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true, 0);
898 if (gs && gs->UsesGather)
899 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true, 0);
900 if (fs && fs->UsesGather)
901 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true, 0);
902 }
903
904 if (fs) {
905 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 1);
906 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false, 2);
907 }
908
909 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
910 }
911
912 const struct brw_tracked_state brw_texture_surfaces = {
913 .dirty = {
914 .mesa = _NEW_TEXTURE,
915 .brw = BRW_NEW_BATCH |
916 BRW_NEW_BLORP |
917 BRW_NEW_FRAGMENT_PROGRAM |
918 BRW_NEW_FS_PROG_DATA |
919 BRW_NEW_GEOMETRY_PROGRAM |
920 BRW_NEW_GS_PROG_DATA |
921 BRW_NEW_TESS_PROGRAMS |
922 BRW_NEW_TCS_PROG_DATA |
923 BRW_NEW_TES_PROG_DATA |
924 BRW_NEW_TEXTURE_BUFFER |
925 BRW_NEW_VERTEX_PROGRAM |
926 BRW_NEW_VS_PROG_DATA,
927 },
928 .emit = brw_update_texture_surfaces,
929 };
930
931 static void
932 brw_update_cs_texture_surfaces(struct brw_context *brw)
933 {
934 /* BRW_NEW_COMPUTE_PROGRAM */
935 struct gl_program *cs = (struct gl_program *) brw->compute_program;
936
937 /* _NEW_TEXTURE */
938 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false, 0);
939
940 /* Emit an alternate set of surface state for gather. This
941 * allows the surface format to be overridden for only the
942 * gather4 messages.
943 */
944 if (brw->gen < 8) {
945 if (cs && cs->UsesGather)
946 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true, 0);
947 }
948
949 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
950 }
951
952 const struct brw_tracked_state brw_cs_texture_surfaces = {
953 .dirty = {
954 .mesa = _NEW_TEXTURE,
955 .brw = BRW_NEW_BATCH |
956 BRW_NEW_BLORP |
957 BRW_NEW_COMPUTE_PROGRAM,
958 },
959 .emit = brw_update_cs_texture_surfaces,
960 };
961
962
963 void
964 brw_upload_ubo_surfaces(struct brw_context *brw,
965 struct gl_shader *shader,
966 struct brw_stage_state *stage_state,
967 struct brw_stage_prog_data *prog_data)
968 {
969 struct gl_context *ctx = &brw->ctx;
970
971 if (!shader)
972 return;
973
974 uint32_t *ubo_surf_offsets =
975 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
976
977 for (int i = 0; i < shader->NumUniformBlocks; i++) {
978 struct gl_uniform_buffer_binding *binding =
979 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
980
981 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
982 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
983 } else {
984 struct intel_buffer_object *intel_bo =
985 intel_buffer_object(binding->BufferObject);
986 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
987 if (!binding->AutomaticSize)
988 size = MIN2(size, binding->Size);
989 drm_intel_bo *bo =
990 intel_bufferobj_buffer(brw, intel_bo,
991 binding->Offset,
992 size);
993 brw_create_constant_surface(brw, bo, binding->Offset,
994 size,
995 &ubo_surf_offsets[i]);
996 }
997 }
998
999 uint32_t *ssbo_surf_offsets =
1000 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
1001
1002 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
1003 struct gl_shader_storage_buffer_binding *binding =
1004 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
1005
1006 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
1007 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
1008 } else {
1009 struct intel_buffer_object *intel_bo =
1010 intel_buffer_object(binding->BufferObject);
1011 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
1012 if (!binding->AutomaticSize)
1013 size = MIN2(size, binding->Size);
1014 drm_intel_bo *bo =
1015 intel_bufferobj_buffer(brw, intel_bo,
1016 binding->Offset,
1017 size);
1018 brw_create_buffer_surface(brw, bo, binding->Offset,
1019 size,
1020 &ssbo_surf_offsets[i]);
1021 }
1022 }
1023
1024 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1025 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1026 }
1027
1028 static void
1029 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1030 {
1031 struct gl_context *ctx = &brw->ctx;
1032 /* _NEW_PROGRAM */
1033 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1034
1035 if (!prog)
1036 return;
1037
1038 /* BRW_NEW_FS_PROG_DATA */
1039 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1040 &brw->wm.base, &brw->wm.prog_data->base);
1041 }
1042
1043 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1044 .dirty = {
1045 .mesa = _NEW_PROGRAM,
1046 .brw = BRW_NEW_BATCH |
1047 BRW_NEW_BLORP |
1048 BRW_NEW_FS_PROG_DATA |
1049 BRW_NEW_UNIFORM_BUFFER,
1050 },
1051 .emit = brw_upload_wm_ubo_surfaces,
1052 };
1053
1054 static void
1055 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1056 {
1057 struct gl_context *ctx = &brw->ctx;
1058 /* _NEW_PROGRAM */
1059 struct gl_shader_program *prog =
1060 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1061
1062 if (!prog)
1063 return;
1064
1065 /* BRW_NEW_CS_PROG_DATA */
1066 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1067 &brw->cs.base, &brw->cs.prog_data->base);
1068 }
1069
1070 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1071 .dirty = {
1072 .mesa = _NEW_PROGRAM,
1073 .brw = BRW_NEW_BATCH |
1074 BRW_NEW_BLORP |
1075 BRW_NEW_CS_PROG_DATA |
1076 BRW_NEW_UNIFORM_BUFFER,
1077 },
1078 .emit = brw_upload_cs_ubo_surfaces,
1079 };
1080
1081 void
1082 brw_upload_abo_surfaces(struct brw_context *brw,
1083 struct gl_shader *shader,
1084 struct brw_stage_state *stage_state,
1085 struct brw_stage_prog_data *prog_data)
1086 {
1087 struct gl_context *ctx = &brw->ctx;
1088 uint32_t *surf_offsets =
1089 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1090
1091 if (shader && shader->NumAtomicBuffers) {
1092 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1093 struct gl_atomic_buffer_binding *binding =
1094 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1095 struct intel_buffer_object *intel_bo =
1096 intel_buffer_object(binding->BufferObject);
1097 drm_intel_bo *bo = intel_bufferobj_buffer(
1098 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1099
1100 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1101 binding->Offset, BRW_SURFACEFORMAT_RAW,
1102 bo->size - binding->Offset, 1, true);
1103 }
1104
1105 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1106 }
1107 }
1108
1109 static void
1110 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1111 {
1112 struct gl_context *ctx = &brw->ctx;
1113 /* _NEW_PROGRAM */
1114 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1115
1116 if (prog) {
1117 /* BRW_NEW_FS_PROG_DATA */
1118 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1119 &brw->wm.base, &brw->wm.prog_data->base);
1120 }
1121 }
1122
1123 const struct brw_tracked_state brw_wm_abo_surfaces = {
1124 .dirty = {
1125 .mesa = _NEW_PROGRAM,
1126 .brw = BRW_NEW_ATOMIC_BUFFER |
1127 BRW_NEW_BLORP |
1128 BRW_NEW_BATCH |
1129 BRW_NEW_FS_PROG_DATA,
1130 },
1131 .emit = brw_upload_wm_abo_surfaces,
1132 };
1133
1134 static void
1135 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1136 {
1137 struct gl_context *ctx = &brw->ctx;
1138 /* _NEW_PROGRAM */
1139 struct gl_shader_program *prog =
1140 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1141
1142 if (prog) {
1143 /* BRW_NEW_CS_PROG_DATA */
1144 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1145 &brw->cs.base, &brw->cs.prog_data->base);
1146 }
1147 }
1148
1149 const struct brw_tracked_state brw_cs_abo_surfaces = {
1150 .dirty = {
1151 .mesa = _NEW_PROGRAM,
1152 .brw = BRW_NEW_ATOMIC_BUFFER |
1153 BRW_NEW_BLORP |
1154 BRW_NEW_BATCH |
1155 BRW_NEW_CS_PROG_DATA,
1156 },
1157 .emit = brw_upload_cs_abo_surfaces,
1158 };
1159
1160 static void
1161 brw_upload_cs_image_surfaces(struct brw_context *brw)
1162 {
1163 struct gl_context *ctx = &brw->ctx;
1164 /* _NEW_PROGRAM */
1165 struct gl_shader_program *prog =
1166 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1167
1168 if (prog) {
1169 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1170 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1171 &brw->cs.base, &brw->cs.prog_data->base);
1172 }
1173 }
1174
1175 const struct brw_tracked_state brw_cs_image_surfaces = {
1176 .dirty = {
1177 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1178 .brw = BRW_NEW_BATCH |
1179 BRW_NEW_BLORP |
1180 BRW_NEW_CS_PROG_DATA |
1181 BRW_NEW_IMAGE_UNITS
1182 },
1183 .emit = brw_upload_cs_image_surfaces,
1184 };
1185
1186 static uint32_t
1187 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1188 {
1189 const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
1190 uint32_t hw_format = brw_format_for_mesa_format(format);
1191 if (access == GL_WRITE_ONLY) {
1192 return hw_format;
1193 } else if (isl_has_matching_typed_storage_image_format(devinfo, hw_format)) {
1194 /* Typed surface reads support a very limited subset of the shader
1195 * image formats. Translate it into the closest format the
1196 * hardware supports.
1197 */
1198 return isl_lower_storage_image_format(devinfo, hw_format);
1199 } else {
1200 /* The hardware doesn't actually support a typed format that we can use
1201 * so we have to fall back to untyped read/write messages.
1202 */
1203 return BRW_SURFACEFORMAT_RAW;
1204 }
1205 }
1206
1207 static void
1208 update_default_image_param(struct brw_context *brw,
1209 struct gl_image_unit *u,
1210 unsigned surface_idx,
1211 struct brw_image_param *param)
1212 {
1213 memset(param, 0, sizeof(*param));
1214 param->surface_idx = surface_idx;
1215 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1216 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1217 * detailed explanation of these parameters.
1218 */
1219 param->swizzling[0] = 0xff;
1220 param->swizzling[1] = 0xff;
1221 }
1222
1223 static void
1224 update_buffer_image_param(struct brw_context *brw,
1225 struct gl_image_unit *u,
1226 unsigned surface_idx,
1227 struct brw_image_param *param)
1228 {
1229 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1230
1231 update_default_image_param(brw, u, surface_idx, param);
1232
1233 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1234 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1235 }
1236
1237 static void
1238 update_texture_image_param(struct brw_context *brw,
1239 struct gl_image_unit *u,
1240 unsigned surface_idx,
1241 struct brw_image_param *param)
1242 {
1243 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1244
1245 update_default_image_param(brw, u, surface_idx, param);
1246
1247 param->size[0] = minify(mt->logical_width0, u->Level);
1248 param->size[1] = minify(mt->logical_height0, u->Level);
1249 param->size[2] = (!u->Layered ? 1 :
1250 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1251 u->TexObj->Target == GL_TEXTURE_3D ?
1252 minify(mt->logical_depth0, u->Level) :
1253 mt->logical_depth0);
1254
1255 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1256 &param->offset[0],
1257 &param->offset[1]);
1258
1259 param->stride[0] = mt->cpp;
1260 param->stride[1] = mt->pitch / mt->cpp;
1261 param->stride[2] =
1262 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1263 param->stride[3] =
1264 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1265
1266 if (mt->tiling == I915_TILING_X) {
1267 /* An X tile is a rectangular block of 512x8 bytes. */
1268 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1269 param->tiling[1] = _mesa_logbase2(8);
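/* E.g. for a 4-byte-per-texel format this gives tiling[0] = log2(128) = 7
 * and tiling[1] = log2(8) = 3.
 */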
1270
1271 if (brw->has_swizzling) {
1272 /* Right shifts required to swizzle bits 9 and 10 of the memory
1273 * address with bit 6.
1274 */
1275 param->swizzling[0] = 3;
1276 param->swizzling[1] = 4;
1277 }
1278 } else if (mt->tiling == I915_TILING_Y) {
1279 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1280 * different from the layout of an X-tiled surface; we simply pretend that
1281 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1282 * one arranged in X-major order just as is the case for X-tiling.
1283 */
1284 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1285 param->tiling[1] = _mesa_logbase2(32);
1286
1287 if (brw->has_swizzling) {
1288 /* Right shift required to swizzle bit 9 of the memory address with
1289 * bit 6.
1290 */
1291 param->swizzling[0] = 3;
1292 }
1293 }
1294
1295 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1296 * address calculation algorithm (emit_address_calculation() in
1297 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1298 * modulus equal to the LOD.
1299 */
1300 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1301 0);
1302 }
1303
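/* Emits the SURFACE_STATE and fills in the brw_image_param metadata for one
 * image unit: buffer textures get a buffer surface, images that had to fall
 * back to the RAW format get an untyped surface over the whole miptree BO,
 * and everything else gets a typed 2D surface for the bound level/layer
 * range. Invalid image units get a null surface.
 */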
1304 static void
1305 update_image_surface(struct brw_context *brw,
1306 struct gl_image_unit *u,
1307 GLenum access,
1308 unsigned surface_idx,
1309 uint32_t *surf_offset,
1310 struct brw_image_param *param)
1311 {
1312 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1313 struct gl_texture_object *obj = u->TexObj;
1314 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1315
1316 if (obj->Target == GL_TEXTURE_BUFFER) {
1317 struct intel_buffer_object *intel_obj =
1318 intel_buffer_object(obj->BufferObject);
1319 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1320 _mesa_get_format_bytes(u->_ActualFormat));
1321
1322 brw->vtbl.emit_buffer_surface_state(
1323 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1324 format, intel_obj->Base.Size / texel_size, texel_size,
1325 access != GL_READ_ONLY);
1326
1327 update_buffer_image_param(brw, u, surface_idx, param);
1328
1329 } else {
1330 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1331 struct intel_mipmap_tree *mt = intel_obj->mt;
1332
1333 if (format == BRW_SURFACEFORMAT_RAW) {
1334 brw->vtbl.emit_buffer_surface_state(
1335 brw, surf_offset, mt->bo, mt->offset,
1336 format, mt->bo->size - mt->offset, 1 /* pitch */,
1337 access != GL_READ_ONLY);
1338
1339 } else {
1340 const unsigned min_layer = obj->MinLayer + u->_Layer;
1341 const unsigned min_level = obj->MinLevel + u->Level;
1342 const unsigned num_layers = (!u->Layered ? 1 :
1343 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1344 mt->logical_depth0);
1345 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1346 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1347 GL_TEXTURE_2D_ARRAY : obj->Target);
1348 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1349
1350 brw->vtbl.emit_texture_surface_state(
1351 brw, mt, target,
1352 min_layer, min_layer + num_layers,
1353 min_level, min_level + 1,
1354 format, SWIZZLE_XYZW,
1355 surf_offset, surf_index, access != GL_READ_ONLY, false);
1356 }
1357
1358 update_texture_image_param(brw, u, surface_idx, param);
1359 }
1360
1361 } else {
1362 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1363 update_default_image_param(brw, u, surface_idx, param);
1364 }
1365 }
1366
1367 void
1368 brw_upload_image_surfaces(struct brw_context *brw,
1369 struct gl_shader *shader,
1370 struct brw_stage_state *stage_state,
1371 struct brw_stage_prog_data *prog_data)
1372 {
1373 struct gl_context *ctx = &brw->ctx;
1374
1375 if (shader && shader->NumImages) {
1376 for (unsigned i = 0; i < shader->NumImages; i++) {
1377 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1378 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1379
1380 update_image_surface(brw, u, shader->ImageAccess[i],
1381 surf_idx,
1382 &stage_state->surf_offset[surf_idx],
1383 &prog_data->image_param[i]);
1384 }
1385
1386 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1387 /* This may have changed the image metadata dependent on the context
1388 * image unit state and passed to the program as uniforms, so make sure
1389 * that push and pull constants are reuploaded.
1390 */
1391 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1392 }
1393 }
1394
1395 static void
1396 brw_upload_wm_image_surfaces(struct brw_context *brw)
1397 {
1398 struct gl_context *ctx = &brw->ctx;
1399 /* BRW_NEW_FRAGMENT_PROGRAM */
1400 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1401
1402 if (prog) {
1403 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1404 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1405 &brw->wm.base, &brw->wm.prog_data->base);
1406 }
1407 }
1408
1409 const struct brw_tracked_state brw_wm_image_surfaces = {
1410 .dirty = {
1411 .mesa = _NEW_TEXTURE,
1412 .brw = BRW_NEW_BATCH |
1413 BRW_NEW_BLORP |
1414 BRW_NEW_FRAGMENT_PROGRAM |
1415 BRW_NEW_FS_PROG_DATA |
1416 BRW_NEW_IMAGE_UNITS
1417 },
1418 .emit = brw_upload_wm_image_surfaces,
1419 };
1420
1421 void
1422 gen4_init_vtable_surface_functions(struct brw_context *brw)
1423 {
1424 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1425 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1426 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1427 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1428 }
1429
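/* Uploads the buffer surface backing gl_NumWorkGroups: an indirect dispatch
 * reuses the indirect-parameters BO, while a direct glDispatchCompute call
 * has its three counts uploaded inline from brw->compute.num_work_groups.
 */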
1430 static void
1431 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1432 {
1433 struct gl_context *ctx = &brw->ctx;
1434 /* _NEW_PROGRAM */
1435 struct gl_shader_program *prog =
1436 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1437
1438 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1439 const unsigned surf_idx =
1440 brw->cs.prog_data->binding_table.work_groups_start;
1441 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1442 drm_intel_bo *bo;
1443 uint32_t bo_offset;
1444
1445 if (brw->compute.num_work_groups_bo == NULL) {
1446 bo = NULL;
1447 intel_upload_data(brw,
1448 (void *)brw->compute.num_work_groups,
1449 3 * sizeof(GLuint),
1450 sizeof(GLuint),
1451 &bo,
1452 &bo_offset);
1453 } else {
1454 bo = brw->compute.num_work_groups_bo;
1455 bo_offset = brw->compute.num_work_groups_offset;
1456 }
1457
1458 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1459 bo, bo_offset,
1460 BRW_SURFACEFORMAT_RAW,
1461 3 * sizeof(GLuint), 1, true);
1462 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1463 }
1464 }
1465
1466 const struct brw_tracked_state brw_cs_work_groups_surface = {
1467 .dirty = {
1468 .brw = BRW_NEW_BLORP |
1469 BRW_NEW_CS_WORK_GROUPS
1470 },
1471 .emit = brw_upload_cs_work_groups_surface,
1472 };