i965: Allow texture surface state setup to be used by blorp
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "main/shaderimage.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_instruction.h"
40 #include "main/framebuffer.h"
41
42 #include "intel_mipmap_tree.h"
43 #include "intel_batchbuffer.h"
44 #include "intel_tex.h"
45 #include "intel_fbo.h"
46 #include "intel_buffer_objects.h"
47
48 #include "brw_context.h"
49 #include "brw_state.h"
50 #include "brw_defines.h"
51 #include "brw_wm.h"
52
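/**
 * Translate a GL texture target into the corresponding BRW_SURFACE_* type
 * for SURFACE_STATE.  Rectangle textures map to the ordinary 2D surface
 * type, and cube map arrays map to the cube type.
 */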
53 GLuint
54 translate_tex_target(GLenum target)
55 {
56 switch (target) {
57 case GL_TEXTURE_1D:
58 case GL_TEXTURE_1D_ARRAY_EXT:
59 return BRW_SURFACE_1D;
60
61 case GL_TEXTURE_RECTANGLE_NV:
62 return BRW_SURFACE_2D;
63
64 case GL_TEXTURE_2D:
65 case GL_TEXTURE_2D_ARRAY_EXT:
66 case GL_TEXTURE_EXTERNAL_OES:
67 case GL_TEXTURE_2D_MULTISAMPLE:
68 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
69 return BRW_SURFACE_2D;
70
71 case GL_TEXTURE_3D:
72 return BRW_SURFACE_3D;
73
74 case GL_TEXTURE_CUBE_MAP:
75 case GL_TEXTURE_CUBE_MAP_ARRAY:
76 return BRW_SURFACE_CUBE;
77
78 default:
79 unreachable("not reached");
80 }
81 }
82
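/**
 * Translate an I915_TILING_* mode into the tiling bits of SURFACE_STATE
 * dword 3.  Linear (untiled) surfaces return 0.
 */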
83 uint32_t
84 brw_get_surface_tiling_bits(uint32_t tiling)
85 {
86 switch (tiling) {
87 case I915_TILING_X:
88 return BRW_SURFACE_TILED;
89 case I915_TILING_Y:
90 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
91 default:
92 return 0;
93 }
94 }
95
96
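/**
 * Encode the sample count field of SURFACE_STATE.  The generations using
 * this layout support at most 4x MSAA, so any sample count greater than
 * one is encoded as MULTISAMPLECOUNT_4.
 */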
97 uint32_t
98 brw_get_surface_num_multisamples(unsigned num_samples)
99 {
100 if (num_samples > 1)
101 return BRW_SURFACE_MULTISAMPLECOUNT_4;
102 else
103 return BRW_SURFACE_MULTISAMPLECOUNT_1;
104 }
105
106 void
107 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
108 bool is_render_target,
109 unsigned *width, unsigned *height,
110 unsigned *pitch, uint32_t *tiling, unsigned *format)
111 {
112 static const unsigned halign_stencil = 8;
113
114 /* In Y-tiling, a row is twice as wide as in W-tiling, and consequently
115 * there are half as many rows.
116 * In addition, mip-levels are accessed manually by the program and
117 * therefore the surface is set up to cover all the mip-levels for one slice.
118 * (Hardware is still used to access individual slices).
119 */
120 *tiling = I915_TILING_Y;
121 *pitch = mt->pitch * 2;
122 *width = ALIGN(mt->total_width, halign_stencil) * 2;
123 *height = (mt->total_height / mt->physical_depth0) / 2;
124
125 if (is_render_target) {
126 *format = BRW_SURFACEFORMAT_R8_UINT;
127 }
128 }
129
130
131 /**
132 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
133 * swizzling.
134 */
135 int
136 brw_get_texture_swizzle(const struct gl_context *ctx,
137 const struct gl_texture_object *t)
138 {
139 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
140
141 int swizzles[SWIZZLE_NIL + 1] = {
142 SWIZZLE_X,
143 SWIZZLE_Y,
144 SWIZZLE_Z,
145 SWIZZLE_W,
146 SWIZZLE_ZERO,
147 SWIZZLE_ONE,
148 SWIZZLE_NIL
149 };
150
151 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
152 img->_BaseFormat == GL_DEPTH_STENCIL) {
153 GLenum depth_mode = t->DepthMode;
154
155 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
156 * with depth component data specified with a sized internal format.
157 * Otherwise, it's left at the old default, GL_LUMINANCE.
158 */
159 if (_mesa_is_gles3(ctx) &&
160 img->InternalFormat != GL_DEPTH_COMPONENT &&
161 img->InternalFormat != GL_DEPTH_STENCIL) {
162 depth_mode = GL_RED;
163 }
164
165 switch (depth_mode) {
166 case GL_ALPHA:
167 swizzles[0] = SWIZZLE_ZERO;
168 swizzles[1] = SWIZZLE_ZERO;
169 swizzles[2] = SWIZZLE_ZERO;
170 swizzles[3] = SWIZZLE_X;
171 break;
172 case GL_LUMINANCE:
173 swizzles[0] = SWIZZLE_X;
174 swizzles[1] = SWIZZLE_X;
175 swizzles[2] = SWIZZLE_X;
176 swizzles[3] = SWIZZLE_ONE;
177 break;
178 case GL_INTENSITY:
179 swizzles[0] = SWIZZLE_X;
180 swizzles[1] = SWIZZLE_X;
181 swizzles[2] = SWIZZLE_X;
182 swizzles[3] = SWIZZLE_X;
183 break;
184 case GL_RED:
185 swizzles[0] = SWIZZLE_X;
186 swizzles[1] = SWIZZLE_ZERO;
187 swizzles[2] = SWIZZLE_ZERO;
188 swizzles[3] = SWIZZLE_ONE;
189 break;
190 }
191 }
192
193 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
194
195 /* If the texture's format is alpha-only, force R, G, and B to
196 * 0.0. Similarly, if the texture's format has no alpha channel,
197 * force the alpha value read to 1.0. This allows for the
198 * implementation to use an RGBA texture for any of these formats
199 * without leaking any unexpected values.
200 */
201 switch (img->_BaseFormat) {
202 case GL_ALPHA:
203 swizzles[0] = SWIZZLE_ZERO;
204 swizzles[1] = SWIZZLE_ZERO;
205 swizzles[2] = SWIZZLE_ZERO;
206 break;
207 case GL_LUMINANCE:
208 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
209 swizzles[0] = SWIZZLE_X;
210 swizzles[1] = SWIZZLE_X;
211 swizzles[2] = SWIZZLE_X;
212 swizzles[3] = SWIZZLE_ONE;
213 }
214 break;
215 case GL_LUMINANCE_ALPHA:
216 if (datatype == GL_SIGNED_NORMALIZED) {
217 swizzles[0] = SWIZZLE_X;
218 swizzles[1] = SWIZZLE_X;
219 swizzles[2] = SWIZZLE_X;
220 swizzles[3] = SWIZZLE_W;
221 }
222 break;
223 case GL_INTENSITY:
224 if (datatype == GL_SIGNED_NORMALIZED) {
225 swizzles[0] = SWIZZLE_X;
226 swizzles[1] = SWIZZLE_X;
227 swizzles[2] = SWIZZLE_X;
228 swizzles[3] = SWIZZLE_X;
229 }
230 break;
231 case GL_RED:
232 case GL_RG:
233 case GL_RGB:
234 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
235 swizzles[3] = SWIZZLE_ONE;
236 break;
237 }
238
239 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
240 swizzles[GET_SWZ(t->_Swizzle, 1)],
241 swizzles[GET_SWZ(t->_Swizzle, 2)],
242 swizzles[GET_SWZ(t->_Swizzle, 3)]);
243 }
244
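/**
 * Emit a Gen4-style SURFACE_STATE of type BUFFER.  The buffer size is
 * packed into the width/height/depth fields, and a relocation is emitted
 * so the kernel can patch in the buffer address at execution time.
 */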
245 static void
246 gen4_emit_buffer_surface_state(struct brw_context *brw,
247 uint32_t *out_offset,
248 drm_intel_bo *bo,
249 unsigned buffer_offset,
250 unsigned surface_format,
251 unsigned buffer_size,
252 unsigned pitch,
253 bool rw)
254 {
255 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
256 6 * 4, 32, out_offset);
257 memset(surf, 0, 6 * 4);
258
259 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
260 surface_format << BRW_SURFACE_FORMAT_SHIFT |
261 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
262 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
263 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
264 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
265 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
266 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
267
268 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
269 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
270 * physical cache. It is mapped in hardware to the sampler cache."
271 */
272 if (bo) {
273 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
274 bo, buffer_offset,
275 I915_GEM_DOMAIN_SAMPLER,
276 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
277 }
278 }
279
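/**
 * Set up SURFACE_STATE for a buffer texture (GL_TEXTURE_BUFFER).  The size
 * is clamped to the underlying buffer object, and unsupported formats are
 * reported through _mesa_problem().
 */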
280 void
281 brw_update_buffer_texture_surface(struct gl_context *ctx,
282 unsigned unit,
283 uint32_t *surf_offset)
284 {
285 struct brw_context *brw = brw_context(ctx);
286 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
287 struct intel_buffer_object *intel_obj =
288 intel_buffer_object(tObj->BufferObject);
289 uint32_t size = tObj->BufferSize;
290 drm_intel_bo *bo = NULL;
291 mesa_format format = tObj->_BufferObjectFormat;
292 uint32_t brw_format = brw_format_for_mesa_format(format);
293 int texel_size = _mesa_get_format_bytes(format);
294
295 if (intel_obj) {
296 size = MIN2(size, intel_obj->Base.Size);
297 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
298 }
299
300 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
301 _mesa_problem(NULL, "bad format %s for texture buffer\n",
302 _mesa_get_format_name(format));
303 }
304
305 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
306 tObj->BufferOffset,
307 brw_format,
308 size / texel_size,
309 texel_size,
310 false /* rw */);
311 }
312
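/**
 * Construct SURFACE_STATE for a GL texture using the Gen4-6 layout,
 * including the Sandybridge gather4 format workaround when for_gather is
 * set.
 */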
313 static void
314 brw_update_texture_surface(struct gl_context *ctx,
315 unsigned unit,
316 uint32_t *surf_offset,
317 bool for_gather)
318 {
319 struct brw_context *brw = brw_context(ctx);
320 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
321 struct intel_texture_object *intelObj = intel_texture_object(tObj);
322 struct intel_mipmap_tree *mt = intelObj->mt;
323 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
324 uint32_t *surf;
325
326 /* BRW_NEW_TEXTURE_BUFFER */
327 if (tObj->Target == GL_TEXTURE_BUFFER) {
328 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
329 return;
330 }
331
332 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
333 6 * 4, 32, surf_offset);
334
335 uint32_t tex_format = translate_tex_format(brw, mt->format,
336 sampler->sRGBDecode);
337
338 if (for_gather) {
339 /* Sandybridge's gather4 message is broken for integer formats.
340 * To work around this, we pretend the surface is UNORM for
341 * 8 or 16-bit formats, and emit shader instructions to recover
342 * the real INT/UINT value. For 32-bit formats, we pretend
343 * the surface is FLOAT, and simply reinterpret the resulting
344 * bits.
345 */
346 switch (tex_format) {
347 case BRW_SURFACEFORMAT_R8_SINT:
348 case BRW_SURFACEFORMAT_R8_UINT:
349 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
350 break;
351
352 case BRW_SURFACEFORMAT_R16_SINT:
353 case BRW_SURFACEFORMAT_R16_UINT:
354 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
355 break;
356
357 case BRW_SURFACEFORMAT_R32_SINT:
358 case BRW_SURFACEFORMAT_R32_UINT:
359 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
360 break;
361
362 default:
363 break;
364 }
365 }
366
367 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
368 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
369 BRW_SURFACE_CUBEFACE_ENABLES |
370 tex_format << BRW_SURFACE_FORMAT_SHIFT);
371
372 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
373
374 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
375 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
376 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
377
378 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
379 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
380 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
381
382 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
383 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
384
385 surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
386
387 /* Emit relocation to surface contents */
388 drm_intel_bo_emit_reloc(brw->batch.bo,
389 *surf_offset + 4,
390 mt->bo,
391 surf[1] - mt->bo->offset64,
392 I915_GEM_DOMAIN_SAMPLER, 0);
393 }
394
395 /**
396 * Create the constant buffer surface. Vertex/fragment shader constants will be
397 * read from this buffer with Data Port Read instructions/messages.
398 */
399 void
400 brw_create_constant_surface(struct brw_context *brw,
401 drm_intel_bo *bo,
402 uint32_t offset,
403 uint32_t size,
404 uint32_t *out_offset)
405 {
406 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
407 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
408 size, 1, false);
409 }
410
411 /**
412 * Create the buffer surface. Shader buffer variables will be
413 * read from / written to this buffer with Data Port Read/Write
414 * instructions/messages.
415 */
416 void
417 brw_create_buffer_surface(struct brw_context *brw,
418 drm_intel_bo *bo,
419 uint32_t offset,
420 uint32_t size,
421 uint32_t *out_offset)
422 {
423 /* Use a raw surface so we can reuse existing untyped read/write/atomic
424 * messages. We need these specifically for the fragment shader since they
425 * include a pixel mask header that we need in order to ensure correct
426 * behavior with helper invocations, which cannot write to the buffer.
427 */
428 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
429 BRW_SURFACEFORMAT_RAW,
430 size, 1, true);
431 }
432
433 /**
434 * Set up a binding table entry for use by stream output logic (transform
435 * feedback).
436 *
437 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
438 */
439 void
440 brw_update_sol_surface(struct brw_context *brw,
441 struct gl_buffer_object *buffer_obj,
442 uint32_t *out_offset, unsigned num_vector_components,
443 unsigned stride_dwords, unsigned offset_dwords)
444 {
445 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
446 uint32_t offset_bytes = 4 * offset_dwords;
447 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
448 offset_bytes,
449 buffer_obj->Size - offset_bytes);
450 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
451 out_offset);
452 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
453 size_t size_dwords = buffer_obj->Size / 4;
454 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
455
456 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
457 * too big to map using a single binding table entry?
458 */
459 assert((size_dwords - offset_dwords) / stride_dwords
460 <= BRW_MAX_NUM_BUFFER_ENTRIES);
461
462 if (size_dwords > offset_dwords + num_vector_components) {
463 /* There is room for at least 1 transform feedback output in the buffer.
464 * Compute the number of additional transform feedback outputs the
465 * buffer has room for.
466 */
467 buffer_size_minus_1 =
468 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
469 } else {
470 /* There isn't even room for a single transform feedback output in the
471 * buffer. We can't configure the binding table entry to prevent output
472 * entirely; we'll have to rely on the geometry shader to detect
473 * overflow. But to minimize the damage in case of a bug, set up the
474 * binding table entry to just allow a single output.
475 */
476 buffer_size_minus_1 = 0;
477 }
478 width = buffer_size_minus_1 & 0x7f;
479 height = (buffer_size_minus_1 & 0xfff80) >> 7;
480 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
481
482 switch (num_vector_components) {
483 case 1:
484 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
485 break;
486 case 2:
487 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
488 break;
489 case 3:
490 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
491 break;
492 case 4:
493 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
494 break;
495 default:
496 unreachable("Invalid vector size for transform feedback output");
497 }
498
499 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
500 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
501 surface_format << BRW_SURFACE_FORMAT_SHIFT |
502 BRW_SURFACE_RC_READ_WRITE;
503 surf[1] = bo->offset64 + offset_bytes; /* reloc */
504 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
505 height << BRW_SURFACE_HEIGHT_SHIFT);
506 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
507 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
508 surf[4] = 0;
509 surf[5] = 0;
510
511 /* Emit relocation to surface contents. */
512 drm_intel_bo_emit_reloc(brw->batch.bo,
513 *out_offset + 4,
514 bo, offset_bytes,
515 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
516 }
517
518 /* Creates a new WM constant buffer reflecting the current fragment program's
519 * constants, if needed by the fragment program.
520 *
521 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
522 * state atom.
523 */
524 static void
525 brw_upload_wm_pull_constants(struct brw_context *brw)
526 {
527 struct brw_stage_state *stage_state = &brw->wm.base;
528 /* BRW_NEW_FRAGMENT_PROGRAM */
529 struct brw_fragment_program *fp =
530 (struct brw_fragment_program *) brw->fragment_program;
531 /* BRW_NEW_FS_PROG_DATA */
532 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
533
534 /* _NEW_PROGRAM_CONSTANTS */
535 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
536 stage_state, prog_data);
537 }
538
539 const struct brw_tracked_state brw_wm_pull_constants = {
540 .dirty = {
541 .mesa = _NEW_PROGRAM_CONSTANTS,
542 .brw = BRW_NEW_BATCH |
543 BRW_NEW_FRAGMENT_PROGRAM |
544 BRW_NEW_FS_PROG_DATA,
545 },
546 .emit = brw_upload_wm_pull_constants,
547 };
548
549 /**
550 * Creates a null renderbuffer surface.
551 *
552 * This is used when the shader doesn't write to any color output. An FB
553 * write to target 0 will still be emitted, because that's how the thread is
554 * terminated (and computed depth is returned), so we need to have the
555 * hardware discard the target 0 color output.
556 */
557 static void
558 brw_emit_null_surface_state(struct brw_context *brw,
559 unsigned width,
560 unsigned height,
561 unsigned samples,
562 uint32_t *out_offset)
563 {
564 /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
565 * Notes):
566 *
567 * A null surface will be used in instances where an actual surface is
568 * not bound. When a write message is generated to a null surface, no
569 * actual surface is written to. When a read message (including any
570 * sampling engine message) is generated to a null surface, the result
571 * is all zeros. Note that a null surface type is allowed to be used
572 * with all messages, even if it is not specifically indicated as
573 * supported. All of the remaining fields in surface state are ignored
574 * for null surfaces, with the following exceptions:
575 *
576 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
577 * depth buffer’s corresponding state for all render target surfaces,
578 * including null.
579 *
580 * - Surface Format must be R8G8B8A8_UNORM.
581 */
582 unsigned surface_type = BRW_SURFACE_NULL;
583 drm_intel_bo *bo = NULL;
584 unsigned pitch_minus_1 = 0;
585 uint32_t multisampling_state = 0;
586 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
587 out_offset);
588
589 if (samples > 1) {
590 /* On Gen6, null render targets seem to cause GPU hangs when
591 * multisampling. So work around this problem by rendering into a dummy
592 * color buffer.
593 *
594 * To decrease the amount of memory needed by the workaround buffer, we
595 * set its pitch to 128 bytes (the width of a Y tile). This means that
596 * the amount of memory needed for the workaround buffer is
597 * (width_in_tiles + height_in_tiles - 1) tiles.
598 *
599 * Note that since the workaround buffer will be interpreted by the
600 * hardware as an interleaved multisampled buffer, we need to compute
601 * width_in_tiles and height_in_tiles by dividing the width and height
602 * by 16 rather than the normal Y-tile size of 32.
603 */
604 unsigned width_in_tiles = ALIGN(width, 16) / 16;
605 unsigned height_in_tiles = ALIGN(height, 16) / 16;
606 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
607 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
608 size_needed);
609 bo = brw->wm.multisampled_null_render_target_bo;
610 surface_type = BRW_SURFACE_2D;
611 pitch_minus_1 = 127;
612 multisampling_state = brw_get_surface_num_multisamples(samples);
613 }
614
615 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
616 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
617 if (brw->gen < 6) {
618 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
619 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
620 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
621 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
622 }
623 surf[1] = bo ? bo->offset64 : 0;
624 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
625 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
626
627 /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
628 * Notes):
629 *
630 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
631 */
632 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
633 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
634 surf[4] = multisampling_state;
635 surf[5] = 0;
636
637 if (bo) {
638 drm_intel_bo_emit_reloc(brw->batch.bo,
639 *out_offset + 4,
640 bo, 0,
641 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
642 }
643 }
644
645 /**
646 * Sets up a surface state structure to point at the given region.
647 * While it is only used for the front/back buffer currently, it should be
648 * usable for further buffers when doing ARB_draw_buffers support.
649 */
650 static uint32_t
651 brw_update_renderbuffer_surface(struct brw_context *brw,
652 struct gl_renderbuffer *rb,
653 bool layered, unsigned unit,
654 uint32_t surf_index)
655 {
656 struct gl_context *ctx = &brw->ctx;
657 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
658 struct intel_mipmap_tree *mt = irb->mt;
659 uint32_t *surf;
660 uint32_t tile_x, tile_y;
661 uint32_t format = 0;
662 uint32_t offset;
663 /* _NEW_BUFFERS */
664 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
665 /* BRW_NEW_FS_PROG_DATA */
666
667 assert(!layered);
668
669 if (rb->TexImage && !brw->has_surface_tile_offset) {
670 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
671
672 if (tile_x != 0 || tile_y != 0) {
673 /* Original gen4 hardware couldn't draw to a non-tile-aligned
674 * destination in a miptree unless you actually set up your renderbuffer
675 * as a miptree and used the fragile lod/array_index/etc. controls to
676 * select the image. So, instead, we just make a new single-level
677 * miptree and render into that.
678 */
679 intel_renderbuffer_move_to_temp(brw, irb, false);
680 mt = irb->mt;
681 }
682 }
683
684 intel_miptree_used_for_rendering(irb->mt);
685
686 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
687
688 format = brw->render_target_format[rb_format];
689 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
690 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
691 __func__, _mesa_get_format_name(rb_format));
692 }
693
694 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
695 format << BRW_SURFACE_FORMAT_SHIFT);
696
697 /* reloc */
698 assert(mt->offset % mt->cpp == 0);
699 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
700 mt->bo->offset64 + mt->offset);
701
702 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
703 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
704
705 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
706 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
707
708 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
709
710 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
711 /* Note that the low bits of these fields are missing, so
712 * there's the possibility of getting into trouble.
713 */
714 assert(tile_x % 4 == 0);
715 assert(tile_y % 2 == 0);
716 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
717 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
718 (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
719
720 if (brw->gen < 6) {
721 /* _NEW_COLOR */
722 if (!ctx->Color.ColorLogicOpEnabled &&
723 (ctx->Color.BlendEnabled & (1 << unit)))
724 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
725
726 if (!ctx->Color.ColorMask[unit][0])
727 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
728 if (!ctx->Color.ColorMask[unit][1])
729 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
730 if (!ctx->Color.ColorMask[unit][2])
731 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
732
733 /* Disable writes to the alpha component when the
734 * renderbuffer is XRGB.
735 */
736 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
737 !ctx->Color.ColorMask[unit][3]) {
738 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
739 }
740 }
741
742 drm_intel_bo_emit_reloc(brw->batch.bo,
743 offset + 4,
744 mt->bo,
745 surf[1] - mt->bo->offset64,
746 I915_GEM_DOMAIN_RENDER,
747 I915_GEM_DOMAIN_RENDER);
748
749 return offset;
750 }
751
752 /**
753 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
754 */
755 void
756 brw_update_renderbuffer_surfaces(struct brw_context *brw,
757 const struct gl_framebuffer *fb,
758 uint32_t render_target_start,
759 uint32_t *surf_offset)
760 {
761 GLuint i;
762 const unsigned int w = _mesa_geometric_width(fb);
763 const unsigned int h = _mesa_geometric_height(fb);
764 const unsigned int s = _mesa_geometric_samples(fb);
765
766 /* Update surfaces for drawing buffers */
767 if (fb->_NumColorDrawBuffers >= 1) {
768 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
769 const uint32_t surf_index = render_target_start + i;
770
771 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
772 surf_offset[surf_index] =
773 brw->vtbl.update_renderbuffer_surface(
774 brw, fb->_ColorDrawBuffers[i],
775 _mesa_geometric_layers(fb) > 0, i, surf_index);
776 } else {
777 brw->vtbl.emit_null_surface_state(brw, w, h, s,
778 &surf_offset[surf_index]);
779 }
780 }
781 } else {
782 const uint32_t surf_index = render_target_start;
783 brw->vtbl.emit_null_surface_state(brw, w, h, s,
784 &surf_offset[surf_index]);
785 }
786 }
787
788 static void
789 update_renderbuffer_surfaces(struct brw_context *brw)
790 {
791 const struct gl_context *ctx = &brw->ctx;
792
793 /* _NEW_BUFFERS | _NEW_COLOR */
794 const struct gl_framebuffer *fb = ctx->DrawBuffer;
795 brw_update_renderbuffer_surfaces(
796 brw, fb,
797 brw->wm.prog_data->binding_table.render_target_start,
798 brw->wm.base.surf_offset);
799 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
800 }
801
802 const struct brw_tracked_state brw_renderbuffer_surfaces = {
803 .dirty = {
804 .mesa = _NEW_BUFFERS |
805 _NEW_COLOR,
806 .brw = BRW_NEW_BATCH |
807 BRW_NEW_FS_PROG_DATA,
808 },
809 .emit = update_renderbuffer_surfaces,
810 };
811
812 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
813 .dirty = {
814 .mesa = _NEW_BUFFERS,
815 .brw = BRW_NEW_BATCH,
816 },
817 .emit = update_renderbuffer_surfaces,
818 };
819
820
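/**
 * Emit texture surface state for every sampler used by the given program
 * stage.  When for_gather is set, the surfaces land in the gather section
 * of the binding table instead of the normal texture section.
 */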
821 static void
822 update_stage_texture_surfaces(struct brw_context *brw,
823 const struct gl_program *prog,
824 struct brw_stage_state *stage_state,
825 bool for_gather)
826 {
827 if (!prog)
828 return;
829
830 struct gl_context *ctx = &brw->ctx;
831
832 uint32_t *surf_offset = stage_state->surf_offset;
833
834 /* BRW_NEW_*_PROG_DATA */
835 if (for_gather)
836 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
837 else
838 surf_offset += stage_state->prog_data->binding_table.texture_start;
839
840 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
841 for (unsigned s = 0; s < num_samplers; s++) {
842 surf_offset[s] = 0;
843
844 if (prog->SamplersUsed & (1 << s)) {
845 const unsigned unit = prog->SamplerUnits[s];
846
847 /* _NEW_TEXTURE */
848 if (ctx->Texture.Unit[unit]._Current) {
849 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
850 }
851 }
852 }
853 }
854
855
856 /**
857 * Construct SURFACE_STATE objects for enabled textures.
858 */
859 static void
860 brw_update_texture_surfaces(struct brw_context *brw)
861 {
862 /* BRW_NEW_VERTEX_PROGRAM */
863 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
864
865 /* BRW_NEW_TESS_PROGRAMS */
866 struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
867 struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
868
869 /* BRW_NEW_GEOMETRY_PROGRAM */
870 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
871
872 /* BRW_NEW_FRAGMENT_PROGRAM */
873 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
874
875 /* _NEW_TEXTURE */
876 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
877 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, false);
878 update_stage_texture_surfaces(brw, tes, &brw->tes.base, false);
879 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
880 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
881
882 /* Emit an alternate set of surface state for gather. This
883 * allows the surface format to be overridden for only the
884 * gather4 messages. */
885 if (brw->gen < 8) {
886 if (vs && vs->UsesGather)
887 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
888 if (tcs && tcs->UsesGather)
889 update_stage_texture_surfaces(brw, tcs, &brw->tcs.base, true);
890 if (tes && tes->UsesGather)
891 update_stage_texture_surfaces(brw, tes, &brw->tes.base, true);
892 if (gs && gs->UsesGather)
893 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
894 if (fs && fs->UsesGather)
895 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
896 }
897
898 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
899 }
900
901 const struct brw_tracked_state brw_texture_surfaces = {
902 .dirty = {
903 .mesa = _NEW_TEXTURE,
904 .brw = BRW_NEW_BATCH |
905 BRW_NEW_FRAGMENT_PROGRAM |
906 BRW_NEW_FS_PROG_DATA |
907 BRW_NEW_GEOMETRY_PROGRAM |
908 BRW_NEW_GS_PROG_DATA |
909 BRW_NEW_TESS_PROGRAMS |
910 BRW_NEW_TCS_PROG_DATA |
911 BRW_NEW_TES_PROG_DATA |
912 BRW_NEW_TEXTURE_BUFFER |
913 BRW_NEW_VERTEX_PROGRAM |
914 BRW_NEW_VS_PROG_DATA,
915 },
916 .emit = brw_update_texture_surfaces,
917 };
918
919 static void
920 brw_update_cs_texture_surfaces(struct brw_context *brw)
921 {
922 /* BRW_NEW_COMPUTE_PROGRAM */
923 struct gl_program *cs = (struct gl_program *) brw->compute_program;
924
925 /* _NEW_TEXTURE */
926 update_stage_texture_surfaces(brw, cs, &brw->cs.base, false);
927
928 /* Emit an alternate set of surface state for gather. This
929 * allows the surface format to be overridden for only the
930 * gather4 messages.
931 */
932 if (brw->gen < 8) {
933 if (cs && cs->UsesGather)
934 update_stage_texture_surfaces(brw, cs, &brw->cs.base, true);
935 }
936
937 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
938 }
939
940 const struct brw_tracked_state brw_cs_texture_surfaces = {
941 .dirty = {
942 .mesa = _NEW_TEXTURE,
943 .brw = BRW_NEW_BATCH |
944 BRW_NEW_COMPUTE_PROGRAM,
945 },
946 .emit = brw_update_cs_texture_surfaces,
947 };
948
949
950 void
951 brw_upload_ubo_surfaces(struct brw_context *brw,
952 struct gl_shader *shader,
953 struct brw_stage_state *stage_state,
954 struct brw_stage_prog_data *prog_data)
955 {
956 struct gl_context *ctx = &brw->ctx;
957
958 if (!shader)
959 return;
960
961 uint32_t *ubo_surf_offsets =
962 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
963
964 for (int i = 0; i < shader->NumUniformBlocks; i++) {
965 struct gl_uniform_buffer_binding *binding =
966 &ctx->UniformBufferBindings[shader->UniformBlocks[i]->Binding];
967
968 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
969 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ubo_surf_offsets[i]);
970 } else {
971 struct intel_buffer_object *intel_bo =
972 intel_buffer_object(binding->BufferObject);
973 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
974 if (!binding->AutomaticSize)
975 size = MIN2(size, binding->Size);
976 drm_intel_bo *bo =
977 intel_bufferobj_buffer(brw, intel_bo,
978 binding->Offset,
979 size);
980 brw_create_constant_surface(brw, bo, binding->Offset,
981 size,
982 &ubo_surf_offsets[i]);
983 }
984 }
985
986 uint32_t *ssbo_surf_offsets =
987 &stage_state->surf_offset[prog_data->binding_table.ssbo_start];
988
989 for (int i = 0; i < shader->NumShaderStorageBlocks; i++) {
990 struct gl_shader_storage_buffer_binding *binding =
991 &ctx->ShaderStorageBufferBindings[shader->ShaderStorageBlocks[i]->Binding];
992
993 if (binding->BufferObject == ctx->Shared->NullBufferObj) {
994 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, &ssbo_surf_offsets[i]);
995 } else {
996 struct intel_buffer_object *intel_bo =
997 intel_buffer_object(binding->BufferObject);
998 GLsizeiptr size = binding->BufferObject->Size - binding->Offset;
999 if (!binding->AutomaticSize)
1000 size = MIN2(size, binding->Size);
1001 drm_intel_bo *bo =
1002 intel_bufferobj_buffer(brw, intel_bo,
1003 binding->Offset,
1004 size);
1005 brw_create_buffer_surface(brw, bo, binding->Offset,
1006 size,
1007 &ssbo_surf_offsets[i]);
1008 }
1009 }
1010
1011 if (shader->NumUniformBlocks || shader->NumShaderStorageBlocks)
1012 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1013 }
1014
1015 static void
1016 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
1017 {
1018 struct gl_context *ctx = &brw->ctx;
1019 /* _NEW_PROGRAM */
1020 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1021
1022 if (!prog)
1023 return;
1024
1025 /* BRW_NEW_FS_PROG_DATA */
1026 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1027 &brw->wm.base, &brw->wm.prog_data->base);
1028 }
1029
1030 const struct brw_tracked_state brw_wm_ubo_surfaces = {
1031 .dirty = {
1032 .mesa = _NEW_PROGRAM,
1033 .brw = BRW_NEW_BATCH |
1034 BRW_NEW_FS_PROG_DATA |
1035 BRW_NEW_UNIFORM_BUFFER,
1036 },
1037 .emit = brw_upload_wm_ubo_surfaces,
1038 };
1039
1040 static void
1041 brw_upload_cs_ubo_surfaces(struct brw_context *brw)
1042 {
1043 struct gl_context *ctx = &brw->ctx;
1044 /* _NEW_PROGRAM */
1045 struct gl_shader_program *prog =
1046 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1047
1048 if (!prog)
1049 return;
1050
1051 /* BRW_NEW_CS_PROG_DATA */
1052 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1053 &brw->cs.base, &brw->cs.prog_data->base);
1054 }
1055
1056 const struct brw_tracked_state brw_cs_ubo_surfaces = {
1057 .dirty = {
1058 .mesa = _NEW_PROGRAM,
1059 .brw = BRW_NEW_BATCH |
1060 BRW_NEW_CS_PROG_DATA |
1061 BRW_NEW_UNIFORM_BUFFER,
1062 },
1063 .emit = brw_upload_cs_ubo_surfaces,
1064 };
1065
1066 void
1067 brw_upload_abo_surfaces(struct brw_context *brw,
1068 struct gl_shader *shader,
1069 struct brw_stage_state *stage_state,
1070 struct brw_stage_prog_data *prog_data)
1071 {
1072 struct gl_context *ctx = &brw->ctx;
1073 uint32_t *surf_offsets =
1074 &stage_state->surf_offset[prog_data->binding_table.abo_start];
1075
1076 if (shader && shader->NumAtomicBuffers) {
1077 for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
1078 struct gl_atomic_buffer_binding *binding =
1079 &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding];
1080 struct intel_buffer_object *intel_bo =
1081 intel_buffer_object(binding->BufferObject);
1082 drm_intel_bo *bo = intel_bufferobj_buffer(
1083 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
1084
1085 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
1086 binding->Offset, BRW_SURFACEFORMAT_RAW,
1087 bo->size - binding->Offset, 1, true);
1088 }
1089
1090 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1091 }
1092 }
1093
1094 static void
1095 brw_upload_wm_abo_surfaces(struct brw_context *brw)
1096 {
1097 struct gl_context *ctx = &brw->ctx;
1098 /* _NEW_PROGRAM */
1099 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1100
1101 if (prog) {
1102 /* BRW_NEW_FS_PROG_DATA */
1103 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1104 &brw->wm.base, &brw->wm.prog_data->base);
1105 }
1106 }
1107
1108 const struct brw_tracked_state brw_wm_abo_surfaces = {
1109 .dirty = {
1110 .mesa = _NEW_PROGRAM,
1111 .brw = BRW_NEW_ATOMIC_BUFFER |
1112 BRW_NEW_BATCH |
1113 BRW_NEW_FS_PROG_DATA,
1114 },
1115 .emit = brw_upload_wm_abo_surfaces,
1116 };
1117
1118 static void
1119 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1120 {
1121 struct gl_context *ctx = &brw->ctx;
1122 /* _NEW_PROGRAM */
1123 struct gl_shader_program *prog =
1124 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1125
1126 if (prog) {
1127 /* BRW_NEW_CS_PROG_DATA */
1128 brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1129 &brw->cs.base, &brw->cs.prog_data->base);
1130 }
1131 }
1132
1133 const struct brw_tracked_state brw_cs_abo_surfaces = {
1134 .dirty = {
1135 .mesa = _NEW_PROGRAM,
1136 .brw = BRW_NEW_ATOMIC_BUFFER |
1137 BRW_NEW_BATCH |
1138 BRW_NEW_CS_PROG_DATA,
1139 },
1140 .emit = brw_upload_cs_abo_surfaces,
1141 };
1142
1143 static void
1144 brw_upload_cs_image_surfaces(struct brw_context *brw)
1145 {
1146 struct gl_context *ctx = &brw->ctx;
1147 /* _NEW_PROGRAM */
1148 struct gl_shader_program *prog =
1149 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1150
1151 if (prog) {
1152 /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1153 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
1154 &brw->cs.base, &brw->cs.prog_data->base);
1155 }
1156 }
1157
1158 const struct brw_tracked_state brw_cs_image_surfaces = {
1159 .dirty = {
1160 .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
1161 .brw = BRW_NEW_BATCH |
1162 BRW_NEW_CS_PROG_DATA |
1163 BRW_NEW_IMAGE_UNITS
1164 },
1165 .emit = brw_upload_cs_image_surfaces,
1166 };
1167
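/**
 * Pick the hardware surface format used to access a shader image.
 * Write-only access can use the format directly; read or read/write access
 * may have to fall back to a raw (untyped) surface where typed reads are
 * unsupported.
 */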
1168 static uint32_t
1169 get_image_format(struct brw_context *brw, mesa_format format, GLenum access)
1170 {
1171 if (access == GL_WRITE_ONLY) {
1172 return brw_format_for_mesa_format(format);
1173 } else {
1174 /* Typed surface reads support a very limited subset of the shader
1175 * image formats. Translate the format into the closest one the
1176 * hardware supports.
1177 */
1178 if ((_mesa_get_format_bytes(format) >= 16 && brw->gen <= 8) ||
1179 (_mesa_get_format_bytes(format) >= 8 &&
1180 (brw->gen == 7 && !brw->is_haswell)))
1181 return BRW_SURFACEFORMAT_RAW;
1182 else
1183 return brw_format_for_mesa_format(
1184 brw_lower_mesa_image_format(brw->intelScreen->devinfo, format));
1185 }
1186 }
1187
1188 static void
1189 update_default_image_param(struct brw_context *brw,
1190 struct gl_image_unit *u,
1191 unsigned surface_idx,
1192 struct brw_image_param *param)
1193 {
1194 memset(param, 0, sizeof(*param));
1195 param->surface_idx = surface_idx;
1196 /* Set the swizzling shifts to all-ones to effectively disable swizzling --
1197 * See emit_address_calculation() in brw_fs_surface_builder.cpp for a more
1198 * detailed explanation of these parameters.
1199 */
1200 param->swizzling[0] = 0xff;
1201 param->swizzling[1] = 0xff;
1202 }
1203
1204 static void
1205 update_buffer_image_param(struct brw_context *brw,
1206 struct gl_image_unit *u,
1207 unsigned surface_idx,
1208 struct brw_image_param *param)
1209 {
1210 struct gl_buffer_object *obj = u->TexObj->BufferObject;
1211
1212 update_default_image_param(brw, u, surface_idx, param);
1213
1214 param->size[0] = obj->Size / _mesa_get_format_bytes(u->_ActualFormat);
1215 param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
1216 }
1217
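/**
 * Fill in the brw_image_param metadata (dimensions, slice offsets, strides
 * and tiling parameters) for an image backed by a miptree level, as used by
 * the shader's address calculations.
 */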
1218 static void
1219 update_texture_image_param(struct brw_context *brw,
1220 struct gl_image_unit *u,
1221 unsigned surface_idx,
1222 struct brw_image_param *param)
1223 {
1224 struct intel_mipmap_tree *mt = intel_texture_object(u->TexObj)->mt;
1225
1226 update_default_image_param(brw, u, surface_idx, param);
1227
1228 param->size[0] = minify(mt->logical_width0, u->Level);
1229 param->size[1] = minify(mt->logical_height0, u->Level);
1230 param->size[2] = (!u->Layered ? 1 :
1231 u->TexObj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1232 u->TexObj->Target == GL_TEXTURE_3D ?
1233 minify(mt->logical_depth0, u->Level) :
1234 mt->logical_depth0);
1235
1236 intel_miptree_get_image_offset(mt, u->Level, u->_Layer,
1237 &param->offset[0],
1238 &param->offset[1]);
1239
1240 param->stride[0] = mt->cpp;
1241 param->stride[1] = mt->pitch / mt->cpp;
1242 param->stride[2] =
1243 brw_miptree_get_horizontal_slice_pitch(brw, mt, u->Level);
1244 param->stride[3] =
1245 brw_miptree_get_vertical_slice_pitch(brw, mt, u->Level);
1246
1247 if (mt->tiling == I915_TILING_X) {
1248 /* An X tile is a rectangular block of 512x8 bytes. */
1249 param->tiling[0] = _mesa_logbase2(512 / mt->cpp);
1250 param->tiling[1] = _mesa_logbase2(8);
1251
1252 if (brw->has_swizzling) {
1253 /* Right shifts required to swizzle bits 9 and 10 of the memory
1254 * address with bit 6.
1255 */
1256 param->swizzling[0] = 3;
1257 param->swizzling[1] = 4;
1258 }
1259 } else if (mt->tiling == I915_TILING_Y) {
1260 /* The layout of a Y-tiled surface in memory isn't really fundamentally
1261 * different from the layout of an X-tiled surface; we simply pretend that
1262 * the surface is broken up into a number of smaller 16Bx32 tiles, each
1263 * one arranged in X-major order just as is the case for X-tiling.
1264 */
1265 param->tiling[0] = _mesa_logbase2(16 / mt->cpp);
1266 param->tiling[1] = _mesa_logbase2(32);
1267
1268 if (brw->has_swizzling) {
1269 /* Right shift required to swizzle bit 9 of the memory address with
1270 * bit 6.
1271 */
1272 param->swizzling[0] = 3;
1273 }
1274 }
1275
1276 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
1277 * address calculation algorithm (emit_address_calculation() in
1278 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
1279 * modulus equal to the LOD.
1280 */
1281 param->tiling[2] = (u->TexObj->Target == GL_TEXTURE_3D ? u->Level :
1282 0);
1283 }
1284
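/**
 * Emit surface state and brw_image_param metadata for one shader image
 * unit, handling buffer images, raw access and ordinary texture images, or
 * a null surface when the unit is not in a valid state.
 */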
1285 static void
1286 update_image_surface(struct brw_context *brw,
1287 struct gl_image_unit *u,
1288 GLenum access,
1289 unsigned surface_idx,
1290 uint32_t *surf_offset,
1291 struct brw_image_param *param)
1292 {
1293 if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
1294 struct gl_texture_object *obj = u->TexObj;
1295 const unsigned format = get_image_format(brw, u->_ActualFormat, access);
1296
1297 if (obj->Target == GL_TEXTURE_BUFFER) {
1298 struct intel_buffer_object *intel_obj =
1299 intel_buffer_object(obj->BufferObject);
1300 const unsigned texel_size = (format == BRW_SURFACEFORMAT_RAW ? 1 :
1301 _mesa_get_format_bytes(u->_ActualFormat));
1302
1303 brw->vtbl.emit_buffer_surface_state(
1304 brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
1305 format, intel_obj->Base.Size / texel_size, texel_size,
1306 access != GL_READ_ONLY);
1307
1308 update_buffer_image_param(brw, u, surface_idx, param);
1309
1310 } else {
1311 struct intel_texture_object *intel_obj = intel_texture_object(obj);
1312 struct intel_mipmap_tree *mt = intel_obj->mt;
1313
1314 if (format == BRW_SURFACEFORMAT_RAW) {
1315 brw->vtbl.emit_buffer_surface_state(
1316 brw, surf_offset, mt->bo, mt->offset,
1317 format, mt->bo->size - mt->offset, 1 /* pitch */,
1318 access != GL_READ_ONLY);
1319
1320 } else {
1321 const unsigned min_layer = obj->MinLayer + u->_Layer;
1322 const unsigned min_level = obj->MinLevel + u->Level;
1323 const unsigned num_layers = (!u->Layered ? 1 :
1324 obj->Target == GL_TEXTURE_CUBE_MAP ? 6 :
1325 mt->logical_depth0);
1326 const GLenum target = (obj->Target == GL_TEXTURE_CUBE_MAP ||
1327 obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY ?
1328 GL_TEXTURE_2D_ARRAY : obj->Target);
1329 const int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
1330
1331 brw->vtbl.emit_texture_surface_state(
1332 brw, mt, target,
1333 min_layer, min_layer + num_layers,
1334 min_level, min_level + 1,
1335 format, SWIZZLE_XYZW,
1336 surf_offset, surf_index, access != GL_READ_ONLY, false);
1337 }
1338
1339 update_texture_image_param(brw, u, surface_idx, param);
1340 }
1341
1342 } else {
1343 brw->vtbl.emit_null_surface_state(brw, 1, 1, 1, surf_offset);
1344 update_default_image_param(brw, u, surface_idx, param);
1345 }
1346 }
1347
1348 void
1349 brw_upload_image_surfaces(struct brw_context *brw,
1350 struct gl_shader *shader,
1351 struct brw_stage_state *stage_state,
1352 struct brw_stage_prog_data *prog_data)
1353 {
1354 struct gl_context *ctx = &brw->ctx;
1355
1356 if (shader && shader->NumImages) {
1357 for (unsigned i = 0; i < shader->NumImages; i++) {
1358 struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[i]];
1359 const unsigned surf_idx = prog_data->binding_table.image_start + i;
1360
1361 update_image_surface(brw, u, shader->ImageAccess[i],
1362 surf_idx,
1363 &stage_state->surf_offset[surf_idx],
1364 &prog_data->image_param[i]);
1365 }
1366
1367 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1368 /* This may have changed the image metadata that depends on the context
1369 * image unit state and is passed to the program as uniforms, so make
1370 * sure that push and pull constants are reuploaded.
1371 */
1372 brw->NewGLState |= _NEW_PROGRAM_CONSTANTS;
1373 }
1374 }
1375
1376 static void
1377 brw_upload_wm_image_surfaces(struct brw_context *brw)
1378 {
1379 struct gl_context *ctx = &brw->ctx;
1380 /* BRW_NEW_FRAGMENT_PROGRAM */
1381 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
1382
1383 if (prog) {
1384 /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
1385 brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
1386 &brw->wm.base, &brw->wm.prog_data->base);
1387 }
1388 }
1389
1390 const struct brw_tracked_state brw_wm_image_surfaces = {
1391 .dirty = {
1392 .mesa = _NEW_TEXTURE,
1393 .brw = BRW_NEW_BATCH |
1394 BRW_NEW_FRAGMENT_PROGRAM |
1395 BRW_NEW_FS_PROG_DATA |
1396 BRW_NEW_IMAGE_UNITS
1397 },
1398 .emit = brw_upload_wm_image_surfaces,
1399 };
1400
1401 void
1402 gen4_init_vtable_surface_functions(struct brw_context *brw)
1403 {
1404 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1405 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1406 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1407 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1408 }
1409
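/**
 * Emit a raw buffer surface holding the number of work groups, so the
 * compute shader can read gl_NumWorkGroups.  If no buffer object with the
 * counts exists yet, the three GLuints are uploaded through the streaming
 * upload buffer first.
 */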
1410 static void
1411 brw_upload_cs_work_groups_surface(struct brw_context *brw)
1412 {
1413 struct gl_context *ctx = &brw->ctx;
1414 /* _NEW_PROGRAM */
1415 struct gl_shader_program *prog =
1416 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1417
1418 if (prog && brw->cs.prog_data->uses_num_work_groups) {
1419 const unsigned surf_idx =
1420 brw->cs.prog_data->binding_table.work_groups_start;
1421 uint32_t *surf_offset = &brw->cs.base.surf_offset[surf_idx];
1422 drm_intel_bo *bo;
1423 uint32_t bo_offset;
1424
1425 if (brw->compute.num_work_groups_bo == NULL) {
1426 bo = NULL;
1427 intel_upload_data(brw,
1428 (void *)brw->compute.num_work_groups,
1429 3 * sizeof(GLuint),
1430 sizeof(GLuint),
1431 &bo,
1432 &bo_offset);
1433 } else {
1434 bo = brw->compute.num_work_groups_bo;
1435 bo_offset = brw->compute.num_work_groups_offset;
1436 }
1437
1438 brw->vtbl.emit_buffer_surface_state(brw, surf_offset,
1439 bo, bo_offset,
1440 BRW_SURFACEFORMAT_RAW,
1441 3 * sizeof(GLuint), 1, true);
1442 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
1443 }
1444 }
1445
1446 const struct brw_tracked_state brw_cs_work_groups_surface = {
1447 .dirty = {
1448 .brw = BRW_NEW_CS_WORK_GROUPS
1449 },
1450 .emit = brw_upload_cs_work_groups_surface,
1451 };